// sk1/compresor.c
//
// File compression helpers:
//   compress_1 / decompress_1 - Huffman coding
//   compress_2 / decompress_2 - run-length encoding (RLE)
//
// Provenance: introduced by the patch "Add sk1/compresor.c"
// (Yurii Chechur, Tue, 24 Dec 2024, commit 6750abf18ae79feda51039946d27bbc212d1d87a).
//
// Error policy (kept from the original code): every failure is reported on
// stderr and terminates the process with EXIT_FAILURE.

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Pull in the public prototypes whenever the project header is on the include
// path. The guard only exists so that this translation unit can also be built
// standalone (for example by a unit-test harness).
#if defined(__has_include)
#  if __has_include("compressor.h")
#    include "compressor.h"
#  endif
#else
#  include "compressor.h"
#endif

#define BUFSIZE 1024

enum {
    SYMBOL_COUNT = 256, // number of distinct byte values
    RLE_MAX_RUN = 255   // longest run that fits into the one-byte RLE counter
};

// Huffman Tree Node
struct MinHeapNode {
    char data;                        // byte value (meaningful for leaves only)
    unsigned freq;                    // occurrence count of the whole subtree
    struct MinHeapNode *left, *right; // both NULL for a leaf
};

// MinHeap
struct MinHeap {
    unsigned size;              // number of nodes currently stored
    unsigned capacity;          // number of slots allocated in array
    struct MinHeapNode **array; // binary heap ordered by freq
};

// Report the failed operation together with errno and terminate.
static void die_errno(const char *what)
{
    perror(what);
    exit(EXIT_FAILURE);
}

// Report a failure that has no meaningful errno and terminate.
static void die_msg(const char *message)
{
    fprintf(stderr, "%s\n", message);
    exit(EXIT_FAILURE);
}

// malloc() that never returns NULL: running out of memory is fatal.
static void *xmalloc(size_t bytes)
{
    void *block = malloc(bytes ? bytes : 1);
    if (!block) {
        die_errno("malloc");
    }
    return block;
}

// Open the input for reading and the output for writing (both binary).
// The input is opened first so that a missing input no longer truncates an
// existing output file.
static void open_files(const char *input_file_name, const char *output_file_name,
                       FILE **input, FILE **output)
{
    *input = fopen(input_file_name, "rb");
    if (!*input) {
        die_errno("File error");
    }
    *output = fopen(output_file_name, "wb");
    if (!*output) {
        die_errno("File error");
    }
}

// Close both streams and fail loudly if any read or write went wrong;
// fclose() on the output is checked because it performs the final flush.
static void close_files(FILE *input, FILE *output)
{
    int failed = ferror(input) || ferror(output);
    fclose(input);
    if (fclose(output) != 0) {
        failed = 1;
    }
    if (failed) {
        die_msg("File error: read or write failed");
    }
}

// Release a whole Huffman tree (NULL is accepted).
static void free_tree(struct MinHeapNode *node)
{
    if (!node) {
        return;
    }
    free_tree(node->left);
    free_tree(node->right);
    free(node);
}

// NOTE(review): the helpers below keep external linkage and their original
// signatures because compressor.h is not visible from here; if the header
// does not declare them they should become static.

// Create a new leaf node holding `data` with weight `freq`.
// The caller owns the returned node.
struct MinHeapNode *newNode(char data, unsigned freq)
{
    struct MinHeapNode *node = xmalloc(sizeof *node);
    node->data = data;
    node->freq = freq;
    node->left = NULL;
    node->right = NULL;
    return node;
}

// Create an empty MinHeap with room for `capacity` nodes.
// The caller owns the heap and its array.
struct MinHeap *createMinHeap(unsigned capacity)
{
    struct MinHeap *heap = xmalloc(sizeof *heap);
    heap->size = 0;
    heap->capacity = capacity;
    heap->array = xmalloc(capacity * sizeof *heap->array);
    return heap;
}

// Swap two min heap nodes
void swapMinHeapNode(struct MinHeapNode **a, struct MinHeapNode **b)
{
    struct MinHeapNode *tmp = *a;
    *a = *b;
    *b = tmp;
}

// Sift the node at `idx` down until the heap property holds again.
// The comparison order (left child first, strict '<') decides how ties are
// broken and therefore the shape of the Huffman tree; it must not change,
// otherwise previously compressed files would decode differently.
void minHeapify(struct MinHeap *minHeap, int idx)
{
    if (idx < 0) {
        return;
    }

    unsigned cur = (unsigned)idx;
    // Only indices below size / 2 have children, so 2 * cur + 2 <= size and
    // the index arithmetic cannot overflow.
    while (cur < minHeap->size / 2) {
        unsigned smallest = cur;
        unsigned left = 2 * cur + 1;
        unsigned right = 2 * cur + 2;

        if (left < minHeap->size &&
            minHeap->array[left]->freq < minHeap->array[smallest]->freq) {
            smallest = left;
        }
        if (right < minHeap->size &&
            minHeap->array[right]->freq < minHeap->array[smallest]->freq) {
            smallest = right;
        }
        if (smallest == cur) {
            break;
        }

        swapMinHeapNode(&minHeap->array[smallest], &minHeap->array[cur]);
        cur = smallest;
    }
}

// Remove and return the node with the smallest freq,
// or NULL when the heap is empty.
struct MinHeapNode *extractMin(struct MinHeap *minHeap)
{
    if (minHeap->size == 0) {
        return NULL;
    }

    struct MinHeapNode *min = minHeap->array[0];
    minHeap->array[0] = minHeap->array[--minHeap->size];
    minHeapify(minHeap, 0);
    return min;
}

// Insert a node into the MinHeap, growing the array when it is full.
void insertMinHeap(struct MinHeap *minHeap, struct MinHeapNode *minHeapNode)
{
    if (minHeap->size == minHeap->capacity) {
        if (minHeap->capacity > UINT_MAX / 2) {
            die_msg("MinHeap: capacity overflow");
        }
        unsigned grown = minHeap->capacity ? minHeap->capacity * 2 : 1;
        struct MinHeapNode **tmp = realloc(minHeap->array, grown * sizeof *tmp);
        if (!tmp) {
            die_errno("realloc");
        }
        minHeap->array = tmp;
        minHeap->capacity = grown;
    }

    // Sift up: move parents down until the slot for the new node is found.
    unsigned i = minHeap->size++;
    while (i > 0) {
        unsigned parent = (i - 1) / 2;
        if (minHeapNode->freq >= minHeap->array[parent]->freq) {
            break;
        }
        minHeap->array[i] = minHeap->array[parent];
        i = parent;
    }
    minHeap->array[i] = minHeapNode;
}

// Build a MinHeap with one leaf per (data[i], freq[i]) pair.
// A non-positive `size` yields an empty heap.
struct MinHeap *buildMinHeap(char data[], int freq[], int size)
{
    unsigned count = size > 0 ? (unsigned)size : 0;
    struct MinHeap *heap = createMinHeap(count);

    for (unsigned i = 0; i < count; ++i) {
        heap->array[i] = newNode(data[i], (unsigned)freq[i]);
    }
    heap->size = count;

    // Heapify from the last parent (count / 2 - 1) down to the root. Counting
    // with "i-- > 0" avoids the unsigned wrap-around of "(size - 2) / 2" that
    // the previous version hit for heaps with fewer than two nodes.
    for (unsigned i = count / 2; i-- > 0;) {
        minHeapify(heap, (int)i);
    }
    return heap;
}

// Build the Huffman tree for the given symbols and frequencies.
// Returns the root (a single leaf when size == 1) or NULL when size <= 0.
// The caller owns the returned tree.
struct MinHeapNode *buildHuffmanTree(char data[], int freq[], int size)
{
    if (size <= 0) {
        return NULL;
    }

    struct MinHeap *heap = buildMinHeap(data, freq, size);

    // Repeatedly merge the two lightest subtrees until one tree is left.
    while (heap->size > 1) {
        struct MinHeapNode *left = extractMin(heap);
        struct MinHeapNode *right = extractMin(heap);

        struct MinHeapNode *parent = newNode('$', left->freq + right->freq);
        parent->left = left;
        parent->right = right;

        insertMinHeap(heap, parent);
    }

    struct MinHeapNode *root = extractMin(heap);
    free(heap->array);
    free(heap);
    return root;
}

// Generate Huffman Codes.
// Walks the tree and stores, for every leaf, a heap-allocated string of '0'
// (left) and '1' (right) characters in codes[byte value]; the caller frees
// them. `buffer` is scratch space that must hold the deepest path plus the
// terminator, `top` is the current depth. A tree that is a single leaf gets
// the empty code "".
void generateCodes(struct MinHeapNode *root, char **codes, char *buffer, int top)
{
    if (!root) {
        return;
    }

    if (root->left) {
        buffer[top] = '0';
        generateCodes(root->left, codes, buffer, top + 1);
    }
    if (root->right) {
        buffer[top] = '1';
        generateCodes(root->right, codes, buffer, top + 1);
    }
    if (!root->left && !root->right) {
        buffer[top] = '\0';
        // Copy by hand: strdup() is not part of ISO C11.
        char *code = xmalloc((size_t)top + 1);
        memcpy(code, buffer, (size_t)top + 1);
        codes[(unsigned char)root->data] = code;
    }
}

// Compress using Huffman encoding.
//
// Output layout (host byte order, unchanged from the original):
//   int size                     number of distinct byte values
//   size x { byte, int freq }    symbol table in ascending byte order
//   payload                      code bits, MSB first, last byte zero-padded
//
// An empty input produces only "size = 0". An input with a single distinct
// byte value produces no payload, because its code is empty; the decoder
// restores it from the frequency alone.
void compress_1(const char *input_file_name, const char *output_file_name)
{
    FILE *input = NULL;
    FILE *output = NULL;
    open_files(input_file_name, output_file_name, &input, &output);

    // Pass 1: count how often every byte value occurs.
    unsigned long long counts[SYMBOL_COUNT] = {0};
    unsigned char buffer[BUFSIZE];
    size_t bytes_read;

    while ((bytes_read = fread(buffer, 1, sizeof buffer, input)) > 0) {
        for (size_t i = 0; i < bytes_read; i++) {
            counts[buffer[i]]++;
        }
    }
    if (ferror(input)) {
        die_errno("File error");
    }

    char data[SYMBOL_COUNT];
    int freq_array[SYMBOL_COUNT];
    int size = 0;

    for (int i = 0; i < SYMBOL_COUNT; i++) {
        if (counts[i] == 0) {
            continue;
        }
        // The header stores every frequency as an int.
        if (counts[i] > INT_MAX) {
            die_msg("File error: input too large for the Huffman header");
        }
        data[size] = (char)i;
        freq_array[size] = (int)counts[i];
        size++;
    }

    // Header: the decoder rebuilds the identical tree from this table.
    fwrite(&size, sizeof size, 1, output);
    for (int i = 0; i < size; i++) {
        fputc((unsigned char)data[i], output);
        fwrite(&freq_array[i], sizeof freq_array[i], 1, output);
    }

    struct MinHeapNode *root = buildHuffmanTree(data, freq_array, size);
    char *codes[SYMBOL_COUNT] = {NULL};
    char code_buffer[SYMBOL_COUNT]; // deepest possible path is 255 plus '\0'
    generateCodes(root, codes, code_buffer, 0);

    // Pass 2: re-read the input and emit the code of every byte.
    if (fseek(input, 0L, SEEK_SET) != 0) {
        die_errno("File error");
    }

    unsigned char bit_buffer = 0;
    int bit_count = 0;

    while ((bytes_read = fread(buffer, 1, sizeof buffer, input)) > 0) {
        for (size_t i = 0; i < bytes_read; i++) {
            const char *code = codes[buffer[i]];
            if (!code) {
                // A byte that was absent during pass 1.
                die_msg("File error: input changed while it was being compressed");
            }
            for (const char *p = code; *p; p++) {
                bit_buffer = (unsigned char)((bit_buffer << 1) | (*p - '0'));
                bit_count++;
                if (bit_count == 8) {
                    fputc(bit_buffer, output);
                    bit_buffer = 0;
                    bit_count = 0;
                }
            }
        }
    }

    // Left-align the remaining bits; the decoder ignores the padding because
    // it knows the total symbol count from the header.
    if (bit_count > 0) {
        bit_buffer = (unsigned char)(bit_buffer << (8 - bit_count));
        fputc(bit_buffer, output);
    }

    for (int i = 0; i < SYMBOL_COUNT; i++) {
        free(codes[i]);
    }
    free_tree(root);
    close_files(input, output);
}

// Decompress using Huffman encoding.
//
// Reads the symbol table written by compress_1, rebuilds the tree and decodes
// exactly as many symbols as the frequencies add up to, so the zero padding
// of the last payload byte is never turned into output. A malformed or
// truncated file is reported and terminates the process.
void decompress_1(const char *input_file_name, const char *output_file_name)
{
    FILE *input = NULL;
    FILE *output = NULL;
    open_files(input_file_name, output_file_name, &input, &output);

    int size = 0;
    if (fread(&size, sizeof size, 1, input) != 1) {
        die_msg("File error: missing Huffman header");
    }
    if (size < 0 || size > SYMBOL_COUNT) {
        die_msg("File error: invalid symbol count in Huffman header");
    }

    char data[SYMBOL_COUNT];
    int freq[SYMBOL_COUNT];
    unsigned long long remaining = 0; // symbols still to be written

    for (int i = 0; i < size; i++) {
        int symbol = fgetc(input);
        if (symbol == EOF || fread(&freq[i], sizeof freq[i], 1, input) != 1) {
            die_msg("File error: truncated Huffman header");
        }
        if (freq[i] <= 0) {
            die_msg("File error: invalid frequency in Huffman header");
        }
        data[i] = (char)symbol;
        remaining += (unsigned long long)freq[i];
    }

    struct MinHeapNode *root = buildHuffmanTree(data, freq, size);

    if (root && !root->left && !root->right) {
        // Single distinct symbol: its code is empty, so there is no payload.
        for (; remaining > 0; remaining--) {
            fputc((unsigned char)root->data, output);
        }
    } else if (root) {
        const struct MinHeapNode *current = root;
        int byte;

        while (remaining > 0 && (byte = fgetc(input)) != EOF) {
            for (int bit = 7; bit >= 0 && remaining > 0; bit--) {
                current = ((byte >> bit) & 1) ? current->right : current->left;

                if (!current->left && !current->right) {
                    fputc((unsigned char)current->data, output);
                    current = root;
                    remaining--;
                }
            }
        }
        if (remaining > 0) {
            die_msg("File error: compressed data ends prematurely");
        }
    }

    free_tree(root);
    close_files(input, output);
}

// Compress using RLE.
//
// The output is a sequence of (byte, count) pairs with count in 1..255.
// Longer runs are split into several pairs: the previous version stored the
// run length modulo 256, which corrupted runs of more than 255 bytes.
void compress_2(const char *input_file_name, const char *output_file_name)
{
    FILE *input = NULL;
    FILE *output = NULL;
    open_files(input_file_name, output_file_name, &input, &output);

    int current = fgetc(input);
    while (current != EOF) {
        int run = 1;
        int next;

        // Extend the run while the same byte follows and the counter fits.
        // When the cap stops the loop, `next` starts the following run.
        while ((next = fgetc(input)) == current && run < RLE_MAX_RUN) {
            run++;
        }

        fputc(current, output);
        fputc(run, output);
        current = next;
    }

    close_files(input, output);
}

// Decompress using RLE.
//
// Expands every (byte, count) pair into `count` copies of `byte`. A value
// byte without its count means the stream is truncated and is reported.
void decompress_2(const char *input_file_name, const char *output_file_name)
{
    FILE *input = NULL;
    FILE *output = NULL;
    open_files(input_file_name, output_file_name, &input, &output);

    int value;
    while ((value = fgetc(input)) != EOF) {
        int count = fgetc(input);
        if (count == EOF) {
            die_msg("File error: truncated RLE stream");
        }
        for (int i = 0; i < count; i++) {
            fputc(value, output);
        }
    }

    close_files(input, output);
}