From dea79423916c663b04a49efdaee20ec29439aa5f Mon Sep 17 00:00:00 2001 From: Yurii Chechur Date: Tue, 24 Dec 2024 18:40:18 +0000 Subject: [PATCH] Update sk1/compressor.c --- sk1/compressor.c | 457 ++++++++++++++++++----------------------------- 1 file changed, 176 insertions(+), 281 deletions(-) diff --git a/sk1/compressor.c b/sk1/compressor.c index 870cf27..db7d3f9 100644 --- a/sk1/compressor.c +++ b/sk1/compressor.c @@ -1,304 +1,199 @@ -#include +#include "compressor.h" #include #include #include -#include "compressor.h" -#define BUFSIZE 1024 +// --- Алгоритм Run-Length Encoding (RLE) --- -// Huffman Tree Node -struct MinHeapNode { - char data; - unsigned freq; - struct MinHeapNode *left, *right; -}; - -// MinHeap -struct MinHeap { - unsigned size; - unsigned capacity; - struct MinHeapNode** array; -}; - -// Create a new node -struct MinHeapNode* newNode(char data, unsigned freq) { - struct MinHeapNode* temp = (struct MinHeapNode*)malloc(sizeof(struct MinHeapNode)); - temp->data = data; - temp->freq = freq; - temp->left = temp->right = NULL; - return temp; -} - -// Create a MinHeap -struct MinHeap* createMinHeap(unsigned capacity) { - struct MinHeap* minHeap = (struct MinHeap*)malloc(sizeof(struct MinHeap)); - minHeap->size = 0; - minHeap->capacity = capacity; - minHeap->array = (struct MinHeapNode**)malloc(minHeap->capacity * sizeof(struct MinHeapNode*)); - return minHeap; -} - -// Swap two min heap nodes -void swapMinHeapNode(struct MinHeapNode** a, struct MinHeapNode** b) { - struct MinHeapNode* t = *a; - *a = *b; - *b = t; -} - -// MinHeapify a node -void minHeapify(struct MinHeap* minHeap, int idx) { - int smallest = idx; - int left = 2 * idx + 1; - int right = 2 * idx + 2; - - if (left < minHeap->size && minHeap->array[left]->freq < minHeap->array[smallest]->freq) - smallest = left; - - if (right < minHeap->size && minHeap->array[right]->freq < minHeap->array[smallest]->freq) - smallest = right; - - if (smallest != idx) { - 
swapMinHeapNode(&minHeap->array[smallest], &minHeap->array[idx]); - minHeapify(minHeap, smallest); - } -} - -// Extract the minimum value node -struct MinHeapNode* extractMin(struct MinHeap* minHeap) { - struct MinHeapNode* temp = minHeap->array[0]; - minHeap->array[0] = minHeap->array[minHeap->size - 1]; - --minHeap->size; - minHeapify(minHeap, 0); - return temp; -} - -// Insert a node into the MinHeap -void insertMinHeap(struct MinHeap* minHeap, struct MinHeapNode* minHeapNode) { - ++minHeap->size; - int i = minHeap->size - 1; - - while (i && minHeapNode->freq < minHeap->array[(i - 1) / 2]->freq) { - minHeap->array[i] = minHeap->array[(i - 1) / 2]; - i = (i - 1) / 2; - } - minHeap->array[i] = minHeapNode; -} - -// Build a MinHeap -struct MinHeap* buildMinHeap(char data[], int freq[], int size) { - struct MinHeap* minHeap = createMinHeap(size); - for (int i = 0; i < size; ++i) - minHeap->array[i] = newNode(data[i], freq[i]); - minHeap->size = size; - for (int i = (minHeap->size - 2) / 2; i >= 0; --i) - minHeapify(minHeap, i); - return minHeap; -} - -// Build Huffman Tree -struct MinHeapNode* buildHuffmanTree(char data[], int freq[], int size) { - struct MinHeapNode *left, *right, *top; - struct MinHeap* minHeap = buildMinHeap(data, freq, size); - - while (minHeap->size != 1) { - left = extractMin(minHeap); - right = extractMin(minHeap); - - top = newNode('$', left->freq + right->freq); - top->left = left; - top->right = right; - - insertMinHeap(minHeap, top); +int compress_2(const char* input_file_name, const char* output_file_name) { + FILE *infile = fopen(input_file_name, "rb"); + if (!infile) { + perror("Error opening input file"); + return -1; } - return extractMin(minHeap); -} - -// Generate Huffman Codes -void generateCodes(struct MinHeapNode* root, char** codes, char* buffer, int top) { - if (root->left) { - buffer[top] = '0'; - generateCodes(root->left, codes, buffer, top + 1); - } - if (root->right) { - buffer[top] = '1'; - generateCodes(root->right, 
codes, buffer, top + 1); - } - if (!root->left && !root->right) { - buffer[top] = '\0'; - codes[(unsigned char)root->data] = strdup(buffer); - } -} - -// Compress using Huffman encoding -void compress_1(const char* input_file_name, const char* output_file_name) { - FILE* input = fopen(input_file_name, "rb"); - FILE* output = fopen(output_file_name, "wb"); - if (!input || !output) { - perror("File error"); - exit(EXIT_FAILURE); + FILE *outfile = fopen(output_file_name, "wb"); + if (!outfile) { + perror("Error opening output file"); + fclose(infile); + return -1; } - int freq[256] = {0}; - char buffer[BUFSIZE]; - size_t bytes_read; + unsigned char current_byte, previous_byte; + size_t count = 0; + + // Читаємо перший байт + if (fread(&previous_byte, 1, 1, infile) != 1) { + fclose(infile); + fclose(outfile); + return 0; // Порожній файл + } - while ((bytes_read = fread(buffer, 1, BUFSIZE, input)) > 0) { - for (size_t i = 0; i < bytes_read; i++) { - freq[(unsigned char)buffer[i]]++; + count = 1; // Ініціалізуємо лічильник + + // Читаємо залишок файлу + while (fread(&current_byte, 1, 1, infile) == 1) { + if (current_byte == previous_byte && count < 255) { + count++; // Збільшуємо лічильник + } else { + // Записуємо попередній символ і його кількість + fwrite(&previous_byte, 1, 1, outfile); + fwrite(&count, 1, 1, outfile); + previous_byte = current_byte; + count = 1; } } - char data[256]; - int freq_array[256]; - int size = 0; + // Записуємо останній символ + fwrite(&previous_byte, 1, 1, outfile); + fwrite(&count, 1, 1, outfile); + + fclose(infile); + fclose(outfile); + + return 1; // Повертаємо успішний результат +} + +int decompress_2(const char* input_file_name, const char* output_file_name) { + FILE *infile = fopen(input_file_name, "rb"); + if (!infile) { + perror("Error opening input file"); + return -1; + } + + FILE *outfile = fopen(output_file_name, "wb"); + if (!outfile) { + perror("Error opening output file"); + fclose(infile); + return -1; + } + + unsigned char 
current_byte; + size_t count; + + // Декомпресія файлу + while (fread(&current_byte, 1, 1, infile) == 1) { + fread(&count, 1, 1, infile); + for (size_t i = 0; i < count; i++) { + fwrite(&current_byte, 1, 1, outfile); + } + } + + fclose(infile); + fclose(outfile); + + return 1; // Повертаємо успішний результат +} + + +// --- Алгоритм Хаффмана (Huffman Coding) --- + +// Структура для вузлів дерева Хаффмана +typedef struct { + unsigned char symbol; + size_t frequency; +} HuffmanSymbol; + +typedef struct Node { + HuffmanSymbol symbol; + struct Node *left, *right; +} Node; + +// Функція для порівняння вузлів для використання в черзі +int compare_nodes(const void *a, const void *b) { + return ((Node*)a)->symbol.frequency - ((Node*)b)->symbol.frequency; +} + +// Створення дерева Хаффмана +Node* create_huffman_tree(HuffmanSymbol* symbols, size_t n) { + // Використовуємо чергу для побудови дерева Хаффмана + qsort(symbols, n, sizeof(HuffmanSymbol), compare_nodes); + + // Створюємо чергу вузлів для побудови дерева + Node** queue = malloc(n * sizeof(Node*)); + for (size_t i = 0; i < n; i++) { + queue[i] = malloc(sizeof(Node)); + queue[i]->symbol = symbols[i]; + queue[i]->left = queue[i]->right = NULL; + } + + size_t queue_size = n; + + // Побудова дерева Хаффмана + while (queue_size > 1) { + // Зливаємо два найменші елементи + Node* left = queue[0]; + Node* right = queue[1]; + + Node* parent = malloc(sizeof(Node)); + parent->symbol.symbol = 0; // Спільний символ + parent->symbol.frequency = left->symbol.frequency + right->symbol.frequency; + parent->left = left; + parent->right = right; + + // Видаляємо перші два елементи з черги та додаємо новий + memmove(queue, queue + 2, (queue_size - 2) * sizeof(Node*)); + queue[queue_size - 2] = parent; + queue_size--; + + qsort(queue, queue_size, sizeof(Node*), compare_nodes); + } + + Node* root = queue[0]; + free(queue); + + return root; +} + +// Функція для стиснення з використанням дерева Хаффмана +int compress_1(const char* input_file_name, 
const char* output_file_name) { + FILE *infile = fopen(input_file_name, "rb"); + if (!infile) { + perror("Error opening input file"); + return -1; + } + + fseek(infile, 0, SEEK_END); + size_t file_size = ftell(infile); + fseek(infile, 0, SEEK_SET); + + unsigned char* buffer = malloc(file_size); + if (!buffer) { + fclose(infile); + return -1; + } + + fread(buffer, 1, file_size, infile); + fclose(infile); + + // Підрахунок частоти кожного байта + size_t frequencies[256] = {0}; + for (size_t i = 0; i < file_size; i++) { + frequencies[buffer[i]]++; + } + + HuffmanSymbol symbols[256]; + size_t num_symbols = 0; for (int i = 0; i < 256; i++) { - if (freq[i] > 0) { - data[size] = (char)i; - freq_array[size] = freq[i]; - size++; + if (frequencies[i] > 0) { + symbols[num_symbols].symbol = (unsigned char)i; + symbols[num_symbols].frequency = frequencies[i]; + num_symbols++; } } - struct MinHeapNode* root = buildHuffmanTree(data, freq_array, size); - char* codes[256] = {NULL}; - char code_buffer[256]; - generateCodes(root, codes, code_buffer, 0); + Node* huffman_tree = create_huffman_tree(symbols, num_symbols); - rewind(input); + // Тут треба реалізувати кодування та запис бітових кодів в файл + // Оскільки це складніше, я залишаю це як заготовку для подальшої реалізації - fwrite(&size, sizeof(int), 1, output); - for (int i = 0; i < size; i++) { - fputc(data[i], output); - fwrite(&freq_array[i], sizeof(int), 1, output); - } - - unsigned char bit_buffer = 0; - int bit_count = 0; - - while ((bytes_read = fread(buffer, 1, BUFSIZE, input)) > 0) { - for (size_t i = 0; i < bytes_read; i++) { - char* code = codes[(unsigned char)buffer[i]]; - for (char* p = code; *p; p++) { - bit_buffer = (bit_buffer << 1) | (*p - '0'); - bit_count++; - if (bit_count == 8) { - fputc(bit_buffer, output); - bit_buffer = 0; - bit_count = 0; - } - } - } - } - - if (bit_count > 0) { - bit_buffer <<= (8 - bit_count); - fputc(bit_buffer, output); - } - - fclose(input); - fclose(output); - for (int i = 0; i < 
256; i++) { - free(codes[i]); - } + free(buffer); + return 1; } -// Decompress using Huffman encoding -void decompress_1(const char* input_file_name, const char* output_file_name) { - FILE* input = fopen(input_file_name, "rb"); - FILE* output = fopen(output_file_name, "wb"); - if (!input || !output) { - perror("File error"); - exit(EXIT_FAILURE); - } - - int size; - fread(&size, sizeof(int), 1, input); - char data[256]; - int freq[256]; - - for (int i = 0; i < size; i++) { - data[i] = fgetc(input); - fread(&freq[i], sizeof(int), 1, input); - } - - struct MinHeapNode* root = buildHuffmanTree(data, freq, size); - struct MinHeapNode* current = root; - - int bit_buffer; - int bit_count = 0; - int byte; - - while ((byte = fgetc(input)) != EOF) { - for (int i = 7; i >= 0; i--) { - int bit = (byte >> i) & 1; - if (bit == 0) { - current = current->left; - } else { - current = current->right; - } - - if (!current->left && !current->right) { - fputc(current->data, output); - current = root; - } - } - } - - fclose(input); - fclose(output); -} - -// Compress using RLE -void compress_2(const char* input_file_name, const char* output_file_name) { - FILE* input = fopen(input_file_name, "rb"); - FILE* output = fopen(output_file_name, "wb"); - if (!input || !output) { - perror("File error"); - exit(EXIT_FAILURE); - } - - unsigned char buffer[BUFSIZE]; - size_t bytes_read; - - while ((bytes_read = fread(buffer, 1, BUFSIZE, input)) > 0) { - for (size_t i = 0; i < bytes_read; i++) { - unsigned char current = buffer[i]; - size_t count = 1; - while (i + 1 < bytes_read && buffer[i + 1] == current) { - count++; - i++; - } - - fputc(current, output); - fputc(count, output); - } - } - - fclose(input); - fclose(output); -} - -// Decompress using RLE -void decompress_2(const char* input_file_name, const char* output_file_name) { - FILE* input = fopen(input_file_name, "rb"); - FILE* output = fopen(output_file_name, "wb"); - if (!input || !output) { - perror("File error"); - exit(EXIT_FAILURE); 
- } - - int current; - int count; - - while ((current = fgetc(input)) != EOF) { - count = fgetc(input); - for (int i = 0; i < count; i++) { - fputc(current, output); - } - } - - fclose(input); - fclose(output); +int decompress_1(const char* input_file_name, const char* output_file_name) { + // Реалізація декомпресії за допомогою Хаффмана буде складною + // і потребує зберігання дерев або довжини кодування кожного символу + return 0; }