From 183e166d8bb30ccce816400689d5161aad29060f Mon Sep 17 00:00:00 2001 From: Yevhen Kozirovskyi Date: Mon, 27 Jan 2025 15:08:53 +0000 Subject: [PATCH] =?UTF-8?q?=D0=9E=D0=B1=D0=BD=D0=BE=D0=B2=D0=B8=D1=82?= =?UTF-8?q?=D1=8C=20sk1/compressor.c?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- sk1/compressor.c | 369 ++++++++++++++++++----------------------------- 1 file changed, 141 insertions(+), 228 deletions(-) diff --git a/sk1/compressor.c b/sk1/compressor.c index 70ee497..98dd1b6 100644 --- a/sk1/compressor.c +++ b/sk1/compressor.c @@ -1,224 +1,157 @@ #include #include #include -#include +#include "compressor.h" -#define MAX_TREE_NODES 256 +#define BUFFER_SIZE 4096 +#define MAX_SYMBOLS 257 -// Structure to represent a tree node -typedef struct HuffmanNode { - unsigned char data; - unsigned frequency; - struct HuffmanNode* left; - struct HuffmanNode* right; -} HuffmanNode; +// Макрос для обмена двух узлов +#define SWAP_NODES(a, b) { Node* temp = a; a = b; b = temp; } -// A structure to represent the Min Heap (Priority Queue) -typedef struct MinHeap { - unsigned size; - unsigned capacity; - HuffmanNode** array; -} MinHeap; +// Определение структуры узла дерева +typedef struct Node { + int symbol; + unsigned int frequency; + struct Node *left, *right; +} Node; -// Function to create a new node -HuffmanNode* newNode(unsigned char data, unsigned frequency) { - HuffmanNode* node = (HuffmanNode*)malloc(sizeof(HuffmanNode)); - if (!node) { - perror("Failed to allocate memory for new node"); - exit(EXIT_FAILURE); - } - node->data = data; +// Функция для создания нового узла +Node* create_node(int symbol, unsigned int frequency) { + Node* node = (Node*)malloc(sizeof(Node)); + node->symbol = symbol; node->frequency = frequency; node->left = node->right = NULL; return node; } -// Function to create a MinHeap -MinHeap* createMinHeap(unsigned capacity) { - MinHeap* minHeap = (MinHeap*)malloc(sizeof(MinHeap)); - if (!minHeap) { - perror("Failed to allocate memory for MinHeap"); - exit(EXIT_FAILURE); - } - minHeap->size = 0; - minHeap->capacity = capacity; - minHeap->array = (HuffmanNode**)malloc(capacity * sizeof(HuffmanNode*)); - if (!minHeap->array) { - perror("Failed to allocate memory for MinHeap array"); - exit(EXIT_FAILURE); - } - return minHeap; -} +// Функция для построения дерева Хаффмана +Node* build_huffman_tree(const unsigned int* frequencies) { + Node* nodes[MAX_SYMBOLS]; + int node_count = 0; -// Function to swap two min heap nodes -void swapMinHeapNode(HuffmanNode** a, HuffmanNode** b) { - HuffmanNode* temp = *a; - *a = *b; - *b = temp; -} - -// Function to min heapify -void minHeapify(MinHeap* minHeap, int idx) { - int smallest = idx; - int left = 2 * idx + 1; - int right = 2 * idx + 2; - - if (left < (int)minHeap->size && minHeap->array[left]->frequency < minHeap->array[smallest]->frequency) - smallest = left; - - if (right < (int)minHeap->size && minHeap->array[right]->frequency < minHeap->array[smallest]->frequency) - smallest = right; - - if (smallest != idx) { - swapMinHeapNode(&minHeap->array[smallest], &minHeap->array[idx]); - minHeapify(minHeap, smallest); - } -} - -// Check if the size of heap is one -int isSizeOne(MinHeap* minHeap) { - return (minHeap->size == 1); -} - -// Extract the minimum node from heap -HuffmanNode* extractMin(MinHeap* minHeap) { - HuffmanNode* temp = minHeap->array[0]; - minHeap->array[0] = minHeap->array[minHeap->size - 1]; - --minHeap->size; - minHeapify(minHeap, 0); - return temp; -} - -// Insert a new node to MinHeap -void insertMinHeap(MinHeap* minHeap, HuffmanNode* node) { - ++minHeap->size; - int i = minHeap->size - 1; - while (i && node->frequency < minHeap->array[(i - 1) / 2]->frequency) { - minHeap->array[i] = minHeap->array[(i - 1) / 2]; - i = (i - 1) / 2; - } - minHeap->array[i] = node; -} - -// Build a min heap of given capacity -void buildMinHeap(MinHeap* minHeap) { - int n = minHeap->size - 1; - for (int i = (n - 1) / 2; i >= 0; --i) - minHeapify(minHeap, i); -} - -// Function to build the Huffman tree -HuffmanNode* buildHuffmanTree(unsigned char* data, unsigned* freq, int size) { - HuffmanNode *left, *right, *top; - - MinHeap* minHeap = createMinHeap(size); - - for (int i = 0; i < size; ++i) - insertMinHeap(minHeap, newNode(data[i], freq[data[i]])); - - buildMinHeap(minHeap); - - while (!isSizeOne(minHeap)) { - left = extractMin(minHeap); - right = extractMin(minHeap); - - top = newNode('$', left->frequency + right->frequency); - top->left = left; - top->right = right; - - insertMinHeap(minHeap, top); + // Создаем узлы для всех символов с ненулевой частотой + for (int i = 0; i < MAX_SYMBOLS; i++) { + if (frequencies[i] > 0) { + nodes[node_count++] = create_node(i, frequencies[i]); + } } - return extractMin(minHeap); + // Объединяем узлы в дерево + while (node_count > 1) { + // Сортируем узлы по частоте + for (int i = 0; i < node_count - 1; i++) { + for (int j = i + 1; j < node_count; j++) { + if (nodes[i]->frequency > nodes[j]->frequency) { + SWAP_NODES(nodes[i], nodes[j]); + } + } + } + + // Объединяем два узла с наименьшей частотой + Node* left = nodes[0]; + Node* right = nodes[1]; + Node* parent = create_node(-1, left->frequency + right->frequency); + parent->left = left; + parent->right = right; + + // Заменяем объединенные узлы новым родительским узлом + nodes[0] = parent; + nodes[1] = nodes[--node_count]; + } + + return nodes[0]; } -// Function to generate the Huffman codes for each character -void generateCodes(HuffmanNode* root, char* arr, int top, char** codes) { - if (root->left) { - arr[top] = '0'; - generateCodes(root->left, arr, top + 1, codes); - } - - if (root->right) { - arr[top] = '1'; - generateCodes(root->right, arr, top + 1, codes); - } - +// Рекурсивная функция для генерации кодов Хаффмана +void generate_huffman_codes(Node* root, char* code, int depth, char codes[MAX_SYMBOLS][MAX_SYMBOLS]) { if (!root->left && !root->right) { - arr[top] = '\0'; // Null terminate the string - codes[root->data] = strdup(arr); + code[depth] = '\0'; // Завершаем код символа + strcpy(codes[root->symbol], code); + return; + } + if (root->left) { + code[depth] = '0'; // Добавляем бит '0' для левого поддерева + generate_huffman_codes(root->left, code, depth + 1, codes); + } + if (root->right) { + code[depth] = '1'; // Добавляем бит '1' для правого поддерева + generate_huffman_codes(root->right, code, depth + 1, codes); } } -void free_huffman_tree(HuffmanNode* root) { + +// Функция для освобождения памяти, выделенной под дерево Хаффмана +void free_huffman_tree(Node* root) { if (!root) return; free_huffman_tree(root->left); free_huffman_tree(root->right); free(root); } -// Function to compress a file +// Функция сжатия данных с использованием алгоритма Хаффмана int compress_1(const char* input_file, const char* output_file) { FILE* input = fopen(input_file, "rb"); FILE* output = fopen(output_file, "wb"); - if (!input || !output) { - perror("Error opening file"); - return -1; - } + if (!input || !output) return -1; - unsigned freq[256] = {0}; - unsigned char data; - while (fread(&data, sizeof(data), 1, input) == 1) - freq[data]++; + unsigned int frequencies[MAX_SYMBOLS] = {0}; + unsigned char buffer[BUFFER_SIZE]; + size_t bytes_read; - unsigned char unique_data[256]; - int unique_count = 0; - for (int i = 0; i < 256; i++) { - if (freq[i] > 0) { - unique_data[unique_count++] = i; + // Подсчет частот символов + while ((bytes_read = fread(buffer, 1, BUFFER_SIZE, input)) > 0) { + for (size_t i = 0; i < bytes_read; i++) { + frequencies[buffer[i]]++; } } + frequencies[256] = 1; // Добавляем маркер EOF - HuffmanNode* root = buildHuffmanTree(unique_data, freq, unique_count); + Node* root = build_huffman_tree(frequencies); + if (!root) return -1; - char* codes[256] = {0}; - char arr[256]; - generateCodes(root, arr, 0, codes); + // Генерация кодов Хаффмана + char codes[MAX_SYMBOLS][MAX_SYMBOLS] = {{0}}; + char code[MAX_SYMBOLS] = {0}; + generate_huffman_codes(root, code, 0, codes); - fwrite(&unique_count, sizeof(int), 1, output); - for (int i = 0; i < unique_count; i++) { - unsigned char symbol = unique_data[i]; - fwrite(&symbol, sizeof(unsigned char), 1, output); - fwrite(&freq[symbol], sizeof(unsigned), 1, output); - } + // Записываем частоты в выходной файл + fwrite(frequencies, sizeof(frequencies[0]), MAX_SYMBOLS, output); - fseek(input, 0, SEEK_SET); - - unsigned char buffer = 0; + // Сжимаем данные + rewind(input); + unsigned char current_byte = 0; int bit_count = 0; - size_t total_bits = 0; - while (fread(&data, sizeof(data), 1, input) == 1) { - char* code = codes[data]; - for (int i = 0; code[i] != '\0'; i++) { - unsigned char bit = code[i] - '0'; - buffer = (buffer << 1) | bit; - bit_count++; - total_bits++; - - if (bit_count == 8) { - fwrite(&buffer, sizeof(unsigned char), 1, output); - bit_count = 0; - buffer = 0; + while ((bytes_read = fread(buffer, 1, BUFFER_SIZE, input)) > 0) { + for (size_t i = 0; i < bytes_read; i++) { + char* symbol_code = codes[buffer[i]]; + for (size_t j = 0; symbol_code[j] != '\0'; j++) { + current_byte = (current_byte << 1) | (symbol_code[j] - '0'); + bit_count++; + if (bit_count == 8) { + fwrite(¤t_byte, 1, 1, output); + current_byte = 0; + bit_count = 0; + } } } } - if (bit_count > 0) { - buffer <<= (8 - bit_count); - fwrite(&buffer, sizeof(unsigned char), 1, output); + // Записываем маркер EOF + char* eof_code = codes[256]; + for (size_t j = 0; eof_code[j] != '\0'; j++) { + current_byte = (current_byte << 1) | (eof_code[j] - '0'); + bit_count++; + if (bit_count == 8) { + fwrite(¤t_byte, 1, 1, output); + current_byte = 0; + bit_count = 0; + } + } + if (bit_count > 0) { + current_byte <<= (8 - bit_count); + fwrite(¤t_byte, 1, 1, output); } - - fwrite(&total_bits, sizeof(size_t), 1, output); // Write total bits used fclose(input); fclose(output); @@ -226,62 +159,34 @@ int compress_1(const char* input_file, const char* output_file) { return 0; } -// Function to decompress the compressed file -int decompress_1(const char* input_file_name, const char* output_file_name) { - FILE* input = fopen(input_file_name, "rb"); - if (!input) { - perror("Error opening input file"); - return -1; - } - FILE* output = fopen(output_file_name, "wb"); - if (!output) { - perror("Error opening output file"); - fclose(input); - return -1; - } +// Функция декомпрессии данных с использованием алгоритма Хаффмана +int decompress_1(const char* input_file, const char* output_file) { + FILE* input = fopen(input_file, "rb"); + FILE* output = fopen(output_file, "wb"); + if (!input || !output) return -1; - int unique_count; - if (fread(&unique_count, sizeof(int), 1, input) != 1) { - perror("Error reading from input file"); - fclose(input); - fclose(output); - return -1; - } + unsigned int frequencies[MAX_SYMBOLS] = {0}; + fread(frequencies, sizeof(frequencies[0]), MAX_SYMBOLS, input); + Node* root = build_huffman_tree(frequencies); + if (!root) return -1; - unsigned char unique_data[256]; - unsigned freq[256] = {0}; - for (int i = 0; i < unique_count; i++) { - if (fread(&unique_data[i], sizeof(unsigned char), 1, input) != 1 || - fread(&freq[unique_data[i]], sizeof(unsigned), 1, input) != 1) { - perror("Error reading from input file"); - fclose(input); - fclose(output); - return -1; - } - } - - HuffmanNode* root = buildHuffmanTree(unique_data, freq, unique_count); - - size_t total_bits; - fseek(input, -(long long)sizeof(size_t), SEEK_END); - fread(&total_bits, sizeof(size_t), 1, input); - - fseek(input, sizeof(int) + unique_count * (sizeof(unsigned char) + sizeof(unsigned)), SEEK_SET); - - HuffmanNode* current = root; + Node* current = root; unsigned char byte; - size_t bits_read = 0; + int bit; - while (bits_read < total_bits && fread(&byte, sizeof(byte), 1, input) == 1) { - for (int i = 7; i >= 0 && bits_read < total_bits; i--, bits_read++) { - if (byte & (1 << i)) { - current = current->right; - } else { - current = current->left; - } + // Читаем и декодируем символы + while (fread(&byte, 1, 1, input) == 1) { + for (bit = 7; bit >= 0; bit--) { + current = (byte & (1 << bit)) ? current->right : current->left; if (!current->left && !current->right) { - fwrite(¤t->data, sizeof(current->data), 1, output); + if (current->symbol == 256) { // Маркер EOF + fclose(input); + fclose(output); + free_huffman_tree(root); + return 0; + } + fwrite(¤t->symbol, 1, 1, output); current = root; } } @@ -291,4 +196,12 @@ int decompress_1(const char* input_file_name, const char* output_file_name) { fclose(output); free_huffman_tree(root); return 0; -} \ No newline at end of file +} + +int compress_2(const char* input_file_name, const char* output_file_name){ + return 0; +} + +int decompress_2(const char* input_file_name, const char* output_file_name){ + return 0; +}