Обновить sk1/compressor.c
This commit is contained in:
		
							parent
							
								
									2aa747983d
								
							
						
					
					
						commit
						183e166d8b
					
				
							
								
								
									
										367
									
								
								sk1/compressor.c
									
									
									
									
									
								
							
							
						
						
									
										367
									
								
								sk1/compressor.c
									
									
									
									
									
								
							| @ -1,224 +1,157 @@ | ||||
| #include <stdio.h> | ||||
| #include <stdlib.h> | ||||
| #include <string.h> | ||||
| #include <ctype.h> | ||||
| #include "compressor.h" | ||||
| 
 | ||||
| #define MAX_TREE_NODES 256 | ||||
| #define BUFFER_SIZE 4096 | ||||
| #define MAX_SYMBOLS 257 | ||||
| 
 | ||||
| // Structure to represent a tree node
 | ||||
| typedef struct HuffmanNode { | ||||
|     unsigned char data; | ||||
|     unsigned frequency; | ||||
|     struct HuffmanNode* left; | ||||
|     struct HuffmanNode* right; | ||||
| } HuffmanNode; | ||||
| // Макрос для обмена двух узлов
 | ||||
| #define SWAP_NODES(a, b) { Node* temp = a; a = b; b = temp; } | ||||
| 
 | ||||
| // A structure to represent the Min Heap (Priority Queue)
 | ||||
| typedef struct MinHeap { | ||||
|     unsigned size; | ||||
|     unsigned capacity; | ||||
|     HuffmanNode** array; | ||||
| } MinHeap; | ||||
| // Определение структуры узла дерева
 | ||||
| typedef struct Node { | ||||
|     int symbol;                 | ||||
|     unsigned int frequency;      | ||||
|     struct Node *left, *right;   | ||||
| } Node; | ||||
| 
 | ||||
| // Function to create a new node
 | ||||
| HuffmanNode* newNode(unsigned char data, unsigned frequency) { | ||||
|     HuffmanNode* node = (HuffmanNode*)malloc(sizeof(HuffmanNode)); | ||||
|     if (!node) { | ||||
|         perror("Failed to allocate memory for new node"); | ||||
|         exit(EXIT_FAILURE); | ||||
|     } | ||||
|     node->data = data; | ||||
| // Функция для создания нового узла
 | ||||
| Node* create_node(int symbol, unsigned int frequency) { | ||||
|     Node* node = (Node*)malloc(sizeof(Node)); | ||||
|     node->symbol = symbol; | ||||
|     node->frequency = frequency; | ||||
|     node->left = node->right = NULL; | ||||
|     return node; | ||||
| } | ||||
| 
 | ||||
| // Function to create a MinHeap
 | ||||
| MinHeap* createMinHeap(unsigned capacity) { | ||||
|     MinHeap* minHeap = (MinHeap*)malloc(sizeof(MinHeap)); | ||||
|     if (!minHeap) { | ||||
|         perror("Failed to allocate memory for MinHeap"); | ||||
|         exit(EXIT_FAILURE); | ||||
|     } | ||||
|     minHeap->size = 0; | ||||
|     minHeap->capacity = capacity; | ||||
|     minHeap->array = (HuffmanNode**)malloc(capacity * sizeof(HuffmanNode*)); | ||||
|     if (!minHeap->array) { | ||||
|         perror("Failed to allocate memory for MinHeap array"); | ||||
|         exit(EXIT_FAILURE); | ||||
|     } | ||||
|     return minHeap; | ||||
| } | ||||
| // Функция для построения дерева Хаффмана
 | ||||
| Node* build_huffman_tree(const unsigned int* frequencies) { | ||||
|     Node* nodes[MAX_SYMBOLS]; | ||||
|     int node_count = 0; | ||||
| 
 | ||||
| // Function to swap two min heap nodes
 | ||||
| void swapMinHeapNode(HuffmanNode** a, HuffmanNode** b) { | ||||
|     HuffmanNode* temp = *a; | ||||
|     *a = *b; | ||||
|     *b = temp; | ||||
| } | ||||
| 
 | ||||
| // Function to min heapify
 | ||||
| void minHeapify(MinHeap* minHeap, int idx) { | ||||
|     int smallest = idx; | ||||
|     int left = 2 * idx + 1; | ||||
|     int right = 2 * idx + 2; | ||||
| 
 | ||||
|     if (left < (int)minHeap->size && minHeap->array[left]->frequency < minHeap->array[smallest]->frequency) | ||||
|         smallest = left; | ||||
| 
 | ||||
|     if (right < (int)minHeap->size && minHeap->array[right]->frequency < minHeap->array[smallest]->frequency) | ||||
|         smallest = right; | ||||
| 
 | ||||
|     if (smallest != idx) { | ||||
|         swapMinHeapNode(&minHeap->array[smallest], &minHeap->array[idx]); | ||||
|         minHeapify(minHeap, smallest); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| // Check if the size of heap is one
 | ||||
| int isSizeOne(MinHeap* minHeap) { | ||||
|     return (minHeap->size == 1); | ||||
| } | ||||
| 
 | ||||
| // Extract the minimum node from heap
 | ||||
| HuffmanNode* extractMin(MinHeap* minHeap) { | ||||
|     HuffmanNode* temp = minHeap->array[0]; | ||||
|     minHeap->array[0] = minHeap->array[minHeap->size - 1]; | ||||
|     --minHeap->size; | ||||
|     minHeapify(minHeap, 0); | ||||
|     return temp; | ||||
| } | ||||
| 
 | ||||
| // Insert a new node to MinHeap
 | ||||
| void insertMinHeap(MinHeap* minHeap, HuffmanNode* node) { | ||||
|     ++minHeap->size; | ||||
|     int i = minHeap->size - 1; | ||||
|     while (i && node->frequency < minHeap->array[(i - 1) / 2]->frequency) { | ||||
|         minHeap->array[i] = minHeap->array[(i - 1) / 2]; | ||||
|         i = (i - 1) / 2; | ||||
|     } | ||||
|     minHeap->array[i] = node; | ||||
| } | ||||
| 
 | ||||
| // Build a min heap of given capacity
 | ||||
| void buildMinHeap(MinHeap* minHeap) { | ||||
|     int n = minHeap->size - 1; | ||||
|     for (int i = (n - 1) / 2; i >= 0; --i) | ||||
|         minHeapify(minHeap, i); | ||||
| } | ||||
| 
 | ||||
| // Function to build the Huffman tree
 | ||||
| HuffmanNode* buildHuffmanTree(unsigned char* data, unsigned* freq, int size) { | ||||
|     HuffmanNode *left, *right, *top; | ||||
| 
 | ||||
|     MinHeap* minHeap = createMinHeap(size); | ||||
| 
 | ||||
|     for (int i = 0; i < size; ++i) | ||||
|         insertMinHeap(minHeap, newNode(data[i], freq[data[i]])); | ||||
| 
 | ||||
|     buildMinHeap(minHeap); | ||||
| 
 | ||||
|     while (!isSizeOne(minHeap)) { | ||||
|         left = extractMin(minHeap); | ||||
|         right = extractMin(minHeap); | ||||
| 
 | ||||
|         top = newNode('$', left->frequency + right->frequency); | ||||
|         top->left = left; | ||||
|         top->right = right; | ||||
| 
 | ||||
|         insertMinHeap(minHeap, top); | ||||
|     // Создаем узлы для всех символов с ненулевой частотой
 | ||||
|     for (int i = 0; i < MAX_SYMBOLS; i++) { | ||||
|         if (frequencies[i] > 0) { | ||||
|             nodes[node_count++] = create_node(i, frequencies[i]); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     return extractMin(minHeap); | ||||
|     // Объединяем узлы в дерево
 | ||||
|     while (node_count > 1) { | ||||
|         // Сортируем узлы по частоте
 | ||||
|         for (int i = 0; i < node_count - 1; i++) { | ||||
|             for (int j = i + 1; j < node_count; j++) { | ||||
|                 if (nodes[i]->frequency > nodes[j]->frequency) { | ||||
|                     SWAP_NODES(nodes[i], nodes[j]); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         // Объединяем два узла с наименьшей частотой
 | ||||
|         Node* left = nodes[0]; | ||||
|         Node* right = nodes[1]; | ||||
|         Node* parent = create_node(-1, left->frequency + right->frequency); | ||||
|         parent->left = left; | ||||
|         parent->right = right; | ||||
| 
 | ||||
|         // Заменяем объединенные узлы новым родительским узлом
 | ||||
|         nodes[0] = parent; | ||||
|         nodes[1] = nodes[--node_count]; | ||||
|     } | ||||
| 
 | ||||
|     return nodes[0]; | ||||
| } | ||||
| 
 | ||||
| // Function to generate the Huffman codes for each character
 | ||||
| void generateCodes(HuffmanNode* root, char* arr, int top, char** codes) { | ||||
|     if (root->left) { | ||||
|         arr[top] = '0'; | ||||
|         generateCodes(root->left, arr, top + 1, codes); | ||||
|     } | ||||
| 
 | ||||
|     if (root->right) { | ||||
|         arr[top] = '1'; | ||||
|         generateCodes(root->right, arr, top + 1, codes); | ||||
|     } | ||||
| 
 | ||||
| // Рекурсивная функция для генерации кодов Хаффмана
 | ||||
| void generate_huffman_codes(Node* root, char* code, int depth, char codes[MAX_SYMBOLS][MAX_SYMBOLS]) { | ||||
|     if (!root->left && !root->right) { | ||||
|         arr[top] = '\0';  // Null terminate the string
 | ||||
|         codes[root->data] = strdup(arr); | ||||
|         code[depth] = '\0'; // Завершаем код символа
 | ||||
|         strcpy(codes[root->symbol], code); | ||||
|         return; | ||||
|     } | ||||
|     if (root->left) { | ||||
|         code[depth] = '0'; // Добавляем бит '0' для левого поддерева
 | ||||
|         generate_huffman_codes(root->left, code, depth + 1, codes); | ||||
|     } | ||||
|     if (root->right) { | ||||
|         code[depth] = '1'; // Добавляем бит '1' для правого поддерева
 | ||||
|         generate_huffman_codes(root->right, code, depth + 1, codes); | ||||
|     } | ||||
| } | ||||
| void free_huffman_tree(HuffmanNode* root) { | ||||
| 
 | ||||
| // Функция для освобождения памяти, выделенной под дерево Хаффмана
 | ||||
| void free_huffman_tree(Node* root) { | ||||
|     if (!root) return; | ||||
|     free_huffman_tree(root->left); | ||||
|     free_huffman_tree(root->right); | ||||
|     free(root); | ||||
| } | ||||
| 
 | ||||
| // Function to compress a file
 | ||||
| // Функция сжатия данных с использованием алгоритма Хаффмана
 | ||||
| int compress_1(const char* input_file, const char* output_file) { | ||||
|     FILE* input = fopen(input_file, "rb"); | ||||
|     FILE* output = fopen(output_file, "wb"); | ||||
|     if (!input || !output) { | ||||
|         perror("Error opening file"); | ||||
|         return -1; | ||||
|     } | ||||
|     if (!input || !output) return -1; | ||||
| 
 | ||||
|     unsigned freq[256] = {0}; | ||||
|     unsigned char data; | ||||
|     while (fread(&data, sizeof(data), 1, input) == 1) | ||||
|         freq[data]++; | ||||
|     unsigned int frequencies[MAX_SYMBOLS] = {0}; | ||||
|     unsigned char buffer[BUFFER_SIZE]; | ||||
|     size_t bytes_read; | ||||
| 
 | ||||
|     unsigned char unique_data[256]; | ||||
|     int unique_count = 0; | ||||
|     for (int i = 0; i < 256; i++) { | ||||
|         if (freq[i] > 0) { | ||||
|             unique_data[unique_count++] = i; | ||||
|     // Подсчет частот символов
 | ||||
|     while ((bytes_read = fread(buffer, 1, BUFFER_SIZE, input)) > 0) { | ||||
|         for (size_t i = 0; i < bytes_read; i++) { | ||||
|             frequencies[buffer[i]]++; | ||||
|         } | ||||
|     } | ||||
|     frequencies[256] = 1; // Добавляем маркер EOF
 | ||||
| 
 | ||||
|     HuffmanNode* root = buildHuffmanTree(unique_data, freq, unique_count); | ||||
|     Node* root = build_huffman_tree(frequencies); | ||||
|     if (!root) return -1; | ||||
| 
 | ||||
|     char* codes[256] = {0}; | ||||
|     char arr[256]; | ||||
|     generateCodes(root, arr, 0, codes); | ||||
|     // Генерация кодов Хаффмана
 | ||||
|     char codes[MAX_SYMBOLS][MAX_SYMBOLS] = {{0}}; | ||||
|     char code[MAX_SYMBOLS] = {0}; | ||||
|     generate_huffman_codes(root, code, 0, codes); | ||||
| 
 | ||||
|     fwrite(&unique_count, sizeof(int), 1, output); | ||||
|     for (int i = 0; i < unique_count; i++) { | ||||
|         unsigned char symbol = unique_data[i]; | ||||
|         fwrite(&symbol, sizeof(unsigned char), 1, output); | ||||
|         fwrite(&freq[symbol], sizeof(unsigned), 1, output); | ||||
|     } | ||||
|     // Записываем частоты в выходной файл
 | ||||
|     fwrite(frequencies, sizeof(frequencies[0]), MAX_SYMBOLS, output); | ||||
| 
 | ||||
|     fseek(input, 0, SEEK_SET); | ||||
| 
 | ||||
|     unsigned char buffer = 0; | ||||
|     // Сжимаем данные
 | ||||
|     rewind(input); | ||||
|     unsigned char current_byte = 0; | ||||
|     int bit_count = 0; | ||||
|     size_t total_bits = 0; | ||||
| 
 | ||||
|     while (fread(&data, sizeof(data), 1, input) == 1) { | ||||
|         char* code = codes[data]; | ||||
|         for (int i = 0; code[i] != '\0'; i++) { | ||||
|             unsigned char bit = code[i] - '0'; | ||||
|             buffer = (buffer << 1) | bit; | ||||
|             bit_count++; | ||||
|             total_bits++; | ||||
| 
 | ||||
|             if (bit_count == 8) { | ||||
|                 fwrite(&buffer, sizeof(unsigned char), 1, output); | ||||
|                 bit_count = 0; | ||||
|                 buffer = 0; | ||||
|     while ((bytes_read = fread(buffer, 1, BUFFER_SIZE, input)) > 0) { | ||||
|         for (size_t i = 0; i < bytes_read; i++) { | ||||
|             char* symbol_code = codes[buffer[i]]; | ||||
|             for (size_t j = 0; symbol_code[j] != '\0'; j++) { | ||||
|                 current_byte = (current_byte << 1) | (symbol_code[j] - '0'); | ||||
|                 bit_count++; | ||||
|                 if (bit_count == 8) { | ||||
|                     fwrite(&current_byte, 1, 1, output); | ||||
|                     current_byte = 0; | ||||
|                     bit_count = 0; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     if (bit_count > 0) { | ||||
|         buffer <<= (8 - bit_count); | ||||
|         fwrite(&buffer, sizeof(unsigned char), 1, output); | ||||
|     // Записываем маркер EOF
 | ||||
|     char* eof_code = codes[256]; | ||||
|     for (size_t j = 0; eof_code[j] != '\0'; j++) { | ||||
|         current_byte = (current_byte << 1) | (eof_code[j] - '0'); | ||||
|         bit_count++; | ||||
|         if (bit_count == 8) { | ||||
|             fwrite(&current_byte, 1, 1, output); | ||||
|             current_byte = 0; | ||||
|             bit_count = 0; | ||||
|         } | ||||
|     } | ||||
|     if (bit_count > 0) { | ||||
|         current_byte <<= (8 - bit_count); | ||||
|         fwrite(&current_byte, 1, 1, output); | ||||
|     } | ||||
| 
 | ||||
|     fwrite(&total_bits, sizeof(size_t), 1, output); // Write total bits used
 | ||||
| 
 | ||||
|     fclose(input); | ||||
|     fclose(output); | ||||
| @ -226,62 +159,34 @@ int compress_1(const char* input_file, const char* output_file) { | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| // Function to decompress the compressed file
 | ||||
| int decompress_1(const char* input_file_name, const char* output_file_name) { | ||||
|     FILE* input = fopen(input_file_name, "rb"); | ||||
|     if (!input) { | ||||
|         perror("Error opening input file"); | ||||
|         return -1; | ||||
|     } | ||||
|     FILE* output = fopen(output_file_name, "wb"); | ||||
|     if (!output) { | ||||
|         perror("Error opening output file"); | ||||
|         fclose(input); | ||||
|         return -1; | ||||
|     } | ||||
| // Функция декомпрессии данных с использованием алгоритма Хаффмана
 | ||||
| int decompress_1(const char* input_file, const char* output_file) { | ||||
|     FILE* input = fopen(input_file, "rb"); | ||||
|     FILE* output = fopen(output_file, "wb"); | ||||
|     if (!input || !output) return -1; | ||||
| 
 | ||||
|     int unique_count; | ||||
|     if (fread(&unique_count, sizeof(int), 1, input) != 1) { | ||||
|         perror("Error reading from input file"); | ||||
|         fclose(input); | ||||
|         fclose(output); | ||||
|         return -1; | ||||
|     } | ||||
|     unsigned int frequencies[MAX_SYMBOLS] = {0}; | ||||
|     fread(frequencies, sizeof(frequencies[0]), MAX_SYMBOLS, input); | ||||
|     Node* root = build_huffman_tree(frequencies); | ||||
|     if (!root) return -1; | ||||
| 
 | ||||
|     unsigned char unique_data[256]; | ||||
|     unsigned freq[256] = {0}; | ||||
|     for (int i = 0; i < unique_count; i++) { | ||||
|         if (fread(&unique_data[i], sizeof(unsigned char), 1, input) != 1 || | ||||
|             fread(&freq[unique_data[i]], sizeof(unsigned), 1, input) != 1) { | ||||
|             perror("Error reading from input file"); | ||||
|             fclose(input); | ||||
|             fclose(output); | ||||
|             return -1; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     HuffmanNode* root = buildHuffmanTree(unique_data, freq, unique_count); | ||||
| 
 | ||||
|     size_t total_bits; | ||||
|     fseek(input, -(long long)sizeof(size_t), SEEK_END); | ||||
|     fread(&total_bits, sizeof(size_t), 1, input); | ||||
| 
 | ||||
|     fseek(input, sizeof(int) + unique_count * (sizeof(unsigned char) + sizeof(unsigned)), SEEK_SET); | ||||
| 
 | ||||
|     HuffmanNode* current = root; | ||||
|     Node* current = root; | ||||
|     unsigned char byte; | ||||
|     size_t bits_read = 0; | ||||
|     int bit; | ||||
| 
 | ||||
|     while (bits_read < total_bits && fread(&byte, sizeof(byte), 1, input) == 1) { | ||||
|         for (int i = 7; i >= 0 && bits_read < total_bits; i--, bits_read++) { | ||||
|             if (byte & (1 << i)) { | ||||
|                 current = current->right; | ||||
|             } else { | ||||
|                 current = current->left; | ||||
|             } | ||||
|     // Читаем и декодируем символы
 | ||||
|     while (fread(&byte, 1, 1, input) == 1) { | ||||
|         for (bit = 7; bit >= 0; bit--) { | ||||
|             current = (byte & (1 << bit)) ? current->right : current->left; | ||||
| 
 | ||||
|             if (!current->left && !current->right) { | ||||
|                 fwrite(&current->data, sizeof(current->data), 1, output); | ||||
|                 if (current->symbol == 256) { // Маркер EOF
 | ||||
|                     fclose(input); | ||||
|                     fclose(output); | ||||
|                     free_huffman_tree(root); | ||||
|                     return 0; | ||||
|                 } | ||||
|                 fwrite(&current->symbol, 1, 1, output); | ||||
|                 current = root; | ||||
|             } | ||||
|         } | ||||
| @ -292,3 +197,11 @@ int decompress_1(const char* input_file_name, const char* output_file_name) { | ||||
|     free_huffman_tree(root); | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| int compress_2(const char* input_file_name, const char* output_file_name){ | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| int decompress_2(const char* input_file_name, const char* output_file_name){ | ||||
|     return 0; | ||||
| } | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user