Обновить sk1/compressor.c
This commit is contained in:
		
							parent
							
								
									2aa747983d
								
							
						
					
					
						commit
						183e166d8b
					
				
							
								
								
									
										369
									
								
								sk1/compressor.c
									
									
									
									
									
								
							
							
						
						
									
										369
									
								
								sk1/compressor.c
									
									
									
									
									
								
							| @ -1,224 +1,157 @@ | |||||||
| #include <stdio.h> | #include <stdio.h> | ||||||
| #include <stdlib.h> | #include <stdlib.h> | ||||||
| #include <string.h> | #include <string.h> | ||||||
| #include <ctype.h> | #include "compressor.h" | ||||||
| 
 | 
 | ||||||
| #define MAX_TREE_NODES 256 | #define BUFFER_SIZE 4096 | ||||||
|  | #define MAX_SYMBOLS 257 | ||||||
| 
 | 
 | ||||||
| // Structure to represent a tree node
 | // Макрос для обмена двух узлов
 | ||||||
| typedef struct HuffmanNode { | #define SWAP_NODES(a, b) { Node* temp = a; a = b; b = temp; } | ||||||
|     unsigned char data; |  | ||||||
|     unsigned frequency; |  | ||||||
|     struct HuffmanNode* left; |  | ||||||
|     struct HuffmanNode* right; |  | ||||||
| } HuffmanNode; |  | ||||||
| 
 | 
 | ||||||
| // A structure to represent the Min Heap (Priority Queue)
 | // Определение структуры узла дерева
 | ||||||
| typedef struct MinHeap { | typedef struct Node { | ||||||
|     unsigned size; |     int symbol;                 | ||||||
|     unsigned capacity; |     unsigned int frequency;      | ||||||
|     HuffmanNode** array; |     struct Node *left, *right;   | ||||||
| } MinHeap; | } Node; | ||||||
| 
 | 
 | ||||||
| // Function to create a new node
 | // Функция для создания нового узла
 | ||||||
| HuffmanNode* newNode(unsigned char data, unsigned frequency) { | Node* create_node(int symbol, unsigned int frequency) { | ||||||
|     HuffmanNode* node = (HuffmanNode*)malloc(sizeof(HuffmanNode)); |     Node* node = (Node*)malloc(sizeof(Node)); | ||||||
|     if (!node) { |     node->symbol = symbol; | ||||||
|         perror("Failed to allocate memory for new node"); |  | ||||||
|         exit(EXIT_FAILURE); |  | ||||||
|     } |  | ||||||
|     node->data = data; |  | ||||||
|     node->frequency = frequency; |     node->frequency = frequency; | ||||||
|     node->left = node->right = NULL; |     node->left = node->right = NULL; | ||||||
|     return node; |     return node; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // Function to create a MinHeap
 | // Функция для построения дерева Хаффмана
 | ||||||
| MinHeap* createMinHeap(unsigned capacity) { | Node* build_huffman_tree(const unsigned int* frequencies) { | ||||||
|     MinHeap* minHeap = (MinHeap*)malloc(sizeof(MinHeap)); |     Node* nodes[MAX_SYMBOLS]; | ||||||
|     if (!minHeap) { |     int node_count = 0; | ||||||
|         perror("Failed to allocate memory for MinHeap"); |  | ||||||
|         exit(EXIT_FAILURE); |  | ||||||
|     } |  | ||||||
|     minHeap->size = 0; |  | ||||||
|     minHeap->capacity = capacity; |  | ||||||
|     minHeap->array = (HuffmanNode**)malloc(capacity * sizeof(HuffmanNode*)); |  | ||||||
|     if (!minHeap->array) { |  | ||||||
|         perror("Failed to allocate memory for MinHeap array"); |  | ||||||
|         exit(EXIT_FAILURE); |  | ||||||
|     } |  | ||||||
|     return minHeap; |  | ||||||
| } |  | ||||||
| 
 | 
 | ||||||
| // Function to swap two min heap nodes
 |     // Создаем узлы для всех символов с ненулевой частотой
 | ||||||
| void swapMinHeapNode(HuffmanNode** a, HuffmanNode** b) { |     for (int i = 0; i < MAX_SYMBOLS; i++) { | ||||||
|     HuffmanNode* temp = *a; |         if (frequencies[i] > 0) { | ||||||
|     *a = *b; |             nodes[node_count++] = create_node(i, frequencies[i]); | ||||||
|     *b = temp; |         } | ||||||
| } |  | ||||||
| 
 |  | ||||||
| // Function to min heapify
 |  | ||||||
| void minHeapify(MinHeap* minHeap, int idx) { |  | ||||||
|     int smallest = idx; |  | ||||||
|     int left = 2 * idx + 1; |  | ||||||
|     int right = 2 * idx + 2; |  | ||||||
| 
 |  | ||||||
|     if (left < (int)minHeap->size && minHeap->array[left]->frequency < minHeap->array[smallest]->frequency) |  | ||||||
|         smallest = left; |  | ||||||
| 
 |  | ||||||
|     if (right < (int)minHeap->size && minHeap->array[right]->frequency < minHeap->array[smallest]->frequency) |  | ||||||
|         smallest = right; |  | ||||||
| 
 |  | ||||||
|     if (smallest != idx) { |  | ||||||
|         swapMinHeapNode(&minHeap->array[smallest], &minHeap->array[idx]); |  | ||||||
|         minHeapify(minHeap, smallest); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| // Check if the size of heap is one
 |  | ||||||
| int isSizeOne(MinHeap* minHeap) { |  | ||||||
|     return (minHeap->size == 1); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| // Extract the minimum node from heap
 |  | ||||||
| HuffmanNode* extractMin(MinHeap* minHeap) { |  | ||||||
|     HuffmanNode* temp = minHeap->array[0]; |  | ||||||
|     minHeap->array[0] = minHeap->array[minHeap->size - 1]; |  | ||||||
|     --minHeap->size; |  | ||||||
|     minHeapify(minHeap, 0); |  | ||||||
|     return temp; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| // Insert a new node to MinHeap
 |  | ||||||
| void insertMinHeap(MinHeap* minHeap, HuffmanNode* node) { |  | ||||||
|     ++minHeap->size; |  | ||||||
|     int i = minHeap->size - 1; |  | ||||||
|     while (i && node->frequency < minHeap->array[(i - 1) / 2]->frequency) { |  | ||||||
|         minHeap->array[i] = minHeap->array[(i - 1) / 2]; |  | ||||||
|         i = (i - 1) / 2; |  | ||||||
|     } |  | ||||||
|     minHeap->array[i] = node; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| // Build a min heap of given capacity
 |  | ||||||
| void buildMinHeap(MinHeap* minHeap) { |  | ||||||
|     int n = minHeap->size - 1; |  | ||||||
|     for (int i = (n - 1) / 2; i >= 0; --i) |  | ||||||
|         minHeapify(minHeap, i); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| // Function to build the Huffman tree
 |  | ||||||
| HuffmanNode* buildHuffmanTree(unsigned char* data, unsigned* freq, int size) { |  | ||||||
|     HuffmanNode *left, *right, *top; |  | ||||||
| 
 |  | ||||||
|     MinHeap* minHeap = createMinHeap(size); |  | ||||||
| 
 |  | ||||||
|     for (int i = 0; i < size; ++i) |  | ||||||
|         insertMinHeap(minHeap, newNode(data[i], freq[data[i]])); |  | ||||||
| 
 |  | ||||||
|     buildMinHeap(minHeap); |  | ||||||
| 
 |  | ||||||
|     while (!isSizeOne(minHeap)) { |  | ||||||
|         left = extractMin(minHeap); |  | ||||||
|         right = extractMin(minHeap); |  | ||||||
| 
 |  | ||||||
|         top = newNode('$', left->frequency + right->frequency); |  | ||||||
|         top->left = left; |  | ||||||
|         top->right = right; |  | ||||||
| 
 |  | ||||||
|         insertMinHeap(minHeap, top); |  | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     return extractMin(minHeap); |     // Объединяем узлы в дерево
 | ||||||
|  |     while (node_count > 1) { | ||||||
|  |         // Сортируем узлы по частоте
 | ||||||
|  |         for (int i = 0; i < node_count - 1; i++) { | ||||||
|  |             for (int j = i + 1; j < node_count; j++) { | ||||||
|  |                 if (nodes[i]->frequency > nodes[j]->frequency) { | ||||||
|  |                     SWAP_NODES(nodes[i], nodes[j]); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         // Объединяем два узла с наименьшей частотой
 | ||||||
|  |         Node* left = nodes[0]; | ||||||
|  |         Node* right = nodes[1]; | ||||||
|  |         Node* parent = create_node(-1, left->frequency + right->frequency); | ||||||
|  |         parent->left = left; | ||||||
|  |         parent->right = right; | ||||||
|  | 
 | ||||||
|  |         // Заменяем объединенные узлы новым родительским узлом
 | ||||||
|  |         nodes[0] = parent; | ||||||
|  |         nodes[1] = nodes[--node_count]; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     return nodes[0]; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // Function to generate the Huffman codes for each character
 | // Рекурсивная функция для генерации кодов Хаффмана
 | ||||||
| void generateCodes(HuffmanNode* root, char* arr, int top, char** codes) { | void generate_huffman_codes(Node* root, char* code, int depth, char codes[MAX_SYMBOLS][MAX_SYMBOLS]) { | ||||||
|     if (root->left) { |  | ||||||
|         arr[top] = '0'; |  | ||||||
|         generateCodes(root->left, arr, top + 1, codes); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     if (root->right) { |  | ||||||
|         arr[top] = '1'; |  | ||||||
|         generateCodes(root->right, arr, top + 1, codes); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     if (!root->left && !root->right) { |     if (!root->left && !root->right) { | ||||||
|         arr[top] = '\0';  // Null terminate the string
 |         code[depth] = '\0'; // Завершаем код символа
 | ||||||
|         codes[root->data] = strdup(arr); |         strcpy(codes[root->symbol], code); | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |     if (root->left) { | ||||||
|  |         code[depth] = '0'; // Добавляем бит '0' для левого поддерева
 | ||||||
|  |         generate_huffman_codes(root->left, code, depth + 1, codes); | ||||||
|  |     } | ||||||
|  |     if (root->right) { | ||||||
|  |         code[depth] = '1'; // Добавляем бит '1' для правого поддерева
 | ||||||
|  |         generate_huffman_codes(root->right, code, depth + 1, codes); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| void free_huffman_tree(HuffmanNode* root) { | 
 | ||||||
|  | // Функция для освобождения памяти, выделенной под дерево Хаффмана
 | ||||||
|  | void free_huffman_tree(Node* root) { | ||||||
|     if (!root) return; |     if (!root) return; | ||||||
|     free_huffman_tree(root->left); |     free_huffman_tree(root->left); | ||||||
|     free_huffman_tree(root->right); |     free_huffman_tree(root->right); | ||||||
|     free(root); |     free(root); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // Function to compress a file
 | // Функция сжатия данных с использованием алгоритма Хаффмана
 | ||||||
| int compress_1(const char* input_file, const char* output_file) { | int compress_1(const char* input_file, const char* output_file) { | ||||||
|     FILE* input = fopen(input_file, "rb"); |     FILE* input = fopen(input_file, "rb"); | ||||||
|     FILE* output = fopen(output_file, "wb"); |     FILE* output = fopen(output_file, "wb"); | ||||||
|     if (!input || !output) { |     if (!input || !output) return -1; | ||||||
|         perror("Error opening file"); |  | ||||||
|         return -1; |  | ||||||
|     } |  | ||||||
| 
 | 
 | ||||||
|     unsigned freq[256] = {0}; |     unsigned int frequencies[MAX_SYMBOLS] = {0}; | ||||||
|     unsigned char data; |     unsigned char buffer[BUFFER_SIZE]; | ||||||
|     while (fread(&data, sizeof(data), 1, input) == 1) |     size_t bytes_read; | ||||||
|         freq[data]++; |  | ||||||
| 
 | 
 | ||||||
|     unsigned char unique_data[256]; |     // Подсчет частот символов
 | ||||||
|     int unique_count = 0; |     while ((bytes_read = fread(buffer, 1, BUFFER_SIZE, input)) > 0) { | ||||||
|     for (int i = 0; i < 256; i++) { |         for (size_t i = 0; i < bytes_read; i++) { | ||||||
|         if (freq[i] > 0) { |             frequencies[buffer[i]]++; | ||||||
|             unique_data[unique_count++] = i; |  | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |     frequencies[256] = 1; // Добавляем маркер EOF
 | ||||||
| 
 | 
 | ||||||
|     HuffmanNode* root = buildHuffmanTree(unique_data, freq, unique_count); |     Node* root = build_huffman_tree(frequencies); | ||||||
|  |     if (!root) return -1; | ||||||
| 
 | 
 | ||||||
|     char* codes[256] = {0}; |     // Генерация кодов Хаффмана
 | ||||||
|     char arr[256]; |     char codes[MAX_SYMBOLS][MAX_SYMBOLS] = {{0}}; | ||||||
|     generateCodes(root, arr, 0, codes); |     char code[MAX_SYMBOLS] = {0}; | ||||||
|  |     generate_huffman_codes(root, code, 0, codes); | ||||||
| 
 | 
 | ||||||
|     fwrite(&unique_count, sizeof(int), 1, output); |     // Записываем частоты в выходной файл
 | ||||||
|     for (int i = 0; i < unique_count; i++) { |     fwrite(frequencies, sizeof(frequencies[0]), MAX_SYMBOLS, output); | ||||||
|         unsigned char symbol = unique_data[i]; |  | ||||||
|         fwrite(&symbol, sizeof(unsigned char), 1, output); |  | ||||||
|         fwrite(&freq[symbol], sizeof(unsigned), 1, output); |  | ||||||
|     } |  | ||||||
| 
 | 
 | ||||||
|     fseek(input, 0, SEEK_SET); |     // Сжимаем данные
 | ||||||
| 
 |     rewind(input); | ||||||
|     unsigned char buffer = 0; |     unsigned char current_byte = 0; | ||||||
|     int bit_count = 0; |     int bit_count = 0; | ||||||
|     size_t total_bits = 0; |  | ||||||
| 
 | 
 | ||||||
|     while (fread(&data, sizeof(data), 1, input) == 1) { |     while ((bytes_read = fread(buffer, 1, BUFFER_SIZE, input)) > 0) { | ||||||
|         char* code = codes[data]; |         for (size_t i = 0; i < bytes_read; i++) { | ||||||
|         for (int i = 0; code[i] != '\0'; i++) { |             char* symbol_code = codes[buffer[i]]; | ||||||
|             unsigned char bit = code[i] - '0'; |             for (size_t j = 0; symbol_code[j] != '\0'; j++) { | ||||||
|             buffer = (buffer << 1) | bit; |                 current_byte = (current_byte << 1) | (symbol_code[j] - '0'); | ||||||
|             bit_count++; |                 bit_count++; | ||||||
|             total_bits++; |                 if (bit_count == 8) { | ||||||
| 
 |                     fwrite(&current_byte, 1, 1, output); | ||||||
|             if (bit_count == 8) { |                     current_byte = 0; | ||||||
|                 fwrite(&buffer, sizeof(unsigned char), 1, output); |                     bit_count = 0; | ||||||
|                 bit_count = 0; |                 } | ||||||
|                 buffer = 0; |  | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     if (bit_count > 0) { |     // Записываем маркер EOF
 | ||||||
|         buffer <<= (8 - bit_count); |     char* eof_code = codes[256]; | ||||||
|         fwrite(&buffer, sizeof(unsigned char), 1, output); |     for (size_t j = 0; eof_code[j] != '\0'; j++) { | ||||||
|  |         current_byte = (current_byte << 1) | (eof_code[j] - '0'); | ||||||
|  |         bit_count++; | ||||||
|  |         if (bit_count == 8) { | ||||||
|  |             fwrite(&current_byte, 1, 1, output); | ||||||
|  |             current_byte = 0; | ||||||
|  |             bit_count = 0; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     if (bit_count > 0) { | ||||||
|  |         current_byte <<= (8 - bit_count); | ||||||
|  |         fwrite(&current_byte, 1, 1, output); | ||||||
|     } |     } | ||||||
| 
 |  | ||||||
|     fwrite(&total_bits, sizeof(size_t), 1, output); // Write total bits used
 |  | ||||||
| 
 | 
 | ||||||
|     fclose(input); |     fclose(input); | ||||||
|     fclose(output); |     fclose(output); | ||||||
| @ -226,62 +159,34 @@ int compress_1(const char* input_file, const char* output_file) { | |||||||
|     return 0; |     return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // Function to decompress the compressed file
 | // Функция декомпрессии данных с использованием алгоритма Хаффмана
 | ||||||
| int decompress_1(const char* input_file_name, const char* output_file_name) { | int decompress_1(const char* input_file, const char* output_file) { | ||||||
|     FILE* input = fopen(input_file_name, "rb"); |     FILE* input = fopen(input_file, "rb"); | ||||||
|     if (!input) { |     FILE* output = fopen(output_file, "wb"); | ||||||
|         perror("Error opening input file"); |     if (!input || !output) return -1; | ||||||
|         return -1; |  | ||||||
|     } |  | ||||||
|     FILE* output = fopen(output_file_name, "wb"); |  | ||||||
|     if (!output) { |  | ||||||
|         perror("Error opening output file"); |  | ||||||
|         fclose(input); |  | ||||||
|         return -1; |  | ||||||
|     } |  | ||||||
| 
 | 
 | ||||||
|     int unique_count; |     unsigned int frequencies[MAX_SYMBOLS] = {0}; | ||||||
|     if (fread(&unique_count, sizeof(int), 1, input) != 1) { |     fread(frequencies, sizeof(frequencies[0]), MAX_SYMBOLS, input); | ||||||
|         perror("Error reading from input file"); |     Node* root = build_huffman_tree(frequencies); | ||||||
|         fclose(input); |     if (!root) return -1; | ||||||
|         fclose(output); |  | ||||||
|         return -1; |  | ||||||
|     } |  | ||||||
| 
 | 
 | ||||||
|     unsigned char unique_data[256]; |     Node* current = root; | ||||||
|     unsigned freq[256] = {0}; |  | ||||||
|     for (int i = 0; i < unique_count; i++) { |  | ||||||
|         if (fread(&unique_data[i], sizeof(unsigned char), 1, input) != 1 || |  | ||||||
|             fread(&freq[unique_data[i]], sizeof(unsigned), 1, input) != 1) { |  | ||||||
|             perror("Error reading from input file"); |  | ||||||
|             fclose(input); |  | ||||||
|             fclose(output); |  | ||||||
|             return -1; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     HuffmanNode* root = buildHuffmanTree(unique_data, freq, unique_count); |  | ||||||
| 
 |  | ||||||
|     size_t total_bits; |  | ||||||
|     fseek(input, -(long long)sizeof(size_t), SEEK_END); |  | ||||||
|     fread(&total_bits, sizeof(size_t), 1, input); |  | ||||||
| 
 |  | ||||||
|     fseek(input, sizeof(int) + unique_count * (sizeof(unsigned char) + sizeof(unsigned)), SEEK_SET); |  | ||||||
| 
 |  | ||||||
|     HuffmanNode* current = root; |  | ||||||
|     unsigned char byte; |     unsigned char byte; | ||||||
|     size_t bits_read = 0; |     int bit; | ||||||
| 
 | 
 | ||||||
|     while (bits_read < total_bits && fread(&byte, sizeof(byte), 1, input) == 1) { |     // Читаем и декодируем символы
 | ||||||
|         for (int i = 7; i >= 0 && bits_read < total_bits; i--, bits_read++) { |     while (fread(&byte, 1, 1, input) == 1) { | ||||||
|             if (byte & (1 << i)) { |         for (bit = 7; bit >= 0; bit--) { | ||||||
|                 current = current->right; |             current = (byte & (1 << bit)) ? current->right : current->left; | ||||||
|             } else { |  | ||||||
|                 current = current->left; |  | ||||||
|             } |  | ||||||
| 
 | 
 | ||||||
|             if (!current->left && !current->right) { |             if (!current->left && !current->right) { | ||||||
|                 fwrite(&current->data, sizeof(current->data), 1, output); |                 if (current->symbol == 256) { // Маркер EOF
 | ||||||
|  |                     fclose(input); | ||||||
|  |                     fclose(output); | ||||||
|  |                     free_huffman_tree(root); | ||||||
|  |                     return 0; | ||||||
|  |                 } | ||||||
|  |                 fwrite(&current->symbol, 1, 1, output); | ||||||
|                 current = root; |                 current = root; | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
| @ -291,4 +196,12 @@ int decompress_1(const char* input_file_name, const char* output_file_name) { | |||||||
|     fclose(output); |     fclose(output); | ||||||
|     free_huffman_tree(root); |     free_huffman_tree(root); | ||||||
|     return 0; |     return 0; | ||||||
| } | } | ||||||
|  | 
 | ||||||
|  | int compress_2(const char* input_file_name, const char* output_file_name){ | ||||||
|  |     return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | int decompress_2(const char* input_file_name, const char* output_file_name){ | ||||||
|  |     return 0; | ||||||
|  | } | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user