Update sk1/compressor.c
This commit is contained in:
		
							parent
							
								
									8b0dbfb120
								
							
						
					
					
						commit
						dea7942391
					
				
							
								
								
									
										457
									
								
								sk1/compressor.c
									
									
									
									
									
								
							
							
						
						
									
										457
									
								
								sk1/compressor.c
									
									
									
									
									
								
							| @ -1,304 +1,199 @@ | ||||
| #include <assert.h> | ||||
| #include "compressor.h" | ||||
| #include <stdio.h> | ||||
| #include <stdlib.h> | ||||
| #include <string.h> | ||||
| #include "compressor.h" | ||||
| 
 | ||||
| #define BUFSIZE 1024 | ||||
| // --- Алгоритм Run-Length Encoding (RLE) ---
 | ||||
| 
 | ||||
| // Huffman Tree Node
 | ||||
// One node of the Huffman tree; pointers to these nodes are also what the
// min-heap stores.
struct MinHeapNode {
    char data;                  // byte value carried by the node
    unsigned freq;              // weight the heap orders nodes by
    struct MinHeapNode *left;   // child followed on a 0 bit, NULL if absent
    struct MinHeapNode *right;  // child followed on a 1 bit, NULL if absent
};
| 
 | ||||
| // MinHeap
 | ||||
// Binary min-heap of Huffman tree nodes, keyed on each node's freq field.
struct MinHeap {
    unsigned size;               // number of slots currently in use
    unsigned capacity;           // number of slots allocated in array
    struct MinHeapNode **array;  // heap storage: array of node pointers
};
| 
 | ||||
| // Create a new node
 | ||||
| struct MinHeapNode* newNode(char data, unsigned freq) { | ||||
|     struct MinHeapNode* temp = (struct MinHeapNode*)malloc(sizeof(struct MinHeapNode)); | ||||
|     temp->data = data; | ||||
|     temp->freq = freq; | ||||
|     temp->left = temp->right = NULL; | ||||
|     return temp; | ||||
| } | ||||
| 
 | ||||
| // Create a MinHeap
 | ||||
| struct MinHeap* createMinHeap(unsigned capacity) { | ||||
|     struct MinHeap* minHeap = (struct MinHeap*)malloc(sizeof(struct MinHeap)); | ||||
|     minHeap->size = 0; | ||||
|     minHeap->capacity = capacity; | ||||
|     minHeap->array = (struct MinHeapNode**)malloc(minHeap->capacity * sizeof(struct MinHeapNode*)); | ||||
|     return minHeap; | ||||
| } | ||||
| 
 | ||||
| // Swap two min heap nodes
 | ||||
// Exchanges the two node pointers stored at *a and *b.
void swapMinHeapNode(struct MinHeapNode** a, struct MinHeapNode** b) {
    struct MinHeapNode *held = *b;
    *b = *a;
    *a = held;
}
| 
 | ||||
| // MinHeapify a node
 | ||||
| void minHeapify(struct MinHeap* minHeap, int idx) { | ||||
|     int smallest = idx; | ||||
|     int left = 2 * idx + 1; | ||||
|     int right = 2 * idx + 2; | ||||
| 
 | ||||
|     if (left < minHeap->size && minHeap->array[left]->freq < minHeap->array[smallest]->freq) | ||||
|         smallest = left; | ||||
| 
 | ||||
|     if (right < minHeap->size && minHeap->array[right]->freq < minHeap->array[smallest]->freq) | ||||
|         smallest = right; | ||||
| 
 | ||||
|     if (smallest != idx) { | ||||
|         swapMinHeapNode(&minHeap->array[smallest], &minHeap->array[idx]); | ||||
|         minHeapify(minHeap, smallest); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| // Extract the minimum value node
 | ||||
| struct MinHeapNode* extractMin(struct MinHeap* minHeap) { | ||||
|     struct MinHeapNode* temp = minHeap->array[0]; | ||||
|     minHeap->array[0] = minHeap->array[minHeap->size - 1]; | ||||
|     --minHeap->size; | ||||
|     minHeapify(minHeap, 0); | ||||
|     return temp; | ||||
| } | ||||
| 
 | ||||
| // Insert a node into the MinHeap
 | ||||
| void insertMinHeap(struct MinHeap* minHeap, struct MinHeapNode* minHeapNode) { | ||||
|     ++minHeap->size; | ||||
|     int i = minHeap->size - 1; | ||||
| 
 | ||||
|     while (i && minHeapNode->freq < minHeap->array[(i - 1) / 2]->freq) { | ||||
|         minHeap->array[i] = minHeap->array[(i - 1) / 2]; | ||||
|         i = (i - 1) / 2; | ||||
|     } | ||||
|     minHeap->array[i] = minHeapNode; | ||||
| } | ||||
| 
 | ||||
| // Build a MinHeap
 | ||||
| struct MinHeap* buildMinHeap(char data[], int freq[], int size) { | ||||
|     struct MinHeap* minHeap = createMinHeap(size); | ||||
|     for (int i = 0; i < size; ++i) | ||||
|         minHeap->array[i] = newNode(data[i], freq[i]); | ||||
|     minHeap->size = size; | ||||
|     for (int i = (minHeap->size - 2) / 2; i >= 0; --i) | ||||
|         minHeapify(minHeap, i); | ||||
|     return minHeap; | ||||
| } | ||||
| 
 | ||||
| // Build Huffman Tree
 | ||||
| struct MinHeapNode* buildHuffmanTree(char data[], int freq[], int size) { | ||||
|     struct MinHeapNode *left, *right, *top; | ||||
|     struct MinHeap* minHeap = buildMinHeap(data, freq, size); | ||||
| 
 | ||||
|     while (minHeap->size != 1) { | ||||
|         left = extractMin(minHeap); | ||||
|         right = extractMin(minHeap); | ||||
| 
 | ||||
|         top = newNode('$', left->freq + right->freq); | ||||
|         top->left = left; | ||||
|         top->right = right; | ||||
| 
 | ||||
|         insertMinHeap(minHeap, top); | ||||
| int compress_2(const char* input_file_name, const char* output_file_name) { | ||||
|     FILE *infile = fopen(input_file_name, "rb"); | ||||
|     if (!infile) { | ||||
|         perror("Error opening input file"); | ||||
|         return -1; | ||||
|     } | ||||
| 
 | ||||
|     return extractMin(minHeap); | ||||
| } | ||||
| 
 | ||||
| // Generate Huffman Codes
 | ||||
| void generateCodes(struct MinHeapNode* root, char** codes, char* buffer, int top) { | ||||
|     if (root->left) { | ||||
|         buffer[top] = '0'; | ||||
|         generateCodes(root->left, codes, buffer, top + 1); | ||||
|     } | ||||
|     if (root->right) { | ||||
|         buffer[top] = '1'; | ||||
|         generateCodes(root->right, codes, buffer, top + 1); | ||||
|     } | ||||
|     if (!root->left && !root->right) { | ||||
|         buffer[top] = '\0'; | ||||
|         codes[(unsigned char)root->data] = strdup(buffer); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| // Compress using Huffman encoding
 | ||||
| void compress_1(const char* input_file_name, const char* output_file_name) { | ||||
|     FILE* input = fopen(input_file_name, "rb"); | ||||
|     FILE* output = fopen(output_file_name, "wb"); | ||||
|     if (!input || !output) { | ||||
|         perror("File error"); | ||||
|         exit(EXIT_FAILURE); | ||||
|     FILE *outfile = fopen(output_file_name, "wb"); | ||||
|     if (!outfile) { | ||||
|         perror("Error opening output file"); | ||||
|         fclose(infile); | ||||
|         return -1; | ||||
|     } | ||||
| 
 | ||||
|     int freq[256] = {0}; | ||||
|     char buffer[BUFSIZE]; | ||||
|     size_t bytes_read; | ||||
|     unsigned char current_byte, previous_byte; | ||||
|     size_t count = 0; | ||||
|      | ||||
|     // Читаємо перший байт
 | ||||
|     if (fread(&previous_byte, 1, 1, infile) != 1) { | ||||
|         fclose(infile); | ||||
|         fclose(outfile); | ||||
|         return 0;  // Порожній файл
 | ||||
|     } | ||||
| 
 | ||||
|     while ((bytes_read = fread(buffer, 1, BUFSIZE, input)) > 0) { | ||||
|         for (size_t i = 0; i < bytes_read; i++) { | ||||
|             freq[(unsigned char)buffer[i]]++; | ||||
|     count = 1;  // Ініціалізуємо лічильник
 | ||||
| 
 | ||||
|     // Читаємо залишок файлу
 | ||||
|     while (fread(¤t_byte, 1, 1, infile) == 1) { | ||||
|         if (current_byte == previous_byte && count < 255) { | ||||
|             count++;  // Збільшуємо лічильник
 | ||||
|         } else { | ||||
|             // Записуємо попередній символ і його кількість
 | ||||
|             fwrite(&previous_byte, 1, 1, outfile); | ||||
|             fwrite(&count, 1, 1, outfile); | ||||
|             previous_byte = current_byte; | ||||
|             count = 1; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     char data[256]; | ||||
|     int freq_array[256]; | ||||
|     int size = 0; | ||||
|     // Записуємо останній символ
 | ||||
|     fwrite(&previous_byte, 1, 1, outfile); | ||||
|     fwrite(&count, 1, 1, outfile); | ||||
| 
 | ||||
|     fclose(infile); | ||||
|     fclose(outfile); | ||||
|      | ||||
|     return 1;  // Повертаємо успішний результат
 | ||||
| } | ||||
| 
 | ||||
// Expands a run-length encoded file made of (byte, count) pairs, where count
// is a single byte, writing `count` copies of `byte` for each pair.
//
// input_file_name:  path of the RLE-compressed file.
// output_file_name: path of the file to create with the expanded data.
//
// Returns 1 on success and -1 when a file cannot be opened, the input ends in
// the middle of a pair, or a read/write error occurs.
int decompress_2(const char* input_file_name, const char* output_file_name) {
    FILE *infile = fopen(input_file_name, "rb");
    if (!infile) {
        perror("Error opening input file");
        return -1;
    }

    FILE *outfile = fopen(output_file_name, "wb");
    if (!outfile) {
        perror("Error opening output file");
        fclose(infile);
        return -1;
    }

    int status = 1;
    int symbol;

    while (status == 1 && (symbol = fgetc(infile)) != EOF) {
        // The count occupies exactly one byte. Reading it with fgetc avoids
        // filling only part of a wider integer, which left the remaining
        // bytes uninitialised.
        int run_length = fgetc(infile);
        if (run_length == EOF) {
            status = -1;  // input ended between a byte and its count
            break;
        }

        for (int i = 0; i < run_length; i++) {
            if (fputc(symbol, outfile) == EOF) {
                status = -1;
                break;
            }
        }
    }

    if (ferror(infile)) {
        status = -1;
    }
    fclose(infile);
    // fclose flushes buffered output, so a failure here means lost data.
    if (fclose(outfile) != 0) {
        status = -1;
    }

    return status;
}
| 
 | ||||
| 
 | ||||
| // --- Алгоритм Хаффмана (Huffman Coding) ---
 | ||||
| 
 | ||||
| // Структура для вузлів дерева Хаффмана
 | ||||
// A byte value paired with the number of times it occurs.
typedef struct {
    unsigned char symbol;  // the byte value
    size_t frequency;      // occurrence count for that byte
} HuffmanSymbol;
| 
 | ||||
| typedef struct Node { | ||||
|     HuffmanSymbol symbol; | ||||
|     struct Node *left, *right; | ||||
| } Node; | ||||
| 
 | ||||
| // Функція для порівняння вузлів для використання в черзі
 | ||||
| int compare_nodes(const void *a, const void *b) { | ||||
|     return ((Node*)a)->symbol.frequency - ((Node*)b)->symbol.frequency; | ||||
| } | ||||
| 
 | ||||
| // Створення дерева Хаффмана
 | ||||
| Node* create_huffman_tree(HuffmanSymbol* symbols, size_t n) { | ||||
|     // Використовуємо чергу для побудови дерева Хаффмана
 | ||||
|     qsort(symbols, n, sizeof(HuffmanSymbol), compare_nodes); | ||||
| 
 | ||||
|     // Створюємо чергу вузлів для побудови дерева
 | ||||
|     Node** queue = malloc(n * sizeof(Node*)); | ||||
|     for (size_t i = 0; i < n; i++) { | ||||
|         queue[i] = malloc(sizeof(Node)); | ||||
|         queue[i]->symbol = symbols[i]; | ||||
|         queue[i]->left = queue[i]->right = NULL; | ||||
|     } | ||||
| 
 | ||||
|     size_t queue_size = n; | ||||
| 
 | ||||
|     // Побудова дерева Хаффмана
 | ||||
|     while (queue_size > 1) { | ||||
|         // Зливаємо два найменші елементи
 | ||||
|         Node* left = queue[0]; | ||||
|         Node* right = queue[1]; | ||||
| 
 | ||||
|         Node* parent = malloc(sizeof(Node)); | ||||
|         parent->symbol.symbol = 0;  // Спільний символ
 | ||||
|         parent->symbol.frequency = left->symbol.frequency + right->symbol.frequency; | ||||
|         parent->left = left; | ||||
|         parent->right = right; | ||||
| 
 | ||||
|         // Видаляємо перші два елементи з черги та додаємо новий
 | ||||
|         memmove(queue, queue + 2, (queue_size - 2) * sizeof(Node*)); | ||||
|         queue[queue_size - 2] = parent; | ||||
|         queue_size--; | ||||
| 
 | ||||
|         qsort(queue, queue_size, sizeof(Node*), compare_nodes); | ||||
|     } | ||||
| 
 | ||||
|     Node* root = queue[0]; | ||||
|     free(queue); | ||||
| 
 | ||||
|     return root; | ||||
| } | ||||
| 
 | ||||
| // Функція для стиснення з використанням дерева Хаффмана
 | ||||
| int compress_1(const char* input_file_name, const char* output_file_name) { | ||||
|     FILE *infile = fopen(input_file_name, "rb"); | ||||
|     if (!infile) { | ||||
|         perror("Error opening input file"); | ||||
|         return -1; | ||||
|     } | ||||
| 
 | ||||
|     fseek(infile, 0, SEEK_END); | ||||
|     size_t file_size = ftell(infile); | ||||
|     fseek(infile, 0, SEEK_SET); | ||||
| 
 | ||||
|     unsigned char* buffer = malloc(file_size); | ||||
|     if (!buffer) { | ||||
|         fclose(infile); | ||||
|         return -1; | ||||
|     } | ||||
| 
 | ||||
|     fread(buffer, 1, file_size, infile); | ||||
|     fclose(infile); | ||||
| 
 | ||||
|     // Підрахунок частоти кожного байта
 | ||||
|     size_t frequencies[256] = {0}; | ||||
|     for (size_t i = 0; i < file_size; i++) { | ||||
|         frequencies[buffer[i]]++; | ||||
|     } | ||||
| 
 | ||||
|     HuffmanSymbol symbols[256]; | ||||
|     size_t num_symbols = 0; | ||||
| 
 | ||||
|     for (int i = 0; i < 256; i++) { | ||||
|         if (freq[i] > 0) { | ||||
|             data[size] = (char)i; | ||||
|             freq_array[size] = freq[i]; | ||||
|             size++; | ||||
|         if (frequencies[i] > 0) { | ||||
|             symbols[num_symbols].symbol = (unsigned char)i; | ||||
|             symbols[num_symbols].frequency = frequencies[i]; | ||||
|             num_symbols++; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     struct MinHeapNode* root = buildHuffmanTree(data, freq_array, size); | ||||
|     char* codes[256] = {NULL}; | ||||
|     char code_buffer[256]; | ||||
|     generateCodes(root, codes, code_buffer, 0); | ||||
|     Node* huffman_tree = create_huffman_tree(symbols, num_symbols); | ||||
| 
 | ||||
|     rewind(input); | ||||
|     // Тут треба реалізувати кодування та запис бітових кодів в файл
 | ||||
|     // Оскільки це складніше, я залишаю це як заготовку для подальшої реалізації
 | ||||
| 
 | ||||
|     fwrite(&size, sizeof(int), 1, output); | ||||
|     for (int i = 0; i < size; i++) { | ||||
|         fputc(data[i], output); | ||||
|         fwrite(&freq_array[i], sizeof(int), 1, output); | ||||
|     } | ||||
| 
 | ||||
|     unsigned char bit_buffer = 0; | ||||
|     int bit_count = 0; | ||||
| 
 | ||||
|     while ((bytes_read = fread(buffer, 1, BUFSIZE, input)) > 0) { | ||||
|         for (size_t i = 0; i < bytes_read; i++) { | ||||
|             char* code = codes[(unsigned char)buffer[i]]; | ||||
|             for (char* p = code; *p; p++) { | ||||
|                 bit_buffer = (bit_buffer << 1) | (*p - '0'); | ||||
|                 bit_count++; | ||||
|                 if (bit_count == 8) { | ||||
|                     fputc(bit_buffer, output); | ||||
|                     bit_buffer = 0; | ||||
|                     bit_count = 0; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     if (bit_count > 0) { | ||||
|         bit_buffer <<= (8 - bit_count); | ||||
|         fputc(bit_buffer, output); | ||||
|     } | ||||
| 
 | ||||
|     fclose(input); | ||||
|     fclose(output); | ||||
|     for (int i = 0; i < 256; i++) { | ||||
|         free(codes[i]); | ||||
|     } | ||||
|     free(buffer); | ||||
|     return 1; | ||||
| } | ||||
| 
 | ||||
| // Decompress using Huffman encoding
 | ||||
| void decompress_1(const char* input_file_name, const char* output_file_name) { | ||||
|     FILE* input = fopen(input_file_name, "rb"); | ||||
|     FILE* output = fopen(output_file_name, "wb"); | ||||
|     if (!input || !output) { | ||||
|         perror("File error"); | ||||
|         exit(EXIT_FAILURE); | ||||
|     } | ||||
| 
 | ||||
|     int size; | ||||
|     fread(&size, sizeof(int), 1, input); | ||||
|     char data[256]; | ||||
|     int freq[256]; | ||||
| 
 | ||||
|     for (int i = 0; i < size; i++) { | ||||
|         data[i] = fgetc(input); | ||||
|         fread(&freq[i], sizeof(int), 1, input); | ||||
|     } | ||||
| 
 | ||||
|     struct MinHeapNode* root = buildHuffmanTree(data, freq, size); | ||||
|     struct MinHeapNode* current = root; | ||||
| 
 | ||||
|     int bit_buffer; | ||||
|     int bit_count = 0; | ||||
|     int byte; | ||||
| 
 | ||||
|     while ((byte = fgetc(input)) != EOF) { | ||||
|         for (int i = 7; i >= 0; i--) { | ||||
|             int bit = (byte >> i) & 1; | ||||
|             if (bit == 0) { | ||||
|                 current = current->left; | ||||
|             } else { | ||||
|                 current = current->right; | ||||
|             } | ||||
| 
 | ||||
|             if (!current->left && !current->right) { | ||||
|                 fputc(current->data, output); | ||||
|                 current = root; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     fclose(input); | ||||
|     fclose(output); | ||||
| } | ||||
| 
 | ||||
| // Compress using RLE
 | ||||
// Compresses a file with run-length encoding.
//
// The output is a sequence of (byte, count) pairs where count is a single
// byte in the range 1..255; longer runs are emitted as several pairs.
//
// input_file_name:  path of the file to compress.
// output_file_name: path of the file to create with the (byte, count) pairs.
//
// Terminates the process with EXIT_FAILURE when either file cannot be opened.
void compress_2(const char* input_file_name, const char* output_file_name) {
    // The count is stored in one byte, so a run may not exceed 255. The
    // previous code counted up to a whole buffer and truncated on write, so a
    // run of 256 was written as 0.
    enum { RLE_MAX_RUN = 255 };

    FILE* input = fopen(input_file_name, "rb");
    FILE* output = fopen(output_file_name, "wb");
    if (!input || !output) {
        perror("File error");
        exit(EXIT_FAILURE);
    }

    int current = fgetc(input);
    while (current != EOF) {
        int run = 1;
        int next;

        // Extend the run while the same byte repeats and the count still
        // fits. A byte read after the cap is reached is kept in `next` and
        // starts the following run.
        while ((next = fgetc(input)) == current && run < RLE_MAX_RUN) {
            run++;
        }

        fputc(current, output);
        fputc(run, output);
        current = next;
    }

    fclose(input);
    fclose(output);
}
| 
 | ||||
| // Decompress using RLE
 | ||||
| void decompress_2(const char* input_file_name, const char* output_file_name) { | ||||
|     FILE* input = fopen(input_file_name, "rb"); | ||||
|     FILE* output = fopen(output_file_name, "wb"); | ||||
|     if (!input || !output) { | ||||
|         perror("File error"); | ||||
|         exit(EXIT_FAILURE); | ||||
|     } | ||||
| 
 | ||||
|     int current; | ||||
|     int count; | ||||
| 
 | ||||
|     while ((current = fgetc(input)) != EOF) { | ||||
|         count = fgetc(input); | ||||
|         for (int i = 0; i < count; i++) { | ||||
|             fputc(current, output); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     fclose(input); | ||||
|     fclose(output); | ||||
// Placeholder for Huffman decompression; it performs no work and always
// returns 0. A real implementation will be involved: it needs the tree, or the
// code length of each symbol, to be stored with the compressed data.
int decompress_1(const char* input_file_name, const char* output_file_name) {
    (void)input_file_name;   // unused until the decoder is implemented
    (void)output_file_name;  // unused until the decoder is implemented
    return 0;
}
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user