Update sk1/compressor.c
This commit is contained in:
		
							parent
							
								
									2f587f253b
								
							
						
					
					
						commit
						70c293a075
					
				
							
								
								
									
										461
									
								
								sk1/compressor.c
									
									
									
									
									
								
							
							
						
						
									
										461
									
								
								sk1/compressor.c
									
									
									
									
									
								
							| @ -1,291 +1,250 @@ | |||||||
| #include "compressor.h" |  | ||||||
| #include <stdio.h> | #include <stdio.h> | ||||||
| #include <stdlib.h> | #include <stdint.h> | ||||||
| #include <string.h> | #include <string.h> | ||||||
|  | #include <stdlib.h> | ||||||
| 
 | 
 | ||||||
| // --- Алгоритм Run-Length Encoding (RLE) ---
 | #define WINDOW_SIZE 4096 | ||||||
|  | #define LOOKAHEAD_BUFFER_SIZE 18 | ||||||
| 
 | 
 | ||||||
| int compress_2(const char* input_file_name, const char* output_file_name) { | typedef struct { | ||||||
|     FILE *infile = fopen(input_file_name, "rb"); |     uint16_t offset; | ||||||
|     if (!infile) { |     uint8_t length; | ||||||
|  |     uint8_t next_char; | ||||||
|  | } LZ77Triple; | ||||||
|  | 
 | ||||||
|  | int lz77_compress(const char *input_filename, const char *output_filename) { | ||||||
|  |     // Open the input file in binary read mode
 | ||||||
|  |     FILE *input_file = fopen(input_filename, "rb"); | ||||||
|  |     if (!input_file) { | ||||||
|         perror("Error opening input file"); |         perror("Error opening input file"); | ||||||
|         return -1; |         return -1; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     FILE *outfile = fopen(output_file_name, "wb"); |     // Open the output file in binary write mode
 | ||||||
|     if (!outfile) { |     FILE *output_file = fopen(output_filename, "wb"); | ||||||
|  |     if (!output_file) { | ||||||
|         perror("Error opening output file"); |         perror("Error opening output file"); | ||||||
|         fclose(infile); |         fclose(input_file); | ||||||
|         return -1; |         return -2; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     unsigned char current_byte, previous_byte; |     uint8_t *window = (uint8_t *)malloc(WINDOW_SIZE + LOOKAHEAD_BUFFER_SIZE); | ||||||
|     size_t count = 0; |     if (!window) { | ||||||
| 
 |         perror("Memory allocation failed"); | ||||||
|     if (fread(&previous_byte, 1, 1, infile) != 1) { |         fclose(input_file); | ||||||
|         fclose(infile); |         fclose(output_file); | ||||||
|         fclose(outfile); |         return -3; | ||||||
|         return 0;  // Порожній файл
 |  | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     count = 1; |     size_t window_start = 0; | ||||||
|  |     size_t lookahead_start = 0; | ||||||
|  |     size_t bytes_read; | ||||||
| 
 | 
 | ||||||
|     while (fread(&current_byte, 1, 1, infile) == 1) { |     // Initialize the window with data from the input file
 | ||||||
|  |     bytes_read = fread(window + WINDOW_SIZE, 1, LOOKAHEAD_BUFFER_SIZE, input_file); | ||||||
|  | 
 | ||||||
|  |     while (bytes_read > 0) { | ||||||
|  |         size_t best_match_offset = 0; | ||||||
|  |         size_t best_match_length = 0; | ||||||
|  | 
 | ||||||
|  |         // Search for the best match within the sliding window
 | ||||||
|  |         for (size_t i = window_start; i < WINDOW_SIZE + lookahead_start; i++) { | ||||||
|  |             size_t match_length = 0; | ||||||
|  | 
 | ||||||
|  |             while (match_length < bytes_read && | ||||||
|  |                    window[i + match_length] == window[WINDOW_SIZE + match_length]) { | ||||||
|  |                 match_length++; | ||||||
|  |                 if (match_length >= LOOKAHEAD_BUFFER_SIZE) { | ||||||
|  |                     break; | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             if (match_length > best_match_length) { | ||||||
|  |                 best_match_length = match_length; | ||||||
|  |                 best_match_offset = WINDOW_SIZE + lookahead_start - i; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         // Create a triple and write it to the output file
 | ||||||
|  |         LZ77Triple triple; | ||||||
|  |         triple.offset = (uint16_t)best_match_offset; | ||||||
|  |         triple.length = (uint8_t)best_match_length; | ||||||
|  |         triple.next_char = window[WINDOW_SIZE + best_match_length]; | ||||||
|  | 
 | ||||||
|  |         // Write the triple to the output file
 | ||||||
|  |         fwrite(&triple, sizeof(LZ77Triple), 1, output_file); | ||||||
|  | 
 | ||||||
|  |         // Slide the window
 | ||||||
|  |         window_start = (window_start + best_match_length + 1) % WINDOW_SIZE; | ||||||
|  |         lookahead_start = (lookahead_start + best_match_length + 1) % LOOKAHEAD_BUFFER_SIZE; | ||||||
|  | 
 | ||||||
|  |         // Read new byte into the lookahead buffer
 | ||||||
|  |         bytes_read = fread(window + WINDOW_SIZE, 1, LOOKAHEAD_BUFFER_SIZE - lookahead_start, input_file); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     // Cleanup and close files
 | ||||||
|  |     fclose(input_file); | ||||||
|  |     fclose(output_file); | ||||||
|  |     free(window); | ||||||
|  | 
 | ||||||
|  |     return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | int lz77_decompress(const char *input_filename, const char *output_filename) { | ||||||
|  |     FILE *input = fopen(input_filename, "rb"); | ||||||
|  |     FILE *output = fopen(output_filename, "wb"); | ||||||
|  |     if (!input || !output) { | ||||||
|  |         if (input) fclose(input); | ||||||
|  |         if (output) fclose(output); | ||||||
|  |         return -1; // Помилка відкриття файлу
 | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     size_t buffer_size = 4096; // Максимальний розмір вікна
 | ||||||
|  |     unsigned char *window = malloc(buffer_size); | ||||||
|  |     size_t window_size = 0; // Розмір заповненої частини вікна
 | ||||||
|  |     size_t window_pos = 0;  // Поточна позиція в межах вікна
 | ||||||
|  | 
 | ||||||
|  |     if (!window) { | ||||||
|  |         fclose(input); | ||||||
|  |         fclose(output); | ||||||
|  |         return -1; // Помилка пам'яті
 | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     while (!feof(input)) { | ||||||
|  |         unsigned char flag; | ||||||
|  |         if (fread(&flag, 1, 1, input) != 1) break; | ||||||
|  | 
 | ||||||
|  |         for (int i = 0; i < 8 && !feof(input); i++) { | ||||||
|  |             if (flag & (1 << i)) { // Літеральний символ
 | ||||||
|  |                 unsigned char literal; | ||||||
|  |                 if (fread(&literal, 1, 1, input) != 1) break; | ||||||
|  | 
 | ||||||
|  |                 fputc(literal, output); | ||||||
|  | 
 | ||||||
|  |                 // Додати символ у вікно
 | ||||||
|  |                 window[window_pos] = literal; | ||||||
|  |                 window_pos = (window_pos + 1) % buffer_size; | ||||||
|  |                 if (window_size < buffer_size) window_size++; | ||||||
|  |             } else { // Посилання
 | ||||||
|  |                 unsigned short offset_length; | ||||||
|  |                 if (fread(&offset_length, 2, 1, input) != 1) break; | ||||||
|  | 
 | ||||||
|  |                 size_t offset = offset_length >> 4; | ||||||
|  |                 size_t length = (offset_length & 0xF) + 3; | ||||||
|  | 
 | ||||||
|  |                 for (size_t j = 0; j < length; j++) { | ||||||
|  |                     unsigned char byte = window[(window_pos - offset + buffer_size) % buffer_size]; | ||||||
|  |                     fputc(byte, output); | ||||||
|  | 
 | ||||||
|  |                     // Додати байт у вікно
 | ||||||
|  |                     window[window_pos] = byte; | ||||||
|  |                     window_pos = (window_pos + 1) % buffer_size; | ||||||
|  |                     if (window_size < buffer_size) window_size++; | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     free(window); | ||||||
|  |     fclose(input); | ||||||
|  |     fclose(output); | ||||||
|  |     return 0; // Успішна декомпресія
 | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | void rle_compress(const char *input_filename, const char *output_filename) { | ||||||
|  |     FILE *input_file = fopen(input_filename, "rb"); | ||||||
|  |     if (!input_file) { | ||||||
|  |         perror("Error opening input file"); | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     FILE *output_file = fopen(output_filename, "wb"); | ||||||
|  |     if (!output_file) { | ||||||
|  |         perror("Error opening output file"); | ||||||
|  |         fclose(input_file); | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     uint8_t current_byte, previous_byte; | ||||||
|  |     uint8_t count = 1; | ||||||
|  | 
 | ||||||
|  |     if (fread(&previous_byte, 1, 1, input_file) != 1) { | ||||||
|  |         printf("Input file is empty or read error occurred.\n"); | ||||||
|  |         fclose(input_file); | ||||||
|  |         fclose(output_file); | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     while (fread(&current_byte, 1, 1, input_file) == 1) { | ||||||
|         if (current_byte == previous_byte && count < 255) { |         if (current_byte == previous_byte && count < 255) { | ||||||
|             count++; |             count++; | ||||||
|         } else { |         } else { | ||||||
|             fwrite(&previous_byte, 1, 1, outfile); |             fwrite(&previous_byte, 1, 1, output_file); | ||||||
|             fwrite(&count, 1, 1, outfile); |             fwrite(&count, 1, 1, output_file); | ||||||
|  |             printf("Writing byte: %c with count: %d\n", previous_byte, count); | ||||||
|             previous_byte = current_byte; |             previous_byte = current_byte; | ||||||
|             count = 1; |             count = 1; | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     fwrite(&previous_byte, 1, 1, outfile); |     fwrite(&previous_byte, 1, 1, output_file); | ||||||
|     fwrite(&count, 1, 1, outfile); |     fwrite(&count, 1, 1, output_file); | ||||||
|  |     printf("Writing byte: %c with count: %d\n", previous_byte, count); | ||||||
| 
 | 
 | ||||||
|     fclose(infile); |     fclose(input_file); | ||||||
|     fclose(outfile); |     fclose(output_file); | ||||||
| 
 |  | ||||||
|     return 1; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| int decompress_2(const char* input_file_name, const char* output_file_name) { | 
 | ||||||
|     FILE *infile = fopen(input_file_name, "rb"); | 
 | ||||||
|     if (!infile) { | int rle_decompress(const char *input_filename, const char *output_filename) { | ||||||
|  |     // Open the input file in binary read mode
 | ||||||
|  |     FILE *input_file = fopen(input_filename, "rb"); | ||||||
|  |     if (!input_file) { | ||||||
|         perror("Error opening input file"); |         perror("Error opening input file"); | ||||||
|         return -1; |         return -1; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     FILE *outfile = fopen(output_file_name, "wb"); |     // Open the output file in binary write mode
 | ||||||
|     if (!outfile) { |     FILE *output_file = fopen(output_filename, "wb"); | ||||||
|  |     if (!output_file) { | ||||||
|         perror("Error opening output file"); |         perror("Error opening output file"); | ||||||
|         fclose(infile); |         fclose(input_file); | ||||||
|         return -1; |         return -2; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     unsigned char current_byte; |     uint8_t byte; | ||||||
|     unsigned char count; |     uint8_t count; | ||||||
|  |     size_t decompressed_size = 0; | ||||||
| 
 | 
 | ||||||
|     while (fread(&current_byte, 1, 1, infile) == 1) { |     // Read [byte, count] pairs from the input file
 | ||||||
|         if (fread(&count, 1, 1, infile) != 1) { |     while (fread(&byte, 1, 1, input_file) == 1) { | ||||||
|             perror("Malformed input file"); |         if (fread(&count, 1, 1, input_file) != 1) { | ||||||
|             fclose(infile); |             // Handle malformed input file
 | ||||||
|             fclose(outfile); |             fprintf(stderr, "Error: Malformed input file\n"); | ||||||
|             return -1; |             fclose(input_file); | ||||||
|  |             fclose(output_file); | ||||||
|  |             return -3; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         for (size_t i = 0; i < count; i++) { |         // Write 'count' occurrences of 'byte' to the output file
 | ||||||
|             fwrite(&current_byte, 1, 1, outfile); |         for (uint8_t i = 0; i < count; i++) { | ||||||
|  |             if (fwrite(&byte, 1, 1, output_file) != 1) { | ||||||
|  |                 perror("Error writing to output file"); | ||||||
|  |                 fclose(input_file); | ||||||
|  |                 fclose(output_file); | ||||||
|  |                 return -4; | ||||||
|  |             } | ||||||
|  |             decompressed_size++; | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     fclose(infile); |     // Clean up and close files
 | ||||||
|     fclose(outfile); |     fclose(input_file); | ||||||
|  |     fclose(output_file); | ||||||
| 
 | 
 | ||||||
|     return 1; |     return (int)decompressed_size; | ||||||
| } | } | ||||||
| 
 |  | ||||||
| // --- Алгоритм Хаффмана (Huffman Coding) ---
 |  | ||||||
| 
 |  | ||||||
| typedef struct Node { |  | ||||||
|     unsigned char symbol; |  | ||||||
|     size_t frequency; |  | ||||||
|     struct Node *left, *right; |  | ||||||
| } Node; |  | ||||||
| 
 |  | ||||||
| typedef struct { |  | ||||||
|     unsigned char symbol; |  | ||||||
|     char *code; |  | ||||||
| } HuffmanCode; |  | ||||||
| 
 |  | ||||||
| int compare_nodes(const void *a, const void *b) { |  | ||||||
|     return (*(Node**)a)->frequency - (*(Node**)b)->frequency; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node* create_huffman_tree(unsigned char *data, size_t size) { |  | ||||||
|     size_t freq[256] = {0}; |  | ||||||
|     for (size_t i = 0; i < size; i++) { |  | ||||||
|         freq[data[i]]++; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     Node *nodes[256]; |  | ||||||
|     size_t node_count = 0; |  | ||||||
| 
 |  | ||||||
|     for (int i = 0; i < 256; i++) { |  | ||||||
|         if (freq[i] > 0) { |  | ||||||
|             nodes[node_count] = malloc(sizeof(Node)); |  | ||||||
|             if (!nodes[node_count]) { |  | ||||||
|                 perror("Memory allocation failed"); |  | ||||||
|                 return NULL; |  | ||||||
|             } |  | ||||||
|             nodes[node_count]->symbol = (unsigned char)i; |  | ||||||
|             nodes[node_count]->frequency = freq[i]; |  | ||||||
|             nodes[node_count]->left = nodes[node_count]->right = NULL; |  | ||||||
|             node_count++; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     while (node_count > 1) { |  | ||||||
|         qsort(nodes, node_count, sizeof(Node*), compare_nodes); |  | ||||||
| 
 |  | ||||||
|         Node* left = nodes[0]; |  | ||||||
|         Node* right = nodes[1]; |  | ||||||
| 
 |  | ||||||
|         Node* parent = malloc(sizeof(Node)); |  | ||||||
|         if (!parent) { |  | ||||||
|             perror("Memory allocation failed"); |  | ||||||
|             return NULL; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         parent->symbol = 0; |  | ||||||
|         parent->frequency = left->frequency + right->frequency; |  | ||||||
|         parent->left = left; |  | ||||||
|         parent->right = right; |  | ||||||
| 
 |  | ||||||
|         memmove(nodes, nodes + 2, (node_count - 2) * sizeof(Node*)); |  | ||||||
|         nodes[node_count - 2] = parent; |  | ||||||
|         node_count--; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return nodes[0]; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void generate_huffman_codes(Node* root, HuffmanCode* codes, char* current_code, int depth) { |  | ||||||
|     if (!root) return; |  | ||||||
| 
 |  | ||||||
|     if (root->left == NULL && root->right == NULL) { |  | ||||||
|         current_code[depth] = '\0'; |  | ||||||
|         codes[root->symbol].symbol = root->symbol; |  | ||||||
|         codes[root->symbol].code = strdup(current_code); |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     current_code[depth] = '0'; |  | ||||||
|     generate_huffman_codes(root->left, codes, current_code, depth + 1); |  | ||||||
| 
 |  | ||||||
|     current_code[depth] = '1'; |  | ||||||
|     generate_huffman_codes(root->right, codes, current_code, depth + 1); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void serialize_huffman_tree(Node* root, FILE* outfile) { |  | ||||||
|     if (!root) return; |  | ||||||
| 
 |  | ||||||
|     if (root->left == NULL && root->right == NULL) { |  | ||||||
|         fputc('L', outfile); |  | ||||||
|         fputc(root->symbol, outfile); |  | ||||||
|     } else { |  | ||||||
|         fputc('I', outfile); |  | ||||||
|         serialize_huffman_tree(root->left, outfile); |  | ||||||
|         serialize_huffman_tree(root->right, outfile); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node* rebuild_huffman_tree(unsigned char* tree_data, size_t size) { |  | ||||||
|     (void)size;  // Позначаємо параметр як тимчасово невикористаний
 |  | ||||||
|     size_t index = 0; |  | ||||||
| 
 |  | ||||||
|     Node* build_tree_recursively(unsigned char* data, size_t* index) { |  | ||||||
|         if (data[*index] == 'L') { // Лист (Leaf)
 |  | ||||||
|             (*index)++; |  | ||||||
|             Node* leaf = malloc(sizeof(Node)); |  | ||||||
|             if (!leaf) { |  | ||||||
|                 perror("Memory allocation failed"); |  | ||||||
|                 return NULL; |  | ||||||
|             } |  | ||||||
|             leaf->symbol = data[*index]; |  | ||||||
|             leaf->frequency = 0; // частота не потрібна для декомпресії
 |  | ||||||
|             leaf->left = leaf->right = NULL; |  | ||||||
|             (*index)++; |  | ||||||
|             return leaf; |  | ||||||
|         } else if (data[*index] == 'I') { // Вузол (Internal)
 |  | ||||||
|             (*index)++; |  | ||||||
|             Node* internal = malloc(sizeof(Node)); |  | ||||||
|             if (!internal) { |  | ||||||
|                 perror("Memory allocation failed"); |  | ||||||
|                 return NULL; |  | ||||||
|             } |  | ||||||
|             internal->symbol = 0; // внутрішні вузли не мають символів
 |  | ||||||
|             internal->frequency = 0; |  | ||||||
|             internal->left = build_tree_recursively(data, index); |  | ||||||
|             internal->right = build_tree_recursively(data, index); |  | ||||||
|             return internal; |  | ||||||
|         } |  | ||||||
|         return NULL; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return build_tree_recursively(tree_data, &index); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void free_huffman_tree(Node* root) { |  | ||||||
|     if (!root) return; |  | ||||||
|     free_huffman_tree(root->left); |  | ||||||
|     free_huffman_tree(root->right); |  | ||||||
|     free(root); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| int compress_1(const char* input_file_name, const char* output_file_name) { |  | ||||||
|     FILE *infile = fopen(input_file_name, "rb"); |  | ||||||
|     if (!infile) { |  | ||||||
|         perror("Error opening input file"); |  | ||||||
|         return -1; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     fseek(infile, 0, SEEK_END); |  | ||||||
|     size_t file_size = ftell(infile); |  | ||||||
|     fseek(infile, 0, SEEK_SET); |  | ||||||
| 
 |  | ||||||
|     unsigned char* data = malloc(file_size); |  | ||||||
|     if (!data) { |  | ||||||
|         fclose(infile); |  | ||||||
|         perror("Memory allocation failed"); |  | ||||||
|         return -1; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     fread(data, 1, file_size, infile); |  | ||||||
|     fclose(infile); |  | ||||||
| 
 |  | ||||||
|     Node* root = create_huffman_tree(data, file_size); |  | ||||||
|     if (!root) { |  | ||||||
|         free(data); |  | ||||||
|         return -1; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     HuffmanCode codes[256] = {0}; |  | ||||||
|     char current_code[256]; |  | ||||||
|     generate_huffman_codes(root, codes, current_code, 0); |  | ||||||
| 
 |  | ||||||
|     FILE *outfile = fopen(output_file_name, "wb"); |  | ||||||
|     if (!outfile) { |  | ||||||
|         free(data); |  | ||||||
|         free_huffman_tree(root); |  | ||||||
|         return -1; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     serialize_huffman_tree(root, outfile); |  | ||||||
| 
 |  | ||||||
|     for (size_t i = 0; i < file_size; i++) { |  | ||||||
|         const char* code = codes[data[i]].code; |  | ||||||
|         for (size_t j = 0; code[j] != '\0'; j++) { |  | ||||||
|             fputc(code[j], outfile); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     fclose(outfile); |  | ||||||
|     free(data); |  | ||||||
|     free_huffman_tree(root); |  | ||||||
| 
 |  | ||||||
|     for (int i = 0; i < 256; i++) { |  | ||||||
|         free(codes[i].code); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return 1; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| // Декомпресія (приклад потребує уточнень залежно від специфіки формату)
 |  | ||||||
| int decompress_1(const char* input_file_name, const char* output_file_name) { |  | ||||||
|     (void)input_file_name; |  | ||||||
|     (void)output_file_name; |  | ||||||
|     return -1;  // Поки що не реалізовано.
 |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user