#include #include #include #include "compressor.h" #define BUFFER_SIZE 4096 #define MAX_SYMBOLS 257 #define WINDOW_CAPACITY 4096 #define MAX_MATCH_LENGTH 15 #define BUFFER_CAPACITY 4096 // Макрос для обмена двух узлов #define SWAP_NODES(a, b) { Node* temp = a; a = b; b = temp; } // Определение структуры узла дерева typedef struct Node { int symbol; unsigned int frequency; struct Node *left, *right; } Node; typedef struct LZ77{ int posun; int dlzka; char dalsi_znak; } LZ77; Node* create_node(int symbol, unsigned int frequency) { Node* node = (Node*)malloc(sizeof(Node)); node->symbol = symbol; node->frequency = frequency; node->left = node->right = NULL; return node; } Node* build_huffman_tree(const unsigned int* frequencies) { Node* nodes[MAX_SYMBOLS]; int node_count = 0; for (int i = 0; i < MAX_SYMBOLS; i++) { if (frequencies[i] > 0) { nodes[node_count++] = create_node(i, frequencies[i]); } } while (node_count > 1) { // Сортируем узлы по частоте for (int i = 0; i < node_count - 1; i++) { for (int j = i + 1; j < node_count; j++) { if (nodes[i]->frequency > nodes[j]->frequency) { SWAP_NODES(nodes[i], nodes[j]); } } } Node* left = nodes[0]; Node* right = nodes[1]; Node* parent = create_node(-1, left->frequency + right->frequency); parent->left = left; parent->right = right; // Заменяем объединенные узлы новым родительским узлом nodes[0] = parent; nodes[1] = nodes[--node_count]; } return nodes[0]; } void generate_huffman_codes(Node* root, char* code, int depth, char codes[MAX_SYMBOLS][MAX_SYMBOLS]) { if (!root->left && !root->right) { code[depth] = '\0'; strcpy(codes[root->symbol], code); return; } if (root->left) { code[depth] = '0'; generate_huffman_codes(root->left, code, depth + 1, codes); } if (root->right) { code[depth] = '1'; generate_huffman_codes(root->right, code, depth + 1, codes); } } void free_huffman_tree(Node* root) { if (!root) return; free_huffman_tree(root->left); free_huffman_tree(root->right); free(root); } int compress_1(const char* input_file, const char* output_file) { FILE* input = fopen(input_file, "rb"); FILE* output = fopen(output_file, "wb"); if (!input || !output) return -1; unsigned int frequencies[MAX_SYMBOLS] = {0}; unsigned char buffer[BUFFER_SIZE]; size_t bytes_read; // Подсчет частот символов while ((bytes_read = fread(buffer, 1, BUFFER_SIZE, input)) > 0) { for (size_t i = 0; i < bytes_read; i++) { frequencies[buffer[i]]++; } } frequencies[256] = 1; // Добавляем маркер EOF Node* root = build_huffman_tree(frequencies); if (!root) return -1; // Генерация кодов Хаффмана char codes[MAX_SYMBOLS][MAX_SYMBOLS] = {{0}}; char code[MAX_SYMBOLS] = {0}; generate_huffman_codes(root, code, 0, codes); fwrite(frequencies, sizeof(frequencies[0]), MAX_SYMBOLS, output); rewind(input); unsigned char current_byte = 0; int bit_count = 0; while ((bytes_read = fread(buffer, 1, BUFFER_SIZE, input)) > 0) { for (size_t i = 0; i < bytes_read; i++) { char* symbol_code = codes[buffer[i]]; for (size_t j = 0; symbol_code[j] != '\0'; j++) { current_byte = (current_byte << 1) | (symbol_code[j] - '0'); bit_count++; if (bit_count == 8) { fwrite(¤t_byte, 1, 1, output); current_byte = 0; bit_count = 0; } } } } char* eof_code = codes[256]; for (size_t j = 0; eof_code[j] != '\0'; j++) { current_byte = (current_byte << 1) | (eof_code[j] - '0'); bit_count++; if (bit_count == 8) { fwrite(¤t_byte, 1, 1, output); current_byte = 0; bit_count = 0; } } if (bit_count > 0) { current_byte <<= (8 - bit_count); fwrite(¤t_byte, 1, 1, output); } fclose(input); fclose(output); free_huffman_tree(root); return 0; } int decompress_1(const char* input_file, const char* output_file) { FILE* input = fopen(input_file, "rb"); FILE* output = fopen(output_file, "wb"); if (!input || !output) return -1; unsigned int frequencies[MAX_SYMBOLS] = {0}; fread(frequencies, sizeof(frequencies[0]), MAX_SYMBOLS, input); Node* root = build_huffman_tree(frequencies); if (!root) return -1; Node* current = root; unsigned char byte; int bit; while (fread(&byte, 1, 1, input) == 1) { for (bit = 7; bit >= 0; bit--) { current = (byte & (1 << bit)) ? current->right : current->left; if (!current->left && !current->right) { if (current->symbol == 256) { // Маркер EOF fclose(input); fclose(output); free_huffman_tree(root); return 0; } fwrite(¤t->symbol, 1, 1, output); current = root; } } } fclose(input); fclose(output); free_huffman_tree(root); return 0; } int compress_2(const char *input_file, const char *output_file) { FILE *source = fopen(input_file, "rb"); if (source == NULL) { return -1; } FILE *destination = fopen(output_file, "wb"); if (destination == NULL) { fclose(source); return -1; } fseek(source, 0, SEEK_END); long file_size = ftell(source); if (file_size > 10 * 1024 * 1024) { fclose(source); fclose(destination); return -1; } fseek(source, 0, SEEK_SET); // Возврат в начало файла для чтения char *input_data = malloc(file_size); if (input_data == NULL) { fclose(source); fclose(destination); return -1; } fread(input_data, 1, file_size, source); int index = 0; while (index < file_size) { LZ77 element = {0, 0, input_data[index]}; // Инициализация элемента с текущим символом int best_length = 0, best_offset = 0; int start_idx; if ((index - WINDOW_CAPACITY) > 0) { start_idx = index - WINDOW_CAPACITY; } else { start_idx = 0; } // Поиск начала окна // Поиск наилучшего совпадения в окне for (int i = start_idx; i < index; i++) { int match_length = 0; while (match_length < MAX_MATCH_LENGTH && index + match_length < file_size && input_data[i + match_length] == input_data[index + match_length]) { match_length++; } if (match_length > best_length) { best_length = match_length; best_offset = index - i; } if (match_length == MAX_MATCH_LENGTH) { break; } } // Если найдено совпадение записываем его в элемент иначе сохраняем символ if (best_length > 1) { element.posun = best_offset; element.dlzka = best_length; element.dalsi_znak = input_data[index + best_length]; index += best_length + 1; } else { element.posun = 0; element.dlzka = 0; element.dalsi_znak = input_data[index]; index++; } unsigned short combined = (element.posun << 4) | (element.dlzka & 0xF); // Объединение posun и dlzka в одно значение fwrite(&combined, sizeof(unsigned short), 1, destination); fwrite(&element.dalsi_znak, sizeof(char), 1, destination); } fwrite(&file_size, sizeof(file_size), 1, destination); free(input_data); fclose(source); fclose(destination); return ftell(destination); } int decompress_2(const char *input_file, const char *output_file) { FILE *source = fopen(input_file, "rb"); if (source == NULL) { return -1; } FILE *destination = fopen(output_file, "wb"); if (destination == NULL) { fclose(source); return -1; } char *buffer = malloc(WINDOW_CAPACITY); if (buffer == NULL) { fclose(source); fclose(destination); return -1; } memset(buffer, 0, WINDOW_CAPACITY); // Переход к концу файла для чтения размера исходного файла fseek(source, -sizeof(long), SEEK_END); long original_size; if (fread(&original_size, sizeof(long), 1, source) != 1) { free(buffer); fclose(source); fclose(destination); return -1; } // Возврат в начало файла для распаковки fseek(source, 0, SEEK_SET); int written_bytes = 0, buffer_position = 0; while (written_bytes < original_size) { unsigned short combined; char dalsi_znak; if (fread(&combined, sizeof(unsigned short), 1, source) != 1) { break; } if (fread(&dalsi_znak, sizeof(char), 1, source) != 1) { break; } int posun = combined >> 4; int dlzka = combined & 0xF; // Восстановление данных из совпадений в буфере if (dlzka > 0) { int start_idx = buffer_position - posun; for (int i = 0; i < dlzka; i++) { char current = buffer[(start_idx + i) % WINDOW_CAPACITY]; fputc(current, destination); buffer[buffer_position % WINDOW_CAPACITY] = current; buffer_position++; written_bytes++; } } // Запись следующего символа в файл if (written_bytes < original_size) { fputc(dalsi_znak, destination); buffer[buffer_position % WINDOW_CAPACITY] = dalsi_znak; buffer_position++; written_bytes++; } } free(buffer); fclose(source); fclose(destination); return ftell(destination); }