#include "compressor.h"

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define WINDOW_SIZE 4096         // Sliding-window size, in bytes
#define LOOKAHEAD_BUFFER_SIZE 15 // Longest match a token can describe (4-bit length field)

// Largest input compress_1 accepts; the whole file is loaded into memory.
#define MAX_LZ77_INPUT_SIZE (10L * 1024 * 1024)

// One LZ77 token: copy `length` bytes starting `offset` bytes back in the
// output, then emit the literal `next_char`.
typedef struct {
    int offset;
    int length;
    char next_char;
} LZ77Token;

#define MAX_TREE_NODES 256

// Huffman tree node
typedef struct Node {
    unsigned char symbol;
    int frequency;
    struct Node *left;
    struct Node *right;
} Node;

// Min-heap for Huffman tree
typedef struct MinHeap {
    Node *nodes[MAX_TREE_NODES];
    int size;
} MinHeap;

/*
 * NOTE: the compressor and the decompressor each rebuild the Huffman tree
 * from the stored frequency table, so the heap operations below (including
 * how they break ties between equal frequencies) are part of the file format.
 * Changing their ordering behaviour makes existing archives undecodable.
 */

// Exchanges the two node pointers.
void swap_nodes(Node **a, Node **b)
{
    Node *temp = *a;
    *a = *b;
    *b = temp;
}

// Sifts the node at `idx` down until neither child has a smaller frequency.
void heapify(MinHeap *heap, int idx)
{
    for (;;) {
        int smallest = idx;
        int left = 2 * idx + 1;
        int right = 2 * idx + 2;

        if (left < heap->size &&
            heap->nodes[left]->frequency < heap->nodes[smallest]->frequency) {
            smallest = left;
        }
        if (right < heap->size &&
            heap->nodes[right]->frequency < heap->nodes[smallest]->frequency) {
            smallest = right;
        }
        if (smallest == idx) {
            return;
        }
        swap_nodes(&heap->nodes[smallest], &heap->nodes[idx]);
        idx = smallest;
    }
}

// Removes and returns the lowest-frequency node, or NULL if the heap is empty.
Node *extract_min(MinHeap *heap)
{
    if (heap->size <= 0) {
        return NULL;
    }
    Node *min = heap->nodes[0];
    heap->nodes[0] = heap->nodes[heap->size - 1];
    heap->size--;
    heapify(heap, 0);
    return min;
}

// Inserts `node` into the heap. The heap holds at most MAX_TREE_NODES entries;
// an insert into a full heap is ignored rather than written out of bounds
// (the Huffman code in this file never exceeds that capacity).
void insert_min_heap(MinHeap *heap, Node *node)
{
    if (heap->size >= MAX_TREE_NODES) {
        return;
    }
    heap->size++;
    int i = heap->size - 1;
    // Shift larger parents down instead of swapping at every level.
    while (i && node->frequency < heap->nodes[(i - 1) / 2]->frequency) {
        heap->nodes[i] = heap->nodes[(i - 1) / 2];
        i = (i - 1) / 2;
    }
    heap->nodes[i] = node;
}

// Allocates an empty heap. Returns NULL if the allocation fails.
MinHeap *create_min_heap(void)
{
    MinHeap *heap = malloc(sizeof *heap);
    if (heap) {
        heap->size = 0;
    }
    return heap;
}

// Allocates a childless node. Returns NULL if the allocation fails.
Node *create_node(unsigned char symbol, int frequency)
{
    Node *node = malloc(sizeof *node);
    if (node) {
        node->symbol = symbol;
        node->frequency = frequency;
        node->left = NULL;
        node->right = NULL;
    }
    return node;
}

// Repeatedly merges the two lowest-frequency nodes until one root remains in
// heap->nodes[0]. If a node allocation fails, the two extracted nodes are put
// back and the function returns with heap->size still greater than 1, so the
// caller can detect the failure and no node is leaked.
void build_huffman_tree(MinHeap *heap)
{
    while (heap->size > 1) {
        Node *left = extract_min(heap);
        Node *right = extract_min(heap);
        Node *parent = create_node(0, left->frequency + right->frequency);
        if (!parent) {
            insert_min_heap(heap, left);
            insert_min_heap(heap, right);
            return;
        }
        parent->left = left;
        parent->right = right;
        insert_min_heap(heap, parent);
    }
}

// Walks the tree and stores, for every leaf, its path from the root as a
// NUL-terminated string of '0' (left) and '1' (right) in codes[symbol].
// `code` is scratch space of MAX_TREE_NODES chars; `top` is the current depth.
// A tree that is a single leaf yields the empty string for its symbol.
void build_codes(Node *root, char *code, int top, char codes[MAX_TREE_NODES][MAX_TREE_NODES])
{
    if (!root || top >= MAX_TREE_NODES) {
        return;
    }
    if (root->left) {
        code[top] = '0';
        build_codes(root->left, code, top + 1, codes);
    }
    if (root->right) {
        code[top] = '1';
        build_codes(root->right, code, top + 1, codes);
    }
    if (!root->left && !root->right) {
        code[top] = '\0';
        memcpy(codes[root->symbol], code, (size_t)top + 1);
    }
}

// Frees a whole (sub)tree; accepts NULL.
void free_tree(Node *root)
{
    if (root) {
        free_tree(root->left);
        free_tree(root->right);
        free(root);
    }
}

// Frees the heap and every tree it still owns; accepts NULL.
static void free_heap(MinHeap *heap)
{
    if (!heap) {
        return;
    }
    for (int i = 0; i < heap->size; i++) {
        free_tree(heap->nodes[i]);
    }
    free(heap);
}

// Builds the Huffman tree for a frequency table. On success the returned heap
// has size 1 (root in nodes[0]) or size 0 (no symbol occurs). Returns NULL on
// allocation failure.
static MinHeap *heap_from_frequencies(const int *freq)
{
    MinHeap *heap = create_min_heap();
    if (!heap) {
        return NULL;
    }
    for (int i = 0; i < MAX_TREE_NODES; i++) {
        if (freq[i] > 0) {
            Node *leaf = create_node((unsigned char)i, freq[i]);
            if (!leaf) {
                free_heap(heap);
                return NULL;
            }
            insert_min_heap(heap, leaf);
        }
    }
    build_huffman_tree(heap);
    if (heap->size > 1) { // build_huffman_tree ran out of memory
        free_heap(heap);
        return NULL;
    }
    return heap;
}

// Closes `output` and returns the number of bytes written to it, or -1 if the
// stream recorded a write error, closing fails, or the size does not fit an int.
static int finish_output(FILE *output)
{
    int failed = ferror(output);
    long size = ftell(output);
    if (fclose(output) != 0) {
        failed = 1;
    }
    if (failed || size < 0 || size > INT_MAX) {
        return -1;
    }
    return (int)size;
}

/*
 * Huffman-compresses input_file_name into output_file_name.
 *
 * Output layout: the raw int[MAX_TREE_NODES] frequency table (native int size
 * and byte order), followed by the code bits packed MSB-first; the last byte
 * is padded with zero bits.
 *
 * Returns the size of the output file in bytes, or -1 on failure (file cannot
 * be opened, I/O error, out of memory, or more than INT_MAX input bytes).
 */
int compress_2(const char *input_file_name, const char *output_file_name)
{
    int freq[MAX_TREE_NODES] = {0};
    char codes[MAX_TREE_NODES][MAX_TREE_NODES];
    char code[MAX_TREE_NODES];
    MinHeap *heap = NULL;
    FILE *output = NULL;
    unsigned char out_buffer = 0;
    int bit_count = 0;
    int total = 0;
    int result = -1;
    int byte;

    FILE *input = fopen(input_file_name, "rb");
    if (!input) {
        return -1;
    }

    // Pass 1: count byte frequencies.
    while ((byte = getc(input)) != EOF) {
        if (total == INT_MAX) { // counts (and their sums in the tree) are ints
            goto cleanup;
        }
        freq[byte]++;
        total++;
    }
    if (ferror(input) || fseek(input, 0, SEEK_SET) != 0) {
        goto cleanup;
    }

    heap = heap_from_frequencies(freq);
    if (!heap) {
        goto cleanup;
    }
    memset(codes, 0, sizeof codes);
    if (heap->size == 1) { // size 0 means the input is empty: nothing to encode
        build_codes(heap->nodes[0], code, 0, codes);
    }

    output = fopen(output_file_name, "wb");
    if (!output) {
        goto cleanup;
    }
    fwrite(freq, sizeof freq, 1, output);

    // Pass 2: emit each byte's code, packing bits MSB-first.
    while ((byte = getc(input)) != EOF) {
        for (const char *bit = codes[byte]; *bit != '\0'; bit++) {
            out_buffer = (unsigned char)((out_buffer << 1) | (*bit == '1'));
            if (++bit_count == 8) {
                putc(out_buffer, output);
                bit_count = 0;
                out_buffer = 0;
            }
        }
    }
    if (bit_count > 0) {
        out_buffer = (unsigned char)(out_buffer << (8 - bit_count));
        putc(out_buffer, output);
    }
    if (ferror(input)) {
        goto cleanup;
    }

    result = finish_output(output);
    output = NULL;

cleanup:
    if (output) {
        fclose(output);
    }
    free_heap(heap);
    fclose(input);
    return result;
}

/*
 * Decompresses a file produced by compress_2.
 *
 * The number of symbols to emit is taken from the stored frequency table, so
 * the zero padding bits in the final byte are never decoded as data.
 *
 * Returns the size of the output file in bytes, or -1 on failure (file cannot
 * be opened, I/O error, out of memory, truncated or inconsistent input).
 */
int decompress_2(const char *input_file_name, const char *output_file_name)
{
    int freq[MAX_TREE_NODES];
    MinHeap *heap = NULL;
    FILE *output = NULL;
    const Node *root = NULL;
    long long remaining = 0;
    int result = -1;
    int byte;

    FILE *input = fopen(input_file_name, "rb");
    if (!input) {
        return -1;
    }

    if (fread(freq, sizeof freq, 1, input) != 1) {
        goto cleanup; // header missing or truncated
    }
    for (int i = 0; i < MAX_TREE_NODES; i++) {
        if (freq[i] > 0) {
            remaining += freq[i];
        }
    }
    if (remaining > INT_MAX) { // compress_2 never writes such a table
        goto cleanup;
    }

    heap = heap_from_frequencies(freq);
    if (!heap) {
        goto cleanup;
    }
    root = heap->size == 1 ? heap->nodes[0] : NULL;

    output = fopen(output_file_name, "wb");
    if (!output) {
        goto cleanup;
    }

    if (root && !root->left && !root->right) {
        // Only one distinct byte: its code is empty, so the payload carries
        // no bits and the count alone determines the output.
        for (; remaining > 0; remaining--) {
            putc(root->symbol, output);
        }
    } else if (root) {
        const Node *current = root;
        while (remaining > 0 && (byte = getc(input)) != EOF) {
            for (int i = 7; i >= 0 && remaining > 0; i--) {
                // Internal nodes always have both children.
                current = ((byte >> i) & 1) ? current->right : current->left;
                if (!current->left && !current->right) {
                    putc(current->symbol, output);
                    current = root;
                    remaining--;
                }
            }
        }
    }
    if (remaining > 0 || ferror(input)) {
        goto cleanup; // bit stream ended before every symbol was decoded
    }

    result = finish_output(output);
    output = NULL;

cleanup:
    if (output) {
        fclose(output);
    }
    free_heap(heap);
    fclose(input);
    return result;
}

// Writes a token to the file in a compact format: offset (12 bits) and length
// (4 bits) packed into one unsigned short, stored in native byte order,
// followed by next_char. (Could be optimized further.)
// An offset of WINDOW_SIZE does not fit in 12 bits and is stored as 0.
void write_token(FILE *file, LZ77Token token)
{
    unsigned short offset_length =
        (unsigned short)(((token.offset & 0xFFF) << 4) | (token.length & 0xF));
    fwrite(&offset_length, sizeof(unsigned short), 1, file);
    fwrite(&token.next_char, sizeof(char), 1, file);
}

// Reads one token into *token. Returns 1 on success, 0 on a clean end of file
// (no bytes left), and -1 on a read error or a truncated token. *token is
// only modified on success.
static int read_token_checked(FILE *file, LZ77Token *token)
{
    unsigned short offset_length;
    char next_char;
    size_t got = fread(&offset_length, 1, sizeof offset_length, file);

    if (got == 0) {
        return feof(file) ? 0 : -1;
    }
    if (got != sizeof offset_length || fread(&next_char, sizeof(char), 1, file) != 1) {
        return -1;
    }
    token->offset = offset_length >> 4;
    token->length = offset_length & 0xF;
    token->next_char = next_char;
    return 1;
}

// Reads a token from the file. If no complete token can be read, an all-zero
// token is returned.
LZ77Token read_token(FILE *file)
{
    LZ77Token token = {0, 0, 0};
    (void)read_token_checked(file, &token);
    return token;
}

/*
 * LZ77-compresses input_file_name into output_file_name as a sequence of
 * 3-byte tokens (see write_token).
 *
 * Returns the size of the output file in bytes, or -1 on failure (file cannot
 * be opened, I/O error, out of memory, or input larger than
 * MAX_LZ77_INPUT_SIZE).
 */
int compress_1(const char *input_file_name, const char *output_file_name)
{
    FILE *output_file = NULL;
    char *data = NULL;
    long file_size = 0;
    long pos = 0;
    int result = -1;

    FILE *input_file = fopen(input_file_name, "rb");
    if (!input_file) {
        return -1;
    }

    if (fseek(input_file, 0, SEEK_END) != 0) {
        goto cleanup;
    }
    file_size = ftell(input_file);
    if (file_size < 0 || file_size > MAX_LZ77_INPUT_SIZE) {
        goto cleanup;
    }
    if (fseek(input_file, 0, SEEK_SET) != 0) {
        goto cleanup;
    }

    data = malloc(file_size > 0 ? (size_t)file_size : 1);
    if (!data || fread(data, 1, (size_t)file_size, input_file) != (size_t)file_size) {
        goto cleanup;
    }

    output_file = fopen(output_file_name, "wb");
    if (!output_file) {
        goto cleanup;
    }

    while (pos < file_size) {
        LZ77Token token = {0, 0, data[pos]};
        long start = pos > WINDOW_SIZE ? pos - WINDOW_SIZE : 0;
        // Every token carries a literal after the match, so a match must
        // stop at least one byte before the end of the input.
        long limit = file_size - pos - 1;
        int max_length = 0;
        int max_offset = 0;

        if (limit > LOOKAHEAD_BUFFER_SIZE) {
            limit = LOOKAHEAD_BUFFER_SIZE;
        }

        for (long i = start; i < pos; i++) {
            int length = 0;
            // The match may run past `pos` into the bytes being encoded; the
            // decoder copies byte by byte, so such overlap is reproducible.
            while (length < limit && data[i + length] == data[pos + length]) {
                length++;
            }
            if (length > max_length) {
                max_length = length;
                max_offset = (int)(pos - i);
                if (max_length == limit) {
                    break; // no later candidate can be strictly longer
                }
            }
        }

        if (max_length > 1) {
            token.offset = max_offset;
            token.length = max_length;
            token.next_char = data[pos + max_length];
            pos += max_length + 1;
        } else {
            pos++;
        }
        write_token(output_file, token);
    }

    result = finish_output(output_file);
    output_file = NULL;

cleanup:
    if (output_file) {
        fclose(output_file);
    }
    free(data);
    fclose(input_file);
    return result;
}

/*
 * Decompresses a file produced by compress_1.
 *
 * Returns the size of the output file in bytes, or -1 on failure (file cannot
 * be opened, I/O error, out of memory, truncated token, or a token that
 * refers to data before the start of the output).
 */
int decompress_1(const char *input_file_name, const char *output_file_name)
{
    FILE *output_file = NULL;
    char *window = NULL;
    LZ77Token token = {0, 0, 0};
    size_t produced = 0; // bytes written so far; byte k lives in window[k % WINDOW_SIZE]
    int result = -1;
    int status;

    FILE *input_file = fopen(input_file_name, "rb");
    if (!input_file) {
        return -1;
    }
    output_file = fopen(output_file_name, "wb");
    window = malloc(WINDOW_SIZE);
    if (!output_file || !window) {
        goto cleanup;
    }

    while ((status = read_token_checked(input_file, &token)) == 1) {
        if (token.length > 0) {
            // A stored offset of 0 stands for a distance of WINDOW_SIZE.
            size_t distance = token.offset ? (size_t)token.offset : (size_t)WINDOW_SIZE;
            if (distance > produced) {
                goto cleanup; // corrupt input: points before the first byte
            }
            for (int i = 0; i < token.length; i++) {
                char c = window[(produced - distance) % WINDOW_SIZE];
                putc(c, output_file);
                window[produced % WINDOW_SIZE] = c;
                produced++;
            }
        }
        putc(token.next_char, output_file);
        window[produced % WINDOW_SIZE] = token.next_char;
        produced++;
    }
    if (status < 0) {
        goto cleanup;
    }

    result = finish_output(output_file);
    output_file = NULL;

cleanup:
    if (output_file) {
        fclose(output_file);
    }
    free(window);
    fclose(input_file);
    return result;
}