// Huffman (compress_2 / decompress_2) and LZ77 (compress_1 / decompress_1) file codecs.
#include "compressor.h"

#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define WINDOW_SIZE 4096           // Sliding-window size in bytes
#define LOOKAHEAD_BUFFER_SIZE 15   // Look-ahead buffer size (longest match considered)

// LZ77 token: an optional back-reference followed by one literal byte.
typedef struct
{
    int offset;      // distance back from the current position to the start of the match
    int length;      // number of matched bytes; 0 means the token carries only a literal
    char next_char;  // literal byte that follows the match
} LZ77Token;

// Number of distinct byte values; sizes the frequency table, the heap and the code table.
#define MAX_TREE_NODES 256

// Huffman tree node. A node whose left and right are both NULL is a leaf.
typedef struct Node
{
    unsigned char symbol;  // byte value; meaningful for leaves (internal nodes are created with 0)
    int frequency;         // occurrence count of the symbol, or the sum of both children
    struct Node* left;     // child reached by a '0' bit
    struct Node* right;    // child reached by a '1' bit
} Node;

// Min-heap for Huffman tree
|
|
typedef struct MinHeap
|
|
{
|
|
Node* nodes[MAX_TREE_NODES];
|
|
int size;
|
|
} MinHeap;
|
|
|
|
// Exchange the two Node pointers that a and b point at.
void swap_nodes(Node** a, Node** b)
{
    Node* temp = *a;
    *a = *b;
    *b = temp;
}

// Sift the node at idx down until neither child has a smaller frequency.
//
// Comparisons are strict and the left child is examined first, so ties are
// broken exactly the same way on every run; the compressor and the
// decompressor rely on that to rebuild identical trees.
void heapify(MinHeap* heap, int idx)
{
    for (;;)
    {
        int smallest = idx;
        int left = 2 * idx + 1;
        int right = 2 * idx + 2;

        if (left < heap->size && heap->nodes[left]->frequency < heap->nodes[smallest]->frequency)
        {
            smallest = left;
        }

        if (right < heap->size && heap->nodes[right]->frequency < heap->nodes[smallest]->frequency)
        {
            smallest = right;
        }

        if (smallest == idx)
        {
            return;
        }

        swap_nodes(&heap->nodes[smallest], &heap->nodes[idx]);
        idx = smallest;
    }
}

// Remove and return the node with the smallest frequency.
// Returns NULL when heap is NULL or empty.
Node* extract_min(MinHeap* heap)
{
    if (heap == NULL || heap->size <= 0)
    {
        return NULL;
    }

    Node* min = heap->nodes[0];

    // Move the last node to the root, shrink, then restore the heap order.
    heap->size--;
    heap->nodes[0] = heap->nodes[heap->size];
    heapify(heap, 0);

    return min;
}

// Insert node into the heap, keeping the smallest frequency at the root.
//
// The call is a no-op when heap or node is NULL or when the heap already
// holds MAX_TREE_NODES entries; in that case the node is not stored, so the
// caller keeps ownership of it.
void insert_min_heap(MinHeap* heap, Node* node)
{
    if (heap == NULL || node == NULL || heap->size >= MAX_TREE_NODES)
    {
        return;
    }

    int i = heap->size;
    heap->size++;

    // Shift parents with a strictly larger frequency down into the hole.
    while (i > 0 && node->frequency < heap->nodes[(i - 1) / 2]->frequency)
    {
        heap->nodes[i] = heap->nodes[(i - 1) / 2];
        i = (i - 1) / 2;
    }

    heap->nodes[i] = node;
}

MinHeap* create_min_heap()
|
|
{
|
|
MinHeap* heap = (MinHeap*)malloc(sizeof(MinHeap));
|
|
heap->size = 0;
|
|
return heap;
|
|
}
|
|
|
|
Node* create_node(unsigned char symbol, int frequency)
|
|
{
|
|
Node* node = (Node*)malloc(sizeof(Node));
|
|
node->symbol = symbol;
|
|
node->frequency = frequency;
|
|
node->left = node->right = NULL;
|
|
return node;
|
|
}
|
|
|
|
// Merge the two least frequent nodes repeatedly until one root remains.
//
// On success heap->size is 1 and heap->nodes[0] is the root (or the heap is
// left untouched if it held fewer than two nodes). If a parent node cannot be
// allocated, the two extracted nodes are put back and the function returns
// early, leaving heap->size > 1 so the caller can detect the failure and
// still free every subtree.
void build_huffman_tree(MinHeap* heap)
{
    if (heap == NULL)
    {
        return;
    }

    while (heap->size > 1)
    {
        Node* left = extract_min(heap);
        Node* right = extract_min(heap);

        Node* parent = create_node(0, left->frequency + right->frequency);
        if (parent == NULL)
        {
            insert_min_heap(heap, left);
            insert_min_heap(heap, right);
            return;
        }

        parent->left = left;
        parent->right = right;

        insert_min_heap(heap, parent);
    }
}

// Walk the tree and store, for every leaf, its path as a NUL-terminated
// string of '0' (left) and '1' (right) characters in codes[symbol].
//
// root  - subtree to walk; NULL is ignored.
// code  - scratch buffer of at least MAX_TREE_NODES chars holding the path so far.
// top   - number of path characters already in code.
// codes - output table indexed by byte value.
//
// A tree that consists of a single leaf yields an empty string for that symbol.
void build_codes(Node* root, char* code, int top, char codes[MAX_TREE_NODES][MAX_TREE_NODES])
{
    // top must leave room for the terminator inside code[] and codes[][].
    if (root == NULL || top < 0 || top >= MAX_TREE_NODES)
    {
        return;
    }

    if (root->left == NULL && root->right == NULL)
    {
        code[top] = '\0';
        strcpy(codes[root->symbol], code);
        return;
    }

    if (root->left)
    {
        code[top] = '0';
        build_codes(root->left, code, top + 1, codes);
    }

    if (root->right)
    {
        code[top] = '1';
        build_codes(root->right, code, top + 1, codes);
    }
}

// Free root and every node below it. A NULL root is a no-op.
void free_tree(Node* root)
{
    if (root == NULL)
    {
        return;
    }

    free_tree(root->left);
    free_tree(root->right);
    free(root);
}

int compress_2(const char* input_file_name, const char* output_file_name)
|
|
{
|
|
FILE* input = fopen(input_file_name, "rb");
|
|
if (!input) return -1;
|
|
|
|
int freq[MAX_TREE_NODES] = {0};
|
|
unsigned char buffer;
|
|
|
|
while (fread(&buffer, 1, 1, input))
|
|
{
|
|
freq[buffer]++;
|
|
}
|
|
|
|
MinHeap* heap = create_min_heap();
|
|
for (int i = 0; i < MAX_TREE_NODES; i++)
|
|
{
|
|
if (freq[i] > 0)
|
|
{
|
|
insert_min_heap(heap, create_node((unsigned char)i, freq[i]));
|
|
}
|
|
}
|
|
|
|
build_huffman_tree(heap);
|
|
|
|
char codes[MAX_TREE_NODES][MAX_TREE_NODES] = {0};
|
|
char code[MAX_TREE_NODES];
|
|
build_codes(heap->nodes[0], code, 0, codes);
|
|
|
|
fseek(input, 0, SEEK_SET);
|
|
FILE* output = fopen(output_file_name, "wb");
|
|
if (!output)
|
|
{
|
|
fclose(input);
|
|
return -1;
|
|
}
|
|
|
|
fwrite(freq, sizeof(freq), 1, output);
|
|
|
|
unsigned char out_buffer = 0;
|
|
int bit_count = 0;
|
|
|
|
while (fread(&buffer, 1, 1, input))
|
|
{
|
|
char* symbol_code = codes[buffer];
|
|
for (int i = 0; symbol_code[i] != '\0'; i++)
|
|
{
|
|
out_buffer <<= 1;
|
|
if (symbol_code[i] == '1')
|
|
{
|
|
out_buffer |= 1;
|
|
}
|
|
bit_count++;
|
|
|
|
if (bit_count == 8)
|
|
{
|
|
fwrite(&out_buffer, 1, 1, output);
|
|
bit_count = 0;
|
|
out_buffer = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (bit_count > 0)
|
|
{
|
|
out_buffer <<= (8 - bit_count);
|
|
fwrite(&out_buffer, 1, 1, output);
|
|
}
|
|
|
|
fseek(output, 0, SEEK_END);
|
|
int sizeOUT = ftell(output);
|
|
fseek(output, 0, SEEK_SET);
|
|
|
|
fclose(input);
|
|
fclose(output);
|
|
free_tree(heap->nodes[0]);
|
|
free(heap);
|
|
|
|
return sizeOUT;
|
|
}
|
|
|
|
int decompress_2(const char* input_file_name, const char* output_file_name)
|
|
{
|
|
FILE* input = fopen(input_file_name, "rb");
|
|
if (!input) return -1;
|
|
|
|
int freq[MAX_TREE_NODES];
|
|
fread(freq, sizeof(freq), 1, input);
|
|
|
|
MinHeap* heap = create_min_heap();
|
|
for (int i = 0; i < MAX_TREE_NODES; i++)
|
|
{
|
|
if (freq[i] > 0)
|
|
{
|
|
insert_min_heap(heap, create_node((unsigned char)i, freq[i]));
|
|
}
|
|
}
|
|
|
|
build_huffman_tree(heap);
|
|
|
|
FILE* output = fopen(output_file_name, "wb");
|
|
if (!output)
|
|
{
|
|
fclose(input);
|
|
return -1;
|
|
}
|
|
|
|
Node* root = heap->nodes[0];
|
|
Node* current = root;
|
|
unsigned char buffer;
|
|
|
|
while (fread(&buffer, 1, 1, input))
|
|
{
|
|
for (int i = 7; i >= 0; i--)
|
|
{
|
|
if ((buffer >> i) & 1)
|
|
{
|
|
current = current->right;
|
|
}
|
|
else
|
|
{
|
|
current = current->left;
|
|
}
|
|
|
|
if (!current->left && !current->right)
|
|
{
|
|
fwrite(¤t->symbol, 1, 1, output);
|
|
current = root;
|
|
}
|
|
}
|
|
}
|
|
|
|
fseek(output, 0, SEEK_END);
|
|
int sizeOUT = ftell(output);
|
|
fseek(output, 0, SEEK_SET);
|
|
|
|
fclose(input);
|
|
fclose(output);
|
|
free_tree(root);
|
|
free(heap);
|
|
|
|
return sizeOUT;
|
|
}
|
|
|
|
// Функция для записи токена в файл в компактном формате
|
|
void write_token(FILE *file, LZ77Token token)
|
|
{
|
|
// Записываем offset и length как 2 байта (можно оптимизировать дальше)
|
|
unsigned short offset_length = (token.offset << 4) | (token.length & 0xF);
|
|
fwrite(&offset_length, sizeof(unsigned short), 1, file);
|
|
fwrite(&token.next_char, sizeof(char), 1, file);
|
|
}
|
|
|
|
// Функция для чтения токена из файла
|
|
LZ77Token read_token(FILE *file)
|
|
{
|
|
LZ77Token token;
|
|
unsigned short offset_length;
|
|
fread(&offset_length, sizeof(unsigned short), 1, file);
|
|
token.offset = offset_length >> 4;
|
|
token.length = offset_length & 0xF;
|
|
fread(&token.next_char, sizeof(char), 1, file);
|
|
return token;
|
|
}
|
|
|
|
int compress_1(const char* input_file_name, const char* output_file_name)
|
|
{
|
|
FILE *input_file = fopen(input_file_name, "rb");
|
|
FILE *output_file = fopen(output_file_name, "wb");
|
|
|
|
if (!input_file || !output_file)
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
fseek(input_file, 0, SEEK_END);
|
|
long file_size = ftell(input_file);
|
|
fseek(input_file, 0, SEEK_SET);
|
|
|
|
if (file_size > 10 * 1024 * 1024)
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
char *data = (char*)malloc(file_size);
|
|
fread(data, 1, file_size, input_file);
|
|
|
|
int pos = 0;
|
|
while (pos < file_size)
|
|
{
|
|
LZ77Token token = {0, 0, data[pos]};
|
|
|
|
int max_length = 0;
|
|
int max_offset = 0;
|
|
|
|
int start = (pos - WINDOW_SIZE) > 0 ? (pos - WINDOW_SIZE) : 0;
|
|
for (int i = start; i < pos; i++)
|
|
{
|
|
int length = 0;
|
|
while (length < LOOKAHEAD_BUFFER_SIZE && pos + length < file_size && data[i + length] == data[pos + length])
|
|
{
|
|
length++;
|
|
}
|
|
if (length > max_length)
|
|
{
|
|
max_length = length;
|
|
max_offset = pos - i;
|
|
}
|
|
}
|
|
|
|
if (max_length > 1)
|
|
{
|
|
token.offset = max_offset;
|
|
token.length = max_length;
|
|
token.next_char = data[pos + max_length];
|
|
pos += max_length + 1;
|
|
}
|
|
else
|
|
{
|
|
pos++;
|
|
}
|
|
|
|
write_token(output_file, token);
|
|
}
|
|
|
|
fseek(output_file, 0, SEEK_END);
|
|
int sizeOUT = ftell(output_file);
|
|
fseek(output_file, 0, SEEK_SET);
|
|
|
|
free(data);
|
|
fclose(input_file);
|
|
fclose(output_file);
|
|
|
|
return sizeOUT;
|
|
}
|
|
|
|
int decompress_1(const char* input_file_name, const char* output_file_name)
|
|
{
|
|
FILE *input_file = fopen(input_file_name, "rb");
|
|
FILE *output_file = fopen(output_file_name, "wb");
|
|
|
|
if (!input_file || !output_file)
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
char *window = (char*)malloc(WINDOW_SIZE);
|
|
int window_pos = 0;
|
|
|
|
while (!feof(input_file))
|
|
{
|
|
LZ77Token token = read_token(input_file);
|
|
|
|
if (token.length > 0)
|
|
{
|
|
int start = window_pos - token.offset;
|
|
for (int i = 0; i < token.length; i++)
|
|
{
|
|
char c = window[(start + i) % WINDOW_SIZE];
|
|
fputc(c, output_file);
|
|
window[window_pos % WINDOW_SIZE] = c;
|
|
window_pos++;
|
|
}
|
|
}
|
|
fputc(token.next_char, output_file);
|
|
window[window_pos % WINDOW_SIZE] = token.next_char;
|
|
window_pos++;
|
|
}
|
|
|
|
fseek(output_file, 0, SEEK_END);
|
|
int sizeOUT = ftell(output_file);
|
|
fseek(output_file, 0, SEEK_SET);
|
|
|
|
free(window);
|
|
fclose(input_file);
|
|
fclose(output_file);
|
|
|
|
return sizeOUT;
|
|
} |