2025-01-19 14:10:47 +00:00
|
|
|
|
#include <stdio.h>
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
#include <string.h>
|
2025-01-27 15:08:53 +00:00
|
|
|
|
#include "compressor.h"
|
2025-01-19 14:10:47 +00:00
|
|
|
|
|
2025-01-27 15:08:53 +00:00
|
|
|
|
#define BUFFER_SIZE 4096
|
|
|
|
|
#define MAX_SYMBOLS 257
|
2025-01-28 19:05:01 +00:00
|
|
|
|
#define WINDOW_CAPACITY 4096
|
|
|
|
|
#define MAX_MATCH_LENGTH 15
|
|
|
|
|
#define BUFFER_CAPACITY 4096
|
2025-01-27 15:08:53 +00:00
|
|
|
|
// Макрос для обмена двух узлов
|
|
|
|
|
#define SWAP_NODES(a, b) { Node* temp = a; a = b; b = temp; }
|
2025-01-19 14:10:47 +00:00
|
|
|
|
|
2025-01-27 15:08:53 +00:00
|
|
|
|
// Определение структуры узла дерева
|
|
|
|
|
typedef struct Node {
|
|
|
|
|
int symbol;
|
|
|
|
|
unsigned int frequency;
|
|
|
|
|
struct Node *left, *right;
|
|
|
|
|
} Node;
|
2025-01-19 14:10:47 +00:00
|
|
|
|
|
2025-01-28 19:03:42 +00:00
|
|
|
|
typedef struct LZ77{
|
|
|
|
|
int posun;
|
|
|
|
|
int dlzka;
|
|
|
|
|
char dalsi_znak;
|
|
|
|
|
} LZ77;
|
|
|
|
|
|
2025-01-27 15:08:53 +00:00
|
|
|
|
Node* create_node(int symbol, unsigned int frequency) {
|
|
|
|
|
Node* node = (Node*)malloc(sizeof(Node));
|
|
|
|
|
node->symbol = symbol;
|
2025-01-26 19:22:02 +00:00
|
|
|
|
node->frequency = frequency;
|
|
|
|
|
node->left = node->right = NULL;
|
|
|
|
|
return node;
|
2025-01-19 14:10:47 +00:00
|
|
|
|
}
|
|
|
|
|
|
2025-01-27 15:08:53 +00:00
|
|
|
|
Node* build_huffman_tree(const unsigned int* frequencies) {
|
|
|
|
|
Node* nodes[MAX_SYMBOLS];
|
|
|
|
|
int node_count = 0;
|
2025-01-19 14:10:47 +00:00
|
|
|
|
|
2025-01-27 15:08:53 +00:00
|
|
|
|
for (int i = 0; i < MAX_SYMBOLS; i++) {
|
|
|
|
|
if (frequencies[i] > 0) {
|
|
|
|
|
nodes[node_count++] = create_node(i, frequencies[i]);
|
|
|
|
|
}
|
2025-01-19 14:10:47 +00:00
|
|
|
|
}
|
|
|
|
|
|
2025-01-27 15:08:53 +00:00
|
|
|
|
while (node_count > 1) {
|
|
|
|
|
// Сортируем узлы по частоте
|
|
|
|
|
for (int i = 0; i < node_count - 1; i++) {
|
|
|
|
|
for (int j = i + 1; j < node_count; j++) {
|
|
|
|
|
if (nodes[i]->frequency > nodes[j]->frequency) {
|
|
|
|
|
SWAP_NODES(nodes[i], nodes[j]);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2025-01-26 19:22:02 +00:00
|
|
|
|
|
2025-01-27 15:08:53 +00:00
|
|
|
|
Node* left = nodes[0];
|
|
|
|
|
Node* right = nodes[1];
|
|
|
|
|
Node* parent = create_node(-1, left->frequency + right->frequency);
|
|
|
|
|
parent->left = left;
|
|
|
|
|
parent->right = right;
|
2025-01-19 14:10:47 +00:00
|
|
|
|
|
2025-01-27 15:08:53 +00:00
|
|
|
|
// Заменяем объединенные узлы новым родительским узлом
|
|
|
|
|
nodes[0] = parent;
|
|
|
|
|
nodes[1] = nodes[--node_count];
|
2025-01-19 14:10:47 +00:00
|
|
|
|
}
|
|
|
|
|
|
2025-01-27 15:08:53 +00:00
|
|
|
|
return nodes[0];
|
2025-01-19 17:39:39 +00:00
|
|
|
|
}
|
|
|
|
|
|
2025-01-27 15:08:53 +00:00
|
|
|
|
void generate_huffman_codes(Node* root, char* code, int depth, char codes[MAX_SYMBOLS][MAX_SYMBOLS]) {
|
|
|
|
|
if (!root->left && !root->right) {
|
2025-01-28 19:08:59 +00:00
|
|
|
|
code[depth] = '\0';
|
2025-01-27 15:08:53 +00:00
|
|
|
|
strcpy(codes[root->symbol], code);
|
|
|
|
|
return;
|
2025-01-19 14:10:47 +00:00
|
|
|
|
}
|
2025-01-19 17:53:01 +00:00
|
|
|
|
if (root->left) {
|
2025-01-28 19:08:59 +00:00
|
|
|
|
code[depth] = '0';
|
2025-01-27 15:08:53 +00:00
|
|
|
|
generate_huffman_codes(root->left, code, depth + 1, codes);
|
2025-01-19 17:53:01 +00:00
|
|
|
|
}
|
|
|
|
|
if (root->right) {
|
2025-01-28 19:08:59 +00:00
|
|
|
|
code[depth] = '1';
|
2025-01-27 15:08:53 +00:00
|
|
|
|
generate_huffman_codes(root->right, code, depth + 1, codes);
|
2025-01-19 17:53:01 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2025-01-27 15:08:53 +00:00
|
|
|
|
|
|
|
|
|
void free_huffman_tree(Node* root) {
|
2025-01-26 20:02:31 +00:00
|
|
|
|
if (!root) return;
|
|
|
|
|
free_huffman_tree(root->left);
|
|
|
|
|
free_huffman_tree(root->right);
|
|
|
|
|
free(root);
|
|
|
|
|
}
|
2025-01-26 20:09:11 +00:00
|
|
|
|
|
2025-01-28 19:06:35 +00:00
|
|
|
|
int compress_1(const char* input_file, const char* output_file) {
|
2025-01-26 19:22:02 +00:00
|
|
|
|
FILE* input = fopen(input_file, "rb");
|
|
|
|
|
FILE* output = fopen(output_file, "wb");
|
2025-01-27 15:08:53 +00:00
|
|
|
|
if (!input || !output) return -1;
|
2025-01-19 14:10:47 +00:00
|
|
|
|
|
2025-01-27 15:08:53 +00:00
|
|
|
|
unsigned int frequencies[MAX_SYMBOLS] = {0};
|
|
|
|
|
unsigned char buffer[BUFFER_SIZE];
|
|
|
|
|
size_t bytes_read;
|
2025-01-19 14:10:47 +00:00
|
|
|
|
|
2025-01-27 15:08:53 +00:00
|
|
|
|
// Подсчет частот символов
|
|
|
|
|
while ((bytes_read = fread(buffer, 1, BUFFER_SIZE, input)) > 0) {
|
|
|
|
|
for (size_t i = 0; i < bytes_read; i++) {
|
|
|
|
|
frequencies[buffer[i]]++;
|
2025-01-19 14:10:47 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2025-01-27 15:08:53 +00:00
|
|
|
|
frequencies[256] = 1; // Добавляем маркер EOF
|
2025-01-19 14:10:47 +00:00
|
|
|
|
|
2025-01-27 15:08:53 +00:00
|
|
|
|
Node* root = build_huffman_tree(frequencies);
|
|
|
|
|
if (!root) return -1;
|
2025-01-19 14:10:47 +00:00
|
|
|
|
|
2025-01-27 15:08:53 +00:00
|
|
|
|
// Генерация кодов Хаффмана
|
|
|
|
|
char codes[MAX_SYMBOLS][MAX_SYMBOLS] = {{0}};
|
|
|
|
|
char code[MAX_SYMBOLS] = {0};
|
|
|
|
|
generate_huffman_codes(root, code, 0, codes);
|
2025-01-26 19:22:02 +00:00
|
|
|
|
|
2025-01-28 19:08:59 +00:00
|
|
|
|
|
2025-01-27 15:08:53 +00:00
|
|
|
|
fwrite(frequencies, sizeof(frequencies[0]), MAX_SYMBOLS, output);
|
2025-01-19 14:10:47 +00:00
|
|
|
|
|
2025-01-27 15:08:53 +00:00
|
|
|
|
rewind(input);
|
|
|
|
|
unsigned char current_byte = 0;
|
2025-01-26 19:22:02 +00:00
|
|
|
|
int bit_count = 0;
|
|
|
|
|
|
2025-01-27 15:08:53 +00:00
|
|
|
|
while ((bytes_read = fread(buffer, 1, BUFFER_SIZE, input)) > 0) {
|
|
|
|
|
for (size_t i = 0; i < bytes_read; i++) {
|
|
|
|
|
char* symbol_code = codes[buffer[i]];
|
|
|
|
|
for (size_t j = 0; symbol_code[j] != '\0'; j++) {
|
|
|
|
|
current_byte = (current_byte << 1) | (symbol_code[j] - '0');
|
|
|
|
|
bit_count++;
|
|
|
|
|
if (bit_count == 8) {
|
|
|
|
|
fwrite(¤t_byte, 1, 1, output);
|
|
|
|
|
current_byte = 0;
|
|
|
|
|
bit_count = 0;
|
|
|
|
|
}
|
2025-01-19 14:10:47 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2025-01-26 19:22:02 +00:00
|
|
|
|
|
2025-01-27 15:08:53 +00:00
|
|
|
|
char* eof_code = codes[256];
|
|
|
|
|
for (size_t j = 0; eof_code[j] != '\0'; j++) {
|
|
|
|
|
current_byte = (current_byte << 1) | (eof_code[j] - '0');
|
|
|
|
|
bit_count++;
|
|
|
|
|
if (bit_count == 8) {
|
|
|
|
|
fwrite(¤t_byte, 1, 1, output);
|
|
|
|
|
current_byte = 0;
|
|
|
|
|
bit_count = 0;
|
|
|
|
|
}
|
|
|
|
|
}
|
2025-01-26 19:22:02 +00:00
|
|
|
|
if (bit_count > 0) {
|
2025-01-27 15:08:53 +00:00
|
|
|
|
current_byte <<= (8 - bit_count);
|
|
|
|
|
fwrite(¤t_byte, 1, 1, output);
|
2025-01-19 14:10:47 +00:00
|
|
|
|
}
|
2025-01-19 17:39:39 +00:00
|
|
|
|
|
2025-01-26 19:22:02 +00:00
|
|
|
|
fclose(input);
|
|
|
|
|
fclose(output);
|
2025-01-26 20:02:31 +00:00
|
|
|
|
free_huffman_tree(root);
|
2025-01-19 14:10:47 +00:00
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2025-01-28 19:06:35 +00:00
|
|
|
|
int decompress_1(const char* input_file, const char* output_file) {
|
2025-01-27 15:08:53 +00:00
|
|
|
|
FILE* input = fopen(input_file, "rb");
|
|
|
|
|
FILE* output = fopen(output_file, "wb");
|
|
|
|
|
if (!input || !output) return -1;
|
2025-01-19 14:10:47 +00:00
|
|
|
|
|
2025-01-27 15:08:53 +00:00
|
|
|
|
unsigned int frequencies[MAX_SYMBOLS] = {0};
|
|
|
|
|
fread(frequencies, sizeof(frequencies[0]), MAX_SYMBOLS, input);
|
|
|
|
|
Node* root = build_huffman_tree(frequencies);
|
|
|
|
|
if (!root) return -1;
|
2025-01-26 19:22:02 +00:00
|
|
|
|
|
2025-01-27 15:08:53 +00:00
|
|
|
|
Node* current = root;
|
2025-01-26 19:22:02 +00:00
|
|
|
|
unsigned char byte;
|
2025-01-27 15:08:53 +00:00
|
|
|
|
int bit;
|
2025-01-26 19:22:02 +00:00
|
|
|
|
|
2025-01-28 19:08:59 +00:00
|
|
|
|
|
2025-01-27 15:08:53 +00:00
|
|
|
|
while (fread(&byte, 1, 1, input) == 1) {
|
|
|
|
|
for (bit = 7; bit >= 0; bit--) {
|
|
|
|
|
current = (byte & (1 << bit)) ? current->right : current->left;
|
2025-01-19 14:10:47 +00:00
|
|
|
|
|
2025-01-26 19:22:02 +00:00
|
|
|
|
if (!current->left && !current->right) {
|
2025-01-27 15:08:53 +00:00
|
|
|
|
if (current->symbol == 256) { // Маркер EOF
|
|
|
|
|
fclose(input);
|
|
|
|
|
fclose(output);
|
|
|
|
|
free_huffman_tree(root);
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
fwrite(¤t->symbol, 1, 1, output);
|
2025-01-19 14:10:47 +00:00
|
|
|
|
current = root;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2025-01-19 17:39:39 +00:00
|
|
|
|
|
2025-01-26 19:22:02 +00:00
|
|
|
|
fclose(input);
|
|
|
|
|
fclose(output);
|
2025-01-26 20:02:31 +00:00
|
|
|
|
free_huffman_tree(root);
|
2025-01-26 19:22:02 +00:00
|
|
|
|
return 0;
|
2025-01-27 15:08:53 +00:00
|
|
|
|
}
|
|
|
|
|
|
2025-01-28 19:03:42 +00:00
|
|
|
|
|
2025-01-28 19:06:35 +00:00
|
|
|
|
int compress_2(const char *input_file, const char *output_file) {
|
2025-01-28 19:03:42 +00:00
|
|
|
|
FILE *source = fopen(input_file, "rb");
|
|
|
|
|
if (source == NULL) {
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
FILE *destination = fopen(output_file, "wb");
|
|
|
|
|
if (destination == NULL) {
|
|
|
|
|
fclose(source);
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fseek(source, 0, SEEK_END);
|
|
|
|
|
long file_size = ftell(source);
|
|
|
|
|
if (file_size > 10 * 1024 * 1024) {
|
|
|
|
|
fclose(source);
|
|
|
|
|
fclose(destination);
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
fseek(source, 0, SEEK_SET); // Возврат в начало файла для чтения
|
|
|
|
|
|
|
|
|
|
char *input_data = malloc(file_size);
|
|
|
|
|
if (input_data == NULL) {
|
|
|
|
|
fclose(source);
|
|
|
|
|
fclose(destination);
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
fread(input_data, 1, file_size, source);
|
|
|
|
|
|
|
|
|
|
int index = 0;
|
|
|
|
|
while (index < file_size) {
|
|
|
|
|
LZ77 element = {0, 0, input_data[index]}; // Инициализация элемента с текущим символом
|
|
|
|
|
int best_length = 0, best_offset = 0;
|
|
|
|
|
int start_idx;
|
|
|
|
|
|
|
|
|
|
if ((index - WINDOW_CAPACITY) > 0) {
|
|
|
|
|
start_idx = index - WINDOW_CAPACITY;
|
|
|
|
|
} else {
|
|
|
|
|
start_idx = 0;
|
|
|
|
|
} // Поиск начала окна
|
|
|
|
|
|
|
|
|
|
// Поиск наилучшего совпадения в окне
|
|
|
|
|
for (int i = start_idx; i < index; i++) {
|
|
|
|
|
int match_length = 0;
|
|
|
|
|
while (match_length < MAX_MATCH_LENGTH && index + match_length < file_size &&
|
|
|
|
|
input_data[i + match_length] == input_data[index + match_length]) {
|
|
|
|
|
match_length++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (match_length > best_length) {
|
|
|
|
|
best_length = match_length;
|
|
|
|
|
best_offset = index - i;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (match_length == MAX_MATCH_LENGTH) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Если найдено совпадение записываем его в элемент иначе сохраняем символ
|
|
|
|
|
if (best_length > 1) {
|
|
|
|
|
element.posun = best_offset;
|
|
|
|
|
element.dlzka = best_length;
|
|
|
|
|
element.dalsi_znak = input_data[index + best_length];
|
|
|
|
|
index += best_length + 1;
|
|
|
|
|
} else {
|
|
|
|
|
element.posun = 0;
|
|
|
|
|
element.dlzka = 0;
|
|
|
|
|
element.dalsi_znak = input_data[index];
|
|
|
|
|
index++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
unsigned short combined = (element.posun << 4) | (element.dlzka & 0xF); // Объединение posun и dlzka в одно значение
|
|
|
|
|
fwrite(&combined, sizeof(unsigned short), 1, destination);
|
|
|
|
|
fwrite(&element.dalsi_znak, sizeof(char), 1, destination);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fwrite(&file_size, sizeof(file_size), 1, destination);
|
|
|
|
|
free(input_data);
|
|
|
|
|
fclose(source);
|
|
|
|
|
fclose(destination);
|
|
|
|
|
return ftell(destination);
|
2025-01-27 15:08:53 +00:00
|
|
|
|
}
|
|
|
|
|
|
2025-01-28 19:03:42 +00:00
|
|
|
|
|
2025-01-28 19:06:35 +00:00
|
|
|
|
int decompress_2(const char *input_file, const char *output_file) {
|
2025-01-28 19:03:42 +00:00
|
|
|
|
FILE *source = fopen(input_file, "rb");
|
|
|
|
|
if (source == NULL) {
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
FILE *destination = fopen(output_file, "wb");
|
|
|
|
|
if (destination == NULL) {
|
|
|
|
|
fclose(source);
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
char *buffer = malloc(WINDOW_CAPACITY);
|
|
|
|
|
if (buffer == NULL) {
|
|
|
|
|
fclose(source);
|
|
|
|
|
fclose(destination);
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
memset(buffer, 0, WINDOW_CAPACITY);
|
|
|
|
|
// Переход к концу файла для чтения размера исходного файла
|
|
|
|
|
fseek(source, -sizeof(long), SEEK_END);
|
|
|
|
|
long original_size;
|
|
|
|
|
if (fread(&original_size, sizeof(long), 1, source) != 1) {
|
|
|
|
|
free(buffer);
|
|
|
|
|
fclose(source);
|
|
|
|
|
fclose(destination);
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
// Возврат в начало файла для распаковки
|
|
|
|
|
fseek(source, 0, SEEK_SET);
|
|
|
|
|
|
|
|
|
|
int written_bytes = 0, buffer_position = 0;
|
|
|
|
|
while (written_bytes < original_size) {
|
|
|
|
|
unsigned short combined;
|
|
|
|
|
char dalsi_znak;
|
|
|
|
|
if (fread(&combined, sizeof(unsigned short), 1, source) != 1) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
if (fread(&dalsi_znak, sizeof(char), 1, source) != 1) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int posun = combined >> 4;
|
|
|
|
|
int dlzka = combined & 0xF;
|
|
|
|
|
|
|
|
|
|
// Восстановление данных из совпадений в буфере
|
|
|
|
|
if (dlzka > 0) {
|
|
|
|
|
int start_idx = buffer_position - posun;
|
|
|
|
|
for (int i = 0; i < dlzka; i++) {
|
|
|
|
|
char current = buffer[(start_idx + i) % WINDOW_CAPACITY];
|
|
|
|
|
fputc(current, destination);
|
|
|
|
|
buffer[buffer_position % WINDOW_CAPACITY] = current;
|
|
|
|
|
buffer_position++;
|
|
|
|
|
written_bytes++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Запись следующего символа в файл
|
|
|
|
|
if (written_bytes < original_size) {
|
|
|
|
|
fputc(dalsi_znak, destination);
|
|
|
|
|
buffer[buffer_position % WINDOW_CAPACITY] = dalsi_znak;
|
|
|
|
|
buffer_position++;
|
|
|
|
|
written_bytes++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
free(buffer);
|
|
|
|
|
fclose(source);
|
|
|
|
|
fclose(destination);
|
|
|
|
|
return ftell(destination);
|
2025-01-27 15:08:53 +00:00
|
|
|
|
}
|