usaa24/sk1/compressor.c

357 lines
10 KiB
C
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "compressor.h"
#define BUFFER_SIZE 4096
#define MAX_SYMBOLS 257
#define WINDOW_CAPACITY 4096
#define MAX_MATCH_LENGTH 15
#define BUFFER_CAPACITY 4096
// Макрос для обмена двух узлов
#define SWAP_NODES(a, b) { Node* temp = a; a = b; b = temp; }
// Определение структуры узла дерева
typedef struct Node {
int symbol;
unsigned int frequency;
struct Node *left, *right;
} Node;
typedef struct LZ77{
int posun;
int dlzka;
char dalsi_znak;
} LZ77;
Node* create_node(int symbol, unsigned int frequency) {
Node* node = (Node*)malloc(sizeof(Node));
node->symbol = symbol;
node->frequency = frequency;
node->left = node->right = NULL;
return node;
}
Node* build_huffman_tree(const unsigned int* frequencies) {
Node* nodes[MAX_SYMBOLS];
int node_count = 0;
for (int i = 0; i < MAX_SYMBOLS; i++) {
if (frequencies[i] > 0) {
nodes[node_count++] = create_node(i, frequencies[i]);
}
}
while (node_count > 1) {
// Сортируем узлы по частоте
for (int i = 0; i < node_count - 1; i++) {
for (int j = i + 1; j < node_count; j++) {
if (nodes[i]->frequency > nodes[j]->frequency) {
SWAP_NODES(nodes[i], nodes[j]);
}
}
}
Node* left = nodes[0];
Node* right = nodes[1];
Node* parent = create_node(-1, left->frequency + right->frequency);
parent->left = left;
parent->right = right;
// Заменяем объединенные узлы новым родительским узлом
nodes[0] = parent;
nodes[1] = nodes[--node_count];
}
return nodes[0];
}
void generate_huffman_codes(Node* root, char* code, int depth, char codes[MAX_SYMBOLS][MAX_SYMBOLS]) {
if (!root->left && !root->right) {
code[depth] = '\0';
strcpy(codes[root->symbol], code);
return;
}
if (root->left) {
code[depth] = '0';
generate_huffman_codes(root->left, code, depth + 1, codes);
}
if (root->right) {
code[depth] = '1';
generate_huffman_codes(root->right, code, depth + 1, codes);
}
}
void free_huffman_tree(Node* root) {
if (!root) return;
free_huffman_tree(root->left);
free_huffman_tree(root->right);
free(root);
}
int compress_1(const char* input_file, const char* output_file) {
FILE* input = fopen(input_file, "rb");
FILE* output = fopen(output_file, "wb");
if (!input || !output) return -1;
unsigned int frequencies[MAX_SYMBOLS] = {0};
unsigned char buffer[BUFFER_SIZE];
size_t bytes_read;
// Подсчет частот символов
while ((bytes_read = fread(buffer, 1, BUFFER_SIZE, input)) > 0) {
for (size_t i = 0; i < bytes_read; i++) {
frequencies[buffer[i]]++;
}
}
frequencies[256] = 1; // Добавляем маркер EOF
Node* root = build_huffman_tree(frequencies);
if (!root) return -1;
// Генерация кодов Хаффмана
char codes[MAX_SYMBOLS][MAX_SYMBOLS] = {{0}};
char code[MAX_SYMBOLS] = {0};
generate_huffman_codes(root, code, 0, codes);
fwrite(frequencies, sizeof(frequencies[0]), MAX_SYMBOLS, output);
rewind(input);
unsigned char current_byte = 0;
int bit_count = 0;
while ((bytes_read = fread(buffer, 1, BUFFER_SIZE, input)) > 0) {
for (size_t i = 0; i < bytes_read; i++) {
char* symbol_code = codes[buffer[i]];
for (size_t j = 0; symbol_code[j] != '\0'; j++) {
current_byte = (current_byte << 1) | (symbol_code[j] - '0');
bit_count++;
if (bit_count == 8) {
fwrite(&current_byte, 1, 1, output);
current_byte = 0;
bit_count = 0;
}
}
}
}
char* eof_code = codes[256];
for (size_t j = 0; eof_code[j] != '\0'; j++) {
current_byte = (current_byte << 1) | (eof_code[j] - '0');
bit_count++;
if (bit_count == 8) {
fwrite(&current_byte, 1, 1, output);
current_byte = 0;
bit_count = 0;
}
}
if (bit_count > 0) {
current_byte <<= (8 - bit_count);
fwrite(&current_byte, 1, 1, output);
}
fclose(input);
fclose(output);
free_huffman_tree(root);
return 0;
}
int decompress_1(const char* input_file, const char* output_file) {
FILE* input = fopen(input_file, "rb");
FILE* output = fopen(output_file, "wb");
if (!input || !output) return -1;
unsigned int frequencies[MAX_SYMBOLS] = {0};
fread(frequencies, sizeof(frequencies[0]), MAX_SYMBOLS, input);
Node* root = build_huffman_tree(frequencies);
if (!root) return -1;
Node* current = root;
unsigned char byte;
int bit;
while (fread(&byte, 1, 1, input) == 1) {
for (bit = 7; bit >= 0; bit--) {
current = (byte & (1 << bit)) ? current->right : current->left;
if (!current->left && !current->right) {
if (current->symbol == 256) { // Маркер EOF
fclose(input);
fclose(output);
free_huffman_tree(root);
return 0;
}
fwrite(&current->symbol, 1, 1, output);
current = root;
}
}
}
fclose(input);
fclose(output);
free_huffman_tree(root);
return 0;
}
int compress_2(const char *input_file, const char *output_file) {
FILE *source = fopen(input_file, "rb");
if (source == NULL) {
return -1;
}
FILE *destination = fopen(output_file, "wb");
if (destination == NULL) {
fclose(source);
return -1;
}
fseek(source, 0, SEEK_END);
long file_size = ftell(source);
if (file_size > 10 * 1024 * 1024) {
fclose(source);
fclose(destination);
return -1;
}
fseek(source, 0, SEEK_SET); // Возврат в начало файла для чтения
char *input_data = malloc(file_size);
if (input_data == NULL) {
fclose(source);
fclose(destination);
return -1;
}
fread(input_data, 1, file_size, source);
int index = 0;
while (index < file_size) {
LZ77 element = {0, 0, input_data[index]}; // Инициализация элемента с текущим символом
int best_length = 0, best_offset = 0;
int start_idx;
if ((index - WINDOW_CAPACITY) > 0) {
start_idx = index - WINDOW_CAPACITY;
} else {
start_idx = 0;
} // Поиск начала окна
// Поиск наилучшего совпадения в окне
for (int i = start_idx; i < index; i++) {
int match_length = 0;
while (match_length < MAX_MATCH_LENGTH && index + match_length < file_size &&
input_data[i + match_length] == input_data[index + match_length]) {
match_length++;
}
if (match_length > best_length) {
best_length = match_length;
best_offset = index - i;
}
if (match_length == MAX_MATCH_LENGTH) {
break;
}
}
// Если найдено совпадение записываем его в элемент иначе сохраняем символ
if (best_length > 1) {
element.posun = best_offset;
element.dlzka = best_length;
element.dalsi_znak = input_data[index + best_length];
index += best_length + 1;
} else {
element.posun = 0;
element.dlzka = 0;
element.dalsi_znak = input_data[index];
index++;
}
unsigned short combined = (element.posun << 4) | (element.dlzka & 0xF); // Объединение posun и dlzka в одно значение
fwrite(&combined, sizeof(unsigned short), 1, destination);
fwrite(&element.dalsi_znak, sizeof(char), 1, destination);
}
fwrite(&file_size, sizeof(file_size), 1, destination);
free(input_data);
fclose(source);
fclose(destination);
return ftell(destination);
}
int decompress_2(const char *input_file, const char *output_file) {
FILE *source = fopen(input_file, "rb");
if (source == NULL) {
return -1;
}
FILE *destination = fopen(output_file, "wb");
if (destination == NULL) {
fclose(source);
return -1;
}
char *buffer = malloc(WINDOW_CAPACITY);
if (buffer == NULL) {
fclose(source);
fclose(destination);
return -1;
}
memset(buffer, 0, WINDOW_CAPACITY);
// Переход к концу файла для чтения размера исходного файла
fseek(source, -sizeof(long), SEEK_END);
long original_size;
if (fread(&original_size, sizeof(long), 1, source) != 1) {
free(buffer);
fclose(source);
fclose(destination);
return -1;
}
// Возврат в начало файла для распаковки
fseek(source, 0, SEEK_SET);
int written_bytes = 0, buffer_position = 0;
while (written_bytes < original_size) {
unsigned short combined;
char dalsi_znak;
if (fread(&combined, sizeof(unsigned short), 1, source) != 1) {
break;
}
if (fread(&dalsi_znak, sizeof(char), 1, source) != 1) {
break;
}
int posun = combined >> 4;
int dlzka = combined & 0xF;
// Восстановление данных из совпадений в буфере
if (dlzka > 0) {
int start_idx = buffer_position - posun;
for (int i = 0; i < dlzka; i++) {
char current = buffer[(start_idx + i) % WINDOW_CAPACITY];
fputc(current, destination);
buffer[buffer_position % WINDOW_CAPACITY] = current;
buffer_position++;
written_bytes++;
}
}
// Запись следующего символа в файл
if (written_bytes < original_size) {
fputc(dalsi_znak, destination);
buffer[buffer_position % WINDOW_CAPACITY] = dalsi_znak;
buffer_position++;
written_bytes++;
}
}
free(buffer);
fclose(source);
fclose(destination);
return ftell(destination);
}