Обновить sk1/compressor.c
This commit is contained in:
parent
2aa747983d
commit
183e166d8b
369
sk1/compressor.c
369
sk1/compressor.c
@ -1,224 +1,157 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include "compressor.h"
|
||||
|
||||
#define MAX_TREE_NODES 256
|
||||
#define BUFFER_SIZE 4096
|
||||
#define MAX_SYMBOLS 257
|
||||
|
||||
// One node of the Huffman tree used by the heap-based builder.
// `left` / `right` point at the children; other code in this file treats a
// node with both pointers NULL as a leaf.
typedef struct HuffmanNode {
    unsigned char data;          // byte value stored in the node
    unsigned int frequency;      // weight used to order nodes in the min heap
    struct HuffmanNode* left;    // left child (followed on a 0 bit when decoding)
    struct HuffmanNode* right;   // right child (followed on a 1 bit when decoding)
} HuffmanNode;
|
||||
// Swap two `Node*` lvalues in place.
// Wrapped in do { ... } while (0) so that `SWAP_NODES(a, b);` is exactly one
// statement and stays valid as the body of an unbraced if/else. Both
// arguments are parenthesized, and the temporary has a name that is unlikely
// to clash with a caller's variable. Each argument is evaluated twice, so
// pass side-effect-free lvalues only.
#define SWAP_NODES(a, b)                     \
    do {                                     \
        Node* swap_nodes_tmp_ = (a);         \
        (a) = (b);                           \
        (b) = swap_nodes_tmp_;               \
    } while (0)
|
||||
|
||||
// A structure to represent the Min Heap (Priority Queue)
|
||||
typedef struct MinHeap {
|
||||
unsigned size;
|
||||
unsigned capacity;
|
||||
HuffmanNode** array;
|
||||
} MinHeap;
|
||||
// Tree node used by the array-based Huffman builder.
// `symbol` is an int rather than a byte so it can hold values outside 0..255:
// this file uses 256 for the EOF marker and -1 for internal nodes.
typedef struct Node {
    int symbol;               // symbol value carried by the node
    unsigned int frequency;   // weight of the node
    struct Node* left;        // left child, NULL for a leaf
    struct Node* right;       // right child, NULL for a leaf
} Node;
|
||||
|
||||
// Function to create a new node
|
||||
HuffmanNode* newNode(unsigned char data, unsigned frequency) {
|
||||
HuffmanNode* node = (HuffmanNode*)malloc(sizeof(HuffmanNode));
|
||||
if (!node) {
|
||||
perror("Failed to allocate memory for new node");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
node->data = data;
|
||||
// Функция для создания нового узла
|
||||
Node* create_node(int symbol, unsigned int frequency) {
|
||||
Node* node = (Node*)malloc(sizeof(Node));
|
||||
node->symbol = symbol;
|
||||
node->frequency = frequency;
|
||||
node->left = node->right = NULL;
|
||||
return node;
|
||||
}
|
||||
|
||||
// Function to create a MinHeap
|
||||
MinHeap* createMinHeap(unsigned capacity) {
|
||||
MinHeap* minHeap = (MinHeap*)malloc(sizeof(MinHeap));
|
||||
if (!minHeap) {
|
||||
perror("Failed to allocate memory for MinHeap");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
minHeap->size = 0;
|
||||
minHeap->capacity = capacity;
|
||||
minHeap->array = (HuffmanNode**)malloc(capacity * sizeof(HuffmanNode*));
|
||||
if (!minHeap->array) {
|
||||
perror("Failed to allocate memory for MinHeap array");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
return minHeap;
|
||||
}
|
||||
// Функция для построения дерева Хаффмана
|
||||
Node* build_huffman_tree(const unsigned int* frequencies) {
|
||||
Node* nodes[MAX_SYMBOLS];
|
||||
int node_count = 0;
|
||||
|
||||
// Function to swap two min heap nodes
|
||||
void swapMinHeapNode(HuffmanNode** a, HuffmanNode** b) {
|
||||
HuffmanNode* temp = *a;
|
||||
*a = *b;
|
||||
*b = temp;
|
||||
}
|
||||
|
||||
// Function to min heapify
|
||||
void minHeapify(MinHeap* minHeap, int idx) {
|
||||
int smallest = idx;
|
||||
int left = 2 * idx + 1;
|
||||
int right = 2 * idx + 2;
|
||||
|
||||
if (left < (int)minHeap->size && minHeap->array[left]->frequency < minHeap->array[smallest]->frequency)
|
||||
smallest = left;
|
||||
|
||||
if (right < (int)minHeap->size && minHeap->array[right]->frequency < minHeap->array[smallest]->frequency)
|
||||
smallest = right;
|
||||
|
||||
if (smallest != idx) {
|
||||
swapMinHeapNode(&minHeap->array[smallest], &minHeap->array[idx]);
|
||||
minHeapify(minHeap, smallest);
|
||||
}
|
||||
}
|
||||
|
||||
// Check if the size of heap is one
|
||||
int isSizeOne(MinHeap* minHeap) {
|
||||
return (minHeap->size == 1);
|
||||
}
|
||||
|
||||
// Extract the minimum node from heap
|
||||
HuffmanNode* extractMin(MinHeap* minHeap) {
|
||||
HuffmanNode* temp = minHeap->array[0];
|
||||
minHeap->array[0] = minHeap->array[minHeap->size - 1];
|
||||
--minHeap->size;
|
||||
minHeapify(minHeap, 0);
|
||||
return temp;
|
||||
}
|
||||
|
||||
// Insert a new node to MinHeap
|
||||
void insertMinHeap(MinHeap* minHeap, HuffmanNode* node) {
|
||||
++minHeap->size;
|
||||
int i = minHeap->size - 1;
|
||||
while (i && node->frequency < minHeap->array[(i - 1) / 2]->frequency) {
|
||||
minHeap->array[i] = minHeap->array[(i - 1) / 2];
|
||||
i = (i - 1) / 2;
|
||||
}
|
||||
minHeap->array[i] = node;
|
||||
}
|
||||
|
||||
// Build a min heap of given capacity
|
||||
void buildMinHeap(MinHeap* minHeap) {
|
||||
int n = minHeap->size - 1;
|
||||
for (int i = (n - 1) / 2; i >= 0; --i)
|
||||
minHeapify(minHeap, i);
|
||||
}
|
||||
|
||||
// Function to build the Huffman tree
|
||||
HuffmanNode* buildHuffmanTree(unsigned char* data, unsigned* freq, int size) {
|
||||
HuffmanNode *left, *right, *top;
|
||||
|
||||
MinHeap* minHeap = createMinHeap(size);
|
||||
|
||||
for (int i = 0; i < size; ++i)
|
||||
insertMinHeap(minHeap, newNode(data[i], freq[data[i]]));
|
||||
|
||||
buildMinHeap(minHeap);
|
||||
|
||||
while (!isSizeOne(minHeap)) {
|
||||
left = extractMin(minHeap);
|
||||
right = extractMin(minHeap);
|
||||
|
||||
top = newNode('$', left->frequency + right->frequency);
|
||||
top->left = left;
|
||||
top->right = right;
|
||||
|
||||
insertMinHeap(minHeap, top);
|
||||
// Создаем узлы для всех символов с ненулевой частотой
|
||||
for (int i = 0; i < MAX_SYMBOLS; i++) {
|
||||
if (frequencies[i] > 0) {
|
||||
nodes[node_count++] = create_node(i, frequencies[i]);
|
||||
}
|
||||
}
|
||||
|
||||
return extractMin(minHeap);
|
||||
// Объединяем узлы в дерево
|
||||
while (node_count > 1) {
|
||||
// Сортируем узлы по частоте
|
||||
for (int i = 0; i < node_count - 1; i++) {
|
||||
for (int j = i + 1; j < node_count; j++) {
|
||||
if (nodes[i]->frequency > nodes[j]->frequency) {
|
||||
SWAP_NODES(nodes[i], nodes[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Объединяем два узла с наименьшей частотой
|
||||
Node* left = nodes[0];
|
||||
Node* right = nodes[1];
|
||||
Node* parent = create_node(-1, left->frequency + right->frequency);
|
||||
parent->left = left;
|
||||
parent->right = right;
|
||||
|
||||
// Заменяем объединенные узлы новым родительским узлом
|
||||
nodes[0] = parent;
|
||||
nodes[1] = nodes[--node_count];
|
||||
}
|
||||
|
||||
return nodes[0];
|
||||
}
|
||||
|
||||
// Function to generate the Huffman codes for each character
|
||||
void generateCodes(HuffmanNode* root, char* arr, int top, char** codes) {
|
||||
if (root->left) {
|
||||
arr[top] = '0';
|
||||
generateCodes(root->left, arr, top + 1, codes);
|
||||
}
|
||||
|
||||
if (root->right) {
|
||||
arr[top] = '1';
|
||||
generateCodes(root->right, arr, top + 1, codes);
|
||||
}
|
||||
|
||||
// Рекурсивная функция для генерации кодов Хаффмана
|
||||
void generate_huffman_codes(Node* root, char* code, int depth, char codes[MAX_SYMBOLS][MAX_SYMBOLS]) {
|
||||
if (!root->left && !root->right) {
|
||||
arr[top] = '\0'; // Null terminate the string
|
||||
codes[root->data] = strdup(arr);
|
||||
code[depth] = '\0'; // Завершаем код символа
|
||||
strcpy(codes[root->symbol], code);
|
||||
return;
|
||||
}
|
||||
if (root->left) {
|
||||
code[depth] = '0'; // Добавляем бит '0' для левого поддерева
|
||||
generate_huffman_codes(root->left, code, depth + 1, codes);
|
||||
}
|
||||
if (root->right) {
|
||||
code[depth] = '1'; // Добавляем бит '1' для правого поддерева
|
||||
generate_huffman_codes(root->right, code, depth + 1, codes);
|
||||
}
|
||||
}
|
||||
void free_huffman_tree(HuffmanNode* root) {
|
||||
|
||||
// Функция для освобождения памяти, выделенной под дерево Хаффмана
|
||||
void free_huffman_tree(Node* root) {
|
||||
if (!root) return;
|
||||
free_huffman_tree(root->left);
|
||||
free_huffman_tree(root->right);
|
||||
free(root);
|
||||
}
|
||||
|
||||
// Function to compress a file
|
||||
// Функция сжатия данных с использованием алгоритма Хаффмана
|
||||
int compress_1(const char* input_file, const char* output_file) {
|
||||
FILE* input = fopen(input_file, "rb");
|
||||
FILE* output = fopen(output_file, "wb");
|
||||
if (!input || !output) {
|
||||
perror("Error opening file");
|
||||
return -1;
|
||||
}
|
||||
if (!input || !output) return -1;
|
||||
|
||||
unsigned freq[256] = {0};
|
||||
unsigned char data;
|
||||
while (fread(&data, sizeof(data), 1, input) == 1)
|
||||
freq[data]++;
|
||||
unsigned int frequencies[MAX_SYMBOLS] = {0};
|
||||
unsigned char buffer[BUFFER_SIZE];
|
||||
size_t bytes_read;
|
||||
|
||||
unsigned char unique_data[256];
|
||||
int unique_count = 0;
|
||||
for (int i = 0; i < 256; i++) {
|
||||
if (freq[i] > 0) {
|
||||
unique_data[unique_count++] = i;
|
||||
// Подсчет частот символов
|
||||
while ((bytes_read = fread(buffer, 1, BUFFER_SIZE, input)) > 0) {
|
||||
for (size_t i = 0; i < bytes_read; i++) {
|
||||
frequencies[buffer[i]]++;
|
||||
}
|
||||
}
|
||||
frequencies[256] = 1; // Добавляем маркер EOF
|
||||
|
||||
HuffmanNode* root = buildHuffmanTree(unique_data, freq, unique_count);
|
||||
Node* root = build_huffman_tree(frequencies);
|
||||
if (!root) return -1;
|
||||
|
||||
char* codes[256] = {0};
|
||||
char arr[256];
|
||||
generateCodes(root, arr, 0, codes);
|
||||
// Генерация кодов Хаффмана
|
||||
char codes[MAX_SYMBOLS][MAX_SYMBOLS] = {{0}};
|
||||
char code[MAX_SYMBOLS] = {0};
|
||||
generate_huffman_codes(root, code, 0, codes);
|
||||
|
||||
fwrite(&unique_count, sizeof(int), 1, output);
|
||||
for (int i = 0; i < unique_count; i++) {
|
||||
unsigned char symbol = unique_data[i];
|
||||
fwrite(&symbol, sizeof(unsigned char), 1, output);
|
||||
fwrite(&freq[symbol], sizeof(unsigned), 1, output);
|
||||
}
|
||||
// Записываем частоты в выходной файл
|
||||
fwrite(frequencies, sizeof(frequencies[0]), MAX_SYMBOLS, output);
|
||||
|
||||
fseek(input, 0, SEEK_SET);
|
||||
|
||||
unsigned char buffer = 0;
|
||||
// Сжимаем данные
|
||||
rewind(input);
|
||||
unsigned char current_byte = 0;
|
||||
int bit_count = 0;
|
||||
size_t total_bits = 0;
|
||||
|
||||
while (fread(&data, sizeof(data), 1, input) == 1) {
|
||||
char* code = codes[data];
|
||||
for (int i = 0; code[i] != '\0'; i++) {
|
||||
unsigned char bit = code[i] - '0';
|
||||
buffer = (buffer << 1) | bit;
|
||||
bit_count++;
|
||||
total_bits++;
|
||||
|
||||
if (bit_count == 8) {
|
||||
fwrite(&buffer, sizeof(unsigned char), 1, output);
|
||||
bit_count = 0;
|
||||
buffer = 0;
|
||||
while ((bytes_read = fread(buffer, 1, BUFFER_SIZE, input)) > 0) {
|
||||
for (size_t i = 0; i < bytes_read; i++) {
|
||||
char* symbol_code = codes[buffer[i]];
|
||||
for (size_t j = 0; symbol_code[j] != '\0'; j++) {
|
||||
current_byte = (current_byte << 1) | (symbol_code[j] - '0');
|
||||
bit_count++;
|
||||
if (bit_count == 8) {
|
||||
fwrite(¤t_byte, 1, 1, output);
|
||||
current_byte = 0;
|
||||
bit_count = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (bit_count > 0) {
|
||||
buffer <<= (8 - bit_count);
|
||||
fwrite(&buffer, sizeof(unsigned char), 1, output);
|
||||
// Записываем маркер EOF
|
||||
char* eof_code = codes[256];
|
||||
for (size_t j = 0; eof_code[j] != '\0'; j++) {
|
||||
current_byte = (current_byte << 1) | (eof_code[j] - '0');
|
||||
bit_count++;
|
||||
if (bit_count == 8) {
|
||||
fwrite(¤t_byte, 1, 1, output);
|
||||
current_byte = 0;
|
||||
bit_count = 0;
|
||||
}
|
||||
}
|
||||
if (bit_count > 0) {
|
||||
current_byte <<= (8 - bit_count);
|
||||
fwrite(¤t_byte, 1, 1, output);
|
||||
}
|
||||
|
||||
fwrite(&total_bits, sizeof(size_t), 1, output); // Write total bits used
|
||||
|
||||
fclose(input);
|
||||
fclose(output);
|
||||
@ -226,62 +159,34 @@ int compress_1(const char* input_file, const char* output_file) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Function to decompress the compressed file
|
||||
int decompress_1(const char* input_file_name, const char* output_file_name) {
|
||||
FILE* input = fopen(input_file_name, "rb");
|
||||
if (!input) {
|
||||
perror("Error opening input file");
|
||||
return -1;
|
||||
}
|
||||
FILE* output = fopen(output_file_name, "wb");
|
||||
if (!output) {
|
||||
perror("Error opening output file");
|
||||
fclose(input);
|
||||
return -1;
|
||||
}
|
||||
// Функция декомпрессии данных с использованием алгоритма Хаффмана
|
||||
int decompress_1(const char* input_file, const char* output_file) {
|
||||
FILE* input = fopen(input_file, "rb");
|
||||
FILE* output = fopen(output_file, "wb");
|
||||
if (!input || !output) return -1;
|
||||
|
||||
int unique_count;
|
||||
if (fread(&unique_count, sizeof(int), 1, input) != 1) {
|
||||
perror("Error reading from input file");
|
||||
fclose(input);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
unsigned int frequencies[MAX_SYMBOLS] = {0};
|
||||
fread(frequencies, sizeof(frequencies[0]), MAX_SYMBOLS, input);
|
||||
Node* root = build_huffman_tree(frequencies);
|
||||
if (!root) return -1;
|
||||
|
||||
unsigned char unique_data[256];
|
||||
unsigned freq[256] = {0};
|
||||
for (int i = 0; i < unique_count; i++) {
|
||||
if (fread(&unique_data[i], sizeof(unsigned char), 1, input) != 1 ||
|
||||
fread(&freq[unique_data[i]], sizeof(unsigned), 1, input) != 1) {
|
||||
perror("Error reading from input file");
|
||||
fclose(input);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
HuffmanNode* root = buildHuffmanTree(unique_data, freq, unique_count);
|
||||
|
||||
size_t total_bits;
|
||||
fseek(input, -(long long)sizeof(size_t), SEEK_END);
|
||||
fread(&total_bits, sizeof(size_t), 1, input);
|
||||
|
||||
fseek(input, sizeof(int) + unique_count * (sizeof(unsigned char) + sizeof(unsigned)), SEEK_SET);
|
||||
|
||||
HuffmanNode* current = root;
|
||||
Node* current = root;
|
||||
unsigned char byte;
|
||||
size_t bits_read = 0;
|
||||
int bit;
|
||||
|
||||
while (bits_read < total_bits && fread(&byte, sizeof(byte), 1, input) == 1) {
|
||||
for (int i = 7; i >= 0 && bits_read < total_bits; i--, bits_read++) {
|
||||
if (byte & (1 << i)) {
|
||||
current = current->right;
|
||||
} else {
|
||||
current = current->left;
|
||||
}
|
||||
// Читаем и декодируем символы
|
||||
while (fread(&byte, 1, 1, input) == 1) {
|
||||
for (bit = 7; bit >= 0; bit--) {
|
||||
current = (byte & (1 << bit)) ? current->right : current->left;
|
||||
|
||||
if (!current->left && !current->right) {
|
||||
fwrite(¤t->data, sizeof(current->data), 1, output);
|
||||
if (current->symbol == 256) { // Маркер EOF
|
||||
fclose(input);
|
||||
fclose(output);
|
||||
free_huffman_tree(root);
|
||||
return 0;
|
||||
}
|
||||
fwrite(¤t->symbol, 1, 1, output);
|
||||
current = root;
|
||||
}
|
||||
}
|
||||
@ -291,4 +196,12 @@ int decompress_1(const char* input_file_name, const char* output_file_name) {
|
||||
fclose(output);
|
||||
free_huffman_tree(root);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Placeholder for the second compression method: performs no work, touches
// neither file, and always returns 0.
int compress_2(const char* input_file_name, const char* output_file_name) {
    const int status = 0;

    (void)input_file_name;
    (void)output_file_name;
    return status;
}
|
||||
|
||||
// Placeholder for the second decompression method: performs no work, touches
// neither file, and always returns 0.
int decompress_2(const char* input_file_name, const char* output_file_name) {
    const int status = 0;

    (void)input_file_name;
    (void)output_file_name;
    return status;
}
|
||||
|
Loading…
Reference in New Issue
Block a user