Update sk1/compressor.c
This commit is contained in:
parent
8b0dbfb120
commit
dea7942391
453
sk1/compressor.c
453
sk1/compressor.c
@ -1,304 +1,199 @@
|
||||
#include <assert.h>
|
||||
#include "compressor.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "compressor.h"
|
||||
|
||||
#define BUFSIZE 1024
|
||||
// --- Алгоритм Run-Length Encoding (RLE) ---
|
||||
|
||||
// Huffman Tree Node
|
||||
// Node of the Huffman tree. The same nodes are held in the min-heap while
// the tree is being assembled.
struct MinHeapNode {
    char data;                         // byte value stored in the node
    unsigned freq;                     // weight used to order nodes in the heap
    struct MinHeapNode *left, *right;  // children; a leaf has both set to NULL
};
|
||||
|
||||
// MinHeap
|
||||
// Binary min-heap of Huffman nodes stored in a flat array.
struct MinHeap {
    unsigned size;               // number of slots of `array` currently in use
    unsigned capacity;           // number of slots allocated in `array`
    struct MinHeapNode **array;  // heap storage: pointers to nodes
};
|
||||
|
||||
// Create a new node
|
||||
struct MinHeapNode* newNode(char data, unsigned freq) {
|
||||
struct MinHeapNode* temp = (struct MinHeapNode*)malloc(sizeof(struct MinHeapNode));
|
||||
temp->data = data;
|
||||
temp->freq = freq;
|
||||
temp->left = temp->right = NULL;
|
||||
return temp;
|
||||
int compress_2(const char* input_file_name, const char* output_file_name) {
|
||||
FILE *infile = fopen(input_file_name, "rb");
|
||||
if (!infile) {
|
||||
perror("Error opening input file");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Create a MinHeap
|
||||
struct MinHeap* createMinHeap(unsigned capacity) {
|
||||
struct MinHeap* minHeap = (struct MinHeap*)malloc(sizeof(struct MinHeap));
|
||||
minHeap->size = 0;
|
||||
minHeap->capacity = capacity;
|
||||
minHeap->array = (struct MinHeapNode**)malloc(minHeap->capacity * sizeof(struct MinHeapNode*));
|
||||
return minHeap;
|
||||
FILE *outfile = fopen(output_file_name, "wb");
|
||||
if (!outfile) {
|
||||
perror("Error opening output file");
|
||||
fclose(infile);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Swap two min heap nodes
|
||||
// Exchange the two node pointers that `a` and `b` point at.
void swapMinHeapNode(struct MinHeapNode **a, struct MinHeapNode **b) {
    struct MinHeapNode *held = *a;
    *a = *b;
    *b = held;
}
|
||||
|
||||
// MinHeapify a node
|
||||
void minHeapify(struct MinHeap* minHeap, int idx) {
|
||||
int smallest = idx;
|
||||
int left = 2 * idx + 1;
|
||||
int right = 2 * idx + 2;
|
||||
count = 1; // Ініціалізуємо лічильник
|
||||
|
||||
if (left < minHeap->size && minHeap->array[left]->freq < minHeap->array[smallest]->freq)
|
||||
smallest = left;
|
||||
|
||||
if (right < minHeap->size && minHeap->array[right]->freq < minHeap->array[smallest]->freq)
|
||||
smallest = right;
|
||||
|
||||
if (smallest != idx) {
|
||||
swapMinHeapNode(&minHeap->array[smallest], &minHeap->array[idx]);
|
||||
minHeapify(minHeap, smallest);
|
||||
}
|
||||
}
|
||||
|
||||
// Extract the minimum value node
|
||||
struct MinHeapNode* extractMin(struct MinHeap* minHeap) {
|
||||
struct MinHeapNode* temp = minHeap->array[0];
|
||||
minHeap->array[0] = minHeap->array[minHeap->size - 1];
|
||||
--minHeap->size;
|
||||
minHeapify(minHeap, 0);
|
||||
return temp;
|
||||
}
|
||||
|
||||
// Insert a node into the MinHeap
|
||||
void insertMinHeap(struct MinHeap* minHeap, struct MinHeapNode* minHeapNode) {
|
||||
++minHeap->size;
|
||||
int i = minHeap->size - 1;
|
||||
|
||||
while (i && minHeapNode->freq < minHeap->array[(i - 1) / 2]->freq) {
|
||||
minHeap->array[i] = minHeap->array[(i - 1) / 2];
|
||||
i = (i - 1) / 2;
|
||||
}
|
||||
minHeap->array[i] = minHeapNode;
|
||||
}
|
||||
|
||||
// Build a MinHeap
|
||||
struct MinHeap* buildMinHeap(char data[], int freq[], int size) {
|
||||
struct MinHeap* minHeap = createMinHeap(size);
|
||||
for (int i = 0; i < size; ++i)
|
||||
minHeap->array[i] = newNode(data[i], freq[i]);
|
||||
minHeap->size = size;
|
||||
for (int i = (minHeap->size - 2) / 2; i >= 0; --i)
|
||||
minHeapify(minHeap, i);
|
||||
return minHeap;
|
||||
}
|
||||
|
||||
// Build Huffman Tree
|
||||
struct MinHeapNode* buildHuffmanTree(char data[], int freq[], int size) {
|
||||
struct MinHeapNode *left, *right, *top;
|
||||
struct MinHeap* minHeap = buildMinHeap(data, freq, size);
|
||||
|
||||
while (minHeap->size != 1) {
|
||||
left = extractMin(minHeap);
|
||||
right = extractMin(minHeap);
|
||||
|
||||
top = newNode('$', left->freq + right->freq);
|
||||
top->left = left;
|
||||
top->right = right;
|
||||
|
||||
insertMinHeap(minHeap, top);
|
||||
}
|
||||
|
||||
return extractMin(minHeap);
|
||||
}
|
||||
|
||||
// Generate Huffman Codes
|
||||
void generateCodes(struct MinHeapNode* root, char** codes, char* buffer, int top) {
|
||||
if (root->left) {
|
||||
buffer[top] = '0';
|
||||
generateCodes(root->left, codes, buffer, top + 1);
|
||||
}
|
||||
if (root->right) {
|
||||
buffer[top] = '1';
|
||||
generateCodes(root->right, codes, buffer, top + 1);
|
||||
}
|
||||
if (!root->left && !root->right) {
|
||||
buffer[top] = '\0';
|
||||
codes[(unsigned char)root->data] = strdup(buffer);
|
||||
}
|
||||
}
|
||||
|
||||
// Compress using Huffman encoding
|
||||
void compress_1(const char* input_file_name, const char* output_file_name) {
|
||||
FILE* input = fopen(input_file_name, "rb");
|
||||
FILE* output = fopen(output_file_name, "wb");
|
||||
if (!input || !output) {
|
||||
perror("File error");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
int freq[256] = {0};
|
||||
char buffer[BUFSIZE];
|
||||
size_t bytes_read;
|
||||
|
||||
while ((bytes_read = fread(buffer, 1, BUFSIZE, input)) > 0) {
|
||||
for (size_t i = 0; i < bytes_read; i++) {
|
||||
freq[(unsigned char)buffer[i]]++;
|
||||
}
|
||||
}
|
||||
|
||||
char data[256];
|
||||
int freq_array[256];
|
||||
int size = 0;
|
||||
|
||||
for (int i = 0; i < 256; i++) {
|
||||
if (freq[i] > 0) {
|
||||
data[size] = (char)i;
|
||||
freq_array[size] = freq[i];
|
||||
size++;
|
||||
}
|
||||
}
|
||||
|
||||
struct MinHeapNode* root = buildHuffmanTree(data, freq_array, size);
|
||||
char* codes[256] = {NULL};
|
||||
char code_buffer[256];
|
||||
generateCodes(root, codes, code_buffer, 0);
|
||||
|
||||
rewind(input);
|
||||
|
||||
fwrite(&size, sizeof(int), 1, output);
|
||||
for (int i = 0; i < size; i++) {
|
||||
fputc(data[i], output);
|
||||
fwrite(&freq_array[i], sizeof(int), 1, output);
|
||||
}
|
||||
|
||||
unsigned char bit_buffer = 0;
|
||||
int bit_count = 0;
|
||||
|
||||
while ((bytes_read = fread(buffer, 1, BUFSIZE, input)) > 0) {
|
||||
for (size_t i = 0; i < bytes_read; i++) {
|
||||
char* code = codes[(unsigned char)buffer[i]];
|
||||
for (char* p = code; *p; p++) {
|
||||
bit_buffer = (bit_buffer << 1) | (*p - '0');
|
||||
bit_count++;
|
||||
if (bit_count == 8) {
|
||||
fputc(bit_buffer, output);
|
||||
bit_buffer = 0;
|
||||
bit_count = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (bit_count > 0) {
|
||||
bit_buffer <<= (8 - bit_count);
|
||||
fputc(bit_buffer, output);
|
||||
}
|
||||
|
||||
fclose(input);
|
||||
fclose(output);
|
||||
for (int i = 0; i < 256; i++) {
|
||||
free(codes[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Decompress using Huffman encoding
|
||||
void decompress_1(const char* input_file_name, const char* output_file_name) {
|
||||
FILE* input = fopen(input_file_name, "rb");
|
||||
FILE* output = fopen(output_file_name, "wb");
|
||||
if (!input || !output) {
|
||||
perror("File error");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
int size;
|
||||
fread(&size, sizeof(int), 1, input);
|
||||
char data[256];
|
||||
int freq[256];
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
data[i] = fgetc(input);
|
||||
fread(&freq[i], sizeof(int), 1, input);
|
||||
}
|
||||
|
||||
struct MinHeapNode* root = buildHuffmanTree(data, freq, size);
|
||||
struct MinHeapNode* current = root;
|
||||
|
||||
int bit_buffer;
|
||||
int bit_count = 0;
|
||||
int byte;
|
||||
|
||||
while ((byte = fgetc(input)) != EOF) {
|
||||
for (int i = 7; i >= 0; i--) {
|
||||
int bit = (byte >> i) & 1;
|
||||
if (bit == 0) {
|
||||
current = current->left;
|
||||
// Читаємо залишок файлу
|
||||
while (fread(¤t_byte, 1, 1, infile) == 1) {
|
||||
if (current_byte == previous_byte && count < 255) {
|
||||
count++; // Збільшуємо лічильник
|
||||
} else {
|
||||
current = current->right;
|
||||
}
|
||||
|
||||
if (!current->left && !current->right) {
|
||||
fputc(current->data, output);
|
||||
current = root;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fclose(input);
|
||||
fclose(output);
|
||||
}
|
||||
|
||||
// Compress using RLE
|
||||
void compress_2(const char* input_file_name, const char* output_file_name) {
|
||||
FILE* input = fopen(input_file_name, "rb");
|
||||
FILE* output = fopen(output_file_name, "wb");
|
||||
if (!input || !output) {
|
||||
perror("File error");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
unsigned char buffer[BUFSIZE];
|
||||
size_t bytes_read;
|
||||
|
||||
while ((bytes_read = fread(buffer, 1, BUFSIZE, input)) > 0) {
|
||||
for (size_t i = 0; i < bytes_read; i++) {
|
||||
unsigned char current = buffer[i];
|
||||
size_t count = 1;
|
||||
while (i + 1 < bytes_read && buffer[i + 1] == current) {
|
||||
count++;
|
||||
i++;
|
||||
}
|
||||
|
||||
fputc(current, output);
|
||||
fputc(count, output);
|
||||
// Записуємо попередній символ і його кількість
|
||||
fwrite(&previous_byte, 1, 1, outfile);
|
||||
fwrite(&count, 1, 1, outfile);
|
||||
previous_byte = current_byte;
|
||||
count = 1;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(input);
|
||||
fclose(output);
|
||||
// Записуємо останній символ
|
||||
fwrite(&previous_byte, 1, 1, outfile);
|
||||
fwrite(&count, 1, 1, outfile);
|
||||
|
||||
fclose(infile);
|
||||
fclose(outfile);
|
||||
|
||||
return 1; // Повертаємо успішний результат
|
||||
}
|
||||
|
||||
// Decompress using RLE
|
||||
// Expand a run-length encoded file written by compress_2.
//
// The input is read as (byte value, run length) pairs, one byte each; every
// pair writes `run length` copies of the value to the output.
//
// On failure to open either file, or when the input ends in the middle of a
// pair, a message is printed and the process exits with EXIT_FAILURE.
void decompress_2(const char *input_file_name, const char *output_file_name) {
    FILE *input = fopen(input_file_name, "rb");
    FILE *output = fopen(output_file_name, "wb");
    if (!input || !output) {
        perror("File error");
        exit(EXIT_FAILURE);
    }

    int value;
    while ((value = fgetc(input)) != EOF) {
        const int run = fgetc(input);
        if (run == EOF) {
            // A value byte without its length: the stream was cut short.
            // Previously this byte was dropped without any diagnostic.
            fprintf(stderr, "decompress_2: truncated input\n");
            fclose(input);
            fclose(output);
            exit(EXIT_FAILURE);
        }
        for (int written = 0; written < run; written++) {
            fputc(value, output);
        }
    }

    fclose(input);
    fclose(output);
}
|
||||
|
||||
|
||||
// --- Алгоритм Хаффмана (Huffman Coding) ---
|
||||
|
||||
// Структура для вузлів дерева Хаффмана
|
||||
// A byte value paired with its occurrence count.
typedef struct {
    unsigned char symbol;  // the byte value
    size_t frequency;      // how many times it occurs
} HuffmanSymbol;
|
||||
|
||||
typedef struct Node {
|
||||
HuffmanSymbol symbol;
|
||||
struct Node *left, *right;
|
||||
} Node;
|
||||
|
||||
// Функція для порівняння вузлів для використання в черзі
|
||||
int compare_nodes(const void *a, const void *b) {
|
||||
return ((Node*)a)->symbol.frequency - ((Node*)b)->symbol.frequency;
|
||||
}
|
||||
|
||||
// Створення дерева Хаффмана
|
||||
Node* create_huffman_tree(HuffmanSymbol* symbols, size_t n) {
|
||||
// Використовуємо чергу для побудови дерева Хаффмана
|
||||
qsort(symbols, n, sizeof(HuffmanSymbol), compare_nodes);
|
||||
|
||||
// Створюємо чергу вузлів для побудови дерева
|
||||
Node** queue = malloc(n * sizeof(Node*));
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
queue[i] = malloc(sizeof(Node));
|
||||
queue[i]->symbol = symbols[i];
|
||||
queue[i]->left = queue[i]->right = NULL;
|
||||
}
|
||||
|
||||
size_t queue_size = n;
|
||||
|
||||
// Побудова дерева Хаффмана
|
||||
while (queue_size > 1) {
|
||||
// Зливаємо два найменші елементи
|
||||
Node* left = queue[0];
|
||||
Node* right = queue[1];
|
||||
|
||||
Node* parent = malloc(sizeof(Node));
|
||||
parent->symbol.symbol = 0; // Спільний символ
|
||||
parent->symbol.frequency = left->symbol.frequency + right->symbol.frequency;
|
||||
parent->left = left;
|
||||
parent->right = right;
|
||||
|
||||
// Видаляємо перші два елементи з черги та додаємо новий
|
||||
memmove(queue, queue + 2, (queue_size - 2) * sizeof(Node*));
|
||||
queue[queue_size - 2] = parent;
|
||||
queue_size--;
|
||||
|
||||
qsort(queue, queue_size, sizeof(Node*), compare_nodes);
|
||||
}
|
||||
|
||||
Node* root = queue[0];
|
||||
free(queue);
|
||||
|
||||
return root;
|
||||
}
|
||||
|
||||
// Функція для стиснення з використанням дерева Хаффмана
|
||||
int compress_1(const char* input_file_name, const char* output_file_name) {
|
||||
FILE *infile = fopen(input_file_name, "rb");
|
||||
if (!infile) {
|
||||
perror("Error opening input file");
|
||||
return -1;
|
||||
}
|
||||
|
||||
fseek(infile, 0, SEEK_END);
|
||||
size_t file_size = ftell(infile);
|
||||
fseek(infile, 0, SEEK_SET);
|
||||
|
||||
unsigned char* buffer = malloc(file_size);
|
||||
if (!buffer) {
|
||||
fclose(infile);
|
||||
return -1;
|
||||
}
|
||||
|
||||
fread(buffer, 1, file_size, infile);
|
||||
fclose(infile);
|
||||
|
||||
// Підрахунок частоти кожного байта
|
||||
size_t frequencies[256] = {0};
|
||||
for (size_t i = 0; i < file_size; i++) {
|
||||
frequencies[buffer[i]]++;
|
||||
}
|
||||
|
||||
HuffmanSymbol symbols[256];
|
||||
size_t num_symbols = 0;
|
||||
|
||||
for (int i = 0; i < 256; i++) {
|
||||
if (frequencies[i] > 0) {
|
||||
symbols[num_symbols].symbol = (unsigned char)i;
|
||||
symbols[num_symbols].frequency = frequencies[i];
|
||||
num_symbols++;
|
||||
}
|
||||
}
|
||||
|
||||
Node* huffman_tree = create_huffman_tree(symbols, num_symbols);
|
||||
|
||||
// Тут треба реалізувати кодування та запис бітових кодів в файл
|
||||
// Оскільки це складніше, я залишаю це як заготовку для подальшої реалізації
|
||||
|
||||
free(buffer);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Huffman decompression entry point: not implemented yet.
//
// Decoding needs the tree (or the code length of every symbol) to be stored
// in the compressed file, which the compressor does not do so far.
// Both arguments are ignored; always returns 0.
int decompress_1(const char *input_file_name, const char *output_file_name) {
    (void)input_file_name;
    (void)output_file_name;
    return 0;
}
|
||||
|
Loading…
Reference in New Issue
Block a user