Add sk1/compresor.c
This commit is contained in:
parent
babf4f5ed3
commit
6750abf18a
304
sk1/compresor.c
Normal file
304
sk1/compresor.c
Normal file
@ -0,0 +1,304 @@
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "compressor.h"
|
||||
|
||||
#define BUFSIZE 1024
|
||||
|
||||
// Node of the Huffman tree.
// Leaves hold a byte value in `data`; nodes created while merging two
// subtrees are given '$' there. `freq` is the weight the heap orders by.
struct MinHeapNode {
    char data;                  // byte value carried by the node
    unsigned freq;              // weight (occurrence count or sum of children)
    struct MinHeapNode *left;   // subtree followed on a 0 bit, NULL on a leaf
    struct MinHeapNode *right;  // subtree followed on a 1 bit, NULL on a leaf
};
|
||||
|
||||
// Binary min-heap of Huffman nodes, stored as an array of node pointers
// and ordered by each node's `freq`.
struct MinHeap {
    unsigned size;               // number of slots currently in use
    unsigned capacity;           // number of slots allocated in `array`
    struct MinHeapNode **array;  // heap storage; index 0 is the minimum
};
|
||||
|
||||
// Create a new node
|
||||
struct MinHeapNode* newNode(char data, unsigned freq) {
|
||||
struct MinHeapNode* temp = (struct MinHeapNode*)malloc(sizeof(struct MinHeapNode));
|
||||
temp->data = data;
|
||||
temp->freq = freq;
|
||||
temp->left = temp->right = NULL;
|
||||
return temp;
|
||||
}
|
||||
|
||||
// Create a MinHeap
|
||||
struct MinHeap* createMinHeap(unsigned capacity) {
|
||||
struct MinHeap* minHeap = (struct MinHeap*)malloc(sizeof(struct MinHeap));
|
||||
minHeap->size = 0;
|
||||
minHeap->capacity = capacity;
|
||||
minHeap->array = (struct MinHeapNode**)malloc(minHeap->capacity * sizeof(struct MinHeapNode*));
|
||||
return minHeap;
|
||||
}
|
||||
|
||||
// Exchange the node pointers stored in the two slots `a` and `b`.
void swapMinHeapNode(struct MinHeapNode** a, struct MinHeapNode** b) {
    struct MinHeapNode *from_b = *b;

    *b = *a;
    *a = from_b;
}
|
||||
|
||||
// MinHeapify a node
|
||||
void minHeapify(struct MinHeap* minHeap, int idx) {
|
||||
int smallest = idx;
|
||||
int left = 2 * idx + 1;
|
||||
int right = 2 * idx + 2;
|
||||
|
||||
if (left < minHeap->size && minHeap->array[left]->freq < minHeap->array[smallest]->freq)
|
||||
smallest = left;
|
||||
|
||||
if (right < minHeap->size && minHeap->array[right]->freq < minHeap->array[smallest]->freq)
|
||||
smallest = right;
|
||||
|
||||
if (smallest != idx) {
|
||||
swapMinHeapNode(&minHeap->array[smallest], &minHeap->array[idx]);
|
||||
minHeapify(minHeap, smallest);
|
||||
}
|
||||
}
|
||||
|
||||
// Extract the minimum value node
|
||||
struct MinHeapNode* extractMin(struct MinHeap* minHeap) {
|
||||
struct MinHeapNode* temp = minHeap->array[0];
|
||||
minHeap->array[0] = minHeap->array[minHeap->size - 1];
|
||||
--minHeap->size;
|
||||
minHeapify(minHeap, 0);
|
||||
return temp;
|
||||
}
|
||||
|
||||
// Insert a node into the MinHeap
|
||||
void insertMinHeap(struct MinHeap* minHeap, struct MinHeapNode* minHeapNode) {
|
||||
++minHeap->size;
|
||||
int i = minHeap->size - 1;
|
||||
|
||||
while (i && minHeapNode->freq < minHeap->array[(i - 1) / 2]->freq) {
|
||||
minHeap->array[i] = minHeap->array[(i - 1) / 2];
|
||||
i = (i - 1) / 2;
|
||||
}
|
||||
minHeap->array[i] = minHeapNode;
|
||||
}
|
||||
|
||||
// Build a MinHeap
|
||||
struct MinHeap* buildMinHeap(char data[], int freq[], int size) {
|
||||
struct MinHeap* minHeap = createMinHeap(size);
|
||||
for (int i = 0; i < size; ++i)
|
||||
minHeap->array[i] = newNode(data[i], freq[i]);
|
||||
minHeap->size = size;
|
||||
for (int i = (minHeap->size - 2) / 2; i >= 0; --i)
|
||||
minHeapify(minHeap, i);
|
||||
return minHeap;
|
||||
}
|
||||
|
||||
// Build Huffman Tree
|
||||
struct MinHeapNode* buildHuffmanTree(char data[], int freq[], int size) {
|
||||
struct MinHeapNode *left, *right, *top;
|
||||
struct MinHeap* minHeap = buildMinHeap(data, freq, size);
|
||||
|
||||
while (minHeap->size != 1) {
|
||||
left = extractMin(minHeap);
|
||||
right = extractMin(minHeap);
|
||||
|
||||
top = newNode('$', left->freq + right->freq);
|
||||
top->left = left;
|
||||
top->right = right;
|
||||
|
||||
insertMinHeap(minHeap, top);
|
||||
}
|
||||
|
||||
return extractMin(minHeap);
|
||||
}
|
||||
|
||||
// Generate Huffman Codes
|
||||
void generateCodes(struct MinHeapNode* root, char** codes, char* buffer, int top) {
|
||||
if (root->left) {
|
||||
buffer[top] = '0';
|
||||
generateCodes(root->left, codes, buffer, top + 1);
|
||||
}
|
||||
if (root->right) {
|
||||
buffer[top] = '1';
|
||||
generateCodes(root->right, codes, buffer, top + 1);
|
||||
}
|
||||
if (!root->left && !root->right) {
|
||||
buffer[top] = '\0';
|
||||
codes[(unsigned char)root->data] = strdup(buffer);
|
||||
}
|
||||
}
|
||||
|
||||
// Compress using Huffman encoding
|
||||
void compress_1(const char* input_file_name, const char* output_file_name) {
|
||||
FILE* input = fopen(input_file_name, "rb");
|
||||
FILE* output = fopen(output_file_name, "wb");
|
||||
if (!input || !output) {
|
||||
perror("File error");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
int freq[256] = {0};
|
||||
char buffer[BUFSIZE];
|
||||
size_t bytes_read;
|
||||
|
||||
while ((bytes_read = fread(buffer, 1, BUFSIZE, input)) > 0) {
|
||||
for (size_t i = 0; i < bytes_read; i++) {
|
||||
freq[(unsigned char)buffer[i]]++;
|
||||
}
|
||||
}
|
||||
|
||||
char data[256];
|
||||
int freq_array[256];
|
||||
int size = 0;
|
||||
|
||||
for (int i = 0; i < 256; i++) {
|
||||
if (freq[i] > 0) {
|
||||
data[size] = (char)i;
|
||||
freq_array[size] = freq[i];
|
||||
size++;
|
||||
}
|
||||
}
|
||||
|
||||
struct MinHeapNode* root = buildHuffmanTree(data, freq_array, size);
|
||||
char* codes[256] = {NULL};
|
||||
char code_buffer[256];
|
||||
generateCodes(root, codes, code_buffer, 0);
|
||||
|
||||
rewind(input);
|
||||
|
||||
fwrite(&size, sizeof(int), 1, output);
|
||||
for (int i = 0; i < size; i++) {
|
||||
fputc(data[i], output);
|
||||
fwrite(&freq_array[i], sizeof(int), 1, output);
|
||||
}
|
||||
|
||||
unsigned char bit_buffer = 0;
|
||||
int bit_count = 0;
|
||||
|
||||
while ((bytes_read = fread(buffer, 1, BUFSIZE, input)) > 0) {
|
||||
for (size_t i = 0; i < bytes_read; i++) {
|
||||
char* code = codes[(unsigned char)buffer[i]];
|
||||
for (char* p = code; *p; p++) {
|
||||
bit_buffer = (bit_buffer << 1) | (*p - '0');
|
||||
bit_count++;
|
||||
if (bit_count == 8) {
|
||||
fputc(bit_buffer, output);
|
||||
bit_buffer = 0;
|
||||
bit_count = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (bit_count > 0) {
|
||||
bit_buffer <<= (8 - bit_count);
|
||||
fputc(bit_buffer, output);
|
||||
}
|
||||
|
||||
fclose(input);
|
||||
fclose(output);
|
||||
for (int i = 0; i < 256; i++) {
|
||||
free(codes[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Decompress using Huffman encoding
|
||||
void decompress_1(const char* input_file_name, const char* output_file_name) {
|
||||
FILE* input = fopen(input_file_name, "rb");
|
||||
FILE* output = fopen(output_file_name, "wb");
|
||||
if (!input || !output) {
|
||||
perror("File error");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
int size;
|
||||
fread(&size, sizeof(int), 1, input);
|
||||
char data[256];
|
||||
int freq[256];
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
data[i] = fgetc(input);
|
||||
fread(&freq[i], sizeof(int), 1, input);
|
||||
}
|
||||
|
||||
struct MinHeapNode* root = buildHuffmanTree(data, freq, size);
|
||||
struct MinHeapNode* current = root;
|
||||
|
||||
int bit_buffer;
|
||||
int bit_count = 0;
|
||||
int byte;
|
||||
|
||||
while ((byte = fgetc(input)) != EOF) {
|
||||
for (int i = 7; i >= 0; i--) {
|
||||
int bit = (byte >> i) & 1;
|
||||
if (bit == 0) {
|
||||
current = current->left;
|
||||
} else {
|
||||
current = current->right;
|
||||
}
|
||||
|
||||
if (!current->left && !current->right) {
|
||||
fputc(current->data, output);
|
||||
current = root;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fclose(input);
|
||||
fclose(output);
|
||||
}
|
||||
|
||||
// Compress `input_file_name` into `output_file_name` with run-length
// encoding.
//
// The output is a sequence of (value, count) byte pairs. The count is a
// single byte, so a run is split into pieces of at most 255; a longer
// count would be truncated when written and the run could not be
// restored. Runs are followed across the whole file.
//
// Any failure to open, read or write a file is reported and ends the
// process with EXIT_FAILURE.
void compress_2(const char* input_file_name, const char* output_file_name) {
    enum { MAX_RUN = 255 };  // largest count one output byte can hold

    // Open the input first so a missing input never creates or truncates
    // the output file.
    FILE *input = fopen(input_file_name, "rb");
    if (input == NULL) {
        perror("File error");
        exit(EXIT_FAILURE);
    }
    FILE *output = fopen(output_file_name, "wb");
    if (output == NULL) {
        perror("File error");
        fclose(input);
        exit(EXIT_FAILURE);
    }

    int current = fgetc(input);
    while (current != EOF) {
        int count = 1;
        int next;

        // Extend the run while the same value keeps coming and the count
        // still fits. When the loop stops, `next` is the first byte that
        // is not part of this pair (or EOF) and starts the next run.
        while ((next = fgetc(input)) == current && count < MAX_RUN) {
            count++;
        }

        fputc(current, output);
        fputc(count, output);

        current = next;
    }

    if (ferror(input)) {
        perror("File error");
        exit(EXIT_FAILURE);
    }
    fclose(input);

    // fclose flushes, so a write error may only surface here.
    int write_failed = ferror(output);
    if (fclose(output) != 0) {
        write_failed = 1;
    }
    if (write_failed) {
        perror("File error");
        exit(EXIT_FAILURE);
    }
}
|
||||
|
||||
// Decompress a run-length encoded file into `output_file_name`.
//
// The input is read as (value, count) byte pairs and `value` is written
// `count` times for each pair. A trailing value byte with no count after
// it produces no output.
//
// Any failure to open or write a file is reported and ends the process
// with EXIT_FAILURE.
void decompress_2(const char* input_file_name, const char* output_file_name) {
    // Open the input first so a missing input never creates or truncates
    // the output file.
    FILE *input = fopen(input_file_name, "rb");
    if (input == NULL) {
        perror("File error");
        exit(EXIT_FAILURE);
    }
    FILE *output = fopen(output_file_name, "wb");
    if (output == NULL) {
        perror("File error");
        fclose(input);
        exit(EXIT_FAILURE);
    }

    int value;
    while ((value = fgetc(input)) != EOF) {
        int count = fgetc(input);
        if (count == EOF) {
            break;  // dangling value without a count
        }
        for (int i = 0; i < count; i++) {
            fputc(value, output);
        }
    }

    fclose(input);

    // fclose flushes, so a write error may only surface here.
    int write_failed = ferror(output);
    if (fclose(output) != 0) {
        write_failed = 1;
    }
    if (write_failed) {
        perror("File error");
        exit(EXIT_FAILURE);
    }
}
|
Loading…
Reference in New Issue
Block a user