Обновить sk1/compressor.c

This commit is contained in:
Yevhen Kozirovskyi 2025-01-26 19:22:02 +00:00
parent 6c5368fde3
commit dbba54a974

View File

@ -1,58 +1,72 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <ctype.h>
#define MAX_TREE_HT 256 #define MAX_TREE_NODES 256
// Huffman coding compressor: node/heap primitives, tree construction,
// code generation, and the compress_1 / decompress_1 entry points.
//
// Compressed file layout (native byte order and native type sizes, so a file
// is only portable between builds that agree on int/unsigned/size_t):
//   int       unique_count                      number of distinct bytes
//   repeated unique_count times:
//     unsigned char symbol
//     unsigned      frequency
//   packed code bits, MSB first, last byte zero-padded
//   size_t    total_bits                        number of meaningful code bits

// Number of distinct byte values. A code is at most 255 bits long, so this is
// also large enough for the code scratch buffer including its terminator.
enum { HUFFMAN_SYMBOL_COUNT = 256 };

// A node of the Huffman tree. Leaves carry a byte in `data`; internal nodes
// carry the placeholder '$' and the summed frequency of their subtree.
typedef struct HuffmanNode {
    unsigned char data;
    unsigned frequency;
    struct HuffmanNode* left;
    struct HuffmanNode* right;
} HuffmanNode;

// Binary min-heap of tree nodes ordered by frequency (used as a priority queue).
typedef struct MinHeap {
    unsigned size;       // number of nodes currently stored
    unsigned capacity;   // maximum number of nodes the heap may hold
    HuffmanNode** array; // heap storage, array[0] is the minimum
} MinHeap;

// Allocates a leaf node for `data` with the given frequency.
// Terminates the process if memory cannot be allocated.
HuffmanNode* newNode(unsigned char data, unsigned frequency) {
    HuffmanNode* node = malloc(sizeof *node);
    if (!node) {
        perror("Failed to allocate memory for new node");
        exit(EXIT_FAILURE);
    }
    node->data = data;
    node->frequency = frequency;
    node->left = NULL;
    node->right = NULL;
    return node;
}

// Allocates an empty heap able to hold `capacity` nodes.
// Terminates the process if memory cannot be allocated.
MinHeap* createMinHeap(unsigned capacity) {
    MinHeap* minHeap = malloc(sizeof *minHeap);
    if (!minHeap) {
        perror("Failed to allocate memory for MinHeap");
        exit(EXIT_FAILURE);
    }
    minHeap->size = 0;
    minHeap->capacity = capacity;
    // Always request at least one slot: a zero-byte allocation may legally
    // return NULL, which would be misreported as an out-of-memory failure.
    minHeap->array = calloc(capacity ? capacity : 1, sizeof *minHeap->array);
    if (!minHeap->array) {
        perror("Failed to allocate memory for MinHeap array");
        exit(EXIT_FAILURE);
    }
    return minHeap;
}

// Exchanges two heap slots.
void swapMinHeapNode(HuffmanNode** a, HuffmanNode** b) {
    HuffmanNode* temp = *a;
    *a = *b;
    *b = temp;
}

// Restores the heap property by sifting the node at `idx` downwards.
// A child only replaces its parent when it is strictly smaller, and the left
// child is examined first; the tree built for a given input depends on this
// tie-breaking, so it must not change.
void minHeapify(MinHeap* minHeap, int idx) {
    if (idx < 0)
        return;

    unsigned current = (unsigned)idx;
    for (;;) {
        unsigned smallest = current;
        unsigned left = 2 * current + 1;
        unsigned right = 2 * current + 2;

        if (left < minHeap->size &&
            minHeap->array[left]->frequency < minHeap->array[smallest]->frequency)
            smallest = left;
        if (right < minHeap->size &&
            minHeap->array[right]->frequency < minHeap->array[smallest]->frequency)
            smallest = right;

        if (smallest == current)
            break;

        swapMinHeapNode(&minHeap->array[smallest], &minHeap->array[current]);
        current = smallest;
    }
}

// Returns non-zero when exactly one node is left in the heap.
int isSizeOne(MinHeap* minHeap) {
    return (minHeap->size == 1);
}

// Removes and returns the node with the smallest frequency,
// or NULL when the heap is empty.
HuffmanNode* extractMin(MinHeap* minHeap) {
    if (minHeap->size == 0)
        return NULL;

    HuffmanNode* temp = minHeap->array[0];
    minHeap->array[0] = minHeap->array[minHeap->size - 1];
    --minHeap->size;
    minHeapify(minHeap, 0);
    return temp;
}

// Inserts `node`, sifting it up past every parent with a strictly larger
// frequency. Terminates the process if the heap is already full.
void insertMinHeap(MinHeap* minHeap, HuffmanNode* node) {
    if (minHeap->size >= minHeap->capacity) {
        fprintf(stderr, "MinHeap capacity exceeded\n");
        exit(EXIT_FAILURE);
    }

    unsigned i = minHeap->size++;
    while (i > 0) {
        unsigned parent = (i - 1) / 2;
        if (node->frequency >= minHeap->array[parent]->frequency)
            break;
        minHeap->array[i] = minHeap->array[parent];
        i = parent;
    }
    minHeap->array[i] = node;
}

// Turns an arbitrarily ordered array of `size` nodes into a valid min-heap.
void buildMinHeap(MinHeap* minHeap) {
    for (int i = (int)(minHeap->size / 2) - 1; i >= 0; --i)
        minHeapify(minHeap, i);
}

// Releases a tree returned by buildHuffmanTree. Accepts NULL.
static void freeHuffmanTree(HuffmanNode* root) {
    if (!root)
        return;
    freeHuffmanTree(root->left);
    freeHuffmanTree(root->right);
    free(root);
}

// Builds the Huffman tree for the `size` symbols listed in `data`.
// `freq` is indexed by symbol value (freq[data[i]]), not by position.
// Returns the root, which the caller owns, or NULL when `size` <= 0.
// With a single symbol the root is itself a leaf.
HuffmanNode* buildHuffmanTree(unsigned char* data, unsigned* freq, int size) {
    if (size <= 0)
        return NULL;

    MinHeap* minHeap = createMinHeap((unsigned)size);

    // insertMinHeap keeps the heap ordered after every insertion, so no
    // separate buildMinHeap pass is needed afterwards.
    for (int i = 0; i < size; ++i)
        insertMinHeap(minHeap, newNode(data[i], freq[data[i]]));

    while (!isSizeOne(minHeap)) {
        HuffmanNode* left = extractMin(minHeap);
        HuffmanNode* right = extractMin(minHeap);

        HuffmanNode* top = newNode('$', left->frequency + right->frequency);
        top->left = left;
        top->right = right;

        insertMinHeap(minHeap, top);
    }

    HuffmanNode* root = extractMin(minHeap);
    free(minHeap->array);
    free(minHeap);
    return root;
}

// Walks the tree and stores, for every leaf, a heap-allocated string of '0'
// (left) and '1' (right) characters in codes[leaf byte].
//   root  - subtree to walk; NULL is accepted and does nothing
//   arr   - scratch buffer, must hold the longest code plus a terminator
//   top   - number of characters of `arr` already filled in by the callers
//   codes - table indexed by byte value; the caller frees the strings
// A tree consisting of a single leaf yields the empty string for that byte.
void generateCodes(HuffmanNode* root, char* arr, int top, char** codes) {
    if (!root)
        return;

    if (root->left) {
        arr[top] = '0';
        generateCodes(root->left, arr, top + 1, codes);
    }

    if (root->right) {
        arr[top] = '1';
        generateCodes(root->right, arr, top + 1, codes);
    }

    if (!root->left && !root->right) {
        arr[top] = '\0';
        // malloc + memcpy instead of strdup, which strict ISO C11 does not declare.
        size_t length = (size_t)top + 1;
        char* copy = malloc(length);
        if (!copy) {
            perror("Failed to allocate memory for Huffman code");
            exit(EXIT_FAILURE);
        }
        memcpy(copy, arr, length);
        codes[root->data] = copy;
    }
}

// Compresses `input_file` into `output_file` using the layout described at
// the top of this section. Returns 0 on success and -1 on any I/O error.
// The output file is only created once the input has been opened.
int compress_1(const char* input_file, const char* output_file) {
    FILE* input = fopen(input_file, "rb");
    if (!input) {
        perror("Error opening file");
        return -1;
    }
    FILE* output = fopen(output_file, "wb");
    if (!output) {
        perror("Error opening file");
        fclose(input);
        return -1;
    }

    int status = -1;
    HuffmanNode* root = NULL;
    char* codes[HUFFMAN_SYMBOL_COUNT] = {0};
    char arr[HUFFMAN_SYMBOL_COUNT];
    unsigned freq[HUFFMAN_SYMBOL_COUNT] = {0};
    unsigned char unique_data[HUFFMAN_SYMBOL_COUNT];
    int unique_count = 0;
    int ch;

    // First pass: count how often every byte value occurs.
    while ((ch = fgetc(input)) != EOF)
        freq[ch]++;
    if (ferror(input)) {
        perror("Error reading input file");
        goto cleanup;
    }

    for (int i = 0; i < HUFFMAN_SYMBOL_COUNT; i++) {
        if (freq[i] > 0)
            unique_data[unique_count++] = (unsigned char)i;
    }

    // For an empty input the tree is NULL and no codes are produced.
    root = buildHuffmanTree(unique_data, freq, unique_count);
    generateCodes(root, arr, 0, codes);

    // Header: symbol count followed by (symbol, frequency) pairs.
    fwrite(&unique_count, sizeof(int), 1, output);
    for (int i = 0; i < unique_count; i++) {
        unsigned char symbol = unique_data[i];
        fwrite(&symbol, sizeof(unsigned char), 1, output);
        fwrite(&freq[symbol], sizeof(unsigned), 1, output);
    }

    if (fseek(input, 0, SEEK_SET) != 0) {
        perror("Error rewinding input file");
        goto cleanup;
    }

    // Second pass: emit the code of every byte, packed MSB first.
    unsigned char buffer = 0;
    int bit_count = 0;
    size_t total_bits = 0;

    while ((ch = fgetc(input)) != EOF) {
        const char* code = codes[ch];
        if (!code) {
            // Only possible if the file changed between the two passes.
            fprintf(stderr, "Error: Undefined code for byte %d\n", ch);
            goto cleanup;
        }
        for (const char* p = code; *p != '\0'; ++p) {
            buffer = (unsigned char)((buffer << 1) | (*p - '0'));
            bit_count++;
            total_bits++;

            if (bit_count == 8) {
                fwrite(&buffer, sizeof(unsigned char), 1, output);
                bit_count = 0;
                buffer = 0;
            }
        }
    }
    if (ferror(input)) {
        perror("Error reading input file");
        goto cleanup;
    }

    // Left-align the remaining bits in a final, zero-padded byte.
    if (bit_count > 0) {
        buffer = (unsigned char)(buffer << (8 - bit_count));
        fwrite(&buffer, sizeof(unsigned char), 1, output);
    }

    // Trailer: number of meaningful bits, so the decoder can ignore the padding.
    fwrite(&total_bits, sizeof(size_t), 1, output);

    if (ferror(output)) {
        perror("Error writing output file");
        goto cleanup;
    }
    status = 0;

cleanup:
    for (int i = 0; i < HUFFMAN_SYMBOL_COUNT; i++)
        free(codes[i]);
    freeHuffmanTree(root);
    fclose(input);
    if (fclose(output) != 0)
        status = -1;
    return status;
}

// Restores the original bytes of a file written by compress_1.
// Returns 0 on success and -1 on I/O errors or a malformed input file.
int decompress_1(const char* input_file_name, const char* output_file_name) {
    FILE* input = fopen(input_file_name, "rb");
    if (!input) {
        perror("Error opening input file");
        return -1;
    }

    FILE* output = fopen(output_file_name, "wb");
    if (!output) {
        perror("Error opening output file");
        fclose(input);
        return -1;
    }

    int status = -1;
    HuffmanNode* root = NULL;
    unsigned char unique_data[HUFFMAN_SYMBOL_COUNT];
    unsigned freq[HUFFMAN_SYMBOL_COUNT] = {0};
    int unique_count = 0;
    size_t total_bits = 0;

    if (fread(&unique_count, sizeof(int), 1, input) != 1) {
        perror("Error reading from input file");
        goto cleanup;
    }
    // The count comes from the file; reject values that would overflow the tables.
    if (unique_count < 0 || unique_count > HUFFMAN_SYMBOL_COUNT) {
        fprintf(stderr, "Error: invalid symbol count %d in input file\n", unique_count);
        goto cleanup;
    }

    for (int i = 0; i < unique_count; i++) {
        if (fread(&unique_data[i], sizeof(unsigned char), 1, input) != 1 ||
            fread(&freq[unique_data[i]], sizeof(unsigned), 1, input) != 1) {
            perror("Error reading from input file");
            goto cleanup;
        }
    }

    // The bit count is stored in the last sizeof(size_t) bytes of the file.
    // The offset is cast before negating: -sizeof(...) is an unsigned value.
    if (fseek(input, -(long)sizeof(size_t), SEEK_END) != 0 ||
        fread(&total_bits, sizeof(size_t), 1, input) != 1) {
        perror("Error reading from input file");
        goto cleanup;
    }

    long data_start = (long)(sizeof(int) +
        (size_t)unique_count * (sizeof(unsigned char) + sizeof(unsigned)));
    if (fseek(input, data_start, SEEK_SET) != 0) {
        perror("Error reading from input file");
        goto cleanup;
    }

    root = buildHuffmanTree(unique_data, freq, unique_count);

    if (root && !root->left && !root->right) {
        // Only one distinct byte: its code is empty and no bits were stored,
        // so the content is reproduced from the recorded frequency.
        for (unsigned i = 0; i < root->frequency; i++)
            fputc(root->data, output);
    } else if (root) {
        HuffmanNode* current = root;
        size_t bits_read = 0;
        int ch;

        while (bits_read < total_bits && (ch = fgetc(input)) != EOF) {
            for (int bit = 7; bit >= 0 && bits_read < total_bits; --bit, ++bits_read) {
                current = ((ch >> bit) & 1) ? current->right : current->left;
                if (!current) {
                    fprintf(stderr, "Error: corrupt code stream in input file\n");
                    goto cleanup;
                }
                if (!current->left && !current->right) {
                    fputc(current->data, output);
                    current = root;
                }
            }
        }

        if (bits_read < total_bits) {
            fprintf(stderr, "Error: input file is truncated\n");
            goto cleanup;
        }
    }

    if (ferror(output)) {
        perror("Error writing output file");
        goto cleanup;
    }
    status = 0;

cleanup:
    freeHuffmanTree(root);
    fclose(input);
    if (fclose(output) != 0)
        status = -1;
    return status;
}
int compress_2(const char* input_file_name, const char* output_file_name); int compress_2(const char* input_file_name, const char* output_file_name);
int decompress_2(const char* input_file_name, const char* output_file_name); int decompress_2(const char* input_file_name, const char* output_file_name);