Обновить sk1/compressor.c
This commit is contained in:
parent
6c5368fde3
commit
dbba54a974
364
sk1/compressor.c
364
sk1/compressor.c
@ -1,58 +1,72 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
|
||||||
#define MAX_TREE_HT 256
|
#define MAX_TREE_NODES 256
|
||||||
|
|
||||||
// A Huffman tree node
|
// Structure to represent a tree node
|
||||||
struct MinHeapNode {
|
typedef struct HuffmanNode {
|
||||||
unsigned char data;
|
unsigned char data;
|
||||||
unsigned freq;
|
unsigned frequency;
|
||||||
struct MinHeapNode *left, *right;
|
struct HuffmanNode* left;
|
||||||
};
|
struct HuffmanNode* right;
|
||||||
|
} HuffmanNode;
|
||||||
|
|
||||||
// A MinHeap
|
// A structure to represent the Min Heap (Priority Queue)
|
||||||
struct MinHeap {
|
typedef struct MinHeap {
|
||||||
unsigned size;
|
unsigned size;
|
||||||
unsigned capacity;
|
unsigned capacity;
|
||||||
struct MinHeapNode** array;
|
HuffmanNode** array;
|
||||||
};
|
} MinHeap;
|
||||||
|
|
||||||
// Function to create a new node
|
// Function to create a new node
|
||||||
struct MinHeapNode* createNode(unsigned char data, unsigned freq) {
|
HuffmanNode* newNode(unsigned char data, unsigned frequency) {
|
||||||
struct MinHeapNode* temp = (struct MinHeapNode*)malloc(sizeof(struct MinHeapNode));
|
HuffmanNode* node = (HuffmanNode*)malloc(sizeof(HuffmanNode));
|
||||||
temp->data = data;
|
if (!node) {
|
||||||
temp->freq = freq;
|
perror("Failed to allocate memory for new node");
|
||||||
temp->left = temp->right = NULL;
|
exit(EXIT_FAILURE);
|
||||||
return temp;
|
}
|
||||||
|
node->data = data;
|
||||||
|
node->frequency = frequency;
|
||||||
|
node->left = node->right = NULL;
|
||||||
|
return node;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Function to create a MinHeap
|
// Function to create a MinHeap
|
||||||
struct MinHeap* createMinHeap(unsigned capacity) {
|
MinHeap* createMinHeap(unsigned capacity) {
|
||||||
struct MinHeap* minHeap = (struct MinHeap*)malloc(sizeof(struct MinHeap));
|
MinHeap* minHeap = (MinHeap*)malloc(sizeof(MinHeap));
|
||||||
|
if (!minHeap) {
|
||||||
|
perror("Failed to allocate memory for MinHeap");
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
minHeap->size = 0;
|
minHeap->size = 0;
|
||||||
minHeap->capacity = capacity;
|
minHeap->capacity = capacity;
|
||||||
minHeap->array = (struct MinHeapNode**)malloc(minHeap->capacity * sizeof(struct MinHeapNode*));
|
minHeap->array = (HuffmanNode**)malloc(capacity * sizeof(HuffmanNode*));
|
||||||
|
if (!minHeap->array) {
|
||||||
|
perror("Failed to allocate memory for MinHeap array");
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
return minHeap;
|
return minHeap;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Swap function
|
// Function to swap two min heap nodes
|
||||||
void swapMinHeapNode(struct MinHeapNode** a, struct MinHeapNode** b) {
|
void swapMinHeapNode(HuffmanNode** a, HuffmanNode** b) {
|
||||||
struct MinHeapNode* t = *a;
|
HuffmanNode* temp = *a;
|
||||||
*a = *b;
|
*a = *b;
|
||||||
*b = t;
|
*b = temp;
|
||||||
}
|
}
|
||||||
|
|
||||||
// MinHeapify function
|
// Function to min heapify
|
||||||
void minHeapify(struct MinHeap* minHeap, unsigned idx) {
|
void minHeapify(MinHeap* minHeap, int idx) {
|
||||||
unsigned smallest = idx; // Changed to unsigned
|
int smallest = idx;
|
||||||
unsigned left = 2 * idx + 1; // Changed to unsigned
|
int left = 2 * idx + 1;
|
||||||
unsigned right = 2 * idx + 2; // Changed to unsigned
|
int right = 2 * idx + 2;
|
||||||
|
|
||||||
if (left < minHeap->size && minHeap->array[left]->freq < minHeap->array[smallest]->freq)
|
if (left < minHeap->size && minHeap->array[left]->frequency < minHeap->array[smallest]->frequency)
|
||||||
smallest = left;
|
smallest = left;
|
||||||
|
|
||||||
if (right < minHeap->size && minHeap->array[right]->freq < minHeap->array[smallest]->freq)
|
if (right < minHeap->size && minHeap->array[right]->frequency < minHeap->array[smallest]->frequency)
|
||||||
smallest = right;
|
smallest = right;
|
||||||
|
|
||||||
if (smallest != idx) {
|
if (smallest != idx) {
|
||||||
@ -61,9 +75,14 @@ void minHeapify(struct MinHeap* minHeap, unsigned idx) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Extract minimum value node from heap
|
// Check if the size of heap is one
|
||||||
struct MinHeapNode* extractMin(struct MinHeap* minHeap) {
|
int isSizeOne(MinHeap* minHeap) {
|
||||||
struct MinHeapNode* temp = minHeap->array[0];
|
return (minHeap->size == 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract the minimum node from heap
|
||||||
|
HuffmanNode* extractMin(MinHeap* minHeap) {
|
||||||
|
HuffmanNode* temp = minHeap->array[0];
|
||||||
minHeap->array[0] = minHeap->array[minHeap->size - 1];
|
minHeap->array[0] = minHeap->array[minHeap->size - 1];
|
||||||
--minHeap->size;
|
--minHeap->size;
|
||||||
minHeapify(minHeap, 0);
|
minHeapify(minHeap, 0);
|
||||||
@ -71,237 +90,200 @@ struct MinHeapNode* extractMin(struct MinHeap* minHeap) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Insert a new node to MinHeap
|
// Insert a new node to MinHeap
|
||||||
void insertMinHeap(struct MinHeap* minHeap, struct MinHeapNode* minHeapNode) {
|
void insertMinHeap(MinHeap* minHeap, HuffmanNode* node) {
|
||||||
++minHeap->size;
|
++minHeap->size;
|
||||||
int i = minHeap->size - 1;
|
int i = minHeap->size - 1;
|
||||||
while (i && minHeapNode->freq < minHeap->array[(i - 1) / 2]->freq) {
|
while (i && node->frequency < minHeap->array[(i - 1) / 2]->frequency) {
|
||||||
minHeap->array[i] = minHeap->array[(i - 1) / 2];
|
minHeap->array[i] = minHeap->array[(i - 1) / 2];
|
||||||
i = (i - 1) / 2;
|
i = (i - 1) / 2;
|
||||||
}
|
}
|
||||||
minHeap->array[i] = minHeapNode;
|
minHeap->array[i] = node;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build the MinHeap
|
// Build a min heap of given capacity
|
||||||
void buildMinHeap(struct MinHeap* minHeap) {
|
void buildMinHeap(MinHeap* minHeap) {
|
||||||
int n = minHeap->size - 1;
|
int n = minHeap->size - 1;
|
||||||
for (int i = (n - 1) / 2; i >= 0; --i)
|
for (int i = (n - 1) / 2; i >= 0; --i)
|
||||||
minHeapify(minHeap, i);
|
minHeapify(minHeap, i);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if size is 1
|
// Function to build the Huffman tree
|
||||||
int isSizeOne(struct MinHeap* minHeap) {
|
HuffmanNode* buildHuffmanTree(unsigned char* data, unsigned* freq, int size) {
|
||||||
return (minHeap->size == 1);
|
HuffmanNode *left, *right, *top;
|
||||||
}
|
|
||||||
|
MinHeap* minHeap = createMinHeap(size);
|
||||||
|
|
||||||
// Create and build a MinHeap
|
|
||||||
struct MinHeap* createAndBuildMinHeap(unsigned char data[], int freq[], int size) {
|
|
||||||
struct MinHeap* minHeap = createMinHeap(size);
|
|
||||||
for (int i = 0; i < size; ++i)
|
for (int i = 0; i < size; ++i)
|
||||||
minHeap->array[i] = createNode(data[i], freq[i]);
|
insertMinHeap(minHeap, newNode(data[i], freq[data[i]]));
|
||||||
minHeap->size = size;
|
|
||||||
buildMinHeap(minHeap);
|
|
||||||
return minHeap;
|
|
||||||
}
|
|
||||||
void freeMinHeap(struct MinHeap* minHeap) {
|
|
||||||
for (unsigned i = 0; i < minHeap->size; ++i) { // Changed to unsigned
|
|
||||||
free(minHeap->array[i]);
|
|
||||||
}
|
|
||||||
free(minHeap->array);
|
|
||||||
free(minHeap);
|
|
||||||
}
|
|
||||||
|
|
||||||
void freeHuffmanCodes(char* codes[256]) {
|
buildMinHeap(minHeap);
|
||||||
for (int i = 0; i < 256; ++i) {
|
|
||||||
if (codes[i]) {
|
|
||||||
free(codes[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Build Huffman Tree
|
|
||||||
struct MinHeapNode* buildHuffmanTree(unsigned char data[], int freq[], int size) {
|
|
||||||
struct MinHeapNode *left, *right, *top;
|
|
||||||
struct MinHeap* minHeap = createAndBuildMinHeap(data, freq, size);
|
|
||||||
|
|
||||||
while (!isSizeOne(minHeap)) {
|
while (!isSizeOne(minHeap)) {
|
||||||
left = extractMin(minHeap);
|
left = extractMin(minHeap);
|
||||||
right = extractMin(minHeap);
|
right = extractMin(minHeap);
|
||||||
|
|
||||||
top = createNode('$', left->freq + right->freq);
|
top = newNode('$', left->frequency + right->frequency);
|
||||||
top->left = left;
|
top->left = left;
|
||||||
top->right = right;
|
top->right = right;
|
||||||
|
|
||||||
insertMinHeap(minHeap, top);
|
insertMinHeap(minHeap, top);
|
||||||
}
|
}
|
||||||
|
|
||||||
return extractMin(minHeap);
|
return extractMin(minHeap);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Print Huffman Codes to a map
|
// Function to generate the Huffman codes for each character
|
||||||
|
void generateCodes(HuffmanNode* root, char* arr, int top, char** codes) {
|
||||||
|
|
||||||
|
|
||||||
// Updated compressFile function
|
|
||||||
void storeCodes(struct MinHeapNode* root, char** codes, char* currentCode, int top) {
|
|
||||||
if (!root) return;
|
|
||||||
|
|
||||||
if (root->left) {
|
if (root->left) {
|
||||||
currentCode[top] = '0';
|
arr[top] = '0';
|
||||||
storeCodes(root->left, codes, currentCode, top + 1);
|
generateCodes(root->left, arr, top + 1, codes);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (root->right) {
|
if (root->right) {
|
||||||
currentCode[top] = '1';
|
arr[top] = '1';
|
||||||
storeCodes(root->right, codes, currentCode, top + 1);
|
generateCodes(root->right, arr, top + 1, codes);
|
||||||
}
|
}
|
||||||
if (!(root->left) && !(root->right)) {
|
|
||||||
currentCode[top] = '\0';
|
if (!root->left && !root->right) {
|
||||||
codes[root->data] = (char*)malloc(strlen(currentCode) + 1); // Use malloc instead of strdup
|
arr[top] = '\0'; // Null terminate the string
|
||||||
strcpy(codes[root->data], currentCode); // Copy the string
|
codes[root->data] = strdup(arr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Исправление для minHeap в compressFile
|
// Function to compress a file
|
||||||
int compressFile(const char* input_file_name, const char* output_file_name) {
|
int compress_1(const char* input_file, const char* output_file) {
|
||||||
FILE* inputFile = fopen(input_file_name, "rb");
|
FILE* input = fopen(input_file, "rb");
|
||||||
if (!inputFile) {
|
FILE* output = fopen(output_file, "wb");
|
||||||
perror("Error opening input file");
|
if (!input || !output) {
|
||||||
|
perror("Error opening file");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int freq[256] = {0};
|
unsigned freq[256] = {0};
|
||||||
unsigned char buffer;
|
unsigned char data;
|
||||||
|
while (fread(&data, sizeof(data), 1, input) == 1)
|
||||||
|
freq[data]++;
|
||||||
|
|
||||||
// Count frequency of each byte
|
unsigned char unique_data[256];
|
||||||
while (fread(&buffer, sizeof(unsigned char), 1, inputFile)) {
|
int unique_count = 0;
|
||||||
freq[buffer]++;
|
|
||||||
}
|
|
||||||
rewind(inputFile);
|
|
||||||
|
|
||||||
unsigned char data[256];
|
|
||||||
int frequencies[256], size = 0;
|
|
||||||
for (int i = 0; i < 256; i++) {
|
for (int i = 0; i < 256; i++) {
|
||||||
if (freq[i] > 0) {
|
if (freq[i] > 0) {
|
||||||
data[size] = (unsigned char)i;
|
unique_data[unique_count++] = i;
|
||||||
frequencies[size] = freq[i];
|
|
||||||
size++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct MinHeap* minHeap = createAndBuildMinHeap(data, frequencies, size); // Create heap
|
HuffmanNode* root = buildHuffmanTree(unique_data, freq, unique_count);
|
||||||
struct MinHeapNode* root = buildHuffmanTree(data, frequencies, size);
|
|
||||||
char* codes[256] = {0};
|
char* codes[256] = {0};
|
||||||
char currentCode[MAX_TREE_HT] = {0}; // Initialize to avoid garbage values
|
char arr[256];
|
||||||
storeCodes(root, codes, currentCode, 0);
|
generateCodes(root, arr, 0, codes);
|
||||||
|
|
||||||
FILE* outputFile = fopen(output_file_name, "wb");
|
fwrite(&unique_count, sizeof(int), 1, output);
|
||||||
if (!outputFile) {
|
for (int i = 0; i < unique_count; i++) {
|
||||||
perror("Error opening output file");
|
unsigned char symbol = unique_data[i];
|
||||||
fclose(inputFile);
|
fwrite(&symbol, sizeof(unsigned char), 1, output);
|
||||||
return -1;
|
fwrite(&freq[symbol], sizeof(unsigned), 1, output);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write codes to output file
|
fseek(input, 0, SEEK_SET);
|
||||||
fwrite(freq, sizeof(int), 256, outputFile);
|
|
||||||
|
|
||||||
unsigned char byte = 0;
|
unsigned char buffer = 0;
|
||||||
int bitCount = 0;
|
int bit_count = 0;
|
||||||
while (fread(&buffer, sizeof(unsigned char), 1, inputFile)) {
|
size_t total_bits = 0;
|
||||||
char* code = codes[buffer];
|
|
||||||
if (!code) {
|
while (fread(&data, sizeof(data), 1, input) == 1) {
|
||||||
fprintf(stderr, "Error: Undefined code for byte %u\n", buffer);
|
char* code = codes[data];
|
||||||
fclose(inputFile);
|
|
||||||
fclose(outputFile);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
for (int i = 0; code[i] != '\0'; i++) {
|
for (int i = 0; code[i] != '\0'; i++) {
|
||||||
byte = (byte << 1) | (code[i] - '0');
|
unsigned char bit = code[i] - '0';
|
||||||
bitCount++;
|
buffer = (buffer << 1) | bit;
|
||||||
if (bitCount == 8) {
|
bit_count++;
|
||||||
fwrite(&byte, sizeof(unsigned char), 1, outputFile);
|
total_bits++;
|
||||||
byte = 0;
|
|
||||||
bitCount = 0;
|
if (bit_count == 8) {
|
||||||
|
fwrite(&buffer, sizeof(unsigned char), 1, output);
|
||||||
|
bit_count = 0;
|
||||||
|
buffer = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (bitCount > 0) {
|
|
||||||
byte <<= (8 - bitCount);
|
if (bit_count > 0) {
|
||||||
fwrite(&byte, sizeof(unsigned char), 1, outputFile);
|
buffer <<= (8 - bit_count);
|
||||||
|
fwrite(&buffer, sizeof(unsigned char), 1, output);
|
||||||
}
|
}
|
||||||
|
|
||||||
fclose(inputFile);
|
fwrite(&total_bits, sizeof(size_t), 1, output); // Write total bits used
|
||||||
fclose(outputFile);
|
|
||||||
|
|
||||||
freeMinHeap(minHeap); // Free the min heap memory
|
fclose(input);
|
||||||
freeHuffmanCodes(codes); // Free Huffman codes
|
fclose(output);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Function to decompress the compressed file
|
||||||
// Decompress the file
|
int decompress_1(const char* input_file_name, const char* output_file_name) {
|
||||||
int decompressFile(const char* input_file_name, const char* output_file_name) {
|
FILE* input = fopen(input_file_name, "rb");
|
||||||
FILE* inputFile = fopen(input_file_name, "rb");
|
if (!input) {
|
||||||
if (!inputFile) {
|
|
||||||
perror("Error opening input file");
|
perror("Error opening input file");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
FILE* output = fopen(output_file_name, "wb");
|
||||||
int freq[256];
|
if (!output) {
|
||||||
fread(freq, sizeof(int), 256, inputFile);
|
|
||||||
|
|
||||||
unsigned char data[256];
|
|
||||||
int frequencies[256], size = 0;
|
|
||||||
for (int i = 0; i < 256; i++) {
|
|
||||||
if (freq[i] > 0) {
|
|
||||||
data[size] = (unsigned char)i;
|
|
||||||
frequencies[size] = freq[i];
|
|
||||||
size++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
struct MinHeapNode* root = buildHuffmanTree(data, frequencies, size);
|
|
||||||
|
|
||||||
FILE* outputFile = fopen(output_file_name, "wb");
|
|
||||||
if (!outputFile) {
|
|
||||||
perror("Error opening output file");
|
perror("Error opening output file");
|
||||||
fclose(inputFile);
|
fclose(input);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct MinHeapNode* current = root;
|
int unique_count;
|
||||||
unsigned char buffer;
|
if (fread(&unique_count, sizeof(int), 1, input) != 1) {
|
||||||
while (fread(&buffer, sizeof(unsigned char), 1, inputFile)) {
|
perror("Error reading from input file");
|
||||||
for (int i = 7; i >= 0; i--) {
|
fclose(input);
|
||||||
int bit = (buffer >> i) & 1;
|
fclose(output);
|
||||||
if (bit == 0)
|
return -1;
|
||||||
current = current->left;
|
}
|
||||||
else
|
|
||||||
current = current->right;
|
|
||||||
|
|
||||||
if (!(current->left) && !(current->right)) {
|
unsigned char unique_data[256];
|
||||||
fwrite(&current->data, sizeof(unsigned char), 1, outputFile);
|
unsigned freq[256] = {0};
|
||||||
|
for (int i = 0; i < unique_count; i++) {
|
||||||
|
if (fread(&unique_data[i], sizeof(unsigned char), 1, input) != 1 ||
|
||||||
|
fread(&freq[unique_data[i]], sizeof(unsigned), 1, input) != 1) {
|
||||||
|
perror("Error reading from input file");
|
||||||
|
fclose(input);
|
||||||
|
fclose(output);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
HuffmanNode* root = buildHuffmanTree(unique_data, freq, unique_count);
|
||||||
|
|
||||||
|
size_t total_bits;
|
||||||
|
fseek(input, -sizeof(size_t), SEEK_END);
|
||||||
|
fread(&total_bits, sizeof(size_t), 1, input);
|
||||||
|
|
||||||
|
fseek(input, sizeof(int) + unique_count * (sizeof(unsigned char) + sizeof(unsigned)), SEEK_SET);
|
||||||
|
|
||||||
|
HuffmanNode* current = root;
|
||||||
|
unsigned char byte;
|
||||||
|
size_t bits_read = 0;
|
||||||
|
|
||||||
|
while (bits_read < total_bits && fread(&byte, sizeof(byte), 1, input) == 1) {
|
||||||
|
for (int i = 7; i >= 0 && bits_read < total_bits; i--, bits_read++) {
|
||||||
|
if (byte & (1 << i)) {
|
||||||
|
current = current->right;
|
||||||
|
} else {
|
||||||
|
current = current->left;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!current->left && !current->right) {
|
||||||
|
fwrite(&current->data, sizeof(current->data), 1, output);
|
||||||
current = root;
|
current = root;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fclose(inputFile);
|
fclose(input);
|
||||||
fclose(outputFile);
|
fclose(output);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int compress_1(const char* input_file_name, const char* output_file_name){
|
|
||||||
if(compressFile(input_file_name, output_file_name) == 0){
|
|
||||||
printf("Succsses!");
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int decompress_1(const char* input_file_name, const char* output_file_name){
|
|
||||||
if(decompressFile(input_file_name, output_file_name) == 0){
|
|
||||||
printf("Succsses!");
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
}
|
|
||||||
int compress_2(const char* input_file_name, const char* output_file_name);
|
int compress_2(const char* input_file_name, const char* output_file_name);
|
||||||
int decompress_2(const char* input_file_name, const char* output_file_name);
|
int decompress_2(const char* input_file_name, const char* output_file_name);
|
Loading…
Reference in New Issue
Block a user