usaa23/sk1/compressor.c

205 lines
5.8 KiB
C

#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "compressor.h"
/* Number of child slots in every TrieNode; children are indexed by byte value. */
#define SIZE 256
/* NOTE(review): not referenced in the part of the file visible here; presumably
 * an upper bound on the number of dictionary entries - confirm before relying on it. */
#define MAX_WORD_COUNT 100000
/* Longest run RLEcompress stores in a single (byte, count) pair. */
#define MAX_REPEAT 255
/* Byte buffer handed to a compression routine (RLEcompress / LZ78compress). */
struct Input {
/* Bytes to compress; the routines read buffer[0] .. buffer[size - 1]. */
char* buffer;
/* Number of bytes available in buffer. */
int size;
};
/* Result produced by a compression routine (RLEcompress / LZ78compress). */
struct Output {
/* Heap buffer allocated by the compression routine; compress() releases it
 * with free(). LZ78compress sets it to NULL when it produced no output. */
char* result;
/* Number of meaningful bytes stored in result. */
int length;
};
/* Bookkeeping for run-length encoding: the run being tracked and the write cursor. */
struct RunLengthData {
/* Byte value of the run being tracked; processRLE writes nothing when this is -1. */
int head;
/* Number of consecutive occurrences of head counted so far. */
int length;
/* Index in the output buffer where the next (byte, count) pair is written. */
int position;
};
// Used to store and manage the dictionary of substrings during compression.
// (The original note names both LZ78 and RLE; in the code visible here only
// LZ78compress and process_trie touch the trie.)
struct TrieNode {
// Identifier of the phrase ending at this node. create_node() sets it to 0;
// LZ78compress numbers new nodes 1, 2, 3, ... and leaves the root at 0.
int id;
// Child nodes indexed by the next byte value; NULL means no child for that byte.
struct TrieNode* toddler[SIZE];
};
/*
 * Allocates a trie node with id 0 and every child slot set to NULL.
 *
 * Returns the new node, or NULL when memory cannot be allocated. The caller
 * owns the node; process_trie(node, NULL, 0, NULL, 1) releases it together
 * with all of its descendants.
 */
struct TrieNode* create_node(void) {
    struct TrieNode* node = malloc(sizeof *node);
    if (node == NULL) {
        return NULL;
    }
    node->id = 0;
    // Explicit loop rather than calloc: an all-zero bit pattern is not
    // guaranteed by the C standard to be a null pointer.
    for (int i = 0; i < SIZE; i++) {
        node->toddler[i] = NULL;
    }
    return node;
}
/*
 * Performs one of two operations on the trie rooted at `root`.
 *
 * operation == 1 (destroy):
 *     Frees `root` and every node reachable from it. `word`, `word_Id` and
 *     `words` are ignored.
 *
 * operation == 2 (add word):
 *     Walks the trie along the bytes of the NUL-terminated string `word`,
 *     creating missing nodes (each new node gets id `word_Id`). For every
 *     byte visited, a freshly allocated string "-<parent id>-<byte>\n" is
 *     stored in words[<child id>]; the caller owns those strings.
 *     NOTE(review): a previous value of words[<child id>] is overwritten
 *     without being freed, exactly as before - it cannot be freed here
 *     because this function cannot tell whether the slot was initialised.
 *
 * Any other `operation`, a NULL `root`, or (for operation 2) a NULL `word` or
 * `words` is a no-op. If an allocation fails while adding, the walk stops
 * early and the trie keeps whatever was added up to that point.
 */
void process_trie(struct TrieNode* root, char* word, int word_Id, char* words[], int operation) {
    if (root == NULL) {
        return;
    }

    if (operation == 1) {
        for (int i = 0; i < SIZE; i++) {
            process_trie(root->toddler[i], NULL, 0, NULL, 1);
        }
        free(root);
        return;
    }

    if (operation != 2 || word == NULL || words == NULL) {
        return;
    }

    struct TrieNode* node = root;
    for (const char* cursor = word; *cursor != '\0'; cursor++) {
        // Go through unsigned char: plain char may be signed, and a negative
        // value would index in front of the toddler[] array.
        int char_Id = (unsigned char)*cursor;

        struct TrieNode* child = node->toddler[char_Id];
        if (child == NULL) {
            child = create_node();
            if (child == NULL) {
                return;
            }
            child->id = word_Id;
            node->toddler[char_Id] = child;
        }

        int needSize = snprintf(NULL, 0, "-%d-%c\n", node->id, char_Id);
        if (needSize < 0) {
            return;
        }
        char* entry = calloc((size_t)needSize + 1, sizeof(char));
        if (entry == NULL) {
            return;
        }
        snprintf(entry, (size_t)needSize + 1, "-%d-%c\n", node->id, char_Id);
        words[child->id] = entry;

        node = child;
    }
}
/*
 * Appends one record "-<id>-<symbol>\n" to a growable byte buffer.
 *
 * `*buf`, `*used` and `*capacity` describe the buffer and are updated in
 * place; the buffer is kept NUL-terminated after the last record. `symbol` is
 * a byte value in 0..255 and is stored verbatim (it may be 0).
 *
 * Returns 0 on success, -1 if the buffer could not grow or would exceed
 * INT_MAX bytes. On failure the buffer is left untouched and still owned by
 * the caller.
 */
static int lz78_append_record(char** buf, size_t* used, size_t* capacity, int id, int symbol) {
    char prefix[32];
    int prefix_len = snprintf(prefix, sizeof prefix, "-%d-", id);
    if (prefix_len < 0 || (size_t)prefix_len >= sizeof prefix) {
        return -1;
    }

    // prefix + symbol byte + '\n' + terminating NUL
    size_t needed = *used + (size_t)prefix_len + 2 + 1;
    if (needed > (size_t)INT_MAX) {
        return -1; // struct Output stores the length in an int
    }
    if (needed > *capacity) {
        size_t grown = (*capacity == 0) ? 64 : *capacity;
        while (grown < needed) {
            grown *= 2;
        }
        char* resized = realloc(*buf, grown);
        if (resized == NULL) {
            return -1;
        }
        *buf = resized;
        *capacity = grown;
    }

    memcpy(*buf + *used, prefix, (size_t)prefix_len);
    *used += (size_t)prefix_len;
    ((unsigned char*)*buf)[(*used)++] = (unsigned char)symbol;
    (*buf)[(*used)++] = '\n';
    (*buf)[*used] = '\0';
    return 0;
}

/*
 * LZ78-encodes input.buffer[0 .. input.size-1].
 *
 * The output is a sequence of text records "-<id>-<byte>\n": <id> is the
 * decimal number of an earlier dictionary phrase (0 = empty phrase) and
 * <byte> is the raw byte that extends it. Each record defines the next phrase
 * number, starting at 1.
 *
 * On success output->result points to a heap buffer the caller must free()
 * (NULL when nothing was produced, e.g. for empty input), output->length is
 * the number of bytes in it, and that length is returned. The buffer is
 * NUL-terminated but may contain NUL bytes inside, so always use the length.
 *
 * On allocation failure output->result is NULL, output->length is 0 and -1
 * is returned.
 */
int LZ78compress(struct Input input, struct Output* output) {
    output->result = NULL;
    output->length = 0;

    struct TrieNode* root = create_node();
    if (root == NULL) {
        return -1;
    }

    char* encoded = NULL;
    size_t used = 0;
    size_t capacity = 0;
    int failed = 0;

    struct TrieNode* current = root;
    struct TrieNode* parent = root; // node visited just before `current`
    int last_symbol = 0;            // byte that led from `parent` to `current`
    int next_id = 0;

    for (int i = 0; i < input.size; i++) {
        // Index through unsigned char: plain char may be signed, and bytes
        // >= 0x80 would otherwise become negative array indices.
        int symbol = (unsigned char)input.buffer[i];

        struct TrieNode* child = current->toddler[symbol];
        if (child != NULL) {
            parent = current;
            last_symbol = symbol;
            current = child;
            continue;
        }

        child = create_node();
        if (child == NULL) {
            failed = 1;
            break;
        }
        child->id = ++next_id;
        current->toddler[symbol] = child;

        if (lz78_append_record(&encoded, &used, &capacity, current->id, symbol) != 0) {
            failed = 1;
            break;
        }
        current = root;
    }

    // The input may end in the middle of a phrase that is already in the
    // dictionary. Emit it as (parent phrase, last byte) so those bytes are
    // not silently dropped; a decoder expands it to the same phrase.
    if (!failed && current != root) {
        if (lz78_append_record(&encoded, &used, &capacity, parent->id, last_symbol) != 0) {
            failed = 1;
        }
    }

    process_trie(root, NULL, 0, NULL, 1);

    if (failed) {
        free(encoded);
        return -1;
    }

    output->result = encoded; // stays NULL when no record was written
    output->length = (int)used;
    return output->length;
}
/*
 * Writes the run described by `data` into output->result as two bytes,
 * (head, length), starting at index data->position, then advances
 * data->position by 2.
 *
 * A head of -1 means there is no run to write; nothing happens in that case.
 * No capacity check is performed here: the caller must make sure the output
 * buffer has room for two more bytes.
 */
void processRLE(struct RunLengthData* data, struct Output* output) {
    if (data->head == -1) {
        return;
    }

    char* slot = output->result + data->position;
    slot[0] = data->head;
    slot[1] = data->length;
    data->position += 2;
}
/*
 * Run-length encodes input.buffer[0 .. input.size-1].
 *
 * The output is a sequence of two-byte pairs (byte value, repeat count) with
 * a repeat count between 1 and MAX_REPEAT; longer runs are split into
 * several pairs.
 *
 * On success output->result points to a heap buffer the caller must free(),
 * output->length is the number of bytes written (0 for empty input), and that
 * length is returned.
 *
 * On failure (negative size, input too large for the length to fit in an
 * int, or out of memory) output->result is NULL, output->length is 0 and -1
 * is returned.
 */
int RLEcompress(struct Input input, struct Output* output) {
    output->result = NULL;
    output->length = 0;

    // The worst case needs 2 * size output bytes, which must fit in an int.
    if (input.size < 0 || input.size > INT_MAX / 2) {
        return -1;
    }
    size_t count = (size_t)input.size;

    // Always request at least one pair so that an empty input cannot be
    // confused with an allocation failure (calloc(0, ...) may return NULL).
    unsigned char* out = calloc(count > 0 ? count : 1, 2);
    if (out == NULL) {
        return -1;
    }

    size_t written = 0;
    unsigned char run_byte = 0;
    int run_length = 0; // 0 means no run is open yet

    for (size_t i = 0; i < count; i++) {
        unsigned char byte = (unsigned char)input.buffer[i];

        if (run_length > 0 && byte == run_byte && run_length < MAX_REPEAT) {
            run_length++;
            continue;
        }

        // The byte changed or the run is full: store the finished run.
        if (run_length > 0) {
            out[written++] = run_byte;
            out[written++] = (unsigned char)run_length;
        }
        run_byte = byte;
        run_length = 1;
    }

    // Store the run that was still open when the input ended.
    if (run_length > 0) {
        out[written++] = run_byte;
        out[written++] = (unsigned char)run_length;
    }

    output->result = (char*)out;
    output->length = (int)written;
    return output->length;
}
void compress(FILE* infile, FILE* outfile) {
fseek(infile, 0, SEEK_END);
int insize = ftell(infile) + 1;
rewind(infile);
char* buffer = calloc(insize, sizeof(char));
memset(buffer, 0, insize);
insize = fread(buffer, sizeof(char), insize - 1, infile);
if (insize == 0) {
assert(!ferror(infile));
}
struct Input input = {.buffer = buffer, .size = insize};
struct Output tempOutput;
RLEcompress(input, &tempOutput);
struct Input lz78input = {.buffer = tempOutput.result, .size = tempOutput.length};
struct Output finalOutput;
LZ78compress(lz78input, &finalOutput);
if (finalOutput.length > 0) {
fwrite(finalOutput.result, sizeof(char), finalOutput.length, outfile);
}
free(buffer);
free(tempOutput.result);
free(finalOutput.result);
}