224 lines
6.2 KiB
C
224 lines
6.2 KiB
C
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <assert.h>
|
|
#include "compressor.h"
|
|
|
|
#define SIZE 256
|
|
#define MAX_WORD_COUNT 100000
|
|
#define MAX_REPEAT 255
|
|
|
|
struct Input {
|
|
char* buffer;
|
|
int size;
|
|
};
|
|
|
|
struct Output {
|
|
char* result;
|
|
int length;
|
|
};
|
|
|
|
struct RunLengthData {
|
|
int head;
|
|
int length;
|
|
int position;
|
|
};
|
|
|
|
//для хранения и управления словарём подстрок, в процессе сжатия (LZ78, RLE)
|
|
struct TrieNode {
|
|
int id;
|
|
struct TrieNode* toddler[SIZE];
|
|
};
|
|
|
|
struct TrieNode* create_node() {
|
|
struct TrieNode* new_node = (struct TrieNode*)malloc(sizeof(struct TrieNode));
|
|
new_node->id = 0;
|
|
for (int i = 0; i < SIZE; i++) {
|
|
new_node->toddler[i] = NULL;
|
|
}
|
|
return new_node;
|
|
}
|
|
|
|
void process_trie(struct TrieNode* root, char* word, int word_Id, char* words[], int operation) {
|
|
if (root == NULL) {
|
|
return;
|
|
}
|
|
|
|
if (operation == 1) { // Destroy operation
|
|
for (int i = 0; i < SIZE; i++) {
|
|
process_trie(root->toddler[i], NULL, 0, NULL, 1);
|
|
}
|
|
free(root);
|
|
} else if (operation == 2 && word != NULL && *word != '\0') { // Add word operation
|
|
int char_Id = (int)(*word);
|
|
|
|
if (root->toddler[char_Id] == NULL) {
|
|
root->toddler[char_Id] = create_node();
|
|
root->toddler[char_Id]->id = word_Id;
|
|
}
|
|
|
|
int needSize = snprintf(NULL, 0, "-%d-%c\n", root->id, char_Id);
|
|
words[root->toddler[char_Id]->id] = calloc(needSize + 1, sizeof(char));
|
|
memset(words[root->toddler[char_Id]->id], 0, needSize + 1);
|
|
sprintf(words[root->toddler[char_Id]->id], "-%d-%c\n", root->id, char_Id);
|
|
|
|
process_trie(root->toddler[char_Id], word + 1, word_Id, words, 2);
|
|
}
|
|
}
|
|
|
|
int LZ78compress(struct Input input, struct Output* output) {
|
|
struct TrieNode* root = create_node();
|
|
struct TrieNode* current = root;
|
|
|
|
char* last = NULL;
|
|
int currentIndex = 0;
|
|
|
|
for (int i = 0; i < input.size; i++) {
|
|
int character = input.buffer[i];
|
|
|
|
if (current->toddler[character] != NULL) {
|
|
current = current->toddler[character];
|
|
} else {
|
|
current->toddler[character] = create_node();
|
|
current->toddler[character]->id = ++currentIndex;
|
|
|
|
int size_n = snprintf(NULL, 0, "-%d-%c\n", current->id, character);
|
|
char* new_last = calloc(size_n + 1, sizeof(char));
|
|
sprintf(new_last, "-%d-%c\n", current->id, character );
|
|
|
|
if (last != NULL) {
|
|
size_t last_len = strlen(last);
|
|
char* temp = calloc(last_len + size_n + 1, sizeof(char));
|
|
strcpy(temp, last);
|
|
strcat(temp, new_last);
|
|
free(last);
|
|
last = temp;
|
|
} else {
|
|
last = new_last;
|
|
}
|
|
current = root;
|
|
}
|
|
}
|
|
|
|
if (last != NULL) {
|
|
output->length = strlen(last);
|
|
output->result = malloc(output->length + 1);
|
|
strcpy(output->result, last);
|
|
free(last);
|
|
|
|
}else {
|
|
output->result = NULL;
|
|
output->length = 0;
|
|
}
|
|
|
|
process_trie(root, NULL, 0, NULL, 1);
|
|
|
|
return output->length;
|
|
}
|
|
|
|
|
|
void processRLE(struct RunLengthData* data, struct Output* output) {
|
|
if (data->head != -1) {
|
|
output->result[data->position] = data->head;
|
|
output->result[data->position + 1] = data->length;
|
|
data->position += 2;
|
|
}
|
|
}
|
|
|
|
int RLEcompress(struct Input input, struct Output* output) {
|
|
output->result = calloc(2 * input.size, sizeof(char));
|
|
|
|
if(output->result == NULL) {
|
|
return -1;
|
|
}
|
|
|
|
struct RunLengthData RLE_information;
|
|
memset(&RLE_information, 0, sizeof(struct RunLengthData));
|
|
|
|
for (int i = 0; i < input.size; i++) {
|
|
int currentChar = input.buffer[i];
|
|
|
|
if(RLE_information.position + 1 >= 2 * input.size) {
|
|
break;
|
|
}
|
|
|
|
if (currentChar == RLE_information.head && RLE_information.length < MAX_REPEAT) {
|
|
RLE_information.length += 1;
|
|
|
|
if (i == input.size - 1) {
|
|
output->result[RLE_information.position] = currentChar;
|
|
output->result[RLE_information.position + 1] = RLE_information.length;
|
|
RLE_information.position += 2;
|
|
}
|
|
} else {
|
|
if (RLE_information.length > 0) {
|
|
output->result[RLE_information.position] = RLE_information.head;
|
|
output->result[RLE_information.position + 1] = RLE_information.length;
|
|
RLE_information.position += 2;
|
|
}
|
|
|
|
if (i == input.size - 1) {
|
|
output->result[RLE_information.position] = currentChar;
|
|
output->result[RLE_information.position + 1] = 1;
|
|
RLE_information.position += 2;
|
|
} else {
|
|
RLE_information.head = currentChar;
|
|
RLE_information.length = 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
output->length = RLE_information.position;
|
|
return output->length;
|
|
}
|
|
|
|
int compress(const char* input_file_name, const char* output_file_name) {
|
|
FILE *infile = fopen(input_file_name, "rb");
|
|
if (infile == NULL) {
|
|
perror("Error opening input file");
|
|
return -1;
|
|
}
|
|
|
|
fseek(infile, 0, SEEK_END);
|
|
int insize = ftell(infile) + 1;
|
|
rewind(infile);
|
|
char* buffer = calloc(insize, sizeof(char));
|
|
|
|
memset(buffer, 0, insize);
|
|
insize = fread(buffer, sizeof(char), insize - 1, infile);
|
|
|
|
if (insize == 0) {
|
|
assert(!ferror(infile));
|
|
}
|
|
|
|
fclose(infile);
|
|
|
|
FILE *outfile = fopen(output_file_name, "wb");
|
|
if (outfile == NULL) {
|
|
perror("Error opening output file");
|
|
free(buffer);
|
|
return -1;
|
|
}
|
|
|
|
struct Input input = {.buffer = buffer, .size = insize};
|
|
struct Output tempOutput;
|
|
|
|
RLEcompress(input, &tempOutput);
|
|
|
|
struct Input lz78input = {.buffer = tempOutput.result, .size = tempOutput.length};
|
|
struct Output finalOutput;
|
|
|
|
LZ78compress(lz78input, &finalOutput);
|
|
|
|
if (finalOutput.length > 0) {
|
|
fwrite(finalOutput.result, sizeof(char), finalOutput.length, outfile);
|
|
}
|
|
|
|
free(buffer);
|
|
free(tempOutput.result);
|
|
free(finalOutput.result);
|
|
|
|
fclose(outfile);
|
|
|
|
return 0;
|