Update 'sk1/compressor.c'
This commit is contained in:
parent
48dd9c393a
commit
2247f621e4
205
sk1/compressor.c
205
sk1/compressor.c
@ -0,0 +1,205 @@
|
||||
#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "compressor.h"
|
||||
|
||||
/* Number of child slots in every trie node: one per possible byte value. */
#define SIZE 256
/* NOTE(review): not referenced anywhere in the visible part of this file. */
#define MAX_WORD_COUNT 100000
/* Longest run a single RLE pair may describe; the count is stored in one byte. */
#define MAX_REPEAT 255
|
||||
|
||||
/* Byte buffer handed to one compression stage. */
struct Input {
    char *buffer; /* bytes to compress; not freed by the compression stages */
    int size;     /* number of bytes in buffer */
};
|
||||
|
||||
/* Result of one compression stage. */
struct Output {
    char *result; /* heap buffer filled by the stage; compress() releases it with free() */
    int length;   /* number of bytes stored in result */
};
|
||||
|
||||
/* Working state of the run-length encoder. */
struct RunLengthData {
    int head;     /* byte value of the run currently being counted */
    int length;   /* number of repetitions of head counted so far */
    int position; /* offset in the output buffer where the next pair is written */
};
|
||||
|
||||
//для хранения и управления словарём подстрок, в процессе сжатия (LZ78, RLE)
|
||||
struct TrieNode {
|
||||
int id;
|
||||
struct TrieNode* toddler[SIZE];
|
||||
};
|
||||
|
||||
struct TrieNode* create_node() {
|
||||
struct TrieNode* new_node = (struct TrieNode*)malloc(sizeof(struct TrieNode));
|
||||
new_node->id = 0;
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
new_node->toddler[i] = NULL;
|
||||
}
|
||||
return new_node;
|
||||
}
|
||||
|
||||
void process_trie(struct TrieNode* root, char* word, int word_Id, char* words[], int operation) {
|
||||
if (root == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (operation == 1) { // Destroy operation
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
process_trie(root->toddler[i], NULL, 0, NULL, 1);
|
||||
}
|
||||
free(root);
|
||||
} else if (operation == 2 && word != NULL && *word != '\0') { // Add word operation
|
||||
int char_Id = (int)(*word);
|
||||
|
||||
if (root->toddler[char_Id] == NULL) {
|
||||
root->toddler[char_Id] = create_node();
|
||||
root->toddler[char_Id]->id = word_Id;
|
||||
}
|
||||
|
||||
int needSize = snprintf(NULL, 0, "-%d-%c\n", root->id, char_Id);
|
||||
words[root->toddler[char_Id]->id] = calloc(needSize + 1, sizeof(char));
|
||||
memset(words[root->toddler[char_Id]->id], 0, needSize + 1);
|
||||
sprintf(words[root->toddler[char_Id]->id], "-%d-%c\n", root->id, char_Id);
|
||||
|
||||
process_trie(root->toddler[char_Id], word + 1, word_Id, words, 2);
|
||||
}
|
||||
}
|
||||
|
||||
int LZ78compress(struct Input input, struct Output* output) {
|
||||
struct TrieNode* root = create_node();
|
||||
struct TrieNode* current = root;
|
||||
|
||||
char* last = NULL;
|
||||
int currentIndex = 0;
|
||||
|
||||
for (int i = 0; i < input.size; i++) {
|
||||
int character = input.buffer[i];
|
||||
|
||||
if (current->toddler[character] != NULL) {
|
||||
current = current->toddler[character];
|
||||
} else {
|
||||
current->toddler[character] = create_node();
|
||||
current->toddler[character]->id = ++currentIndex;
|
||||
|
||||
int sz_needed = snprintf(NULL, 0, "-%d-%c\n", current->id, character);
|
||||
char* new_last = calloc(sz_needed + 1, sizeof(char));
|
||||
sprintf(new_last, "-%d-%c\n", current->id, character );
|
||||
|
||||
if (last != NULL) {
|
||||
size_t last_len = strlen(last);
|
||||
char* temp = calloc(last_len + sz_needed + 1, sizeof(char));
|
||||
strcpy(temp, last);
|
||||
strcat(temp, new_last);
|
||||
free(last);
|
||||
last = temp;
|
||||
} else {
|
||||
last = new_last;
|
||||
}
|
||||
current = root;
|
||||
}
|
||||
}
|
||||
|
||||
if (last != NULL) {
|
||||
output->length = strlen(last);
|
||||
output->result = malloc(output->length + 1);
|
||||
strcpy(output->result, last);
|
||||
free(last);
|
||||
|
||||
}else {
|
||||
output->result = NULL;
|
||||
output->length = 0;
|
||||
}
|
||||
|
||||
process_trie(root, NULL, 0, NULL, 1);
|
||||
|
||||
return output->length;
|
||||
}
|
||||
|
||||
|
||||
void processRLE(struct RunLengthData* data, struct Output* output) {
|
||||
if (data->head != -1) {
|
||||
output->result[data->position] = data->head;
|
||||
output->result[data->position + 1] = data->length;
|
||||
data->position += 2;
|
||||
}
|
||||
}
|
||||
|
||||
int RLEcompress(struct Input input, struct Output* output) {
|
||||
output->result = calloc(2 * input.size, sizeof(char));
|
||||
|
||||
if(output->result == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
struct RunLengthData RLE_information;
|
||||
memset(&RLE_information, 0, sizeof(struct RunLengthData));
|
||||
|
||||
for (int i = 0; i < input.size; i++) {
|
||||
int currentChar = input.buffer[i];
|
||||
|
||||
if(RLE_information.position + 1 >= 2 * input.size) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (currentChar == RLE_information.head && RLE_information.length < MAX_REPEAT) {
|
||||
RLE_information.length += 1;
|
||||
|
||||
if (i == input.size - 1) {
|
||||
output->result[RLE_information.position] = currentChar;
|
||||
output->result[RLE_information.position + 1] = RLE_information.length;
|
||||
RLE_information.position += 2;
|
||||
}
|
||||
} else {
|
||||
if (RLE_information.length > 0) {
|
||||
output->result[RLE_information.position] = RLE_information.head;
|
||||
output->result[RLE_information.position + 1] = RLE_information.length;
|
||||
RLE_information.position += 2;
|
||||
}
|
||||
|
||||
if (i == input.size - 1) {
|
||||
output->result[RLE_information.position] = currentChar;
|
||||
output->result[RLE_information.position + 1] = 1;
|
||||
RLE_information.position += 2;
|
||||
} else {
|
||||
RLE_information.head = currentChar;
|
||||
RLE_information.length = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
output->length = RLE_information.position;
|
||||
return output->length;
|
||||
}
|
||||
|
||||
void compress(FILE* infile, FILE* outfile) {
|
||||
fseek(infile, 0, SEEK_END);
|
||||
int insize = ftell(infile) + 1;
|
||||
rewind(infile);
|
||||
char* buffer = calloc(insize, sizeof(char));
|
||||
|
||||
memset(buffer, 0, insize);
|
||||
insize = fread(buffer, sizeof(char), insize - 1, infile);
|
||||
|
||||
if (insize == 0) {
|
||||
assert(!ferror(infile));
|
||||
}
|
||||
|
||||
struct Input input = {.buffer = buffer, .size = insize};
|
||||
struct Output tempOutput;
|
||||
|
||||
RLEcompress(input, &tempOutput);
|
||||
|
||||
struct Input lz78input = {.buffer = tempOutput.result, .size = tempOutput.length};
|
||||
struct Output finalOutput;
|
||||
|
||||
LZ78compress(lz78input, &finalOutput);
|
||||
|
||||
if (finalOutput.length > 0) {
|
||||
fwrite(finalOutput.result, sizeof(char), finalOutput.length, outfile);
|
||||
}
|
||||
|
||||
free(buffer);
|
||||
free(tempOutput.result);
|
||||
free(finalOutput.result);
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user