From 2247f621e411ae20dc77f2304635e68bb1664a07 Mon Sep 17 00:00:00 2001 From: Anzhelika Nikolaieva Date: Mon, 29 Jan 2024 23:13:30 +0000 Subject: [PATCH] Update 'sk1/compressor.c' --- sk1/compressor.c | 205 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 205 insertions(+) diff --git a/sk1/compressor.c b/sk1/compressor.c index e69de29..9594cf8 100644 --- a/sk1/compressor.c +++ b/sk1/compressor.c @@ -0,0 +1,205 @@ +#include +#include +#include +#include +#include "compressor.h" + +#define SIZE 256 +#define MAX_WORD_COUNT 100000 +#define MAX_REPEAT 255 + +struct Input { + char* buffer; + int size; +}; + +struct Output { + char* result; + int length; +}; + +struct RunLengthData { + int head; + int length; + int position; +}; + +//для хранения и управления словарём подстрок, в процессе сжатия (LZ78, RLE) +struct TrieNode { + int id; + struct TrieNode* toddler[SIZE]; +}; + +struct TrieNode* create_node() { + struct TrieNode* new_node = (struct TrieNode*)malloc(sizeof(struct TrieNode)); + new_node->id = 0; + for (int i = 0; i < SIZE; i++) { + new_node->toddler[i] = NULL; + } + return new_node; +} + +void process_trie(struct TrieNode* root, char* word, int word_Id, char* words[], int operation) { + if (root == NULL) { + return; + } + + if (operation == 1) { // Destroy operation + for (int i = 0; i < SIZE; i++) { + process_trie(root->toddler[i], NULL, 0, NULL, 1); + } + free(root); + } else if (operation == 2 && word != NULL && *word != '\0') { // Add word operation + int char_Id = (int)(*word); + + if (root->toddler[char_Id] == NULL) { + root->toddler[char_Id] = create_node(); + root->toddler[char_Id]->id = word_Id; + } + + int needSize = snprintf(NULL, 0, "-%d-%c\n", root->id, char_Id); + words[root->toddler[char_Id]->id] = calloc(needSize + 1, sizeof(char)); + memset(words[root->toddler[char_Id]->id], 0, needSize + 1); + sprintf(words[root->toddler[char_Id]->id], "-%d-%c\n", root->id, char_Id); + + process_trie(root->toddler[char_Id], word + 1, word_Id, words, 2); + } +} + +int LZ78compress(struct Input input, struct Output* output) { + struct TrieNode* root = create_node(); + struct TrieNode* current = root; + + char* last = NULL; + int currentIndex = 0; + + for (int i = 0; i < input.size; i++) { + int character = input.buffer[i]; + + if (current->toddler[character] != NULL) { + current = current->toddler[character]; + } else { + current->toddler[character] = create_node(); + current->toddler[character]->id = ++currentIndex; + + int sz_needed = snprintf(NULL, 0, "-%d-%c\n", current->id, character); + char* new_last = calloc(sz_needed + 1, sizeof(char)); + sprintf(new_last, "-%d-%c\n", current->id, character ); + + if (last != NULL) { + size_t last_len = strlen(last); + char* temp = calloc(last_len + sz_needed + 1, sizeof(char)); + strcpy(temp, last); + strcat(temp, new_last); + free(last); + last = temp; + } else { + last = new_last; + } + current = root; + } + } + + if (last != NULL) { + output->length = strlen(last); + output->result = malloc(output->length + 1); + strcpy(output->result, last); + free(last); + + }else { + output->result = NULL; + output->length = 0; + } + + process_trie(root, NULL, 0, NULL, 1); + + return output->length; +} + + +void processRLE(struct RunLengthData* data, struct Output* output) { + if (data->head != -1) { + output->result[data->position] = data->head; + output->result[data->position + 1] = data->length; + data->position += 2; + } +} + +int RLEcompress(struct Input input, struct Output* output) { + output->result = calloc(2 * input.size, sizeof(char)); + + if(output->result == NULL) { + return -1; + } + + struct RunLengthData RLE_information; + memset(&RLE_information, 0, sizeof(struct RunLengthData)); + + for (int i = 0; i < input.size; i++) { + int currentChar = input.buffer[i]; + + if(RLE_information.position + 1 >= 2 * input.size) { + break; + } + + if (currentChar == RLE_information.head && RLE_information.length < MAX_REPEAT) { + RLE_information.length += 1; + + if (i == input.size - 1) { + output->result[RLE_information.position] = currentChar; + output->result[RLE_information.position + 1] = RLE_information.length; + RLE_information.position += 2; + } + } else { + if (RLE_information.length > 0) { + output->result[RLE_information.position] = RLE_information.head; + output->result[RLE_information.position + 1] = RLE_information.length; + RLE_information.position += 2; + } + + if (i == input.size - 1) { + output->result[RLE_information.position] = currentChar; + output->result[RLE_information.position + 1] = 1; + RLE_information.position += 2; + } else { + RLE_information.head = currentChar; + RLE_information.length = 1; + } + } + } + + output->length = RLE_information.position; + return output->length; +} + +void compress(FILE* infile, FILE* outfile) { + fseek(infile, 0, SEEK_END); + int insize = ftell(infile) + 1; + rewind(infile); + char* buffer = calloc(insize, sizeof(char)); + + memset(buffer, 0, insize); + insize = fread(buffer, sizeof(char), insize - 1, infile); + + if (insize == 0) { + assert(!ferror(infile)); + } + + struct Input input = {.buffer = buffer, .size = insize}; + struct Output tempOutput; + + RLEcompress(input, &tempOutput); + + struct Input lz78input = {.buffer = tempOutput.result, .size = tempOutput.length}; + struct Output finalOutput; + + LZ78compress(lz78input, &finalOutput); + + if (finalOutput.length > 0) { + fwrite(finalOutput.result, sizeof(char), finalOutput.length, outfile); + } + + free(buffer); + free(tempOutput.result); + free(finalOutput.result); +} \ No newline at end of file