#include #include #include #include #include "compressor.h" #define SIZE 256 #define MAX_WORD_COUNT 100000 #define MAX_REPEAT 255 struct Input { char* buffer; int size; }; struct Output { char* result; int length; }; struct RunLengthData { int head; int length; int position; }; //для хранения и управления словарём подстрок, в процессе сжатия (LZ78, RLE) struct TrieNode { int id; struct TrieNode* toddler[SIZE]; }; struct TrieNode* create_node() { struct TrieNode* new_node = (struct TrieNode*)malloc(sizeof(struct TrieNode)); new_node->id = 0; for (int i = 0; i < SIZE; i++) { new_node->toddler[i] = NULL; } return new_node; } void process_trie(struct TrieNode* root, char* word, int word_Id, char* words[], int operation) { if (root == NULL) { return; } if (operation == 1) { // Destroy operation for (int i = 0; i < SIZE; i++) { process_trie(root->toddler[i], NULL, 0, NULL, 1); } free(root); } else if (operation == 2 && word != NULL && *word != '\0') { // Add word operation int char_Id = (int)(*word); if (root->toddler[char_Id] == NULL) { root->toddler[char_Id] = create_node(); root->toddler[char_Id]->id = word_Id; } int needSize = snprintf(NULL, 0, "-%d-%c\n", root->id, char_Id); words[root->toddler[char_Id]->id] = calloc(needSize + 1, sizeof(char)); memset(words[root->toddler[char_Id]->id], 0, needSize + 1); sprintf(words[root->toddler[char_Id]->id], "-%d-%c\n", root->id, char_Id); process_trie(root->toddler[char_Id], word + 1, word_Id, words, 2); } } int LZ78compress(struct Input input, struct Output* output) { struct TrieNode* root = create_node(); struct TrieNode* current = root; char* last = NULL; int currentIndex = 0; for (int i = 0; i < input.size; i++) { int character = input.buffer[i]; if (current->toddler[character] != NULL) { current = current->toddler[character]; } else { current->toddler[character] = create_node(); current->toddler[character]->id = ++currentIndex; int size_n = snprintf(NULL, 0, "-%d-%c\n", current->id, character); char* new_last = calloc(size_n + 1, sizeof(char)); sprintf(new_last, "-%d-%c\n", current->id, character ); if (last != NULL) { size_t last_len = strlen(last); char* temp = calloc(last_len + size_n + 1, sizeof(char)); strcpy(temp, last); strcat(temp, new_last); free(last); last = temp; } else { last = new_last; } current = root; } } if (last != NULL) { output->length = strlen(last); output->result = malloc(output->length + 1); strcpy(output->result, last); free(last); }else { output->result = NULL; output->length = 0; } process_trie(root, NULL, 0, NULL, 1); return output->length; } void processRLE(struct RunLengthData* data, struct Output* output) { if (data->head != -1) { output->result[data->position] = data->head; output->result[data->position + 1] = data->length; data->position += 2; } } int RLEcompress(struct Input input, struct Output* output) { output->result = calloc(2 * input.size, sizeof(char)); if(output->result == NULL) { return -1; } struct RunLengthData RLE_information; memset(&RLE_information, 0, sizeof(struct RunLengthData)); for (int i = 0; i < input.size; i++) { int currentChar = input.buffer[i]; if(RLE_information.position + 1 >= 2 * input.size) { break; } if (currentChar == RLE_information.head && RLE_information.length < MAX_REPEAT) { RLE_information.length += 1; if (i == input.size - 1) { output->result[RLE_information.position] = currentChar; output->result[RLE_information.position + 1] = RLE_information.length; RLE_information.position += 2; } } else { if (RLE_information.length > 0) { output->result[RLE_information.position] = RLE_information.head; output->result[RLE_information.position + 1] = RLE_information.length; RLE_information.position += 2; } if (i == input.size - 1) { output->result[RLE_information.position] = currentChar; output->result[RLE_information.position + 1] = 1; RLE_information.position += 2; } else { RLE_information.head = currentChar; RLE_information.length = 1; } } } output->length = RLE_information.position; return output->length; } int compress(const char* input_file_name, const char* output_file_name) { FILE *infile = fopen(input_file_name, "rb"); if (infile == NULL) { perror("Error opening input file"); return -1; } fseek(infile, 0, SEEK_END); int insize = ftell(infile) + 1; rewind(infile); char* buffer = calloc(insize, sizeof(char)); memset(buffer, 0, insize); insize = fread(buffer, sizeof(char), insize - 1, infile); if (insize == 0) { assert(!ferror(infile)); } fclose(infile); FILE *outfile = fopen(output_file_name, "wb"); if (outfile == NULL) { perror("Error opening output file"); free(buffer); return -1; } struct Input input = {.buffer = buffer, .size = insize}; struct Output tempOutput; RLEcompress(input, &tempOutput); struct Input lz78input = {.buffer = tempOutput.result, .size = tempOutput.length}; struct Output finalOutput; LZ78compress(lz78input, &finalOutput); if (finalOutput.length > 0) { fwrite(finalOutput.result, sizeof(char), finalOutput.length, outfile); } free(buffer); free(tempOutput.result); free(finalOutput.result); fclose(outfile); return 0;