usaa23/sk1/compressor.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "compressor.h"

#define SIZE 256
#define MAX_WORD_COUNT 100000
#define MAX_REPEAT 255

struct Input {
    char* buffer;
    int size;
};

struct Output {
    char* result;
    int length;
};

struct RunLengthData {
    int head;
    int length;
    int position;
};

//для хранения и управления словарём подстрок, в процессе сжатия (LZ78, RLE)
struct TrieNode {
    int id;
    struct TrieNode* toddler[SIZE];
};

struct TrieNode* create_node() {
    struct TrieNode* new_node = (struct TrieNode*)malloc(sizeof(struct TrieNode));
    new_node->id = 0;
    for (int i = 0; i < SIZE; i++) {
        new_node->toddler[i] = NULL;
    }
    return new_node;
}

void process_trie(struct TrieNode* root, char* word, int word_Id, char* words[], int operation) {
    if (root == NULL) {
    return;
    }

    if (operation == 1) { // Destroy operation
        for (int i = 0; i < SIZE; i++) {
            process_trie(root->toddler[i], NULL, 0, NULL, 1);
        }
        free(root);
    } else if (operation == 2 && word != NULL && *word != '\0') { // Add word operation
        int char_Id = (int)(*word);

        if (root->toddler[char_Id] == NULL) {
            root->toddler[char_Id] = create_node();
            root->toddler[char_Id]->id = word_Id;
        }

        int needSize = snprintf(NULL, 0, "-%d-%c\n", root->id, char_Id);
        words[root->toddler[char_Id]->id] = calloc(needSize + 1, sizeof(char));
        memset(words[root->toddler[char_Id]->id], 0, needSize + 1);
        sprintf(words[root->toddler[char_Id]->id], "-%d-%c\n", root->id, char_Id);

        process_trie(root->toddler[char_Id], word + 1, word_Id, words, 2);
      }
}

int LZ78compress(struct Input input, struct Output* output) {
    struct TrieNode* root = create_node();
    struct TrieNode* current = root;

    char* last = NULL;
    int currentIndex = 0;

    for (int i = 0; i < input.size; i++) {
        int character = input.buffer[i];

        if (current->toddler[character] != NULL) {
            current = current->toddler[character];
        } else {
            current->toddler[character] = create_node();
            current->toddler[character]->id = ++currentIndex;

        int size_n = snprintf(NULL, 0, "-%d-%c\n", current->id, character);
        char* new_last = calloc(size_n + 1, sizeof(char));
        sprintf(new_last, "-%d-%c\n", current->id, character );

            if (last != NULL) {
                size_t last_len = strlen(last);
                char* temp = calloc(last_len + size_n + 1, sizeof(char));
                strcpy(temp, last);
                strcat(temp, new_last);
                free(last);
                last = temp;
            } else {
                last = new_last;
            }
        current = root;
        }
    }

    if (last != NULL) {
        output->length = strlen(last);
        output->result = malloc(output->length + 1);
        strcpy(output->result, last);
        free(last);

    }else {
        output->result = NULL;
        output->length = 0;
    }

    process_trie(root, NULL, 0, NULL, 1);

    return output->length;
}


void processRLE(struct RunLengthData* data, struct Output* output) {
    if (data->head != -1) {
        output->result[data->position] = data->head;
        output->result[data->position + 1] = data->length;
        data->position += 2;
    }
}

int RLEcompress(struct Input input, struct Output* output) {
    output->result = calloc(2 * input.size, sizeof(char));

    if(output->result == NULL) {
       return -1;
    }

    struct RunLengthData RLE_information;
    memset(&RLE_information, 0, sizeof(struct RunLengthData));

    for (int i = 0; i < input.size; i++) {
        int currentChar = input.buffer[i];

            if(RLE_information.position + 1 >= 2 * input.size) {
               break;
            }

            if (currentChar == RLE_information.head && RLE_information.length < MAX_REPEAT) {
            RLE_information.length += 1;

            if (i == input.size - 1) {
                output->result[RLE_information.position] = currentChar;
                output->result[RLE_information.position + 1] = RLE_information.length;
                RLE_information.position += 2;
            }
            } else {
                if (RLE_information.length > 0) {
                output->result[RLE_information.position] = RLE_information.head;
                output->result[RLE_information.position + 1] = RLE_information.length;
                RLE_information.position += 2;
            }

            if (i == input.size - 1) {
                output->result[RLE_information.position] = currentChar;
                output->result[RLE_information.position + 1] = 1;
                RLE_information.position += 2;
            } else {
                RLE_information.head = currentChar;
                RLE_information.length = 1;
            }
        }
    }

    output->length = RLE_information.position;
    return output->length;
}

int compress(const char* input_file_name, const char* output_file_name) {
    FILE *infile = fopen(input_file_name, "rb");
    if (infile == NULL) {
        perror("Error opening input file");
        return -1;
    }

    fseek(infile, 0, SEEK_END);
    int insize = ftell(infile) + 1;
    rewind(infile);
    char* buffer = calloc(insize, sizeof(char));

    memset(buffer, 0, insize);
    insize = fread(buffer, sizeof(char), insize - 1, infile);

    if (insize == 0) {
        assert(!ferror(infile));
    }

    fclose(infile);

    FILE *outfile = fopen(output_file_name, "wb");
    if (outfile == NULL) {
        perror("Error opening output file");
        free(buffer);
        return -1;
    }

    struct Input input = {.buffer = buffer, .size = insize};
    struct Output tempOutput;

    RLEcompress(input, &tempOutput);

    struct Input lz78input = {.buffer = tempOutput.result, .size = tempOutput.length};
    struct Output finalOutput;

    LZ78compress(lz78input, &finalOutput);

    if (finalOutput.length > 0) {
        fwrite(finalOutput.result, sizeof(char), finalOutput.length, outfile);
    }

    free(buffer);
    free(tempOutput.result);
    free(finalOutput.result);

    fclose(outfile);

    return 0;