usaa21/sk2a/readme.md
2022-01-20 08:04:32 +01:00

8.8 KiB

#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "compressor.h"

#define BUFSIZE 1024
#define SIZE 256
#define MAX_WORDS 100000

// One codeword has the form "-<index of the previous (prefix) phrase>-<new character>\n"

// Trie (a.k.a prefix tree) struct dict { int index; // Character is encoded as non zero value in characters struct dict* characters[SIZE]; };

// Free trie void delete(struct dict* tree){ //skopirovane z lz78, vymaze strukturu co sa pouziva v lz78 if (tree == NULL){ //maze to pamat stromu return; } for (int i = 0 ;i < SIZE; i++){ delete(tree-> characters[i]); } }

void get_words (struct dict* tree,char* words[]){ //posielam cely strom a smernik na dvojrozmerne pole if (tree == NULL){ return; } for (int i = 0 ;i < SIZE; i++){ //prejdeme vsetkych potomkov if (tree-> characters[i] > 0){ //zistime ci tam je nieco naalokovane int need_size = snprintf(NULL,0,"-%d-%c\n",tree->index,i); //kolko pamate words[tree->characters[i]->index] = calloc(need_size + 1,sizeof(char)); //naalokujeme pamat memset(words[tree->characters[i]->index],0,need_size + 1); sprintf(words[tree->characters[i]->index],"-%d-%c\n",tree->index,i); //napise sa vo formate -index predosleho notu - aktualny znak

        get_words(tree-> characters[i],words);								//zavolam na kazdeho potomka a potom na dalsich potomkov alebo syna
    }
}

}

int mylz78compress(char* buff,int size,char** outbuf){ // https://medium.com/@dbudhrani/how-data-compression-works-exploring-lz78-e97e539138 // The trie represents the current phrase // The first node represents the empty string with index 0 struct dict* root = calloc(1,sizeof(struct dict)); struct dict* current = root; int index = 1; int c = 0; char *last = NULL; for(int i = 0; i < size; i++){ c = buff[i]; // Current character is in dictionary, continue searching if (current->characters[c] > 0){
// ak este nieco ostalo a toto je koniec if (i == size - 1){ // pridem na koniec suboru a musim si to zapamatat, inac to stratim int sz_needed = snprintf(NULL,0,"-%d-%c\n",current->index,c); //kolko bajtov to bude zaberat last = calloc(sz_needed + 1,sizeof(char)); //alokuje do do last sprintf(last,"-%d-%c\n",current->index,c); // }

        current = current->characters[c];
    }
    else {
        current->characters[c] = calloc(1,sizeof(struct dict));
        current->characters[c]->index = index;
        index += 1;
        current = root;
    }
}

char **words = calloc(index,sizeof(char*));					// z stromu vytvori format
get_words(root,words);

int size_needed = 0;		
for(int i=1; i < index; i++){									//nastavime velkost aku bude mat vystupny buffer
    if(words[i] != NULL){
        size_needed += strlen(words[i]);						//poscitavame velkosti vsetkych stringov
    }
}

if (last != NULL){												//mame string last?
    size_needed += strlen(last);								//priratam
}

char *temp = calloc(size_needed + 10,sizeof(char));				//alokujem velkost, padalo mi to ked som mal mensie cislo ako 10
int pos = 0;													//pamatam kde som to uz vypisal
for(int i=0; i < index; i++){
    if (words[i] != NULL){
        strcpy(temp + pos,words[i]);							//nebude sa to pisat na zaciatok ale az za tym
        pos += strlen(words[i]);
    }
}

if (last != NULL){											// zapisem aj last
    strcpy(temp + pos,last);
    pos += strlen(last);
}

delete(root);													//nepotrebujeme strom, uvolnime pamat


*outbuf = temp;										//adresa sa zapise do outbuffer
return strlen(temp);

}

/*
 * Run-length encode `size` bytes of `buff`.
 *
 * The output is a sequence of (byte, count) pairs, where count is stored in
 * one byte and lies in 1..255; longer runs are split into several pairs.
 *
 * buff:   input bytes (need not be NUL-terminated).
 * size:   number of input bytes; values <= 0 produce an empty output.
 * outbuf: receives a newly allocated, zero-initialised buffer of
 *         2 * size + 1 bytes owned by the caller (release with free());
 *         set to NULL on failure.
 *
 * Returns the number of bytes written, or -1 if the buffer could not be
 * allocated or the result would not fit in an int.
 */
int myrlcompress(char* buff,int size,char** outbuf){
    /* Bytes are handled as unsigned values so that 0xFF is an ordinary
     * character and counts above 127 are stored correctly. */
    const unsigned char *in = (const unsigned char *)buff;
    unsigned char *out = NULL;
    int pos = 0;
    int i = 0;

    *outbuf = NULL;
    if (size < 0){
        size = 0;
    }
    if (size > INT_MAX / 2){
        return -1;              /* 2 * size would overflow the int result */
    }

    /* 2 * size is always enough: in the worst case every byte stands alone. */
    out = calloc(2 * (size_t)size + 1,sizeof(unsigned char));
    if (out == NULL){
        return -1;
    }

    while (i < size){
        unsigned char head = in[i];
        int length = 1;

        /* Extend the run while the byte repeats and the count still fits
         * into a single output byte. */
        while (length < 255 && i + length < size && in[i + length] == head){
            length += 1;
        }

        out[pos] = head;
        out[pos + 1] = (unsigned char)length;
        pos += 2;
        i += length;
    }

    *outbuf = (char *)out;
    return pos;                 /* size of the output */
}

/*
 * Compress the whole content of `infile` and write the result to `outfile`.
 *
 * The input is first run-length encoded (myrlcompress) and the result is
 * then LZ78-encoded (mylz78compress).
 *
 * infile:  seekable input stream; it is read from the beginning to the end.
 * outfile: output stream that receives the compressed data.
 *
 * Nothing is written if the input size cannot be determined, if the input is
 * too large for the int-based buffer sizes, or if memory runs out; a message
 * is printed to stderr in those cases.
 */
void compress(FILE* infile,FILE* outfile){
    char *buffer = NULL;
    char *tempbuf = NULL;       /* allocated by myrlcompress */
    char *outbuf = NULL;        /* allocated by mylz78compress */
    int outsize = 0;

    /* Find the file size: move to the end, read the position, go back. */
    if (fseek(infile, 0, SEEK_END) != 0){
        fprintf(stderr,"compress: cannot seek in input\n");
        return;
    }
    long end = ftell(infile);
    rewind(infile);
    if (end < 0 || end > INT_MAX / 2){
        /* ftell failed, or 2 * size would not fit in the int sizes used
         * by the encoders. */
        fprintf(stderr,"compress: cannot handle input size\n");
        return;
    }

    /* One extra zero byte keeps the buffer NUL-terminated. */
    buffer = calloc((size_t)end + 1,sizeof(char));
    if (buffer == NULL){
        fprintf(stderr,"compress: out of memory\n");
        return;
    }

    int insize = (int)fread(buffer,sizeof(char),(size_t)end,infile);
    if (insize == 0){
        assert(!ferror(infile));
    }

    int tempsize = myrlcompress(buffer,insize,&tempbuf);
    if (tempbuf != NULL && tempsize > 0){
        outsize = mylz78compress(tempbuf,tempsize,&outbuf);
    }

    if (outbuf != NULL && outsize > 0){
        if (fwrite(outbuf,sizeof(char),(size_t)outsize,outfile) != (size_t)outsize){
            fprintf(stderr,"compress: short write\n");
        }
    }

    free(outbuf);
    free(tempbuf);
    free(buffer);
}

/*
 * Decode run-length encoded data.
 *
 * The input is a sequence of (byte, count) pairs with count stored as an
 * unsigned byte (0..255). A trailing byte without a count is ignored.
 *
 * buff:   encoded input.
 * size:   number of input bytes; values < 2 produce an empty output.
 * outbuf: receives a newly allocated buffer holding the decoded bytes plus
 *         one terminating zero byte; owned by the caller (release with
 *         free()). Set to NULL on failure.
 *
 * Returns the number of decoded bytes, or -1 if the buffer could not be
 * allocated or the decoded length would not fit in an int.
 */
int myrldecompress(char* buff,int size,char** outbuf){
    /* Counts are unsigned bytes; reading them through a plain (signed) char
     * would turn runs of 128..255 into negative lengths. */
    const unsigned char *in = (const unsigned char *)buff;
    size_t total = 0;
    size_t pos = 0;
    char *out = NULL;

    *outbuf = NULL;

    /* First pass: add up the run lengths so the output can be allocated
     * once, with exactly the right size. */
    for (int i = 0; size - i >= 2; i += 2){
        total += in[i + 1];
    }
    if (total >= (size_t)INT_MAX){
        return -1;
    }

    out = calloc(total + 1,sizeof(char));
    if (out == NULL){
        return -1;
    }

    /* Second pass: expand every pair into `count` copies of the byte. */
    for (int i = 0; size - i >= 2; i += 2){
        size_t length = in[i + 1];
        memset(out + pos,in[i],length);
        pos += length;
    }

    *outbuf = out;
    return (int)pos;
}

/*
 * Decode LZ78 text produced by mylz78compress.
 *
 * The input is a sequence of codewords "-<number>-<char>\n". Codeword k
 * (counting from 1) defines phrase k as phrase <number> followed by <char>;
 * phrase 0 is the empty string. The output is the concatenation of all
 * phrases in order.
 *
 * No trie is needed: every phrase is written to the output as soon as it is
 * defined, so the dictionary only remembers where in the output each phrase
 * starts and how long it is.
 *
 * buff:   encoded input; only the first `size` bytes are read.
 * size:   number of input bytes; values <= 0 produce an empty output.
 * outbuf: receives a newly allocated, NUL-terminated buffer owned by the
 *         caller (release with free()); set to NULL on failure.
 *
 * Decoding stops at the first codeword that is malformed or refers to a
 * phrase that has not been defined yet; everything decoded up to that point
 * is returned.
 *
 * Returns the number of decoded bytes, or -1 if memory could not be
 * allocated or the decoded length would not fit in an int.
 */
int mylz78decompress(char* buff,int size,char** outbuf){
    int result = -1;
    char *in = NULL;            /* NUL-terminated private copy of the input */
    char *out = NULL;
    size_t *starts = NULL;      /* starts[k]: offset of phrase k in out */
    size_t *lens = NULL;        /* lens[k]: length of phrase k */
    size_t out_cap = 0;
    size_t dict_cap = 64;
    size_t count = 1;           /* phrase 0 (empty) is always defined */
    size_t pos = 0;             /* write position in out */
    size_t consumed = 0;        /* input bytes parsed so far */

    *outbuf = NULL;
    if (size < 0){
        size = 0;
    }

    /* sscanf needs a terminated string, so parse a bounded copy instead of
     * trusting the caller to have terminated buff. */
    in = calloc((size_t)size + 1,sizeof(char));
    out_cap = (size_t)size + 1;
    out = calloc(out_cap,sizeof(char));
    starts = calloc(dict_cap,sizeof *starts);
    lens = calloc(dict_cap,sizeof *lens);
    if (in == NULL || out == NULL || starts == NULL || lens == NULL){
        goto cleanup;
    }
    if (size > 0){
        memcpy(in,buff,(size_t)size);
    }

    for (;;){
        int number = 0;
        int step = 0;
        char c = 0;

        /* Formatted read of one codeword; %n reports how many bytes it
         * occupied so that the next read starts right behind it. */
        if (sscanf(in + consumed,"-%d-%c\n%n",&number,&c,&step) != 2){
            break;
        }
        if (step <= 0){
            break;              /* no progress; avoid looping forever */
        }
        if (number < 0 || (size_t)number >= count){
            break;              /* reference to a phrase that does not exist */
        }
        consumed += (size_t)step;

        size_t prefix_len = lens[number];
        size_t word_len = prefix_len + 1;
        if (word_len > (size_t)INT_MAX - pos){
            goto cleanup;       /* the length is returned as an int */
        }

        /* Grow the output if the new phrase (plus terminator) does not fit.
         * The added tail is zeroed so the buffer stays zero-filled. */
        if (pos + word_len + 1 > out_cap){
            size_t new_cap = out_cap;
            while (new_cap < pos + word_len + 1){
                new_cap *= 2;
            }
            char *grown = realloc(out,new_cap);
            if (grown == NULL){
                goto cleanup;
            }
            memset(grown + out_cap,0,new_cap - out_cap);
            out = grown;
            out_cap = new_cap;
        }

        /* Grow the dictionary when it is full. */
        if (count == dict_cap){
            size_t new_cap = dict_cap * 2;
            size_t *grown_starts = realloc(starts,new_cap * sizeof *starts);
            if (grown_starts == NULL){
                goto cleanup;
            }
            starts = grown_starts;
            size_t *grown_lens = realloc(lens,new_cap * sizeof *lens);
            if (grown_lens == NULL){
                goto cleanup;
            }
            lens = grown_lens;
            dict_cap = new_cap;
        }

        /* New phrase = referenced phrase + c. The referenced phrase lies
         * entirely before pos, so the two regions never overlap. */
        memcpy(out + pos,out + starts[number],prefix_len);
        out[pos + prefix_len] = c;

        starts[count] = pos;
        lens[count] = word_len;
        count += 1;
        pos += word_len;
    }

    out[pos] = '\0';
    result = (int)pos;
    *outbuf = out;              /* ownership passes to the caller */
    out = NULL;

cleanup:
    free(lens);
    free(starts);
    free(out);
    free(in);
    return result;
}

/*
 * Decompress the whole content of `infile` and write the result to `outfile`.
 *
 * This reverses compress(): the input is first LZ78-decoded
 * (mylz78decompress) and the result is then run-length decoded
 * (myrldecompress).
 *
 * infile:  seekable input stream; it is read from the beginning to the end.
 * outfile: output stream that receives the decompressed data.
 *
 * Nothing is written if the input size cannot be determined, if the input is
 * too large for the int-based buffer sizes, or if memory runs out; a message
 * is printed to stderr in those cases.
 */
void decompress(FILE* infile,FILE* outfile){
    char *buffer = NULL;
    char *tempbuf = NULL;       /* allocated by mylz78decompress */
    char *outbuf = NULL;        /* allocated by myrldecompress */
    int outsize = 0;

    /* Find the file size: move to the end, read the position, go back. */
    if (fseek(infile, 0, SEEK_END) != 0){
        fprintf(stderr,"decompress: cannot seek in input\n");
        return;
    }
    long end = ftell(infile);
    rewind(infile);
    if (end < 0 || end >= INT_MAX){
        /* ftell failed, or the size does not fit in the int sizes used by
         * the decoders. */
        fprintf(stderr,"decompress: cannot handle input size\n");
        return;
    }

    /* One extra zero byte keeps the buffer NUL-terminated for the
     * text-based LZ78 decoder. */
    buffer = calloc((size_t)end + 1,sizeof(char));
    if (buffer == NULL){
        fprintf(stderr,"decompress: out of memory\n");
        return;
    }

    int insize = (int)fread(buffer,sizeof(char),(size_t)end,infile);
    if (insize == 0){
        assert(!ferror(infile));
    }

    /* LZ78 was applied last when compressing, so it is undone first. */
    int tempsize = mylz78decompress(buffer,insize,&tempbuf);
    if (tempbuf != NULL && tempsize > 0){
        outsize = myrldecompress(tempbuf,tempsize,&outbuf);
    }

    if (outbuf != NULL && outsize > 0){
        if (fwrite(outbuf,sizeof(char),(size_t)outsize,outfile) != (size_t)outsize){
            fprintf(stderr,"decompress: short write\n");
        }
    }

    free(outbuf);
    free(tempbuf);
    free(buffer);
}