/*
 * Two-stage file compressor.
 *
 * Compression first applies run-length encoding (RLE) and then an LZ78
 * dictionary coder; decompression reverses the order: LZ78 first, then RLE.
 *
 * On-disk format (unchanged from the original implementation):
 *   - RLE stage: a sequence of (byte, count) pairs, count in 1..255.
 *   - LZ78 stage: one textual codeword per phrase, "-<prefix>-<byte>\n",
 *     where <prefix> is the decimal index of an earlier phrase (0 = empty
 *     phrase) and <byte> is the raw byte that extends it.
 *
 * All stages treat their data as raw bytes, so inputs containing 0x00 or
 * bytes >= 0x80 round-trip correctly.
 */

#include <assert.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * compressor.h is the project header for this module. The guard only lets
 * this translation unit be compiled on its own (e.g. by a unit-test harness)
 * when the header is not on the include path.
 */
#if defined(__has_include)
#  if __has_include("compressor.h")
#    include "compressor.h"
#  endif
#else
#  include "compressor.h"
#endif

#define BUFSIZE 1024 /* chunk size used when reading an input stream */
#define SIZE 256     /* fan-out of a trie node: one slot per byte value */

/* One codeword is: index of the previous phrase - new byte. */

/* Trie (a.k.a. prefix tree) holding the LZ78 dictionary. */
struct dict {
    int index;
    /* A child exists for byte value b iff characters[b] is non-NULL. */
    struct dict* characters[SIZE];
};

/* Growable byte buffer; always keeps room for one trailing zero byte. */
struct bytebuf {
    unsigned char *data;
    size_t len;
    size_t cap;
};

/* Location of one decoded LZ78 phrase inside the decoder's output buffer. */
struct phrase {
    size_t start;
    size_t len;
};

/* Ensure `extra` more bytes (plus a terminator) fit. Returns 0 on success. */
static int bytebuf_reserve(struct bytebuf *buf, size_t extra)
{
    if (extra > SIZE_MAX - buf->len - 1) {
        return -1;
    }
    size_t need = buf->len + extra + 1;
    if (need <= buf->cap) {
        return 0;
    }
    size_t cap = buf->cap ? buf->cap : 64;
    while (cap < need) {
        if (cap > SIZE_MAX / 2) {
            cap = need;
            break;
        }
        cap *= 2;
    }
    unsigned char *grown = realloc(buf->data, cap);
    if (grown == NULL) {
        return -1;
    }
    buf->data = grown;
    buf->cap = cap;
    return 0;
}

/*
 * Zero-terminate the buffer and hand its storage to the caller (who frees
 * it). Returns NULL, after releasing the storage, if memory runs out.
 */
static char *bytebuf_release(struct bytebuf *buf)
{
    if (bytebuf_reserve(buf, 0) != 0) {
        free(buf->data);
        buf->data = NULL;
        return NULL;
    }
    buf->data[buf->len] = '\0';
    return (char *)buf->data;
}

/* Append the codeword "-<prefix>-<literal>\n". Returns 0 on success. */
static int emit_codeword(struct bytebuf *out, int prefix, unsigned char literal)
{
    char head[16];
    int n = snprintf(head, sizeof head, "-%d-", prefix);
    if (n < 0 || (size_t)n >= sizeof head) {
        return -1;
    }
    if (bytebuf_reserve(out, (size_t)n + 2) != 0) {
        return -1;
    }
    memcpy(out->data + out->len, head, (size_t)n);
    out->len += (size_t)n;
    out->data[out->len++] = literal;
    out->data[out->len++] = '\n';
    return 0;
}

/*
 * Parse one codeword starting at in[*pos]. On success stores the prefix
 * index and literal byte, advances *pos past the codeword and returns 1.
 * Returns 0 (leaving *pos untouched) on malformed or truncated input.
 */
static int parse_codeword(const unsigned char *in, size_t n, size_t *pos,
                          size_t *prefix, unsigned char *literal)
{
    size_t q = *pos;
    if (q >= n || in[q] != '-') {
        return 0;
    }
    q++;

    size_t value = 0;
    size_t digits = 0;
    while (q < n && in[q] >= '0' && in[q] <= '9') {
        if (value > (SIZE_MAX - 9) / 10) {
            return 0; /* number too large to be a valid index */
        }
        value = value * 10 + (size_t)(in[q] - '0');
        q++;
        digits++;
    }
    if (digits == 0 || q >= n || in[q] != '-') {
        return 0;
    }
    q++;

    if (q >= n) {
        return 0; /* literal byte is missing */
    }
    *literal = in[q++];

    /* The terminator is optional so a stream lacking the final newline
     * still decodes, as it did with the original scanf-based parser. */
    if (q < n && in[q] == '\n') {
        q++;
    }

    *prefix = value;
    *pos = q;
    return 1;
}

/*
 * Read `stream` from its beginning to EOF. Returns a malloc'd buffer (caller
 * frees) and stores its length in *length; returns NULL on read error, on
 * allocation failure, or if the data does not fit in an int.
 */
static char *read_stream(FILE *stream, size_t *length)
{
    struct bytebuf data = {0};

    rewind(stream); /* the original always processed the file from offset 0 */
    for (;;) {
        if (bytebuf_reserve(&data, BUFSIZE) != 0) {
            goto fail;
        }
        size_t got = fread(data.data + data.len, 1, BUFSIZE, stream);
        data.len += got;
        if (data.len > (size_t)INT_MAX) {
            goto fail;
        }
        if (got < BUFSIZE) {
            if (ferror(stream)) {
                goto fail;
            }
            break;
        }
    }
    *length = data.len;
    return bytebuf_release(&data);

fail:
    free(data.data);
    return NULL;
}

/* Free a trie: release every descendant first, then the node itself. */
void delete(struct dict* tree)
{
    if (tree == NULL) {
        return;
    }
    for (int i = 0; i < SIZE; i++) {
        delete(tree->characters[i]);
    }
    free(tree);
}

/*
 * Walk the trie and store, for every node, its codeword "-<parent>-<byte>\n"
 * in words[node->index]. `words` must have room for every index in the tree;
 * each stored entry is calloc'd and owned by the caller. An entry is left
 * NULL if its allocation fails. Note that the codeword of a 0x00 byte
 * contains an embedded NUL, so entries are not reliably C strings.
 */
void get_words(struct dict* tree, char* words[])
{
    if (tree == NULL) {
        return;
    }
    for (int i = 0; i < SIZE; i++) {
        struct dict *child = tree->characters[i];
        if (child == NULL) {
            continue;
        }
        int need = snprintf(NULL, 0, "-%d-%c\n", tree->index, i);
        if (need > 0) {
            char *entry = calloc((size_t)need + 1, sizeof *entry);
            if (entry != NULL) {
                snprintf(entry, (size_t)need + 1, "-%d-%c\n", tree->index, i);
            }
            words[child->index] = entry;
        }
        get_words(child, words);
    }
}

/*
 * LZ78-encode `size` bytes of `buff`.
 * Background: https://medium.com/@dbudhrani/how-data-compression-works-exploring-lz78-e97e539138
 *
 * The trie represents the phrases seen so far; the root is the empty phrase
 * with index 0. Stores a malloc'd, zero-terminated buffer in *outbuf (caller
 * frees) and returns its length, or returns -1 with *outbuf == NULL on
 * invalid size or allocation failure.
 */
int mylz78compress(char* buff, int size, char** outbuf)
{
    *outbuf = NULL;
    if (size < 0) {
        return -1;
    }

    struct dict *root = calloc(1, sizeof *root);
    if (root == NULL) {
        return -1;
    }

    struct bytebuf out = {0};
    struct dict *current = root;
    int next_index = 1;

    for (int i = 0; i < size; i++) {
        /* Index by the unsigned byte value: a plain (possibly signed) char
         * would address characters[] with a negative subscript. */
        unsigned char c = (unsigned char)buff[i];
        struct dict *child = current->characters[c];

        if (child != NULL && i != size - 1) {
            current = child; /* phrase still matches, keep extending it */
            continue;
        }

        /* Either a new phrase ends here, or the input ends in the middle of
         * a known phrase and the pending prefix must be flushed. */
        if (emit_codeword(&out, current->index, c) != 0) {
            goto fail;
        }
        if (child == NULL) {
            child = calloc(1, sizeof *child);
            if (child == NULL) {
                goto fail;
            }
            child->index = next_index++;
            current->characters[c] = child;
        }
        current = root;
    }

    if (out.len > (size_t)INT_MAX) {
        goto fail;
    }
    delete(root);
    *outbuf = bytebuf_release(&out);
    return *outbuf != NULL ? (int)out.len : -1;

fail:
    delete(root);
    free(out.data);
    return -1;
}

/*
 * Run-length encode `size` bytes of `buff` as (byte, count) pairs, with
 * runs longer than 255 split into several pairs. Stores a malloc'd buffer
 * in *outbuf (caller frees) and returns the number of bytes written, or -1
 * with *outbuf == NULL on invalid size or allocation failure.
 */
int myrlcompress(char* buff, int size, char** outbuf)
{
    *outbuf = NULL;
    /* The worst case doubles the input; the result must still fit an int. */
    if (size < 0 || size > (INT_MAX - 1) / 2) {
        return -1;
    }

    size_t n = (size_t)size;
    unsigned char *out = calloc(2 * n + 1, sizeof *out);
    if (out == NULL) {
        return -1;
    }

    const unsigned char *in = (const unsigned char *)buff;
    size_t pos = 0;
    size_t i = 0;
    while (i < n) {
        unsigned char byte = in[i];
        size_t run = 1;
        while (i + run < n && run < 255 && in[i + run] == byte) {
            run++;
        }
        out[pos++] = byte;
        out[pos++] = (unsigned char)run;
        i += run;
    }

    *outbuf = (char *)out;
    return (int)pos;
}

/*
 * Compress `infile` (read from its beginning) into `outfile`: RLE first,
 * then LZ78. Nothing is written if the input is empty, cannot be read, or
 * memory runs out.
 */
void compress(FILE* infile, FILE* outfile)
{
    size_t insize = 0;
    char *input = read_stream(infile, &insize);
    if (input == NULL) {
        return;
    }

    char *rle = NULL;
    char *packed = NULL;
    int rle_size = myrlcompress(input, (int)insize, &rle);
    if (rle_size >= 0) {
        int packed_size = mylz78compress(rle, rle_size, &packed);
        if (packed_size > 0) {
            fwrite(packed, sizeof(char), (size_t)packed_size, outfile);
        }
    }

    free(packed);
    free(rle);
    free(input);
}

/*
 * Expand (byte, count) pairs produced by myrlcompress. A trailing unpaired
 * byte is ignored. Stores a malloc'd, zero-terminated buffer in *outbuf
 * (caller frees) and returns the decoded length, or -1 with *outbuf == NULL
 * on invalid size, allocation failure, or output larger than INT_MAX.
 */
int myrldecompress(char* buff, int size, char** outbuf)
{
    *outbuf = NULL;
    if (size < 0) {
        return -1;
    }

    const unsigned char *in = (const unsigned char *)buff;
    size_t n = (size_t)size;
    struct bytebuf out = {0};

    for (size_t i = 0; i + 1 < n; i += 2) {
        unsigned char byte = in[i];
        size_t run = in[i + 1]; /* unsigned: counts 128..255 are valid */

        if (run > (size_t)INT_MAX - out.len || bytebuf_reserve(&out, run) != 0) {
            free(out.data);
            return -1;
        }
        memset(out.data + out.len, byte, run);
        out.len += run;
    }

    *outbuf = bytebuf_release(&out);
    return *outbuf != NULL ? (int)out.len : -1;
}

/*
 * Decode a stream of "-<prefix>-<byte>\n" codewords. No trie is needed:
 * every phrase is a slice of the output already produced, so only its
 * offset and length are remembered. Decoding stops at the first malformed
 * codeword and returns what was decoded up to that point.
 *
 * Stores a malloc'd, zero-terminated buffer in *outbuf (caller frees) and
 * returns the decoded length, or -1 with *outbuf == NULL on invalid size,
 * allocation failure, or output larger than INT_MAX.
 */
int mylz78decompress(char* buff, int size, char** outbuf)
{
    *outbuf = NULL;
    if (size < 0) {
        return -1;
    }

    const unsigned char *in = (const unsigned char *)buff;
    size_t n = (size_t)size;
    struct bytebuf out = {0};

    size_t cap = 64;
    size_t count = 1;
    struct phrase *phrases = malloc(cap * sizeof *phrases);
    if (phrases == NULL) {
        return -1;
    }
    phrases[0].start = 0; /* phrase 0 is the empty phrase */
    phrases[0].len = 0;

    size_t pos = 0;
    size_t prefix = 0;
    unsigned char literal = 0;
    while (parse_codeword(in, n, &pos, &prefix, &literal)) {
        if (prefix >= count) {
            break; /* refers to a phrase that has not been defined yet */
        }

        size_t plen = phrases[prefix].len;
        if (plen + 1 > (size_t)INT_MAX - out.len ||
            bytebuf_reserve(&out, plen + 1) != 0) {
            goto fail;
        }
        if (count == cap) {
            if (cap > SIZE_MAX / 2 / sizeof *phrases) {
                goto fail;
            }
            struct phrase *grown = realloc(phrases, 2 * cap * sizeof *phrases);
            if (grown == NULL) {
                goto fail;
            }
            phrases = grown;
            cap *= 2;
        }

        /* The source slice lies entirely before out.len, so the regions
         * cannot overlap. */
        memcpy(out.data + out.len, out.data + phrases[prefix].start, plen);
        out.data[out.len + plen] = literal;

        phrases[count].start = out.len;
        phrases[count].len = plen + 1;
        count++;
        out.len += plen + 1;
    }

    free(phrases);
    *outbuf = bytebuf_release(&out);
    return *outbuf != NULL ? (int)out.len : -1;

fail:
    free(phrases);
    free(out.data);
    return -1;
}

/*
 * Decompress `infile` (read from its beginning) into `outfile`: LZ78 first,
 * then RLE. Nothing is written if the input is empty, cannot be read, or
 * memory runs out.
 */
void decompress(FILE* infile, FILE* outfile)
{
    size_t insize = 0;
    char *input = read_stream(infile, &insize);
    if (input == NULL) {
        return;
    }

    char *rle = NULL;
    char *plain = NULL;
    int rle_size = mylz78decompress(input, (int)insize, &rle);
    if (rle_size >= 0) {
        int plain_size = myrldecompress(rle, rle_size, &plain);
        if (plain_size > 0) {
            fwrite(plain, sizeof(char), (size_t)plain_size, outfile);
        }
    }

    free(plain);
    free(rle);
    free(input);
}