/* One child slot per possible character code (0..255). */
#define SIZE 256

/*
 * Trie (prefix tree) node used as the LZ78 phrase dictionary.
 *
 * One emitted codeword has the textual form "-<parent index>-<character>\n".
 *
 * index      - number of the phrase that ends at this node; the root
 *              represents the empty phrase and has index 0.
 * characters - child nodes, indexed by character code. A non-NULL slot
 *              means the phrase has been extended by that character.
 */
struct dict {
    int index;
    struct dict *characters[SIZE];
};

/*
 * Recursively frees a trie and every node reachable from it.
 *
 * tree - root of the (sub)trie to release; NULL is accepted and ignored.
 */
void delete(struct dict *tree)
{
    if (tree == NULL) {
        return;
    }
    for (int i = 0; i < SIZE; i++) {
        delete(tree->characters[i]);
    }
    free(tree);
}

/*
 * Renders every edge of the trie as an LZ78 codeword string.
 *
 * For each child reached from `tree` through character `i`, a freshly
 * allocated string "-<tree->index>-<i>\n" is stored at
 * words[child->index]. Children are visited recursively.
 *
 * tree  - root of the (sub)trie to walk; NULL is accepted and ignored.
 * words - output array; it must have room for every index stored in the
 *         trie. The caller owns the stored strings and must free() them.
 *
 * If a string cannot be allocated, its slot is left NULL instead of being
 * written through a null pointer.
 *
 * NOTE(review): the edge for character code 0 is formatted with "%c" and
 * therefore contains an embedded NUL; strlen()-based consumers will see a
 * truncated codeword.
 */
void get_words(struct dict *tree, char *words[])
{
    if (tree == NULL) {
        return;
    }
    for (int i = 0; i < SIZE; i++) {
        struct dict *child = tree->characters[i];
        if (child == NULL) {
            continue;
        }

        /* First pass only measures, second pass writes into an exact fit. */
        int len = snprintf(NULL, 0, "-%d-%c\n", tree->index, i);
        char *word = NULL;
        if (len >= 0) {
            word = calloc((size_t)len + 1, sizeof *word);
            if (word != NULL) {
                snprintf(word, (size_t)len + 1, "-%d-%c\n", tree->index, i);
            }
        }
        words[child->index] = word;

        get_words(child, words);
    }
}
/*
 * Run-length encodes a byte buffer.
 *
 * The output is a sequence of (byte, count) pairs where count is stored in
 * a single byte in the range 1..255; longer runs are split into several
 * pairs.
 *
 * buff   - input bytes; may contain any value, including 0x00 and 0xFF.
 * size   - number of input bytes; values <= 0 are treated as empty input.
 * outbuf - receives a newly allocated buffer of 2 * size + 1 bytes (enough
 *          for the worst case of every byte standing alone). The caller
 *          must free() it. Set to NULL if allocation fails.
 *
 * Returns the number of bytes written to *outbuf, or -1 if the output
 * buffer could not be allocated.
 *
 * Bytes are compared as unsigned char, so no input value can be confused
 * with an "empty run" marker regardless of whether plain char is signed.
 *
 * NOTE(review): the return type is int, so inputs larger than roughly half
 * of INT_MAX can produce more output than the return value can express.
 */
int myrlcompress(char* buff, int size, char** outbuf)
{
    enum { MAX_RUN = 255 };   /* largest count that fits in one byte */

    const unsigned char *in = (const unsigned char *)buff;
    const size_t in_len = size > 0 ? (size_t)size : 0;

    unsigned char *out = calloc(2 * in_len + 1, sizeof *out);
    *outbuf = (char *)out;
    if (out == NULL) {
        return -1;
    }

    int pos = 0;
    size_t i = 0;
    while (i < in_len) {
        const unsigned char byte = in[i];

        /* Extend the run while the same byte repeats and the count fits. */
        size_t run = 1;
        while (run < MAX_RUN && i + run < in_len && in[i + run] == byte) {
            run++;
        }

        out[pos++] = byte;
        out[pos++] = (unsigned char)run;
        i += run;
    }

    return pos;
}
/*
 * Expands a run-length encoded buffer of (byte, count) pairs.
 *
 * buff   - encoded input; each pair is one data byte followed by one count
 *          byte. The count is read as an unsigned value (0..255).
 * size   - number of encoded bytes. Values <= 0 are treated as empty
 *          input, and a trailing unpaired byte (odd size) is ignored.
 * outbuf - receives a newly allocated buffer holding the decoded bytes
 *          followed by one terminating zero byte. The caller must free()
 *          it. Set to NULL on failure.
 *
 * Returns the number of decoded bytes, or -1 if the result does not fit in
 * an int or the output buffer could not be allocated.
 *
 * The decoded length is computed up front so the output is allocated once.
 * The input length is never modified, and no string functions are used, so
 * decoded data may contain zero bytes.
 */
int myrldecompress(char* buff, int size, char** outbuf)
{
    const unsigned char *in = (const unsigned char *)buff;
    const size_t pairs = size > 0 ? (size_t)size / 2 : 0;
    /* Largest value of int, derived without needing <limits.h>. */
    const unsigned long long max_len = ~0u >> 1;

    *outbuf = NULL;

    /* First pass: add up all counts to learn the exact output size. */
    unsigned long long total = 0;
    for (size_t p = 0; p < pairs; p++) {
        total += in[2 * p + 1];
    }
    if (total > max_len) {
        return -1;
    }

    char *out = calloc((size_t)total + 1, sizeof *out);
    if (out == NULL) {
        return -1;
    }

    /* Second pass: write each run. */
    size_t pos = 0;
    for (size_t p = 0; p < pairs; p++) {
        const size_t count = in[2 * p + 1];
        memset(out + pos, in[2 * p], count);
        pos += count;
    }

    *outbuf = out;
    return (int)pos;
}
/*
 * Decompresses the whole of `infile` and writes the result to `outfile`.
 *
 * The input is read completely into memory, decoded with
 * mylz78decompress() and then with myrldecompress(), i.e. in the reverse
 * order of the two stages applied by compress().
 *
 * infile  - stream opened for reading; it must be seekable, because its
 *           length is determined with fseek()/ftell().
 * outfile - stream opened for writing.
 *
 * The function returns nothing. On failure (NULL stream, seek/read/write
 * error, input too large, out of memory) a message is printed to stderr
 * and no further output is written. An empty input produces an empty
 * output.
 */
void decompress(FILE* infile, FILE* outfile)
{
    if (infile == NULL || outfile == NULL) {
        fprintf(stderr, "decompress: input or output file is not open\n");
        return;
    }

    if (fseek(infile, 0, SEEK_END) != 0) {
        perror("decompress: fseek");
        return;
    }
    const long file_len = ftell(infile);
    if (file_len < 0) {
        perror("decompress: ftell");
        return;
    }
    rewind(infile);

    /* The decoding stages take int sizes; refuse anything larger.
     * (~0u >> 1) is the largest int, derived without <limits.h>. */
    const long int_max = (long)(~0u >> 1);
    if (file_len >= int_max) {
        fprintf(stderr, "decompress: input is too large\n");
        return;
    }

    /* One extra zeroed byte keeps the buffer NUL-terminated, which the
     * sscanf()-based LZ78 decoder depends on. */
    char *buffer = calloc((size_t)file_len + 1, sizeof *buffer);
    if (buffer == NULL) {
        fprintf(stderr, "decompress: out of memory\n");
        return;
    }

    const size_t nread = fread(buffer, sizeof *buffer, (size_t)file_len, infile);
    if (ferror(infile)) {
        fprintf(stderr, "decompress: read error\n");
        free(buffer);
        return;
    }
    if (nread == 0) {
        /* Nothing to decode. */
        free(buffer);
        return;
    }

    char *tempbuf = NULL;
    const int tempsize = mylz78decompress(buffer, (int)nread, &tempbuf);

    char *outbuf = NULL;
    int outsize = 0;
    if (tempbuf != NULL && tempsize > 0) {
        outsize = myrldecompress(tempbuf, tempsize, &outbuf);
    }

    if (outsize > 0 && outbuf != NULL) {
        const size_t written = fwrite(outbuf, sizeof *outbuf, (size_t)outsize, outfile);
        if (written != (size_t)outsize) {
            fprintf(stderr, "decompress: write error\n");
        }
    }

    free(buffer);
    free(tempbuf);
    free(outbuf);
}
/*
 * Command-line entry point.
 *
 *   program -c <infile> <outfile>   compress infile into outfile
 *   program -d <infile> <outfile>   decompress infile into outfile
 *
 * Only the second character of the first argument selects the action
 * ('c' or 'd').
 *
 * Returns 0 on success and 1 when the arguments are invalid or a file
 * cannot be opened or closed.
 */
int main(int argc, char** argv)
{
    /* Check argv[1][0] first so an empty argument is never indexed past
     * its terminator. */
    const char *action = (argc == 4) ? argv[1] : NULL;
    const int valid = action != NULL
                      && action[0] != '\0'
                      && (action[1] == 'c' || action[1] == 'd');
    if (!valid) {
        printf("Usage: \n");
        printf(" Compress ./compress -c infile.txt outfile.compress\n");
        printf(" decompress ./compress -d outfile.compress infile.txt\n");
        return 1;
    }

    const char *infile = argv[2];
    const char *outfile = argv[3];

    /* Binary mode: the compressed stream contains raw count bytes that a
     * text-mode stream may translate on some platforms. The input is
     * opened first so a bad input path never truncates the output file. */
    FILE *inf = fopen(infile, "rb");
    if (inf == NULL) {
        perror(infile);
        return 1;
    }
    FILE *outf = fopen(outfile, "wb");
    if (outf == NULL) {
        perror(outfile);
        fclose(inf);
        return 1;
    }

    if (action[1] == 'c') {
        compress(inf, outf);
    } else {
        decompress(inf, outf);
    }

    int status = 0;
    fclose(inf);
    /* fclose() flushes buffered output, so a failure here means data loss. */
    if (fclose(outf) != 0) {
        perror(outfile);
        status = 1;
    }
    return status;
}
+-19-C +-11- +-58- +-38-s +-17-c +-8- +-44- +-77-a +-32-o +-2-q +-84-e +-19-p +-75-a +-32-i +-2-b +-84-s +-67-t +-19-m +-79-g +-77-i +-29- +-40-i +-103- +-0-p +-79-r +-32-u +-4- +-43-e +-77-t +-100- +-57-n +-32-e +-29-, +-19-n +-79-s +-31- +-33-t +-84-r +-19-r +-21-d +-21-c +-84-l +-98- +-78- +-64-s +-73- +-2-D +-28- +-33-c +-19-q +-84-a +-12- +-2-f +-36-l +-21-s +-26- +-38-u +-49-t +-109-i +-118-i +-36-s +-116- +-131- +-0-, +-94- +-33-l +-59-e +-111- +-33-s +-92- +-64-e +-67-u +-138- +-42-r +-69- +-43-u +-134- +-0-q +-84-i +-115- +-86- +-33-m +-128- +-0-N +-124- +-81- +-118-o +-77-s +-36-q +-133- +-34- +-12-a +-10- +-168- +-160- +-64-i +-105-e +-102- +-85-. +-19-D +-130-e +-118- +-42-e +-40-e +-19-j +-125-t +-17-, +-19-f +-141- +-0-n +-63-i +-16- +-176-v +-136- +-146- +-38-a +-62- +-177-u +-157- +-77-e +-118-, +-19-v +-124- +-106- +-64-t +-79-t +-36- +-36-g +-157-, +-23- +-37-c +-84-. +-19-I +-77- +-75-i +-159-j +-188- +-57-, +-121- +-0-h +-130-c +-125- +-84-t +-155- +-43-m +-185- +-37-d +-21-e +-32- +-79-, +-204- +-33-n +-95- +-34-i +-105-v +-21-t +-79-e +-226-j +-219-o +-165-N +-167-a +-159-d +-123- +-34-u +-159-f +-196-i +-179- +-64- +-228-d +-209- +-85-o +-194-i +-105-p +-109-e +-96- +-64-m +-165-I +-150-e +-63-e +-109- +-257-n +-142- +-0-d +-84-n +-32-. +-83- +-37-a +-105-d +-79-p +-21-b +-125-. +-19-V +-21-v +-79-m +-224- +-148- +-164- +-234- +-246- +-85- +-29-e +-12-p +-36-r +-144-i +-29-i +-165-A +-75-e +-79-n +-233-u +-49-p +-225- +-81-t +-252-e +-49-e +-21-f +-75-d +-19-t +-136- +-127- +-82- +-74- +-280-e +-290- +-38-l +-36-o +-19-l +-21-g +-205-a +-226-p +-72- +-22- +-50- +-57-r +-154- +-197- +-169- +-192- +-44-e +-152-u +-208- +-38-v +-238- +-81-e +-226-e +-296- +-43-f +-298- +-198- +-45- +-317- +-280-i +-12-. +-52- +-65- +-43-q +-172-m +-308- +-315- +-279- +-305-t +-36-, +-70- +-81-p +-272- +-301- +-46- +-332-v +-275- +-33-r +-59-a +-132- +-178- +-44-, +-190- +-33-u +-193- +-294- +-330-, +-299- +-148- +-301-. 
+-19-P +-2-h +-117- +-362-u +-237- +-43-v +-285- +-176-n +-243- +-139- +-173- +-85-e +-108- +-89- +-0-v +-107- +-158- +-377-l +-79-o +-256- +-34-. +-19-Q +-161- +-44-q +-93- +-38-r +-293-r +-84-m +-288- +-304- +-81-n +-19-i +-284- +-351- +-265- +-110- +-384- +-38-E +-257-a +-159-u +-140- +-37-i +-264-e +-105-n +-137- +-43- +-2-v +-196- +-79-u +-63-u +-36-. +-268-u +-109-a +-97- +-314-u +-262- +-64-l +-352- +-85-c +-312-p +-285- +-373-l +-32-r +-245-i +-143- +-38-n +-408-i +-242- +-81-m +-67-g +-201- +-398-u +-21-. +-19-E +-402- +-282- +-37-h +-223- +-363- +-38-M +-239- +-331-e +-90- +-377-t +-36-m +-42-u +-162- +-114- +-65- +-347- +-33-g +-434-c +-130-d +-21-m +-75-t +-391- +-389- +-222- +-113- +-331-u +-450- +-320- +-439-q +-338- +-163-e +-396-e +-419-l +-346-e +-109-o +-226-s +-324- +-276- +-119- +-330-d +-21-p +-408-c +-348-g +-19-s +-448- +-429- +-33-q +-388- +-283- +-398- +-478- +-44-u +-334- +-38-N +-475- +-321- +-432- +-483-u +-77-c +-226-b +-49-a +-77-d +-474- +-378- +-278-, +-339-u +-118-t +-277-p +-205-v +-348-a +-109-, +-19-h +-329-r +-424-i +-231- +-43-d +-226-l +-312-e +-490- +-2-M +-444-c +-235-s +-144-e +-184- +-60- +-43-o +-99- +-330-n +-451- +-263- +-331-i +-40-u +-150- +-451-m +-449- +-44-. +-182- +-462-e +-521-v +-324-a +-252-s +-271- +-399-n +-19-u +-512-l +-471- +-37-o +-233-e +-202- +-319-a +-257-s +-356-a +-84-c +-346-u +-29-. +-19-N +-372-m +-353-u +-408- +-342- +-33-. +-437- +-236- +-494- +-287- +-374-a +-24- +-374-o +-109-c +-21- +-210- +-476- +-424-o +-105-f +-412- +-528- +-0-b +-277-t +-348-c +-122- +-64-n +-267- +-38-D +-386-s +-339-e +-17-. +-19-S +-36-d +-356-r +-480- +-43-l +-421- +-174- +-64-r +-556- +-44-i +-512-a +-564-t +-286- +-574- +-461-. 
+-534-o +-546-c +-481- +-522-a +-327-s +-601-a +-358-t +-59-i +-105-m +-101- +-547- +-302- +-2-S +-585- +-38-c +-130-s +-171- +-64-a +-32-, +-582- +-57- +-568-e +-512-b +-543-n +-529- +-439-s +-17-d +-79-l +-428-, +-212-u +-413- +-33- +-410- +-278-i +-512-c +-120- +-489- +-377-n +-266- +-331-, +-2- + +-0- diff --git a/sk2a/test.in b/sk2a/test.in new file mode 100644 index 0000000..5ab596c --- /dev/null +++ b/sk2a/test.in @@ -0,0 +1,2 @@ +Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu, consequat vitae, eleifend ac, enim. Aliquam lorem ante, dapibus in, viverra quis, feugiat a, tellus. Phasellus viverra nulla ut metus varius laoreet. Quisque rutrum. Aenean imperdiet. Etiam ultricies nisi vel augue. Curabitur ullamcorper ultricies nisi. Nam eget dui. Etiam rhoncus. Maecenas tempus, tellus eget condimentum rhoncus, sem quam semper libero, sit amet adipiscing sem neque sed ipsum. Nam quam nunc, blandit vel, luctus pulvinar, hendrerit id, lorem. Maecenas nec odio et ante tincidunt tempus. Donec vitae sapien ut libero venenatis faucibus. Nullam quis ante. Etiam sit amet orci eget eros faucibus tincidunt. Duis leo. Sed fringilla mauris sit amet nibh. Donec sodales sagittis magna. 
Sed consequat, leo eget bibendum sodales, augue velit cursus nunc, + diff --git a/sk2a/test.out b/sk2a/test.out new file mode 100644 index 0000000..5ab596c --- /dev/null +++ b/sk2a/test.out @@ -0,0 +1,2 @@ +Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu, consequat vitae, eleifend ac, enim. Aliquam lorem ante, dapibus in, viverra quis, feugiat a, tellus. Phasellus viverra nulla ut metus varius laoreet. Quisque rutrum. Aenean imperdiet. Etiam ultricies nisi vel augue. Curabitur ullamcorper ultricies nisi. Nam eget dui. Etiam rhoncus. Maecenas tempus, tellus eget condimentum rhoncus, sem quam semper libero, sit amet adipiscing sem neque sed ipsum. Nam quam nunc, blandit vel, luctus pulvinar, hendrerit id, lorem. Maecenas nec odio et ante tincidunt tempus. Donec vitae sapien ut libero venenatis faucibus. Nullam quis ante. Etiam sit amet orci eget eros faucibus tincidunt. Duis leo. Sed fringilla mauris sit amet nibh. Donec sodales sagittis magna. Sed consequat, leo eget bibendum sodales, augue velit cursus nunc, +