From 516889057a4441858c16e5332601a142c9d681c1 Mon Sep 17 00:00:00 2001 From: Yurii Yakovenko Date: Wed, 29 Jan 2025 22:39:50 +0000 Subject: [PATCH] Update sk1/README.md --- sk1/README.md | 325 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 325 insertions(+) diff --git a/sk1/README.md b/sk1/README.md index e69de29..09e163c 100644 --- a/sk1/README.md +++ b/sk1/README.md @@ -0,0 +1,325 @@ +Súbor compressor.c +#include +#include +#define WSIZE 250 +#define BSIZE 50 +///----------------------------LZ77------------- +typedef struct { +unsigned char pos; +unsigned char n; +char symb; +} LZ77Token; +int compress_1(const char* input_file, const char* output_file) +{ +int compr_size = 0, sz = 0; +FILE* input = fopen(input_file, "rb"); +FILE* output = fopen(output_file, "wb"); + +if (!input || !output) +{ +exit(-1); +} + +char window[WSIZE] = { 0 }; +char buffer[BSIZE] = { 0 }; + +int w_start = 0; +int w_end = 0; + +while (!feof(input)) +{ +int blen = fread(buffer, 1, BSIZE, input); + +for (int bpos = 0; bpos < blen; ++bpos) +{ +int best_pos = 0; +int best_n = 0; + +for (int spos = w_start; spos < w_end; ++spos) +{ +int mlength = 0; + +while (mlength < blen - bpos && +mlength < WSIZE && +(spos + mlength) < w_end && +window[(spos + mlength) % WSIZE] == buffer[bpos + mlength]) +{ +++mlength; +} + +if (mlength > best_n) +{ +best_pos = w_end - spos; +best_n = mlength; +} +} + +LZ77Token token; +token.pos = best_pos; +token.n = best_n; + +if (bpos + best_n < blen) +{ +token.symb = buffer[bpos + best_n]; +} +else +{ +token.symb = '\0'; +} + +sz=fwrite(&token, sizeof(LZ77Token), 1, output); +compr_size += sizeof(LZ77Token) * sz; + +for (int j = 0; j <= best_n && bpos < blen; ++j) +{ +if (bpos + j < blen) +{ +window[w_end % WSIZE] = buffer[bpos + j]; +w_end++; + +if (w_end - w_start > WSIZE) +{ +w_start++; +} +} +} +bpos += best_n; +} +} + +fclose(input); +fclose(output); + +return compr_size; +} + +int decompress_1(const char* input_file, const char* output_file) 
+{ +int compr_size = 0; +FILE* input = fopen(input_file, "rb"); +FILE* output = fopen(output_file, "wb"); + +if (!input || !output) +{ +exit(-1); +} + +char window[WSIZE] = { 0 }; +int window_end = 0; + +LZ77Token token; +while (fread(&token, sizeof(LZ77Token), 1, input)) +{ +if (token.pos == 0 && token.n == 0 && token.symb == '\0') +{ +continue; +} + +int start_index = (window_end - token.pos + WSIZE) % WSIZE; + +for (int i = 0; i < token.n; ++i) +{ +char ch = window[(start_index + i) % WSIZE]; +compr_size += fwrite(&ch, 1, 1, output); +window[window_end % WSIZE] = ch; +window_end = (window_end + 1) % WSIZE; +} + +if (token.symb != '\0') +{ +compr_size += fwrite(&token.symb, 1, 1, output); +window[window_end % WSIZE] = token.symb; +window_end = (window_end + 1) % WSIZE; +} +} +fclose(input); +fclose(output); + + +return compr_size; +}//-------------------------------------RLE-------------------- +int compress_2(const char* input_file, const char* output_file) { +int ch, nchar; +unsigned char count = 1; +int compr_size = 0; + +FILE* in = fopen(input_file, "rb"); +FILE* out = fopen(output_file, "wb"); + +if (!in || !out) { +if (in) fclose(in); +if (out) fclose(out); +return -1; +} + +ch = fgetc(in); +while (ch != EOF) { +nchar = fgetc(in); + +if (nchar == ch) { + +count++; +if (count == 127) { +fputc(count, out); +fputc(ch, out); +compr_size += 2; +count = 1; +ch = fgetc(in); +} +} +else { +if (count > 1) { +fputc(count, out); +fputc(ch, out); +compr_size += 2; +} +else { +int rizn = 1; +char buffer[127]; +buffer[0] = ch; + +while (nchar != EOF && nchar != ch && rizn < 127) { +buffer[rizn] = nchar; +rizn++; +ch = nchar; +nchar = fgetc(in); +} + +fputc(0x80 | rizn, out); +compr_size += 1; +for (int i = 0; i < rizn; i++) { +fputc(buffer[i], out); +compr_size += 1; +} +} +count = 1; +ch = nchar; +} +} + +fclose(in); +fclose(out); +return compr_size; +} + +int decompress_2(const char* input_file, const char* output_file) { +int byte; +int compr_size = 0; + +FILE* in = 
fopen(input_file, "rb"); +FILE* out = fopen(output_file, "wb"); + +if (!in || !out) { +if (in) fclose(in); +if (out) fclose(out); +return -1; +} + +while ((byte = fgetc(in)) != EOF) { +if (byte & 0x80) { + +int count = byte & 0x7F; +for (int i = 0; i < count; i++) { +int ch = fgetc(in); +if (ch == EOF) break; +fputc(ch, out); +compr_size += 1; +} +} +else { + +int count = byte; +int ch = fgetc(in); +for (int i = 0; i < count; i++) { +fputc(ch, out); +compr_size += 1; +} +} +} + +fclose(in); +fclose(out); +return compr_size; +} + +Súbor main +#include +#include "compressor.h" +int main(int argc, char* argv[]) +{ +if (argc <3 && argv[1][0] == '-' && argv[1][1] == 'h' ) +{ +printf("\nna kompresiu použite: compressor -c infile outfile"); +printf("\nna dekompresiu použite: compressor -d infile outfile"); +return 0; +} +if (argc == 4) +{ +if (argv[1][0] == '-' && argv[1][1] == 'c' && argv[1][2] == 0) +{ +compress_1(argv[2], "tmpjkxqp.tmp"); +compress_2("tmpjkxqp.tmp", argv[3]); +remove("tmpjkxqp.tmp"); +return 0; +} +if (argv[1][0] == '-' && argv[1][1] == 'd' && argv[1][2] == 0) +{ +decompress_2(argv[2], "tmpjkxqp.tmp"); +decompress_1("tmpjkxqp.tmp", argv[3]); +remove("tmpjkxqp.tmp"); +return 0; +} +} + +printf("\nnesprávne argumenty"); + +return 0; +} +Súbor compressor.h +#include +int compress_1(const char *a, const char *b); +int compress_2(const char *a, const char *b); +int decompress_1(const char *a, const char *b); +int decompress_2(const char *a, const char *b); + + +Napríklad treba komprimovať taký text. (I_love_Slovenska_Republika! 
)
+Veľkosť okna – 15, veľkosť buffera – 8
+ okno                    LZ77        buffer
+                                      I_love_S
++---------------------------------------------------+
+|I                      |(0;0;I)    | _love_S       |
++-----------------------+-----------+---------------+
+|I_                     |(0;0;_)    | love_S        |
++-----------------------+-----------+---------------+
+|I_l                    |(0;0;l)    | ove_S         |
++-----------------------+-----------+---------------+
+|I_lo                   |(0;0;o)    | ve_S          |
++-----------------------+-----------+---------------+
+|I_lov                  |(0;0;v)    | e_S           |
++-----------------------+-----------+---------------+
+|I_love                 |(0;0;e)    | _S            |
++-----------------------+-----------+---------------+
+|I_love_S               |(5;1;S)    | lovenska      |
++-----------------------+-----------+---------------+
+|I_love_Sloven          |(6;4;n)    | ska           |
++-----------------------+-----------+---------------+
+|I_love_Slovens         |(0;0;s)    | ka            |
++-----------------------+-----------+---------------+
+|I_love_Slovensk        |(0;0;k)    | a             |
++-----------------------+-----------+---------------+
+|_love_Slovenska        |(0;0;a)    | _Republi      |
++-----------------------+-----------+---------------+
+|ove_Slovenska_R        |(15;1;R)   | epubli        |
++-----------------------+-----------+---------------+
+|e_Slovenska_Rep        |(13;1;p)   | ubli          |
++-----------------------+-----------+---------------+
+|_Slovenska_Repu        |(0;0;u)    | bli           |
++-----------------------+-----------+---------------+
+RLE
+Ak kódujeme RLE priamo, z textu (I_love_Slovenska_Republika!!!!!) dostaneme (1I1_1l1o1v1e1_1S1l1o1v1e1n1s1k1a1_1R1e1p1u1b1l1i1k1a5!).
+Je to veľmi neefektívne, pretože počet znakov sa v skutočnosti zdvojnásobuje. Preto sa kódovaná postupnosť delí na dva typy sérií: 1) séria rôznych znakov (napríklad veta) sa kóduje zapísaním počtu týchto znakov a potom samotných znakov; 2) séria rovnakých znakov sa kóduje zapísaním počtu opakovaní a znaku, ktorý sa opakuje: (26I_love_Slovenska_Republika5!
)
+Je tu však problém: pri zápise 26I nie je jasné, či ide o sériu 26 rôznych znakov, alebo o 26 opakovaní jedného znaku. Riešením je použiť najvyšší (najvýznamnejší) bit počítadla na rozlíšenie týchto dvoch typov sérií. Ak bit nie je nastavený, číslo je dĺžka série rôznych znakov; ak je nastavený, dolných 7 bitov obsahuje počet opakovaní jedného znaku.
+ Zakódujme našu správu takto: (26I_love_Slovenska_Republika133!) – 133 = 128 + 5, t. j. nastavený najvyšší bit znamená, že ide o 5 opakujúcich sa znakov.
+Poznámka: funkcie compress_2 a decompress_2 v súbore compressor.c používajú opačnú konvenciu – nastavený najvyšší bit (0x80) označuje sériu rôznych znakov a nenastavený bit počet opakovaní jedného znaku.
+
+