From a67b036fc1c13dc2cb19e785a584e067044f1f17 Mon Sep 17 00:00:00 2001
From: mr314ot
Date: Wed, 21 Jan 2026 18:22:41 +0100
Subject: [PATCH] du sk1

---
 sk1/Makefile     |  18 +++
 sk1/README.md    |  46 ++++++
 sk1/compressor.c | 377 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 sk1/compressor.h |  16 ++
 sk1/main.c       |  33 +++++
 5 files changed, 490 insertions(+)
 create mode 100644 sk1/Makefile
 create mode 100644 sk1/README.md
 create mode 100644 sk1/compressor.c
 create mode 100644 sk1/compressor.h
 create mode 100644 sk1/main.c

diff --git a/sk1/Makefile b/sk1/Makefile
new file mode 100644
--- /dev/null
+++ b/sk1/Makefile
@@ -0,0 +1,18 @@
+CC=gcc
+CFLAGS=-std=c11 -O2 -Wall -Wextra
+
+.PHONY: all clean
+
+all: compressor
+
+compressor: main.o compressor.o
+	$(CC) $(CFLAGS) -o compressor main.o compressor.o
+
+main.o: main.c compressor.h
+	$(CC) $(CFLAGS) -c main.c
+
+compressor.o: compressor.c compressor.h
+	$(CC) $(CFLAGS) -c compressor.c
+
+clean:
+	rm -f *.o compressor
diff --git a/sk1/README.md b/sk1/README.md
new file mode 100644
index 0000000..4d4bf70
--- /dev/null
+++ b/sk1/README.md
@@ -0,0 +1,46 @@
+# Kompresor / Dekompresor – Huffmanovo kódovanie
+
+## Zadanie
+Cieľom projektu je vytvoriť nástroj na bezstratovú kompresiu a dekompresiu
+ľubovoľných binárnych súborov do veľkosti 10 MB bez použitia algoritmu RLE.
+Program musí byť ovládateľný z príkazového riadka a používať iba štandardnú
+knižnicu jazyka C.
+
+## Funkčnosť
+Program podporuje:
+- kompresiu súborov pomocou Huffmanovho kódovania,
+- dekompresiu skomprimovaných súborov,
+- spracovanie ľubovoľných binárnych dát,
+- zachovanie úplnej zhody po dekompresii.
+
+Rozhranie:
+./compressor -c infile outfile
+./compressor -d infile outfile
+./compressor -h
+
+## Riešenie
+Použitý algoritmus je Huffmanovo kódovanie:
+1. Zistí sa frekvencia bajtov v súbore
+2. Z frekvencií sa zostaví Huffmanov strom
+3. Dáta sa zapíšu ako bitový prúd
+4. Do hlavičky súboru sa uloží veľkosť a frekvenčná tabuľka
+
+Formát výstupu:
+- magické číslo "HUF1"
+- pôvodná veľkosť súboru
+- tabuľka frekvencií (256 × uint32_t)
+- zakódované dáta
+
+## Podmienky funkčnosti
+- program funguje pre vstupy do 10 MB,
+- používa iba štandardnú knižnicu C,
+- testované na súboroch z Canterbury Corpus,
+- kompresný pomer typicky > 10 %.
+
+## Použité zdroje
+- D. Huffman: A Method for the Construction of Minimum-Redundancy Codes
+- Canterbury Corpus (testovacie dáta)
+- Generatívny model: ChatGPT (OpenAI)
+
+Prompt použitý pri generovaní:
+"Vytvor kompletný C program na kompresiu a dekompresiu binárnych súborov pomocou Huffmanovho kódovania bez použitia RLE."
diff --git a/sk1/compressor.c b/sk1/compressor.c
new file mode 100644
--- /dev/null
+++ b/sk1/compressor.c
@@ -0,0 +1,377 @@
+/* Huffman compressor and decompressor for the "HUF1" file format. */
+#include "compressor.h"
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define MAGIC "HUF1"
+#define MAGIC_LEN 4
+#define SYMBOLS 256
+/* Longest code word that fits into Code.bits. */
+#define MAX_CODE_BITS 64
+
+/* Huffman tree node; a node without children is a leaf carrying `symbol`. */
+typedef struct Node {
+    uint8_t symbol;
+    uint64_t freq;
+    struct Node *left, *right;
+} Node;
+
+/* Code word of one symbol: the low `length` bits of `bits`, MSB first. */
+typedef struct {
+    uint64_t bits;
+    uint8_t length;
+} Code;
+
+static uint64_t frequencies[SYMBOLS];
+static Code codes[SYMBOLS];
+
+/* Reports a fatal error on stderr and exits with a failure status. */
+static void die(const char *msg) {
+    fprintf(stderr, "compressor: %s\n", msg);
+    exit(EXIT_FAILURE);
+}
+
+/* ---------- Huffman tree ---------- */
+
+/* Allocates one tree node; exits the program when memory runs out. */
+static Node *create_node(uint8_t symbol, uint64_t freq, Node *l, Node *r) {
+    Node *n = malloc(sizeof *n);
+    if (!n) {
+        die("out of memory");
+    }
+    n->symbol = symbol;
+    n->freq = freq;
+    n->left = l;
+    n->right = r;
+    return n;
+}
+
+/* Frees a tree returned by build_tree(); NULL is accepted. */
+static void free_tree(Node *n) {
+    if (!n) {
+        return;
+    }
+    free_tree(n->left);
+    free_tree(n->right);
+    free(n);
+}
+
+/*
+ * Builds the Huffman tree for the global `frequencies` table.
+ *
+ * Returns NULL when every frequency is zero (empty input). A single distinct
+ * symbol yields a root whose only child is the left leaf, so that symbol is
+ * encoded as the one-bit code 0.
+ *
+ * Compression and decompression both rebuild the tree from the same table,
+ * so the selection and tie-breaking order below is part of the file format
+ * and must stay as it is.
+ */
+static Node *build_tree(void) {
+    Node *nodes[SYMBOLS];
+    int count = 0;
+
+    for (int i = 0; i < SYMBOLS; i++) {
+        if (frequencies[i]) {
+            nodes[count++] = create_node((uint8_t)i, frequencies[i], NULL, NULL);
+        }
+    }
+
+    if (count == 0) {
+        return NULL;
+    }
+    if (count == 1) {
+        return create_node(0, nodes[0]->freq, nodes[0], NULL);
+    }
+
+    while (count > 1) {
+        /* a: index of the lightest node, b: index of the second lightest. */
+        int a = 0;
+        int b = 1;
+        if (nodes[b]->freq < nodes[a]->freq) {
+            a = 1;
+            b = 0;
+        }
+
+        for (int i = 2; i < count; i++) {
+            if (nodes[i]->freq < nodes[a]->freq) {
+                b = a;
+                a = i;
+            } else if (nodes[i]->freq < nodes[b]->freq) {
+                b = i;
+            }
+        }
+
+        Node *merged = create_node(0, nodes[a]->freq + nodes[b]->freq,
+                                   nodes[a], nodes[b]);
+
+        /* The merged node takes the lower slot; the last node fills the
+         * higher one so the array shrinks by one entry. */
+        int lo = a < b ? a : b;
+        int hi = a < b ? b : a;
+        nodes[lo] = merged;
+        nodes[hi] = nodes[count - 1];
+        count--;
+    }
+    return nodes[0];
+}
+
+/*
+ * Fills the global `codes` table by walking the tree; `bits` and `len`
+ * describe the path from the root to `n` (0 = left, 1 = right).
+ */
+static void build_codes(const Node *n, uint64_t bits, uint8_t len) {
+    if (!n->left && !n->right) {
+        codes[n->symbol].bits = bits;
+        codes[n->symbol].length = len;
+        return;
+    }
+    /* Going one level deeper would no longer fit into Code.bits. */
+    if (len >= MAX_CODE_BITS) {
+        die("Huffman code too long");
+    }
+    if (n->left) {
+        build_codes(n->left, bits << 1, (uint8_t)(len + 1));
+    }
+    if (n->right) {
+        build_codes(n->right, (bits << 1) | 1u, (uint8_t)(len + 1));
+    }
+}
+
+/* ---------- Bit output ---------- */
+
+/* Packs bits MSB first into bytes written to `f`. */
+typedef struct {
+    FILE *f;
+    uint8_t buf;
+    uint8_t count;
+} BitWriter;
+
+static void bw_init(BitWriter *w, FILE *f) {
+    w->f = f;
+    w->buf = 0;
+    w->count = 0;
+}
+
+/* Appends the low `len` bits of `bits`, most significant bit first. */
+static void bw_write(BitWriter *w, uint64_t bits, uint8_t len) {
+    for (int i = len - 1; i >= 0; i--) {
+        w->buf = (uint8_t)((w->buf << 1) | ((bits >> i) & 1u));
+        if (++w->count == 8) {
+            if (fputc(w->buf, w->f) == EOF) {
+                die("write error");
+            }
+            w->count = 0;
+        }
+    }
+}
+
+/* Writes a pending partial byte, padded with zero bits on the right. */
+static void bw_flush(BitWriter *w) {
+    if (w->count) {
+        w->buf = (uint8_t)(w->buf << (8 - w->count));
+        if (fputc(w->buf, w->f) == EOF) {
+            die("write error");
+        }
+        w->count = 0;
+    }
+}
+
+/* ---------- Bit input ---------- */
+
+/* Hands out the bits of `f` one at a time, MSB first. */
+typedef struct {
+    FILE *f;
+    uint8_t buf;
+    uint8_t count;
+} BitReader;
+
+static void br_init(BitReader *r, FILE *f) {
+    r->f = f;
+    r->buf = 0;
+    r->count = 0;
+}
+
+/* Returns the next bit (0 or 1), or -1 when no more bytes can be read. */
+static int br_read(BitReader *r) {
+    if (!r->count) {
+        int c = fgetc(r->f);
+        if (c == EOF) {
+            return -1;
+        }
+        r->buf = (uint8_t)c;
+        r->count = 8;
+    }
+    int bit = (r->buf >> 7) & 1;
+    r->buf = (uint8_t)(r->buf << 1);
+    r->count--;
+    return bit;
+}
+
+/* Flushes and closes an output stream; exits if any write failed. */
+static void close_output(FILE *fo) {
+    int failed = ferror(fo);
+    if (fclose(fo) != 0) {
+        failed = 1;
+    }
+    if (failed) {
+        die("write error");
+    }
+}
+
+/* ---------- Compression ---------- */
+
+/*
+ * Compresses the file `input` into the file `output`.
+ *
+ * Output layout (integers in host byte order): the magic "HUF1", the
+ * original size as uint64_t, 256 symbol frequencies as uint32_t, then the
+ * Huffman bit stream padded with zero bits to a whole byte.
+ *
+ * Any failure prints a message to stderr and terminates the program.
+ */
+void compress_file(const char *input, const char *output) {
+    FILE *fi = fopen(input, "rb");
+    if (!fi) {
+        die("cannot open input file");
+    }
+
+    memset(frequencies, 0, sizeof frequencies);
+    memset(codes, 0, sizeof codes);
+
+    /* Pass 1: count how often each byte value occurs. */
+    int c;
+    uint64_t size = 0;
+    while ((c = fgetc(fi)) != EOF) {
+        frequencies[c]++;
+        size++;
+    }
+    if (ferror(fi) || fseek(fi, 0L, SEEK_SET) != 0) {
+        die("cannot read input file");
+    }
+    for (int i = 0; i < SYMBOLS; i++) {
+        /* The header stores 32-bit counts; refuse to truncate silently. */
+        if (frequencies[i] > UINT32_MAX) {
+            die("input too large for the HUF1 format");
+        }
+    }
+
+    Node *root = build_tree();
+    if (root) {
+        build_codes(root, 0, 0);
+    }
+
+    /* Opened only now so that a bad input never truncates `output`. */
+    FILE *fo = fopen(output, "wb");
+    if (!fo) {
+        die("cannot open output file");
+    }
+
+    int ok = fwrite(MAGIC, 1, MAGIC_LEN, fo) == MAGIC_LEN &&
+             fwrite(&size, sizeof size, 1, fo) == 1;
+    for (int i = 0; ok && i < SYMBOLS; i++) {
+        uint32_t f = (uint32_t)frequencies[i];
+        ok = fwrite(&f, sizeof f, 1, fo) == 1;
+    }
+    if (!ok) {
+        die("write error");
+    }
+
+    /* Pass 2: emit the code word of every input byte. */
+    BitWriter bw;
+    bw_init(&bw, fo);
+    uint64_t encoded = 0;
+    while ((c = fgetc(fi)) != EOF) {
+        if (codes[c].length == 0) {
+            die("input file changed or could not be read");
+        }
+        bw_write(&bw, codes[c].bits, codes[c].length);
+        encoded++;
+    }
+    if (ferror(fi) || encoded != size) {
+        die("input file changed or could not be read");
+    }
+    bw_flush(&bw);
+
+    free_tree(root);
+    fclose(fi);
+    close_output(fo);
+}
+
+/* ---------- Decompression ---------- */
+
+/*
+ * Restores the original data from the HUF1 file `input` into `output`.
+ *
+ * The header is validated (magic, and the frequencies must add up to the
+ * stored size) and the bit stream is checked for truncation, so damaged
+ * input ends in an error message instead of undefined behaviour.
+ *
+ * Any failure prints a message to stderr and terminates the program.
+ */
+void decompress_file(const char *input, const char *output) {
+    FILE *fi = fopen(input, "rb");
+    if (!fi) {
+        die("cannot open input file");
+    }
+
+    char magic[MAGIC_LEN];
+    if (fread(magic, 1, MAGIC_LEN, fi) != MAGIC_LEN ||
+        memcmp(magic, MAGIC, MAGIC_LEN) != 0) {
+        die("input is not a HUF1 file");
+    }
+
+    uint64_t size;
+    if (fread(&size, sizeof size, 1, fi) != 1) {
+        die("truncated header");
+    }
+
+    uint64_t total = 0;
+    for (int i = 0; i < SYMBOLS; i++) {
+        uint32_t f;
+        if (fread(&f, sizeof f, 1, fi) != 1) {
+            die("truncated header");
+        }
+        frequencies[i] = f;
+        total += f;
+    }
+    if (total != size) {
+        die("corrupt header");
+    }
+
+    /* NULL only for size == 0, in which case the loop below never runs. */
+    Node *root = build_tree();
+
+    /* Opened only now so that a bad input never truncates `output`. */
+    FILE *fo = fopen(output, "wb");
+    if (!fo) {
+        die("cannot open output file");
+    }
+
+    BitReader br;
+    br_init(&br, fi);
+
+    for (uint64_t i = 0; i < size; i++) {
+        const Node *n = root;
+        while (n->left || n->right) {
+            int bit = br_read(&br);
+            if (bit < 0) {
+                die("unexpected end of compressed data");
+            }
+            n = bit ? n->right : n->left;
+            /* Only the single-symbol tree has a missing (right) child. */
+            if (!n) {
+                die("corrupt compressed data");
+            }
+        }
+        if (fputc(n->symbol, fo) == EOF) {
+            die("write error");
+        }
+    }
+
+    free_tree(root);
+    fclose(fi);
+    close_output(fo);
+}
diff --git a/sk1/compressor.h b/sk1/compressor.h
new file mode 100644
--- /dev/null
+++ b/sk1/compressor.h
@@ -0,0 +1,16 @@
+#ifndef COMPRESSOR_H
+#define COMPRESSOR_H
+
+/*
+ * Compresses the file `input` into the file `output` (HUF1 format).
+ * On any error a message is printed to stderr and the program exits.
+ */
+void compress_file(const char *input, const char *output);
+
+/*
+ * Decompresses the HUF1 file `input` into the file `output`.
+ * On any error a message is printed to stderr and the program exits.
+ */
+void decompress_file(const char *input, const char *output);
+
+#endif
diff --git a/sk1/main.c b/sk1/main.c
new file mode 100644
--- /dev/null
+++ b/sk1/main.c
@@ -0,0 +1,33 @@
+#include "compressor.h"
+#include <stdio.h>
+#include <string.h>
+
+/* Prints the command-line usage to stdout. */
+static void help(void) {
+    printf("Pouzitie:\n");
+    printf(" compressor -c infile outfile\n");
+    printf(" compressor -d infile outfile\n");
+    printf(" compressor -h\n");
+}
+
+/*
+ * Entry point: "-c in out" compresses, "-d in out" decompresses and "-h"
+ * (or no argument at all) prints the usage. Any other command line prints
+ * the usage and returns a failure status.
+ */
+int main(int argc, char *argv[]) {
+    if (argc == 4 && strcmp(argv[1], "-c") == 0) {
+        compress_file(argv[2], argv[3]);
+        return 0;
+    }
+    if (argc == 4 && strcmp(argv[1], "-d") == 0) {
+        decompress_file(argv[2], argv[3]);
+        return 0;
+    }
+
+    help();
+    if (argc < 2 || (argc == 2 && strcmp(argv[1], "-h") == 0)) {
+        return 0;
+    }
+    return 1;
+}