du sk1
This commit is contained in:
parent
8bf4c7c4d2
commit
a67b036fc1
16
sk1/Makefile
Normal file
16
sk1/Makefile
Normal file
@ -0,0 +1,16 @@
|
||||
# Build rules for the Huffman compressor command-line tool.
CC=gcc
CFLAGS=-std=c11 -O2 -Wall -Wextra

# 'all' and 'clean' name actions, not files; declaring them phony keeps them
# working even if a file called "all" or "clean" appears in this directory.
.PHONY: all clean

all: compressor

# Link the final executable from the two object files.
compressor: main.o compressor.o
	$(CC) $(CFLAGS) -o compressor main.o compressor.o

# Both translation units include compressor.h, so they are rebuilt when it changes.
main.o: main.c compressor.h
	$(CC) $(CFLAGS) -c main.c

compressor.o: compressor.c compressor.h
	$(CC) $(CFLAGS) -c compressor.c

# Remove all build products.
clean:
	rm -f *.o compressor
|
||||
46
sk1/README.md
Normal file
46
sk1/README.md
Normal file
@ -0,0 +1,46 @@
|
||||
# Kompresor / Dekompresor – Huffmanovo kódovanie
|
||||
|
||||
## Zadanie
|
||||
Cieľom projektu je vytvoriť nástroj na bezstratovú kompresiu a dekompresiu
|
||||
ľubovoľných binárnych súborov do veľkosti 10 MB bez použitia algoritmu RLE.
|
||||
Program musí byť ovládateľný z príkazového riadka a používať iba štandardnú
|
||||
knižnicu jazyka C.
|
||||
|
||||
## Funkčnosť
|
||||
Program podporuje:
|
||||
- kompresiu súborov pomocou Huffmanovho kódovania,
|
||||
- dekompresiu skomprimovaných súborov,
|
||||
- spracovanie ľubovoľných binárnych dát,
|
||||
- zachovanie úplnej zhody po dekompresii.
|
||||
|
||||
Rozhranie:
|
||||
./compressor -c infile outfile
|
||||
./compressor -d infile outfile
|
||||
./compressor -h
|
||||
|
||||
## Riešenie
|
||||
Použitý algoritmus je Huffmanovo kódovanie:
|
||||
1. Zistí sa frekvencia bajtov v súbore
|
||||
2. Z frekvencií sa zostaví Huffmanov strom
|
||||
3. Dáta sa zapíšu ako bitový prúd
|
||||
4. Do hlavičky súboru sa uloží veľkosť a frekvenčná tabuľka
|
||||
|
||||
Formát výstupu:
|
||||
- magické číslo "HUF1"
|
||||
- pôvodná veľkosť súboru (uint64_t, v natívnom poradí bajtov)
|
||||
- tabuľka frekvencií (256 × uint32_t)
|
||||
- zakódované dáta
|
||||
|
||||
## Podmienky funkčnosti
|
||||
- program funguje pre vstupy do 10 MB,
|
||||
- používa iba štandardnú knižnicu C,
|
||||
- testované na súboroch z Canterbury Corpus,
|
||||
- kompresný pomer typicky > 10 %.
|
||||
|
||||
## Použité zdroje
|
||||
- D. Huffman: A Method for the Construction of Minimum-Redundancy Codes
|
||||
- Canterbury Corpus (testovacie dáta)
|
||||
- Generatívny model: ChatGPT (OpenAI)
|
||||
|
||||
Prompt použitý pri generovaní:
|
||||
"Vytvor kompletný C program na kompresiu a dekompresiu binárnych súborov pomocou Huffmanovho kódovania bez použitia RLE."
|
||||
212
sk1/compressor.c
Normal file
212
sk1/compressor.c
Normal file
@ -0,0 +1,212 @@
|
||||
#include "compressor.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#define MAGIC "HUF1"
#define SYMBOLS 256

/* Widest Huffman code that still fits into Code.bits (a uint32_t). */
#define MAX_CODE_BITS 32

/*
 * One node of the Huffman tree.
 * A leaf has both children NULL and carries a byte value in `symbol`;
 * an internal node carries the summed frequency of its subtree.
 */
typedef struct Node {
    uint8_t symbol;
    uint64_t freq;
    struct Node *left, *right;
} Node;

/* The code assigned to one byte value: the low `length` bits of `bits`. */
typedef struct {
    uint32_t bits;
    uint8_t length;
} Code;

/* Per-byte occurrence counts; input to build_tree(). */
static uint64_t frequencies[SYMBOLS];
/* Per-byte codes; filled in by build_codes(). */
static Code codes[SYMBOLS];

/* ---------- Huffman tree ---------- */

/*
 * Allocate and initialise a tree node.
 *
 * Never returns NULL: if the allocation fails the process is terminated
 * with exit status 1, matching the exit(1) error policy used elsewhere in
 * this file. The caller owns the returned node.
 */
static Node *create_node(uint8_t symbol, uint64_t freq, Node *l, Node *r) {
    Node *n = malloc(sizeof *n);
    if (!n) {
        fprintf(stderr, "compressor: out of memory\n");
        exit(1);
    }
    n->symbol = symbol;
    n->freq = freq;
    n->left = l;
    n->right = r;
    return n;
}

/*
 * Build the Huffman tree for the global `frequencies` table.
 *
 * Returns the root, or NULL when every frequency is zero (empty input).
 * When exactly one symbol occurs, the root is an internal node whose only
 * child (left) is that symbol's leaf, so the symbol gets the 1-bit code "0".
 *
 * The selection and tie-breaking order below is part of the file format:
 * the decompressor rebuilds the tree from the stored frequencies and must
 * arrive at exactly the same shape.
 */
static Node *build_tree(void) {
    Node *nodes[SYMBOLS];
    int count = 0;

    for (int i = 0; i < SYMBOLS; i++) {
        if (frequencies[i]) {
            nodes[count++] = create_node((uint8_t)i, frequencies[i], NULL, NULL);
        }
    }

    if (count == 0) {
        return NULL; /* nothing to encode; nodes[] holds no valid entry */
    }

    if (count == 1) {
        return create_node(0, nodes[0]->freq, nodes[0], NULL);
    }

    while (count > 1) {
        /* a = index of the smallest frequency, b = index of the second smallest */
        int a = 0, b = 1;
        if (nodes[b]->freq < nodes[a]->freq) { int t = a; a = b; b = t; }

        for (int i = 2; i < count; i++) {
            if (nodes[i]->freq < nodes[a]->freq) {
                b = a;
                a = i;
            } else if (nodes[i]->freq < nodes[b]->freq) {
                b = i;
            }
        }

        Node *merged = create_node(
            0,
            nodes[a]->freq + nodes[b]->freq,
            nodes[a], nodes[b]
        );

        /* Store the merged node in the lower slot and fill the higher slot
         * with the last element, shrinking the working set by one. */
        if (a > b) { int t = a; a = b; b = t; }
        nodes[a] = merged;
        nodes[b] = nodes[count - 1];
        count--;
    }
    return nodes[0];
}

/*
 * Walk the tree rooted at `n` and record each leaf's code in `codes`.
 * `bits`/`len` describe the path from the root to `n` (left = 0, right = 1).
 *
 * A NULL `n` is ignored, so an empty tree is accepted. A code longer than
 * MAX_CODE_BITS cannot be represented in Code.bits; instead of silently
 * dropping its leading bits (and producing an undecodable file) the process
 * is terminated with exit status 1.
 */
static void build_codes(Node *n, uint32_t bits, uint8_t len) {
    if (!n) {
        return;
    }
    if (!n->left && !n->right) {
        if (len > MAX_CODE_BITS) {
            fprintf(stderr, "compressor: Huffman code longer than %d bits\n",
                    MAX_CODE_BITS);
            exit(1);
        }
        codes[n->symbol].bits = bits;
        codes[n->symbol].length = len;
        return;
    }
    build_codes(n->left, bits << 1, (uint8_t)(len + 1));
    build_codes(n->right, (bits << 1) | 1, (uint8_t)(len + 1));
}
|
||||
|
||||
/* ---------- Bitový výstup ---------- */
|
||||
|
||||
/*
 * Collects single bits and emits them to a stream one full byte at a time,
 * most significant bit first.
 */
typedef struct {
    FILE *f;        /* destination stream */
    uint8_t buf;    /* bits gathered so far, newest in the lowest position */
    uint8_t count;  /* how many bits of buf are pending */
} BitWriter;

/* Attach the writer to stream `f` with no pending bits. */
static void bw_init(BitWriter *w, FILE *f) {
    *w = (BitWriter){ .f = f, .buf = 0, .count = 0 };
}

/*
 * Append the low `len` bits of `bits`, highest of those bits first.
 * Each time eight bits have accumulated, one byte is written to the stream.
 */
static void bw_write(BitWriter *w, uint32_t bits, uint8_t len) {
    while (len > 0) {
        len--;
        unsigned bit = (bits >> len) & 1u;
        w->buf = (uint8_t)((w->buf << 1) | bit);
        w->count++;
        if (w->count == 8) {
            fputc(w->buf, w->f);
            w->count = 0;
        }
    }
}

/*
 * Write out any pending bits as one final byte, padded on the right with
 * zero bits. Does nothing when no bits are pending.
 */
static void bw_flush(BitWriter *w) {
    if (w->count == 0) {
        return;
    }
    w->buf = (uint8_t)(w->buf << (8 - w->count));
    fputc(w->buf, w->f);
}
|
||||
|
||||
/* ---------- Bitový vstup ---------- */
|
||||
|
||||
/*
 * Hands out the bits of a stream one at a time, most significant bit of
 * each byte first.
 */
typedef struct {
    FILE *f;        /* source stream */
    uint8_t buf;    /* current byte; the next bit to deliver is its top bit */
    uint8_t count;  /* bits of buf not yet delivered */
} BitReader;

/* Attach the reader to stream `f`; the first read will fetch a byte. */
static void br_init(BitReader *r, FILE *f) {
    r->count = 0;
    r->f = f;
}

/*
 * Return the next bit (0 or 1), or -1 when a new byte is needed and the
 * stream cannot supply one.
 */
static int br_read(BitReader *r) {
    if (r->count == 0) {
        int byte = fgetc(r->f);
        if (byte == EOF) {
            return -1;
        }
        r->buf = (uint8_t)byte;
        r->count = 8;
    }
    r->count--;
    int bit = (r->buf & 0x80) ? 1 : 0;
    r->buf = (uint8_t)(r->buf << 1);
    return bit;
}
|
||||
|
||||
/* ---------- Kompresia ---------- */
|
||||
|
||||
void compress_file(const char* input, const char* output) {
|
||||
FILE* fi = fopen(input, "rb");
|
||||
FILE* fo = fopen(output, "wb");
|
||||
if (!fi || !fo) exit(1);
|
||||
|
||||
memset(frequencies, 0, sizeof(frequencies));
|
||||
|
||||
int c;
|
||||
uint64_t size = 0;
|
||||
while ((c = fgetc(fi)) != EOF) {
|
||||
frequencies[c]++;
|
||||
size++;
|
||||
}
|
||||
rewind(fi);
|
||||
|
||||
Node* root = build_tree();
|
||||
build_codes(root, 0, 0);
|
||||
|
||||
fwrite(MAGIC, 1, 4, fo);
|
||||
fwrite(&size, sizeof(uint64_t), 1, fo);
|
||||
|
||||
for (int i = 0; i < SYMBOLS; i++) {
|
||||
uint32_t f = frequencies[i];
|
||||
fwrite(&f, sizeof(uint32_t), 1, fo);
|
||||
}
|
||||
|
||||
BitWriter bw;
|
||||
bw_init(&bw, fo);
|
||||
|
||||
while ((c = fgetc(fi)) != EOF)
|
||||
bw_write(&bw, codes[c].bits, codes[c].length);
|
||||
|
||||
bw_flush(&bw);
|
||||
fclose(fi);
|
||||
fclose(fo);
|
||||
}
|
||||
|
||||
/* ---------- Dekompresia ---------- */
|
||||
|
||||
void decompress_file(const char* input, const char* output) {
|
||||
FILE* fi = fopen(input, "rb");
|
||||
FILE* fo = fopen(output, "wb");
|
||||
if (!fi || !fo) exit(1);
|
||||
|
||||
char magic[4];
|
||||
fread(magic, 1, 4, fi);
|
||||
if (memcmp(magic, MAGIC, 4)) exit(1);
|
||||
|
||||
uint64_t size;
|
||||
fread(&size, sizeof(uint64_t), 1, fi);
|
||||
|
||||
for (int i = 0; i < SYMBOLS; i++) {
|
||||
uint32_t f;
|
||||
fread(&f, sizeof(uint32_t), 1, fi);
|
||||
frequencies[i] = f;
|
||||
}
|
||||
|
||||
Node* root = build_tree();
|
||||
BitReader br;
|
||||
br_init(&br, fi);
|
||||
|
||||
for (uint64_t i = 0; i < size; i++) {
|
||||
Node* n = root;
|
||||
while (n->left || n->right) {
|
||||
int bit = br_read(&br);
|
||||
n = bit ? n->right : n->left;
|
||||
}
|
||||
fputc(n->symbol, fo);
|
||||
}
|
||||
fclose(fi);
|
||||
fclose(fo);
|
||||
}
|
||||
10
sk1/compressor.h
Normal file
10
sk1/compressor.h
Normal file
@ -0,0 +1,10 @@
|
||||
#ifndef COMPRESSOR_H
#define COMPRESSOR_H

/**
 * Compress one file into another using Huffman coding.
 *
 * @param input   path of the file to read
 * @param output  path of the compressed file to write
 *
 * Returns nothing; on failure (for example when a file cannot be opened)
 * the process is terminated with exit status 1.
 */
void compress_file(const char *input, const char *output);

/**
 * Restore a file previously written by compress_file().
 *
 * @param input   path of the compressed file to read
 * @param output  path of the restored file to write
 *
 * Returns nothing; on failure (for example when a file cannot be opened or
 * the input does not start with the expected magic) the process is
 * terminated with exit status 1.
 */
void decompress_file(const char *input, const char *output);

#endif /* COMPRESSOR_H */
|
||||
26
sk1/main.c
Normal file
26
sk1/main.c
Normal file
@ -0,0 +1,26 @@
|
||||
#include "compressor.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Print the command-line usage summary to standard output.
 * Declared with (void): an empty parameter list in C does not form a
 * prototype, so calls with stray arguments would go unchecked.
 */
static void help(void) {
    printf("Pouzitie:\n");
    printf(" compressor -c infile outfile\n");
    printf(" compressor -d infile outfile\n");
    printf(" compressor -h\n");
}
|
||||
|
||||
/*
 * Entry point: dispatch on the first argument.
 *
 *   -c infile outfile   compress infile into outfile
 *   -d infile outfile   decompress infile into outfile
 *   -h                  print usage
 *
 * Returns 0 after a completed operation or an explicit -h. Any other
 * invocation (missing arguments, unknown option, wrong argument count)
 * prints the usage text and returns 1 so that scripts can detect misuse.
 */
int main(int argc, char *argv[]) {
    if (argc == 2 && strcmp(argv[1], "-h") == 0) {
        help();
        return 0;
    }

    if (argc == 4 && strcmp(argv[1], "-c") == 0) {
        compress_file(argv[2], argv[3]);
        return 0;
    }

    if (argc == 4 && strcmp(argv[1], "-d") == 0) {
        decompress_file(argv[2], argv[3]);
        return 0;
    }

    help();
    return 1;
}
|
||||
Loading…
Reference in New Issue
Block a user