funguje
This commit is contained in:
parent
9bb436a453
commit
44e8945215
17
sk1/Makefile
Normal file
17
sk1/Makefile
Normal file
@ -0,0 +1,17 @@
|
||||
CC = gcc
|
||||
CFLAGS = -std=c11 -Wall -Wextra -O2
|
||||
|
||||
all: compressor
|
||||
|
||||
compressor: main.o compressor.o
|
||||
$(CC) $(CFLAGS) -o compressor main.o compressor.o
|
||||
|
||||
main.o: main.c compressor.h
|
||||
$(CC) $(CFLAGS) -c main.c
|
||||
|
||||
compressor.o: compressor.c compressor.h
|
||||
$(CC) $(CFLAGS) -c compressor.c
|
||||
|
||||
clean:
|
||||
rm -f *.o compressor
|
||||
|
||||
82
sk1/README.md
Normal file
82
sk1/README.md
Normal file
@ -0,0 +1,82 @@
|
||||
# Kompressor – domáca časť skúšky
|
||||
|
||||
## Zadanie
|
||||
Cieľom zadania je naprogramovať nástroj na kompresiu a dekompresiu súborov.
|
||||
Program musí fungovať pre ľubovoľné binárne súbory do veľkosti 10 MB.
|
||||
Po dekompresii musí byť výsledný súbor rovnaký ako pôvodný.
|
||||
|
||||
Nie je povolené použiť RLE (Run Length Encoding).
|
||||
Je povolené použiť napríklad Huffmanovo kódovanie alebo iný algoritmus.
|
||||
|
||||
---
|
||||
|
||||
## Popis programu
|
||||
Program umožňuje:
|
||||
- skomprimovať súbor
|
||||
- dekomprimovať súbor
|
||||
- vypísať pomoc pri spustení
|
||||
|
||||
Program sa spúšťa z príkazového riadka a pracuje so súbormi zadanými ako argumenty.
|
||||
|
||||
---
|
||||
|
||||
## Použitý algoritmus
|
||||
Použil som **Huffmanovo kódovanie**.
|
||||
|
||||
Najprv sa spočítajú frekvencie jednotlivých bajtov vo vstupnom súbore.
|
||||
Podľa frekvencií sa vytvorí Huffmanov strom.
|
||||
Každému bajtu sa priradí bitový kód.
|
||||
|
||||
Do výsledného súboru sa uloží:
|
||||
- veľkosť pôvodného súboru
|
||||
- tabuľka frekvencií
|
||||
- skomprimovaný bitový tok
|
||||
|
||||
Pri dekompresii sa z tabuľky frekvencií znovu vytvorí strom
|
||||
a pomocou neho sa dáta dekódujú späť.
|
||||
|
||||
---
|
||||
|
||||
## Použitie programu
|
||||
./compressor -c vstupny_subor vystupny_subor
|
||||
Skomprimuje vstupný súbor do výstupného.
|
||||
./compressor -d skomprimovany_subor vystupny_subor
|
||||
Dekompresia späť na pôvodné dáta.
|
||||
./compressor -h
|
||||
Vypíše pomoc.
|
||||
|
||||
---
|
||||
|
||||
## Technické detaily
|
||||
- program je napísaný v jazyku C (štandard C11)
|
||||
- používa iba štandardnú knižnicu C
|
||||
- práca so súbormi je realizovaná pomocou fopen, fread a fwrite
|
||||
- program je rozdelený na viac súborov:
|
||||
- main.c – spracovanie argumentov a spustenie programu
|
||||
- compressor.c – implementácia kompresie a dekompresie
|
||||
- compressor.h – hlavičkový súbor
|
||||
- preklad prebieha pomocou Makefile
|
||||
|
||||
---
|
||||
|
||||
## Obmedzenia
|
||||
- maximálna veľkosť vstupného súboru je 10 MB
|
||||
- kompresia nemusí byť efektívna pre všetky typy súborov
|
||||
- program nevyužíva žiadne externé knižnice
|
||||
|
||||
---
|
||||
|
||||
## Testovanie
|
||||
Program bol testovaný na súboroch z Canterbury corpus.
|
||||
Po dekompresii bol výsledný súbor vždy zhodný s pôvodným.
|
||||
|
||||
---
|
||||
|
||||
## Použité zdroje
|
||||
- študijné materiály k predmetu
|
||||
- základný popis Huffmanovho kódovania
|
||||
- generatívny model ChatGPT (OpenAI)
|
||||
|
||||
Použitý prompt:
|
||||
"Implement Huffman compression in C without external libraries"
|
||||
|
||||
BIN
sk1/compressor
Executable file
BIN
sk1/compressor
Executable file
Binary file not shown.
249
sk1/compressor.c
Normal file
249
sk1/compressor.c
Normal file
@ -0,0 +1,249 @@
|
||||
#include "compressor.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#define BYTE_RANGE 256
|
||||
#define IO_BUFFER 8192
|
||||
|
||||
typedef struct HuffNode {
|
||||
int value;
|
||||
uint32_t weight;
|
||||
struct HuffNode *zero;
|
||||
struct HuffNode *one;
|
||||
} HuffNode;
|
||||
|
||||
typedef struct {
|
||||
uint32_t pattern;
|
||||
uint8_t size;
|
||||
} BitCode;
|
||||
|
||||
static HuffNode *node_create(int value, uint32_t weight)
|
||||
{
|
||||
HuffNode *n=malloc(sizeof(HuffNode));
|
||||
if (!n)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
n->value=value;
|
||||
n->weight=weight;
|
||||
n->zero=NULL;
|
||||
n->one=NULL;
|
||||
return n;
|
||||
}
|
||||
|
||||
static void node_destroy(HuffNode *n)
|
||||
{
|
||||
if (!n)
|
||||
{
|
||||
return;
|
||||
}
|
||||
node_destroy(n->zero);
|
||||
node_destroy(n->one);
|
||||
free(n);
|
||||
}
|
||||
|
||||
static HuffNode *assemble_tree(uint32_t histogram[])
|
||||
{
|
||||
HuffNode *pool[BYTE_RANGE*2];
|
||||
int pool_size=0;
|
||||
for (int i = 0; i < BYTE_RANGE; i++)
|
||||
{
|
||||
if (histogram[i]>0)
|
||||
{
|
||||
pool[pool_size++] = node_create(i, histogram[i]);
|
||||
}
|
||||
}
|
||||
if (pool_size==1)
|
||||
{
|
||||
pool[pool_size++]=node_create(-1, 0);
|
||||
}
|
||||
while (pool_size>1)
|
||||
{
|
||||
int m1=0, m2=1;
|
||||
if (pool[m2]->weight<pool[m1]->weight)
|
||||
{
|
||||
int t=m1; m1=m2; m2=t;
|
||||
}
|
||||
for (int i = 2; i < pool_size; i++)
|
||||
{
|
||||
if (pool[i]->weight < pool[m1]->weight)
|
||||
{
|
||||
m2=m1;
|
||||
m1=i;
|
||||
} else if (pool[i]->weight < pool[m2]->weight)
|
||||
{
|
||||
m2=i;
|
||||
}
|
||||
}
|
||||
HuffNode *parent = node_create(
|
||||
-1,
|
||||
pool[m1]->weight+pool[m2]->weight
|
||||
);
|
||||
parent->zero=pool[m1];
|
||||
parent->one=pool[m2];
|
||||
pool[m1]=parent;
|
||||
pool[m2]=pool[pool_size-1];
|
||||
pool_size--;
|
||||
}
|
||||
return pool[0];
|
||||
}
|
||||
|
||||
static void generate_codes
|
||||
(
|
||||
HuffNode *root,
|
||||
BitCode table[],
|
||||
uint32_t bits,
|
||||
uint8_t depth
|
||||
) {
|
||||
if (!root) return;
|
||||
if (root->value >= 0)
|
||||
{
|
||||
table[root->value].pattern=bits;
|
||||
table[root->value].size=depth;
|
||||
return;
|
||||
}
|
||||
generate_codes(root->zero, table, bits << 1, depth + 1);
|
||||
generate_codes(root->one, table, (bits << 1) | 1, depth + 1);
|
||||
}
|
||||
|
||||
int pack_binary(const char *input_path, const char *output_path)
|
||||
{
|
||||
FILE *src=fopen(input_path, "rb");
|
||||
FILE *dst=fopen(output_path, "wb");
|
||||
if (!src || !dst)
|
||||
{
|
||||
if (src) fclose(src);
|
||||
if (dst) fclose(dst);
|
||||
return 1;
|
||||
}
|
||||
uint32_t freq_map[BYTE_RANGE]={0};
|
||||
uint8_t buffer[IO_BUFFER];
|
||||
size_t read_bytes;
|
||||
uint32_t total_size=0;
|
||||
while ((read_bytes = fread(buffer, 1, IO_BUFFER, src)) > 0)
|
||||
{
|
||||
total_size += read_bytes;
|
||||
for (size_t i = 0; i < read_bytes; i++)
|
||||
{
|
||||
freq_map[buffer[i]]++;
|
||||
}
|
||||
}
|
||||
rewind(src);
|
||||
if (fwrite(&total_size, sizeof(uint32_t), 1, dst) != 1 ||
|
||||
fwrite(freq_map, sizeof(uint32_t), BYTE_RANGE, dst) != BYTE_RANGE)
|
||||
{
|
||||
fclose(src);
|
||||
fclose(dst);
|
||||
return 1;
|
||||
}
|
||||
HuffNode *tree=assemble_tree(freq_map);
|
||||
if (!tree)
|
||||
{
|
||||
fclose(src);
|
||||
fclose(dst);
|
||||
return 1;
|
||||
}
|
||||
BitCode codebook[BYTE_RANGE];
|
||||
memset(codebook, 0, sizeof(codebook));
|
||||
generate_codes(tree, codebook, 0, 0);
|
||||
uint8_t out_byte=0;
|
||||
int bit_count=0;
|
||||
while ((read_bytes = fread(buffer, 1, IO_BUFFER, src)) > 0) {
|
||||
for (size_t i = 0; i < read_bytes; i++)
|
||||
{
|
||||
BitCode c=codebook[buffer[i]];
|
||||
for (int b = c.size - 1; b >= 0; b--)
|
||||
{
|
||||
out_byte = (out_byte << 1) |
|
||||
((c.pattern >> b) & 1);
|
||||
bit_count++;
|
||||
if (bit_count==8)
|
||||
{
|
||||
if (fwrite(&out_byte, 1, 1, dst)!=1)
|
||||
{
|
||||
node_destroy(tree);
|
||||
fclose(src);
|
||||
fclose(dst);
|
||||
return 1;
|
||||
}
|
||||
out_byte=0;
|
||||
bit_count=0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (bit_count>0)
|
||||
{
|
||||
out_byte <<= (8 - bit_count);
|
||||
if (fwrite(&out_byte, 1, 1, dst)!=1)
|
||||
{
|
||||
node_destroy(tree);
|
||||
fclose(src);
|
||||
fclose(dst);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
node_destroy(tree);
|
||||
fclose(src);
|
||||
fclose(dst);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int unpack_binary(const char *input_path, const char *output_path)
|
||||
{
|
||||
FILE *src=fopen(input_path, "rb");
|
||||
FILE *dst=fopen(output_path, "wb");
|
||||
if (!src || !dst)
|
||||
{
|
||||
if (src) fclose(src);
|
||||
if (dst) fclose(dst);
|
||||
return 1;
|
||||
}
|
||||
uint32_t expected_size;
|
||||
uint32_t freq_map[BYTE_RANGE];
|
||||
if (fread(&expected_size, sizeof(uint32_t), 1, src) != 1 ||
|
||||
fread(freq_map, sizeof(uint32_t), BYTE_RANGE, src) != BYTE_RANGE)
|
||||
{
|
||||
fclose(src);
|
||||
fclose(dst);
|
||||
return 1;
|
||||
}
|
||||
HuffNode *tree=assemble_tree(freq_map);
|
||||
if (!tree)
|
||||
{
|
||||
fclose(src);
|
||||
fclose(dst);
|
||||
return 1;
|
||||
}
|
||||
HuffNode *cursor=tree;
|
||||
uint8_t byte;
|
||||
uint32_t produced=0;
|
||||
while (produced<expected_size &&
|
||||
fread(&byte, 1, 1, src)==1)
|
||||
{
|
||||
for (int i = 7; i >= 0 && produced < expected_size; i--)
|
||||
{
|
||||
int bit=(byte >> i) & 1;
|
||||
cursor=bit ? cursor->one : cursor->zero;
|
||||
if (cursor->value>=0)
|
||||
{
|
||||
if (fputc(cursor->value, dst)==EOF)
|
||||
{
|
||||
node_destroy(tree);
|
||||
fclose(src);
|
||||
fclose(dst);
|
||||
return 1;
|
||||
}
|
||||
produced++;
|
||||
cursor=tree;
|
||||
}
|
||||
}
|
||||
}
|
||||
node_destroy(tree);
|
||||
fclose(src);
|
||||
fclose(dst);
|
||||
return 0;
|
||||
}
|
||||
|
||||
7
sk1/compressor.h
Normal file
7
sk1/compressor.h
Normal file
@ -0,0 +1,7 @@
|
||||
#ifndef COMPRESSOR_H
|
||||
#define COMPRESSOR_H
|
||||
|
||||
int pack_binary(const char *input_path, const char *output_path);
|
||||
int unpack_binary(const char *input_path, const char *output_path);
|
||||
#endif
|
||||
|
||||
BIN
sk1/compressor.o
Normal file
BIN
sk1/compressor.o
Normal file
Binary file not shown.
38
sk1/main.c
Normal file
38
sk1/main.c
Normal file
@ -0,0 +1,38 @@
|
||||
#include "compressor.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
static void print_help(void) {
|
||||
printf("Binary compressor using Huffman coding\n");
|
||||
printf("Usage:\n");
|
||||
printf(" ./compressor -c input output\n");
|
||||
printf(" ./compressor -d input output\n");
|
||||
printf(" ./compressor -h\n");
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
if (argc < 2) {
|
||||
print_help();
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (strcmp(argv[1], "-h") == 0) {
|
||||
print_help();
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (argc != 4) {
|
||||
print_help();
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (strcmp(argv[1], "-c") == 0)
|
||||
return pack_binary(argv[2], argv[3]);
|
||||
|
||||
if (strcmp(argv[1], "-d") == 0)
|
||||
return unpack_binary(argv[2], argv[3]);
|
||||
|
||||
print_help();
|
||||
return 1;
|
||||
}
|
||||
|
||||
BIN
sk1/main.o
Normal file
BIN
sk1/main.o
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user