fin huf
This commit is contained in:
parent
a0c4f91d0e
commit
9e310f42c5
209
sk1/:wq
Normal file
209
sk1/:wq
Normal file
@ -0,0 +1,209 @@
|
||||
#include "compressor.h"
|
||||
#include <string.h>
|
||||
|
||||
static HuffmanNode *create_node(unsigned char symbol, uint32_t freq) {
|
||||
HuffmanNode *node = malloc(sizeof(HuffmanNode));
|
||||
if (!node) return NULL;
|
||||
node->symbol = symbol;
|
||||
node->freq = freq;
|
||||
node->left = NULL;
|
||||
node->right = NULL;
|
||||
return node;
|
||||
}
|
||||
|
||||
/*
 * Release the tree rooted at `root`: both subtrees first, then the node
 * itself. Passing NULL is allowed and does nothing.
 */
static void free_tree(HuffmanNode *root) {
    if (root != NULL) {
        HuffmanNode *lhs = root->left;
        HuffmanNode *rhs = root->right;

        free_tree(lhs);
        free_tree(rhs);
        free(root);
    }
}
|
||||
|
||||
static HuffmanNode *build_tree(uint32_t freq[]) {
|
||||
HuffmanNode *nodes[SYMBOLS * 2];
|
||||
int count = 0;
|
||||
|
||||
for (int i = 0; i < SYMBOLS; i++) {
|
||||
if (freq[i] > 0) {
|
||||
nodes[count++] = create_node((unsigned char)i, freq[i]);
|
||||
}
|
||||
}
|
||||
|
||||
if (count == 0) return NULL;
|
||||
|
||||
while (count > 1) {
|
||||
int min1 = 0, min2 = 1;
|
||||
|
||||
if (nodes[min2]->freq < nodes[min1]->freq) {
|
||||
int tmp = min1;
|
||||
min1 = min2;
|
||||
min2 = tmp;
|
||||
}
|
||||
|
||||
for (int i = 2; i < count; i++) {
|
||||
if (nodes[i]->freq < nodes[min1]->freq) {
|
||||
min2 = min1;
|
||||
min1 = i;
|
||||
} else if (nodes[i]->freq < nodes[min2]->freq) {
|
||||
min2 = i;
|
||||
}
|
||||
}
|
||||
|
||||
HuffmanNode *parent =
|
||||
create_node(0, nodes[min1]->freq + nodes[min2]->freq);
|
||||
parent->left = nodes[min1];
|
||||
parent->right = nodes[min2];
|
||||
|
||||
nodes[min1] = parent;
|
||||
nodes[min2] = nodes[count - 1];
|
||||
count--;
|
||||
}
|
||||
|
||||
return nodes[0];
|
||||
}
|
||||
|
||||
static void build_codes(HuffmanNode *root, char *buffer, int depth,
|
||||
char codes[SYMBOLS][256]) {
|
||||
if (!root) return;
|
||||
|
||||
if (!root->left && !root->right) {
|
||||
if (depth == 0) {
|
||||
buffer[0] = '0';
|
||||
depth = 1;
|
||||
}
|
||||
|
||||
buffer[depth] = '\0';
|
||||
strcpy(codes[root->symbol], buffer);
|
||||
return;
|
||||
}
|
||||
|
||||
if (root->left) {
|
||||
buffer[depth] = '0';
|
||||
build_codes(root->left, buffer, depth + 1, codes);
|
||||
}
|
||||
|
||||
if (root->right) {
|
||||
buffer[depth] = '1';
|
||||
build_codes(root->right, buffer, depth + 1, codes);
|
||||
}
|
||||
}
|
||||
|
||||
int compress_file(const char *infile, const char *outfile) {
|
||||
FILE *in = fopen(infile, "rb");
|
||||
FILE *out = fopen(outfile, "wb");
|
||||
if (!in || !out) return 1;
|
||||
|
||||
uint32_t freq[SYMBOLS] = {0};
|
||||
int c;
|
||||
|
||||
while ((c = fgetc(in)) != EOF) {
|
||||
freq[(unsigned char)c]++;
|
||||
}
|
||||
uint64_t original_size = 0;
|
||||
for( int i = 0; i < SYMBOLS; i++) original_size += freq[i];
|
||||
|
||||
fwrite("HUF1",1,4,out);
|
||||
fwrite(&original_size, sizeof(uint64_t), 1, out);
|
||||
fwrite(freq, sizeof(uint32_t), SYMBOLS, out);
|
||||
|
||||
HuffmanNode *root = build_tree(freq);
|
||||
if (!root) {
|
||||
fclose(in);
|
||||
fclose(out);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char codes[SYMBOLS][256] = {{0}};
|
||||
char buffer[256];
|
||||
build_codes(root, buffer, 0, codes);
|
||||
|
||||
rewind(in);
|
||||
|
||||
unsigned char out_byte = 0;
|
||||
int bit_count = 0;
|
||||
|
||||
while ((c = fgetc(in)) != EOF) {
|
||||
char *code = codes[(unsigned char)c];
|
||||
for (int i = 0; code[i] != '\0'; i++) {
|
||||
out_byte <<= 1;
|
||||
if (code[i] == '1') out_byte |= 1;
|
||||
bit_count++;
|
||||
|
||||
if (bit_count == 8) {
|
||||
fputc(out_byte, out);
|
||||
out_byte = 0;
|
||||
bit_count = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (bit_count > 0) {
|
||||
out_byte <<= (8 - bit_count);
|
||||
fputc(out_byte, out);
|
||||
}
|
||||
|
||||
free_tree(root);
|
||||
fclose(in);
|
||||
fclose(out);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int decompress_file(const char *infile, const char *outfile) {
|
||||
FILE *in = fopen(infile, "rb");
|
||||
FILE *out = fopen(outfile, "wb");
|
||||
if (!in || !out) return 1;
|
||||
|
||||
char magic[4];
|
||||
if (fread(magic, 1, 4, in) != 4 || memcmp(magic, "HUF1", 4) != 0) {
|
||||
fclose(in);
|
||||
fclose(out);
|
||||
return 1;
|
||||
}
|
||||
|
||||
uint64_t original_size = 0;
|
||||
if (fread(&original_size, sizeof(uint64_t), 1, in) != 1) {
|
||||
fclose(in);
|
||||
fclose(out);
|
||||
return 1;
|
||||
}
|
||||
|
||||
uint32_t freq[SYMBOLS];
|
||||
if (fread(freq, sizeof(uint32_t), SYMBOLS, in) != SYMBOLS) {
|
||||
fclose(in);
|
||||
fclose(out);
|
||||
return 1;
|
||||
}
|
||||
|
||||
HuffmanNode *root = build_tree(freq);
|
||||
if (!root) {
|
||||
fclose(in);
|
||||
fclose(out);
|
||||
return 1;
|
||||
}
|
||||
|
||||
HuffmanNode *current = root;
|
||||
int byte;
|
||||
uint64_t written = 0;
|
||||
while ((byte = fgetc(in)) != EOF) {
|
||||
for (int i = 7; i >= 0; i--) {
|
||||
int bit = (byte >> i) & 1;
|
||||
current = bit ? current->right : current->left;
|
||||
|
||||
if (!current->left && !current->right) {
|
||||
fputc(current->symbol, out);
|
||||
written++;
|
||||
if ( written == original_size) {
|
||||
free_tree(root);
|
||||
fclose(in);
|
||||
fclose(out);
|
||||
return 0;
|
||||
}
|
||||
current = root;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free_tree(root);
|
||||
fclose(in);
|
||||
fclose(out);
|
||||
return 0;
|
||||
}
|
||||
13
sk1/Makefile
13
sk1/Makefile
@ -1,21 +1,16 @@
|
||||
CC=gcc
|
||||
CFLAGS=-Wall -Wextra -std=c11
|
||||
|
||||
|
||||
all: compressor
|
||||
|
||||
|
||||
compressor: main.o compressor.o
|
||||
$(CC) $(CFLAGS) -o compressor main.o compressor.o
|
||||
|
||||
$(CC) $(CFLAGS) -o compressor main.o compressor.o
|
||||
|
||||
main.o: main.c compressor.h
|
||||
$(CC) $(CFLAGS) -c main.c
|
||||
|
||||
$(CC) $(CFLAGS) -c main.c
|
||||
|
||||
compressor.o: compressor.c compressor.h
|
||||
$(CC) $(CFLAGS) -c compressor.c
|
||||
|
||||
$(CC) $(CFLAGS) -c compressor.c
|
||||
|
||||
clean:
|
||||
rm -f *.o compressor
|
||||
rm -f *.o compressor
|
||||
|
||||
BIN
sk1/compressor
Executable file
BIN
sk1/compressor
Executable file
Binary file not shown.
279
sk1/compressor.c
279
sk1/compressor.c
@ -1,140 +1,209 @@
|
||||
#include "compressor.h"
|
||||
#include <string.h>
|
||||
|
||||
|
||||
static HuffmanNode *create_node(unsigned char s, uint32_t f) {
|
||||
HuffmanNode *n = malloc(sizeof(HuffmanNode));
|
||||
n->symbol = s;
|
||||
n->freq = f;
|
||||
n->left = n->right = NULL;
|
||||
return n;
|
||||
static HuffmanNode *create_node(unsigned char symbol, uint32_t freq) {
|
||||
HuffmanNode *node = malloc(sizeof(HuffmanNode));
|
||||
if (!node) return NULL;
|
||||
node->symbol = symbol;
|
||||
node->freq = freq;
|
||||
node->left = NULL;
|
||||
node->right = NULL;
|
||||
return node;
|
||||
}
|
||||
|
||||
|
||||
static void free_tree(HuffmanNode *root) {
|
||||
if (!root) return;
|
||||
free_tree(root->left);
|
||||
free_tree(root->right);
|
||||
free(root);
|
||||
if (!root) return;
|
||||
free_tree(root->left);
|
||||
free_tree(root->right);
|
||||
free(root);
|
||||
}
|
||||
|
||||
|
||||
static HuffmanNode *build_tree(uint32_t freq[]) {
|
||||
HuffmanNode *nodes[SYMBOLS * 2];
|
||||
int n = 0;
|
||||
HuffmanNode *nodes[SYMBOLS * 2];
|
||||
int count = 0;
|
||||
|
||||
for (int i = 0; i < SYMBOLS; i++) {
|
||||
if (freq[i] > 0) {
|
||||
nodes[count++] = create_node((unsigned char)i, freq[i]);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < SYMBOLS; i++)
|
||||
if (freq[i] > 0)
|
||||
nodes[n++] = create_node((unsigned char)i, freq[i]);while (n > 1) {
|
||||
int a = 0, b = 1;
|
||||
if (nodes[b]->freq < nodes[a]->freq) { int t=a;a=b;b=t; }
|
||||
for (int i = 2; i < n; i++) {
|
||||
if (nodes[i]->freq < nodes[a]->freq) {
|
||||
b = a; a = i;
|
||||
} else if (nodes[i]->freq < nodes[b]->freq) {
|
||||
b = i;
|
||||
}
|
||||
}
|
||||
HuffmanNode *p = create_node(0, nodes[a]->freq + nodes[b]->freq);
|
||||
p->left = nodes[a];
|
||||
p->right = nodes[b];
|
||||
nodes[a] = p;
|
||||
nodes[b] = nodes[n-1];
|
||||
n--;
|
||||
}
|
||||
return nodes[0];
|
||||
if (count == 0) return NULL;
|
||||
|
||||
while (count > 1) {
|
||||
int min1 = 0, min2 = 1;
|
||||
|
||||
if (nodes[min2]->freq < nodes[min1]->freq) {
|
||||
int tmp = min1;
|
||||
min1 = min2;
|
||||
min2 = tmp;
|
||||
}
|
||||
|
||||
for (int i = 2; i < count; i++) {
|
||||
if (nodes[i]->freq < nodes[min1]->freq) {
|
||||
min2 = min1;
|
||||
min1 = i;
|
||||
} else if (nodes[i]->freq < nodes[min2]->freq) {
|
||||
min2 = i;
|
||||
}
|
||||
}
|
||||
|
||||
HuffmanNode *parent =
|
||||
create_node(0, nodes[min1]->freq + nodes[min2]->freq);
|
||||
parent->left = nodes[min1];
|
||||
parent->right = nodes[min2];
|
||||
|
||||
nodes[min1] = parent;
|
||||
nodes[min2] = nodes[count - 1];
|
||||
count--;
|
||||
}
|
||||
|
||||
return nodes[0];
|
||||
}
|
||||
|
||||
static void build_codes(HuffmanNode *root, char *buffer, int depth,
|
||||
char codes[SYMBOLS][256]) {
|
||||
if (!root) return;
|
||||
|
||||
static void build_codes(HuffmanNode *r, char *code, int d, char codes[SYMBOLS][256]) {
|
||||
if (!r->left && !r->right) {
|
||||
code[d] = '\0';
|
||||
strcpy(codes[r->symbol], code);
|
||||
return;
|
||||
if (r->left) {
|
||||
code[d] = '0';
|
||||
build_codes(r->left, code, d+1, codes);
|
||||
}
|
||||
if (r->right) {
|
||||
code[d] = '1';
|
||||
build_codes(r->right, code, d+1, codes);
|
||||
}
|
||||
}
|
||||
if (!root->left && !root->right) {
|
||||
if (depth == 0) {
|
||||
buffer[0] = '0';
|
||||
depth = 1;
|
||||
}
|
||||
|
||||
buffer[depth] = '\0';
|
||||
strcpy(codes[root->symbol], buffer);
|
||||
return;
|
||||
}
|
||||
|
||||
if (root->left) {
|
||||
buffer[depth] = '0';
|
||||
build_codes(root->left, buffer, depth + 1, codes);
|
||||
}
|
||||
|
||||
if (root->right) {
|
||||
buffer[depth] = '1';
|
||||
build_codes(root->right, buffer, depth + 1, codes);
|
||||
}
|
||||
}
|
||||
|
||||
int compress_file(const char *infile, const char *outfile) {
|
||||
FILE *in = fopen(infile, "rb");
|
||||
FILE *out = fopen(outfile, "wb");
|
||||
if (!in || !out) return 1;
|
||||
FILE *in = fopen(infile, "rb");
|
||||
FILE *out = fopen(outfile, "wb");
|
||||
if (!in || !out) return 1;
|
||||
|
||||
uint32_t freq[SYMBOLS] = {0};
|
||||
int c;
|
||||
|
||||
uint32_t freq[SYMBOLS] = {0};
|
||||
int c;
|
||||
while ((c = fgetc(in)) != EOF) freq[c]++;}fwrite(freq, sizeof(uint32_t), SYMBOLS, out);
|
||||
while ((c = fgetc(in)) != EOF) {
|
||||
freq[(unsigned char)c]++;
|
||||
}
|
||||
uint64_t original_size = 0;
|
||||
for( int i = 0; i < SYMBOLS; i++) original_size += freq[i];
|
||||
|
||||
fwrite("HUF1",1,4,out);
|
||||
fwrite(&original_size, sizeof(uint64_t), 1, out);
|
||||
fwrite(freq, sizeof(uint32_t), SYMBOLS, out);
|
||||
|
||||
HuffmanNode *root = build_tree(freq);
|
||||
if (!root) {
|
||||
fclose(in);
|
||||
fclose(out);
|
||||
return 1;
|
||||
}
|
||||
|
||||
HuffmanNode *root = build_tree(freq);
|
||||
char codes[SYMBOLS][256] = {{0}};
|
||||
char tmp[256];
|
||||
build_codes(root, tmp, 0, codes);
|
||||
char codes[SYMBOLS][256] = {{0}};
|
||||
char buffer[256];
|
||||
build_codes(root, buffer, 0, codes);
|
||||
|
||||
rewind(in);
|
||||
|
||||
rewind(in);
|
||||
unsigned char buf = 0;
|
||||
int bits = 0;
|
||||
unsigned char out_byte = 0;
|
||||
int bit_count = 0;
|
||||
|
||||
while ((c = fgetc(in)) != EOF) {
|
||||
char *code = codes[(unsigned char)c];
|
||||
for (int i = 0; code[i] != '\0'; i++) {
|
||||
out_byte <<= 1;
|
||||
if (code[i] == '1') out_byte |= 1;
|
||||
bit_count++;
|
||||
|
||||
while ((c = fgetc(in)) != EOF) {
|
||||
char *p = codes[c];
|
||||
while (*p) {
|
||||
buf <<= 1;
|
||||
if (*p == '1') buf |= 1;
|
||||
bits++;
|
||||
if (bits == 8) {
|
||||
fputc(buf, out);
|
||||
buf = 0;
|
||||
bits = 0;
|
||||
if (bit_count == 8) {
|
||||
fputc(out_byte, out);
|
||||
out_byte = 0;
|
||||
bit_count = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (bit_count > 0) {
|
||||
out_byte <<= (8 - bit_count);
|
||||
fputc(out_byte, out);
|
||||
}
|
||||
|
||||
free_tree(root);
|
||||
fclose(in);
|
||||
fclose(out);
|
||||
return 0;
|
||||
}
|
||||
p++;
|
||||
}
|
||||
}
|
||||
if (bits) {
|
||||
buf <<= (8 - bits);
|
||||
fputc(buf, out);
|
||||
free_tree(root);
|
||||
fclose(in);
|
||||
fclose(out);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int decompress_file(const char *infile, const char *outfile) {
|
||||
FILE *in = fopen(infile, "rb");
|
||||
FILE *out = fopen(outfile, "wb");
|
||||
if (!in || !out) return 1;
|
||||
FILE *in = fopen(infile, "rb");
|
||||
FILE *out = fopen(outfile, "wb");
|
||||
if (!in || !out) return 1;
|
||||
|
||||
char magic[4];
|
||||
if (fread(magic, 1, 4, in) != 4 || memcmp(magic, "HUF1", 4) != 0) {
|
||||
fclose(in);
|
||||
fclose(out);
|
||||
return 1;
|
||||
}
|
||||
|
||||
uint32_t freq[SYMBOLS];
|
||||
fread(freq, sizeof(uint32_t), SYMBOLS, in);
|
||||
uint64_t original_size = 0;
|
||||
if (fread(&original_size, sizeof(uint64_t), 1, in) != 1) {
|
||||
fclose(in);
|
||||
fclose(out);
|
||||
return 1;
|
||||
}
|
||||
|
||||
uint32_t freq[SYMBOLS];
|
||||
if (fread(freq, sizeof(uint32_t), SYMBOLS, in) != SYMBOLS) {
|
||||
fclose(in);
|
||||
fclose(out);
|
||||
return 1;
|
||||
}
|
||||
|
||||
HuffmanNode *root = build_tree(freq);
|
||||
HuffmanNode *cur = root;
|
||||
int byte;
|
||||
HuffmanNode *root = build_tree(freq);
|
||||
if (!root) {
|
||||
fclose(in);
|
||||
fclose(out);
|
||||
return 1;
|
||||
}
|
||||
|
||||
HuffmanNode *current = root;
|
||||
int byte;
|
||||
uint64_t written = 0;
|
||||
while ((byte = fgetc(in)) != EOF) {
|
||||
for (int i = 7; i >= 0; i--) {
|
||||
int bit = (byte >> i) & 1;
|
||||
current = bit ? current->right : current->left;
|
||||
|
||||
while ((byte = fgetc(in)) != EOF) {
|
||||
for (int i = 7; i >= 0; i--) {
|
||||
int bit = (byte >> i) & 1;
|
||||
cur = bit ? cur->right : cur->left;
|
||||
if (!cur->left && !cur->right) {
|
||||
fputc(cur->symbol, out);
|
||||
cur = root;
|
||||
}
|
||||
}
|
||||
}}free_tree(root);
|
||||
fclose(in);
|
||||
fclose(out);
|
||||
return 0;
|
||||
if (!current->left && !current->right) {
|
||||
fputc(current->symbol, out);
|
||||
written++;
|
||||
if ( written == original_size) {
|
||||
free_tree(root);
|
||||
fclose(in);
|
||||
fclose(out);
|
||||
return 0;
|
||||
}
|
||||
current = root;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free_tree(root);
|
||||
fclose(in);
|
||||
fclose(out);
|
||||
return 0;
|
||||
}
|
||||
|
||||
BIN
sk1/compressor.o
Normal file
BIN
sk1/compressor.o
Normal file
Binary file not shown.
BIN
sk1/main.o
Normal file
BIN
sk1/main.o
Normal file
Binary file not shown.
BIN
sk1/test.huff
Normal file
BIN
sk1/test.huff
Normal file
Binary file not shown.
1
sk1/test.txt
Normal file
1
sk1/test.txt
Normal file
@ -0,0 +1 @@
|
||||
Toto je test subor na kompresiu
|
||||
1
sk1/test_out.txt
Normal file
1
sk1/test_out.txt
Normal file
@ -0,0 +1 @@
|
||||
Toto je test subor na kompresiu
|
||||
Loading…
Reference in New Issue
Block a user