250 lines
5.6 KiB
C
250 lines
5.6 KiB
C
#include "compressor.h"
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <stdint.h>
|
|
|
|
#define BYTE_RANGE 256
|
|
#define IO_BUFFER 8192
|
|
|
|
typedef struct HuffNode {
|
|
int value;
|
|
uint32_t weight;
|
|
struct HuffNode *zero;
|
|
struct HuffNode *one;
|
|
} HuffNode;
|
|
|
|
typedef struct {
|
|
uint32_t pattern;
|
|
uint8_t size;
|
|
} BitCode;
|
|
|
|
static HuffNode *node_create(int value, uint32_t weight)
|
|
{
|
|
HuffNode *n=malloc(sizeof(HuffNode));
|
|
if (!n)
|
|
{
|
|
return NULL;
|
|
}
|
|
n->value=value;
|
|
n->weight=weight;
|
|
n->zero=NULL;
|
|
n->one=NULL;
|
|
return n;
|
|
}
|
|
|
|
static void node_destroy(HuffNode *n)
|
|
{
|
|
if (!n)
|
|
{
|
|
return;
|
|
}
|
|
node_destroy(n->zero);
|
|
node_destroy(n->one);
|
|
free(n);
|
|
}
|
|
|
|
static HuffNode *assemble_tree(uint32_t histogram[])
|
|
{
|
|
HuffNode *pool[BYTE_RANGE*2];
|
|
int pool_size=0;
|
|
for (int i = 0; i < BYTE_RANGE; i++)
|
|
{
|
|
if (histogram[i]>0)
|
|
{
|
|
pool[pool_size++] = node_create(i, histogram[i]);
|
|
}
|
|
}
|
|
if (pool_size==1)
|
|
{
|
|
pool[pool_size++]=node_create(-1, 0);
|
|
}
|
|
while (pool_size>1)
|
|
{
|
|
int m1=0, m2=1;
|
|
if (pool[m2]->weight<pool[m1]->weight)
|
|
{
|
|
int t=m1; m1=m2; m2=t;
|
|
}
|
|
for (int i = 2; i < pool_size; i++)
|
|
{
|
|
if (pool[i]->weight < pool[m1]->weight)
|
|
{
|
|
m2=m1;
|
|
m1=i;
|
|
} else if (pool[i]->weight < pool[m2]->weight)
|
|
{
|
|
m2=i;
|
|
}
|
|
}
|
|
HuffNode *parent = node_create(
|
|
-1,
|
|
pool[m1]->weight+pool[m2]->weight
|
|
);
|
|
parent->zero=pool[m1];
|
|
parent->one=pool[m2];
|
|
pool[m1]=parent;
|
|
pool[m2]=pool[pool_size-1];
|
|
pool_size--;
|
|
}
|
|
return pool[0];
|
|
}
|
|
|
|
static void generate_codes
|
|
(
|
|
HuffNode *root,
|
|
BitCode table[],
|
|
uint32_t bits,
|
|
uint8_t depth
|
|
) {
|
|
if (!root) return;
|
|
if (root->value >= 0)
|
|
{
|
|
table[root->value].pattern=bits;
|
|
table[root->value].size=depth;
|
|
return;
|
|
}
|
|
generate_codes(root->zero, table, bits << 1, depth + 1);
|
|
generate_codes(root->one, table, (bits << 1) | 1, depth + 1);
|
|
}
|
|
|
|
int pack_binary(const char *input_path, const char *output_path)
|
|
{
|
|
FILE *src=fopen(input_path, "rb");
|
|
FILE *dst=fopen(output_path, "wb");
|
|
if (!src || !dst)
|
|
{
|
|
if (src) fclose(src);
|
|
if (dst) fclose(dst);
|
|
return 1;
|
|
}
|
|
uint32_t freq_map[BYTE_RANGE]={0};
|
|
uint8_t buffer[IO_BUFFER];
|
|
size_t read_bytes;
|
|
uint32_t total_size=0;
|
|
while ((read_bytes = fread(buffer, 1, IO_BUFFER, src)) > 0)
|
|
{
|
|
total_size += read_bytes;
|
|
for (size_t i = 0; i < read_bytes; i++)
|
|
{
|
|
freq_map[buffer[i]]++;
|
|
}
|
|
}
|
|
rewind(src);
|
|
if (fwrite(&total_size, sizeof(uint32_t), 1, dst) != 1 ||
|
|
fwrite(freq_map, sizeof(uint32_t), BYTE_RANGE, dst) != BYTE_RANGE)
|
|
{
|
|
fclose(src);
|
|
fclose(dst);
|
|
return 1;
|
|
}
|
|
HuffNode *tree=assemble_tree(freq_map);
|
|
if (!tree)
|
|
{
|
|
fclose(src);
|
|
fclose(dst);
|
|
return 1;
|
|
}
|
|
BitCode codebook[BYTE_RANGE];
|
|
memset(codebook, 0, sizeof(codebook));
|
|
generate_codes(tree, codebook, 0, 0);
|
|
uint8_t out_byte=0;
|
|
int bit_count=0;
|
|
while ((read_bytes = fread(buffer, 1, IO_BUFFER, src)) > 0) {
|
|
for (size_t i = 0; i < read_bytes; i++)
|
|
{
|
|
BitCode c=codebook[buffer[i]];
|
|
for (int b = c.size - 1; b >= 0; b--)
|
|
{
|
|
out_byte = (out_byte << 1) |
|
|
((c.pattern >> b) & 1);
|
|
bit_count++;
|
|
if (bit_count==8)
|
|
{
|
|
if (fwrite(&out_byte, 1, 1, dst)!=1)
|
|
{
|
|
node_destroy(tree);
|
|
fclose(src);
|
|
fclose(dst);
|
|
return 1;
|
|
}
|
|
out_byte=0;
|
|
bit_count=0;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if (bit_count>0)
|
|
{
|
|
out_byte <<= (8 - bit_count);
|
|
if (fwrite(&out_byte, 1, 1, dst)!=1)
|
|
{
|
|
node_destroy(tree);
|
|
fclose(src);
|
|
fclose(dst);
|
|
return 1;
|
|
}
|
|
}
|
|
node_destroy(tree);
|
|
fclose(src);
|
|
fclose(dst);
|
|
return 0;
|
|
}
|
|
|
|
int unpack_binary(const char *input_path, const char *output_path)
|
|
{
|
|
FILE *src=fopen(input_path, "rb");
|
|
FILE *dst=fopen(output_path, "wb");
|
|
if (!src || !dst)
|
|
{
|
|
if (src) fclose(src);
|
|
if (dst) fclose(dst);
|
|
return 1;
|
|
}
|
|
uint32_t expected_size;
|
|
uint32_t freq_map[BYTE_RANGE];
|
|
if (fread(&expected_size, sizeof(uint32_t), 1, src) != 1 ||
|
|
fread(freq_map, sizeof(uint32_t), BYTE_RANGE, src) != BYTE_RANGE)
|
|
{
|
|
fclose(src);
|
|
fclose(dst);
|
|
return 1;
|
|
}
|
|
HuffNode *tree=assemble_tree(freq_map);
|
|
if (!tree)
|
|
{
|
|
fclose(src);
|
|
fclose(dst);
|
|
return 1;
|
|
}
|
|
HuffNode *cursor=tree;
|
|
uint8_t byte;
|
|
uint32_t produced=0;
|
|
while (produced<expected_size &&
|
|
fread(&byte, 1, 1, src)==1)
|
|
{
|
|
for (int i = 7; i >= 0 && produced < expected_size; i--)
|
|
{
|
|
int bit=(byte >> i) & 1;
|
|
cursor=bit ? cursor->one : cursor->zero;
|
|
if (cursor->value>=0)
|
|
{
|
|
if (fputc(cursor->value, dst)==EOF)
|
|
{
|
|
node_destroy(tree);
|
|
fclose(src);
|
|
fclose(dst);
|
|
return 1;
|
|
}
|
|
produced++;
|
|
cursor=tree;
|
|
}
|
|
}
|
|
}
|
|
node_destroy(tree);
|
|
fclose(src);
|
|
fclose(dst);
|
|
return 0;
|
|
}
|
|
|