Update sk1/compressor.c
This commit is contained in:
parent
2f587f253b
commit
70c293a075
459
sk1/compressor.c
459
sk1/compressor.c
@@ -1,291 +1,250 @@
|
|||||||
#include "compressor.h"
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdint.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
// --- Алгоритм Run-Length Encoding (RLE) ---
|
#define WINDOW_SIZE 4096
|
||||||
|
#define LOOKAHEAD_BUFFER_SIZE 18
|
||||||
|
|
||||||
int compress_2(const char* input_file_name, const char* output_file_name) {
|
typedef struct {
|
||||||
FILE *infile = fopen(input_file_name, "rb");
|
uint16_t offset;
|
||||||
if (!infile) {
|
uint8_t length;
|
||||||
|
uint8_t next_char;
|
||||||
|
} LZ77Triple;
|
||||||
|
|
||||||
|
int lz77_compress(const char *input_filename, const char *output_filename) {
|
||||||
|
// Open the input file in binary read mode
|
||||||
|
FILE *input_file = fopen(input_filename, "rb");
|
||||||
|
if (!input_file) {
|
||||||
perror("Error opening input file");
|
perror("Error opening input file");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
FILE *outfile = fopen(output_file_name, "wb");
|
// Open the output file in binary write mode
|
||||||
if (!outfile) {
|
FILE *output_file = fopen(output_filename, "wb");
|
||||||
|
if (!output_file) {
|
||||||
perror("Error opening output file");
|
perror("Error opening output file");
|
||||||
fclose(infile);
|
fclose(input_file);
|
||||||
return -1;
|
return -2;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned char current_byte, previous_byte;
|
uint8_t *window = (uint8_t *)malloc(WINDOW_SIZE + LOOKAHEAD_BUFFER_SIZE);
|
||||||
size_t count = 0;
|
if (!window) {
|
||||||
|
perror("Memory allocation failed");
|
||||||
if (fread(&previous_byte, 1, 1, infile) != 1) {
|
fclose(input_file);
|
||||||
fclose(infile);
|
fclose(output_file);
|
||||||
fclose(outfile);
|
return -3;
|
||||||
return 0; // Порожній файл
|
|
||||||
}
|
}
|
||||||
|
|
||||||
count = 1;
|
size_t window_start = 0;
|
||||||
|
size_t lookahead_start = 0;
|
||||||
|
size_t bytes_read;
|
||||||
|
|
||||||
while (fread(&current_byte, 1, 1, infile) == 1) {
|
// Initialize the window with data from the input file
|
||||||
|
bytes_read = fread(window + WINDOW_SIZE, 1, LOOKAHEAD_BUFFER_SIZE, input_file);
|
||||||
|
|
||||||
|
while (bytes_read > 0) {
|
||||||
|
size_t best_match_offset = 0;
|
||||||
|
size_t best_match_length = 0;
|
||||||
|
|
||||||
|
// Search for the best match within the sliding window
|
||||||
|
for (size_t i = window_start; i < WINDOW_SIZE + lookahead_start; i++) {
|
||||||
|
size_t match_length = 0;
|
||||||
|
|
||||||
|
while (match_length < bytes_read &&
|
||||||
|
window[i + match_length] == window[WINDOW_SIZE + match_length]) {
|
||||||
|
match_length++;
|
||||||
|
if (match_length >= LOOKAHEAD_BUFFER_SIZE) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (match_length > best_match_length) {
|
||||||
|
best_match_length = match_length;
|
||||||
|
best_match_offset = WINDOW_SIZE + lookahead_start - i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a triple and write it to the output file
|
||||||
|
LZ77Triple triple;
|
||||||
|
triple.offset = (uint16_t)best_match_offset;
|
||||||
|
triple.length = (uint8_t)best_match_length;
|
||||||
|
triple.next_char = window[WINDOW_SIZE + best_match_length];
|
||||||
|
|
||||||
|
// Write the triple to the output file
|
||||||
|
fwrite(&triple, sizeof(LZ77Triple), 1, output_file);
|
||||||
|
|
||||||
|
// Slide the window
|
||||||
|
window_start = (window_start + best_match_length + 1) % WINDOW_SIZE;
|
||||||
|
lookahead_start = (lookahead_start + best_match_length + 1) % LOOKAHEAD_BUFFER_SIZE;
|
||||||
|
|
||||||
|
// Read new byte into the lookahead buffer
|
||||||
|
bytes_read = fread(window + WINDOW_SIZE, 1, LOOKAHEAD_BUFFER_SIZE - lookahead_start, input_file);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cleanup and close files
|
||||||
|
fclose(input_file);
|
||||||
|
fclose(output_file);
|
||||||
|
free(window);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int lz77_decompress(const char *input_filename, const char *output_filename) {
|
||||||
|
FILE *input = fopen(input_filename, "rb");
|
||||||
|
FILE *output = fopen(output_filename, "wb");
|
||||||
|
if (!input || !output) {
|
||||||
|
if (input) fclose(input);
|
||||||
|
if (output) fclose(output);
|
||||||
|
return -1; // Помилка відкриття файлу
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t buffer_size = 4096; // Максимальний розмір вікна
|
||||||
|
unsigned char *window = malloc(buffer_size);
|
||||||
|
size_t window_size = 0; // Розмір заповненої частини вікна
|
||||||
|
size_t window_pos = 0; // Поточна позиція в межах вікна
|
||||||
|
|
||||||
|
if (!window) {
|
||||||
|
fclose(input);
|
||||||
|
fclose(output);
|
||||||
|
return -1; // Помилка пам'яті
|
||||||
|
}
|
||||||
|
|
||||||
|
while (!feof(input)) {
|
||||||
|
unsigned char flag;
|
||||||
|
if (fread(&flag, 1, 1, input) != 1) break;
|
||||||
|
|
||||||
|
for (int i = 0; i < 8 && !feof(input); i++) {
|
||||||
|
if (flag & (1 << i)) { // Літеральний символ
|
||||||
|
unsigned char literal;
|
||||||
|
if (fread(&literal, 1, 1, input) != 1) break;
|
||||||
|
|
||||||
|
fputc(literal, output);
|
||||||
|
|
||||||
|
// Додати символ у вікно
|
||||||
|
window[window_pos] = literal;
|
||||||
|
window_pos = (window_pos + 1) % buffer_size;
|
||||||
|
if (window_size < buffer_size) window_size++;
|
||||||
|
} else { // Посилання
|
||||||
|
unsigned short offset_length;
|
||||||
|
if (fread(&offset_length, 2, 1, input) != 1) break;
|
||||||
|
|
||||||
|
size_t offset = offset_length >> 4;
|
||||||
|
size_t length = (offset_length & 0xF) + 3;
|
||||||
|
|
||||||
|
for (size_t j = 0; j < length; j++) {
|
||||||
|
unsigned char byte = window[(window_pos - offset + buffer_size) % buffer_size];
|
||||||
|
fputc(byte, output);
|
||||||
|
|
||||||
|
// Додати байт у вікно
|
||||||
|
window[window_pos] = byte;
|
||||||
|
window_pos = (window_pos + 1) % buffer_size;
|
||||||
|
if (window_size < buffer_size) window_size++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
free(window);
|
||||||
|
fclose(input);
|
||||||
|
fclose(output);
|
||||||
|
return 0; // Успішна декомпресія
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void rle_compress(const char *input_filename, const char *output_filename) {
|
||||||
|
FILE *input_file = fopen(input_filename, "rb");
|
||||||
|
if (!input_file) {
|
||||||
|
perror("Error opening input file");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
FILE *output_file = fopen(output_filename, "wb");
|
||||||
|
if (!output_file) {
|
||||||
|
perror("Error opening output file");
|
||||||
|
fclose(input_file);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint8_t current_byte, previous_byte;
|
||||||
|
uint8_t count = 1;
|
||||||
|
|
||||||
|
if (fread(&previous_byte, 1, 1, input_file) != 1) {
|
||||||
|
printf("Input file is empty or read error occurred.\n");
|
||||||
|
fclose(input_file);
|
||||||
|
fclose(output_file);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (fread(&current_byte, 1, 1, input_file) == 1) {
|
||||||
if (current_byte == previous_byte && count < 255) {
|
if (current_byte == previous_byte && count < 255) {
|
||||||
count++;
|
count++;
|
||||||
} else {
|
} else {
|
||||||
fwrite(&previous_byte, 1, 1, outfile);
|
fwrite(&previous_byte, 1, 1, output_file);
|
||||||
fwrite(&count, 1, 1, outfile);
|
fwrite(&count, 1, 1, output_file);
|
||||||
|
printf("Writing byte: %c with count: %d\n", previous_byte, count);
|
||||||
previous_byte = current_byte;
|
previous_byte = current_byte;
|
||||||
count = 1;
|
count = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fwrite(&previous_byte, 1, 1, outfile);
|
fwrite(&previous_byte, 1, 1, output_file);
|
||||||
fwrite(&count, 1, 1, outfile);
|
fwrite(&count, 1, 1, output_file);
|
||||||
|
printf("Writing byte: %c with count: %d\n", previous_byte, count);
|
||||||
|
|
||||||
fclose(infile);
|
fclose(input_file);
|
||||||
fclose(outfile);
|
fclose(output_file);
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int decompress_2(const char* input_file_name, const char* output_file_name) {
|
|
||||||
FILE *infile = fopen(input_file_name, "rb");
|
|
||||||
if (!infile) {
|
int rle_decompress(const char *input_filename, const char *output_filename) {
|
||||||
|
// Open the input file in binary read mode
|
||||||
|
FILE *input_file = fopen(input_filename, "rb");
|
||||||
|
if (!input_file) {
|
||||||
perror("Error opening input file");
|
perror("Error opening input file");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
FILE *outfile = fopen(output_file_name, "wb");
|
// Open the output file in binary write mode
|
||||||
if (!outfile) {
|
FILE *output_file = fopen(output_filename, "wb");
|
||||||
|
if (!output_file) {
|
||||||
perror("Error opening output file");
|
perror("Error opening output file");
|
||||||
fclose(infile);
|
fclose(input_file);
|
||||||
return -1;
|
return -2;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned char current_byte;
|
uint8_t byte;
|
||||||
unsigned char count;
|
uint8_t count;
|
||||||
|
size_t decompressed_size = 0;
|
||||||
|
|
||||||
while (fread(&current_byte, 1, 1, infile) == 1) {
|
// Read [byte, count] pairs from the input file
|
||||||
if (fread(&count, 1, 1, infile) != 1) {
|
while (fread(&byte, 1, 1, input_file) == 1) {
|
||||||
perror("Malformed input file");
|
if (fread(&count, 1, 1, input_file) != 1) {
|
||||||
fclose(infile);
|
// Handle malformed input file
|
||||||
fclose(outfile);
|
fprintf(stderr, "Error: Malformed input file\n");
|
||||||
return -1;
|
fclose(input_file);
|
||||||
|
fclose(output_file);
|
||||||
|
return -3;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < count; i++) {
|
// Write 'count' occurrences of 'byte' to the output file
|
||||||
fwrite(&current_byte, 1, 1, outfile);
|
for (uint8_t i = 0; i < count; i++) {
|
||||||
}
|
if (fwrite(&byte, 1, 1, output_file) != 1) {
|
||||||
}
|
perror("Error writing to output file");
|
||||||
|
fclose(input_file);
|
||||||
fclose(infile);
|
fclose(output_file);
|
||||||
fclose(outfile);
|
return -4;
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// --- Алгоритм Хаффмана (Huffman Coding) ---
|
|
||||||
|
|
||||||
typedef struct Node {
|
|
||||||
unsigned char symbol;
|
|
||||||
size_t frequency;
|
|
||||||
struct Node *left, *right;
|
|
||||||
} Node;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
unsigned char symbol;
|
|
||||||
char *code;
|
|
||||||
} HuffmanCode;
|
|
||||||
|
|
||||||
int compare_nodes(const void *a, const void *b) {
|
|
||||||
return (*(Node**)a)->frequency - (*(Node**)b)->frequency;
|
|
||||||
}
|
|
||||||
|
|
||||||
Node* create_huffman_tree(unsigned char *data, size_t size) {
|
|
||||||
size_t freq[256] = {0};
|
|
||||||
for (size_t i = 0; i < size; i++) {
|
|
||||||
freq[data[i]]++;
|
|
||||||
}
|
|
||||||
|
|
||||||
Node *nodes[256];
|
|
||||||
size_t node_count = 0;
|
|
||||||
|
|
||||||
for (int i = 0; i < 256; i++) {
|
|
||||||
if (freq[i] > 0) {
|
|
||||||
nodes[node_count] = malloc(sizeof(Node));
|
|
||||||
if (!nodes[node_count]) {
|
|
||||||
perror("Memory allocation failed");
|
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
nodes[node_count]->symbol = (unsigned char)i;
|
decompressed_size++;
|
||||||
nodes[node_count]->frequency = freq[i];
|
|
||||||
nodes[node_count]->left = nodes[node_count]->right = NULL;
|
|
||||||
node_count++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
while (node_count > 1) {
|
// Clean up and close files
|
||||||
qsort(nodes, node_count, sizeof(Node*), compare_nodes);
|
fclose(input_file);
|
||||||
|
fclose(output_file);
|
||||||
|
|
||||||
Node* left = nodes[0];
|
return (int)decompressed_size;
|
||||||
Node* right = nodes[1];
|
|
||||||
|
|
||||||
Node* parent = malloc(sizeof(Node));
|
|
||||||
if (!parent) {
|
|
||||||
perror("Memory allocation failed");
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
parent->symbol = 0;
|
|
||||||
parent->frequency = left->frequency + right->frequency;
|
|
||||||
parent->left = left;
|
|
||||||
parent->right = right;
|
|
||||||
|
|
||||||
memmove(nodes, nodes + 2, (node_count - 2) * sizeof(Node*));
|
|
||||||
nodes[node_count - 2] = parent;
|
|
||||||
node_count--;
|
|
||||||
}
|
|
||||||
|
|
||||||
return nodes[0];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void generate_huffman_codes(Node* root, HuffmanCode* codes, char* current_code, int depth) {
|
|
||||||
if (!root) return;
|
|
||||||
|
|
||||||
if (root->left == NULL && root->right == NULL) {
|
|
||||||
current_code[depth] = '\0';
|
|
||||||
codes[root->symbol].symbol = root->symbol;
|
|
||||||
codes[root->symbol].code = strdup(current_code);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
current_code[depth] = '0';
|
|
||||||
generate_huffman_codes(root->left, codes, current_code, depth + 1);
|
|
||||||
|
|
||||||
current_code[depth] = '1';
|
|
||||||
generate_huffman_codes(root->right, codes, current_code, depth + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
void serialize_huffman_tree(Node* root, FILE* outfile) {
|
|
||||||
if (!root) return;
|
|
||||||
|
|
||||||
if (root->left == NULL && root->right == NULL) {
|
|
||||||
fputc('L', outfile);
|
|
||||||
fputc(root->symbol, outfile);
|
|
||||||
} else {
|
|
||||||
fputc('I', outfile);
|
|
||||||
serialize_huffman_tree(root->left, outfile);
|
|
||||||
serialize_huffman_tree(root->right, outfile);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Node* rebuild_huffman_tree(unsigned char* tree_data, size_t size) {
|
|
||||||
(void)size; // Позначаємо параметр як тимчасово невикористаний
|
|
||||||
size_t index = 0;
|
|
||||||
|
|
||||||
Node* build_tree_recursively(unsigned char* data, size_t* index) {
|
|
||||||
if (data[*index] == 'L') { // Лист (Leaf)
|
|
||||||
(*index)++;
|
|
||||||
Node* leaf = malloc(sizeof(Node));
|
|
||||||
if (!leaf) {
|
|
||||||
perror("Memory allocation failed");
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
leaf->symbol = data[*index];
|
|
||||||
leaf->frequency = 0; // частота не потрібна для декомпресії
|
|
||||||
leaf->left = leaf->right = NULL;
|
|
||||||
(*index)++;
|
|
||||||
return leaf;
|
|
||||||
} else if (data[*index] == 'I') { // Вузол (Internal)
|
|
||||||
(*index)++;
|
|
||||||
Node* internal = malloc(sizeof(Node));
|
|
||||||
if (!internal) {
|
|
||||||
perror("Memory allocation failed");
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
internal->symbol = 0; // внутрішні вузли не мають символів
|
|
||||||
internal->frequency = 0;
|
|
||||||
internal->left = build_tree_recursively(data, index);
|
|
||||||
internal->right = build_tree_recursively(data, index);
|
|
||||||
return internal;
|
|
||||||
}
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
return build_tree_recursively(tree_data, &index);
|
|
||||||
}
|
|
||||||
|
|
||||||
void free_huffman_tree(Node* root) {
|
|
||||||
if (!root) return;
|
|
||||||
free_huffman_tree(root->left);
|
|
||||||
free_huffman_tree(root->right);
|
|
||||||
free(root);
|
|
||||||
}
|
|
||||||
|
|
||||||
int compress_1(const char* input_file_name, const char* output_file_name) {
|
|
||||||
FILE *infile = fopen(input_file_name, "rb");
|
|
||||||
if (!infile) {
|
|
||||||
perror("Error opening input file");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
fseek(infile, 0, SEEK_END);
|
|
||||||
size_t file_size = ftell(infile);
|
|
||||||
fseek(infile, 0, SEEK_SET);
|
|
||||||
|
|
||||||
unsigned char* data = malloc(file_size);
|
|
||||||
if (!data) {
|
|
||||||
fclose(infile);
|
|
||||||
perror("Memory allocation failed");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
fread(data, 1, file_size, infile);
|
|
||||||
fclose(infile);
|
|
||||||
|
|
||||||
Node* root = create_huffman_tree(data, file_size);
|
|
||||||
if (!root) {
|
|
||||||
free(data);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
HuffmanCode codes[256] = {0};
|
|
||||||
char current_code[256];
|
|
||||||
generate_huffman_codes(root, codes, current_code, 0);
|
|
||||||
|
|
||||||
FILE *outfile = fopen(output_file_name, "wb");
|
|
||||||
if (!outfile) {
|
|
||||||
free(data);
|
|
||||||
free_huffman_tree(root);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
serialize_huffman_tree(root, outfile);
|
|
||||||
|
|
||||||
for (size_t i = 0; i < file_size; i++) {
|
|
||||||
const char* code = codes[data[i]].code;
|
|
||||||
for (size_t j = 0; code[j] != '\0'; j++) {
|
|
||||||
fputc(code[j], outfile);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fclose(outfile);
|
|
||||||
free(data);
|
|
||||||
free_huffman_tree(root);
|
|
||||||
|
|
||||||
for (int i = 0; i < 256; i++) {
|
|
||||||
free(codes[i].code);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Декомпресія (приклад потребує уточнень залежно від специфіки формату)
|
|
||||||
int decompress_1(const char* input_file_name, const char* output_file_name) {
|
|
||||||
(void)input_file_name;
|
|
||||||
(void)output_file_name;
|
|
||||||
return -1; // Поки що не реалізовано.
|
|
||||||
}
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user