/*
 * sk1/compressor.c - LZSS-style sliding-window and run-length (RLE) file
 * compression.
 *
 * This is the post-patch content of "Update sk1/compressor.c"
 * (commit 70c293a07502f673d1db270e43526999ffe7c11e), reformatted as plain C.
 */
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Size of the decoder's circular history buffer, in bytes. */
#define WINDOW_SIZE 4096
/* Longest match a single reference can describe (4-bit length field + 3). */
#define LOOKAHEAD_BUFFER_SIZE 18

/*
 * Classic LZ77 (offset, length, next byte) triple.
 * The stream format implemented below does not use it; the type is kept so
 * that any code still referring to it keeps compiling.
 */
typedef struct {
    uint16_t offset;
    uint8_t length;
    uint8_t next_char;
} LZ77Triple;

enum {
    LZ_MIN_MATCH = 3,                   /* shortest match worth a 2-byte reference */
    LZ_MAX_OFFSET = WINDOW_SIZE - 1,    /* largest distance a 12-bit field can hold */
    LZ_ITEMS_PER_GROUP = 8,             /* one flag bit per item */
    LZ_INPUT_CAPACITY = 4 * WINDOW_SIZE /* encoder staging buffer size */
};

/*
 * Stream format shared by lz77_compress() and lz77_decompress():
 *
 *   The stream is a sequence of groups. Each group starts with one flag byte
 *   followed by up to 8 items; bit i (LSB first) of the flag describes item i.
 *     bit set   -> literal:   1 byte, copied to the output as is
 *     bit clear -> reference: 2 bytes, little-endian 16-bit token
 *                  token >> 4        = distance back into the output (1..4095)
 *                  (token & 0xF) + 3 = number of bytes to copy     (3..18)
 *   The last group may contain fewer than 8 items; the stream simply ends.
 */

/*
 * Finds the longest match for buf[pos..end) among the bytes that precede pos.
 *
 * On success stores the backward distance in *distance and returns the match
 * length (LZ_MIN_MATCH..LOOKAHEAD_BUFFER_SIZE). Returns 0 when no match of at
 * least LZ_MIN_MATCH bytes exists; *distance is then unspecified.
 */
static size_t lz_longest_match(const uint8_t *buf, size_t pos, size_t end,
                               size_t *distance)
{
    size_t limit = end - pos;
    if (limit > (size_t)LOOKAHEAD_BUFFER_SIZE) {
        limit = (size_t)LOOKAHEAD_BUFFER_SIZE;
    }
    if (limit < (size_t)LZ_MIN_MATCH) {
        return 0;
    }

    size_t reach = pos < (size_t)LZ_MAX_OFFSET ? pos : (size_t)LZ_MAX_OFFSET;
    size_t best = 0;

    for (size_t back = 1; back <= reach; back++) {
        const uint8_t *candidate = buf + pos - back;
        size_t len = 0;

        /* The candidate may run into the lookahead itself; the decoder copies
         * byte by byte, so such overlapping matches decode correctly. */
        while (len < limit && candidate[len] == buf[pos + len]) {
            len++;
        }
        if (len > best) {
            best = len;
            *distance = back;
            if (best == limit) {
                break; /* cannot do better */
            }
        }
    }
    return best >= (size_t)LZ_MIN_MATCH ? best : 0;
}

/**
 * Compresses a file into the flag-byte/token stream described above.
 *
 * @param input_filename  path of the file to compress
 * @param output_filename path of the compressed file to create
 * @return 0 on success, -1 if the input cannot be opened, -2 if the output
 *         cannot be opened, -3 on allocation failure, -4 on a read/write error.
 */
int lz77_compress(const char *input_filename, const char *output_filename)
{
    FILE *input_file = fopen(input_filename, "rb");
    if (!input_file) {
        perror("Error opening input file");
        return -1;
    }

    FILE *output_file = fopen(output_filename, "wb");
    if (!output_file) {
        perror("Error opening output file");
        fclose(input_file);
        return -2;
    }

    uint8_t *buf = malloc(LZ_INPUT_CAPACITY);
    if (!buf) {
        perror("Memory allocation failed");
        fclose(input_file);
        fclose(output_file);
        return -3;
    }

    int rc = 0;
    int at_eof = 0;
    size_t pos = 0; /* next byte to encode */
    size_t end = 0; /* one past the last valid byte in buf */

    uint8_t group[1 + 2 * LZ_ITEMS_PER_GROUP]; /* flag byte + worst-case items */
    size_t group_len = 1;
    int items = 0;
    group[0] = 0;

    for (;;) {
        /* Keep a full lookahead available until the input is exhausted. */
        if (!at_eof && end - pos < (size_t)LOOKAHEAD_BUFFER_SIZE) {
            if (pos > (size_t)LZ_MAX_OFFSET) {
                /* Drop history that no reference can reach any more. */
                size_t drop = pos - (size_t)LZ_MAX_OFFSET;
                memmove(buf, buf + drop, end - drop);
                pos -= drop;
                end -= drop;
            }
            size_t want = (size_t)LZ_INPUT_CAPACITY - end;
            size_t got = fread(buf + end, 1, want, input_file);
            end += got;
            if (got < want) {
                if (ferror(input_file)) {
                    perror("Error reading input file");
                    rc = -4;
                    break;
                }
                at_eof = 1;
            }
        }
        if (pos == end) {
            break; /* everything encoded */
        }

        size_t distance = 0;
        size_t length = lz_longest_match(buf, pos, end, &distance);

        if (length == 0) {
            group[0] |= (uint8_t)(1u << items); /* mark item as literal */
            group[group_len++] = buf[pos];
            pos += 1;
        } else {
            unsigned token = ((unsigned)distance << 4)
                           | (unsigned)(length - (size_t)LZ_MIN_MATCH);
            group[group_len++] = (uint8_t)(token & 0xFFu); /* low byte first */
            group[group_len++] = (uint8_t)(token >> 8);
            pos += length;
        }

        if (++items == LZ_ITEMS_PER_GROUP) {
            if (fwrite(group, 1, group_len, output_file) != group_len) {
                perror("Error writing to output file");
                rc = -4;
                break;
            }
            group[0] = 0;
            group_len = 1;
            items = 0;
        }
    }

    /* Flush the final, partially filled group. */
    if (rc == 0 && items > 0 &&
        fwrite(group, 1, group_len, output_file) != group_len) {
        perror("Error writing to output file");
        rc = -4;
    }

    free(buf);
    fclose(input_file);
    if (fclose(output_file) != 0 && rc == 0) {
        perror("Error closing output file");
        rc = -4;
    }
    return rc;
}

/*
 * Writes one decoded byte to the output and records it in the circular
 * history window. Returns 0 on success, -3 on a write error.
 */
static int lz_put_byte(FILE *output, uint8_t *window, size_t *window_pos,
                       size_t *window_fill, uint8_t byte)
{
    if (fputc(byte, output) == EOF) {
        return -3;
    }
    window[*window_pos] = byte;
    *window_pos = (*window_pos + 1) % WINDOW_SIZE;
    if (*window_fill < WINDOW_SIZE) {
        (*window_fill)++;
    }
    return 0;
}

/**
 * Decompresses a stream in the flag-byte/token format described above.
 *
 * @param input_filename  path of the compressed file
 * @param output_filename path of the file to create
 * @return 0 on success, -1 if a file cannot be opened or memory cannot be
 *         allocated, -2 if the stream is malformed (truncated reference, or a
 *         reference to data that was never produced), -3 on a read/write error.
 */
int lz77_decompress(const char *input_filename, const char *output_filename)
{
    /* Open the input first so a bad input path never truncates the output. */
    FILE *input = fopen(input_filename, "rb");
    if (!input) {
        return -1;
    }
    FILE *output = fopen(output_filename, "wb");
    if (!output) {
        fclose(input);
        return -1;
    }
    uint8_t *window = malloc(WINDOW_SIZE);
    if (!window) {
        fclose(input);
        fclose(output);
        return -1;
    }

    int rc = 0;
    size_t window_pos = 0;  /* next write position inside the window */
    size_t window_fill = 0; /* number of valid bytes in the window */
    int flags;

    while (rc == 0 && (flags = fgetc(input)) != EOF) {
        for (int bit = 0; bit < 8 && rc == 0; bit++) {
            int first = fgetc(input);
            if (first == EOF) {
                break; /* the last group may hold fewer than 8 items */
            }

            if (flags & (1 << bit)) { /* literal byte */
                rc = lz_put_byte(output, window, &window_pos, &window_fill,
                                 (uint8_t)first);
                continue;
            }

            /* back-reference: second half of the little-endian token */
            int second = fgetc(input);
            if (second == EOF) {
                rc = -2;
                break;
            }
            unsigned token = (unsigned)first | ((unsigned)second << 8);
            size_t offset = token >> 4;
            size_t length = (token & 0xFu) + 3;

            /* Reject references outside the data produced so far. */
            if (offset == 0 || offset > window_fill) {
                rc = -2;
                break;
            }
            for (size_t j = 0; j < length && rc == 0; j++) {
                uint8_t byte =
                    window[(window_pos + WINDOW_SIZE - offset) % WINDOW_SIZE];
                rc = lz_put_byte(output, window, &window_pos, &window_fill,
                                 byte);
            }
        }
    }

    if (rc == 0 && ferror(input)) {
        rc = -3;
    }

    free(window);
    fclose(input);
    if (fclose(output) != 0 && rc == 0) {
        rc = -3;
    }
    return rc;
}

/*
 * Writes one (value, count) pair and prints the trace line for it.
 * Returns 0 on success, -1 if the pair could not be written.
 */
static int rle_emit_run(FILE *output_file, uint8_t value, uint8_t count)
{
    if (fwrite(&value, 1, 1, output_file) != 1 ||
        fwrite(&count, 1, 1, output_file) != 1) {
        perror("Error writing to output file");
        return -1;
    }
    printf("Writing byte: %c with count: %d\n", value, count);
    return 0;
}

/**
 * Run-length encodes a file as a sequence of (byte, count) pairs, where
 * count is 1..255. Longer runs are split into several pairs.
 *
 * Errors are reported on stderr/stdout only; the function returns nothing.
 * An empty input leaves an empty output file.
 *
 * @param input_filename  path of the file to compress
 * @param output_filename path of the compressed file to create
 */
void rle_compress(const char *input_filename, const char *output_filename)
{
    FILE *input_file = fopen(input_filename, "rb");
    if (!input_file) {
        perror("Error opening input file");
        return;
    }

    FILE *output_file = fopen(output_filename, "wb");
    if (!output_file) {
        perror("Error opening output file");
        fclose(input_file);
        return;
    }

    uint8_t current_byte;
    uint8_t previous_byte;
    uint8_t count = 1;
    int failed = 0;

    if (fread(&previous_byte, 1, 1, input_file) != 1) {
        printf("Input file is empty or read error occurred.\n");
        fclose(input_file);
        fclose(output_file);
        return;
    }

    while (fread(&current_byte, 1, 1, input_file) == 1) {
        if (current_byte == previous_byte && count < 255) {
            count++;
            continue;
        }
        /* Run ended, or the one-byte counter is full. */
        if (rle_emit_run(output_file, previous_byte, count) != 0) {
            failed = 1;
            break;
        }
        previous_byte = current_byte;
        count = 1;
    }

    if (!failed) {
        rle_emit_run(output_file, previous_byte, count);
    }

    fclose(input_file);
    if (fclose(output_file) != 0) {
        perror("Error closing output file");
    }
}

/**
 * Expands a file of (byte, count) pairs written by rle_compress().
 *
 * @param input_filename  path of the RLE-compressed file
 * @param output_filename path of the file to create
 * @return the number of bytes written (clamped to INT_MAX) on success,
 *         -1 if the input cannot be opened, -2 if the output cannot be
 *         opened, -3 if the input ends in the middle of a pair, -4 on a
 *         write error.
 */
int rle_decompress(const char *input_filename, const char *output_filename)
{
    FILE *input_file = fopen(input_filename, "rb");
    if (!input_file) {
        perror("Error opening input file");
        return -1;
    }

    FILE *output_file = fopen(output_filename, "wb");
    if (!output_file) {
        perror("Error opening output file");
        fclose(input_file);
        return -2;
    }

    uint8_t run[UINT8_MAX]; /* a run is at most 255 bytes long */
    size_t decompressed_size = 0;
    int rc = 0;
    int value;

    while ((value = fgetc(input_file)) != EOF) {
        int count = fgetc(input_file);
        if (count == EOF) {
            fprintf(stderr, "Error: Malformed input file\n");
            rc = -3;
            break;
        }

        /* Expand the run in memory and write it with a single call. */
        memset(run, value, (size_t)count);
        if (fwrite(run, 1, (size_t)count, output_file) != (size_t)count) {
            perror("Error writing to output file");
            rc = -4;
            break;
        }
        decompressed_size += (size_t)count;
    }

    fclose(input_file);
    if (fclose(output_file) != 0 && rc == 0) {
        perror("Error writing to output file");
        rc = -4;
    }
    if (rc != 0) {
        return rc;
    }

    /* The byte count is reported through an int; avoid overflowing it. */
    return decompressed_size > (size_t)INT_MAX ? INT_MAX
                                               : (int)decompressed_size;
}