From eab4543e3c3a0a705a4b774b38ecfe8cb56ae70e Mon Sep 17 00:00:00 2001
From: Ivan Leichenko
Date: Fri, 17 Jan 2025 19:08:33 +0100
Subject: [PATCH] sk1sk1sk1

---
 sk1/Makefile     |  15 ++
 sk1/README.md    |   7 +
 sk1/compressor.c | 469 +++++++++++++++++++++++++++++++++++++++++++++++
 sk1/compressor.h |  40 ++++
 sk1/main.c       |  63 +++++++
 5 files changed, 594 insertions(+)
 create mode 100644 sk1/Makefile
 create mode 100644 sk1/README.md
 create mode 100644 sk1/compressor.c
 create mode 100644 sk1/compressor.h
 create mode 100644 sk1/main.c

diff --git a/sk1/Makefile b/sk1/Makefile
new file mode 100644
index 0000000..eb1b7a1
--- /dev/null
+++ b/sk1/Makefile
@@ -0,0 +1,15 @@
+CC = gcc
+CFLAGS = -std=c99 -g -Wall -Werror
+
+.PHONY: all clean
+
+all: compressor
+
+%.o: %.c compressor.h
+	$(CC) $(CFLAGS) -c -o $@ $<
+
+compressor: main.o compressor.o
+	$(CC) $(CFLAGS) main.o compressor.o -o compressor
+
+clean:
+	rm -f *.o compressor
diff --git a/sk1/README.md b/sk1/README.md
new file mode 100644
index 0000000..775647d
--- /dev/null
+++ b/sk1/README.md
@@ -0,0 +1,7 @@
+Zadanie - Kompresor
+Naprogramovať nástroj na kompresiu a dekompresiu súboru do 10MB.
+
+Program komprimuje a dekomprimuje súbory až do veľkosti 10 MB pomocou Huffmanovho kódovania a algoritmu LZ77.
+Program vezme existujúci vstupný súbor a názov výstupného súboru a potom skomprimuje vstupný súbor pomocou jedného z algoritmov podľa výberu používateľa.
+Pri dekompresii musí používateľ zadať aj existujúci komprimovaný súbor a oznámiť programu, aký algoritmus bol použitý na kompresiu súboru, a zadať názov výstupného súboru, po ktorom program vráti súbor do pôvodného stavu.
+
diff --git a/sk1/compressor.c b/sk1/compressor.c
new file mode 100644
index 0000000..309c6be
--- /dev/null
+++ b/sk1/compressor.c
@@ -0,0 +1,469 @@
+#include "compressor.h"
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define WINDOW_SIZE 4096                  // LZ77 sliding-window size in bytes
+#define LOOKAHEAD_BUFFER_SIZE 15          // longest LZ77 match; must fit in 4 bits
+#define MAX_FILE_SIZE (10L * 1024 * 1024) // largest input compress_1 accepts
+#define MAX_TREE_NODES 256                // one Huffman leaf per byte value
+
+// One LZ77 token: copy `length` bytes that start `offset` bytes back in the
+// output, then append next_char. length == 0 means "next_char only".
+typedef struct
+{
+    int offset;
+    int length;
+    unsigned char next_char;
+} LZ77Token;
+
+// Huffman tree node; a leaf has both children set to NULL.
+typedef struct Node
+{
+    unsigned char symbol;
+    int frequency;
+    struct Node* left;
+    struct Node* right;
+} Node;
+
+// Min-heap of tree nodes keyed by frequency.
+typedef struct MinHeap
+{
+    Node* nodes[MAX_TREE_NODES];
+    int size;
+} MinHeap;
+
+// Restore the heap property for the subtree rooted at idx.
+static void heapify(MinHeap* heap, int idx)
+{
+    int smallest = idx;
+    int left = 2 * idx + 1;
+    int right = 2 * idx + 2;
+
+    if (left < heap->size && heap->nodes[left]->frequency < heap->nodes[smallest]->frequency)
+    {
+        smallest = left;
+    }
+    if (right < heap->size && heap->nodes[right]->frequency < heap->nodes[smallest]->frequency)
+    {
+        smallest = right;
+    }
+    if (smallest != idx)
+    {
+        Node* temp = heap->nodes[smallest];
+        heap->nodes[smallest] = heap->nodes[idx];
+        heap->nodes[idx] = temp;
+        heapify(heap, smallest);
+    }
+}
+
+// Remove and return the lowest-frequency node. The heap must not be empty.
+static Node* extract_min(MinHeap* heap)
+{
+    Node* min = heap->nodes[0];
+    heap->nodes[0] = heap->nodes[heap->size - 1];
+    heap->size--;
+    heapify(heap, 0);
+    return min;
+}
+
+// Insert node, sifting it up past every parent with a larger frequency.
+static void insert_min_heap(MinHeap* heap, Node* node)
+{
+    int i = heap->size++;
+
+    while (i && node->frequency < heap->nodes[(i - 1) / 2]->frequency)
+    {
+        heap->nodes[i] = heap->nodes[(i - 1) / 2];
+        i = (i - 1) / 2;
+    }
+    heap->nodes[i] = node;
+}
+
+// Allocate a childless node. Returns NULL when out of memory.
+static Node* create_node(unsigned char symbol, int frequency)
+{
+    Node* node = malloc(sizeof *node);
+    if (node)
+    {
+        node->symbol = symbol;
+        node->frequency = frequency;
+        node->left = node->right = NULL;
+    }
+    return node;
+}
+
+// Free a whole tree; a NULL root is allowed.
+static void free_tree(Node* root)
+{
+    if (root)
+    {
+        free_tree(root->left);
+        free_tree(root->right);
+        free(root);
+    }
+}
+
+// Build the Huffman tree for freq[]. Compression and decompression both call
+// this with the same table, so both sides walk the same tree.
+// Stores the root in *root_out (NULL when every frequency is zero).
+// Returns 0 on success, -1 when out of memory.
+static int build_huffman_tree(const int* freq, Node** root_out)
+{
+    MinHeap heap = { .size = 0 };
+
+    *root_out = NULL;
+    for (int i = 0; i < MAX_TREE_NODES; i++)
+    {
+        if (freq[i] > 0)
+        {
+            Node* leaf = create_node((unsigned char)i, freq[i]);
+            if (!leaf) goto fail;
+            insert_min_heap(&heap, leaf);
+        }
+    }
+
+    // Repeatedly merge the two rarest subtrees until one tree remains.
+    while (heap.size > 1)
+    {
+        Node* left = extract_min(&heap);
+        Node* right = extract_min(&heap);
+        Node* parent = create_node(0, left->frequency + right->frequency);
+        if (!parent)
+        {
+            free_tree(left);
+            free_tree(right);
+            goto fail;
+        }
+        parent->left = left;
+        parent->right = right;
+        insert_min_heap(&heap, parent);
+    }
+
+    if (heap.size == 1) *root_out = heap.nodes[0];
+    return 0;
+
+fail:
+    for (int i = 0; i < heap.size; i++) free_tree(heap.nodes[i]);
+    return -1;
+}
+
+// Store the '0'/'1' code string of every leaf below root in codes[symbol].
+// code is scratch space holding the path so far; top is its length.
+static void build_codes(Node* root, char* code, int top, char codes[MAX_TREE_NODES][MAX_TREE_NODES])
+{
+    if (root->left)
+    {
+        code[top] = '0';
+        build_codes(root->left, code, top + 1, codes);
+    }
+    if (root->right)
+    {
+        code[top] = '1';
+        build_codes(root->right, code, top + 1, codes);
+    }
+    if (!root->left && !root->right)
+    {
+        code[top] = '\0';
+        strcpy(codes[root->symbol], code);
+    }
+}
+
+// Huffman compression. Output layout: the int[256] frequency table exactly as
+// it sits in memory, then the code bits packed MSB-first, zero-padded.
+int compress_2(const char* input_file_name, const char* output_file_name)
+{
+    int result = -1;
+    int freq[MAX_TREE_NODES] = {0};
+    char codes[MAX_TREE_NODES][MAX_TREE_NODES];
+    char code[MAX_TREE_NODES];
+    Node* root = NULL;
+    FILE* output = NULL;
+    unsigned char out_buffer = 0;
+    int bit_count = 0;
+    int total = 0;
+    long size;
+    int c;
+
+    FILE* input = fopen(input_file_name, "rb");
+    if (!input) return -1;
+
+    // First pass: count how often each byte value occurs.
+    while ((c = fgetc(input)) != EOF)
+    {
+        if (total == INT_MAX) goto cleanup; // the counters would overflow
+        total++;
+        freq[c]++;
+    }
+    if (ferror(input)) goto cleanup;
+
+    if (build_huffman_tree(freq, &root) != 0) goto cleanup;
+    memset(codes, 0, sizeof codes);
+    // An empty input has no tree; a single distinct byte gets the empty code.
+    if (root) build_codes(root, code, 0, codes);
+
+    if (fseek(input, 0, SEEK_SET) != 0) goto cleanup;
+    output = fopen(output_file_name, "wb");
+    if (!output) goto cleanup;
+
+    // Header: the frequency table the decoder rebuilds the tree from.
+    if (fwrite(freq, sizeof freq, 1, output) != 1) goto cleanup;
+
+    // Second pass: append each byte's code, most significant bit first.
+    while ((c = fgetc(input)) != EOF)
+    {
+        for (const char* bit = codes[c]; *bit != '\0'; bit++)
+        {
+            out_buffer = (unsigned char)((out_buffer << 1) | (*bit == '1'));
+            if (++bit_count == 8)
+            {
+                if (fputc(out_buffer, output) == EOF) goto cleanup;
+                bit_count = 0;
+                out_buffer = 0;
+            }
+        }
+    }
+    if (ferror(input)) goto cleanup;
+
+    // Flush the last partial byte, padded with zero bits.
+    if (bit_count > 0)
+    {
+        out_buffer = (unsigned char)(out_buffer << (8 - bit_count));
+        if (fputc(out_buffer, output) == EOF) goto cleanup;
+    }
+
+    size = ftell(output);
+    if (size < 0 || size > INT_MAX) goto cleanup;
+    result = (int)size;
+
+cleanup:
+    free_tree(root);
+    fclose(input);
+    if (output && fclose(output) != 0) result = -1;
+    return result;
+}
+
+// Inverse of compress_2. The sum of the frequency table is the number of
+// symbols to decode, which is how the padding bits are told apart from data.
+int decompress_2(const char* input_file_name, const char* output_file_name)
+{
+    int result = -1;
+    int freq[MAX_TREE_NODES];
+    Node* root = NULL;
+    Node* current;
+    FILE* output = NULL;
+    int remaining = 0; // symbols still to be written
+    long size;
+    int c;
+
+    FILE* input = fopen(input_file_name, "rb");
+    if (!input) return -1;
+
+    if (fread(freq, sizeof freq, 1, input) != 1) goto cleanup;
+    for (int i = 0; i < MAX_TREE_NODES; i++)
+    {
+        // Reject a table with negative counts or a total that overflows int.
+        if (freq[i] < 0 || freq[i] > INT_MAX - remaining) goto cleanup;
+        remaining += freq[i];
+    }
+    if (build_huffman_tree(freq, &root) != 0) goto cleanup;
+
+    output = fopen(output_file_name, "wb");
+    if (!output) goto cleanup;
+
+    // A lone leaf has the empty code, so the encoder stored no bits for it.
+    if (root && !root->left && !root->right)
+    {
+        for (; remaining > 0; remaining--)
+        {
+            if (fputc(root->symbol, output) == EOF) goto cleanup;
+        }
+    }
+
+    // Walk the tree one bit at a time and stop after the last real symbol.
+    current = root;
+    while (remaining > 0 && (c = fgetc(input)) != EOF)
+    {
+        for (int bit = 7; bit >= 0 && remaining > 0; bit--)
+        {
+            current = ((c >> bit) & 1) ? current->right : current->left;
+            if (!current->left && !current->right)
+            {
+                if (fputc(current->symbol, output) == EOF) goto cleanup;
+                current = root;
+                remaining--;
+            }
+        }
+    }
+    if (remaining > 0) goto cleanup; // input ended before all symbols were read
+
+    size = ftell(output);
+    if (size < 0) goto cleanup;
+    result = (int)size;
+
+cleanup:
+    free_tree(root);
+    fclose(input);
+    if (output && fclose(output) != 0) result = -1;
+    return result;
+}
+
+// Write a token as 3 bytes: a little-endian 16-bit word holding the low 12 bits
+// of offset above the 4-bit length, then next_char. An offset of WINDOW_SIZE
+// therefore wraps to 0, which decompress_1 maps back.
+// Returns 0 on success, -1 on a write error.
+static int write_token(FILE* file, LZ77Token token)
+{
+    unsigned int packed = ((unsigned int)token.offset << 4) | ((unsigned int)token.length & 0xFu);
+    unsigned char bytes[3];
+
+    bytes[0] = (unsigned char)(packed & 0xFFu);
+    bytes[1] = (unsigned char)((packed >> 8) & 0xFFu);
+    bytes[2] = token.next_char;
+    return fwrite(bytes, 1, sizeof bytes, file) == sizeof bytes ? 0 : -1;
+}
+
+// Read one token. Returns 1 when a token was read, 0 at a clean end of file,
+// and -1 on a read error or a partial token.
+static int read_token(FILE* file, LZ77Token* token)
+{
+    unsigned char bytes[3];
+    size_t got = fread(bytes, 1, sizeof bytes, file);
+
+    if (got != sizeof bytes)
+    {
+        return (got == 0 && !ferror(file)) ? 0 : -1;
+    }
+    token->offset = (bytes[0] | (bytes[1] << 8)) >> 4;
+    token->length = bytes[0] & 0xF;
+    token->next_char = bytes[2];
+    return 1;
+}
+
+// LZ77 compression of a file of at most MAX_FILE_SIZE bytes into a token stream.
+int compress_1(const char* input_file_name, const char* output_file_name)
+{
+    int result = -1;
+    unsigned char* data = NULL;
+    FILE* output_file = NULL;
+    long file_size;
+    long size;
+    long pos = 0;
+
+    FILE* input_file = fopen(input_file_name, "rb");
+    if (!input_file) return -1;
+
+    if (fseek(input_file, 0, SEEK_END) != 0) goto cleanup;
+    file_size = ftell(input_file);
+    if (file_size < 0 || file_size > MAX_FILE_SIZE) goto cleanup;
+    if (fseek(input_file, 0, SEEK_SET) != 0) goto cleanup;
+
+    // malloc(0) may return NULL, so always request at least one byte.
+    data = malloc(file_size > 0 ? (size_t)file_size : 1);
+    if (!data) goto cleanup;
+    if (fread(data, 1, (size_t)file_size, input_file) != (size_t)file_size) goto cleanup;
+
+    // Open the output only now, so a bad input does not truncate it.
+    output_file = fopen(output_file_name, "wb");
+    if (!output_file) goto cleanup;
+
+    while (pos < file_size)
+    {
+        LZ77Token token = {0, 0, data[pos]};
+        long start = pos > WINDOW_SIZE ? pos - WINDOW_SIZE : 0;
+        // Keep one byte after the match so next_char never reads past the data.
+        long max_length = file_size - 1 - pos;
+        if (max_length > LOOKAHEAD_BUFFER_SIZE) max_length = LOOKAHEAD_BUFFER_SIZE;
+
+        // Longest match in the window; the earliest candidate wins ties.
+        for (long i = start; i < pos; i++)
+        {
+            int length = 0;
+            while (length < max_length && data[i + length] == data[pos + length])
+            {
+                length++;
+            }
+            if (length > token.length)
+            {
+                token.length = length;
+                token.offset = (int)(pos - i);
+            }
+        }
+
+        if (token.length > 1)
+        {
+            token.next_char = data[pos + token.length];
+            pos += token.length + 1;
+        }
+        else
+        {
+            // A 1-byte match is not worth a token: emit a plain literal.
+            token.offset = 0;
+            token.length = 0;
+            pos++;
+        }
+
+        if (write_token(output_file, token) != 0) goto cleanup;
+    }
+
+    size = ftell(output_file);
+    if (size < 0 || size > INT_MAX) goto cleanup;
+    result = (int)size;
+
+cleanup:
+    free(data);
+    fclose(input_file);
+    if (output_file && fclose(output_file) != 0) result = -1;
+    return result;
+}
+
+// Inverse of compress_1: replay the tokens against a circular window holding
+// the last WINDOW_SIZE output bytes.
+int decompress_1(const char* input_file_name, const char* output_file_name)
+{
+    int result = -1;
+    unsigned char window[WINDOW_SIZE];
+    FILE* output_file = NULL;
+    LZ77Token token;
+    long window_pos = 0; // total number of bytes written so far
+    long size;
+    int status;
+
+    FILE* input_file = fopen(input_file_name, "rb");
+    if (!input_file) return -1;
+    output_file = fopen(output_file_name, "wb");
+    if (!output_file) goto cleanup;
+
+    // Loop on the read result; testing feof() first would process a bogus
+    // extra token after the last real one.
+    while ((status = read_token(input_file, &token)) == 1)
+    {
+        if (token.length > 0)
+        {
+            // Stored offset 0 is a wrapped WINDOW_SIZE (see write_token).
+            long distance = token.offset ? token.offset : WINDOW_SIZE;
+            if (distance > window_pos) goto cleanup; // points before the output
+            for (int i = 0; i < token.length; i++)
+            {
+                unsigned char c = window[(window_pos - distance) % WINDOW_SIZE];
+                if (fputc(c, output_file) == EOF) goto cleanup;
+                window[window_pos % WINDOW_SIZE] = c;
+                window_pos++;
+            }
+        }
+        if (fputc(token.next_char, output_file) == EOF) goto cleanup;
+        window[window_pos % WINDOW_SIZE] = token.next_char;
+        window_pos++;
+    }
+    if (status < 0) goto cleanup;
+
+    size = ftell(output_file);
+    if (size < 0 || size > INT_MAX) goto cleanup;
+    result = (int)size;
+
+cleanup:
+    fclose(input_file);
+    if (output_file && fclose(output_file) != 0) result = -1;
+    return result;
+}
diff --git a/sk1/compressor.h b/sk1/compressor.h
new file mode 100644
index 0000000..0596a44
--- /dev/null
+++ b/sk1/compressor.h
@@ -0,0 +1,40 @@
+#ifndef COMPRESSOR_H
+#define COMPRESSOR_H
+
+/**
+ * Compress the file input_file_name with algorithm 1 (LZ77) and write the
+ * result to the file output_file_name.
+ * @arg input_file_name input file (read), at most 10 MB
+ * @arg output_file_name output file (written)
+ * @return size of the compressed file in bytes on success, a negative value on error.
+ */
+int compress_1(const char* input_file_name, const char* output_file_name);
+
+/**
+ * Decompress a file produced by compress_1 and write the result to the file
+ * output_file_name.
+ * @arg input_file_name input file (read)
+ * @arg output_file_name output file (written)
+ * @return size of the decompressed file in bytes on success, a negative value on error.
+ */
+int decompress_1(const char* input_file_name, const char* output_file_name);
+
+/**
+ * Compress the file input_file_name with algorithm 2 (Huffman coding) and
+ * write the result to the file output_file_name.
+ * @arg input_file_name input file (read)
+ * @arg output_file_name output file (written)
+ * @return size of the compressed file in bytes on success, a negative value on error.
+ */
+int compress_2(const char* input_file_name, const char* output_file_name);
+
+/**
+ * Decompress a file produced by compress_2 and write the result to the file
+ * output_file_name.
+ * @arg input_file_name input file (read)
+ * @arg output_file_name output file (written)
+ * @return size of the decompressed file in bytes on success, a negative value on error.
+ */
+int decompress_2(const char* input_file_name, const char* output_file_name);
+
+#endif /* COMPRESSOR_H */
diff --git a/sk1/main.c b/sk1/main.c
new file mode 100644
index 0000000..3ed0bca
--- /dev/null
+++ b/sk1/main.c
@@ -0,0 +1,63 @@
+#include "compressor.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// One command-line mode: its flag, the function it runs, and its messages.
+typedef struct
+{
+    const char* flag;
+    int (*run)(const char* input_file_name, const char* output_file_name);
+    const char* failure_message;
+    const char* success_message;
+} Mode;
+
+static const Mode MODES[] = {
+    {"-c1", compress_1, "Compression failed\n", "Compressed successfully.\n"},
+    {"-d1", decompress_1, "Decompression failed\n", "Decompressed successfully.\n"},
+    {"-c2", compress_2, "Compression failed\n", "Compressed successfully.\n"},
+    {"-d2", decompress_2, "Decompression failed\n", "Decompressed successfully.\n"},
+};
+
+// Print the usage text to standard output.
+static void print_help(void)
+{
+    printf("Usage:\n");
+    printf(" ./compressor -c1/-c2 infile outfile\t\tCompress infile to outfile\n\t\t\t\t\t\t-c1 Compress using LZ77\n\t\t\t\t\t\t-c2 Compress using huffman coding\n\n");
+    printf(" ./compressor -d1/-d2 compressed uncompressed\tDecompress compressed to uncompressed\n\t\t\t\t\t\t-d1 Decompress ONLY for LZ77\n\t\t\t\t\t\t-d2 Decompress ONLY for huffman coding\n\n");
+    printf(" ./compressor -h\t\t\t\tShow this help message\n");
+}
+
+// Entry point. Exit status is 0 on success or -h, 1 on bad usage or failure.
+int main(int argc, char* argv[])
+{
+    if (argc < 2)
+    {
+        print_help();
+        return 1;
+    }
+    if (strcmp(argv[1], "-h") == 0)
+    {
+        print_help();
+        return 0;
+    }
+
+    // Every other mode takes exactly an input and an output file name.
+    if (argc == 4)
+    {
+        for (size_t i = 0; i < sizeof MODES / sizeof MODES[0]; i++)
+        {
+            if (strcmp(argv[1], MODES[i].flag) != 0) continue;
+            if (MODES[i].run(argv[2], argv[3]) < 0)
+            {
+                fputs(MODES[i].failure_message, stderr);
+                return 1;
+            }
+            fputs(MODES[i].success_message, stdout);
+            return 0;
+        }
+    }
+
+    print_help();
+    return 1;
+}