220 lines
6.5 KiB
C
220 lines
6.5 KiB
C
#include "compressor.h"
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <ctype.h>
|
|
#include <string.h>
|
|
|
|
#define MAX_DICT_SIZE 4096
|
|
|
|
/**
 * Reads the whole file `filename` into a freshly allocated buffer.
 *
 * On success stores the buffer in `*data` (caller must free() it) and its
 * length in `*size`, then returns 0. An empty file succeeds with `*size == 0`
 * and a non-NULL buffer. On any failure (open, seek, tell, allocation or a
 * short read) returns -1 with `*data == NULL` and `*size == 0`.
 */
int read_file(const char* filename, unsigned char** data, size_t* size) {
    unsigned char* buffer = NULL;
    size_t byte_count = 0;
    long length = 0;

    *data = NULL;
    *size = 0;

    FILE* file = fopen(filename, "rb");
    if (!file) return -1;

    /* Determine the file length by seeking to the end. */
    if (fseek(file, 0, SEEK_END) != 0) goto fail;
    length = ftell(file);
    if (length < 0) goto fail; /* ftell reports errors as -1L */
    if (fseek(file, 0, SEEK_SET) != 0) goto fail;

    byte_count = (size_t)length;
    /* malloc(0) may legally return NULL; always request at least one byte so
     * an empty file is not mistaken for an allocation failure. */
    buffer = malloc(byte_count > 0 ? byte_count : 1);
    if (!buffer) goto fail;

    /* A short read would leave part of the buffer uninitialised. */
    if (fread(buffer, 1, byte_count, file) != byte_count) goto fail;

    fclose(file);
    *data = buffer;
    *size = byte_count;
    return 0;

fail:
    free(buffer);
    fclose(file);
    return -1;
}
|
|
/**
 * Writes `size` bytes from `data` to `filename`, replacing any existing
 * content (the file is opened in binary mode).
 *
 * Returns 0 only if every byte was written and the stream closed cleanly;
 * returns -1 if the file cannot be opened, the write is short, or the final
 * flush performed by fclose() fails.
 */
int write_file(const char* filename, const unsigned char* data, size_t size) {
    FILE* file = fopen(filename, "wb");
    if (!file) return -1;

    int status = 0;
    if (size > 0 && fwrite(data, 1, size, file) != size) {
        status = -1;
    }
    /* Buffered data is flushed on close, so a close failure is a write failure. */
    if (fclose(file) != 0) {
        status = -1;
    }
    return status;
}
|
|
|
|
// RLE (run-length encoding)
|
|
/**
 * Run-length encodes `input_file_name` into `output_file_name`.
 *
 * Every run of up to 255 identical bytes is written as the raw byte followed
 * by the run length in decimal ASCII, e.g. "aaab" becomes "a3b1".
 *
 * NOTE(review): the format has no separator between a count and the next
 * run's byte. When a run's byte is itself an ASCII digit and it follows
 * another run, a reader that consumes all consecutive digits as the count
 * cannot tell the two apart (e.g. "aa3" encodes to "a231"). The format is
 * kept as-is for compatibility with existing readers.
 *
 * Returns 0 on success, -1 if the input cannot be read, the output cannot be
 * opened, or writing/closing the output fails.
 */
int compress_1(const char* input_file_name, const char* output_file_name) {
    size_t size;
    unsigned char* data;
    if (read_file(input_file_name, &data, &size) != 0) return -1;

    /* Binary mode: the payload contains raw bytes, so newline translation
     * performed by text mode on some platforms would corrupt it. */
    FILE* output = fopen(output_file_name, "wb");
    if (!output) {
        free(data);
        return -1;
    }

    int status = 0;
    size_t i = 0;
    while (i < size) {
        unsigned char byte = data[i];
        size_t count = 1;
        /* Extend the run, capped at 255 repetitions per record. */
        while (count < 255 && i + count < size && data[i + count] == byte) {
            count++;
        }
        if (fprintf(output, "%c%d", byte, (int)count) < 0) {
            status = -1;
            break;
        }
        i += count;
    }

    /* fclose flushes buffered output; a failure here means data was lost. */
    if (fclose(output) != 0) {
        status = -1;
    }
    free(data);
    return status;
}
|
|
/**
 * Decodes a run-length encoded buffer.
 *
 * The input is a sequence of records, each a raw byte followed by zero or
 * more ASCII decimal digits giving the repeat count. A byte with no digits
 * after it has count 0 and produces no output.
 *
 * @param data      encoded bytes
 * @param size      number of bytes in `data`
 * @param out_size  receives the number of decoded bytes on success
 * @return a malloc'd buffer holding the decoded bytes (caller must free();
 *         non-NULL even when the decoded length is 0), or NULL if memory
 *         cannot be allocated or a count / the total output length does not
 *         fit in size_t. `*out_size` is not written on failure.
 */
unsigned char* decompress_1_buffer(const unsigned char* data, size_t size, size_t* out_size) {
    const size_t max_size = (size_t)-1;

    /* Start small and grow on demand instead of reserving size * 255 bytes
     * up front; never request 0 bytes, since malloc(0) may return NULL. */
    size_t capacity = size < 64 ? 64 : size;
    unsigned char* decompressed = malloc(capacity);
    if (!decompressed) return NULL;

    size_t write_idx = 0;
    size_t i = 0;
    while (i < size) {
        unsigned char byte = data[i++];

        /* Parse the decimal repeat count, rejecting values that overflow. */
        size_t count = 0;
        while (i < size && isdigit(data[i])) {
            size_t digit = (size_t)(data[i] - '0');
            if (count > (max_size - digit) / 10) {
                free(decompressed);
                return NULL;
            }
            count = count * 10 + digit;
            i++;
        }

        if (count > max_size - write_idx) {
            free(decompressed);
            return NULL;
        }
        size_t needed = write_idx + count;

        if (needed > capacity) {
            /* Grow geometrically so long outputs cost amortised O(1) per byte. */
            size_t new_capacity = capacity;
            while (new_capacity < needed) {
                if (new_capacity > max_size / 2) {
                    new_capacity = needed;
                    break;
                }
                new_capacity *= 2;
            }
            unsigned char* new_buffer = realloc(decompressed, new_capacity);
            if (!new_buffer) {
                free(decompressed);
                return NULL;
            }
            decompressed = new_buffer;
            capacity = new_capacity;
        }

        memset(decompressed + write_idx, byte, count);
        write_idx = needed;
    }

    *out_size = write_idx;
    return decompressed;
}
|
|
|
|
/**
 * Decodes the run-length encoded file `input_file_name` and stores the
 * result in `output_file_name`.
 *
 * Returns the decoded length cast to int when the output was written, or -1
 * if reading the input, decoding, or writing the output fails.
 */
int decompress_1(const char* input_file_name, const char* output_file_name) {
    unsigned char* packed;
    size_t packed_len;
    if (read_file(input_file_name, &packed, &packed_len) != 0) {
        return -1;
    }

    int status = -1;
    size_t unpacked_len;
    unsigned char* unpacked = decompress_1_buffer(packed, packed_len, &unpacked_len);
    if (unpacked) {
        if (write_file(output_file_name, unpacked, unpacked_len) == 0) {
            status = (int)unpacked_len;
        }
        free(unpacked);
    }

    free(packed);
    return status;
}
|
|
|
|
// LZ78
|
|
int compress_2(const char* input_file_name, const char* output_file_name) {
|
|
unsigned char* data;
|
|
size_t size;
|
|
if (read_file(input_file_name, &data, &size) != 0) {
|
|
return -1;
|
|
}
|
|
FILE* output = fopen(output_file_name, "w");
|
|
if (!output) {
|
|
free(data);
|
|
return -1;
|
|
}
|
|
unsigned char* dictionary[MAX_DICT_SIZE] = { NULL };
|
|
size_t dict_size = 0;
|
|
size_t current_index = 0;
|
|
while (current_index < size) {
|
|
size_t match_index = 0;
|
|
size_t match_length = 0;
|
|
for (size_t i = 0; i < dict_size; i++) {
|
|
size_t len = 0;
|
|
while (data[current_index + len] != '\0' &&
|
|
dictionary[i][len] != '\0' &&
|
|
data[current_index + len] == dictionary[i][len]) {
|
|
len++;
|
|
}
|
|
if (len > match_length) {
|
|
match_length = len;
|
|
match_index = i + 1;
|
|
}
|
|
}
|
|
if (current_index + match_length < size) {
|
|
fprintf(output, "%zu%c", match_index, data[current_index + match_length]);
|
|
}
|
|
if (dict_size < MAX_DICT_SIZE) {
|
|
dictionary[dict_size] = malloc(match_length + 2);
|
|
if (!dictionary[dict_size]) {
|
|
fclose(output);
|
|
free(data);
|
|
for (size_t i = 0; i < dict_size; i++) {
|
|
free(dictionary[i]);
|
|
}
|
|
return -1;
|
|
}
|
|
if (match_length > 0) {
|
|
memcpy(dictionary[dict_size], &data[current_index], match_length);
|
|
}
|
|
dictionary[dict_size][match_length] = data[current_index + match_length];
|
|
dictionary[dict_size][match_length + 1] = '\0';
|
|
dict_size++;
|
|
}
|
|
current_index += match_length + 1;
|
|
}
|
|
|
|
fclose(output);
|
|
free(data);
|
|
for (size_t i = 0; i < dict_size; i++) {
|
|
free(dictionary[i]);
|
|
}
|
|
return 0;
|
|
}
|
|
int decompress_2(const char* input_file_name, const char* output_file_name) {
|
|
unsigned char* data;
|
|
size_t size;
|
|
if (read_file(input_file_name, &data, &size) != 0) {
|
|
return -1;
|
|
}
|
|
FILE* output = fopen(output_file_name, "w");
|
|
if (!output) {
|
|
free(data);
|
|
return -1;
|
|
}
|
|
unsigned char* dictionary[MAX_DICT_SIZE] = { NULL };
|
|
size_t dict_size = 0;
|
|
size_t current_index = 0;
|
|
while (current_index < size) {
|
|
unsigned short index = 0;
|
|
unsigned char ch = '\0';
|
|
while (current_index < size && isdigit(data[current_index])) {
|
|
index = index * 10 + (data[current_index] - '0');
|
|
current_index++;
|
|
}
|
|
if (current_index < size) {
|
|
ch = data[current_index++];
|
|
}
|
|
if (index == 0) {
|
|
fputc(ch, output);
|
|
} else {
|
|
fwrite(dictionary[index - 1], 1, strlen((char*)dictionary[index - 1]), output);
|
|
fputc(ch, output);
|
|
}
|
|
if (dict_size < MAX_DICT_SIZE) {
|
|
size_t length = (index > 0) ? strlen((char*)dictionary[index - 1]) : 0;
|
|
dictionary[dict_size] = malloc(length + 2);
|
|
if (!dictionary[dict_size]) {
|
|
fclose(output);
|
|
free(data);
|
|
for (size_t i = 0; i < dict_size; i++) {
|
|
free(dictionary[i]);
|
|
}
|
|
return -1;
|
|
}
|
|
if (index > 0) {
|
|
memcpy(dictionary[dict_size], dictionary[index - 1], length);
|
|
}
|
|
dictionary[dict_size][length] = ch;
|
|
dictionary[dict_size][length + 1] = '\0';
|
|
dict_size++;
|
|
}
|
|
}
|
|
fclose(output);
|
|
free(data);
|
|
for (size_t i = 0; i < dict_size; i++) {
|
|
free(dictionary[i]);
|
|
}
|
|
return 0;
|
|
} |