/*
 * inflate.c - a small decoder for raw DEFLATE streams.
 *
 * Stored blocks and blocks compressed with the fixed Huffman codes are
 * decoded; blocks using dynamic Huffman codes are rejected. The decoded
 * output is written to "test.txt" in the current directory.
 *
 * Define INFLATE_NO_MAIN to compile the decoder without its main().
 */

#include <limits.h>
#include <math.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Element count of a true array (must not be used on a pointer). */
#define ARRAY_LEN(a) ((int) (sizeof(a) / sizeof((a)[0])))

/* Block types (the 2-bit BTYPE field) */
static const int STORED = 0;
static const int FIXED = 1;
static const int DYNAMIC = 2;

/* Symbol that terminates a compressed block */
static const int END_OF_BLOCK = 256;

/* Fixed Huffman codes mapping. */

/* 7-bit codes 0..23 map to symbols 256..279 */
static const int DECODE_7_MIN = 0;
static const int DECODE_7_MAX = 23;
static const int DECODE_7[] = {
    256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267,
    268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279
};

/*
 * 8-bit codes 48..191 map to literals 0..143 and 192..199 map to symbols
 * 280..287, so the table covers 48..199 (152 entries).
 */
static const int DECODE_8_MIN = 48;
static const int DECODE_8_MAX = 199;
static const int DECODE_8[] = {
      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
     16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
     32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
     48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
     64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,
     80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,
     96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
    128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
    280, 281, 282, 283, 284, 285, 286, 287
};

/* 9-bit codes 400..511 map to literals 144..255 */
static const int DECODE_9_MIN = 400;
static const int DECODE_9_MAX = 511;
static const int DECODE_9[] = {
    144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
    160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
    176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
    192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
    208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
    224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
    240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
};

/* Base match length and number of extra bits for symbols 257..285 */
static const int LENGTH_OFFSET = 257;
static const int DECODE_LENGTH[] = {
    3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59,
    67, 83, 99, 115, 131, 163, 195, 227, 258
};
static const int ADD_LENGTHS[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
    3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0
};

/* Base distance and number of extra bits for distance codes 0..29 */
static const int DECODE_DIST[] = {
    1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
    257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
    8193, 12289, 16385, 24577
};
static const int ADD_DISTS[] = {
    0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10,
    11, 11, 12, 12, 13, 13
};

/*
 * Look up a code in one of the fixed Huffman tables.
 *
 * Returns decode_table[chunk - decode_min] when chunk lies in
 * [decode_min, decode_max], and -1 otherwise.
 */
int match(int chunk, const int decode_table[],
          const int decode_min, const int decode_max) {
    if (chunk < decode_min || chunk > decode_max) {
        return -1;
    }

    return decode_table[chunk - decode_min];
}

/*
 * Return bit n (0 or 1) of buf, counting from the least significant bit of
 * buf[0]. No bounds check is done: the caller must keep n / 8 inside buf.
 */
int get_next_bit(char *buf, int n) {
    /* Go through unsigned char so the result never depends on whether
     * plain char is signed. */
    unsigned char byte = (unsigned char) buf[n / 8];

    return (byte >> (n % 8)) & 1;
}

/* Read the bit at *pos and advance *pos; returns -1 if *pos is outside buf. */
static int take_bit(char *buf, int buf_size, int *pos) {
    if (*pos < 0 || *pos / 8 >= buf_size) {
        return -1;
    }

    int bit = get_next_bit(buf, *pos);
    *pos += 1;
    return bit;
}

/* Read count bits, first bit most significant (Huffman codes); -1 on overrun. */
static int take_bits_msb(char *buf, int buf_size, int *pos, int count) {
    int value = 0;

    for (int i = 0; i < count; i++) {
        int bit = take_bit(buf, buf_size, pos);
        if (bit < 0) {
            return -1;
        }
        value = (value << 1) | bit;
    }
    return value;
}

/* Read count bits, first bit least significant (header/extra bits); -1 on overrun. */
static int take_bits_lsb(char *buf, int buf_size, int *pos, int count) {
    int value = 0;

    for (int i = 0; i < count; i++) {
        int bit = take_bit(buf, buf_size, pos);
        if (bit < 0) {
            return -1;
        }
        value |= bit << i;
    }
    return value;
}

/* Decode one fixed-Huffman symbol (0..287) at *pos; -1 on overrun or no match. */
static int read_fixed_symbol(char *buf, int buf_size, int *pos) {
    /* Codes are 7, 8 or 9 bits long: start with 7 bits */
    int code = take_bits_msb(buf, buf_size, pos, 7);
    if (code < 0) {
        return -1;
    }
    int symbol = match(code, DECODE_7, DECODE_7_MIN, DECODE_7_MAX);
    if (symbol != -1) {
        return symbol;
    }

    /* No match, read another bit */
    int bit = take_bit(buf, buf_size, pos);
    if (bit < 0) {
        return -1;
    }
    code = (code << 1) | bit;
    symbol = match(code, DECODE_8, DECODE_8_MIN, DECODE_8_MAX);
    if (symbol != -1) {
        return symbol;
    }

    /* Again no match, read another bit */
    bit = take_bit(buf, buf_size, pos);
    if (bit < 0) {
        return -1;
    }
    code = (code << 1) | bit;
    return match(code, DECODE_9, DECODE_9_MIN, DECODE_9_MAX);
}

/*
 * Append length bytes to out, each copied from dist bytes before the current
 * end. Copying byte by byte lets the source overlap the bytes being written
 * (dist < length). Requires out to be open for update and positioned at its
 * end. Returns false on an I/O error or when dist reaches before the start
 * of the output.
 */
static bool copy_match(FILE *out, int length, int dist) {
    long end = ftell(out);

    if (end < 0 || dist > end) {
        return false;
    }

    for (int i = 0; i < length; i++) {
        if (fseek(out, end - dist, SEEK_SET) != 0) {
            return false;
        }
        int c = fgetc(out);
        if (c == EOF) {
            return false;
        }
        /* A positioning call is required between a read and a write */
        if (fseek(out, end, SEEK_SET) != 0 || fputc(c, out) == EOF) {
            return false;
        }
        end += 1;
    }
    return true;
}

/* Decode the body of a fixed-Huffman block starting at *pos. */
static bool read_fixed_block(char *buf, int buf_size, int *pos, FILE *out) {
    for (;;) {
        int symbol = read_fixed_symbol(buf, buf_size, pos);
        if (symbol < 0) {
            return false;
        }
        if (symbol == END_OF_BLOCK) {
            return true;
        }

        /* Literal */
        if (symbol < END_OF_BLOCK) {
            if (fputc(symbol, out) == EOF) {
                return false;
            }
            continue;
        }

        /* Length; symbols 286 and 287 have codes but no table entry */
        int length_index = symbol - LENGTH_OFFSET;
        if (length_index >= ARRAY_LEN(DECODE_LENGTH)) {
            return false;
        }
        /* Might need to read some extra bits and add to the length */
        int extra = take_bits_lsb(buf, buf_size, pos, ADD_LENGTHS[length_index]);
        if (extra < 0) {
            return false;
        }
        int length = DECODE_LENGTH[length_index] + extra;

        /* Next 5 bits are a distance code; 30 and 31 have no table entry */
        int dist_code = take_bits_msb(buf, buf_size, pos, 5);
        if (dist_code < 0 || dist_code >= ARRAY_LEN(DECODE_DIST)) {
            return false;
        }
        extra = take_bits_lsb(buf, buf_size, pos, ADD_DISTS[dist_code]);
        if (extra < 0) {
            return false;
        }
        int dist = DECODE_DIST[dist_code] + extra;

        if (!copy_match(out, length, dist)) {
            return false;
        }
    }
}

/* Copy the body of a stored (uncompressed) block that follows the header. */
static bool read_stored_block(char *buf, int buf_size, int *pos, FILE *out) {
    /* Stored data starts at the next byte boundary */
    int byte_pos = *pos / 8 + (*pos % 8 != 0);

    if (buf_size - byte_pos < 4) {
        return false;
    }

    /* LEN and its one's complement NLEN, both little-endian 16-bit */
    const unsigned char *hdr = (const unsigned char *) buf + byte_pos;
    int len = hdr[0] | (hdr[1] << 8);
    int nlen = hdr[2] | (hdr[3] << 8);
    if ((len ^ nlen) != 0xFFFF) {
        return false;
    }
    byte_pos += 4;

    if (buf_size - byte_pos < len) {
        return false;
    }
    if (len > 0 && fwrite(buf + byte_pos, 1, (size_t) len, out) != (size_t) len) {
        return false;
    }

    *pos = (byte_pos + len) * 8;
    return true;
}

/*
 * Read the block starting at the n-th bit.
 *
 * buf holds buf_size bytes of DEFLATE data. Decoded bytes are appended to
 * out, which must be open for update ("+" mode) and positioned at its end,
 * because back-references are resolved by reading the output back.
 *
 * Returns the number of bits consumed (header included), or -1 if the block
 * is truncated, malformed, uses dynamic Huffman codes or the reserved block
 * type, or if writing to out fails. The BFINAL bit is consumed but not
 * reported: a caller that needs it reads bit n itself.
 */
int read_block(char *buf, int buf_size, int n, FILE *out) {
    /* Bits past INT_MAX cannot be addressed through an int position, so cap
     * the usable size; this also keeps the position arithmetic in range. */
    if (buf_size > INT_MAX / 8) {
        buf_size = INT_MAX / 8;
    }

    int pos = n;

    /* First bit is the BFINAL flag */
    if (take_bit(buf, buf_size, &pos) < 0) {
        return -1;
    }

    /* Next two bits are BTYPE */
    int btype = take_bits_lsb(buf, buf_size, &pos, 2);
    if (btype < 0) {
        return -1;
    }

    bool ok = false;
    if (btype == STORED) {
        ok = read_stored_block(buf, buf_size, &pos, out);
    } else if (btype == FIXED) {
        ok = read_fixed_block(buf, buf_size, &pos, out);
    } else if (btype == DYNAMIC) {
        /* TODO: dynamic Huffman codes are not implemented */
        ok = false;
    }
    /* Any other value is the reserved block type and is rejected */

    return ok ? pos - n : -1;
}

/*
 * Decode the raw DEFLATE stream in fp, block by block until the one marked
 * BFINAL, into the file "test.txt". fp is left open. On any error a message
 * is printed to stderr and the process exits with status 1.
 */
void inflate(FILE *fp) {
    const char *error = NULL;
    char *buf = NULL;
    long size = 0;
    int bit_pos = 0;
    bool last_block = false;

    /* Create output file; update mode because matches read it back */
    FILE *out = fopen("test.txt", "wb+");
    if (!out) {
        error = "cannot create output file";
        goto done;
    }

    /* Read the file into a buffer */
    if (fseek(fp, 0, SEEK_END) != 0) {
        error = "cannot determine input size";
        goto done;
    }
    size = ftell(fp);
    if (size < 0) {
        error = "cannot determine input size";
        goto done;
    }
    if (size > INT_MAX / 8) {
        /* Bit positions are ints */
        error = "input too large";
        goto done;
    }
    rewind(fp);

    /* malloc(0) may return NULL, so always ask for at least one byte */
    buf = malloc(size > 0 ? (size_t) size : 1);
    if (!buf) {
        error = "memory error";
        goto done;
    }
    if (fread(buf, 1, (size_t) size, fp) != (size_t) size) {
        error = "cannot read input";
        goto done;
    }

    while (!last_block) {
        if (bit_pos / 8 >= size) {
            error = "unexpected end of input";
            goto done;
        }
        /* BFINAL is the first bit of every block header */
        last_block = get_next_bit(buf, bit_pos) != 0;

        int used = read_block(buf, (int) size, bit_pos, out);
        if (used < 0) {
            error = "invalid or unsupported deflate data";
            goto done;
        }
        bit_pos += used;
    }

done:
    free(buf);
    if (out && fclose(out) != 0 && !error) {
        error = "cannot write output file";
    }
    if (error) {
        fprintf(stderr, "error: %s\n", error);
        exit(1);
    }
}

#ifndef INFLATE_NO_MAIN
/* Usage: inflate [file] - decodes the named file into "test.txt". */
int main(int argc, char *argv[]) {
    if (argc != 2) {
        fprintf(stderr, "usage: inflate [file]\n");
        return 1;
    }

    char *fname = argv[1];

    FILE *fp = fopen(fname, "rb");

    if (!fp) {
        fprintf(stderr, "error: file not found\n");
        return 1;
    }

    inflate(fp);

    fclose(fp);
    return 0;
}
#endif /* INFLATE_NO_MAIN */