diff --git a/lz77/bitwriter.c b/lz77/bitwriter.c
new file mode 100644
--- /dev/null
+++ b/lz77/bitwriter.c
@@ -0,0 +1,188 @@
+/* Bit-level writer that emits DEFLATE fixed-Huffman symbols to a FILE. */
+
+#include "bitwriter.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+
+/* Output stream plus the partially filled byte waiting to be written. */
+struct BitWriter
+{
+    FILE *file;   /* destination stream; not closed by the writer */
+    uint8_t byte; /* bits collected so far, filled from bit 0 upwards */
+    int mod;      /* how many bits of `byte` are in use (0..7) */
+};
+
+/* Allocates a writer that appends to `file`. Never returns NULL: the
+   process aborts if the allocation fails. Release with BitWriter_free(). */
+BitWriter *BitWriter_init(FILE *file)
+{
+    BitWriter *bw = malloc(sizeof *bw);
+    if (bw == NULL)
+    {
+        /* assert() disappears under NDEBUG, so fail explicitly instead. */
+        perror("BitWriter_init");
+        abort();
+    }
+    bw->file = file;
+    bw->byte = 0;
+    bw->mod = 0;
+    return bw;
+}
+
+/* Frees the writer only: pending bits are not written and the FILE stays
+   open. Call BitWriter_flush() first if a partial byte may be pending. */
+void BitWriter_free(BitWriter *bw)
+{
+    free(bw);
+}
+
+/* Appends one bit. Each byte is filled from its least significant bit and
+   written to the file once it holds eight bits. Write failures are not
+   reported here; callers can check ferror() on the stream. */
+void BitWriter_write_bit(BitWriter *bw, bool bit)
+{
+    if (bit)
+    {
+        bw->byte |= (uint8_t) (1u << bw->mod);
+    }
+    bw->mod++;
+
+    if (bw->mod == 8)
+    {
+        fwrite(&bw->byte, 1, 1, bw->file);
+        bw->byte = 0;
+        bw->mod = 0;
+    }
+}
+
+/* Pads the pending byte with zero bits so that it gets written. Does
+   nothing when the writer is already on a byte boundary. */
+void BitWriter_flush(BitWriter *bw)
+{
+    while (bw->mod != 0)
+    {
+        BitWriter_write_bit(bw, 0);
+    }
+}
+
+/* Writes the low `number_to_write` bits of `bits`, most significant of
+   those bits first. `number_to_write` must be in 0..32. */
+void BitWriter_write_bin(BitWriter *bw, uint32_t bits, int number_to_write)
+{
+    assert(0 <= number_to_write && number_to_write <= 32);
+
+    for (int b = number_to_write - 1; b >= 0; b--)
+    {
+        /* Shift the value, not a signed 1, so that b == 31 is defined. */
+        BitWriter_write_bit(bw, (bits >> b) & 1u);
+    }
+}
+
+/* Writes the low `number_to_write` bits of `bits`, least significant bit
+   first. `number_to_write` must be in 0..32. */
+void BitWriter_write_bin_reverse(BitWriter *bw, uint32_t bits, int number_to_write)
+{
+    assert(0 <= number_to_write && number_to_write <= 32);
+
+    for (int b = 0; b < number_to_write; b++)
+    {
+        BitWriter_write_bit(bw, (bits >> b) & 1u);
+    }
+}
+
+/* Writes literal/length symbol `byte` (0..285) with the fixed Huffman code
+   of DEFLATE (RFC 1951, section 3.2.6). Code bits go out MSB first. */
+void BitWriter_write_alpha(BitWriter *bw, uint32_t byte)
+{
+    assert(byte <= 285);
+
+    if (byte <= 143) /* 8 bit codes 00110000.. */
+    {
+        BitWriter_write_bin(bw, 48 + byte, 8);
+    }
+    else if (byte <= 255) /* 9 bit codes 110010000.. */
+    {
+        BitWriter_write_bin(bw, 256 + byte, 9);
+    }
+    else if (byte <= 279) /* 7 bit codes 0000000.. */
+    {
+        BitWriter_write_bin(bw, byte - 256, 7);
+    }
+    else if (byte <= 287) /* 8 bit codes 11000000.. */
+    {
+        BitWriter_write_bin(bw, byte - 88, 8);
+    }
+}
+
+/* Writes match length `length` (3..258): the length symbol (257..285)
+   followed by its extra bits, which go out least significant bit first. */
+void BitWriter_write_length(BitWriter *bw, uint32_t length)
+{
+    assert(3 <= length && length <= 258);
+
+    if (length <= 10) /* 3 - 10: symbols 257 - 264, no extra bits */
+    {
+        BitWriter_write_alpha(bw, length + 254);
+    }
+    else if (length <= 18) /* 11 - 18: symbols 265 - 268, 1 extra bit */
+    {
+        BitWriter_write_alpha(bw, (length + 519) >> 1);
+        BitWriter_write_bin_reverse(bw, 1 + length, 1);
+    }
+    else if (length <= 34) /* 19 - 34: symbols 269 - 272, 2 extra bits */
+    {
+        BitWriter_write_alpha(bw, (length + 1057) >> 2);
+        BitWriter_write_bin_reverse(bw, 1 + length, 2);
+    }
+    else if (length <= 66) /* 35 - 66: symbols 273 - 276, 3 extra bits */
+    {
+        BitWriter_write_alpha(bw, (length + 2149) >> 3);
+        BitWriter_write_bin_reverse(bw, 5 + length, 3);
+    }
+    else if (length <= 130) /* 67 - 130: symbols 277 - 280, 4 extra bits */
+    {
+        BitWriter_write_alpha(bw, (length + 4365) >> 4);
+        BitWriter_write_bin_reverse(bw, 13 + length, 4);
+    }
+    else if (length <= 257) /* 131 - 257: symbols 281 - 284, 5 extra bits */
+    {
+        BitWriter_write_alpha(bw, (length + 8861) >> 5);
+        BitWriter_write_bin_reverse(bw, 29 + length, 5);
+    }
+    else if (length == 258) /* symbol 285, no extra bits */
+    {
+        BitWriter_write_alpha(bw, 285);
+    }
+    else
+    {
+        assert(0);
+    }
+}
+
+/* Writes match distance `dist` (1..32768): a 5 bit distance code, MSB
+   first, followed by that code's extra bits, LSB first. */
+void BitWriter_write_distance(BitWriter *bw, uint32_t dist)
+{
+    assert(1 <= dist && dist <= 32768);
+
+    if (dist <= 4) /* codes 0 - 3 carry no extra bits */
+    {
+        BitWriter_write_bin(bw, dist - 1, 5);
+        return;
+    }
+
+    /* Every pair of codes with `bits` extra bits covers the distances
+       (1 << (bits + 1)) + 1 .. 1 << (bits + 2); find the pair for `dist`. */
+    uint32_t bits = 1;
+    while (dist > (UINT32_C(1) << (bits + 2)))
+    {
+        bits++;
+    }
+
+    BitWriter_write_bin(bw, (dist + bits * (UINT32_C(1) << (bits + 1)) - 1) >> bits, 5);
+    BitWriter_write_bin_reverse(bw, dist - 1, (int) bits);
+}
diff --git a/lz77/bitwriter.h b/lz77/bitwriter.h
new file mode 100644
--- /dev/null
+++ b/lz77/bitwriter.h
@@ -0,0 +1,34 @@
+#ifndef BIT_WRITER_H
+#define BIT_WRITER_H
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+
+/* A struct for writing to files as
+   streams of individual bits */
+
+struct BitWriter;
+typedef struct BitWriter BitWriter;
+
+/* Creation and destruction; the writer never closes `file`. */
+BitWriter *BitWriter_init(FILE *file);
+void BitWriter_free(BitWriter *bw);
+
+/* Raw bits: one bit, zero padding up to the next byte boundary, and
+   fixed-width fields written MSB first (bin) or LSB first (bin_reverse). */
+void BitWriter_write_bit(BitWriter *bw, bool bit);
+void BitWriter_flush(BitWriter *bw);
+void BitWriter_write_bin(BitWriter *bw, uint32_t bits, int number_to_write);
+void BitWriter_write_bin_reverse(BitWriter *bw, uint32_t bits, int number_to_write);
+
+/* DEFLATE fixed-Huffman symbols: literal/length symbol, match length
+   (3..258) and match distance (1..32768). */
+void BitWriter_write_alpha(BitWriter *bw, uint32_t byte);
+void BitWriter_write_length(BitWriter *bw, uint32_t length);
+void BitWriter_write_distance(BitWriter *bw, uint32_t dist);
+
+#endif
diff --git a/lz77/hashmap.c b/lz77/hashmap.c
new file mode 100644
--- /dev/null
+++ b/lz77/hashmap.c
@@ -0,0 +1,208 @@
+/* Separate-chaining hash map from uint32_t keys to void * values. */
+
+#include "hashmap.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <stdbool.h>
+
+#define BINS 33703
+
+/* Maps a key to a bin index in the range 0 .. BINS - 1. */
+uint32_t hash_key(uint32_t val)
+{
+    /* An unsuffixed 2654435761 has a signed type wider than 32 bits, so
+       the product could overflow (undefined behaviour) and index outside
+       the bins. Keep the arithmetic in uint32_t, which wraps instead. */
+    return (uint32_t) (val * UINT32_C(2654435761)) % BINS;
+}
+
+/* first, make a hashmap that's just a linked list of key, value pairs */
+
+struct LinkedListNode;
+
+typedef struct LinkedListNode LinkedListNode;
+
+struct LinkedListNode
+{
+    uint32_t key;
+    void *value; /* stored as given; never freed by the list */
+    LinkedListNode *next;
+};
+
+typedef struct ListHashmap
+{
+    LinkedListNode *head;
+} ListHashmap;
+
+/* Allocates a node holding (key, value) that links to `next`. Never
+   returns NULL: the process aborts if the allocation fails. */
+LinkedListNode *LinkedListNode_init(uint32_t key, void *value, LinkedListNode *next)
+{
+    LinkedListNode *lln = malloc(sizeof *lln);
+    if (lln == NULL)
+    {
+        /* assert() disappears under NDEBUG, so fail explicitly instead. */
+        perror("LinkedListNode_init");
+        abort();
+    }
+    lln->key = key;
+    lln->value = value;
+    lln->next = next;
+    return lln;
+}
+
+/* Frees one node; the value it pointed at is left alone. */
+void LinkedListNode_free(LinkedListNode *lln)
+{
+    free(lln);
+}
+
+/* Makes `lh` an empty list. */
+void ListHashmap_init(ListHashmap *lh)
+{
+    lh->head = NULL;
+}
+
+/* Frees every node of `lh` and leaves it empty. */
+void ListHashmap_deinit(ListHashmap *lh)
+{
+    LinkedListNode *pos = lh->head;
+    while (pos)
+    {
+        LinkedListNode *next = pos->next;
+        LinkedListNode_free(pos);
+        pos = next;
+    }
+    lh->head = NULL; /* do not leave a dangling head behind */
+}
+
+/* Sets the value stored for `key`, appending a new pair if it is absent. */
+void ListHashmap_update(ListHashmap *lh, uint32_t key, void *value)
+{
+    LinkedListNode **link = &lh->head;
+
+    while (*link)
+    {
+        if ((*link)->key == key)
+        {
+            (*link)->value = value;
+            return;
+        }
+        link = &(*link)->next;
+    }
+
+    /* must make a new pair; `link` now addresses the terminating NULL */
+    *link = LinkedListNode_init(key, value, NULL);
+}
+
+/* Returns true if `key` has a pair in `lh`. */
+bool ListHashmap_contains(ListHashmap *lh, uint32_t key)
+{
+    for (LinkedListNode *pos = lh->head; pos; pos = pos->next)
+    {
+        if (pos->key == key) return true;
+    }
+
+    return false;
+}
+
+/* Returns the value stored for `key`. The key must be present: a miss
+   trips an assertion, or yields NULL when assertions are compiled out. */
+void *ListHashmap_get(ListHashmap *lh, uint32_t key)
+{
+    for (LinkedListNode *pos = lh->head; pos; pos = pos->next)
+    {
+        if (pos->key == key) return pos->value;
+    }
+
+    assert(false);
+    return NULL; /* previously fell off the end of a non-void function */
+}
+
+/* Removes the pair for `key`. The key must be present: a miss trips an
+   assertion, or is ignored when assertions are compiled out. */
+void ListHashmap_delete(ListHashmap *lh, uint32_t key)
+{
+    for (LinkedListNode **link = &lh->head; *link; link = &(*link)->next)
+    {
+        if ((*link)->key == key)
+        {
+            LinkedListNode *removed = *link;
+            *link = removed->next;
+            LinkedListNode_free(removed);
+            return;
+        }
+    }
+
+    assert(false);
+}
+
+/* Prints every pair as "(key, value)", reading each value as an int. */
+void ListHashmap_printPairs(ListHashmap *lh)
+{
+    for (LinkedListNode *pos = lh->head; pos; pos = pos->next)
+    {
+        const int *ptr = pos->value;
+        /* %d does not match uint32_t; print the key as unsigned long. */
+        printf("(%lu, %d)\n", (unsigned long) pos->key, *ptr);
+    }
+}
+
+/* The map proper: BINS independent lists selected by hash_key(). */
+struct Hashmap
+{
+    ListHashmap bins[BINS];
+};
+
+/* Allocates an empty map. Never returns NULL: the process aborts if the
+   allocation fails. Release with Hashmap_free(). */
+Hashmap *Hashmap_init(void)
+{
+    Hashmap *hm = malloc(sizeof *hm);
+    if (hm == NULL)
+    {
+        perror("Hashmap_init");
+        abort();
+    }
+    for (size_t i = 0; i < BINS; i++)
+    {
+        ListHashmap_init(&hm->bins[i]);
+    }
+    return hm;
+}
+
+/* Frees the map and all of its nodes; stored values are not freed. */
+void Hashmap_free(Hashmap *hm)
+{
+    for (size_t i = 0; i < BINS; i++)
+    {
+        ListHashmap_deinit(&hm->bins[i]);
+    }
+    free(hm);
+}
+
+/* Sets the value stored for `key`, inserting the pair if needed. */
+void Hashmap_update(Hashmap *hm, uint32_t key, void *value)
+{
+    ListHashmap_update(&hm->bins[hash_key(key)], key, value);
+}
+
+/* Returns true if `key` is in the map. */
+bool Hashmap_contains(Hashmap *hm, uint32_t key)
+{
+    return ListHashmap_contains(&hm->bins[hash_key(key)], key);
+}
+
+/* Returns the value stored for `key`, which must be in the map. */
+void *Hashmap_get(Hashmap *hm, uint32_t key)
+{
+    return ListHashmap_get(&hm->bins[hash_key(key)], key);
+}
+
+/* Removes `key`, which must be in the map. */
+void Hashmap_delete(Hashmap *hm, uint32_t key)
+{
+    ListHashmap_delete(&hm->bins[hash_key(key)], key);
+}
diff --git a/lz77/hashmap.h b/lz77/hashmap.h
new file mode 100644
--- /dev/null
+++ b/lz77/hashmap.h
@@ -0,0 +1,21 @@
+#ifndef HASHMAP_H
+#define HASHMAP_H
+
+#include <stdint.h>
+#include <stdbool.h>
+
+/* Opaque map from uint32_t keys to void * values. Values are stored as
+   given and never freed by the map. */
+struct Hashmap;
+typedef struct Hashmap Hashmap;
+
+/* Hashmap_get() and Hashmap_delete() require the key to be present;
+   check with Hashmap_contains() first. */
+Hashmap *Hashmap_init(void);
+void Hashmap_free(Hashmap *hm);
+void Hashmap_update(Hashmap *hm, uint32_t key, void *value);
+bool Hashmap_contains(Hashmap *hm, uint32_t key);
+void *Hashmap_get(Hashmap *hm, uint32_t key);
+void Hashmap_delete(Hashmap *hm, uint32_t key);
+
+#endif
diff --git a/lz77/lz77.c b/lz77/lz77.c
new file mode 100644
--- /dev/null
+++ b/lz77/lz77.c
@@ -0,0 +1,181 @@
+/* Compresses a file into a raw DEFLATE stream made of a single
+   fixed-Huffman block, using a brute-force LZ77 match search. */
+
+#include "hashmap.h"
+#include "bitwriter.h"
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+enum
+{
+    WINDOW_SIZE = 32768, /* furthest distance a match may reach back */
+    MIN_MATCH = 3,       /* shortest match that is written as a match */
+    MAX_MATCH = 258      /* longest match that is searched for */
+};
+
+/* Reads all of `path` into a malloc'd buffer and stores its size in
+   `*length`. Returns NULL after printing a message if anything fails;
+   the caller frees the buffer. */
+static uint8_t *read_whole_file(const char *path, size_t *length)
+{
+    /* Binary mode: text mode may translate bytes on some platforms. */
+    FILE *file = fopen(path, "rb");
+    if (file == NULL)
+    {
+        perror(path);
+        return NULL;
+    }
+
+    long size = -1;
+    if (fseek(file, 0, SEEK_END) == 0)
+    {
+        size = ftell(file);
+    }
+
+    uint8_t *data = NULL;
+    if (size >= 0 && fseek(file, 0, SEEK_SET) == 0)
+    {
+        /* Ask for at least one byte so that an empty file is not
+           mistaken for an allocation failure. */
+        data = malloc(size > 0 ? (size_t) size : 1);
+    }
+
+    if (data != NULL && fread(data, 1, (size_t) size, file) != (size_t) size)
+    {
+        free(data);
+        data = NULL;
+    }
+
+    if (data != NULL)
+    {
+        *length = (size_t) size;
+    }
+    else
+    {
+        fprintf(stderr, "%s: could not read file\n", path);
+    }
+
+    fclose(file);
+    return data;
+}
+
+int main(int argc, char *argv[])
+{
+    if (argc != 2)
+    {
+        printf("Usage: %s <input file>\n", argv[0]);
+        exit(1);
+    }
+
+    const char *input_file_name = argv[1];
+
+    /* Load the input once; the match search compares bytes in memory
+       instead of seeking and reading the file for every comparison. */
+    size_t input_file_length = 0;
+    uint8_t *input = read_whole_file(input_file_name, &input_file_length);
+    if (input == NULL)
+    {
+        return EXIT_FAILURE;
+    }
+
+    size_t name_size = strlen(input_file_name) + strlen(".deflate") + 1;
+    char *output_file_name = malloc(name_size);
+    if (output_file_name == NULL)
+    {
+        perror("malloc");
+        free(input);
+        return EXIT_FAILURE;
+    }
+    snprintf(output_file_name, name_size, "%s.deflate", input_file_name);
+
+    FILE *output_file = fopen(output_file_name, "wb");
+    if (output_file == NULL)
+    {
+        perror(output_file_name);
+        free(output_file_name);
+        free(input);
+        return EXIT_FAILURE;
+    }
+    free(output_file_name);
+
+    BitWriter *bitWriter = BitWriter_init(output_file);
+    /* NOTE(review): the map is created and freed but never consulted. */
+    Hashmap *hashMap = Hashmap_init();
+
+    BitWriter_write_bit(bitWriter, 1); /* BFINAL */
+
+    BitWriter_write_bit(bitWriter, 1); /* BTYPE */
+    BitWriter_write_bit(bitWriter, 0);
+
+    size_t cursor = 0;
+
+    while (cursor < input_file_length)
+    {
+        /* size_t values are printed with %zu. */
+        printf("Compressed %zu/%zu\n", cursor, input_file_length);
+        uint32_t best_length = 0;
+        size_t best_length_start = 0;
+        size_t scan_start = cursor > WINDOW_SIZE ? cursor - WINDOW_SIZE : 0;
+
+        for (size_t start = scan_start; start < cursor; start++)
+        {
+            /* A candidate stops at the cursor, so a match never overlaps
+               the bytes it stands for. */
+            uint32_t length = 0;
+            while (start + length < cursor
+                   && cursor + length < input_file_length
+                   && length < MAX_MATCH
+                   && input[start + length] == input[cursor + length])
+            {
+                length += 1;
+            }
+
+            /* Strictly greater: the earliest of equally long matches wins. */
+            if (length > best_length)
+            {
+                best_length = length;
+                best_length_start = start;
+            }
+        }
+
+        if (best_length >= MIN_MATCH)
+        {
+            uint32_t backtrack = (uint32_t) (cursor - best_length_start);
+            BitWriter_write_length(bitWriter, best_length);
+            BitWriter_write_distance(bitWriter, backtrack);
+            cursor += best_length;
+        }
+        else
+        {
+            BitWriter_write_alpha(bitWriter, input[cursor]);
+            cursor += 1;
+        }
+    }
+
+    BitWriter_write_bin(bitWriter, 0, 7); /* EOB */
+
+    BitWriter_flush(bitWriter);
+
+    Hashmap_free(hashMap);
+    BitWriter_free(bitWriter);
+    free(input);
+
+    /* A write failure may only surface once fclose() flushes the stream. */
+    int status = EXIT_SUCCESS;
+    if (ferror(output_file))
+    {
+        status = EXIT_FAILURE;
+    }
+    if (fclose(output_file) != 0)
+    {
+        status = EXIT_FAILURE;
+    }
+    if (status != EXIT_SUCCESS)
+    {
+        fprintf(stderr, "Could not write the compressed output\n");
+    }
+    return status;
+}