Commit 9b424ab4 authored by John M. (Jack) Maxfield's avatar John M. (Jack) Maxfield
Browse files

Some work on deflate

parent 2bac0fe7
No related merge requests found
Pipeline #30452 failed with stage
in 0 seconds
Showing with 505 additions and 0 deletions
+505 -0
#include "bitwriter.h"
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <assert.h>
/* State of a bit-level writer: bits are collected into `byte`, lowest bit
   first, and the byte is written to `file` once eight bits are present. */
struct BitWriter
{
/* Stream that completed bytes are written to. */
FILE *file;
/* Byte currently being assembled. */
uint8_t byte;
/* Number of bits already stored in `byte`; reset to 0 after each write. */
int mod;
};
/* Creates a bit writer that emits its bytes to `file`.
   The writer starts with an empty pending byte. Allocation failure trips
   an assertion. Release the result with BitWriter_free(). */
BitWriter *BitWriter_init(FILE *file)
{
    BitWriter *writer = malloc(sizeof *writer);
    assert(writer);

    writer->mod = 0;
    writer->byte = 0;
    writer->file = file;

    return writer;
}
/* Releases the writer itself. It does not write out a partially filled
   byte and does not close the underlying FILE. */
void BitWriter_free(BitWriter *bw)
{
free(bw);
}
/* Appends one bit to the stream. Bits fill the pending byte from the
   least significant position upward; once the eighth bit arrives the
   byte is written to the file and the accumulator starts over. */
void BitWriter_write_bit(BitWriter *bw, bool bit)
{
    int position = bw->mod;

    bw->mod = position + 1;
    bw->byte |= (bit > 0) << position;

    if (bw->mod != 8)
    {
        return;
    }

    /* The byte is complete: emit it and reset the accumulator. */
    fwrite(&bw->byte, 1, 1, bw->file);
    bw->mod = 0;
    bw->byte = 0;
}
/* Pads the pending byte with zero bits until it is written out.
   Does nothing when no bits are pending. */
void BitWriter_flush(BitWriter *bw)
{
    if (bw->mod == 0)
    {
        return;
    }

    do
    {
        BitWriter_write_bit(bw, 0);
    } while (bw->mod != 0);
}
/* Writes the low `number_to_write` bits of `bits`, most significant of
   those bits first (the order used for Huffman codes).
   `number_to_write` must be in the range 0..32. */
void BitWriter_write_bin(BitWriter *bw, uint32_t bits, int number_to_write)
{
    for (int b = number_to_write - 1; b >= 0; b--)
    {
        /* Shift the unsigned value rather than building a signed mask:
           `1 << 31` would be undefined behaviour. */
        BitWriter_write_bit(bw, ((bits >> b) & 1u) != 0);
    }
}
/* Writes the low `number_to_write` bits of `bits`, least significant bit
   first (the order used for the extra bits that follow a code).
   `number_to_write` must be in the range 0..32. */
void BitWriter_write_bin_reverse(BitWriter *bw, uint32_t bits, int number_to_write)
{
    for (int b = 0; b < number_to_write; b++)
    {
        /* Shift the unsigned value rather than building a signed mask:
           `1 << 31` would be undefined behaviour. */
        BitWriter_write_bit(bw, ((bits >> b) & 1u) != 0);
    }
}
/* Writes one literal/length symbol using the fixed code table:
     symbols   0..143 -> 8 bits, value symbol + 48
     symbols 144..255 -> 9 bits, value symbol + 256
     symbols 256..279 -> 7 bits, value symbol - 256
     symbols 280..287 -> 8 bits, value symbol - 88
   The assertion only admits symbols up to 285. */
void BitWriter_write_alpha(BitWriter *bw, uint32_t byte)
{
    uint32_t code;
    int width;

    assert(byte <= 285);

    if (byte <= 143)
    {
        code = 48 + byte;
        width = 8;
    }
    else if (byte <= 255)
    {
        code = 256 + byte;
        width = 9;
    }
    else if (byte <= 279)
    {
        code = byte - 256;
        width = 7;
    }
    else if (byte <= 287)
    {
        code = byte - 88;
        width = 8;
    }
    else
    {
        /* Only reachable with assertions disabled: nothing is written. */
        return;
    }

    BitWriter_write_bin(bw, code, width);
}
/* Writes a match length (3..258) as a length symbol followed by its
   extra bits, if any. Each branch below covers one group of lengths that
   share the same number of extra bits:
     3..10    -> symbols 257..264, no extra bits
     11..18   -> symbols 265..268, 1 extra bit
     19..34   -> symbols 269..272, 2 extra bits
     35..66   -> symbols 273..276, 3 extra bits
     67..130  -> symbols 277..280, 4 extra bits
     131..257 -> symbols 281..284, 5 extra bits
     258      -> symbol 285, no extra bits
   The extra bits are the low bits of (length + bias), where the bias
   aligns the first length of each group to zero. */
void BitWriter_write_length(BitWriter *bw, uint32_t length)
{
    uint32_t symbol;
    uint32_t bias = 0;
    int extra_bits = 0;

    assert(3 <= length && length <= 258);

    if (length <= 10)
    {
        symbol = length + 254;
    }
    else if (length <= 18)
    {
        symbol = (length + 519) >> 1;
        bias = 1;
        extra_bits = 1;
    }
    else if (length <= 34)
    {
        symbol = (length + 1057) >> 2;
        bias = 1;
        extra_bits = 2;
    }
    else if (length <= 66)
    {
        symbol = (length + 2149) >> 3;
        bias = 5;
        extra_bits = 3;
    }
    else if (length <= 130)
    {
        symbol = (length + 4365) >> 4;
        bias = 13;
        extra_bits = 4;
    }
    else if (length <= 257)
    {
        symbol = (length + 8861) >> 5;
        bias = 29;
        extra_bits = 5;
    }
    else if (length == 258)
    {
        symbol = 285;
    }
    else
    {
        /* Only reachable with assertions disabled: nothing is written. */
        assert(0);
        return;
    }

    BitWriter_write_alpha(bw, symbol);
    if (extra_bits > 0)
    {
        BitWriter_write_bin_reverse(bw, bias + length, extra_bits);
    }
}
/* Writes a match distance (1..32768) as a 5-bit distance code followed
   by its extra bits. Distances 1..4 map directly to codes 0..3. Larger
   distances fall into ranges (2^(k+1), 2^(k+2)] that carry k extra bits;
   the extra bits are the low k bits of (dist - 1). */
void BitWriter_write_distance(BitWriter *bw, uint32_t dist)
{
    assert(1 <= dist && dist <= 32768);

    if (dist <= 4)
    {
        BitWriter_write_bin(bw, dist - 1, 5);
        return;
    }

    /* Find the smallest extra-bit count whose range contains dist. */
    int extra = 1;
    while (!(dist <= 1 << (extra + 2)))
    {
        extra++;
    }

    BitWriter_write_bin(bw, (dist + extra * (1 << (extra + 1)) - 1) >> extra, 5);
    BitWriter_write_bin_reverse(bw, dist - 1, extra);
}
\ No newline at end of file
#ifndef __BIT_WRITER_H__
#define __BIT_WRITER_H__
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <assert.h>
/* A struct for writing to files as
streams of individual bits */
struct BitWriter;
typedef struct BitWriter BitWriter;

/* Creates a writer that emits bytes to `file`; release it with
   BitWriter_free(). */
BitWriter *BitWriter_init(FILE *file);

/* Frees the writer. Does not flush pending bits or close the file. */
void BitWriter_free(BitWriter *bw);

/* Appends a single bit; bytes are filled from the lowest bit upward. */
void BitWriter_write_bit(BitWriter *bw, bool bit);

/* Pads the current byte with zero bits so that it is written out. */
void BitWriter_flush(BitWriter *bw);

/* Writes a literal/length symbol (0..285) with the fixed code table. */
void BitWriter_write_alpha(BitWriter *bw, uint32_t byte);

/* Writes a match length (3..258): length symbol plus extra bits. */
void BitWriter_write_length(BitWriter *bw, uint32_t length);

/* Writes a match distance (1..32768): 5-bit code plus extra bits. */
void BitWriter_write_distance(BitWriter *bw, uint32_t dist);

/* Writes the low `number_to_write` bits of `bits`, highest bit first. */
void BitWriter_write_bin(BitWriter *bw, uint32_t bits, int number_to_write);

/* Writes the low `number_to_write` bits of `bits`, lowest bit first. */
void BitWriter_write_bin_reverse(BitWriter *bw, uint32_t bits, int number_to_write);
#endif
#include "hashmap.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <stdbool.h>
/* Number of bins in the hash table. */
#define BINS 33703

/* Maps a 32-bit key to a bin index in [0, BINS).
   Multiplicative hashing: the key is multiplied by a 32-bit constant with
   the product wrapping modulo 2^32, then reduced to the bin count. The
   constant is given an unsigned 32-bit type on purpose: written as a
   plain literal it has a signed 64-bit type, and the product can then
   overflow, which is undefined behaviour. */
uint32_t hash_key(uint32_t val)
{
    uint32_t mixed = (uint32_t) (val * UINT32_C(2654435761));
    return mixed % BINS;
}
/* first, make a hashmap that's just a linked list of key, value pairs */
struct LinkedListNode;
typedef struct LinkedListNode LinkedListNode;
/* One key/value pair in a singly linked list. */
struct LinkedListNode
{
/* Key identifying this pair. */
uint32_t key;
/* Caller-supplied value pointer; the list code in this file never frees it. */
void *value;
/* Next pair in the list, or NULL at the end. */
LinkedListNode *next;
};
/* A key -> value map stored as an unsorted singly linked list. */
typedef struct ListHashmap
{
/* First pair in the list, or NULL when the map is empty. */
LinkedListNode *head;
} ListHashmap;
/* Allocates a list node holding (key, value) and linking to `next`.
   Allocation failure trips an assertion. */
LinkedListNode *LinkedListNode_init(uint32_t key, void *value, LinkedListNode *next)
{
    LinkedListNode *node = malloc(sizeof *node);
    assert(node);

    node->next = next;
    node->value = value;
    node->key = key;

    return node;
}
/* Frees a single node. The value pointer it holds and the nodes it
   links to are left untouched. */
void LinkedListNode_free(LinkedListNode *lln)
{
free(lln);
}
/* Initializes `lh` as an empty list map. */
void ListHashmap_init(ListHashmap *lh)
{
lh->head = NULL;
}
/* Frees every node in the list. Stored value pointers are not freed,
   and `lh->head` is left as it was. */
void ListHashmap_deinit(ListHashmap *lh)
{
    LinkedListNode *following;

    for (LinkedListNode *node = lh->head; node; node = following)
    {
        /* Remember the successor before the node is released. */
        following = node->next;
        LinkedListNode_free(node);
    }
}
/* Sets the value stored under `key`. An existing pair has its value
   replaced; otherwise a new pair is appended at the end of the list. */
void ListHashmap_update(ListHashmap *lh, uint32_t key, void *value)
{
    /* `link` always points at the pointer that leads to the current node,
       so after the loop it is exactly the slot a new node belongs in. */
    LinkedListNode **link = &lh->head;

    while (*link)
    {
        LinkedListNode *node = *link;
        if (node->key == key)
        {
            node->value = value;
            return;
        }
        link = &node->next;
    }

    *link = LinkedListNode_init(key, value, NULL);
}
/* Reports whether a pair with the given key exists in the list. */
bool ListHashmap_contains(ListHashmap *lh, uint32_t key)
{
    for (LinkedListNode *node = lh->head; node != NULL; node = node->next)
    {
        if (node->key == key)
        {
            return true;
        }
    }
    return false;
}
/* Returns the value stored under `key`.
   The key is expected to be present: a missing key trips an assertion.
   When assertions are compiled out, NULL is returned instead of running
   off the end of a non-void function. */
void *ListHashmap_get(ListHashmap *lh, uint32_t key)
{
    for (LinkedListNode *pos = lh->head; pos; pos = pos->next)
    {
        if (pos->key == key)
        {
            return pos->value;
        }
    }
    assert(false);
    return NULL;
}
/* Removes the pair stored under `key` and frees its node (the stored
   value pointer is not freed).
   The key is expected to be present: a missing key trips an assertion.
   When assertions are compiled out, a missing key, including on an empty
   list, is a no-op rather than a NULL dereference. */
void ListHashmap_delete(ListHashmap *lh, uint32_t key)
{
    assert(ListHashmap_contains(lh, key));

    /* Walk the pointers that lead to each node so the head and interior
       nodes are unlinked by the same code path. */
    LinkedListNode **link = &lh->head;
    while (*link)
    {
        LinkedListNode *pos = *link;
        if (pos->key == key)
        {
            *link = pos->next;
            LinkedListNode_free(pos);
            return;
        }
        link = &pos->next;
    }
}
/* Prints every pair as "(key, value)", one per line, in list order.
   Each stored value pointer is read as a pointer to int.
   The key is printed as an unsigned number to match its type, and the
   value is taken directly from the node being visited instead of being
   looked up again from the head of the list. */
void ListHashmap_printPairs(ListHashmap *lh)
{
    for (LinkedListNode *pos = lh->head; pos; pos = pos->next)
    {
        int *ptr = pos->value;
        printf("(%lu, %d)\n", (unsigned long) pos->key, *ptr);
    }
}
/* Hash table with a fixed number of bins; each bin is a linked-list map
   holding the pairs whose key hashes to that bin's index. */
struct Hashmap
{
ListHashmap bins[BINS];
};
/* Allocates a hash map with every bin empty. Allocation failure trips
   an assertion. Release the result with Hashmap_free(). */
Hashmap *Hashmap_init()
{
    Hashmap *map = malloc(sizeof *map);
    assert(map);

    size_t bin = 0;
    while (bin < BINS)
    {
        ListHashmap_init(&map->bins[bin]);
        bin++;
    }

    return map;
}
/* Frees the nodes of every bin and then the map itself. Stored value
   pointers are not freed. */
void Hashmap_free(Hashmap *hm)
{
    size_t bin = 0;
    while (bin < BINS)
    {
        ListHashmap_deinit(&hm->bins[bin]);
        bin++;
    }

    free(hm);
}
/* Inserts the pair (key, value), or replaces the value when the key is
   already present, in the bin selected by the key's hash. */
void Hashmap_update(Hashmap *hm, uint32_t key, void *value)
{
    ListHashmap *bin = &hm->bins[hash_key(key)];

    ListHashmap_update(bin, key, value);
}
/* Reports whether `key` is present, searching only the bin selected by
   the key's hash. */
bool Hashmap_contains(Hashmap *hm, uint32_t key)
{
    ListHashmap *bin = &hm->bins[hash_key(key)];

    return ListHashmap_contains(bin, key);
}
/* Returns the value stored under `key`, looked up in the bin selected
   by the key's hash. The key is expected to be present. */
void *Hashmap_get(Hashmap *hm, uint32_t key)
{
    ListHashmap *bin = &hm->bins[hash_key(key)];

    return ListHashmap_get(bin, key);
}
/* Removes the pair stored under `key` from the bin selected by the
   key's hash. The key is expected to be present. */
void Hashmap_delete(Hashmap *hm, uint32_t key)
{
    ListHashmap *bin = &hm->bins[hash_key(key)];

    ListHashmap_delete(bin, key);
}
\ No newline at end of file
#ifndef __HMH__
#define __HMH__
#include <stdint.h>
#include <stdbool.h>
/* Opaque map from 32-bit keys to caller-owned value pointers. */
struct Hashmap;
typedef struct Hashmap Hashmap;
/* Allocates an empty map; release it with Hashmap_free(). */
Hashmap *Hashmap_init();
/* Frees the map and its internal nodes; stored values are not freed. */
void Hashmap_free(Hashmap *hm);
/* Inserts (key, value) or replaces the value of an existing key. */
void Hashmap_update(Hashmap *hm, uint32_t key, void *value);
/* Returns true when `key` is present. */
bool Hashmap_contains(Hashmap *hm, uint32_t key);
/* Returns the value stored under `key`; the key must be present. */
void *Hashmap_get(Hashmap *hm, uint32_t key);
/* Removes the pair stored under `key`; the key must be present. */
void Hashmap_delete(Hashmap *hm, uint32_t key);
#endif
\ No newline at end of file
#include "hashmap.h"
#include "bitwriter.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
/* Compresses the file named by argv[1] into "<name>.deflate".
   The output is one final block (BFINAL = 1) using the fixed code tables
   (BTYPE bits written as 1, 0). At each position the previous 32768
   bytes are searched for the longest non-overlapping match of up to 258
   bytes. A match longer than 2 bytes is written as a length/distance
   pair; otherwise the current byte is written as a literal.
   The input is read into memory once, in binary mode, so the search
   compares bytes directly instead of seeking in the file for every
   comparison.
   Returns 0 on success; exits with status 1 on a usage or I/O error. */
int main(int argc, char *argv[])
{
    if (argc != 2)
    {
        printf("Usage: %s <input file>\n", argv[0]);
        exit(1);
    }
    const char *input_file_name = argv[1];

    /* Binary mode: the data must not go through newline translation. */
    FILE *input_file = fopen(input_file_name, "rb");
    if (!input_file)
    {
        perror(input_file_name);
        exit(1);
    }

    /* Determine the input size; ftell reports failure as a negative value. */
    long end_offset = -1;
    if (fseek(input_file, 0, SEEK_END) == 0)
    {
        end_offset = ftell(input_file);
    }
    if (end_offset < 0 || fseek(input_file, 0, SEEK_SET) != 0)
    {
        perror(input_file_name);
        fclose(input_file);
        exit(1);
    }
    size_t input_file_length = (size_t) end_offset;

    /* Allocate at least one byte so an empty input is not mistaken for an
       allocation failure. */
    uint8_t *data = malloc(input_file_length > 0 ? input_file_length : 1);
    if (!data)
    {
        fprintf(stderr, "Out of memory reading %s\n", input_file_name);
        fclose(input_file);
        exit(1);
    }
    if (fread(data, 1, input_file_length, input_file) != input_file_length)
    {
        fprintf(stderr, "Could not read %s\n", input_file_name);
        free(data);
        fclose(input_file);
        exit(1);
    }
    fclose(input_file);

    size_t name_size = strlen(input_file_name) + strlen(".deflate") + 1;
    char *output_file_name = malloc(name_size);
    if (!output_file_name)
    {
        fprintf(stderr, "Out of memory\n");
        free(data);
        exit(1);
    }
    snprintf(output_file_name, name_size, "%s.deflate", input_file_name);

    FILE *output_file = fopen(output_file_name, "wb");
    if (!output_file)
    {
        perror(output_file_name);
        free(output_file_name);
        free(data);
        exit(1);
    }
    free(output_file_name);

    BitWriter *bitWriter = BitWriter_init(output_file);
    /* The hash map is created and released but not consulted yet. */
    Hashmap *hashMap = Hashmap_init();

    BitWriter_write_bit(bitWriter, 1); /* BFINAL */
    BitWriter_write_bit(bitWriter, 1); /* BTYPE */
    BitWriter_write_bit(bitWriter, 0);

    size_t cursor = 0;
    while (cursor < input_file_length)
    {
        printf("Compressed %zu/%zu\n", cursor, input_file_length);

        uint32_t best_length = 0;
        size_t best_length_start = 0;

        /* Only the previous 32768 bytes may be referenced. */
        size_t scan_start = 0;
        if (cursor > 32768) scan_start = cursor - 32768;

        for (size_t start = scan_start; start < cursor; start++)
        {
            /* Extend the match while it stays before the cursor, inside
               the input, and within the maximum match length. */
            uint32_t length = 0;
            while (start + length < cursor
                   && cursor + length < input_file_length
                   && length < 258
                   && data[start + length] == data[cursor + length])
            {
                length += 1;
            }

            /* Strict comparison keeps the earliest of equally long matches. */
            if (length > best_length)
            {
                best_length = length;
                best_length_start = start;
            }
        }

        if (best_length > 2)
        {
            uint32_t backtrack = (uint32_t) (cursor - best_length_start);
            BitWriter_write_length(bitWriter, best_length);
            BitWriter_write_distance(bitWriter, backtrack);
            cursor += best_length;
        }
        else
        {
            BitWriter_write_alpha(bitWriter, data[cursor]);
            cursor += 1;
        }
    }

    BitWriter_write_bin(bitWriter, 0, 7); /* EOB */
    BitWriter_flush(bitWriter);

    Hashmap_free(hashMap);
    BitWriter_free(bitWriter);
    free(data);

    /* fclose flushes buffered output, so its result decides success. */
    if (fclose(output_file) != 0)
    {
        perror("fclose");
        return 1;
    }
    return 0;
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment