Commit 21d2d56e authored by John M. (Jack) Maxfield
Browse files

Fix lz77

parent 26f037cf
No related merge requests found
Pipeline #30573 canceled with stage
Showing with 4736 additions and 43 deletions
+4736 -43
This diff is collapsed.
......@@ -4,8 +4,97 @@
#include <stdio.h>
#include <stdlib.h>
/*
 * Upper bound on how far behind the cursor a match may start. main asserts
 * match distances against this value and evicts hash-map positions once they
 * fall this far behind the cursor.
 * NOTE(review): presumably the 32 KiB DEFLATE window size -- confirm.
 */
const size_t DISTANCE_MAX = 32768;
/*
 * One link of the singly linked list that backs IntQueue.
 * Holds a single value plus a pointer to the node queued after it.
 */
typedef struct IntQueueNode
{
    size_t value;              /* payload stored in this node */
    struct IntQueueNode *next; /* node queued after this one, NULL if none */
} IntQueueNode;
/*
 * FIFO queue of integer values stored as a singly linked list.
 * Nodes are appended at the tail and removed from the head.
 */
struct IntQueue
{
    uint64_t size;      /* number of nodes currently in the queue */
    IntQueueNode *head; /* oldest node, NULL when the queue is empty */
    IntQueueNode *tail; /* newest node, NULL when the queue is empty */
};
typedef struct IntQueue IntQueue;
IntQueue *IntQueue_init()
{
IntQueue *queue = malloc(sizeof(IntQueue));
assert(queue);
queue->size = 0;
queue->head = NULL;
queue->tail = NULL;
return queue;
}
/*
 * Destroy a queue: free every node still linked from the head, then free
 * the queue object itself. The queue pointer must not be used afterwards.
 */
void IntQueue_free(IntQueue *queue)
{
    IntQueueNode *cursor = queue->head;
    while (cursor != NULL)
    {
        /* read the successor before the current node is released */
        IntQueueNode *doomed = cursor;
        cursor = cursor->next;
        free(doomed);
    }
    free(queue);
}
/*
 * Return the first (oldest) node of the queue so callers can walk the list
 * through the nodes' next pointers. Returns NULL when the queue has no nodes.
 */
IntQueueNode *IntQueue_get_head(IntQueue *queue)
{
    IntQueueNode *front = queue->head;
    return front;
}
/*
 * Append value to the back of the queue.
 *
 * A new node is heap-allocated (allocation failure is caught with assert),
 * linked after the current tail, and the size counter is incremented.
 */
void IntQueue_push(IntQueue *queue, uint64_t value)
{
    IntQueueNode *fresh = malloc(sizeof *fresh);
    assert(fresh);
    fresh->next = NULL;
    fresh->value = value;

    if (queue->head != NULL || queue->tail != NULL)
    {
        /* queue already has nodes: hang the new one off the current tail */
        queue->tail->next = fresh;
    }
    else
    {
        /* first node: it is the head as well as the tail */
        queue->head = fresh;
    }
    queue->tail = fresh;
    queue->size += 1;
}
/*
 * Return the value stored in the oldest node without removing it.
 *
 * The queue must not be empty. This precondition is checked with assert,
 * matching IntQueue_pop, instead of dereferencing a NULL head.
 */
uint64_t IntQueue_peek(IntQueue *queue)
{
    assert(queue->head);
    return queue->head->value;
}
/*
 * Remove the oldest node from the queue and release its memory.
 *
 * The queue must not be empty (checked with assert). When the last node is
 * removed, tail is reset to NULL so that head and tail stay consistent.
 */
void IntQueue_pop(IntQueue *queue)
{
    assert(queue->head);
    IntQueueNode *oldHead = queue->head;
    queue->head = oldHead->next;
    if (!queue->head) queue->tail = NULL;
    queue->size--;
    /* the node was allocated by IntQueue_push; it was previously leaked here */
    free(oldHead);
}
/*
 * Report whether the queue holds no elements, judged by its size counter.
 * Returns true when size is 0, false otherwise.
 */
bool IntQueue_empty(IntQueue *queue)
{
    if (queue->size != 0)
    {
        return false;
    }
    return true;
}
int main(int argc, char *argv[])
{
/* this is bad */
if (argc != 2)
{
printf("Usage: %s <input file>\n", argv[0]);
......@@ -29,69 +118,110 @@ int main(int argc, char *argv[])
BitWriter_write_bit(bitWriter, 1); /* BTYPE */
BitWriter_write_bit(bitWriter, 0);
IntQueue *seenTripleQueue = IntQueue_init();
/* read the input file to a buffer */
/* TODO: use mmap? */
fseek(input_file, 0, SEEK_END);
size_t input_file_length = ftell(input_file);
fseek(input_file, 0, SEEK_SET);
uint8_t *inp = malloc(input_file_length);
assert(inp);
fread(inp, input_file_length, 1, input_file);
size_t cursor = 0;
while (cursor < input_file_length)
while (cursor < input_file_length)
{
printf("Compressed %ld/%ld\n", cursor, input_file_length);
uint32_t best_length = 0;
size_t best_length_start = 0;
size_t scan_start = 0;
if (cursor > 32768) scan_start = cursor - 32768;
for (size_t start = scan_start; start < cursor; start++)
{
uint32_t length = 0;
while (start + length < cursor && cursor + length < input_file_length && length < 258)
{
uint8_t back, cur;
fseek(input_file, start + length, SEEK_SET);
fread(&back, 1, 1, input_file);
fseek(input_file, cursor + length, SEEK_SET);
fread(&cur, 1, 1, input_file);
if (back == cur) length += 1;
else break;
}
size_t old_cursor = cursor;
if (length > best_length)
{
best_length = length;
best_length_start = start;
}
}
if (best_length > 2)
size_t bytes_remaining = input_file_length - cursor;
if (bytes_remaining < 3)
{
uint32_t backtrack = cursor - best_length_start;
BitWriter_write_length(bitWriter, best_length);
BitWriter_write_distance(bitWriter, backtrack);
cursor += best_length;
/* just write literal */
BitWriter_write_alpha(bitWriter, inp[cursor++]);
}
else
{
fseek(input_file, cursor, SEEK_SET);
uint8_t b;
fread(&b, 1, 1, input_file);
BitWriter_write_alpha(bitWriter, b);
cursor += 1;
}
uint32_t triple = inp[cursor] + (inp[cursor + 1] << 8) + (inp[cursor + 2] << 16);
if (Hashmap_contains(hashMap, triple))
{
IntQueue *queue = Hashmap_get(hashMap, triple);
assert(cursor - IntQueue_peek(queue) <= DISTANCE_MAX);
IntQueueNode *pos = IntQueue_get_head(queue);
size_t best_dist = 0;
size_t best_len = 0;
while (pos)
{
size_t dif = 0;
size_t dist = cursor - pos->value;
/* take mod to account for case when length longer than distance */
while (cursor + dif < input_file_length && inp[cursor + dif] == inp[pos->value + dif % dist]) dif += 1;
assert(dif >= 3);
if (dif > best_len)
{
best_dist = dist;
best_len = dif;
}
pos = pos->next;
}
/* write the length, distance pair and move on */
BitWriter_write_length(bitWriter, best_len);
BitWriter_write_distance(bitWriter, best_dist);
cursor += best_len;
}
else
{
BitWriter_write_alpha(bitWriter, inp[cursor++]);
}
for (; old_cursor < cursor; old_cursor++)
{
/* clean up hash map elements which no longer matter */
if (old_cursor >= DISTANCE_MAX)
{
size_t cleanup_loc = old_cursor - DISTANCE_MAX;
uint32_t old_triple = inp[cleanup_loc] + (inp[cleanup_loc + 1] << 8) + (inp[cleanup_loc + 2] << 16);
assert(Hashmap_contains(hashMap, old_triple));
IntQueue *queue = Hashmap_get(hashMap, old_triple);
assert(IntQueue_peek(queue) == cleanup_loc);
IntQueue_pop(queue);
if (IntQueue_empty(queue))
{
IntQueue_free(queue);
Hashmap_delete(hashMap, old_triple);
}
}
/* add new triple to hash map */
uint32_t new_triple = inp[old_cursor] + (inp[old_cursor + 1] << 8) + (inp[old_cursor + 2] << 16);
if (Hashmap_contains(hashMap, new_triple))
{
IntQueue *queue = Hashmap_get(hashMap, new_triple);
IntQueue_push(queue, old_cursor);
}
else
{
IntQueue *newQueue = IntQueue_init();
IntQueue_push(newQueue, old_cursor);
Hashmap_update(hashMap, new_triple, newQueue);
}
}
}
}
BitWriter_write_bin(bitWriter, 0, 7); /* EOB */
BitWriter_flush(bitWriter);
BitWriter_flush(bitWriter);
Hashmap_free(hashMap);
BitWriter_free(bitWriter);
fclose(input_file);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment