Commit 213b475b authored by Mike Iovine's avatar Mike Iovine
Browse files

Merge branch 'master' of https://gitlab.caltech.edu/miovine/p1

No related merge requests found
Pipeline #32433 failed with stage
in 0 seconds
Showing with 4746 additions and 9 deletions
+4746 -9
......@@ -4,4 +4,6 @@
/huffman
/myunzip
/myzip
/lz77
\ No newline at end of file
/lz77
/__pycache__
/tests/__pycache__
\ No newline at end of file
all : myzip0 myunzip0 huffman inflate myunzip myzip lz77
myzip0 : myzip0.c
cc myzip0.c -o myzip0
myzip0 : src/myzip0/myzip0.c
cc src/myzip0/myzip0.c -o myzip0
myunzip0 : src/myunzip0/myunzip0.c
cc src/myunzip0/myunzip0.c -o myunzip0
huffman : huffman.c
cc huffman.c -o huffman
huffman : src/huffman/huffman.c src/lz77/bitwriter.c
cc -I include src/huffman/huffman.c src/lz77/bitwriter.c -o huffman
inflate : include/inflate.h src/inflate/main.c src/inflate/inflate.c
gcc -I include src/inflate/main.c src/inflate/inflate.c -o inflate
......@@ -21,5 +21,8 @@ lz77 : include/bitwriter.h include/hashmap.h include/lz77.h src/lz77/bitwriter.c
myzip : include/bitwriter.h include/hashmap.h include/lz77.h src/lz77/bitwriter.c src/lz77/hashmap.c src/lz77/lz77.c src/myzip/myzip.c
cc -I include src/lz77/lz77.c src/lz77/hashmap.c src/lz77/bitwriter.c src/myzip/myzip.c -o myzip
test : all
python3 -m unittest discover tests -p 'test_*.py'
clean :
rm -f myzip0 myunzip0 huffman inflate myunzip myzip lz77 test_myunzip0
rm -f myzip0 myunzip0 huffman inflate myunzip myzip lz77
......@@ -4,68 +4,7 @@
#include <stdint.h>
#include <string.h>
#include <assert.h>
/* A struct for writing to files as
streams of individual bits.
Bits are collected in `byte` and written to `file` one whole byte at a
time (see bit_writer_write_bit). */
/* NOTE(review): identifiers containing a double underscore are reserved for
the implementation in C; consider renaming the `__bit_writer` tag. */
typedef struct __bit_writer
{
/* Destination stream that completed bytes are written to. */
FILE *file;
/* Byte currently being assembled; bits not yet filled are 0. */
uint8_t byte;
/* Number of bits already stored in `byte`; reset to 0 after 8 bits. */
int mod;
} bit_writer;
/*
 * Allocate a bit writer that emits its bytes to `file`.
 *
 * The writer starts with an empty byte in progress. The stream is only
 * stored, not opened or closed here. Release the writer with
 * bit_writer_free().
 *
 * Allocation failure terminates the program. The check is explicit rather
 * than an assert() so that it is still performed when NDEBUG is defined.
 */
bit_writer *bit_writer_init(FILE *file)
{
    bit_writer *bw = malloc(sizeof *bw);
    if (bw == NULL)
    {
        fputs("bit_writer_init: out of memory\n", stderr);
        abort();
    }

    bw->file = file;
    bw->byte = 0;
    bw->mod = 0;
    return bw;
}
/* Release the memory of a writer obtained from bit_writer_init.
Only the struct is freed: no pending bits are written out and the
underlying FILE is not closed, so callers flush and fclose themselves. */
void bit_writer_free(bit_writer *bw)
{
free(bw);
}
/*
 * Append a single bit to the stream.
 *
 * Bits fill the byte in progress starting at the least significant
 * position. Once eight bits have been collected the byte is written to
 * the underlying file and the accumulator starts over.
 */
void bit_writer_write_bit(bit_writer *bw, bool bit)
{
    const int position = bw->mod;

    if (bit)
    {
        bw->byte |= 1 << position;
    }
    bw->mod = position + 1;

    if (bw->mod != 8)
    {
        return; /* byte still incomplete */
    }

    fwrite(&bw->byte, 1, 1, bw->file);
    bw->mod = 0;
    bw->byte = 0;
}
/*
 * Write out a partially filled byte, if there is one.
 *
 * The unused high bits of the byte are left as 0, which is the same as
 * padding with zero bits up to the next byte boundary. Nothing happens
 * when the writer is already byte-aligned.
 */
void bit_writer_flush(bit_writer *bw)
{
    if (bw->mod == 0)
    {
        return;
    }

    fwrite(&bw->byte, 1, 1, bw->file);
    bw->byte = 0;
    bw->mod = 0;
}
/*
 * Emit one literal byte as a variable-length code.
 *
 * Values 0..143 are written as the 8-bit code 0x30 + byte, and values
 * 144..255 as the 9-bit code 0x100 + byte. The code is written most
 * significant bit first.
 */
void write_byte(bit_writer *bw, uint8_t byte)
{
    const int short_code = byte <= 143;
    const uint32_t code = (short_code ? 0x30u : 0x100u) + (uint32_t) byte;
    int remaining = short_code ? 8 : 9;

    while (remaining-- > 0)
    {
        bit_writer_write_bit(bw, (code >> remaining) & 1u);
    }
}
#include "bitwriter.h"
int main(int argc, char *argv[])
{
......@@ -83,33 +22,29 @@ int main(int argc, char *argv[])
FILE *input_file = fopen(input_file_name, "r");
FILE *output_file = fopen(output_file_name, "w");
free(output_file_name);
bit_writer *bw = bit_writer_init(output_file);
BitWriter *bitWriter = BitWriter_init(output_file);
bit_writer_write_bit(bw, 1); /* BFINAL */
BitWriter_write_bit(bitWriter, 1); /* BFINAL */
bit_writer_write_bit(bw, 1); /* BTYPE */
bit_writer_write_bit(bw, 0);
BitWriter_write_bit(bitWriter, 1); /* BTYPE */
BitWriter_write_bit(bitWriter, 0);
// write_byte(bw, 144);
char c;
uint8_t c;
while (fread(&c, 1, 1, input_file))
{
// printf("Writing %d\n", (int) c);
write_byte(bw, c);
BitWriter_write_alpha(bitWriter, c);
}
for (int i = 0; i < 7; i++)
bit_writer_write_bit(bw, 0);
BitWriter_write_bit(bitWriter, 0);
bit_writer_flush(bw);
bit_writer_free(bw);
BitWriter_flush(bitWriter);
BitWriter_free(bitWriter);
fclose(output_file);
fclose(input_file);
return 0;
}
\ No newline at end of file
......@@ -6,6 +6,16 @@
#include <stdlib.h>
const size_t DISTANCE_MAX = 32768;
const size_t SCAN_MAX = 768;
/* SCAN_MAX caps the number of earlier locations that are
 * checked as match candidates for the current position.
 * For example, in a file which is all 0s, up to 32768
 * earlier locations start with the same triple of 0s,
 * and checking every one of them made compression of
 * such files take too long.
 * 768 seems a reasonable choice and performs
 * exactly the same as the unlimited version
 * on the test files.
 */
struct IntQueueNode;
typedef struct IntQueueNode IntQueueNode;
......@@ -138,8 +148,8 @@ size_t write_lz77_stream(FILE *input_file, FILE *output_file)
size_t best_dist = 0;
size_t best_len = 0;
while (pos)
size_t checked = 0;
while (pos && checked < SCAN_MAX)
{
size_t dif = 0;
size_t dist = cursor - pos->value;
......@@ -153,6 +163,7 @@ size_t write_lz77_stream(FILE *input_file, FILE *output_file)
}
pos = pos->next;
checked += 1;
}
/* write the length, distance pair and move on */
......
......@@ -88,7 +88,7 @@ int main(int argc, char *argv[])
/* extra field */
/* copy input file to output */
uint32_t compressed_size = write_lz77_stream(input_file, output_file);
printf("Compressed size = %d\n", compressed_size);
/* printf("Compressed size = %d\n", compressed_size); */
size_t central_directory_start = ftell(output_file); /* save location */
fseek(output_file, compressed_size_loc, SEEK_SET); /* move back */
......
File moved
File added
File added
This diff is collapsed.
File added
import unittest
import os
import filecmp
import shutil
import subprocess
import hashlib
# Input binaries that are passed, one at a time, to the executable under test.
TEST_FILES = [
'tests/resources/testfile1.bin',
'tests/resources/testfile2.bin',
'tests/resources/testfile3.bin',
'tests/resources/testfile4.bin',
]
# Expected MD5 of each generated '.deflate' file.
# Paired with TEST_FILES by position, so both lists must keep the same order.
EXPECTED_MD5 = [
'd1b520eac79e5b2acc7ad49023da4bf3',
'e2277a6ac23fd320275448add3e257ad',
'0b7fd4fd1edef574e2a6db15412eebb3',
'1d6260598f95c6bd880cca06c1c54c70'
]
# Path of the executable under test, relative to the working directory.
EXEC_FILE = './huffman'
def hash_file(path):
    """
    Return the hexadecimal MD5 digest of the file at ``path``.

    The file is opened in binary mode and hashed in fixed-size chunks,
    so memory use does not grow with the size of the file.
    """
    hasher = hashlib.md5()
    with open(path, 'rb') as file:
        # read() returns b'' at end of file, which stops the iteration
        for chunk in iter(lambda: file.read(65536), b''):
            hasher.update(chunk)
    return hasher.hexdigest()
class TestHuffman(unittest.TestCase):
    """Runs the huffman executable on each test file and checks its output."""

    def test_huffman(self):
        """
        Create .deflate files for each example binary
        and compare their checksum with known values
        """
        for filename, md5 in zip(TEST_FILES, EXPECTED_MD5):
            output_filename = filename + '.deflate'
            # subTest reports which input failed and lets the other inputs run
            with self.subTest(filename=filename):
                # Drop output left over from an earlier run so the existence
                # check below can only be satisfied by this run.
                if os.path.exists(output_filename):
                    os.remove(output_filename)
                try:
                    subprocess.run([EXEC_FILE, filename])
                    self.assertTrue(os.path.exists(output_filename))
                    self.assertEqual(hash_file(output_filename), md5)
                finally:
                    # Clean up even when an assertion above fails
                    if os.path.exists(output_filename):
                        os.remove(output_filename)


if __name__ == '__main__':
    unittest.main()
import unittest
import os
import filecmp
import shutil
import subprocess
# These tests rely on a working "unzip" command line utility
# I use the Info-ZIP one which is widely available
# Input binaries that are zipped and then recovered by the round-trip test.
TEST_FILES = [
'tests/resources/testfile1.bin',
'tests/resources/testfile2.bin',
'tests/resources/testfile3.bin',
'tests/resources/testfile4.bin',
]
# Scratch directory: the archive is written here and also extracted here.
OUTPUT_DIR = 'tests/output'
# Archive path handed to the zip executable under test; reused for every input.
OUTPUT_ZIP_NAME = os.path.join(OUTPUT_DIR, 'result.zip')
class TestMyzip(unittest.TestCase):
    """Round-trips each test file through the zip executables and ``unzip``."""

    def test_zip_utils(self):
        """
        Try to zip up each of the files
        and unzip them
        """
        for zip_exec in ['./myzip0', './myzip']:
            # make an output directory (an already existing one is fine)
            try:
                os.makedirs(OUTPUT_DIR, exist_ok=True)
            except OSError:
                self.fail('could not make output directory')

            try:
                for filename in TEST_FILES:
                    # subTest reports which executable/input pair failed
                    with self.subTest(zip_exec=zip_exec, filename=filename):
                        # make sure the test file even exists
                        self.assertTrue(os.path.exists(filename))

                        # The archive path is shared by all inputs; remove the
                        # previous archive so the existence check below can
                        # only be satisfied by this run.
                        if os.path.exists(OUTPUT_ZIP_NAME):
                            os.remove(OUTPUT_ZIP_NAME)

                        # zip up the file
                        subprocess.run([zip_exec, OUTPUT_ZIP_NAME, filename])

                        # check that zipped version exists
                        self.assertTrue(os.path.exists(OUTPUT_ZIP_NAME))

                        # now try to unzip it with "known correct" tool
                        subprocess.run(['unzip', '-q', '-o', OUTPUT_ZIP_NAME, '-d', OUTPUT_DIR],
                                       stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

                        recovered_file_path = os.path.join(OUTPUT_DIR, filename)

                        # make sure recovered file exists, and
                        self.assertTrue(os.path.exists(recovered_file_path))

                        # check that it is correct; shallow=False forces a
                        # content comparison instead of accepting files whose
                        # size and modification time happen to match
                        self.assertTrue(filecmp.cmp(filename, recovered_file_path, shallow=False))
            finally:
                # remove the scratch directory even when an assertion failed
                shutil.rmtree(OUTPUT_DIR, ignore_errors=True)


if __name__ == '__main__':
    unittest.main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment