Commit 6dbf8f10 authored by Samantha Chang's avatar Samantha Chang
Browse files

Added hashtable lecture

parent c5a203e0
Showing with 377 additions and 0 deletions
+377 -0
File added
#include <stdlib.h>
#include <assert.h>
#include "hashtable.h"
/*
 * Occupancy state of a single slot in the table's slot array.
 *
 * EMPTY     - the slot has never held an entry; lookups stop probing here.
 * TOMBSTONE - the slot held an entry that hashtable_remove() cleared; lookups
 *             keep probing past it.
 * FILLED    - the slot currently holds a key/value pair.
 */
typedef enum {
EMPTY, TOMBSTONE, FILLED
} status_t;
/* One slot of the table: its occupancy state plus the stored key/value pointers. */
typedef struct hashtable_entry_t {
/* Whether the slot is EMPTY, a TOMBSTONE, or FILLED. */
status_t status;
/* Stored key pointer; NULL in EMPTY_ENTRY and set to NULL again on removal. */
ht_key_t *key;
/* Stored value pointer; NULL in EMPTY_ENTRY and set to NULL again on removal. */
ht_value_t *value;
} hashtable_entry_t;
/* Hash table state: the slot array plus the callbacks used to manage keys and values. */
typedef struct hashtable_t {
/* Number of slots in `data`. */
size_t capacity;
/* Number of stored entries; incremented on insertion of a new key, decremented on removal. */
size_t size;
/* Maps a key to a hash code; the starting slot is derived from it by probe(). */
hash_function_t key_hash;
/* Decides whether a stored key and a probe key are the same key. */
equals_function_t key_equals;
/* Releases a key; hashtable_free_data() treats NULL as "do not free keys". */
free_function_t key_free;
/* Releases a value; hashtable_free_data() treats NULL as "do not free values". */
free_function_t value_free;
/* Array of `capacity` slots. */
hashtable_entry_t *data;
} hashtable_t;
/* Number of slots a table starts with (see hashtable_init()). */
const size_t INITIAL_CAPACITY = 8;
/*
 * Template used to initialise every slot of a new slot array. hashtable_put()
 * also uses the address of this object as its "no usable slot found" marker.
 */
const hashtable_entry_t EMPTY_ENTRY = {.status=EMPTY, .key=NULL, .value=NULL};
/*
 * Allocates a slot array of `capacity` entries, each initialised to EMPTY_ENTRY.
 *
 * Returns the new array, which the caller owns and releases with free(), or
 * NULL if the allocation fails (previously a failed allocation was written
 * through in the fill loop).
 */
static hashtable_entry_t *hashtable_init_data(size_t capacity) {
    hashtable_entry_t *slots = calloc(capacity, sizeof *slots);
    if (slots == NULL) {
        return NULL;
    }
    /* Assign the template explicitly rather than relying on all-bits-zero
     * being a valid EMPTY status and null pointers. */
    for (size_t i = 0; i < capacity; i++) {
        slots[i] = EMPTY_ENTRY;
    }
    return slots;
}
/*
 * Releases a slot array together with the keys and values it still holds.
 *
 * table - supplies the slot count (table->capacity) and the key_free /
 *         value_free callbacks; either callback may be NULL, in which case
 *         the corresponding pointers are left alone.
 * data  - the slot array to release; expected to contain table->capacity
 *         slots. NULL is accepted and ignored.
 *
 * The occupancy check reads from `data` itself, so the array being freed is
 * the one that decides which pointers get released (the previous version
 * consulted table->data for the status while freeing pointers from `data`).
 */
static void hashtable_free_data(hashtable_t *table, hashtable_entry_t *data) {
    if (data == NULL) {
        return;
    }
    for (size_t i = 0; i < table->capacity; i++) {
        if (data[i].status != FILLED) {
            continue;
        }
        if (table->key_free != NULL) {
            table->key_free(data[i].key);
        }
        if (table->value_free != NULL) {
            table->value_free(data[i].value);
        }
    }
    free(data);
}
/*
 * Creates an empty hash table with INITIAL_CAPACITY slots.
 *
 * key_hash   - hash function applied to keys.
 * key_equals - equality predicate applied to pairs of keys.
 * key_free   - releases a key; may be NULL if the table should not free keys
 *              when it is destroyed.
 * value_free - releases a value; may be NULL if the table should not free
 *              values when it is destroyed.
 *
 * Returns the new table, to be released with hashtable_free(), or NULL if
 * either allocation fails.
 */
hashtable_t *hashtable_init(hash_function_t key_hash, equals_function_t key_equals, free_function_t key_free, free_function_t value_free) {
    hashtable_t *table = malloc(sizeof *table);
    if (table == NULL) {
        return NULL;
    }
    table->capacity = INITIAL_CAPACITY;
    table->size = 0;
    table->key_hash = key_hash;
    table->key_equals = key_equals;
    table->key_free = key_free;
    table->value_free = value_free;
    table->data = hashtable_init_data(table->capacity);
    if (table->data == NULL) {
        /* Do not hand back a table whose slot array is missing. */
        free(table);
        return NULL;
    }
    return table;
}
/*
 * Destroys a table: releases every stored key and value through the table's
 * free callbacks (where set), then the slot array, then the table itself.
 *
 * Passing NULL is a no-op, mirroring free().
 */
void hashtable_free(hashtable_t *table) {
    if (table == NULL) {
        return;
    }
    hashtable_free_data(table, table->data);
    free(table);
}
/*
 * Linear probing: returns the slot index for the given attempt, i.e.
 * (original_idx + attempt_number) mod capacity, computed without letting the
 * addition wrap around SIZE_MAX.
 *
 * original_idx   - the key's hash code (any size_t value).
 * attempt_number - 0 for the first slot tried, 1 for the next, and so on.
 * capacity       - number of slots; must be non-zero.
 */
static size_t probe(size_t original_idx, size_t attempt_number, size_t capacity) {
    size_t home = original_idx % capacity;
    size_t step = attempt_number % capacity;
    /* home and step are both below capacity; wrap manually instead of adding
     * first, so a large hash code cannot overflow the sum. */
    size_t room = capacity - home;
    return (step >= room) ? step - room : home + step;
}
/*
 * Placeholder for a shared slot-lookup helper. It is not implemented yet:
 * every argument is ignored and the result is always NULL.
 */
static hashtable_entry_t *hashtable_get_entry(hashtable_t *table, ht_key_t *key, bool stop_at_tombstone) {
    //TODO: Refactor this code after debugging to de-duplicate the code!
    (void) table;
    (void) key;
    (void) stop_at_tombstone;
    return NULL;
}
/*
 * Looks up the value stored under `key`.
 *
 * Probing starts at the key's hash code and follows probe(). A FILLED slot
 * whose key compares equal ends the search with its value; an EMPTY slot ends
 * it with "not found"; tombstones and non-matching entries are skipped.
 *
 * Returns the stored value pointer (still owned by the table), or NULL when
 * `key` is NULL or no matching entry exists.
 */
ht_value_t *hashtable_get(hashtable_t *table, ht_key_t *key) {
    if (key == NULL) {
        return NULL;
    }
    size_t home = table->key_hash(key);
    for (size_t attempt = 0; attempt < table->capacity; attempt++) {
        hashtable_entry_t *slot = &table->data[probe(home, attempt, table->capacity)];
        if (slot->status == EMPTY) {
            /* End of the probe chain: report "not found" explicitly instead of
             * returning whatever the empty slot's value field contains. */
            return NULL;
        }
        if (slot->status == FILLED && table->key_equals(slot->key, key)) {
            return slot->value;
        }
    }
    return NULL;
}
/*
 * Removes the entry stored under `key`, leaving a tombstone in its slot so
 * that entries further along the probe chain stay reachable.
 *
 * The stored key is released through key_free when that callback is set (the
 * callback may be NULL, as in hashtable_free_data()). The stored value is NOT
 * released: it is returned and becomes the caller's responsibility.
 *
 * Returns the removed value, or NULL when `key` is NULL or not present.
 */
ht_value_t *hashtable_remove(hashtable_t *table, ht_key_t *key) {
    if (key == NULL) {
        return NULL;
    }

    hashtable_entry_t *victim = NULL;
    size_t home = table->key_hash(key);
    for (size_t attempt = 0; attempt < table->capacity; attempt++) {
        hashtable_entry_t *slot = &table->data[probe(home, attempt, table->capacity)];
        if (slot->status == EMPTY) {
            break; /* end of the probe chain: the key is not stored */
        }
        if (slot->status == FILLED && table->key_equals(slot->key, key)) {
            victim = slot;
            break;
        }
    }
    if (victim == NULL) {
        return NULL;
    }

    ht_value_t *removed_value = victim->value;
    if (table->key_free != NULL) {
        table->key_free(victim->key);
    }
    victim->key = NULL;
    victim->value = NULL;
    victim->status = TOMBSTONE;
    table->size--;
    return removed_value;
}
/*
 * Doubles the table's capacity and re-inserts every FILLED entry into the new
 * slot array; tombstones are not carried over.
 *
 * The new array is allocated before the table is modified. If the doubled
 * capacity would overflow, or the allocation fails, the table is left exactly
 * as it was.
 */
static void hashtable_rehash(hashtable_t *table) {
    size_t old_capacity = table->capacity;
    if (old_capacity > (size_t) -1 / 2) {
        return; /* doubling would overflow size_t */
    }

    hashtable_entry_t *old_slots = table->data;
    hashtable_entry_t *new_slots = hashtable_init_data(old_capacity * 2);
    if (new_slots == NULL) {
        return;
    }

    table->data = new_slots;
    table->capacity = old_capacity * 2;
    /* Let hashtable_put() rebuild the count as entries are re-inserted. */
    table->size = 0;
    for (size_t i = 0; i < old_capacity; i++) {
        if (old_slots[i].status == FILLED) {
            hashtable_put(table, old_slots[i].key, old_slots[i].value);
        }
    }
    /* Keys and values now live in the new array; release only the old slots. */
    free(old_slots);
}
ht_value_t *hashtable_put(hashtable_t *table, ht_key_t *key, ht_value_t *value) {
hashtable_entry_t* existing_entry = (hashtable_entry_t*)&EMPTY_ENTRY;
if (key) {
size_t hc = table->key_hash(key);
for (size_t i = 0; i < table->capacity; i++) {
size_t idx = probe(hc, i, table->capacity);
hashtable_entry_t *candidate = &table->data[idx];
if (candidate->status == FILLED && table->key_equals(candidate->key, key)) {
existing_entry = candidate;
break;
}
if (candidate->status == EMPTY) {
existing_entry = candidate;
break;
}
if (candidate->status == TOMBSTONE) {
existing_entry = candidate;
break;
}
}
}
//There are no spaces left in our hashtable
if (existing_entry == &EMPTY_ENTRY) {
hashtable_rehash(table);
if (key) {
size_t hc = table->key_hash(key);
for (size_t i = 0; i < table->capacity; i++) {
size_t idx = probe(hc, i, table->capacity);
hashtable_entry_t *candidate = &table->data[idx];
if (candidate->status == FILLED && table->key_equals(candidate->key, key)) {
existing_entry = candidate;
break;
}
if (candidate->status == EMPTY) {
existing_entry = candidate;
break;
}
if (candidate->status == TOMBSTONE) {
existing_entry = candidate;
break;
}
}
}
assert(existing_entry != (hashtable_entry_t*)&EMPTY_ENTRY);
}
ht_value_t *old = existing_entry->value;
if (existing_entry->status == FILLED && table->key_equals(existing_entry->key, key)) {
// Key already exists, replace value only
existing_entry->value = value;
table->key_free(key);
} else {
// New key-value pair
existing_entry->key = key;
existing_entry->value = value;
existing_entry->status = FILLED;
table->size++;
}
return old;
}
/*
 * Returns a newly allocated, NULL-terminated array of the keys currently
 * stored in the table, in slot order.
 *
 * The array belongs to the caller (release it with free()); the key pointers
 * in it still belong to the table. Returns NULL if the allocation fails.
 */
ht_key_t **hashtable_keys(hashtable_t *table) {
    ht_key_t **keys = malloc((table->size + 1) * sizeof *keys);
    if (keys == NULL) {
        return NULL;
    }
    size_t count = 0;
    /* Never write more than table->size keys, so the terminator always fits. */
    for (size_t i = 0; i < table->capacity && count < table->size; i++) {
        if (table->data[i].status == FILLED) {
            keys[count++] = table->data[i].key;
        }
    }
    /* Terminate after the last key actually copied. */
    keys[count] = NULL;
    return keys;
}
/* Reports how many key/value pairs the table currently holds. */
size_t hashtable_size(hashtable_t *table) {
    size_t stored_pairs = table->size;
    return stored_pairs;
}
#include "test_util.h"
#include "hashtable.h"
#include <stdlib.h>
#include <string.h>
#include <assert.h>
// Hash function for strings
// Hash function for NUL-terminated string keys: the hash code is the string's
// length, so keys of equal length share a hash code.
// NOTE(review): the hashtable implementation spells its key type `ht_key_t`;
// confirm that `key_t` is the alias intended here.
size_t string_hash(key_t *key) {
    const char *text = (const char *) key;
    return strlen(text);
}
// Equality function for strings
// Equality function for NUL-terminated string keys: true when both strings
// have identical contents.
// NOTE(review): the hashtable implementation spells its key type `ht_key_t`;
// confirm that `key_t` is the alias intended here.
bool string_equals(key_t *a, key_t *b) {
    const char *lhs = (const char *) a;
    const char *rhs = (const char *) b;
    int ordering = strcmp(lhs, rhs);
    return ordering == 0;
}
// Free function for heap-allocated keys/values
// Releases a heap-allocated key or value by handing the pointer to free().
void free_str(void *ptr) {
    void *heap_block = ptr;
    free(heap_block);
}
// Inserts three distinct keys, then checks that each one maps to its value
// and that the table reports three entries.
void test_put_get_size() {
    char *names[] = {"a", "b", "c"};
    char *digits[] = {"1", "2", "3"};
    const size_t pair_count = sizeof names / sizeof names[0];

    hashtable_t *ht = hashtable_init(string_hash, string_equals, free_str, free_str);
    for (size_t i = 0; i < pair_count; i++) {
        // The table takes the heap copies; the literals stay with the test.
        hashtable_put(ht, strdup(names[i]), strdup(digits[i]));
    }
    for (size_t i = 0; i < pair_count; i++) {
        assert(strcmp((char *)hashtable_get(ht, names[i]), digits[i]) == 0);
    }
    assert(hashtable_size(ht) == pair_count);
    hashtable_free(ht);
}
// Stores two values under the same key and checks that the second put hands
// back the first value, that the lookup sees the second value, and that the
// entry count stays at one.
void test_overwrite_value() {
    hashtable_t *table = hashtable_init(string_hash, string_equals, free_str, free_str);

    hashtable_put(table, strdup("x"), strdup("old"));
    ht_value_t *displaced = hashtable_put(table, strdup("x"), strdup("new"));

    // The displaced value is returned to the caller, who frees it here.
    assert(strcmp((char *)displaced, "old") == 0);
    free_str(displaced);

    assert(strcmp((char *)hashtable_get(table, "x"), "new") == 0);
    assert(hashtable_size(table) == 1);

    hashtable_free(table);
}
// Removes a stored key and checks the returned value, the failed lookup
// afterwards, and the entry count; then checks that removing a key that was
// never stored yields NULL.
void test_remove() {
    hashtable_t *table = hashtable_init(string_hash, string_equals, free_str, free_str);

    hashtable_put(table, strdup("k"), strdup("v"));
    assert(strcmp((char *)hashtable_get(table, "k"), "v") == 0);

    // The removed value is handed to the caller, who frees it here.
    ht_value_t *taken = hashtable_remove(table, "k");
    assert(strcmp((char *)taken, "v") == 0);
    free_str(taken);

    assert(hashtable_get(table, "k") == NULL);
    assert(hashtable_size(table) == 0);

    // "a" was never inserted.
    taken = hashtable_remove(table, "a");
    assert(taken == NULL);

    hashtable_free(table);
}
// Stores two keys and checks that both appear in the NULL-terminated array
// returned by hashtable_keys(). Only the array is freed here; the key strings
// are released when the table is freed.
// NOTE(review): the hashtable implementation spells its key type `ht_key_t`;
// confirm that `key_t` is the alias intended here.
void test_keys() {
    hashtable_t *table = hashtable_init(string_hash, string_equals, free_str, free_str);
    hashtable_put(table, strdup("apple"), strdup("red"));
    hashtable_put(table, strdup("banana"), strdup("yellow"));

    key_t **keys = hashtable_keys(table);
    size_t matches = 0;
    for (key_t **cursor = keys; *cursor != NULL; cursor++) {
        if (strcmp(*cursor, "apple") == 0 || strcmp(*cursor, "banana") == 0) {
            matches++;
        }
    }
    assert(matches == 2);

    free(keys);
    hashtable_free(table);
}
// Inserts more entries than the table's initial capacity so that it has to
// grow, then checks that every key still maps to its value and that the
// entry count is right.
void test_rehashing() {
    // More than INITIAL_CAPACITY (8), to trigger a rehash.
    enum { ENTRY_COUNT = 20 };

    hashtable_t *ht = hashtable_init(string_hash, string_equals, free_str, free_str);
    for (int i = 0; i < ENTRY_COUNT; i++) {
        char key[10], val[10];
        snprintf(key, sizeof(key), "k%d", i); //Stores one of k0, k1, ..., k19 in key
        snprintf(val, sizeof(val), "v%d", i);
        hashtable_put(ht, strdup(key), strdup(val));
    }
    for (int i = 0; i < ENTRY_COUNT; i++) {
        char key[10], val[10];
        snprintf(key, sizeof(key), "k%d", i);
        snprintf(val, sizeof(val), "v%d", i);
        // Look the key up once and make sure it was found before the result
        // is handed to printf's %s or to strcmp.
        char *found = (char *)hashtable_get(ht, key);
        assert(found != NULL);
        printf("HASH %s \n", found);
        printf("VAL %s \n", val);
        assert(strcmp(found, val) == 0);
    }
    assert(hashtable_size(ht) == ENTRY_COUNT);
    hashtable_free(ht);
}
// Removes a key from the middle of a group of keys that share a hash code
// (string_hash returns the length, and all keys here have length 1), then
// checks that the remaining keys are still found, that the removed key is
// gone, and that a new key can be inserted and found afterwards.
void test_tombstone_handling() {
    hashtable_t *ht = hashtable_init(string_hash, string_equals, free_str, free_str);
    hashtable_put(ht, strdup("a"), strdup("1"));
    hashtable_put(ht, strdup("b"), strdup("2"));
    hashtable_put(ht, strdup("c"), strdup("3"));
    assert(hashtable_size(ht) == 3);

    char *removed = hashtable_remove(ht, "b");
    assert(removed != NULL);
    assert(strcmp(removed, "2") == 0);
    free_str(removed);
    assert(hashtable_get(ht, "b") == NULL);
    assert(hashtable_size(ht) == 2);

    // Keys inserted before and after the removed one must stay reachable.
    assert(strcmp((char *)hashtable_get(ht, "a"), "1") == 0);
    assert(strcmp((char *)hashtable_get(ht, "c"), "3") == 0);

    // Insert new key that may reuse tombstone
    hashtable_put(ht, strdup("d"), strdup("4"));
    assert(strcmp((char *)hashtable_get(ht, "d"), "4") == 0);
    assert(hashtable_size(ht) == 3);
    assert(strcmp((char *)hashtable_get(ht, "c"), "3") == 0);

    hashtable_free(ht);
}
// Test driver entry point.
// With no command-line argument every test is requested; otherwise argv[1] is
// passed to read_testname() to fill `testname`.
// NOTE(review): DO_TEST and read_testname are not defined in this file
// (presumably they come from test_util.h). DO_TEST presumably consults
// `all_tests` and `testname` to decide whether to run each test - confirm
// there before renaming either local.
int main(int argc, char *argv[]) {
// True when the program was started without any arguments.
bool all_tests = argc == 1;
char testname[100];
if (!all_tests) {
read_testname(argv[1], testname, sizeof(testname));
}
DO_TEST(test_put_get_size);
DO_TEST(test_overwrite_value);
DO_TEST(test_remove);
DO_TEST(test_keys);
DO_TEST(test_rehashing);
DO_TEST(test_tombstone_handling);
// Printed once control gets past all of the DO_TEST lines.
puts("hashtable_test PASS");
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment