Patch for bayes.py: moves the trailing "# Pr(...)" comments of the three
probability stubs into docstrings, and renames pr_category_given_words to
log_pr_category_given_words (both the definition and its call in predict).
NOTE(review): line breaks and leading whitespace inside the hunk were
reconstructed from a whitespace-collapsed copy of this patch; blank-line
placement follows the hunk header counts (21 old / 30 new lines), but the
indentation widths are assumed - confirm against bayes.py before applying.

diff --git a/bayes.py b/bayes.py
index 2805d46c7195c31771213b6e57bf0e2e51466dee..c73f2330517011aa7df2ffe8cd7053abdc420390 100644
--- a/bayes.py
+++ b/bayes.py
@@ -35,21 +35,30 @@ After training (which is run before your code), the following 3 global variables
     num_data_points_in_category[category] = Total number of documents in the category 'category'
 """
 @cache
-def pr_category(category : str) : # Pr(category)
+def pr_category(category : str):
+    """
+    Computes Pr(category)
+    """
     return 0
 
 @cache
-def pr_word_given_category(word : str, category : str, num_words_in_document : int): # Pr(word | category)
+def pr_word_given_category(word : str, category : str, num_words_in_document : int):
+    """
+    Computes Pr(word | category)
+    """
     return 0
 
-def pr_category_given_words(words : List[str], category : str): # Pr(category | words)
+def log_pr_category_given_words(words : List[str], category : str):
+    """
+    Computes log(Pr(category | words))
+    """
     return 0
 
 def predict(categories, words):
     best = None
     best_likelihood = -inf
     for category in categories:
-        pr = pr_category_given_words(words, category)
+        pr = log_pr_category_given_words(words, category)
         if pr > best_likelihood:
             best = category
             best_likelihood = pr