import java.io.File;
import java.io.FileNotFoundException;
import java.util.HashMap;
import java.util.Map;
import java.util.Scanner;

/**
 * Lookup table of base-10 log-likelihoods for quad-grams, built from the
 * frequency counts stored in {@code english_quadgrams.txt} in the current
 * working directory.
 */
public class QuadGramLikelihoods {
    /** Maps each quad-gram found in the data file to its log10 likelihood. */
    private final Map<String, Double> likelihoods;

    /** Log10 likelihood returned for any quad-gram absent from the data file. */
    private final double notFoundLikelihood;

    /**
     * Loads the quad-gram frequencies and converts them to log-likelihoods.
     *
     * @throws FileNotFoundException if {@code english_quadgrams.txt} cannot be opened
     * @throws IllegalArgumentException if the file contains a malformed line
     */
    public QuadGramLikelihoods() throws FileNotFoundException {
        Map<String, Integer> freqs = getQuadFrequencies();

        long total = 0;
        for (int freq : freqs.values()) {
            total += freq;
        }
        // The denominator is padded with one extra count per distinct quad-gram,
        // so an unseen quad-gram is scored as a single occurrence (1 / total).
        total += freqs.size();

        Map<String, Double> logLikelihoods = new HashMap<>();
        for (Map.Entry<String, Integer> entry : freqs.entrySet()) {
            logLikelihoods.put(
                    entry.getKey(),
                    Math.log10((double) entry.getValue() / (double) total));
        }
        this.likelihoods = logLikelihoods;
        this.notFoundLikelihood = Math.log10(1.0 / total);
    }

    /**
     * Reads {@code english_quadgrams.txt} and returns the frequency recorded for
     * each quad-gram. Every non-blank line must hold a quad-gram followed by an
     * integer count, separated by one or more whitespace characters; blank lines
     * are skipped. If a quad-gram appears more than once, the last count wins.
     *
     * @return a mutable map from quad-gram to its frequency count
     * @throws FileNotFoundException if {@code english_quadgrams.txt} cannot be opened
     * @throws IllegalArgumentException if a non-blank line lacks a count, or the
     *         count is not a valid integer ({@link NumberFormatException})
     */
    public static Map<String, Integer> getQuadFrequencies() throws FileNotFoundException {
        Map<String, Integer> freqs = new HashMap<>();
        // try-with-resources guarantees the file handle is released, even when
        // parsing fails part-way through.
        try (Scanner quadgrams = new Scanner(new File("english_quadgrams.txt"))) {
            while (quadgrams.hasNextLine()) {
                String line = quadgrams.nextLine().trim();
                if (line.isEmpty()) {
                    continue; // tolerate blank lines, e.g. a trailing newline
                }
                // "\\s+" so that runs of spaces/tabs count as a single separator.
                String[] pieces = line.split("\\s+");
                if (pieces.length < 2) {
                    throw new IllegalArgumentException(
                            "Malformed quad-gram line (expected \"<quadgram> <count>\"): " + line);
                }
                freqs.put(pieces[0], Integer.parseInt(pieces[1]));
            }
        }
        return freqs;
    }

    /**
     * Returns the log-likelihood of the provided quad-gram.
     *
     * @param quadgram the quad-gram to get the likelihood of
     * @return the base-10 log-likelihood as calculated from our data set, or the
     *         fixed "not found" likelihood if the quad-gram is not in the data set
     */
    public double get(String quadgram) {
        Double likelihood = likelihoods.get(quadgram);
        return likelihood != null ? likelihood : this.notFoundLikelihood;
    }
}