# main.py

import csv
from stats import make_model
from model import set_model, extract_feature, predict
from tqdm import tqdm


def train():
  """Fit the scoring model on the essays in ``train.txt`` and register it.

  Reads ``train.txt`` as CSV and skips the first row (presumably a header).
  For every remaining row, column 1 is used as the essay text and columns 2
  and 3 are parsed as two numeric scores. Each essay is converted with
  ``extract_feature`` and paired with the sum of its two scores; the pairs
  are handed to ``make_model`` and the resulting pair is installed through
  ``set_model``.

  Returns:
    The ``(b0, b1)`` pair produced by ``make_model``, so callers that keep
    the result (e.g. ``MODEL = train()``) receive the fitted model rather
    than ``None``.
  """
  # Read everything up front inside a context manager so the file handle is
  # closed even if parsing or feature extraction fails later.
  with open("train.txt") as trainf:
    reader = csv.reader(trainf)
    next(reader, None)  # drop the first row; harmless on an empty file
    essays = list(reader)

  print(f"Training on {len(essays)} essays.")

  dataset = []
  for entry in tqdm(essays):
    essay = entry[1]
    score1 = float(entry[2])
    score2 = float(entry[3])

    # The regression target is the combined score of both columns.
    dataset.append((extract_feature(essay), score1 + score2))

  b0, b1 = make_model(dataset)
  set_model((b0, b1))
  return b0, b1

def is_within(prediction, answer, i):
    """Return True when ``prediction`` is at most ``i`` away from ``answer``.

    The comparison is inclusive: a difference exactly equal to ``i`` counts
    as being within range.
    """
    deviation = abs(prediction - answer)
    return deviation <= i

def test():
  """Evaluate the current model on ``test.txt`` and print accuracy figures.

  Reads ``test.txt`` as CSV and skips the first row (presumably a header).
  For every remaining row, column 1 is the essay text and columns 2 and 3
  are parsed as two numeric scores whose sum is the expected answer. The
  essay is scored with ``predict`` and the prediction is compared with the
  answer at tolerances 0 through 5 using ``is_within``. The share of essays
  within each tolerance is printed as a percentage.

  If the file contains no essay rows, a notice is printed and the report is
  skipped instead of dividing by zero.
  """
  # Read everything up front inside a context manager so the file handle is
  # closed even if prediction fails later.
  with open("test.txt") as testf:
    reader = csv.reader(testf)
    next(reader, None)  # drop the first row; harmless on an empty file
    essays = list(reader)

  print(f"Testing on {len(essays)} essays.")
  if not essays:
    # Percentages are undefined without any samples.
    print("No test essays found; skipping accuracy report.")
    return

  # correct_counts[t] = number of essays predicted within +/- t points.
  correct_counts = [0] * 6
  for entry in tqdm(essays):
    essay = entry[1]
    score1 = float(entry[2])
    score2 = float(entry[3])

    pred = predict(essay)
    answer = score1 + score2

    for tolerance in range(len(correct_counts)):
      if is_within(pred, answer, tolerance):
        correct_counts[tolerance] += 1

  print('Prediction accuracy:')
  for tolerance, hits in enumerate(correct_counts):
    print(f'Within ± {tolerance} points => {100 * hits / len(essays):.2f} % correct')

# Script driver: these statements run whenever this module is executed or
# imported. train() must run first because it registers the fitted model via
# set_model(); test() then evaluates predictions against test.txt.
# MODEL keeps whatever train() returns.
MODEL = train()
test()