1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import csv
from stats import make_model
from model import set_model, extract_feature, predict
from tqdm import tqdm
def train():
    """Fit the scoring model on ``train.txt`` and register it via ``set_model``.

    The file is parsed as CSV and its first row is skipped (presumably a
    header). For every remaining row, column 1 is treated as the essay text
    and columns 2 and 3 as two numeric scores; the training target is their
    sum. The ``(feature, target)`` pairs are passed to ``make_model``.

    Returns:
        The ``(b0, b1)`` pair produced by ``make_model`` -- the same value
        that is handed to ``set_model``.

    Raises:
        OSError: if ``train.txt`` cannot be opened.
        ValueError: if a score column cannot be converted to ``float``.
        IndexError: if a row has fewer than four columns.
    """
    # Context manager guarantees the handle is closed even if parsing fails.
    with open("train.txt") as trainf:
        essays = list(csv.reader(trainf))[1:]
    print(f"Training on {len(essays)} essays.")

    dataset = []
    for entry in tqdm(essays):
        essay = entry[1]
        # Parse both scores before the feature extraction so malformed rows
        # fail before any extraction work is done on them.
        target = float(entry[2]) + float(entry[3])
        dataset.append((extract_feature(essay), target))

    b0, b1 = make_model(dataset)
    set_model((b0, b1))
    # Hand the fitted parameters back so callers that bind the result
    # (e.g. ``MODEL = train()``) receive something other than None.
    return b0, b1
def is_within(prediction, answer, i):
    """Return True when ``prediction`` is at most ``i`` away from ``answer``.

    The comparison is inclusive: a difference of exactly ``i`` counts as
    within tolerance.
    """
    distance = abs(prediction - answer)
    return distance <= i
def test():
    """Score the essays in ``test.txt`` and print accuracy at several tolerances.

    The file is parsed as CSV and its first row is skipped (presumably a
    header). For every remaining row, column 1 is passed to ``predict`` and
    the reference answer is the sum of the numeric columns 2 and 3. For each
    tolerance ``i`` in 0..5 the function counts predictions with
    ``|prediction - answer| <= i`` and prints the percentage of such essays.

    If the file contains no essay rows, a short notice is printed and the
    function returns without reporting percentages.

    Raises:
        OSError: if ``test.txt`` cannot be opened.
        ValueError: if a score column cannot be converted to ``float``.
        IndexError: if a row has fewer than four columns.
    """
    # Context manager guarantees the handle is closed even if parsing fails.
    with open("test.txt") as testf:
        essays = list(csv.reader(testf))[1:]
    print(f"Testing on {len(essays)} essays.")

    if not essays:
        # The percentages below divide by len(essays); bail out instead of
        # raising ZeroDivisionError on an empty test set.
        print("No essays to evaluate.")
        return

    # correct_counts[i] = number of predictions within +/- i points.
    correct_counts = [0, 0, 0, 0, 0, 0]
    for entry in tqdm(essays):
        essay = entry[1]
        answer = float(entry[2]) + float(entry[3])
        pred = predict(essay)
        for i in range(len(correct_counts)):
            if is_within(pred, answer, i):
                correct_counts[i] += 1

    print('Prediction accuracy:')
    for i, count in enumerate(correct_counts):
        print(f'Within ± {i} points => {100*count / len(essays):.2f} % correct')
# Script driver: these statements run whenever the module is executed or
# imported. MODEL is bound to whatever train() returns; the fitted parameters
# themselves are registered through set_model() inside train().
MODEL = train()
# Evaluate after training. NOTE(review): presumably predict() relies on the
# model registered by set_model(), so this must stay after train() -- confirm
# against the model module.
test()