1 | """ |
---|
2 | A script to demonstrate how the predictor works |
---|
3 | """ |
---|
4 | |
---|
5 | import os |
---|
6 | import sys |
---|
7 | import time |
---|
8 | import numpy as np |
---|
9 | |
---|
10 | from model.predictor import HelpPagePredictor |
---|
11 | from common.keyword_vector import read_dictionary, count_occurances |
---|
12 | from common.lookuptable import create_table |
---|
13 | from common.constants import KEYWORDS_FILE |
---|
14 | |
---|
def find_prediction(filename):
    """
    Given a file name as string, get the predicted help page name.

    Reads the keyword dictionary from KEYWORDS_FILE, builds the lookup
    table with create_table, then delegates to get_prediction. The time
    spent in each of the two steps is printed to stdout.

    filename: path of the file to predict for; get_prediction applies
        os.path.expanduser to it.

    Returns whatever get_prediction returns for that file.
    """
    dictionary = read_dictionary(KEYWORDS_FILE)

    # perf_counter() is monotonic, so the measured interval cannot be
    # distorted by a system clock adjustment the way time.time() can.
    start = time.perf_counter()
    vectors, file_list = create_table(dictionary=dictionary)
    end = time.perf_counter()
    print(end - start, "seconds to create_table")

    start = time.perf_counter()
    pred = get_prediction(filename, dictionary, vectors, file_list)
    end = time.perf_counter()
    print(end - start, "seconds to make prediction.")
    return pred
---|
31 | |
---|
def get_prediction(filename, dictionary, vectors, file_list):
    """
    Train a predictor, get the predicted help page name.

    A new HelpPagePredictor is fitted on (vectors, file_list) on every
    call. The file at filename (after "~" expansion) is converted with
    count_occurances, and the first entry of the predictor's output for
    that single sample is returned via .tolist().
    """
    model = HelpPagePredictor()
    model.fit(vectors, file_list)

    expanded_path = os.path.expanduser(filename)
    occurrence_vector = count_occurances(expanded_path, dictionary)

    # The predictor is handed a batch, so wrap the single sample in one.
    batch = np.array([occurrence_vector])
    first_result = model.predict(batch)[0]
    return first_result.tolist()
---|
43 | |
---|
44 | |
---|
def main():
    """
    Run some basic tests.

    Builds the lookup table once, then prints predictions for:
      * an all-zero vector (using a predictor fitted here),
      * the hard-coded file "extract.lib",
      * every command-line argument that names an existing file.

    Timings for table creation and the zero-vector prediction are
    printed to stdout. Arguments that are not regular files are skipped
    without a message.
    """
    print("Running some tests")

    dictionary = read_dictionary(KEYWORDS_FILE)

    # perf_counter() is monotonic, so the measured interval cannot be
    # distorted by a system clock adjustment the way time.time() can.
    start = time.perf_counter()
    vectors, file_list = create_table(dictionary=dictionary)
    end = time.perf_counter()
    print(end - start, "seconds to create_table")

    predictor = HelpPagePredictor()
    predictor.fit(vectors, file_list)

    print("prediction for zero vector")
    start = time.perf_counter()
    zerovec = np.zeros(len(dictionary))
    prediction = predictor.predict(np.array([zerovec]))
    end = time.perf_counter()
    print(end - start, "seconds to make prediction")
    print(prediction)
    print()

    prediction = get_prediction("extract.lib",
                                dictionary,
                                vectors,
                                file_list)
    print(prediction)
    print()

    # sys.argv[0] is the script name; an empty slice simply skips the loop,
    # so no separate argument-count check is needed.
    for path in sys.argv[1:]:
        # get_prediction expands "~" before reading the file, so apply the
        # same expansion here; otherwise a literal "~/..." argument would
        # be rejected even though it could be processed.
        if not os.path.isfile(os.path.expanduser(path)):
            continue

        print("predicting for file", path)
        prediction = get_prediction(path,
                                    dictionary,
                                    vectors,
                                    file_list)
        print(prediction)
        print()
---|
92 | |
---|
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
---|