source: git/Singular/dyn_modules/machinelearning/ml_python/predictor_runner.py @ 133ae7

spielwiese
Last change on this file since 133ae7 was 133ae7, checked in by Murray Heymann <heymann.murray@…>, 5 years ago
Convert machine learning to dynamic module
  • Property mode set to 100644
File size: 2.5 KB
Line 
1"""
2A script to demonstrate how the predictor works
3"""
4
5import os
6import sys
7import time
8import numpy as np
9
10from model.predictor import HelpPagePredictor
11from common.keyword_vector import read_dictionary, count_occurances
12from common.lookuptable import create_table
13from common.constants import KEYWORDS_FILE
14
def find_prediction(filename):
    """
    Given a file name as string, get the predicted help page name.

    Reads the keyword dictionary from KEYWORDS_FILE, builds the lookup
    table with create_table and hands both to get_prediction.  The time
    spent building the table and the time spent predicting are each
    printed to stdout.

    filename: path of the file to predict for; forwarded unchanged to
              get_prediction.

    Returns the value produced by get_prediction for that file.
    """
    dictionary = read_dictionary(KEYWORDS_FILE)

    # perf_counter() is a monotonic, high-resolution clock, so the measured
    # interval cannot be skewed by system clock adjustments the way a
    # difference of time.time() values can.
    start = time.perf_counter()
    vectors, file_list = create_table(dictionary=dictionary)
    end = time.perf_counter()
    print(end - start, "seconds to create_table")

    start = time.perf_counter()
    pred = get_prediction(filename, dictionary, vectors, file_list)
    end = time.perf_counter()
    print(end - start, "seconds to make prediction.")
    return pred
31
def get_prediction(filename, dictionary, vectors, file_list):
    """
    Fit a new HelpPagePredictor and return its prediction for one file.

    filename:   path of the file to classify; passed through
                os.path.expanduser before it is used.
    dictionary: keyword dictionary handed to count_occurances together
                with the file path.
    vectors:    first argument given to the predictor's fit method.
    file_list:  second argument given to the predictor's fit method.

    Returns the first entry of the predictor's output, converted with
    its tolist() method.
    """
    # A fresh predictor is trained on every call.
    model = HelpPagePredictor()
    model.fit(vectors, file_list)

    expanded_path = os.path.expanduser(filename)
    occurrence_vector = count_occurances(expanded_path, dictionary)

    # predict() is given a batch holding just this one vector, so only
    # the first entry of its output is relevant.
    sample = np.array([occurrence_vector])
    first_result = model.predict(sample)[0]
    return first_result.tolist()
43
44
def main():
    """
    Run some basic tests.

    Builds the lookup table from the keyword dictionary, fits a
    HelpPagePredictor on it, and prints:
      * the time needed to create the table,
      * the prediction (and its timing) for an all-zero vector,
      * the prediction for the file "extract.lib",
      * the prediction for every command line argument that names an
        existing file.
    """
    print("Running some tests")

    dictionary = read_dictionary(KEYWORDS_FILE)

    # perf_counter() is monotonic and high-resolution, which makes it the
    # appropriate clock for measuring elapsed intervals; time.time() can
    # jump when the system clock is adjusted.
    start = time.perf_counter()
    vectors, file_list = create_table(dictionary=dictionary)
    end = time.perf_counter()
    print(end - start, "seconds to create_table")

    predictor = HelpPagePredictor()
    predictor.fit(vectors, file_list)

    print("prediction for zero vector")
    start = time.perf_counter()
    zerovec = np.zeros(len(dictionary))
    prediction = predictor.predict(np.array([zerovec]))
    end = time.perf_counter()
    print(end - start, "seconds to make prediction")
    print(prediction)
    print()

    prediction = get_prediction("extract.lib",
                                dictionary,
                                vectors,
                                file_list)
    print(prediction)
    print()

    # sys.argv[0] is the script itself; only the remaining arguments are
    # candidate input files.  The slice is empty when none were given.
    for path in sys.argv[1:]:
        # Arguments that do not name an existing file are skipped silently.
        if not os.path.isfile(path):
            continue

        print("predicting for file", path)
        prediction = get_prediction(path,
                                    dictionary,
                                    vectors,
                                    file_list)
        print(prediction)
        print()
92
93if __name__ == '__main__':
94    main()
Note: See TracBrowser for help on using the repository browser.