Ignore:
Timestamp:
Aug 2, 2019, 7:35:10 PM (5 years ago)
Author:
Murray Heymann <heymann.murray@…>
Branches:
(u'spielwiese', 'fe61d9c35bf7c61f2b6cbf1b56e25e2f08d536cc')
Children:
58603fd7faac3d049f61ca0b65d89e4be0a664f9
Parents:
fece1392f8e9ff07b64d0ed4e5ec57bfa6dbf258
git-author:
Murray Heymann <heymann.murray@gmail.com> 2019-08-02 19:35:10+02:00
git-committer:
Murray Heymann <heymann.murray@gmail.com> 2019-08-02 19:35:13+02:00
Message:
Use sklearn to count keyword occurrences
File:
1 edited

Legend:

Unmodified
Added
Removed
  • machine_learning/predictor_runner.py

    rfece13 r112c79  
    55import os
    66import sys
     7import time
    78import numpy as np
    8 from model.predictor import *
     9
     10from model.predictor import HelpPagePredictor
     11from common.keyword_vector import read_dictionary, count_occurances
     12from common.lookuptable import create_table
     13from common.constants import KEYWORDS_FILE
    914
    1015def find_prediction(filename):
     16    """
     17    Given a file name as string, get the predicted help page name
     18    """
    1119    dictionary = read_dictionary(KEYWORDS_FILE)
     20
    1221    start = time.time()
    1322    vectors, file_list = create_table(dictionary=dictionary)
     
    1524    print(end - start, "seconds to create_table")
    1625
     26    return _find_prediction(filename, dictionary, vectors, file_list)
     27
     28
     29def _find_prediction(filename, dictionary, vectors, file_list):
     30    """
     31    Train a predictor, get the predicted help page name
     32    """
    1733    predictor = HelpPagePredictor()
    1834    predictor.fit(vectors, file_list)
     
    2440    print(end - start, "seconds to make prediction")
    2541    return prediction
    26    
     42
    2743
    2844def main():
     
    4258    predictor.fit(vectors, file_list)
    4359
    44     start = time.time()
    45     test_vec = count_occurances("extract.lib", dictionary)
    46     prediction = predictor.predict(np.array([test_vec]))
    47     end = time.time()
    48     print(end - start, "seconds to make prediction")
    49     print(prediction)
    50     print()
    51 
    5260    print("prediction for zero vector")
    5361    start = time.time()
     
    5967    print()
    6068
     69    prediction = _find_prediction("extract.lib",
     70                                  dictionary,
     71                                  vectors,
     72                                  file_list)
     73    print(prediction)
     74    print()
     75
     76
    6177    if len(sys.argv) >= 2:
    6278        for i in range(len(sys.argv)):
     
    6581            if not os.path.isfile(sys.argv[i]):
    6682                continue
     83
    6784            print("predicting for file", sys.argv[i])
    68             start = time.time()
    69             test_vec = count_occurances(sys.argv[i], dictionary)
    70             prediction = predictor.predict(np.array([test_vec]))
    71             end = time.time()
    72             print(end - start, "seconds to make prediction")
     85            prediction = _find_prediction(sys.argv[i],
     86                                          dictionary,
     87                                          vectors,
     88                                          file_list)
    7389            print(prediction)
    7490            print()
Note: See TracChangeset for help on using the changeset viewer.