Ignore:
Timestamp:
Jul 29, 2019, 2:45:44 PM (5 years ago)
Author:
Murray Heymann <heymann.murray@…>
Branches:
fieker-DuVal (117eb8c30fc9e991c4decca4832b1d19036c4c65), spielwiese (c5facdfddea2addfd91babd8b9019161dea4b695)
Children:
eb2904af1f3fb2d653378807ed3b18780cfd4dbc
Parents:
d93ae5668f2da84265ee686cd190828895b49cba
Message:
Optimize methods
File:
1 edited

Legend:

Unmodified
Added
Removed
  • machine_learning/model/predictor.py

    rd93ae5 r91d4ae  
    1212# Local imports
    1313from common.keyword_vector import vector_distance, count_occurances, \
    14                                     read_dictionary
     14        read_dictionary, normalise_vector
    1515from common.lookuptable import create_table
    1616from common.constants import KEYWORDS_FILE
     
    4848        for x in X: # pylint: disable=invalid-name
    4949            # find the closest vector
    50 
    5150            min_val = float("inf")
    52             min_vec = None
     51            index = -1
     52            i = 0
    5353            for vec in self.vectors:
    5454                dist = vector_distance(x, vec)
    5555                if dist < min_val:
    5656                    min_val = dist
    57                     min_vec = vec
     57                    index = i
     58                i = i + 1
    5859
    5960            # find corresponding filename
    60             index = list(self.vectors).index(min_vec)
    6161            file = self.files[index]
    6262            ret_list.append(file)
     
    7070    print("Running some tests")
    7171    predictor = HelpPagePredictor()
    72     vector1 = {"hello":1, "bye":4, "pizza": 10}
    73     vector2 = {"hello":2, "bye":3, "pizza": 1}
    74     vector3 = {"hello":3, "bye":9, "pizza": 3}
     72    vector1 = normalise_vector([1, 4, 10])
     73    vector2 = normalise_vector([2, 3, 1])
     74    vector3 = normalise_vector([3, 9, 3])
    7575
    7676    vectors = np.array([vector1, vector2, vector3])
     
    7878    print(vectors)
    7979    print(files)
     80    print()
    8081
    81     testvec = {"hello":1, "bye":1, "pizza": 1}
     82    testvec = normalise_vector([1, 1, 1])
     83    print("test vector:")
     84    print(testvec)
     85    print()
    8286
    8387    print("distance to 1")
     
    9397    predictor.fit(vectors, files)
    9498    prediction = predictor.predict(np.array([testvec]))
     99    print("Prediction:")
    95100    print(prediction)
     101    print()
    96102
    97103    dictionary = read_dictionary(KEYWORDS_FILE)
     104
    98105    start = time.time()
    99106    vectors, file_list = create_table(dictionary=dictionary)
    100107    end = time.time()
    101108    print(end - start, "seconds to create_table")
     109
    102110    test_vec = count_occurances("extract.lib", dictionary)
    103111    predictor.fit(vectors, file_list)
     112
    104113    start = time.time()
    105114    prediction = predictor.predict(np.array([test_vec]))
    106115    end = time.time()
     116    print(end - start, "seconds to make prediction")
    107117    print(prediction)
    108     print(end - start, "seconds to make prediction")
    109118
    110119if __name__ == '__main__':
Note: See TracChangeset for help on using the changeset viewer.