Changeset 55a5abb in git
- Timestamp:
- Jul 26, 2019, 7:15:06 PM (4 years ago)
- Branches:
- spielwiese (828514cf6e480e4bafc26df99217bf2a1ed1ef45)
- Children:
- d93ae5668f2da84265ee686cd190828895b49cba
- Parents:
- 8720894b787185ed212506a9a4492ddf6e5f014c
- git-author:
- Murray Heymann <heymann.murray@gmail.com>2019-07-26 19:15:06+02:00
- git-committer:
- Murray Heymann <heymann.murray@gmail.com>2019-07-26 19:15:13+02:00
- Location:
- machine_learning
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
machine_learning/common/keyword_vector.py
r872089 r55a5abb 29 29 30 30 31 def count_occurances(filename, dictionary ):31 def count_occurances(filename, dictionary, normalise=True): 32 32 """ 33 33 Create a vector from a dictionary and populate the counts according to … … 52 52 vector[word] = vector[word] + 1 53 53 line = file.readline() 54 if normalise: 55 normalise_vector(vector) 54 56 return vector 55 57 … … 97 99 return -1 98 100 99 nvec1 = copy_vector(vec1)100 nvec2 = copy_vector(vec2)101 normalise_vector(nvec1)102 normalise_vector(nvec2)103 101 104 102 dist = 0 105 for key in nvec1:106 dist = dist + ( nvec1[key] - nvec2[key]) ** 2103 for key in vec1: 104 dist = dist + (vec1[key] - vec2[key]) ** 2 107 105 108 106 dist = math.sqrt(dist) … … 142 140 143 141 vector1 = {"hello":3, "bye":4} 142 normalise_vector(vector1) 144 143 vector2 = {"hello":4, "bye":3} 144 normalise_vector(vector2) 145 145 print("distance same vector: " + str(vector_distance(vector1, vector1))) 146 146 print("distance different vector: " + str(vector_distance(vector1, vector2))) -
machine_learning/common/lookuptable.py
r872089 r55a5abb 14 14 15 15 # local imports 16 from common.keyword_vector import count_occurances, read_dictionary 16 from common.keyword_vector import count_occurances, read_dictionary, \ 17 normalise_vector 17 18 from common.constants import HELP_FILE_URL, HELP_FILE_PATH, SINGULAR_BIN, \ 18 19 EXTRACT_SCRIPT, KEYWORDS_FILE, HELPFILE_NPY, \ … … 64 65 for file in file_list: 65 66 vector = count_occurances(os.path.join(HELP_FILE_PATH, "html", 66 file), dictionary) 67 file), 68 dictionary, 69 normalise=False) 67 70 vectors.append(vector) 68 71 vectors = np.array(vectors) … … 71 74 vectors = np.load(VECTORS_NPY) 72 75 file_list = np.load(HELPFILE_NPY) 76 for vector in vectors: 77 normalise_vector(vector) 73 78 74 79 return (vectors, file_list) -
machine_learning/model/predictor.py
r872089 r55a5abb 2 2 Define the predictor class for classifying according to help page. 3 3 """ 4 5 import cProfile 6 import time 4 7 5 8 # Third party imports … … 93 96 94 97 dictionary = read_dictionary(KEYWORDS_FILE) 98 start = time.time() 95 99 vectors, file_list = create_table(dictionary=dictionary) 100 end = time.time() 101 print(end - start, "seconds to create_table") 96 102 test_vec = count_occurances("extract.lib", dictionary) 97 103 predictor.fit(vectors, file_list) 104 start = time.time() 98 105 prediction = predictor.predict(np.array([test_vec])) 106 end = time.time() 99 107 print(prediction) 108 print(end - start, "seconds to make prediction") 100 109 101 110 if __name__ == '__main__': 102 main()111 cProfile.run("main()")
Note: See TracChangeset
for help on using the changeset viewer.