Changeset f59883 in git for machine_learning/common/keyword_vector.py
- Timestamp: Jul 30, 2019, 5:21:47 PM (4 years ago)
- Branches: (u'spielwiese', '8e0ad00ce244dfd0756200662572aef8402f13d5')
- Children: da892581a52069935f084604d05c0ecd6d19d5c9
- Parents: de88e1d84f18979811df4c3d1dc68f221a3362ef
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
machine_learning/common/keyword_vector.py
rde88e1d rf59883 5 5 import os 6 6 import re 7 import sys8 7 import numpy as np 9 8 … … 17 16 """ 18 17 if not os.path.isfile(filename): 19 print("Please provide a valid input file as argument") 20 return np.array([]) 18 print("Please provide a valid input file as argument to read " 19 "dictionary") 20 raise FileNotFoundError 21 21 22 22 dictionary = [] … … 38 38 if not os.path.isfile(filename): 39 39 print("Please provide a valid input file as argument") 40 return [] 41 if dictionary.size == 0: 42 print("Please provide a valid dictionary as argument") 43 return [] 44 if dictionary is None: 45 print("Please provide a valid dictionary as argument") 46 return [] 40 raise FileNotFoundError 41 assert dictionary is not None, \ 42 "Please provide a valid dictionary as argument" 43 assert not dictionary.size == 0, \ 44 "Please provide a valid dictionary as argument" 45 47 46 vector = create_vector_dictionary(dictionary) 48 47 with open(filename, "r+") as file: … … 67 66 Return an identical copy of a dictionary 68 67 """ 69 new_dic = [] 70 for word in dictionary: 71 new_dic.append(word) 72 return new_dic 68 return np.copy(np.array(dictionary)) 73 69 74 70 … … 84 80 def create_vector_dictionary(dictionary): 85 81 """ 86 Create a zero vectorfor a given dictionary82 Create a zero lookup dictionary for a given dictionary 87 83 """ 88 84 assert not dictionary is None, "Please give a dictionary" … … 98 94 Calculate the Euclidean distance between two vectors. 
99 95 """ 100 if not len(vec1) == len(vec2): 101 print("Vectors don't have the same sizes") 102 return -1 96 assert len(vec1) == len(vec2), \ 97 "Vectors don't have the same sizes" 103 98 104 99 dist = np.linalg.norm(vec1 - vec2) … … 114 109 115 110 if vec is None: 116 print(" Please providea valid vector")111 print("Warning, None is not a valid vector") 117 112 print("Returning empty vector by default") 118 113 return np.array([]) … … 120 115 if not isinstance(vec, np.ndarray): 121 116 print("Warning, vector should be a numpy array") 117 118 if np.array(vec).size == 0: 119 print("Warning, vector being normalised is empty") 120 122 121 norm = np.linalg.norm(vec) 123 122 if not norm == 0: … … 125 124 return vec 126 125 127 128 def main(): 129 """ 130 Run some basic tests 131 """ 132 133 testvector = np.array([3, 4]) 134 normalise_vector(testvector) 135 print("normalised vector: " + str(testvector)) 126 def test_read_dictionary(): 127 """ 128 Create test for read_dictionary function 129 """ 130 print("\033[1;32;40mTesting read_dictionary function:\033[1;37;40m") 131 132 print("Non-existant file") 133 correct = False 134 try: 135 read_dictionary("asdfasdf") 136 except FileNotFoundError: 137 print("correctly caught non-existant file") 138 print("\033[1;32;40mpass\033[1;37;40m") 139 correct = True 140 if not correct: 141 print("\033[1;31;40mfail\033[1;37;40m") 142 143 print("Reading default file") 144 correct = True 145 try: 146 read_dictionary() 147 except FileNotFoundError: 148 print("Default file for dictionary missing") 149 print("\033[1;31;40mfail\033[1;37;40m") 150 correct = False 151 if correct: 152 print("\033[1;32;40mpass\033[1;37;40m") 153 print() 154 print() 155 156 def test_count_occurances(): 157 """ 158 Create test for count_occurances function 159 """ 160 print("\033[1;32;40mTesting count_occurances function:\033[1;37;40m") 161 dic = read_dictionary() 162 correct = False 163 try: 164 vec = count_occurances("asdfasdf", dic) 165 except FileNotFoundError: 
166 correct = True 167 print("Correctly raised FileNotFoundError") 168 print("\033[1;32;40mpass\033[1;37;40m") 169 if not correct: 170 print("\033[1;31;40mfail\033[1;37;40m") 171 172 print("Count occurances with None dictionary:") 173 correct = False 174 try: 175 count_occurances("../Singular/table.h", None) 176 except AssertionError: 177 print("Correctly caught AssertionError") 178 print("\033[1;32;40mpass\033[1;37;40m") 179 correct = True 180 if not correct: 181 print("\033[1;31;40mfail\033[1;37;40m") 182 183 184 print("Count occurances with empty dictionary:") 185 correct = False 186 try: 187 count_occurances("../Singular/table.h", np.array([])) 188 except AssertionError: 189 print("Correctly caught AssertionError") 190 print("\033[1;32;40mpass\033[1;37;40m") 191 correct = True 192 if not correct: 193 print("\033[1;31;40mfail\033[1;37;40m") 194 195 196 print("vector of ../Singular/table.h") 197 vec = count_occurances("../Singular/table.h", dic) 198 print(vec) 199 print() 200 print() 201 202 def test_create_vector_dictionary(): 203 """ 204 Create test for create_vector_dictionary function 205 """ 206 print("\033[1;32;40mTesting create_vector_dictionary " \ 207 "function:\033[1;37;40m") 208 read_dictionary() 209 210 print("Create Vector Dictionary with None as dictionary:") 211 correct = False 212 try: 213 create_vector_dictionary(None) 214 except AssertionError: 215 correct = True 216 print("\033[1;32;40mpass\033[1;37;40m") 217 if not correct: 218 print("\033[1;31;40mfail\033[1;37;40m") 219 220 print("Create Vector Dictionary with empty dictionary:") 221 correct = False 222 try: 223 create_vector_dictionary(np.array([])) 224 except AssertionError: 225 correct = True 226 print("\033[1;32;40mpass\033[1;37;40m") 227 if not correct: 228 print("\033[1;31;40mfail\033[1;37;40m") 229 230 print() 231 print() 232 233 def test_vector_distance(): 234 """ 235 Create test for vector_distance function 236 """ 237 print("\033[1;32;40mTesting vector_distance 
function:\033[1;37;40m") 136 238 137 239 vector1 = np.array([3, 4]) 138 240 vector1 = normalise_vector(vector1) 139 241 vector2 = np.array([4, 3]) 140 normalise_vector(vector2)141 242 vector2 = normalise_vector(vector2) 142 print("distance same vector: " + str(vector_distance(vector1, vector1))) 143 print("distance different vector: " + str(vector_distance(vector1, vector2))) 144 print(vector1) 145 print(vector2) 146 print() 243 244 print("Distance of vectors of different dimensions:") 245 correct = False 246 try: 247 vector_distance(np.array([1, 2, 3]), vector1) 248 except AssertionError: 249 correct = True 250 print("\033[1;32;40mpass\033[1;37;40m") 251 if not correct: 252 print("\033[1;31;40mfail\033[1;37;40m") 253 254 255 print("Distance same vector: " + str(vector_distance(vector1, vector1))) 256 assert vector_distance(vector1, vector1) == 0, \ 257 "distance to same vectorshould be 0" 258 print("\033[1;32;40mpass\033[1;37;40m") 259 260 print("Distance different vector: " + str(vector_distance(vector1, vector2))) 261 assert vector_distance(vector1, vector2) > 0, \ 262 "Distance between nonequal vectors should be strictly positive" 263 print("\033[1;32;40mpass\033[1;37;40m") 264 print() 265 print() 266 267 def test_normalise_vector(): 268 """ 269 Create test for normalise_vector function 270 """ 271 print("\033[1;32;40mTesting normalise_vector function:\033[1;37;40m") 272 testvector = np.array([3, 4]) 273 testvector = normalise_vector(testvector) 274 assert np.linalg.norm(testvector) == 1, \ 275 "Normalised vector should have norm of 1" 276 print("\033[1;32;40mpass\033[1;37;40m") 277 print("normalised vector: " + str(testvector)) 278 print("\033[1;32;40mpass\033[1;37;40m") 147 279 148 280 print("Attempt to normalise the zero vector") 149 281 print(normalise_vector(np.array([0, 0, 0, 0, 0]))) 150 print( )282 print("\033[1;32;40mpass\033[1;37;40m") 151 283 152 284 print("Attempt to normalise list") 153 285 print(normalise_vector([3, 4, 0, 0, 0])) 154 print( )286 
print("\033[1;32;40mpass\033[1;37;40m") 155 287 156 288 print("Attempt to normalise empty vector") 157 289 print(normalise_vector(np.array([]))) 158 print( )290 print("\033[1;32;40mpass\033[1;37;40m") 159 291 160 292 print("Attempt to normalise None") 161 293 print(normalise_vector(None)) 162 print() 163 164 if len(sys.argv) == 2: 165 dic = read_dictionary(filename=sys.argv[1]) 166 else: 167 dic = read_dictionary() 168 print("vector of ../Singular/table.h") 169 print(count_occurances("../Singular/table.h", dic)) 294 print("\033[1;32;40mpass\033[1;37;40m") 295 print() 296 print() 297 298 299 def main(): 300 """ 301 Run some basic tests 302 """ 303 test_normalise_vector() 304 305 test_vector_distance() 306 307 test_read_dictionary() 308 309 test_count_occurances() 310 311 test_create_vector_dictionary() 170 312 171 313 if __name__ == '__main__':
Note: See TracChangeset for help on using the changeset viewer.