Changeset f59883 in git


Ignore:
Timestamp:
Jul 30, 2019, 5:21:47 PM (5 years ago)
Author:
Murray Heymann <heymann.murray@…>
Branches:
spielwiese (17f1d200f27c5bd38f5dfc6e8a0879242279d1d8)
Children:
da892581a52069935f084604d05c0ecd6d19d5c9
Parents:
de88e1d84f18979811df4c3d1dc68f221a3362ef
Message:
Expand testing
Location:
machine_learning
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • machine_learning/common/keyword_vector.py

    rde88e1d rf59883  
    55import os
    66import re
    7 import sys
    87import numpy as np
    98
     
    1716    """
    1817    if not os.path.isfile(filename):
    19         print("Please provide a valid input file as argument")
    20         return np.array([])
     18        print("Please provide a valid input file as argument to read "
     19              "dictionary")
     20        raise FileNotFoundError
    2121
    2222    dictionary = []
     
    3838    if not os.path.isfile(filename):
    3939        print("Please provide a valid input file as argument")
    40         return []
    41     if dictionary.size == 0:
    42         print("Please provide a valid dictionary as argument")
    43         return []
    44     if dictionary is None:
    45         print("Please provide a valid dictionary as argument")
    46         return []
     40        raise FileNotFoundError
     41    assert dictionary is not None, \
     42            "Please provide a valid dictionary as argument"
     43    assert not dictionary.size == 0, \
     44            "Please provide a valid dictionary as argument"
     45
    4746    vector = create_vector_dictionary(dictionary)
    4847    with open(filename, "r+") as file:
     
    6766    Return an identical copy of a dictionary
    6867    """
    69     new_dic = []
    70     for word in dictionary:
    71         new_dic.append(word)
    72     return new_dic
     68    return np.copy(np.array(dictionary))
    7369
    7470
     
    8480def create_vector_dictionary(dictionary):
    8581    """
    86     Create a zero vector for a given dictionary
     82    Create a zero lookup dictionary for a given dictionary
    8783    """
    8884    assert not dictionary is None, "Please give a dictionary"
     
    9894    Calculate the Euclidean distance between two vectors.
    9995    """
    100     if not len(vec1) == len(vec2):
    101         print("Vectors don't have the same sizes")
    102         return -1
     96    assert len(vec1) == len(vec2), \
     97            "Vectors don't have the same sizes"
    10398
    10499    dist = np.linalg.norm(vec1 - vec2)
     
    114109
    115110    if vec is None:
    116         print("Please provide a valid vector")
     111        print("Warning, None is not a valid vector")
    117112        print("Returning empty vector by default")
    118113        return np.array([])
     
    120115    if not isinstance(vec, np.ndarray):
    121116        print("Warning, vector should be a numpy array")
     117
     118    if np.array(vec).size == 0:
     119        print("Warning, vector being normalised is empty")
     120
    122121    norm = np.linalg.norm(vec)
    123122    if not norm == 0:
     
    125124    return vec
    126125
    127 
    128 def main():
    129     """
    130     Run some basic tests
    131     """
    132 
    133     testvector = np.array([3, 4])
    134     normalise_vector(testvector)
    135     print("normalised vector: " + str(testvector))
     126def test_read_dictionary():
     127    """
     128    Create test for read_dictionary function
     129    """
     130    print("\033[1;32;40mTesting read_dictionary function:\033[1;37;40m")
     131
     132    print("Non-existant file")
     133    correct = False
     134    try:
     135        read_dictionary("asdfasdf")
     136    except FileNotFoundError:
     137        print("correctly caught non-existant file")
     138        print("\033[1;32;40mpass\033[1;37;40m")
     139        correct = True
     140    if not correct:
     141        print("\033[1;31;40mfail\033[1;37;40m")
     142
     143    print("Reading default file")
     144    correct = True
     145    try:
     146        read_dictionary()
     147    except FileNotFoundError:
     148        print("Default file for dictionary missing")
     149        print("\033[1;31;40mfail\033[1;37;40m")
     150        correct = False
     151    if correct:
     152        print("\033[1;32;40mpass\033[1;37;40m")
     153    print()
     154    print()
     155
     156def test_count_occurances():
     157    """
     158    Create test for count_occurances function
     159    """
     160    print("\033[1;32;40mTesting count_occurances function:\033[1;37;40m")
     161    dic = read_dictionary()
     162    correct = False
     163    try:
     164        vec = count_occurances("asdfasdf", dic)
     165    except FileNotFoundError:
     166        correct = True
     167        print("Correctly raised FileNotFoundError")
     168        print("\033[1;32;40mpass\033[1;37;40m")
     169    if not correct:
     170        print("\033[1;31;40mfail\033[1;37;40m")
     171
     172    print("Count occurances with None dictionary:")
     173    correct = False
     174    try:
     175        count_occurances("../Singular/table.h", None)
     176    except AssertionError:
     177        print("Correctly caught AssertionError")
     178        print("\033[1;32;40mpass\033[1;37;40m")
     179        correct = True
     180    if not correct:
     181        print("\033[1;31;40mfail\033[1;37;40m")
     182
     183
     184    print("Count occurances with empty dictionary:")
     185    correct = False
     186    try:
     187        count_occurances("../Singular/table.h", np.array([]))
     188    except AssertionError:
     189        print("Correctly caught AssertionError")
     190        print("\033[1;32;40mpass\033[1;37;40m")
     191        correct = True
     192    if not correct:
     193        print("\033[1;31;40mfail\033[1;37;40m")
     194
     195
     196    print("vector of ../Singular/table.h")
     197    vec = count_occurances("../Singular/table.h", dic)
     198    print(vec)
     199    print()
     200    print()
     201
     202def test_create_vector_dictionary():
     203    """
     204    Create test for create_vector_dictionary function
     205    """
     206    print("\033[1;32;40mTesting create_vector_dictionary " \
     207            "function:\033[1;37;40m")
     208    read_dictionary()
     209
     210    print("Create Vector Dictionary with None as dictionary:")
     211    correct = False
     212    try:
     213        create_vector_dictionary(None)
     214    except AssertionError:
     215        correct = True
     216        print("\033[1;32;40mpass\033[1;37;40m")
     217    if not correct:
     218        print("\033[1;31;40mfail\033[1;37;40m")
     219
     220    print("Create Vector Dictionary with empty dictionary:")
     221    correct = False
     222    try:
     223        create_vector_dictionary(np.array([]))
     224    except AssertionError:
     225        correct = True
     226        print("\033[1;32;40mpass\033[1;37;40m")
     227    if not correct:
     228        print("\033[1;31;40mfail\033[1;37;40m")
     229
     230    print()
     231    print()
     232
     233def test_vector_distance():
     234    """
     235    Create test for vector_distance function
     236    """
     237    print("\033[1;32;40mTesting vector_distance function:\033[1;37;40m")
    136238
    137239    vector1 = np.array([3, 4])
    138240    vector1 = normalise_vector(vector1)
    139241    vector2 = np.array([4, 3])
    140     normalise_vector(vector2)
    141242    vector2 = normalise_vector(vector2)
    142     print("distance same vector: " + str(vector_distance(vector1, vector1)))
    143     print("distance different vector: " + str(vector_distance(vector1, vector2)))
    144     print(vector1)
    145     print(vector2)
    146     print()
     243
     244    print("Distance of vectors of different dimensions:")
     245    correct = False
     246    try:
     247        vector_distance(np.array([1, 2, 3]), vector1)
     248    except AssertionError:
     249        correct = True
     250        print("\033[1;32;40mpass\033[1;37;40m")
     251    if not correct:
     252        print("\033[1;31;40mfail\033[1;37;40m")
     253
     254
     255    print("Distance same vector: " + str(vector_distance(vector1, vector1)))
     256    assert vector_distance(vector1, vector1) == 0, \
     257            "distance to same vectorshould be 0"
     258    print("\033[1;32;40mpass\033[1;37;40m")
     259
     260    print("Distance different vector: " + str(vector_distance(vector1, vector2)))
     261    assert vector_distance(vector1, vector2) > 0, \
     262            "Distance between nonequal vectors should be strictly positive"
     263    print("\033[1;32;40mpass\033[1;37;40m")
     264    print()
     265    print()
     266
     267def test_normalise_vector():
     268    """
     269    Create test for normalise_vector function
     270    """
     271    print("\033[1;32;40mTesting normalise_vector function:\033[1;37;40m")
     272    testvector = np.array([3, 4])
     273    testvector = normalise_vector(testvector)
     274    assert np.linalg.norm(testvector) == 1, \
     275            "Normalised vector should have norm of 1"
     276    print("\033[1;32;40mpass\033[1;37;40m")
     277    print("normalised vector: " + str(testvector))
     278    print("\033[1;32;40mpass\033[1;37;40m")
    147279
    148280    print("Attempt to normalise the zero vector")
    149281    print(normalise_vector(np.array([0, 0, 0, 0, 0])))
    150     print()
     282    print("\033[1;32;40mpass\033[1;37;40m")
    151283
    152284    print("Attempt to normalise list")
    153285    print(normalise_vector([3, 4, 0, 0, 0]))
    154     print()
     286    print("\033[1;32;40mpass\033[1;37;40m")
    155287
    156288    print("Attempt to normalise empty vector")
    157289    print(normalise_vector(np.array([])))
    158     print()
     290    print("\033[1;32;40mpass\033[1;37;40m")
    159291
    160292    print("Attempt to normalise None")
    161293    print(normalise_vector(None))
    162     print()
    163 
    164     if len(sys.argv) == 2:
    165         dic = read_dictionary(filename=sys.argv[1])
    166     else:
    167         dic = read_dictionary()
    168     print("vector of ../Singular/table.h")
    169     print(count_occurances("../Singular/table.h", dic))
     294    print("\033[1;32;40mpass\033[1;37;40m")
     295    print()
     296    print()
     297
     298
     299def main():
     300    """
     301    Run some basic tests
     302    """
     303    test_normalise_vector()
     304
     305    test_vector_distance()
     306
     307    test_read_dictionary()
     308
     309    test_count_occurances()
     310
     311    test_create_vector_dictionary()
    170312
    171313if __name__ == '__main__':
  • machine_learning/model/predictor.py

    rde88e1d rf59883  
    120120    print(end - start, "seconds to create_table")
    121121
    122     test_vec = count_occurances("extract.lib", dictionary)
    123122    predictor = HelpPagePredictor()
    124123    predictor.fit(vectors, file_list)
    125124
    126125    start = time.time()
     126    test_vec = count_occurances("extract.lib", dictionary)
    127127    prediction = predictor.predict(np.array([test_vec]))
    128128    end = time.time()
     
    132132
    133133    print("prediction for zero vector")
     134    start = time.time()
    134135    zerovec = np.zeros(len(dictionary) - 2)
    135     start = time.time()
    136136    prediction = predictor.predict(np.array([zerovec]))
    137137    end = time.time()
     
    147147                continue
    148148            print("predicting for file", sys.argv[i])
     149            start = time.time()
    149150            test_vec = count_occurances(sys.argv[i], dictionary)
    150             start = time.time()
    151151            prediction = predictor.predict(np.array([test_vec]))
    152152            end = time.time()
Note: See TracChangeset for help on using the changeset viewer.