Changeset cd552e in git for machine_learning/ml_python/common/lookuptable.py
- Timestamp: Aug 12, 2019, 4:20:41 PM (5 years ago)
- Branches: (u'spielwiese', 'fe61d9c35bf7c61f2b6cbf1b56e25e2f08d536cc')
- Children: 87d423b777b7762ac55fd9308018f3d7a12b9940
- Parents: b03e2f1e6cf3ff16ea956608cf7b560430904959
- git-author: Murray Heymann <heymann.murray@gmail.com> 2019-08-12 16:20:41+02:00
- git-committer: Murray Heymann <heymann.murray@gmail.com> 2019-08-12 16:20:52+02:00
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
machine_learning/ml_python/common/lookuptable.py
rb03e2f rcd552e
  17   17   from common.constants import HELP_FILE_URL, HELP_FILE_PATH, SINGULAR_BIN, \
  18   18       EXTRACT_SCRIPT, KEYWORDS_FILE, HELPFILE_NPY, \
  19      -     VECTORS_NPY
       19 +     VECTORS_NPY, HOME_DIR
  20   20
  21   21
   …    …
  50   50       'keywords.txt'
  51   51       """
       52 +     # ensure the homedir exists
       53 +     if not os.path.isdir(HOME_DIR):
       54 +         os.makedirs(HOME_DIR)
       55 +
  52   56       # extract keywords using the singular script
  53      -     os.system(SINGULAR_BIN + " " + EXTRACT_SCRIPT)
       57 +     os.system(SINGULAR_BIN + " -q " + EXTRACT_SCRIPT +
       58 +               " | sort | uniq > " + KEYWORDS_FILE)
  54   59
  55   60       # read from the file created by singular
  56   61       dictionary = read_dictionary()
  57      -
  58      -     # sort alphabetically
  59      -     dictionary = np.sort(np.unique(dictionary))
  60      -
  61      -     # write back to the same file
  62      -     with open(KEYWORDS_FILE, "w") as file:
  63      -         for word in dictionary:
  64      -             file.write(word + "\n")
  65      -
       62 +
  66   63       return dictionary
   …    …
  71   68       Get a list of helpfiles, and generate a word occurance vector for each.
  72   69       """
       70 +
  73   71       if dictionary is None:
  74   72           dictionary = read_dictionary(KEYWORDS_FILE)
   …    …
  78   76               not os.path.isfile(HELPFILE_NPY) or \
  79   77               not attempt_cached:
       78 +         os.makedirs(HOME_DIR, exist_ok=True)
  80   79           file_list = np.array(get_list_of_htm_files())
  81   80           np.save(HELPFILE_NPY, file_list)
   …    …
 101  100       check whether the various files exist, and create if necessary.
 102  101       """
      102 +     if not os.path.isdir(HOME_DIR):
      103 +         os.makedirs(HOME_DIR)
      104 +
 103  105       # check for and download help files if necessary
 104  106       tbz2_path = os.path.join(HELP_FILE_PATH, "helpfiles.tbz2")
   …    …
 111  113       else:
 112  114           dictionary = None
 113      -
 114      -
      115 +
 115  116       if not os.path.isfile(VECTORS_NPY) or not os.path.isfile(HELPFILE_NPY):
   …    …
 128  129       if not os.path.isfile(KEYWORDS_FILE):
 129  130           retvalue = False
 130      -     if not os.path.isfile(VECTORS_NPY) or not os.path.isfile(HELPFILE_NPY):
      131 +     if not os.path.isdir(HOME_DIR) or \
      132 +             not os.path.isfile(VECTORS_NPY) or \
      133 +             not os.path.isfile(HELPFILE_NPY):
 131  134           retvalue = False
 132  135
Note: See TracChangeset for help on using the changeset viewer.