Ignore:
Timestamp:
Aug 12, 2019, 4:20:41 PM (5 years ago)
Author:
Murray Heymann <heymann.murray@…>
Branches:
(u'spielwiese', 'fe61d9c35bf7c61f2b6cbf1b56e25e2f08d536cc')
Children:
87d423b777b7762ac55fd9308018f3d7a12b9940
Parents:
b03e2f1e6cf3ff16ea956608cf7b560430904959
git-author:
Murray Heymann <heymann.murray@gmail.com> 2019-08-12 16:20:41+02:00
git-committer:
Murray Heymann <heymann.murray@gmail.com> 2019-08-12 16:20:52+02:00
Message:
Create dir in home for cache files for ml
File:
1 edited

Legend:

Unmodified
Added
Removed
  • machine_learning/ml_python/common/lookuptable.py

    rb03e2f rcd552e  
    1717from common.constants import HELP_FILE_URL, HELP_FILE_PATH, SINGULAR_BIN, \
    1818                        EXTRACT_SCRIPT, KEYWORDS_FILE, HELPFILE_NPY, \
    19                         VECTORS_NPY
     19                        VECTORS_NPY, HOME_DIR
    2020
    2121
     
    5050    'keywords.txt'
    5151    """
     52    # ensure the homedir exists
     53    if not os.path.isdir(HOME_DIR):
     54        os.makedirs(HOME_DIR)
     55
    5256    # extract keywords using the singular script
    53     os.system(SINGULAR_BIN + " " + EXTRACT_SCRIPT)
     57    os.system(SINGULAR_BIN + " -q " + EXTRACT_SCRIPT +
     58            " | sort | uniq > " + KEYWORDS_FILE)
    5459
    5560    # read from the file created by singular
    5661    dictionary = read_dictionary()
    57 
    58     # sort alphabetically
    59     dictionary = np.sort(np.unique(dictionary))
    60 
    61     # write back to the same file
    62     with open(KEYWORDS_FILE, "w") as file:
    63         for word in dictionary:
    64             file.write(word + "\n")
    6562
    6663    return dictionary
     
    7168    Get a list of helpfiles, and generate a word occurance vector for each.
    7269    """
     70
    7371    if dictionary is None:
    7472        dictionary = read_dictionary(KEYWORDS_FILE)
     
    7876            not os.path.isfile(HELPFILE_NPY) or \
    7977            not attempt_cached:
     78        os.makedirs(HOME_DIR, exist_ok=True)
    8079        file_list = np.array(get_list_of_htm_files())
    8180        np.save(HELPFILE_NPY, file_list)
     
    101100    check whether the various files exist, and create if necessary.
    102101    """
     102    if not os.path.isdir(HOME_DIR):
     103        os.makedirs(HOME_DIR)
     104
    103105    # check for and download help files if necessary
    104106    tbz2_path = os.path.join(HELP_FILE_PATH, "helpfiles.tbz2")
     
    111113    else:
    112114        dictionary = None
    113 
    114115
    115116    if not os.path.isfile(VECTORS_NPY) or not os.path.isfile(HELPFILE_NPY):
     
    128129    if not os.path.isfile(KEYWORDS_FILE):
    129130        retvalue = False
    130     if not os.path.isfile(VECTORS_NPY) or not os.path.isfile(HELPFILE_NPY):
     131    if not os.path.isdir(HOME_DIR) or \
     132            not os.path.isfile(VECTORS_NPY) or \
     133            not os.path.isfile(HELPFILE_NPY):
    131134        retvalue = False
    132135
Note: See TracChangeset for help on using the changeset viewer.