-
Notifications
You must be signed in to change notification settings - Fork 0
/
fileLoading.py
53 lines (41 loc) · 2.03 KB
/
fileLoading.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
from constants import *
from samplingMethods import *
import json
from collections import defaultdict
import os
def formatWordDictionary():
    """Rewrite every word-dictionary file in WORD_LIST_FOLDER in a canonical layout.

    Each regular file found in the folder is parsed with
    loadWordDictionaryFile and written back in place with sorted keys and a
    four-space indent. Entries that are not regular files (for example
    sub-directories) are skipped, since opening them as JSON would abort the
    whole run.

    Raises:
        OSError: If the folder cannot be listed or a file cannot be opened.
        json.JSONDecodeError: If a file does not contain valid JSON.
    """
    for dictFileName in os.listdir(WORD_LIST_FOLDER):
        # Same path construction as loadWordDictionaryFile: the folder
        # constant is concatenated directly with the file name.
        dictFilePath = f"{WORD_LIST_FOLDER}{dictFileName}"
        if not os.path.isfile(dictFilePath):
            continue
        # Parse completely before reopening: mode "w" truncates the file, so
        # the content must already be in memory at that point.
        wordDictionary = loadWordDictionaryFile(dictFileName)
        # json.dump escapes non-ASCII characters by default, so the output is
        # plain ASCII regardless of the platform's default encoding.
        with open(dictFilePath, "w") as wordDictionaryFile:
            json.dump(wordDictionary, wordDictionaryFile, sort_keys=True, indent=4)
def loadWordDictionary():
    """Load the main word dictionary and return it in pre-processed form.

    The file named by MAIN_WORD_DICTIONARY_FILENAME is read, word groups that
    point at separate files are expanded by recursivelyLoadSubDictionaries,
    and the result is handed to preProcessWordDictionary.

    Returns:
        Whatever preProcessWordDictionary returns for the expanded dictionary.
    """
    mainDictionary = loadWordDictionaryFile(MAIN_WORD_DICTIONARY_FILENAME)
    return preProcessWordDictionary(recursivelyLoadSubDictionaries(mainDictionary))
def recursivelyLoadSubDictionaries(wordDictionary):
    """Expand word groups that reference an external file.

    For every entry under DICTIONARY_KEY that carries FILENAME_KEY, the named
    file is loaded and its WORD_GROUP_KEY mapping is merged over the entry
    (keys from the file win on conflict). If the loaded file also has a
    SAMPLING_STRATEGY_KEY mapping, it is merged over the dictionary's own
    sampling strategies; a dictionary without that section starts from an
    empty mapping instead of raising KeyError.

    Note: despite the name, only one level of references is resolved. The
    content of a loaded file is not scanned for further FILENAME_KEY entries.

    Args:
        wordDictionary: Parsed dictionary document. It is modified in place.

    Returns:
        The same ``wordDictionary`` object, with DICTIONARY_KEY replaced by
        the expanded list of word groups.

    Raises:
        KeyError: If DICTIONARY_KEY is missing, or a referenced file has no
            WORD_GROUP_KEY entry.
    """
    expandedGroups = []
    for wordGroup in wordDictionary[DICTIONARY_KEY]:
        if FILENAME_KEY in wordGroup:
            loadedFile = loadWordDictionaryFile(wordGroup[FILENAME_KEY])
            # Build a new dict rather than mutating the original entry; the
            # file's keys override the referencing entry's keys.
            wordGroup = {**wordGroup, **loadedFile[WORD_GROUP_KEY]}
            if SAMPLING_STRATEGY_KEY in loadedFile:
                wordDictionary[SAMPLING_STRATEGY_KEY] = {
                    **wordDictionary.get(SAMPLING_STRATEGY_KEY, {}),
                    **loadedFile[SAMPLING_STRATEGY_KEY],
                }
        # Entries without a file reference are carried over unchanged.
        expandedGroups.append(wordGroup)
    wordDictionary[DICTIONARY_KEY] = expandedGroups
    return wordDictionary
def loadWordDictionaryFile(filename):
    """Parse one JSON file from the word-list folder.

    Args:
        filename: Name of the file. It is appended directly to
            WORD_LIST_FOLDER; no path separator is inserted between them.

    Returns:
        The decoded JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 so that non-ASCII words load identically on
    # every platform instead of depending on the locale's default encoding.
    with open(f"{WORD_LIST_FOLDER}{filename}", encoding="utf-8") as wordDictionaryFile:
        return json.load(wordDictionaryFile)
def preProcessWordDictionary(wordDictionary):
    """Turn a loaded dictionary document into a label-keyed lookup.

    Every word group under DICTIONARY_KEY is passed through parseFromObject
    and stored under the group's LABEL_KEY value; a repeated label keeps the
    last group. The sampling strategies under SAMPLING_STRATEGY_KEY are parsed
    the same way and collected in a defaultdict whose default factory is
    defaultSamplingStrategy. That defaultdict is stored in the result under
    SAMPLING_STRATEGY_KEY.

    parseFromObject and defaultSamplingStrategy come from the module's star
    imports; their behavior is not visible in this file.

    Args:
        wordDictionary: Document containing DICTIONARY_KEY and
            SAMPLING_STRATEGY_KEY entries.

    Returns:
        A dict of parsed word groups keyed by label, plus the sampling
        strategy defaultdict under SAMPLING_STRATEGY_KEY.
    """
    processed = {}
    for group in wordDictionary[DICTIONARY_KEY]:
        # Parse before reading the label, matching the original order.
        parsedGroup = parseFromObject(group)
        processed[group[LABEL_KEY]] = parsedGroup

    strategies = defaultdict(defaultSamplingStrategy)
    strategies.update(
        (strategyName, parseFromObject(strategySpec))
        for strategyName, strategySpec in wordDictionary[SAMPLING_STRATEGY_KEY].items()
    )

    # Assigned last, so it replaces any word group that used the same key.
    processed[SAMPLING_STRATEGY_KEY] = strategies
    return processed