forked from christiancasey/iip-word-lists
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbuild_site.sh
executable file
·145 lines (132 loc) · 4.35 KB
/
build_site.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#!/usr/bin/env bash
# Build switches. Each is a 0/1 flag; the command-line parsing further down
# flips some of them.
update=1         # 1: remove the old site and download fresh texts (-nu sets 0)
debug=0          # 1: delete most of the input files to speed up testing
exceptions=0     # 1: hand --fileexception to the python script
silent=0         # 1: hand --silent to the python script
google_sheets=0  # 1: hand --google_sheets to the python script
use_existing=0   # initialised here; nothing in this script reads it
# new_system is deliberately left unset (its initialisation is disabled).

# Output directory: keep a DOCS value that is already set, otherwise fall back
# to "docs" and export it.
[[ -n "$DOCS" ]] || export DOCS="docs"
printf '%s\n' "Using data in $DOCS."
#######################################
# Run src/python/wordlist.py over the XML texts. The original author notes
# that the python script parses the xml, generates plain text and lemmatizes.
# Kept as a function although it is called only once.
# Globals:
#   DOCS           (read) directory that contains texts/xml
#   google_sheets  (read) 1 adds --google_sheets
#   exceptions     (read) 1 adds --fileexception
#   silent         (read) 1 adds --silent
# Arguments:
#   None
# Returns:
#   Exit status of wordlist.py, or non-zero when $DOCS cannot be entered.
#######################################
run_script() {
  # Activation has to happen in the current shell so it affects the call below.
  source environment/bin/activate

  # Resolve the script before changing directory so that DOCS may be nested
  # or absolute instead of exactly one level below the repository root.
  local script_path="$PWD/src/python/wordlist.py"

  # Optional switches, in the order the script has always received them.
  local -a optional_flags=()
  if [[ "$google_sheets" == 1 ]]; then
    optional_flags+=(--google_sheets)
  fi
  if [[ "$exceptions" == 1 ]]; then
    optional_flags+=(--fileexception)
  fi
  if [[ "$silent" == 1 ]]; then
    optional_flags+=(--silent)
  fi
  # The --new_system switch stays disabled, as it was before.

  # The subshell confines the directory change: the caller's working directory
  # is untouched even when the cd or the python script fails.
  (
    cd "$DOCS" || exit 1
    "$script_path" texts/xml/* --nodiplomatic --html_general \
      --plaintext --flat texts/plain "${optional_flags[@]}"
  )
}
# Print the option summary to stdout.
print_usage() {
  cat <<'EOF'
Usage:

  -h,  --help           Print this message.
  -nu, --no-update      Do not fetch epidoc files from github.
  -s,  --silent         Pass --silent to the python script.
  -gs, --google_sheets  Pass --google_sheets to the python script.
  -e,  --exceptions     If an exception occurs in the python code, print the error message.
  -ns, --new-system     Accepted for compatibility; currently has no effect.
  -d,  --debug          Delete most of the input texts to speed up testing.
  -ue, --use-existing   Do not rebuild the word lists.
EOF
}

#######################################
# Parse command-line arguments into the global 0/1 switches.
# Globals:
#   update, silent, google_sheets, exceptions, new_system, debug,
#   use_existing  (written)
# Arguments:
#   The script's own arguments ("$@").
# Outputs:
#   The usage text on stdout when -h/--help is given; the script then exits
#   with status 0.
#######################################
parse_args() {
  local word
  # "$@" keeps every argument intact; an unquoted $* would re-split arguments
  # on whitespace and expand any glob characters in them.
  for word in "$@"; do
    case "$word" in
      -h | --help)
        print_usage
        exit 0
        ;;
      -nu | --no-update)
        update=0
        ;;
      -s | --silent)
        silent=1
        ;;
      -gs | --google_sheets)
        google_sheets=1
        ;;
      -e | --exceptions)
        exceptions=1
        ;;
      -ns | --new-system)
        # Still recorded, but the code that forwarded it is commented out.
        new_system=1
        ;;
      -d | --debug)
        debug=1
        ;;
      -ue | --use-existing)
        # Advertised in the help text; only the flag is recorded here because
        # nothing in this script reads use_existing yet.
        use_existing=1
        ;;
      *)
        # Unrecognised arguments are ignored, as they always have been.
        ;;
    esac
  done
}
parse_args "$@"
#######################################
# Delete all of the old files, including those generated programmatically and
# those downloaded from the repo, then recreate an empty output directory.
# Does nothing unless update is 1.
# Globals:
#   update (read), DOCS (read)
# Returns:
#   Non-zero when the directory cannot be removed or recreated.
#######################################
remove_old_site() {
  if [[ "$update" != 1 ]]; then
    return 0
  fi
  echo "Removing old site..."
  # Refuse to run rm -rf on an empty path.
  : "${DOCS:?DOCS must not be empty}"
  # Removing the path directly (rather than cd-ing in and back out first)
  # keeps this correct when DOCS is nested, e.g. "out/site", or absolute.
  if [[ -d "$DOCS" ]]; then
    rm -rf -- "$DOCS" || return 1
  fi
  mkdir -p -- "$DOCS"
}
remove_old_site
#######################################
# Download the iip-texts archive and install its epidoc files as
# $DOCS/texts/xml. When update is not 1 nothing is downloaded and only a
# placeholder line is printed.
# Globals:
#   update (read), DOCS (read)
# Outputs:
#   Progress on stdout, failures on stderr.
# Returns:
#   Non-zero when the download, the extraction or the copy fails.
#######################################
fetch_texts() {
  local archive_url="https://github.com/Brown-University-Library/iip-texts/archive/master.zip"
  local workdir xml_dir

  # Robot voice output so other things can be done while this runs; skipped
  # quietly on systems that have no `say` command.
  if command -v say >/dev/null 2>&1; then
    say "updating texts"
  fi

  if [[ "$update" != 1 ]]; then
    echo "..."
    return 0
  fi

  echo "Updating texts..."
  xml_dir="$DOCS/texts/xml"

  # Work in a private scratch directory. The subshell confines the cd, so a
  # failure can never leave the rest of the script in the wrong directory.
  workdir=$(mktemp -d) || return 1
  if ! (cd "$workdir" && wget "$archive_url" && unzip master.zip); then
    echo "Could not download or unpack $archive_url" >&2
    rm -rf -- "$workdir"
    return 1
  fi

  # Copy the directory's contents (note the trailing "/.") into texts/xml.
  if ! { mkdir -p -- "$xml_dir" \
    && cp -R "$workdir/iip-texts-master/epidoc-files/." "$xml_dir/"; }; then
    echo "Could not copy the epidoc files into $xml_dir" >&2
    rm -rf -- "$workdir"
    return 1
  fi
  rm -rf -- "$workdir"

  # These two files are removed from the input set when they are present.
  rm -f -- "$xml_dir/interpretations.xml" "$xml_dir/include_publicationStmt.xml"
}
# Stop here on failure: continuing would run the build without its input texts.
fetch_texts || exit 1
#######################################
# Delete a bunch of the files to speed up testing: when debug is 1, every
# *.xml file directly inside $DOCS/texts/xml whose name contains one of the
# digits 1-7 is removed.
# Globals:
#   debug (read), DOCS (read)
#######################################
prune_texts_for_debug() {
  if [[ "$debug" != 1 ]]; then
    return 0
  fi
  echo "Delete a bunch of the text files to speed up testing."
  # One find pass replaces seven successive globs. It selects the same files,
  # stays quiet when nothing matches, and is not limited by the maximum
  # command-line length. Dot-files are excluded because a shell glob would
  # not have matched them either.
  find "$DOCS/texts/xml" -maxdepth 1 ! -type d -name '*[1-7]*.xml' \
    ! -name '.*' -exec rm -f -- {} +
}
# Show the working directory in the build log.
pwd
prune_texts_for_debug
# Run the lemmatizer. The spoken announcements let the author do other things
# while this runs; they are skipped on systems that have no `say` command.
if command -v say >/dev/null 2>&1; then
  say "running lemmatizer"
fi
run_script
if command -v say >/dev/null 2>&1; then
  say "lemmatizer complete"
fi

# Copy the static web assets next to the generated pages. "$DOCS" is quoted
# everywhere so an output path containing whitespace still works.
cp src/web/wordlist.css "$DOCS/"
cp src/web/style.css "$DOCS/"
# index_search.js is taken from the repository root, not from src/web.
cp index_search.js "$DOCS/"
cp src/web/doubletree.html "$DOCS/"
cp -r src/web/doubletreejs "$DOCS/"
cp src/web/levenshtein.min.js "$DOCS/"
cp src/web/wordinfo.css "$DOCS/"
cp src/web/wordInfo.js "$DOCS/"
cp res/doubletree.svg "$DOCS/"

# Combine all of the plain texts into one file so that the doubletree can be
# generated from them. find is used instead of `cat texts/plain/*` to avoid
# the "arg list too long" error; "+" batches the files into as few cat
# invocations as possible instead of starting one per file.
find "$DOCS/texts/plain" -name "*.txt" -exec cat '{}' + > "$DOCS/combined.txt"
./src/python/per_line.py "$DOCS/combined.txt" "$DOCS/doubletree-data.txt"