forked from pjheslin/diogenes
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmk.lexica-perseus
35 lines (26 loc) · 2.4 KB
/
mk.lexica-perseus
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# Get LSJ and L-S lexica from Perseus
# Running `make -f mk.lexica-perseus` with no goal builds `all`.
.DEFAULT_GOAL := all

# NOTE(review): mk.common is not visible here; it presumably defines
# PERSEUSLEXICA, PERSEUSLEXICACOMMIT, DATA and LEXICONFILES -- confirm.
include mk.common

# `all` is a command name, not a file: declare it phony so that a stray file
# called `all` in the working directory cannot make the build a silent no-op.
.PHONY: all

# Aggregate target: the pinned Perseus checkout plus both lexicon files
# (Lewis & Short, and the reformatted LSJ) under $(DATA).
all: $(PERSEUSLEXICA)/lexica/.git/HEAD $(DATA)/lat.ls.perseus-eng1.xml $(DATA)/grc.lsj.xml
# Clone the PerseusDL lexica repository into $(PERSEUSLEXICA)/lexica and pin
# it to $(PERSEUSLEXICACOMMIT).  The target is the clone's .git/HEAD file and
# has no prerequisites, so this rule only runs when no clone exists yet.
$(PERSEUSLEXICA)/lexica/.git/HEAD:
	rm -rf $(PERSEUSLEXICA)
	mkdir -p $(PERSEUSLEXICA)
	cd $(PERSEUSLEXICA) && git clone https://github.com/PerseusDL/lexica
# If the checkout fails, delete the fresh clone before reporting the error.
# Otherwise .git/HEAD would already exist, the target would look up to date on
# the next run, and the lexica would silently be built from the wrong revision.
	cd $(PERSEUSLEXICA)/lexica && git checkout $(PERSEUSLEXICACOMMIT) || { rm -rf $(PERSEUSLEXICA)/lexica; exit 1; }
# Directory inside the Perseus checkout that holds the LSJ source XML.
LSJDIR := $(PERSEUSLEXICA)/lexica/CTS_XML_TEI/perseus/pdllex/grc/lsj

# The LSJ is split across 27 files, grc.lsj.perseus-eng1.xml through
# grc.lsj.perseus-eng27.xml.  They are listed in numeric order because that is
# the order in which they are concatenated into the combined lexicon.
LSJ_PARTS := 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
LSJS := $(foreach part,$(LSJ_PARTS),$(LSJDIR)/grc.lsj.perseus-eng$(part).xml)
# Build the single combined LSJ file by running every Perseus LSJ part through
# utils/reformat_lsj.pl (fed on stdin) and appending the results in order.
#
# The output is assembled in $@.tmp and only renamed to $@ once every step has
# succeeded, so a failed or interrupted run can never leave a truncated file
# that make would consider up to date.
$(DATA)/grc.lsj.xml: utils/reformat_lsj.pl $(PERSEUSLEXICA)/lexica/.git/HEAD
	mkdir -p $(@D)
	echo '<!-- Reformatted for Diogenes from the XML files in https://github.com/PerseusDL/lexica -->' > $@.tmp
# `|| exit 1` aborts on the first part that is missing or fails to convert;
# without it only the status of the last loop iteration would be seen by make.
	for LSJ in $(LSJS); do perl $< < "$$LSJ" >> $@.tmp || exit 1; done
# The reformatting for readability of the LSJ XML has split entries over
# multiple, indented lines, which has introduced spurious whitespace before
# punctuation after citations.  This is a hack to get rid of the worst (but
# not all) of it.
	perl -pi -e 's#</biblScope>\s+</bibl>#</biblScope></bibl>#g' $@.tmp
	perl -pi -e 's#</bibl>\s+</cit>#</bibl></cit>#g' $@.tmp
# Remove some spurious newlines: a line break is replaced by a space unless
# the text immediately before it is </entryFree>.
	perl -pi -e 's#(?<!</entryFree>)\s*\n# #' $@.tmp
	mv -f $@.tmp $@
# Copy the Lewis & Short XML out of the Perseus checkout and join lines that
# were broken in the middle of an entry.
#
# The copy is staged in $@.tmp and renamed only after the clean-up succeeds;
# otherwise a failure between the copy and the edit would leave an unprocessed
# file in place that make would treat as finished.
$(DATA)/lat.ls.perseus-eng1.xml: $(PERSEUSLEXICA)/lexica/.git/HEAD
	mkdir -p $(@D)
	cp $(PERSEUSLEXICA)/lexica/CTS_XML_TEI/perseus/pdllex/lat/ls/lat.ls.perseus-eng1.xml $@.tmp
# Remove spurious newlines in the middle of entries: a line break is replaced
# by a space unless the text immediately before it is </entryFree>.
	perl -pi -e 's#(?<!</entryFree>)\s*\n# #' $@.tmp
	mv -f $@.tmp $@
# Remove the Perseus checkout and the generated lexicon files.
# The paths must be arguments to rm, not prerequisites: listed as
# prerequisites with no recipe, `make clean` deleted nothing (and failed with
# "No rule to make target" whenever they did not exist).
# NOTE(review): LEXICONFILES is not defined in this file (presumably in
# mk.common) -- confirm it lists only files this build produces.
.PHONY: clean
clean:
	rm -rf $(PERSEUSLEXICA) $(LEXICONFILES)