forked from CentreForCorpusResearch/clic-annotation
-
Notifications
You must be signed in to change notification settings - Fork 0
/
annotate.sh
72 lines (48 loc) · 2.65 KB
/
annotate.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/bin/bash
# __author__ = "Catherine Smith, Matthew Brook O'Donnell, J. de Joode" (in arbitrary order)
# runs the annotation scripts (paragraphs, find_extra_chapter_titles, sentences,
# quotes, suspensions, alternativequotes, alternativesuspensions) on every
# text file in a directory
# the file needs to be run as
# ./annotate.sh input output
#
# where input and output are the names of directories (without a slash at the end)
# Abort on use of unset variables and surface failures inside pipelines;
# every command whose failure matters is also checked explicitly below.
set -uo pipefail

# Print a message on stderr and stop with a failure status.
die() {
  printf 'annotate.sh: %s\n' "$*" >&2
  exit 1
}

# Both directories are mandatory; extra arguments are ignored.
if (( $# < 2 )); then
  printf 'Usage: %s input output\n' "${0##*/}" >&2
  exit 2
fi

STARTTIME=$(date +%s)

# The Python stage scripts are looked up next to this script, so it can be
# started from any directory. If they are not there, fall back to the
# current directory.
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" > /dev/null && pwd) \
  || die "cannot determine the script directory"
if [[ ! -f "$SCRIPT_DIR/paragraphs.py" ]]; then
  SCRIPT_DIR=$(pwd)
fi

# Convert the (possibly relative) paths into absolute ones. The cd happens in
# a subshell, so the caller's working directory is untouched; cd's output is
# discarded because it prints the target when CDPATH is in effect.
[[ -d "$1" ]] || die "input directory not found: $1"
INPUT_DIR=$(cd -- "$1" > /dev/null && pwd) \
  || die "cannot enter input directory: $1"
[[ -d "$2" ]] || die "output directory not found: $2"
OUTPUT_DIR=$(cd -- "$2" > /dev/null && pwd) \
  || die "cannot enter output directory: $2"

# One sub-directory per stage receives that stage's result; create any that
# are missing so the copies below cannot fail for that reason.
for stage_dir in paragraphs sentences quotes suspensions \
  alternativequotes alternativesuspensions final; do
  mkdir -p -- "$OUTPUT_DIR/$stage_dir" \
    || die "cannot create $OUTPUT_DIR/$stage_dir"
done

# Intermediate files live in a private scratch directory that is removed on
# every exit path, so parallel runs do not clash and nothing outside it is
# ever deleted.
WORK_DIR=$(mktemp -d "${TMPDIR:-/tmp}/annotate.XXXXXX") \
  || die "cannot create a temporary directory"
trap 'rm -rf -- "$WORK_DIR"' EXIT

#######################################
# Run the whole stage chain on one text file and store each stage's result.
# Must be called with the input directory as working directory, because the
# first stage receives the bare file name.
# Globals:   SCRIPT_DIR, OUTPUT_DIR, WORK_DIR (read)
# Arguments: $1 - name of a .txt file in the current directory
# Outputs:   progress messages on stdout
# Returns:   0 on success, 1 as soon as a stage or a copy fails
#######################################
annotate_file() {
  local txt=$1
  # Replace only the trailing extension: book.txt -> book.xml
  local xml="${txt%.txt}.xml"
  local paragraphs_1="$WORK_DIR/tmp-paragraphs-1.xml"
  local paragraphs_2="$WORK_DIR/tmp-paragraphs-2.xml"
  local sentences="$WORK_DIR/tmp-sentences.xml"
  local quotes="$WORK_DIR/tmp-quotes.xml"
  local suspensions="$WORK_DIR/tmp-suspensions.xml"
  local altquotes="$WORK_DIR/tmp-alternativequotes.xml"
  local altsuspensions="$WORK_DIR/tmp-alternativesuspensions.xml"

  echo '--------------------------------------------------'
  echo 'Creating Base XML: ' "$txt"

  echo 'Stage 1a -- basic paragraph extraction'
  python "$SCRIPT_DIR/paragraphs.py" "$txt" > "$paragraphs_1" || return 1

  echo 'Stage 1b -- numbering paragraphs and finding parts of the title'
  python "$SCRIPT_DIR/paragraphs_find_extra_chapter_titles.py" \
    "$paragraphs_1" > "$paragraphs_2" || return 1
  cp -- "$paragraphs_2" "$OUTPUT_DIR/paragraphs/$xml" || return 1

  echo 'Stage 2 -- extracting sentences'
  python "$SCRIPT_DIR/sentences.py" "$paragraphs_2" > "$sentences" || return 1
  cp -- "$sentences" "$OUTPUT_DIR/sentences/$xml" || return 1

  echo 'Stage 3 -- adding milestones for quotes'
  python "$SCRIPT_DIR/quotes.py" "$sentences" > "$quotes" || return 1
  cp -- "$quotes" "$OUTPUT_DIR/quotes/$xml" || return 1

  echo 'Stage 4 -- adding milestones for suspensions'
  python "$SCRIPT_DIR/suspensions.py" "$quotes" > "$suspensions" || return 1
  cp -- "$suspensions" "$OUTPUT_DIR/suspensions/$xml" || return 1

  echo 'Stage 5 -- adding milestones for alternative quotes'
  python "$SCRIPT_DIR/alternativequotes.py" "$suspensions" > "$altquotes" \
    || return 1
  cp -- "$altquotes" "$OUTPUT_DIR/alternativequotes/$xml" || return 1

  echo 'Stage 6 -- adding milestones for alternative suspensions'
  python "$SCRIPT_DIR/alternativesuspensions.py" "$altquotes" \
    > "$altsuspensions" || return 1
  cp -- "$altsuspensions" "$OUTPUT_DIR/alternativesuspensions/$xml" || return 1

  echo 'Writing the resuls to final'
  cp -- "$altsuspensions" "$OUTPUT_DIR/final/$xml" || return 1
}

# Run the scripts for each file in the INPUT_DIR. The stages are started from
# inside that directory so the first one is handed a bare file name.
cd -- "$INPUT_DIR" || die "cannot enter input directory: $INPUT_DIR"
failed=0
for i in *.txt; do
  # Skips directories and the literal pattern left behind when nothing matches.
  [[ -f "$i" ]] || continue
  if ! annotate_file "$i"; then
    printf 'annotate.sh: annotation failed for %s; skipping the rest of its stages\n' \
      "$i" >&2
    failed=$((failed + 1))
  fi
done

# The scratch directory itself is removed by the EXIT trap.
echo 'Finished and now cleaning up. Find your results in the directory `final` in your output directory.'
ENDTIME=$(date +%s)
echo "It took $((ENDTIME - STARTTIME)) seconds to complete this annotation."

# Report partial failure through the exit status so callers can detect it.
if (( failed > 0 )); then
  printf 'annotate.sh: %d file(s) could not be annotated\n' "$failed" >&2
  exit 1
fi