-
Notifications
You must be signed in to change notification settings - Fork 5
/
auto_conv
executable file
·84 lines (74 loc) · 2.25 KB
/
auto_conv
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/bin/sh
# usage
#   Print the command-line synopsis for this script on standard output
#   and terminate the script.
# Arguments: none
# Exits:     always, with status 1
usage() {
  printf '%s\n' "Usage: $0 [-d cdrom_dir]"
  exit 1
}
# make_dir DIR
#   Ensure that directory DIR exists, creating it (and any missing
#   parent directories) when it does not.
# Arguments: $1 - path of the directory to create
# Returns:   0 when DIR already exists or was created; otherwise the
#            non-zero status reported by mkdir
make_dir() {
  # "$1" is quoted so that a path containing whitespace or glob
  # characters reaches mkdir as one operand, matching the quoted test
  # above it; "--" stops a path beginning with "-" from being parsed
  # as an option.
  if [ ! -d "$1" ]; then
    mkdir -p -- "$1"
  fi
}
# Default settings.
# CDROM_DIR: directory searched for the mai95 text file; the -d option
#            replaces this value.
# PERL:      command used to run the Perl scripts under src/.
CDROM_DIR=/mnt/cdrom
PERL=perl

# Directory containing this script. "$0" is quoted so that a script
# path containing whitespace is passed to dirname as a single operand.
BASE_DIR=$(dirname "$0")

# Working and output directories, all located under BASE_DIR/dat.
DAT_DIR=$BASE_DIR/dat
DEST_DIR=$DAT_DIR/num   # intermediate files (*.all, *.org, *.knp)
SYN_DIR=$DAT_DIR/syn    # final .knp files of the "syn" corpus
REL_DIR=$DAT_DIR/rel    # final .knp files of the "rel" corpus
# Parse command-line options.
#   -d DIR  look for the mai95 text file in DIR instead of the default
#   -h      print the usage message and exit
# Any option getopts does not recognise also ends in usage.
while getopts d:h OPT; do
  case $OPT in
    d) CDROM_DIR=$OPTARG ;;
    h) usage ;;
    *) usage ;;
  esac
done

# Discard the options that were just parsed. POSIX arithmetic expansion
# is used instead of spawning an external `expr` process.
shift $((OPTIND - 1))
# Locate the mai95 text file inside CDROM_DIR. The candidate file names
# are tried in order and the first one that is a regular file wins.
# When none exists, report the default name and finish through usage.
MAI95_FILE=$CDROM_DIR/mai95.txt
for mai95_name in mai95.txt MAI95.txt mai1995.txt MAI1995.txt; do
  [ -f "$CDROM_DIR/$mai95_name" ] || continue
  MAI95_FILE=$CDROM_DIR/$mai95_name
  break
done

# If no candidate matched, MAI95_FILE still holds the default path,
# which is not a regular file.
if [ ! -f "$MAI95_FILE" ]; then
  echo "Not found: $CDROM_DIR/mai95.txt"
  usage
fi
# Make sure the two output directories exist. The paths are quoted so a
# BASE_DIR containing whitespace is passed as a single argument.
make_dir "$SYN_DIR"
make_dir "$REL_DIR"

# Feed the mai95 text file to src/trans.pl on stdin and pipe its output
# into src/mainichi.pl, which receives DEST_DIR as its only argument.
# Standard error of trans.pl is discarded.
# NOTE(review): mainichi.pl presumably writes the 95MM.all files that
# the later steps read from DEST_DIR -- confirm against the script.
# $PERL is deliberately left unquoted so that a value carrying
# interpreter flags would still split into separate words.
echo "converting ${MAI95_FILE} to SGML ..."
$PERL "$BASE_DIR/src/trans.pl" < "$MAI95_FILE" 2> /dev/null \
  | $PERL "$BASE_DIR/src/mainichi.pl" "$DEST_DIR"
# Build the "syn" corpus in SYN_DIR.
# All path expansions are quoted so that directories containing
# whitespace work, both as command arguments and as redirection
# targets. $PERL is deliberately left unquoted so that a value carrying
# interpreter flags would still split into separate words.
echo "making KyotoCorpus (syn) ..."

# Part 1: one file per listed day of 9501 (the list has no 02).
# format.pl -exed reads the month file 9501.all on stdin and its output
# becomes 9501DD.org; num2KNP.pl is then run on the 9501DD prefix and
# the resulting 9501DD.knp is moved into SYN_DIR.
# NOTE(review): "-exed" presumably selects non-editorial articles for
# the given date -- confirm against format.pl.
for i in 01 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17; do
  echo "converting 9501$i ..."
  # Earlier variant that additionally piped through dupli.pl:
  # $PERL $BASE_DIR/src/format.pl -exed 9501$i < $DEST_DIR/9501.all | $PERL $BASE_DIR/src/dupli.pl > $DEST_DIR/9501$i.org
  $PERL "$BASE_DIR/src/format.pl" -exed "9501$i" \
    < "$DEST_DIR/9501.all" > "$DEST_DIR/9501$i.org"
  $PERL "$BASE_DIR/src/num2KNP.pl" --without_self_id "$DEST_DIR/9501$i"
  mv -f -- "$DEST_DIR/9501$i.knp" "$SYN_DIR"
done

# Part 2: one file per month (95MMED). The .org file is truncated first
# and then receives the appended output of format.pl -ed for every day
# number 01..31 of that month.
# NOTE(review): "-ed" presumably selects editorials only -- confirm
# against format.pl.
for i in 01 02 03 04 05 06 07 08 09 10 11 12; do
  echo "converting 95${i}ED ..."
  : > "$DEST_DIR/95${i}ED.org"
  for j in 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 \
           17 18 19 20 21 22 23 24 25 26 27 28 29 30 31; do
    # Earlier variant that additionally piped through dupli.pl:
    # $PERL $BASE_DIR/src/format.pl -ed 95$i$j < $DEST_DIR/95$i.all | $PERL $BASE_DIR/src/dupli.pl >> $DEST_DIR/95${i}ED.org
    $PERL "$BASE_DIR/src/format.pl" -ed "95$i$j" \
      < "$DEST_DIR/95$i.all" >> "$DEST_DIR/95${i}ED.org"
  done
  $PERL "$BASE_DIR/src/num2KNP.pl" --without_self_id "$DEST_DIR/95${i}ED"
  mv -f -- "$DEST_DIR/95${i}ED.knp" "$SYN_DIR"
done
# Build the "rel" corpus in REL_DIR.
# For each listed day of 9501 (the list has no 02), num2KNP.pl is run
# again on the 9501DD prefix in DEST_DIR, this time with
# --rel BASE_DIR/id/full/all.id, and the resulting 9501DD.knp is moved
# into REL_DIR.
# Path expansions are quoted so that directories containing whitespace
# work. $PERL is deliberately left unquoted so that a value carrying
# interpreter flags would still split into separate words.
echo "making KyotoCorpus (rel) ..."
for i in 01 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17; do
  echo "converting 9501$i (with relation tags) ..."
  $PERL "$BASE_DIR/src/num2KNP.pl" --without_self_id \
    --rel "$BASE_DIR/id/full/all.id" "$DEST_DIR/9501$i"
  mv -f -- "$DEST_DIR/9501$i.knp" "$REL_DIR"
done