-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun.sh
executable file
·137 lines (104 loc) · 3.78 KB
/
run.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#!/bin/bash
# Both positional arguments are required; stop with exit status 1 as soon as
# one of them is missing or empty.
#   $1 - file extension of the source files to process
#   $2 - language parser (matcher) to use for them
[ -n "$1" ] || {
  echo "Give file extension to extract like .go or .sol"
  exit 1
}
[ -n "$2" ] || {
  echo "Give file language parser like .go"
  exit 1
}
# All paths are resolved against the directory the script is launched from.
ROOT=$(pwd)
COMBY=comby

# global names
SOURCES="$ROOT/sources"
TEMPLATES="$ROOT/templates"
FRAGMENTS="$ROOT/fragments"

# temporary working directories and names.
EXTRACTED="$ROOT/extracted"
# Deliberately relative: the nesting pass below cd's into this directory and
# creates a directory of the same name inside it, one level per depth.
EXTRACTED_FOR_FILE=extracted_for_file
FRAGMENTS_FOR_FILE="$ROOT/fragments_for_file"

# patterns for extraction
EXTRACT_PATTERNS_DIR="$ROOT/extraction_specifications"
EXTENSION="$1"
# Kept in MATCHER, not LANG: LANG is the locale variable, and assigning the
# matcher name to it would hand an invalid locale to every child process.
MATCHER="$2"

# Collect the extraction specs with a glob (no `ls` parsing) into an array so
# that paths containing whitespace stay intact. No match => empty array.
EXTRACTORS=()
for spec in "$EXTRACT_PATTERNS_DIR"/*.toml; do
  if [ -e "$spec" ]; then
    EXTRACTORS+=("$spec")
  fi
done
PYTHON_EXTRACTOR="$ROOT/extract.py"

# Start every run from empty output and scratch directories.
for dir in "$TEMPLATES" "$FRAGMENTS" "$EXTRACTED" "$EXTRACTED_FOR_FILE" "$FRAGMENTS_FOR_FILE"; do
  rm -rf -- "$dir"
  mkdir -p -- "$dir"
done

for s in "$SOURCES"/*"$EXTENSION"; do
  # An unmatched glob stays literal; skip it (and anything that is not a file).
  [ -f "$s" ] || continue

  # Computed once per source, up front: it is needed after the fragment loop
  # even when that loop never runs because nothing was extracted.
  SOURCE_NAME=$(basename "$s")
  printf '%s' "$s"

  ### extract concrete fragments for this file based on the extractors ###
  for e in "${EXTRACTORS[@]}"; do
    printf '  %s  ' "$(basename "$e")"
    # write matches to $EXTRACTED_FOR_FILE
    "$COMBY" -sequential -config "$e" -d "$SOURCES" -f "$s" -matcher "$MATCHER" -json-lines -match-only \
      | python "$PYTHON_EXTRACTOR" "$EXTRACTED_FOR_FILE" "$EXTENSION"
  done
  echo

  # if there is an arg to this script, then nest.
  # NOTE(review): $1 is mandatory (the script exits earlier without it), so the
  # "skipping" branch is currently unreachable; kept to preserve the toggle.
  if [ -n "$1" ]; then
    MAX_DEPTH=10
    printf 'depth'
    for ((i = 1; i <= MAX_DEPTH; i++)); do
      # Descend one level and re-run the extractors on what the previous level
      # produced; results land in a fresh same-named directory one level down.
      cd "$EXTRACTED_FOR_FILE" || {
        echo "cannot enter $EXTRACTED_FOR_FILE" >&2
        exit 1
      }
      mkdir -- "$EXTRACTED_FOR_FILE"
      printf '  %s  ' "$i"
      for e in "${EXTRACTORS[@]}"; do
        "$COMBY" -sequential -config "$e" -f "$EXTENSION" -matcher "$MATCHER" -json-lines -match-only \
          | python "$PYTHON_EXTRACTOR" "$EXTRACTED_FOR_FILE" "$EXTENSION"
      done
      # An empty level means no extractor matched anything new: stop descending.
      if [ -z "$(ls -A -- "$EXTRACTED_FOR_FILE")" ]; then
        echo 'Nothing more to generate, stopping'
        rm -rf -- "$EXTRACTED_FOR_FILE"
        break
      fi
    done
  else
    printf '... skipping nesting ...'
  fi
  cd "$ROOT" || exit 1

  # save the concrete fragments for this file in the global corpus
  find "$EXTRACTED_FOR_FILE" -name "*${EXTENSION}" -exec cp -n {} "$FRAGMENTS" \;

  ### templatizing ###
  printf '... templatizing ...'
  # flatten extracted and dedupe everything for this file.
  find "$EXTRACTED_FOR_FILE" -name "*${EXTENSION}" -exec cp -n {} "$FRAGMENTS_FOR_FILE" \;
  fdupes -dN "$FRAGMENTS_FOR_FILE" &> /dev/null
  echo ":[:[id()]]" > "$FRAGMENTS_FOR_FILE/rewrite"
  for c in "$FRAGMENTS_FOR_FILE"/*"$EXTENSION"; do
    [ -f "$c" ] || continue
    # Each fragment in turn becomes the match template for a rewrite of $s.
    cp -- "$c" "$FRAGMENTS_FOR_FILE/match"
    FRAGMENT_NAME=$(basename "$c")
    "$COMBY" -sequential -matcher "$MATCHER" -f "$s" -templates "$FRAGMENTS_FOR_FILE" -stdout \
      > "$TEMPLATES/${SOURCE_NAME%.*}.${FRAGMENT_NAME%.*}.template${EXTENSION}"
  done
  # temporarily include the source file as a template for deduping processing all files
  cp -- "$s" "$TEMPLATES/$SOURCE_NAME.delete.me.123"

  ###
  # we're done with this file
  rm -rf -- "$FRAGMENTS_FOR_FILE" "$EXTRACTED_FOR_FILE"
  mkdir -p -- "$EXTRACTED_FOR_FILE" "$FRAGMENTS_FOR_FILE"
  echo
done
# Remove the scratch directories. Paths are quoted so a launch directory that
# contains whitespace is removed as one path instead of being word-split.
rm -rf -- "$FRAGMENTS_FOR_FILE" "$EXTRACTED_FOR_FILE" "$EXTRACTED"

# strip comments and dedup templates (had no holes, wasn't templatized).
# The rewrite deletes every `//` line comment (up to and including the
# newline) in place, in files under $TEMPLATES whose name ends in $EXTENSION.
"$COMBY" -matcher .txt '//:[x\n]' '' -d "$TEMPLATES" "$EXTENSION" -i
fdupes -dN "$TEMPLATES" > /dev/null
# The source copies were only there to take part in the dedup above.
find "$TEMPLATES" -name "*.delete.me.123" -exec rm -- {} +

# dedup concrete fragments (for files that share fragments).
"$COMBY" -matcher .txt '//:[x\n]' '' -d "$FRAGMENTS" "$EXTENSION" -i
fdupes -dN "$FRAGMENTS" > /dev/null

echo "-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-"
echo -n "Extracted this many total concrete fragments: "
ls "$FRAGMENTS" | wc -l
echo -n "Created this many total templates: "
ls "$TEMPLATES" | wc -l