-
Notifications
You must be signed in to change notification settings - Fork 1
/
count-word-occurrences
executable file
·44 lines (36 loc) · 1.02 KB
/
count-word-occurrences
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/usr/bin/env bash
# Name this script was invoked under, with any directory part removed.
basename=$(basename "$0")

# A filename is mandatory: without one, print the usage line on stderr and
# stop with status 1.
if (( $# < 1 )); then
  echo "usage: $basename <filename> [minimum]" >&2
  exit 1
fi
# Succeed when $1 consists solely of decimal digits, i.e. it is a
# non-negative integer that a numeric `[ ... -ge ... ]` test can digest.
is_non_negative_integer() {
  case "$1" in
    '' | *[!0-9]*) return 1 ;;
    *) return 0 ;;
  esac
}

# Optionally filter out words below a minimum number of occurrences.
# Without a second argument the threshold is 1.
min_count="${2-1}"

# Reject a malformed minimum right away. Left unchecked, the numeric
# comparison performed for every dictionary word would fail each time,
# flooding stderr and producing no results.
if ! is_non_negative_integer "$min_count"; then
  >&2 echo "fatal: minimum '$min_count' is not a non-negative integer"
  exit 1
fi
# The first argument names the file to analyse. Give up unless it is an
# existing regular file (status 2) that can be read (status 3).
filename="$1"
[[ -f "$filename" ]] || {
  echo "fatal: file '$filename' does not exist" >&2
  exit 2
}
[[ -r "$filename" ]] || {
  echo "fatal: cannot read file '$filename'" >&2
  exit 3
}
# Work on a private copy of the input with Markdown links removed.
#
# The copy lives in a mktemp-generated file rather than a path derived from
# the input name: a name such as "docs/readme.md" would otherwise point into
# a directory that does not exist under /tmp, and a predictable name in a
# shared directory is unsafe. The file is deleted when the script exits.
textfile="$(mktemp "${TMPDIR:-/tmp}/${basename}.XXXXXX")" || {
  >&2 echo "fatal: cannot create temporary file"
  exit 4
}
trap 'rm -f -- "$textfile"' EXIT

# The expression deletes every inline link or image in full -- an optional
# "!", the bracketed text and the parenthesised target -- so neither the
# link text nor the URL reaches the word count.
sed 's/\!\{0,1\}\[[^]]*\]([^)]*)//g' -- "$filename" > "$textfile" || {
  >&2 echo "fatal: cannot write to file '$textfile'"
  exit 4
}
# Dictionary supplying the candidate words, one entry per line.
dictionary=/usr/share/dict/words
if [ ! -r "$dictionary" ]; then
  >&2 echo "fatal: cannot read dictionary '$dictionary'"
  exit 5
fi

# Walk the lower-cased dictionary in its own order and report every word that
# occurs at least $min_count times in $textfile, as "word: count".
#
# awk drops every repeated entry while keeping the original order; plain
# `uniq` would only remove duplicates that happen to be adjacent. Reading
# line by line (instead of expanding an unquoted variable) keeps entries from
# being glob-expanded by the shell.
while IFS= read -r word; do
  # An empty pattern would match everything; skip blank dictionary lines.
  [ -n "$word" ] || continue

  # -o prints each match on its own line, so wc counts occurrences rather
  # than matching lines (which is all `grep -c` reports). -i ignores case,
  # -w restricts to whole words, -F treats the entry as literal text and
  # `--` protects entries that begin with a dash.
  occurrences=$(grep -oiwF -- "$word" "$textfile" | wc -l)
  # Some wc implementations pad the number with blanks; normalise it.
  occurrences=$((occurrences))

  if [ "$occurrences" -ge "$min_count" ]; then
    echo "$word: $occurrences"
  fi
done < <(tr '[:upper:]' '[:lower:]' < "$dictionary" | awk '!seen[$0]++')