-
Notifications
You must be signed in to change notification settings - Fork 6
/
Wileybook
executable file
·76 lines (61 loc) · 2.36 KB
/
Wileybook
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/bin/bash
# Call this script like
# Wileybook http://onlinelibrary.wiley.com/book/10.1002/9780470316887 or (mandatory for some books)
# Wileybook http://onlinelibrary.wiley.com/book/10.1002/0471705195
# where the final digits are the book's ISBN (possibly in its 10-digit form, i.e. without the leading 978)
# Parse the arguments: exactly one argument (the URL of the book) is required.
if [[ $# -ne 1 ]]; then
  # A usage error is reported on stderr and with a non-zero exit status so
  # that calling scripts can tell it apart from a successful run.
  echo "Usage: $(basename -- "$0") URL-to-Wiley-content" >&2
  echo "Example: $(basename -- "$0") http://onlinelibrary.wiley.com/book/10.1002/9780470316887" >&2
  exit 1
fi
# First (and only) command line argument is the URL of the online resource
URL=$1
# The ISBN is taken to be the run of ten or more consecutive digits in the URL.
# printf with a quoted argument passes the URL to grep verbatim (no word
# splitting, no globbing on characters such as '?').
ISBN=$(printf '%s\n' "$URL" | grep -o '[0-9]\{10,\}')
# Accept the result only if there is exactly one such run: an empty value
# means nothing was found, and a value containing a newline means several
# candidates were found, which would be ambiguous and would also put a
# newline into every path built from $ISBN further down.
if [[ -n "$ISBN" && "$ISBN" != *$'\n'* ]]; then
  echo "Looking up $URL (ISBN $ISBN)"
else
  echo "$(basename -- "$0"): Failed to parse ISBN." >&2
  exit 1
fi
# Remember the current directory (restored by the popd at the end of the script)
pushd .
# Make a fresh temporary directory for this book
echo "Making directory /tmp/$ISBN"
# ${ISBN:?} aborts the script instead of running "rm -rf /tmp/" should ISBN
# ever be empty or unset at this point.
rm -rf -- "/tmp/${ISBN:?}"
if ! mkdir -- "/tmp/$ISBN"; then
  echo "$(basename -- "$0"): Cannot create /tmp/$ISBN" >&2
  exit 1
fi
# Get the source
# Everything below reads and writes files relative to the working directory,
# so stop here if it cannot be entered rather than clobbering files elsewhere.
if ! cd -- "/tmp/$ISBN"; then
  echo "$(basename -- "$0"): Cannot change to /tmp/$ISBN" >&2
  exit 1
fi
echo "$(basename -- "$0"): Getting the HTML source"
# The page is stored in the file "source", which the later steps parse.
if ! wget --user-agent="Mozilla/5.0" "$URL" -O source; then
  echo "$(basename -- "$0"): Failed to download $URL" >&2
  exit 1
fi
# Pull the session ID out of the downloaded page: "s=" followed by
# 40 characters from [a-f0-9]. It is exported into the environment.
SID=$(grep -o 's=[a-f0-9]\{40\}' source)
export SID
printf 'We have the session id %s\n' "$SID"
# Parse the source to retrieve an ordered list of download URLs
# grep -o '"/doi/10\.1002/$ISBN/pdf"' source > list
grep -o '/doi[a-zA-Z0-9./]*/pdf[[:alnum:]?=&;]*' source > list
# Start with a fresh wget log; the renaming step later parses it for file names.
rm -f logfile
echo "$(basename -- "$0"): Getting the individual chapter files"
# Read the list line by line instead of word-splitting $(cat list): entries
# may contain '?', which an unquoted expansion would treat as a glob.
# The list is read on file descriptor 3 so the commands inside the loop
# cannot consume it through stdin.
while IFS= read -r chapter_path <&3; do
  # Fetch the page behind the /doi/.../pdf link into source2; it is searched
  # below for the direct link to the PDF under /store/.
  if ! wget --user-agent="Mozilla/5.0" "http://onlinelibrary.wiley.com$chapter_path" -O source2; then
    echo "$(basename -- "$0"): Could not fetch $chapter_path, skipping" >&2
    continue
  fi
  # -m 1 stops after the first matching line, but -o still prints every match
  # on that line; head keeps only the first so the URL is a single line.
  pdf_url=$(grep -m 1 -o 'http://onlinelibrary.wiley.com/store/[a-zA-Z0-9.:/]*.pdf[[:alnum:]?=&;]*' source2 | head -n 1)
  if [[ -z "$pdf_url" ]]; then
    echo "$(basename -- "$0"): No PDF link found for $chapter_path, skipping" >&2
    continue
  fi
  # -a appends wget's output to logfile, including its "Saving to:" line.
  wget --user-agent="Mozilla/5.0" "$pdf_url" -a logfile
done 3< list
#######################################
# Build the list of downloaded chapter files and rename them to 1.pdf,
# 2.pdf, ... in download order.
# Works on files in the current directory:
#   reads  logfile        - wget log containing "Saving to: ..." lines
#   writes download_list  - the PDF file names found in the log
#   writes concat_list    - the new names, one per line, in order
#                           (not created when nothing could be renamed)
# Returns: 0
#######################################
number_downloads() {
  local downloaded count=0

  # Create a list of files to concatenate: take the file name out of every
  # "Saving to:" line of the log; uniq drops adjacent repeats.
  grep -e "^Saving to:.*" logfile \
    | grep --only-matching -e "[[:alnum:]-]*\.pdf[[:alnum:].?=&;]*" \
    | uniq > download_list

  # qpdf is confused by the name of the downloaded files.
  # Therefore, we rename them.
  rm -f concat_list
  # Read line by line: the names may contain '?', which an unquoted
  # $(cat download_list) would treat as a glob pattern.
  while IFS= read -r downloaded; do
    # "--" keeps a name starting with '-' from being parsed as an option.
    # A file is numbered and listed only once it has really been renamed, so
    # concat_list never names a file that does not exist.
    if mv -- "$downloaded" "$((count + 1)).pdf"; then
      count=$((count + 1))
      echo "$count.pdf" >> concat_list
    fi
  done < download_list
  return 0
}

number_downloads
# The output file is named after the ISBN.
BOOKTITLE=$ISBN
# Concatenate the pdf files to a book
echo "$(basename -- "$0"): Concatenating files"
# Load the chapter names into an array so that each one reaches qpdf as
# exactly one argument, in the order recorded in concat_list.
chapters=()
if [[ -r concat_list ]]; then
  while IFS= read -r chapter; do
    chapters+=("$chapter")
  done < concat_list
fi
if (( ${#chapters[@]} == 0 )); then
  echo "$(basename -- "$0"): No chapter files to concatenate." >&2
  exit 1
fi
qpdf_status=0
qpdf --empty "$BOOKTITLE.pdf" --pages "${chapters[@]}" -- || qpdf_status=$?
# qpdf exits with 3 when it only issued warnings and still wrote the output;
# any other non-zero status means the book was not produced, so do not
# announce success in that case.
if (( qpdf_status != 0 && qpdf_status != 3 )); then
  echo "$(basename -- "$0"): qpdf failed (exit status $qpdf_status)." >&2
  exit 1
fi
# Return to the current directory
popd
# Inform the user
echo "The book has been compiled and now resides in /tmp/$ISBN/$BOOKTITLE.pdf"