-
Notifications
You must be signed in to change notification settings - Fork 0
/
entrypoint.sh
65 lines (51 loc) · 1.76 KB
/
entrypoint.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/bin/sh -l
# Entrypoint: captures the positional arguments into named variables that the
# rest of the script reads. Missing arguments simply become empty strings.

# Abort on the first command that fails outside of a condition.
set -e

# Values later written to the docsearch .env file.
APPLICATION_ID="${1}"
API_KEY="${2}"

# List of files the scraper is run against, one run per entry.
FILES="${3}"

# File changes in documentation repository
ADDED="${4}"
REMOVED="${5}"
UPDATED="${6}"
RENAMED="${7}"
# Build from the main source repository; every later step runs inside the
# cloned checkout.
git clone https://github.com/vtexdocs/docsearch-scraper.git
cd docsearch-scraper/

# Install pipenv (pinned version)
pip3 install pipenv==2018.11.26

# Download ChromeDriver
# -f makes curl exit non-zero on an HTTP error instead of returning the error
# page as the "version"; -S keeps the error message visible despite -s.
# NOTE(review): this storage bucket appears to be the legacy ChromeDriver
# distribution channel — confirm it still serves a driver matching the Chrome
# installed in the image.
chromedriverStableVersion=$(curl -fsS 'https://chromedriver.storage.googleapis.com/LATEST_RELEASE')
if [ -z "${chromedriverStableVersion}" ]; then
  echo "Could not determine the latest ChromeDriver release" >&2
  exit 1
fi
wget -q "https://chromedriver.storage.googleapis.com/${chromedriverStableVersion}/chromedriver_linux64.zip"
unzip chromedriver_linux64.zip
chown root:root chromedriver
chmod +x chromedriver
# Create the .env file for docsearch
# A here-document is used instead of `echo` because some /bin/sh
# implementations' echo interprets backslash escapes, which would alter values
# containing backslashes. Expanded values are written verbatim here.
# The empty line before the terminator keeps the file's trailing blank line.
cat > .env <<EOF
APPLICATION_ID=${APPLICATION_ID}
API_KEY=${API_KEY}
CHROMEDRIVER_PATH=/github/workspace/docsearch-scraper/chromedriver
ADDED_FILES=${ADDED}
REMOVED_FILES=${REMOVED}
UPDATED_FILES=${UPDATED}
RENAMED_FILES=${RENAMED}

EOF
# Python version used both for the virtualenv and for the site-packages path
# that the copy below targets.
python_version=3.6

# Install the project dependencies with the virtualenv placed in ./.venv.
PIPENV_VENV_IN_PROJECT=true pipenv install --python "${python_version}"

# Copy the repository's utils/webclient.py into Scrapy's core/downloader
# package directory inside the virtualenv.
printf '%s\n' "Update webclient.py"
cp ./utils/webclient.py "./.venv/lib/python${python_version}/site-packages/scrapy/core/downloader/"
#######################################
# Run the docsearch scraper once for every file in a list.
# A failing file is reported and the loop continues with the next one, so a
# single failure does not abort the run.
# Arguments: $1 - list of files to process (split on whitespace after eval)
# Outputs:   progress, success and failure messages on stdout
# Returns:   0 (per-file failures are reported, not propagated)
#######################################
index_files() {
  echo "🗂️ Files to process: $1"
  # Loop through each file and run the scraper
  # NOTE(review): `eval` executes whatever shell syntax the file list contains
  # (command substitutions, `;`, ...). It is kept because callers may rely on
  # its quote/brace handling — confirm the input is trusted, or replace it.
  for FILE in $(eval echo "$1"); do
    echo "🔍 Running scraper for $FILE"
    # Run the scraper and check if it was successful; `yes` answers any
    # interactive prompt. $FILE is quoted so the name is passed as one
    # argument and is not glob-expanded a second time.
    if yes | pipenv run ./docsearch run "$FILE"; then
      # Print success message only if the file was processed successfully
      echo "✅ Successfully indexed and uploaded the results for $FILE to Algolia"
    else
      # Print error message if the scraper failed for the file
      echo "❌ Failed to index and upload results for $FILE"
    fi
  done
}

index_files "$FILES"
#######################################
# Capture errors (if any) and append them to the step output as `errors`.
# The value is written with the `name<<DELIMITER` multiline syntax, because a
# plain `errors=<value>` line breaks the output file as soon as the errors
# file holds more than one line.
# Arguments: $1 - path of the errors file (nothing happens if it is missing)
#            $2 - path of the GitHub output file to append to
# Returns:   0 on success or when there is no errors file, 1 if $2 is empty
#######################################
publish_errors_output() {
  [ -f "$1" ] || return 0
  if [ -z "$2" ]; then
    echo "GITHUB_OUTPUT is not set; cannot publish errors" >&2
    return 1
  fi
  errors=$(cat "$1")
  # Pick a delimiter that does not occur as a full line of the content,
  # otherwise the value would be cut short at that line.
  delimiter="ERRORS_EOF_$$"
  while printf '%s\n' "$errors" | grep -qxF -- "$delimiter"; do
    delimiter="${delimiter}_"
  done
  {
    printf 'errors<<%s\n' "$delimiter"
    printf '%s\n' "$errors"
    printf '%s\n' "$delimiter"
  } >> "$2"
}

publish_errors_output ./outputs/errors.txt "${GITHUB_OUTPUT:-}"