Skip to content

Commit

Permalink
Fixed chromedriver; TODO google chrome spawn needed
Browse files Browse the repository at this point in the history
  • Loading branch information
zdeneklapes committed Dec 25, 2023
1 parent a26f58b commit 75e6fc9
Show file tree
Hide file tree
Showing 9 changed files with 251 additions and 212 deletions.
20 changes: 20 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,24 @@ tmp/
*.cache
*.pkl
*.env

.dockerignore
.editorconfig
.github
.gitignore
.pre-commit-config.yaml
Dockerfile
Dockerfile_selenium
docker-compose.yml
pyproject.toml
scripts
tags
tests
.git/
.idea/
.ruff_cache/
bazos.egg-info/
tmp/
tokens/
venv/

26 changes: 18 additions & 8 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,17 @@ RUN set -ex && \
echo "deb http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list && \
apt-get update -y

RUN set -ex && \
apt-get install -y google-chrome-stable && \
export CHROMEVER=$(google-chrome --product-version | grep -o "[^\.]*\.[^\.]*\.[^\.]*") && \
export DRIVERVER=$(curl -s "https://chromedriver.storage.googleapis.com/LATEST_RELEASE_$CHROMEVER")
#RUN set -ex && \
# apt-get install -y google-chrome-stable && \
# export CHROMEVER=$(google-chrome --product-version | grep -o "[^\.]*\.[^\.]*\.[^\.]*") && \
# export DRIVERVER=$(curl -s "https://chromedriver.storage.googleapis.com/LATEST_RELEASE_$CHROMEVER")

# Install a pinned Google Chrome build rather than whatever google-chrome-stable
# currently resolves to. Override with: --build-arg CHROME_VERSION=<version>
ARG CHROME_VERSION="116.0.5845.187-1"
# Download the .deb, install it together with its dependencies, then delete the
# package file so it does not stay in the image layer.
# apt-get is used instead of apt because apt's command-line interface is not
# guaranteed to be stable for scripted use.
RUN wget --no-verbose -O /tmp/chrome.deb "https://dl.google.com/linux/chrome/deb/pool/main/g/google-chrome-stable/google-chrome-stable_${CHROME_VERSION}_amd64.deb" \
    && apt-get install -y /tmp/chrome.deb \
    && rm /tmp/chrome.deb

#ENV DISPLAY=:99

#RUN set -ex && \
# echo "Using chromedriver version: $DRIVERVER"
Expand All @@ -46,18 +53,21 @@ RUN set -ex && \
#RUN rm google-chrome-stable_current_amd64.deb


COPY requirements.txt /app/requirements.txt
COPY requirements.txt setup.py README.md /app/
#COPY setup.py /app/
#COPY requirements.txt /app/
COPY bazos /app/bazos

WORKDIR /app
RUN set -ex && \
pip install --upgrade pip && \
pip install -r /app/requirements.txt

#RUN set -ex && \
# pip install -e .
RUN set -ex && \
pip install -e /app

# set the proxy addresses
#ENV HTTP_PROXY "http://134.209.29.120:8080"
#ENV HTTPS_PROXY "https://45.77.71.140:9050"

WORKDIR /app
CMD ["fish"]
24 changes: 17 additions & 7 deletions bazos/main.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
import os
import pickle # nosec
import sys
Expand All @@ -19,6 +21,8 @@

load_dotenv()

# vim test.py


################################################################################
# BUG: Some images are rotated when you upload them to bazos
Expand Down Expand Up @@ -50,20 +54,26 @@ class XPathsBazos:

class BazosScrapper:
def __init__(self, country: str, cli_args: dict):
self.user = User(country=country, products_path=cli_args['path'])
self.bazos_country = country
service = Service()

options = Options()
# options.binary_location = '/usr/bin/google-chrome'
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
webdriver_manager = ChromeDriverManager().install()
service = Service(executable_path=webdriver_manager)
options = webdriver.ChromeOptions()
self.driver = webdriver.Chrome(service=service, options=options)

self.advertisements: int

# URLs
self.user = User(country=country, products_path=cli_args['path'])
self.bazos_country = country
self.advertisements: int
self.url_bazos = f"https://bazos.{country}"
self.url_moje_inzeraty = path.join(self.url_bazos, 'moje-inzeraty.php')

def __del__(self):
self.driver.quit()
# def __del__(self):
# self.driver.close()

def print_all_rubrics_and_categories(self):
self.driver.find_element(
Expand Down
199 changes: 195 additions & 4 deletions main.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,199 @@ RED='\033[0;31m'
NC='\033[0m'
GREEN='\033[0;32m'
DEBUG=1
ZIP_NAME='TODO.zip'
VPS_URI='TODO'

source ./scripts/docker.sh
source ./scripts/python.sh
source ./scripts/release.sh
source ./scripts/utils.sh
function prune_docker() {
    # Stop all containers, prune all unused Docker data and remove every volume.
    # WARNING: destructive - this wipes images, containers, networks and volumes.
    local containers volume

    # Stop containers only when there are any: `docker stop` fails without arguments
    containers=$(docker ps -aq)
    if [ -n "${containers}" ]; then
        # Intentionally unquoted: every container ID must become its own argument
        docker stop ${containers}
    fi
    docker system prune --all --force --volumes

    # Remove all volumes: not just dangling ones
    # (--quiet prints one volume name per line, so no JSON parsing is needed)
    docker volume ls --quiet | while IFS= read -r volume; do
        docker volume rm -f "${volume}"
    done
}

function docker_show_ipaddress() {
    # Show ip address of running containers
    # Output: one "<image>: <last IPAddress line of docker inspect>" line per container
    local container image ip_line

    for container in $(docker ps -aq); do
        # Image name (2nd column of `docker ps -a`); empty when the container
        # is in the Exited or Created state
        image=$(docker ps -a | grep "${container}" | grep --invert-match "Exited\|Created" | awk '{print $2}')

        # Skip containers that were filtered out above
        [ -n "${image}" ] || continue

        ip_line=$(docker inspect "${container}" | grep "IPAddress" | tail -n 1)
        # Fixed format string: the data is never interpreted as printf directives
        printf '%s: %s\n' "${image}" "${ip_line}"
    done
}

function dev_docker_up() {
    # Build the image from ./Dockerfile and, if the build succeeds, start an
    # interactive container from it that is removed on exit (--rm).
    # Mounted into the container:
    #   ./tmp/fish/  -> /root/.local/share/fish/
    #   ./bazos/     -> /app/bazos/
    #   ./scripts/   -> /app/scripts/
    #   $HOME/Documents/photos-archive/bazos -> /app/images/bazos/
    local image='zdeneklapes/bazos-api:latest'

    docker build -t "${image}" -f Dockerfile . || return

    # HOME is quoted so that a home directory containing whitespace still works
    docker run -it --rm \
        -v=./tmp/fish/:/root/.local/share/fish/ \
        -v=./bazos/:/app/bazos/ \
        -v=./scripts/:/app/scripts/ \
        -v="${HOME}/Documents/photos-archive/bazos":/app/images/bazos/ \
        "${image}"
}

function create_venv() {
    # Create venv in ./venv and install requirements.txt into it
    python3 -m venv venv || return

    # Call the interpreter inside the venv directly instead of activating it:
    # `deactivate` only exists while a venv is active, and activate.fish is
    # fish syntax that this (bash) script cannot source.
    venv/bin/python -m pip install -r requirements.txt
}

function release_patch() {
    # Create a GitHub release whose patch version is one higher than the latest release.
    # Get tagName, which is in format 0.0.0 (regex: [0-9]+\.[0-9]+\.[0-9]+)
    tag_name=$(gh release view --jq ".tagName" --json tagName)

    # Refuse to continue on a missing or malformed tag: the arithmetic below
    # would otherwise silently produce a bogus version
    if ! [[ "${tag_name}" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
        echo "Unexpected release tag '${tag_name}', expected MAJOR.MINOR.PATCH" >&2
        return 1
    fi

    patch_version=$(echo "${tag_name}" | sed -E 's/[0-9]+\.[0-9]+\.([0-9]+)/\1/')
    minor_version=$(echo "${tag_name}" | sed -E 's/[0-9]+\.([0-9]+)\.[0-9]+/\1/')
    major_version=$(echo "${tag_name}" | sed -E 's/([0-9]+)\.[0-9]+\.[0-9]+/\1/')

    # Increment patch version
    new_patch_version=$((patch_version + 1))

    # Create new tag
    tag_name="${major_version}.${minor_version}.${patch_version}"
    new_tag_name="${major_version}.${minor_version}.${new_patch_version}"
    echo "Releasing... ${tag_name} -> ${new_tag_name}"

    # Create release; title and notes name the NEW tag, like release_minor/release_major
    gh release create "${new_tag_name}" -t "Release ${new_tag_name}" -n "Released new patch version ${new_tag_name}"
}

function release_minor() {
    # Publish a new minor release: MAJOR.(MINOR+1).0
    # The latest release tag is expected to look like 0.0.0
    tag_name=$(gh release view --json tagName --jq ".tagName")

    # Split the tag into its three numeric components
    major_version=$(sed -E 's/([0-9]+)\.[0-9]+\.[0-9]+/\1/' <<< "${tag_name}")
    minor_version=$(sed -E 's/[0-9]+\.([0-9]+)\.[0-9]+/\1/' <<< "${tag_name}")
    patch_version=$(sed -E 's/[0-9]+\.[0-9]+\.([0-9]+)/\1/' <<< "${tag_name}")

    # Bump the minor component; the patch component restarts at 0
    new_minor_version=$((minor_version + 1))
    new_tag_name="${major_version}.${new_minor_version}.0"
    tag_name="${major_version}.${minor_version}.${patch_version}"

    echo "Releasing... ${tag_name} -> ${new_tag_name}"

    # Create the release on GitHub
    gh release create "${new_tag_name}" \
        --title "Release ${new_tag_name}" \
        --notes "Released new minor version ${new_tag_name}"
}

function release_major() {
    # Publish a new major release: (MAJOR+1).0.0
    # The latest release tag is expected to look like 0.0.0
    tag_name=$(gh release view --json tagName --jq ".tagName")

    # Split the tag into its three numeric components
    major_version=$(sed -E 's/([0-9]+)\.[0-9]+\.[0-9]+/\1/' <<< "${tag_name}")
    minor_version=$(sed -E 's/[0-9]+\.([0-9]+)\.[0-9]+/\1/' <<< "${tag_name}")
    patch_version=$(sed -E 's/[0-9]+\.[0-9]+\.([0-9]+)/\1/' <<< "${tag_name}")

    # Bump the major component; minor and patch restart at 0
    new_major_version=$((major_version + 1))
    new_tag_name="${new_major_version}.0.0"
    tag_name="${major_version}.${minor_version}.${patch_version}"

    echo "Releasing... ${tag_name} -> ${new_tag_name}"

    # Create the release on GitHub
    gh release create "${new_tag_name}" \
        --title "Release ${new_tag_name}" \
        --notes "Released new major version ${new_tag_name}"
}

function clean() {
    # Clean project folder. In order to only see what would be removed
    # (dry run), set env variable $DEBUG=1; any other value deletes for real.
    # NOTE(review): ${RM} is not defined in the visible part of this script;
    # it is presumably set near the top (e.g. RM="rm -f") - confirm.
    # It is expanded unquoted on purpose so that it may carry flags.
    local folder file

    # Folders
    for folder in \
        "venv" \
        "*__pycache__" \
        "*.ruff_cache" \
        "*.pytest_cache" \
        "*.cache" \
        "*htmlcov*" \
        "skip-covered" \
    ; do
        if [ "$DEBUG" -eq 1 ]; then
            find . -type d -iname "${folder}"
        else
            # -prune: do not descend into a directory that is about to be removed
            # -exec ... {} +: safe for names with whitespace, no-op when nothing matches
            find . -type d -iname "${folder}" -prune -exec ${RM} -rf {} +
        fi
    done

    # Files ("*.zip" is handled here too, so the dry run never deletes archives)
    for file in \
        "*.DS_Store" \
        "tags" \
        "db.sqlite3" \
        "*.png" \
        "*.zip" \
        "*.log" \
        "coverage.xml" \
        "*.coverage" \
        "coverage.lcov" \
    ; do
        if [ "$DEBUG" -eq 1 ]; then
            find . -type f -iname "${file}"
        else
            find . -type f -iname "${file}" -exec ${RM} {} +
        fi
    done
}

tags() {
    # Index the source tree: ctags recursively from the current directory,
    # then build the cscope database recursively without opening its UI
    ctags -R .
    cscope -R -b
}

function pack() {
    # Run clean, then zip the project files into ${ZIP_NAME}
    local -a contents=(
        .editorconfig
        Dockerfile
        requirements.txt
        .gitignore
        README.md
        pyproject.toml
        bazos
        scripts
        main.sh
    )

    clean
    zip -r "${ZIP_NAME}" "${contents[@]}"
}

function send() {
    # Send zipped project to VPS and then remove the zip file
    # The archive is kept when the upload fails so that it can be retried;
    # the scp exit status is returned in that case.
    scp "${ZIP_NAME}" "${VPS_URI}" || return
    rm -- "${ZIP_NAME}"
}

function help() {
    # Print usage on stdout
    # Lists every function declared in ./scripts/*.sh, each followed by the
    # indented comment lines that directly follow its declaration.
    local file func_name function_names

    echo "Available functions:"
    for file in ./scripts/*.sh; do
        # An unmatched glob stays literal; skip it instead of failing on a missing file
        [ -f "${file}" ] || continue

        # Extract the names from lines shaped like: function NAME() {
        function_names=$(grep -E ' *function +.*\(\) *\{' "${file}" \
            | sed -E -e 's/ *function +//' -e 's/ *\(\) *\{ *//')

        for func_name in ${function_names}; do
            printf ' %s\n' "${func_name}"
            # From the declaration on, print the indented comment lines; at the
            # first other line emit a blank separator line and stop.
            awk -v fn="${func_name}" '
                BEGIN { header = "function +" fn " *\\(\\)" }
                $0 ~ header { flag = 1; next }
                flag && /^ +#/ { print " " $0; next }
                flag { print ""; exit }
            ' "${file}"
        done
    done
}

usage() {
    # Alias for help: prints the list of available functions on stdout
    help
}

function die() {
    # Print error message on stdout, show the help and exit with status 1
    #   $1: the error message
    # The message is passed as a printf argument, not as part of the format,
    # so a "%" or backslash inside it is printed literally. The colour codes
    # stay in the format because printf must interpret their escapes.
    printf "${RED}ERROR: %s${NC}\n" "$1"
    help
    exit 1
}

function main() {
    # Main function: Call other functions based on input arguments
    # Every argument must be the name of a function defined in this script;
    # they are executed in the order given.
    [[ "$#" -eq 0 ]] && die "No arguments provided"
    while [ "$#" -gt 0 ]; do
        # Accept shell functions only, so an arbitrary command name is rejected
        # instead of being executed
        declare -F -- "$1" > /dev/null || die "Unknown function: $1()"
        # A known function that fails is reported as a failure, not as unknown
        "$1" || die "Function failed: $1()"
        shift
    done
}

main "$@"
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ pysocks==1.7.1
selenium==4.10.0
tomli==2.0.1
versioneer==0.29
webdriver-manager==3.8.6
webdriver-manager==4.0.1
31 changes: 0 additions & 31 deletions scripts/docker.sh

This file was deleted.

8 changes: 0 additions & 8 deletions scripts/python.sh

This file was deleted.

Loading

0 comments on commit 75e6fc9

Please sign in to comment.