diff --git a/.dockerignore b/.dockerignore index d4939dc..0cadc7a 100644 --- a/.dockerignore +++ b/.dockerignore @@ -17,4 +17,24 @@ tmp/ *.cache *.pkl *.env + +.dockerignore +.editorconfig +.github +.gitignore +.pre-commit-config.yaml +Dockerfile +Dockerfile_selenium +docker-compose.yml +pyproject.toml +scripts tags +tests +.git/ +.idea/ +.ruff_cache/ +bazos.egg-info/ +tmp/ +tokens/ +venv/ + diff --git a/Dockerfile b/Dockerfile index c73a9a7..8ae7113 100644 --- a/Dockerfile +++ b/Dockerfile @@ -22,10 +22,17 @@ RUN set -ex && \ echo "deb http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list && \ apt-get update -y -RUN set -ex && \ - apt-get install -y google-chrome-stable && \ - export CHROMEVER=$(google-chrome --product-version | grep -o "[^\.]*\.[^\.]*\.[^\.]*") && \ - export DRIVERVER=$(curl -s "https://chromedriver.storage.googleapis.com/LATEST_RELEASE_$CHROMEVER") +#RUN set -ex && \ +# apt-get install -y google-chrome-stable && \ +# export CHROMEVER=$(google-chrome --product-version | grep -o "[^\.]*\.[^\.]*\.[^\.]*") && \ +# export DRIVERVER=$(curl -s "https://chromedriver.storage.googleapis.com/LATEST_RELEASE_$CHROMEVER") + +ARG CHROME_VERSION="116.0.5845.187-1" +RUN wget --no-verbose -O /tmp/chrome.deb https://dl.google.com/linux/chrome/deb/pool/main/g/google-chrome-stable/google-chrome-stable_${CHROME_VERSION}_amd64.deb \ + && apt-get install -y /tmp/chrome.deb \ + && rm /tmp/chrome.deb + +#ENV DISPLAY=:99 #RUN set -ex && \ # echo "Using chromedriver version: $DRIVERVER" @@ -46,18 +53,21 @@ RUN set -ex && \ #RUN rm google-chrome-stable_current_amd64.deb -COPY requirements.txt /app/requirements.txt +COPY requirements.txt setup.py README.md /app/ +#COPY setup.py /app/ +#COPY requirements.txt /app/ +COPY bazos /app/bazos +WORKDIR /app RUN set -ex && \ pip install --upgrade pip && \ pip install -r /app/requirements.txt -#RUN set -ex && \ -# pip install -e . 
+RUN set -ex && \ + pip install -e /app # set the proxy addresses #ENV HTTP_PROXY "http://134.209.29.120:8080" #ENV HTTPS_PROXY "https://45.77.71.140:9050" -WORKDIR /app CMD ["fish"] diff --git a/bazos/main.py b/bazos/main.py index 2c931ed..a92e93f 100644 --- a/bazos/main.py +++ b/bazos/main.py @@ -1,3 +1,5 @@ +from webdriver_manager.chrome import ChromeDriverManager +from selenium.webdriver.chrome.options import Options import os import pickle # nosec import sys @@ -19,6 +21,8 @@ load_dotenv() +# vim test.py + ################################################################################ # BUG: Some images are rotated, when you upload them to bazos @@ -50,20 +54,26 @@ class XPathsBazos: class BazosScrapper: def __init__(self, country: str, cli_args: dict): - self.user = User(country=country, products_path=cli_args['path']) - self.bazos_country = country - service = Service() - options = webdriver.ChromeOptions() + + # Configure the single options object that is passed to webdriver.Chrome below; do not rebind it afterwards + options = Options() + # options.binary_location = '/usr/bin/google-chrome' + options.add_argument('--headless') + options.add_argument('--no-sandbox') + options.add_argument('--disable-dev-shm-usage') + webdriver_manager = ChromeDriverManager().install() + service = Service(executable_path=webdriver_manager) self.driver = webdriver.Chrome(service=service, options=options) - self.advertisements: int - # URLs + self.user = User(country=country, products_path=cli_args['path']) + self.bazos_country = country + self.advertisements: int self.url_bazos = f"https://bazos.{country}" self.url_moje_inzeraty = path.join(self.url_bazos, 'moje-inzeraty.php') - def __del__(self): - self.driver.quit() + # def __del__(self): + # self.driver.close() def print_all_rubrics_and_categories(self): self.driver.find_element( diff --git a/main.sh b/main.sh index 94ca218..daf8735 100755 --- a/main.sh +++ b/main.sh @@ -6,8 +6,199 @@ RED='\033[0;31m' NC='\033[0m' GREEN='\033[0;32m' DEBUG=1 +ZIP_NAME='TODO.zip' +VPS_URI='TODO' -source ./scripts/docker.sh -source ./scripts/python.sh 
-source ./scripts/release.sh -source ./scripts/utils.sh +function prune_docker() { + # Stop and remove all containers + docker stop $(docker ps -aq) + docker system prune --all --force --volumes + + # Remove all volumes: not just dangling ones + for i in $(docker volume ls --format json | jq -r ".Name"); do + docker volume rm -f ${i} + done +} + +function docker_show_ipaddress() { + # Show ip address of running containers + for docker_container in $(docker ps -aq); do + CMD1="$(docker ps -a | grep "${docker_container}" | grep --invert-match "Exited\|Created" | awk '{print $2}'): " + if [ "${CMD1}" != ": " ]; then + printf "${CMD1}" + printf "$(docker inspect ${docker_container} | grep "IPAddress" | tail -n 1)\n" + fi + done +} + +function dev_docker_up() { + docker build -t zdeneklapes/bazos-api:latest -f Dockerfile . && \ + docker run -it --rm \ + -v=./tmp/fish/:/root/.local/share/fish/ \ + -v=./bazos/:/app/bazos/ \ + -v=./scripts/:/app/scripts/ \ + -v=$HOME/Documents/photos-archive/bazos:/app/images/bazos/ \ + zdeneklapes/bazos-api:latest +} + +function create_venv() { + # Create venv + python3 -m venv venv + deactivate + source venv/bin/activate + pip3 install -r requirements.txt + deactivate +} + +function release_patch() { + # Get tagName, which is in format 0.0.0 (regex: v[0-9]+\.[0-9]+\.[0-9]+) + tag_name=$(gh release view --jq ".tagName" --json tagName) + patch_version=$(echo "${tag_name}" | sed -E 's/[0-9]+\.[0-9]+\.([0-9]+)/\1/') + minor_version=$(echo "${tag_name}" | sed -E 's/[0-9]+\.([0-9]+)\.[0-9]+/\1/') + major_version=$(echo "${tag_name}" | sed -E 's/([0-9]+)\.[0-9]+\.[0-9]+/\1/') + + # Increment patch version + new_patch_version=$((patch_version + 1)) + + # Create new tag + tag_name="${major_version}.${minor_version}.${patch_version}" + new_tag_name="${major_version}.${minor_version}.${new_patch_version}" + echo "Releasing... 
${tag_name} -> ${new_tag_name}" + + # Create release + gh release create "${new_tag_name}" -t "Release ${new_tag_name}" -n "Released new patch version ${new_tag_name}" +} + +function release_minor() { + # Get tagName, which is in format 0.0.0 (regex: v[0-9]+\.[0-9]+\.[0-9]+) + tag_name=$(gh release view --jq ".tagName" --json tagName) + patch_version=$(echo "$tag_name" | sed -E 's/[0-9]+\.[0-9]+\.([0-9]+)/\1/') + minor_version=$(echo "$tag_name" | sed -E 's/[0-9]+\.([0-9]+)\.[0-9]+/\1/') + major_version=$(echo "$tag_name" | sed -E 's/([0-9]+)\.[0-9]+\.[0-9]+/\1/') + + # Increment minor version + new_minor_version=$((minor_version + 1)) + + # Create new tag + tag_name="${major_version}.${minor_version}.${patch_version}" + new_tag_name="${major_version}.${new_minor_version}.0" + echo "Releasing... ${tag_name} -> ${new_tag_name}" + + # Create release + gh release create "${new_tag_name}" -t "Release ${new_tag_name}" -n "Released new minor version ${new_tag_name}" +} + +function release_major() { + # Get tagName, which is in format 0.0.0 (regex: v[0-9]+\.[0-9]+\.[0-9]+) + tag_name=$(gh release view --jq ".tagName" --json tagName) + patch_version=$(echo "$tag_name" | sed -E 's/[0-9]+\.[0-9]+\.([0-9]+)/\1/') + minor_version=$(echo "$tag_name" | sed -E 's/[0-9]+\.([0-9]+)\.[0-9]+/\1/') + major_version=$(echo "$tag_name" | sed -E 's/([0-9]+)\.[0-9]+\.[0-9]+/\1/') + + # Increment major version + new_major_version=$((major_version + 1)) + + # Create new tag + tag_name="${major_version}.${minor_version}.${patch_version}" + new_tag_name="${new_major_version}.0.0" + echo "Releasing... 
${tag_name} -> ${new_tag_name}" + + # Create release + gh release create "${new_tag_name}" -t "Release ${new_tag_name}" -n "Released new major version ${new_tag_name}" +} + +function clean() { + # Clean project folder in order to see what will be done, set env variable $DEBUG=1 + ${RM} *.zip + # Folders + for folder in \ + "venv" \ + "*__pycache__" \ + "*.ruff_cache" \ + "*.pytest_cache" \ + "*.cache" \ + "*htmlcov*" \ + "skip-covered"\ + ; do + if [ "$DEBUG" -eq 1 ]; then find . -type d -iname "${folder}"; else find . -type d -iname "${folder}" | xargs ${RM} -rf; fi + done + # Files + for file in \ + "*.DS_Store" \ + "tags" \ + "db.sqlite3" \ + "*.png" \ + "*.zip" \ + "*.log" \ + "coverage.xml" \ + "*.coverage" \ + "coverage.lcov" \ + ; do + if [ "$DEBUG" -eq 1 ]; then find . -type f -iname "${file}"; else find . -type f -iname "${file}" | xargs ${RM}; fi + done +} + +function tags() { + # Create tags and cscope + ctags -R . + cscope -Rb +} + +function pack() { + # Clean and Zip project + clean + zip -r "${ZIP_NAME}" \ + .editorconfig \ + Dockerfile \ + requirements.txt \ + .gitignore \ + README.md \ + pyproject.toml \ + bazos \ + scripts \ + main.sh +} + +function send() { + # Send zipped project to VPS and then remove the zip file + scp "${ZIP_NAME}" "${VPS_URI}" + rm ${ZIP_NAME} +} + +function help() { + # Print usage on stdout + echo "Available functions:" + for file in "${BASH_SOURCE[0]}"; do + function_names=$(cat "${file}" | grep -E "(\ *)function\ +.*\(\)\ *\{" | sed -E "s/\ *function\ +//" | sed -E "s/\ *\(\)\ *\{\ *//") + for func_name in ${function_names[@]}; do + printf " $func_name\n" + awk "/function ${func_name}()/ { flag = 1 }; flag && /^\ +#/ { print \" \" \$0 }; flag && !/^\ +#/ && !/function ${func_name}()/ { print "\n"; exit }" "${file}" + done + done + +} + +function usage() { + # Print usage on stdout + help +} + +function die() { + # Print error message on stdout and exit + printf "${RED}ERROR: $1${NC}\n" + help + exit 1 +} + +function main() { + # 
Main function: Call other functions based on input arguments + [[ "$#" -eq 0 ]] && die "No arguments provided" + while [ "$#" -gt 0 ]; do + case "$1" in + *) declare -F "$1" >/dev/null || die "Unknown function: $1()"; "$1" || die "Function failed: $1()" ;; + esac + shift + done +} + +main "$@" diff --git a/requirements.txt b/requirements.txt index 618b3ca..5f4a281 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,4 +8,4 @@ pysocks==1.7.1 selenium==4.10.0 tomli==2.0.1 versioneer==0.29 -webdriver-manager==3.8.6 +webdriver-manager==4.0.1 diff --git a/scripts/docker.sh b/scripts/docker.sh deleted file mode 100644 index ab88b44..0000000 --- a/scripts/docker.sh +++ /dev/null @@ -1,31 +0,0 @@ -function prune_docker() { - # Stop and remove all containers - docker stop $(docker ps -aq) - docker system prune --all --force --volumes - - # Remove all volumes: not just dangling ones - for i in $(docker volume ls --format json | jq -r ".Name"); do - docker volume rm -f ${i} - done -} - -function docker_show_ipaddress() { - # Show ip address of running containers - for docker_container in $(docker ps -aq); do - CMD1="$(docker ps -a | grep "${docker_container}" | grep --invert-match "Exited\|Created" | awk '{print $2}'): " - if [ ${CMD1} != ": " ]; then - printf "${CMD1}" - printf "$(docker inspect ${docker_container} | grep "IPAddress" | tail -n 1)\n" - fi - done -} - -function dev_docker_up() { - docker build -t zdeneklapes/bazos-api:latest -f Dockerfile . 
&& \ - docker run -it --rm \ - -v=./tmp/fish/:/root/.local/share/fish/ \ - -v=./bazos/:/app/bazos/ \ - -v=./scripts/:/app/scripts/ \ - -v=$HOME/Documents/photos-archive/bazos:/app/images/bazos/ \ - zdeneklapes/bazos-api:latest -} diff --git a/scripts/python.sh b/scripts/python.sh deleted file mode 100644 index f4a8405..0000000 --- a/scripts/python.sh +++ /dev/null @@ -1,8 +0,0 @@ -function create_venv() { - # Create venv - python3 -m venv venv - deactivate - source venv/bin/activate.fish - pip3 install -r requirements.txt - deactivate -} diff --git a/scripts/release.sh b/scripts/release.sh deleted file mode 100644 index 411a456..0000000 --- a/scripts/release.sh +++ /dev/null @@ -1,56 +0,0 @@ -function release_patch() { - # Get tagName, which is in format 0.0.0 (regex: v[0-9]+\.[0-9]+\.[0-9]+) - tag_name=$(gh release view --jq ".tagName" --json tagName) - patch_version=$(echo "${tag_name}" | sed -E 's/[0-9]+\.[0-9]+\.([0-9]+)/\1/') - minor_version=$(echo "${tag_name}" | sed -E 's/[0-9]+\.([0-9]+)\.[0-9]+/\1/') - major_version=$(echo "${tag_name}" | sed -E 's/([0-9]+)\.[0-9]+\.[0-9]+/\1/') - - # Increment patch version - new_patch_version=$((patch_version + 1)) - - # Create new tag - tag_name="${major_version}.${minor_version}.${patch_version}" - new_tag_name="${major_version}.${minor_version}.${new_patch_version}" - echo "Releasing... 
${tag_name} -> ${new_tag_name}" - - # Create release - gh release create "${new_tag_name}" -t "Release ${patch_version}" -n "Released new patch version ${patch_version}" -} - -function release_minor() { - # Get tagName, which is in format 0.0.0 (regex: v[0-9]+\.[0-9]+\.[0-9]+) - tag_name=$(gh release view --jq ".tagName" --json tagName) - patch_version=$(echo "$tag_name" | sed -E 's/[0-9]+\.[0-9]+\.([0-9]+)/\1/') - minor_version=$(echo "$tag_name" | sed -E 's/[0-9]+\.([0-9]+)\.[0-9]+/\1/') - major_version=$(echo "$tag_name" | sed -E 's/([0-9]+)\.[0-9]+\.[0-9]+/\1/') - - # Increment minor version - new_minor_version=$((minor_version + 1)) - - # Create new tag - tag_name="${major_version}.${minor_version}.${patch_version}" - new_tag_name="${major_version}.${new_minor_version}.0" - echo "Releasing... ${tag_name} -> ${new_tag_name}" - - # Create release - gh release create "${new_tag_name}" -t "Release ${new_tag_name}" -n "Released new minor version ${new_tag_name}" -} - -function release_major() { - # Get tagName, which is in format 0.0.0 (regex: v[0-9]+\.[0-9]+\.[0-9]+) - tag_name=$(gh release view --jq ".tagName" --json tagName) - patch_version=$(echo "$tag_name" | sed -E 's/[0-9]+\.[0-9]+\.([0-9]+)/\1/') - minor_version=$(echo "$tag_name" | sed -E 's/[0-9]+\.([0-9]+)\.[0-9]+/\1/') - major_version=$(echo "$tag_name" | sed -E 's/([0-9]+)\.[0-9]+\.[0-9]+/\1/') - - # Increment major version - new_major_version=$((major_version + 1)) - - # Create new tag - tag_name="${major_version}.${minor_version}.${patch_version}" - new_tag_name="${new_major_version}.0.0" - echo "Releasing... 
${tag_name} -> ${new_tag_name}" - - # Create release - gh release create "${new_tag_name}" -t "Release ${new_tag_name}" -n "Released new major version ${new_tag_name}" -} diff --git a/scripts/utils.sh b/scripts/utils.sh deleted file mode 100644 index 865831e..0000000 --- a/scripts/utils.sh +++ /dev/null @@ -1,97 +0,0 @@ -ZIP_NAME='TODO.zip' -VPS_URI='TODO' -function clean() { - # Clean project folder in order to see what will be done, set env variable $DEBUG=1 - ${RM} *.zip - # Folders - for folder in \ - "venv" \ - "*__pycache__" \ - "*.ruff_cache" \ - "*.pytest_cache" \ - "*.cache" \ - "*htmlcov*" \ - "skip-covered"\ - ; do - if [ "$DEBUG" -eq 1 ]; then find . -type d -iname "${folder}"; else find . -type d -iname "${folder}" | xargs ${RM} -rf; fi - done - # Files - for file in \ - "*.DS_Store" \ - "tags" \ - "db.sqlite3" \ - "*.png" \ - "*.zip" \ - "*.log" \ - "coverage.xml" \ - "*.coverage" \ - "coverage.lcov" \ - ; do - if [ "$DEBUG" -eq 1 ]; then find . -type f -iname "${file}"; else find . -type f -iname "${file}" | xargs ${RM}; fi - done -} - -function tags() { - # Create tags and cscope - ctags -R . 
- cscope -Rb -} - -function pack() { - # Clean and Zip project - clean - zip -r "${ZIP_NAME}" \ - .editorconfig \ - Dockerfile \ - requirements.txt \ - .gitignore \ - README.md \ - pyproject.toml \ - bazos \ - scripts \ - main.sh -} - -function send() { - # Send zipped project to VPS and then remove the zip file - scp "${ZIP_NAME}" "${VPS_URI}" - rm ${ZIP_NAME} -} - -function help() { - # Print usage on stdout - echo "Available functions:" - for file in ./scripts/*.sh; do - function_names=$(cat ${file} | grep -E "(\ *)function\ +.*\(\)\ *\{" | sed -E "s/\ *function\ +//" | sed -E "s/\ *\(\)\ *\{\ *//") - for func_name in ${function_names[@]}; do - printf " $func_name\n" - awk "/function ${func_name}()/ { flag = 1 }; flag && /^\ +#/ { print \" \" \$0 }; flag && !/^\ +#/ && !/function ${func_name}()/ { print "\n"; exit }" ${file} - done - done - -} - -function usage() { - # Print usage on stdout - help -} - -function die() { - # Print error message on stdout and exit - printf "${RED}ERROR: $1${NC}\n" - help - exit 1 -} - -function main() { - # Main function: Call other functions based on input arguments - [[ "$#" -eq 0 ]] && die "No arguments provided" - while [ "$#" -gt 0 ]; do - case "$1" in - *) "$1" || die "Unknown function: $1()" ;; - esac - shift - done -} - -main "$@"