Skip to content

Commit

Permalink
Fix driver issues with version; Added: new args, remote/local runner,…
Browse files Browse the repository at this point in the history
… containerised environment (#17)

* README specified requirements

* Create Dockerfile and ignore tmp/ directory

tmp/ directory stores fish history and some other stuff that should be
correctly mapped using docker volumes

* Added Scripts

* Update README.md and docker.sh script

* Fix dockerfile

* tests

* .

* Fixed `pip install -e .` + can be installed locally again

* Fixed chromedriver; TODO google chrome spawn needed

* .

* restructuring the code

* Fixed headless

* .

* Fixed
  • Loading branch information
zdeneklapes authored Dec 26, 2023
1 parent 97e32be commit 1e49038
Show file tree
Hide file tree
Showing 29 changed files with 987 additions and 436 deletions.
30 changes: 30 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Paths excluded from the Docker build context.
# The Dockerfile in this repository copies only requirements.txt, setup.py,
# README.md, make.sh and the bazos/ package, so nothing listed here is needed
# inside the image.
.git/
.idea/
tokens/
others/
logs/
.cache/
__pycache__/
*.pytest_cache/
.ruff_cache/
dist/
venv/
.venv/
*.egg-info/
tmp/
.github
tests


# Local data and environment files (*.env is excluded, so env files never enter the build context)
.DS_Store
*.pkl
*.env
# Repository tooling and Docker build files themselves
.dockerignore
.editorconfig
.gitignore
.pre-commit-config.yaml
Dockerfile
Dockerfile_selenium
docker-compose.yml
pyproject.toml
tags
2 changes: 1 addition & 1 deletion .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true

[{*.py, *.pyx, *.pxd}]
# No spaces inside the brace list: they would become part of the matched extension.
[*.{py,pyx,pxd,sh}]
charset = utf-8
indent_style = space
indent_size = 4
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
.git/
.idea/
tokens/
others/
Expand All @@ -10,6 +11,7 @@ dist/
venv/
.venv/
*.egg-info/
tmp/

.DS_Store
*.cache
Expand Down
140 changes: 140 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
FROM ubuntu:20.04
#FROM python:3.10
# MAINTAINER is deprecated; the author is recorded as an image label instead.
LABEL maintainer="Zdenek Lapes"

# Keep apt/debconf from prompting. ENV (rather than ARG) is kept so the value
# is still present in the running container, as before.
ENV DEBIAN_FRONTEND=noninteractive \
    DEBCONF_NONINTERACTIVE_SEEN=true

# Update the repositories and upgrade packages in a single layer, so a cached
# `update` layer is never paired with a freshly executed `upgrade`.
# The package lists are deliberately left in place: the `apt-get install`
# steps that follow rely on them.
RUN apt-get -yqq update \
    && apt-get -yqq upgrade

# Set locale and reconfigure
ENV LANGUAGE=en_US.UTF-8 \
    LANG=en_US.UTF-8
# One layer for install + generation; language-pack-en used to be installed
# a second time at the end of this section, which was a no-op.
RUN apt-get install -y locales language-pack-en tzdata \
    && locale-gen en_US.UTF-8 \
    && dpkg-reconfigure --frontend noninteractive locales
#RUN apt-get install -y locales tzdata

# Set timezone
ENV TZ="US/Eastern"
# Writing /etc/timezone alone is not enough: when reconfigured, the tzdata
# package takes the zone from the /etc/localtime symlink and rewrites
# /etc/timezone from it. Point the symlink at the zone as well so the
# reconfigure step keeps the requested zone.
RUN ln -snf "/usr/share/zoneinfo/${TZ}" /etc/localtime \
    && echo "${TZ}" > /etc/timezone \
    && dpkg-reconfigure --frontend noninteractive tzdata

## Base command-line utilities (wget, curl and unzip are used by later download steps)
RUN apt-get -yqq install \
    ca-certificates \
    curl \
    dnsutils \
    man \
    openssl \
    unzip \
    wget

## X virtual framebuffer and the font packages installed alongside it
RUN apt-get -yqq install \
    xvfb \
    fonts-ipafont-gothic \
    xfonts-100dpi \
    xfonts-75dpi \
    xfonts-scalable \
    xfonts-cyrillic

## Install Fluxbox (window manager)
## NOTE(review): nothing in this Dockerfile starts it; presumably it is launched
## by the supervisor configuration copied from ./etc/supervisor/conf.d — confirm.
RUN apt-get -yqq install fluxbox

# VNC server, plus the directory that later receives its password file
RUN apt-get -yqq install x11vnc \
    && mkdir -p ~/.vnc

# Supervisor and its log directory
RUN apt-get -yqq install supervisor \
    && mkdir -p /var/log/supervisor

# Install Java
# Headless JRE only; the Selenium server downloaded below is shipped as a .jar.
RUN apt-get -yqq install openjdk-11-jre-headless

# Install Selenium
# The jar is fetched over HTTPS (it was plain HTTP) so the download cannot be
# tampered with in transit; ca-certificates is installed earlier in this file.
# NOTE(review): no checksum is verified here — consider pinning one.
# The version-less symlink gives a stable path to the server jar.
RUN mkdir -p /opt/selenium \
    && wget --no-verbose -O /opt/selenium/selenium-server-standalone-2.43.1.jar https://selenium-release.storage.googleapis.com/2.43/selenium-server-standalone-2.43.1.jar \
    && ln -fs /opt/selenium/selenium-server-standalone-2.43.1.jar /opt/selenium/selenium-server-standalone.jar

# Install Chrome WebDriver
# Download, unpack and clean up in ONE layer: removing the zip in a later RUN
# would leave it stored in the earlier layer. Fetched over HTTPS, not HTTP.
# NOTE(review): chromedriver 2.10 is pinned while google-chrome-stable below is
# unpinned — confirm the two versions are actually compatible.
RUN wget --no-verbose -O /tmp/chromedriver_linux64.zip https://chromedriver.storage.googleapis.com/2.10/chromedriver_linux64.zip \
    && mkdir -p /opt/chromedriver-2.10 \
    && unzip /tmp/chromedriver_linux64.zip -d /opt/chromedriver-2.10 \
    && chmod +x /opt/chromedriver-2.10/chromedriver \
    && rm /tmp/chromedriver_linux64.zip \
    && ln -fs /opt/chromedriver-2.10/chromedriver /usr/local/bin/chromedriver

# Install Google Chrome
# The signing key is saved to a file before being added: in a `wget | apt-key`
# pipeline under /bin/sh only the last command's exit status is checked, so the
# build would not stop on a failed download by itself.
RUN apt-get -yqq install gnupg2 \
    && wget -q -O /tmp/linux_signing_key.pub https://dl-ssl.google.com/linux/linux_signing_key.pub \
    && apt-key add /tmp/linux_signing_key.pub \
    && rm /tmp/linux_signing_key.pub \
    && echo "deb http://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list

# Refresh the lists and install in the same layer, so a cached `update` layer
# is never combined with a later install against a changed repository.
RUN apt-get -yqq update \
    && apt-get -yqq install google-chrome-stable
#ARG CHROME_VERSION="116.0.5845.187-1"
#RUN wget --no-verbose -O /tmp/chrome.deb https://dl.google.com/linux/chrome/deb/pool/main/g/google-chrome-stable/google-chrome-stable_${CHROME_VERSION}_amd64.deb \
# && apt install -y /tmp/chrome.deb \
# && rm /tmp/chrome.deb

# Install Firefox
# Unpinned: whichever version the configured apt repositories provide at build time.
RUN apt-get -yqq install firefox

# Configure Supervisor
# COPY instead of ADD: this is a plain local directory, so none of ADD's extra
# behaviour (archive extraction, URL fetching) is wanted.
COPY ./etc/supervisor/conf.d /etc/supervisor/conf.d
#ADD ./etc/supervisord.conf /etc/supervisor/conf.d

# Configure VNC Password
# NOTE(review): the password ("selenium") is hard-coded here and ends up in an image layer.
# No USER instruction precedes this step, so ~ is the home of the default build user.
RUN x11vnc -storepasswd selenium ~/.vnc/passwd

# Create the `selenium` user (bash shell, own home directory) and add it to the sudo group.
# NOTE(review): the sudoers rule appended below grants passwordless sudo to
# EVERY user, not only selenium — confirm this is intended.
RUN useradd selenium --shell /bin/bash --create-home \
    && usermod -a -G sudo selenium \
    && echo "ALL ALL = (ALL) NOPASSWD: ALL" >> /etc/sudoers

# Default configuration
# NOTE: DISPLAY is set again (to :99) near the end of this file; that later
# value is the one the running container sees.
ENV SCREEN_GEOMETRY="1440x900x24" \
    SELENIUM_PORT=4444 \
    DISPLAY=:20.0

# Disable the SUID sandbox so that Chrome can launch without being in a privileged container.
# One unfortunate side effect is that `google-chrome --help` will no longer work.
# The real launcher is diverted to google-chrome.real and replaced by a wrapper
# script that always passes --disable-setuid-sandbox.
# printf is used to write the wrapper: `echo "...\n..."` only produced a newline
# because the default /bin/sh echo expands escapes, which other shells do not.
RUN dpkg-divert --add --rename --divert /opt/google/chrome/google-chrome.real /opt/google/chrome/google-chrome \
    && printf '#!/bin/bash\nexec /opt/google/chrome/google-chrome.real --disable-setuid-sandbox "$@"\n' > /opt/google/chrome/google-chrome \
    && chmod 755 /opt/google/chrome/google-chrome

# Interactive tooling: fish is the image's default command; bat and vim are for working inside the container
RUN apt-get install -y bat fish vim

# Install python3.10
# Python 3.10 comes from the deadsnakes PPA. apt-get replaces apt (whose
# command-line interface is not meant for scripts), and the package lists are
# refreshed explicitly after the PPA is added rather than relying on
# add-apt-repository doing it implicitly.
RUN apt-get install -y software-properties-common && \
    add-apt-repository -y ppa:deadsnakes/ppa && \
    apt-get update && \
    apt-get install -y \
        python3-dev \
        python3-pip \
        python3.10 \
        python3.10-dev \
        python3.10-distutils \
        python3.10-venv

# Native build dependencies for PyCairo
RUN apt-get install -y libcairo2-dev libffi-dev pkg-config

# Make Python 3.10 available as /usr/local/bin/python3
RUN ln -s /usr/bin/python3.10 /usr/local/bin/python3

# Install pip for python3.10
# The installer is downloaded to a file with `curl -f` and then executed:
# with `curl | python3`, a failed download would feed python3 an empty script
# and the step would still succeed.
RUN curl -fsSL -o /tmp/get-pip.py https://bootstrap.pypa.io/get-pip.py \
    && python3 /tmp/get-pip.py \
    && rm /tmp/get-pip.py

# Install Python dependencies BEFORE copying the application code, so that
# source-only changes do not invalidate the dependency layer.
# --no-cache-dir keeps pip's download cache out of the image.
COPY requirements.txt /app/
RUN pip install --no-cache-dir -r /app/requirements.txt
#RUN pip install -e /app

# Application files
COPY setup.py README.md make.sh /app/
COPY bazos /app/bazos

# Ports
# 4444 matches SELENIUM_PORT set earlier in this file; 5900 is presumably the
# x11vnc port (the conventional VNC default) — confirm against the supervisor config.
EXPOSE 4444 5900

# Final DISPLAY for the running container; this replaces the :20.0 value set earlier in this file.
ENV DISPLAY=:99

WORKDIR /app
# Default command is an interactive fish shell; the commented alternative below
# would run the project's make.sh with the "entrypoint" argument instead.
CMD ["fish"]
#CMD ["/app/make.sh", "entrypoint"]
3 changes: 3 additions & 0 deletions Hub/example-config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Example router credentials.
# NOTE(review): presumably placeholders to be replaced in a real config — confirm.
[router]
username = "admin"
password = "admin"
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,15 @@ pre-commit install
### Create virtual environment + install dependencies

```shell
./make.sh dev_docker_up

# or locally
python3 -m venv venv
source venv/bin/activate
pip install -r requirements.txt
pip install -e .

# Try run
# And then you can try:
python3 bazos --help
```

Expand Down
131 changes: 96 additions & 35 deletions bazos/__init__.py
Original file line number Diff line number Diff line change
@@ -1,54 +1,115 @@
import argparse
from pathlib import Path
import sys
from typing import Dict, Any
from distutils.util import strtobool # noqa

from bazos.main import bazos as bz
from bazos.scrapper import BazosScrapper, BazosUser, BazosDriver

__version__ = "0.1.0"
__apiversion__ = "0.1.0"
__author__ = 'Zdenek Lapes'
__license__ = 'MIT'


def parse_cli_argument() -> Dict[str, Any]:
parser = argparse.ArgumentParser()
parser.add_argument(
'-b', '--bazos',
action='store_true',
help='Use bazos'
)
parser.add_argument(
'--add-only',
action='store_true',
help='Add only new products, not remove old ones'
)
parser.add_argument(
'--print-rubrics',
action='store_true',
help='Print rubrics'
)
parser.add_argument(
'--country',
nargs="+",
help="What bazos country to use",
default=['cz', 'sk']
BOOL_AS_STR_ARGUMENTS_for_parser_add_argument = dict(
type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True
)
parser.add_argument(
'-p', '--path',
help='Path to products directory'
)
parser.add_argument(
"--update-credentials",
action='store_true',
help='Update credentials'
)
cli_args = vars(parser.parse_args())
return cli_args
# true/false
parser.add_argument('--login',
**BOOL_AS_STR_ARGUMENTS_for_parser_add_argument,
help='Login to bazos')
parser.add_argument('--bazos',
**BOOL_AS_STR_ARGUMENTS_for_parser_add_argument,
help='Use bazos')
parser.add_argument('--add-only',
**BOOL_AS_STR_ARGUMENTS_for_parser_add_argument,
help='Add only new products, not remove old ones')
parser.add_argument('--print-rubrics',
**BOOL_AS_STR_ARGUMENTS_for_parser_add_argument,
help='Print rubrics')
parser.add_argument("--verbose",
**BOOL_AS_STR_ARGUMENTS_for_parser_add_argument,
help='Verbose')
# Help texts describe what main() does with each flag (they were copy-pasted
# from --verbose): --delete-all triggers delete_advertisements(), --create-all
# triggers create_advertisements().
parser.add_argument("--delete-all",
                    **BOOL_AS_STR_ARGUMENTS_for_parser_add_argument,
                    help='Delete all advertisements')
parser.add_argument("--create-all",
                    **BOOL_AS_STR_ARGUMENTS_for_parser_add_argument,
                    help='Create all advertisements')
parser.add_argument('--remote',
**BOOL_AS_STR_ARGUMENTS_for_parser_add_argument,
help='Use remote')
# ?
parser.add_argument('--items-path',
type=Path,
required=True,
nargs='?',
help='Path to products directory')
# Help text was copy-pasted from --items-path; this option is the credentials location.
parser.add_argument('--credentials-path',
                    type=Path,
                    required=True,
                    nargs='?',
                    help='Path to credentials')
# +
parser.add_argument('--country',
nargs="+",
help="What bazos country to use",
default=['cz', 'sk'])
args = vars(parser.parse_args())
return args


def main():
cli_args = parse_cli_argument()
args = parse_cli_argument()

# Print arguments
if args['verbose']:
print(' '.join(sys.argv))

if cli_args['bazos']:
bz(cli_args=cli_args)
# Driver
bazos_driver = BazosDriver(args=args, country='cz')

# Login
if args['login']:
bazos_user = BazosUser(country='cz', args=args, driver=bazos_driver.driver)
bazos_user.authenticate()
bazos_user.save_user_credentials()
else:
bazos_user = BazosUser(country='cz', args=args, driver=bazos_driver.driver)
bazos_user.exists_user_credentials()

# Rubrics
if args['print_rubrics']:
for country in args['country']:
bazos_user = BazosUser(country=country, args=args, driver=bazos_driver.driver)
bazos_scrapper = BazosScrapper(country=country, args=args, user=bazos_user, driver=bazos_driver.driver)
bazos_scrapper.load_page_with_cookies()
bazos_scrapper.print_all_rubrics_and_categories()

# Bazos
if args['bazos']:
for country in args['country']:
bazos_user = BazosUser(country=country, args=args, driver=bazos_driver.driver)

if args['verbose']:
print(f"==> Processing country: {country}")

bazos_scrapper = BazosScrapper(country=country, args=args, user=bazos_user, driver=bazos_driver.driver)
bazos_scrapper.load_page_with_cookies()

# Restore advertisements
if args['delete_all']:
bazos_scrapper.delete_advertisements()
if args['create_all']:
bazos_scrapper.create_advertisements()
sys.exit()


if __name__ == '__main__':
from dotenv import load_dotenv

load_dotenv(dotenv_path='.env')
main()
30 changes: 28 additions & 2 deletions bazos/__main__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,28 @@
from bazos.__init__ import main
main()
# from bazos.__init__ import main
# main()

#
# from selenium.webdriver.chrome.options import Options
# from selenium import webdriver
# from selenium.webdriver.chrome.service import Service
#
#
# def set_chrome_options() -> Options:
# """Sets chrome options for Selenium.
# Chrome options for headless browser is enabled.
# """
# chrome_options = Options()
# chrome_options.add_argument("--headless")
# chrome_options.add_argument("--no-sandbox")
# chrome_options.add_argument("--disable-dev-shm-usage")
# chrome_prefs = {}
# chrome_options.experimental_options["prefs"] = chrome_prefs
# chrome_prefs["profile.default_content_settings"] = {"images": 2}
# return chrome_options
#
#
# if __name__ == "__main__":
# service = Service(executable_path="/usr/local/bin/chromedriver")
# driver = webdriver.Chrome(options=set_chrome_options(), service=service)
# # Do stuff with your driver
# driver.close()
Loading

0 comments on commit 1e49038

Please sign in to comment.