-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix driver issues with version; Added: new args, remote/local runner,…
… containerised environment (#17) * README specified requirements * Create Dockerfile and ignore tmp/ directory tmp/ directory stores fish history and some other stuff that should be correctly mapped using docker volumes * Added Scripts * Update README.md and docker.sh script * Fix dockerfile * tests * . * Fixed `pip install -e .` + can be installed locally again * Fixed chromedriver; TODO google chrome spawn needed * . * restructuring the code * Fixed headless * . * Fixed
- Loading branch information
1 parent
97e32be
commit 1e49038
Showing
29 changed files
with
987 additions
and
436 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
.git/ | ||
.idea/ | ||
tokens/ | ||
others/ | ||
logs/ | ||
.cache/ | ||
__pycache__/ | ||
*.pytest_cache/ | ||
.ruff_cache/ | ||
dist/ | ||
venv/ | ||
.venv/ | ||
*.egg-info/ | ||
tmp/ | ||
.github | ||
tests | ||
|
||
|
||
.DS_Store | ||
*.pkl | ||
*.env | ||
.dockerignore | ||
.editorconfig | ||
.gitignore | ||
.pre-commit-config.yaml | ||
Dockerfile | ||
Dockerfile_selenium | ||
docker-compose.yml | ||
pyproject.toml | ||
tags |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
.git/ | ||
.idea/ | ||
tokens/ | ||
others/ | ||
|
@@ -10,6 +11,7 @@ dist/ | |
venv/ | ||
.venv/ | ||
*.egg-info/ | ||
tmp/ | ||
|
||
.DS_Store | ||
*.cache | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
FROM ubuntu:20.04
LABEL org.opencontainers.image.authors="Zdenek Lapes"

# Build-time only: keep apt/debconf non-interactive during the build without
# leaking the setting into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive
ARG DEBCONF_NONINTERACTIVE_SEEN=true

# Versions of the artefacts downloaded below. The defaults are the versions the
# image has always shipped; override with --build-arg to upgrade.
# NOTE(review): chromedriver 2.10 and Selenium server 2.43.1 are very old and
# are unlikely to drive the current google-chrome-stable - confirm and bump.
ARG SELENIUM_VERSION=2.43.1
ARG CHROMEDRIVER_VERSION=2.10

# Locale and timezone used both at build time and at runtime.
ENV LANGUAGE=en_US.UTF-8 \
    LANG=en_US.UTF-8 \
    TZ=US/Eastern

# Fail a RUN step when any command of a pipeline fails (wget | gpg, curl | python).
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# System packages: locale/timezone data, utilities, the X stack (xvfb, fonts,
# fluxbox, x11vnc), supervisor, Java for the Selenium server, Firefox and a few
# interactive tools. `apt-get update` lives in the same layer as `install` so a
# cached, stale package index can never be used, and the index is removed again
# in that layer to keep the image small.
# Recommended packages are intentionally still installed: later steps may rely
# on them (for example a compiler toolchain for building Python wheels).
RUN apt-get -yqq update \
    && apt-get -yqq install \
        bat \
        ca-certificates \
        curl \
        dnsutils \
        firefox \
        fish \
        fluxbox \
        fonts-ipafont-gothic \
        gnupg2 \
        language-pack-en \
        locales \
        man \
        openjdk-11-jre-headless \
        openssl \
        supervisor \
        tzdata \
        unzip \
        vim \
        wget \
        x11vnc \
        xfonts-100dpi \
        xfonts-75dpi \
        xfonts-cyrillic \
        xfonts-scalable \
        xvfb \
    && locale-gen en_US.UTF-8 \
    && dpkg-reconfigure --frontend noninteractive locales \
    # tzdata derives its configuration from /etc/localtime, so link it first;
    # writing /etc/timezone alone is reset by dpkg-reconfigure.
    && ln -fs "/usr/share/zoneinfo/${TZ}" /etc/localtime \
    && echo "${TZ}" > /etc/timezone \
    && dpkg-reconfigure --frontend noninteractive tzdata \
    && mkdir -p /var/log/supervisor ~/.vnc \
    && rm -rf /var/lib/apt/lists/*

# Install the Selenium standalone server; the version-less symlink gives
# consumers a stable path.
RUN mkdir -p /opt/selenium \
    && wget --no-verbose \
        -O "/opt/selenium/selenium-server-standalone-${SELENIUM_VERSION}.jar" \
        "https://selenium-release.storage.googleapis.com/${SELENIUM_VERSION%.*}/selenium-server-standalone-${SELENIUM_VERSION}.jar" \
    && ln -fs "/opt/selenium/selenium-server-standalone-${SELENIUM_VERSION}.jar" \
        /opt/selenium/selenium-server-standalone.jar

# Install Chrome WebDriver. Download, unpack and delete the archive in a single
# layer so the zip file never ends up in the image.
RUN wget --no-verbose -O /tmp/chromedriver_linux64.zip \
        "https://chromedriver.storage.googleapis.com/${CHROMEDRIVER_VERSION}/chromedriver_linux64.zip" \
    && mkdir -p "/opt/chromedriver-${CHROMEDRIVER_VERSION}" \
    && unzip /tmp/chromedriver_linux64.zip -d "/opt/chromedriver-${CHROMEDRIVER_VERSION}" \
    && rm /tmp/chromedriver_linux64.zip \
    && chmod +x "/opt/chromedriver-${CHROMEDRIVER_VERSION}/chromedriver" \
    && ln -fs "/opt/chromedriver-${CHROMEDRIVER_VERSION}/chromedriver" /usr/local/bin/chromedriver

# Install Google Chrome from Google's apt repository. The signing key is stored
# in a dedicated keyring referenced via `signed-by` instead of the deprecated
# `apt-key add`, and the list file is overwritten (not appended to) so rebuilds
# never duplicate the entry.
# To pin a specific Chrome build instead, download
# google-chrome-stable_<version>_amd64.deb from dl.google.com and install that.
RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub \
        | gpg --dearmor -o /usr/share/keyrings/google-chrome.gpg \
    && echo "deb [arch=amd64 signed-by=/usr/share/keyrings/google-chrome.gpg] https://dl.google.com/linux/chrome/deb/ stable main" \
        > /etc/apt/sources.list.d/google-chrome.list \
    && apt-get -yqq update \
    && apt-get -yqq install google-chrome-stable \
    && rm -rf /var/lib/apt/lists/*

# Disable the SUID sandbox so that Chrome can launch without being in a privileged container.
# One unfortunate side effect is that `google-chrome --help` will no longer work.
# printf is used because bash's builtin echo does not expand "\n".
RUN dpkg-divert --add --rename --divert /opt/google/chrome/google-chrome.real /opt/google/chrome/google-chrome \
    && printf '#!/bin/bash\nexec /opt/google/chrome/google-chrome.real --disable-setuid-sandbox "$@"\n' \
        > /opt/google/chrome/google-chrome \
    && chmod 755 /opt/google/chrome/google-chrome

# Install Python 3.10 from the deadsnakes PPA together with the headers and
# libraries needed to build PyCairo.
RUN apt-get -yqq update \
    && apt-get -yqq install software-properties-common \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get -yqq update \
    && apt-get -yqq install \
        libcairo2-dev \
        libffi-dev \
        pkg-config \
        python3-dev \
        python3-pip \
        python3.10 \
        python3.10-dev \
        python3.10-distutils \
        python3.10-venv \
    && rm -rf /var/lib/apt/lists/* \
    # /usr/local/bin precedes /usr/bin on PATH, so `python3` now means 3.10
    # while the distribution's /usr/bin/python3 stays untouched for apt tooling.
    && ln -fs /usr/bin/python3.10 /usr/local/bin/python3

# Install pip for python3.10
RUN curl -fsSL https://bootstrap.pypa.io/get-pip.py | python3.10

# Configure Supervisor (plain local copy, so COPY rather than ADD)
COPY ./etc/supervisor/conf.d /etc/supervisor/conf.d

# Configure VNC Password
# NOTE(review): this is a fixed, publicly known password baked into the image;
# do not expose port 5900 outside a trusted network.
RUN x11vnc -storepasswd selenium ~/.vnc/passwd

# Create a default user with sudo access. The passwordless rule is scoped to
# this user instead of being granted to every account on the system.
RUN useradd selenium --shell /bin/bash --create-home \
    && usermod -a -G sudo selenium \
    && mkdir -p /etc/sudoers.d \
    && echo "selenium ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/selenium \
    && chmod 0440 /etc/sudoers.d/selenium

# Default configuration. DISPLAY used to be set twice (":20.0" and later ":99");
# only the last assignment was effective at runtime, so it is the one kept.
ENV SCREEN_GEOMETRY=1440x900x24 \
    SELENIUM_PORT=4444 \
    DISPLAY=:99

WORKDIR /app

# Install the Python dependencies before copying the sources so this layer is
# rebuilt only when requirements.txt changes.
COPY requirements.txt /app/
RUN python3.10 -m pip install --no-cache-dir -r /app/requirements.txt

COPY setup.py README.md make.sh /app/
COPY bazos /app/bazos

# Ports: 4444 (Selenium), 5900 (VNC)
EXPOSE 4444 5900

# NOTE(review): the container still runs as root and starts an interactive
# fish shell, as before; "/app/make.sh entrypoint" is the non-interactive
# alternative mentioned by the original file.
CMD ["fish"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
[router] | ||
username = "admin" | ||
password = "admin" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,54 +1,115 @@ | ||
import argparse | ||
from pathlib import Path | ||
import sys | ||
from typing import Dict, Any | ||
from distutils.util import strtobool # noqa | ||
|
||
from bazos.main import bazos as bz | ||
from bazos.scrapper import BazosScrapper, BazosUser, BazosDriver | ||
|
||
__version__ = "0.1.0" | ||
__apiversion__ = "0.1.0" | ||
__author__ = 'Zdenek Lapes' | ||
__license__ = 'MIT' | ||
|
||
|
||
def parse_cli_argument() -> Dict[str, Any]:
    """Parse the command-line arguments of the bazos CLI.

    Boolean switches accept an optional explicit value, so both ``--bazos``
    and ``--bazos true`` / ``--bazos false`` are valid; a switch that is
    absent defaults to ``False``.

    Returns:
        A dict mapping argument names (dashes replaced by underscores) to
        their parsed values.

    Raises:
        SystemExit: raised by ``argparse`` when a required option is missing
            or a value cannot be parsed.
    """

    def _str_to_bool(value: str) -> bool:
        # Local replacement for distutils.util.strtobool (distutils is removed
        # in Python 3.12); accepts the same spellings, case-insensitively.
        normalized = value.lower()
        if normalized in ('y', 'yes', 't', 'true', 'on', '1'):
            return True
        if normalized in ('n', 'no', 'f', 'false', 'off', '0'):
            return False
        # argparse turns a ValueError from a `type` callable into a usage error.
        raise ValueError(f"invalid truth value {value!r}")

    parser = argparse.ArgumentParser()

    # Shared keyword arguments for every "flag or explicit true/false" option:
    # bare flag -> const (True), missing flag -> default (False).
    bool_option = dict(type=_str_to_bool, default=False, nargs="?", const=True)

    # true/false switches, registered in the order shown by --help
    bool_switches = (
        ('--login', 'Login to bazos'),
        ('--bazos', 'Use bazos'),
        ('--add-only', 'Add only new products, not remove old ones'),
        ('--print-rubrics', 'Print rubrics'),
        ('--verbose', 'Verbose'),
        ('--delete-all', 'Delete all advertisements'),
        ('--create-all', 'Create all advertisements'),
        ('--remote', 'Use remote'),
    )
    for flag, description in bool_switches:
        parser.add_argument(flag, **bool_option, help=description)

    # Required paths. They take exactly one value: combining required=True
    # with nargs='?' would let a bare flag silently produce None.
    parser.add_argument('--items-path',
                        type=Path,
                        required=True,
                        help='Path to products directory')
    parser.add_argument('--credentials-path',
                        type=Path,
                        required=True,
                        help='Path to credentials directory')

    # One or more country codes
    parser.add_argument('--country',
                        nargs="+",
                        help="What bazos country to use",
                        default=['cz', 'sk'])

    return vars(parser.parse_args())
|
||
|
||
def main():
    """Run the bazos command-line workflow and exit the interpreter.

    Steps, each gated by the parsed CLI arguments:
      1. optionally echo the command line (``--verbose``),
      2. create the browser driver,
      3. either authenticate and save credentials (``--login``) or check the
         stored ones,
      4. optionally print rubrics and categories for every requested country
         (``--print-rubrics``),
      5. optionally delete and/or create advertisements for every requested
         country (``--bazos`` together with ``--delete-all`` / ``--create-all``).

    The function always finishes by calling ``sys.exit()``.
    """
    # Parse the command line exactly once.
    args = parse_cli_argument()

    # Print arguments
    if args['verbose']:
        print(' '.join(sys.argv))

    # Driver
    bazos_driver = BazosDriver(args=args, country='cz')

    # Login: the user object is built the same way in both cases, only the
    # action performed on it differs.
    bazos_user = BazosUser(country='cz', args=args, driver=bazos_driver.driver)
    if args['login']:
        bazos_user.authenticate()
        bazos_user.save_user_credentials()
    else:
        # NOTE(review): any return value is ignored here; presumably the method
        # reports or raises on missing credentials itself - confirm.
        bazos_user.exists_user_credentials()

    # Rubrics
    if args['print_rubrics']:
        for country in args['country']:
            bazos_user = BazosUser(country=country, args=args, driver=bazos_driver.driver)
            bazos_scrapper = BazosScrapper(country=country, args=args, user=bazos_user, driver=bazos_driver.driver)
            bazos_scrapper.load_page_with_cookies()
            bazos_scrapper.print_all_rubrics_and_categories()

    # Bazos
    if args['bazos']:
        for country in args['country']:
            bazos_user = BazosUser(country=country, args=args, driver=bazos_driver.driver)

            if args['verbose']:
                print(f"==> Processing country: {country}")

            bazos_scrapper = BazosScrapper(country=country, args=args, user=bazos_user, driver=bazos_driver.driver)
            bazos_scrapper.load_page_with_cookies()

            # Restore advertisements: delete first, then re-create.
            if args['delete_all']:
                bazos_scrapper.delete_advertisements()
            if args['create_all']:
                bazos_scrapper.create_advertisements()

    sys.exit()
|
||
|
||
if __name__ == '__main__': | ||
from dotenv import load_dotenv | ||
|
||
load_dotenv(dotenv_path='.env') | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,28 @@ | ||
from bazos.__init__ import main | ||
main() | ||
# from bazos.__init__ import main | ||
# main() | ||
|
||
# | ||
# from selenium.webdriver.chrome.options import Options | ||
# from selenium import webdriver | ||
# from selenium.webdriver.chrome.service import Service | ||
# | ||
# | ||
# def set_chrome_options() -> Options: | ||
# """Sets chrome options for Selenium. | ||
# Chrome options for headless browser is enabled. | ||
# """ | ||
# chrome_options = Options() | ||
# chrome_options.add_argument("--headless") | ||
# chrome_options.add_argument("--no-sandbox") | ||
# chrome_options.add_argument("--disable-dev-shm-usage") | ||
# chrome_prefs = {} | ||
# chrome_options.experimental_options["prefs"] = chrome_prefs | ||
# chrome_prefs["profile.default_content_settings"] = {"images": 2} | ||
# return chrome_options | ||
# | ||
# | ||
# if __name__ == "__main__": | ||
# service = Service(executable_path="/usr/local/bin/chromedriver") | ||
# driver = webdriver.Chrome(options=set_chrome_options(), service=service) | ||
# # Do stuff with your driver | ||
# driver.close() |
Oops, something went wrong.