From b7818d50b0dc1ffa662a64a02e53064395f396b3 Mon Sep 17 00:00:00 2001 From: Jan Pelikan Date: Tue, 24 Sep 2024 22:54:49 +0200 Subject: [PATCH] feat(parser): Add Docker support with new parameter for output path - review suggestions from PR accepted and implemented - **Dockerfile**: - Fixed the obsolete definition of environment variables to use the correct `ENV key=value` syntax. - Added source link for more information about dependencies for Puppeteer and Chromium/Chrome in Docker. - Dockerfile moved to `parser` directory. - Updated path for `COPY` command. - Removed commented code for creating a new user in Docker. This is not needed for the current implementation. - **README.md**: - Section for Docker updated with new instructions. - **app.ts**: - Variable `outputPath`: Default value is set to `./out`. Variable is now validated twice. - Variable `outputPath`: The path is always checked for existence. If it does not exist, it is created. - Added source link for more information about calling Chrome/Chromium with `--no-sandbox` flag in Docker while running Puppeteer. --- README.md | 3 ++- Dockerfile => parser/Dockerfile | 25 +++++++++---------------- parser/src/app.ts | 8 ++++++-- 3 files changed, 17 insertions(+), 19 deletions(-) rename Dockerfile => parser/Dockerfile (64%) diff --git a/README.md b/README.md index 57e5301..090b7d6 100644 --- a/README.md +++ b/README.md @@ -131,9 +131,10 @@ Fields: You can build a `Docker` image for the `azure-vm-pricing`: ```bash +# Parser Docker image +cd parser # For Linux machines running on x86_64 or in Windows WSL docker build -f ./Dockerfile --platform linux/amd64 --build-arg ARCH=amd64 -t azure-vm-pricing . - # For Linux machines running on arm64, for example Apple Macbooks with Apple Silicon docker build -f ./Dockerfile --platform linux/arm64 --build-arg ARCH=arm64 -t azure-vm-pricing . 
``` diff --git a/Dockerfile b/parser/Dockerfile similarity index 64% rename from Dockerfile rename to parser/Dockerfile index 0ed1aea..f8c8845 100644 --- a/Dockerfile +++ b/parser/Dockerfile @@ -5,9 +5,9 @@ ARG ARCH=amd64 FROM $ARCH/node:22-slim # Do not download Chromium, will be installed manually from Debian repositories -ENV PUPPETEER_SKIP_DOWNLOAD true +ENV PUPPETEER_SKIP_DOWNLOAD=true # Set the path to the Chromium executable -ENV PUPPETEER_EXECUTABLE_PATH "/usr/bin/chromium" +ENV PUPPETEER_EXECUTABLE_PATH="/usr/bin/chromium" # Create a directory for the app WORKDIR /app @@ -18,7 +18,9 @@ RUN mkdir /data # Set volume for the app data VOLUME /data -# Install dependencies +# Install dependencies for Puppeteer respectively Chromium/Chrome +# https://github.com/puppeteer/puppeteer/blob/main/docs/troubleshooting.md#running-puppeteer-on-gitlabci +# hadolint ignore=DL3008 RUN apt-get update && apt-get install -y \ wget \ ca-certificates \ @@ -62,18 +64,9 @@ RUN apt-get update && apt-get install -y \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* -# Copy the app files (azure-vm-pricing) to the container -COPY ./parser . - -# When use a non-root user, the processes under the user will not have access to mounted volumes for some reason -# # Create a non-root user to run Puppeteer -# RUN groupadd -r pptruser && useradd -r -g pptruser -G audio,video pptruser \ -# && mkdir -p /home/pptruser/Downloads \ -# && chown -R pptruser:pptruser /home/pptruser \ -# && chown -R pptruser:pptruser /app - -# # Switch to the non-root user -# USER pptruser +# Copy the app files (azure-vm-pricing/parser) to the container +COPY ./ . 
# Install Yarn and dependencies -RUN yarn install +RUN yarn install \ + && yarn cache clean diff --git a/parser/src/app.ts b/parser/src/app.ts index 7949fd0..990ace9 100644 --- a/parser/src/app.ts +++ b/parser/src/app.ts @@ -8,7 +8,8 @@ import { isUrlBlocked } from './isUrlBlocked'; import { writeCsv, writeJson } from './writeFile'; import { AzurePortal, getPrice, getPricing } from './azurePortalExtensions'; -let outputPath: string | undefined; +// If outputPath is not provided, the default value is './out' +let outputPath: string | undefined = './out'; let recordTiming = false; let previousPerformanceNow = 0; @@ -96,10 +97,12 @@ function timeEvent(eventName: string): void { case '-p': case '--output-path': outputPath = args[offset + 1]; - // If the output path is not defined or is an empty string, use the default path + // Assign default if undefined or empty if (outputPath === undefined || outputPath === '') { outputPath = './out'; } + // Ensure the path exists + fs.mkdirSync(outputPath, { recursive: true }); break; default: parsedBinaryArg = false; @@ -131,6 +134,7 @@ function timeEvent(eventName: string): void { timeEvent('chromeStartedAt'); // --no-sandbox and --disable-setuid-sandbox are required for running in a Docker container + // More info: https://github.com/puppeteer/puppeteer/blob/main/docs/troubleshooting.md#running-puppeteer-on-gitlabci const browser = await puppeteer.launch({headless: headlessMode, args: ['--no-sandbox', '--disable-setuid-sandbox']}); const page = await browser.newPage(); timeEvent('chromeLaunchedAt');