From 5c9a49ff2aed0efe09763147162e3d10cc2a73c2 Mon Sep 17 00:00:00 2001 From: Jan Pelikan Date: Sat, 21 Sep 2024 00:06:32 +0200 Subject: [PATCH 1/2] feat(parser): Added Docker support with new parameter to define output path for exported files - **Dockerfile**: - Added new file Dockerfile for building the Docker image. - **README.md**: - Added section for Docker. - Updated the application usage to include the new parameters `-p` and `--output-path`. - **app.ts**: - Added new variable `outputPath` to define the path of the output file. - Updated the function for parsing input parameters to include the new parameters `-p` and `--output-path` with a defined default value. - Updated `console.log` messages to include the new parameters. - Updated the constant for calling a Chromium browser, including new arguments for launching the browser in a Docker container. - Updated the calls to the `writeJson` and `writeCsv` functions to include the new `outputPath` parameter. - **writeFile.ts**: - Function `writeJson`: Added new parameter `outputPath` to define the path of the output file. - Function `writeJson`: Updated constant `outFilename` so that the base path is defined by the `outputPath` parameter and is no longer hard-coded. - Function `writeCsv`: Added new parameter `outputPath` to define the path of the output file. - Function `writeCsv`: Updated constant `outFilename` so that the base path is defined by the `outputPath` parameter and is no longer hard-coded. 
--- Dockerfile | 79 +++++++++++++++++++++++++++++++++++++++++ README.md | 28 +++++++++++++-- parser/src/app.ts | 19 +++++++--- parser/src/writeFile.ts | 8 ++--- 4 files changed, 123 insertions(+), 11 deletions(-) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..0ed1aea --- /dev/null +++ b/Dockerfile @@ -0,0 +1,79 @@ +# Default architecture +ARG ARCH=amd64 + +# Use the official lightweight Node.js 22 image +FROM $ARCH/node:22-slim + +# Do not download Chromium, will be installed manually from Debian repositories +ENV PUPPETEER_SKIP_DOWNLOAD true +# Set the path to the Chromium executable +ENV PUPPETEER_EXECUTABLE_PATH "/usr/bin/chromium" + +# Create a directory for the app +WORKDIR /app + +# Create a directory for the app data +RUN mkdir /data + +# Set volume for the app data +VOLUME /data + +# Install dependencies +RUN apt-get update && apt-get install -y \ + wget \ + ca-certificates \ + chromium \ + fonts-liberation \ + libasound2 \ + libatk1.0-0 \ + libc6 \ + libcairo2 \ + libcups2 \ + libdbus-1-3 \ + libexpat1 \ + libfontconfig1 \ + libgbm-dev \ + libgcc1 \ + libglib2.0-0 \ + libgdk-pixbuf2.0-0 \ + libgtk-3-0 \ + libnspr4 \ + libnss3 \ + libpango-1.0-0 \ + libpangocairo-1.0-0 \ + libx11-6 \ + libx11-xcb1 \ + libxcb1 \ + libxcomposite1 \ + libxcursor1 \ + libxdamage1 \ + libxext6 \ + libxfixes3 \ + libxi6 \ + libxrandr2 \ + libxrender1 \ + libxss1 \ + libxtst6 \ + lsb-release \ + xdg-utils \ + libu2f-udev \ + libvulkan1 \ + --no-install-recommends \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Copy the app files (azure-vm-pricing) to the container +COPY ./parser . 
+ +# When use a non-root user, the processes under the user will not have access to mounted volumes for some reason +# # Create a non-root user to run Puppeteer +# RUN groupadd -r pptruser && useradd -r -g pptruser -G audio,video pptruser \ +# && mkdir -p /home/pptruser/Downloads \ +# && chown -R pptruser:pptruser /home/pptruser \ +# && chown -R pptruser:pptruser /app + +# # Switch to the non-root user +# USER pptruser + +# Install Yarn and dependencies +RUN yarn install diff --git a/README.md b/README.md index 5e2cd21..57e5301 100644 --- a/README.md +++ b/README.md @@ -74,13 +74,13 @@ Scroll down for the list of [supported regions](#supported-regions) and [support ```powershell > cd .\parser\ -> yarn crawl --culture en-us --currency usd --operating-system linux --region us-west +> yarn crawl --culture en-us --currency usd --operating-system linux --region us-west --output-path .\out\ ``` You can also use short names: ```powershell -> yarn crawl -l en-us -c usd -o linux -r us-west +> yarn crawl -l en-us -c usd -o linux -r us-west -p .\out\ ``` Arguments: @@ -99,7 +99,7 @@ In the footer: ### Parser output -Writes `2` output files in the `out\` directory. One is a `CSV`, the other one is `JSON`. Both files contain the same data. +Writes `2` output files in the `out\` directory, or the directory specified by the `--output-path` argument. One is a `CSV`, the other one is `JSON`. Both files contain the same data. ```text .\out\vm-pricing__.csv @@ -124,6 +124,28 @@ Fields: - _Three Year Savings plan - _Three Year Savings plan With Azure Hybrid Benefit_ +### Docker + +#### Build the Docker image + +You can build a `Docker` image for the `azure-vm-pricing`: + +```bash +# For Linux machines running on x86_64 or in Windows WSL +docker build -f ./Dockerfile --platform linux/amd64 --build-arg ARCH=amd64 -t azure-vm-pricing . 
+ +# For Linux machines running on arm64, for example Apple Macbooks with Apple Silicon +docker build -f ./Dockerfile --platform linux/arm64 --build-arg ARCH=arm64 -t azure-vm-pricing . +``` + +#### Run the Docker image + +You can run the `azure-vm-pricing` image: + +```bash +docker run --rm -it -v ./data:/data/ azure-vm-pricing:latest bash -c "yarn crawl --culture en-us --currency eur --operating-system linux --region europe-west -p /data/" +``` + ### Parser tests The parser has unit tests focusing on edge cases of price formatting: diff --git a/parser/src/app.ts b/parser/src/app.ts index f0562c0..7949fd0 100644 --- a/parser/src/app.ts +++ b/parser/src/app.ts @@ -8,6 +8,8 @@ import { isUrlBlocked } from './isUrlBlocked'; import { writeCsv, writeJson } from './writeFile'; import { AzurePortal, getPrice, getPricing } from './azurePortalExtensions'; +let outputPath: string | undefined; + let recordTiming = false; let previousPerformanceNow = 0; let wasSuccessful = false; @@ -91,6 +93,14 @@ function timeEvent(eventName: string): void { case '--region': region = args[offset + 1]; break; + case '-p': + case '--output-path': + outputPath = args[offset + 1]; + // If the output path is not defined or is an empty string, use the default path + if (outputPath === undefined || outputPath === '') { + outputPath = './out'; + } + break; default: parsedBinaryArg = false; break; @@ -104,7 +114,7 @@ function timeEvent(eventName: string): void { debugMode = true; break; default: - console.log(`'${args[offset]}' is not a known switch, supported values are: '-l', '--culture', '-c', '--currency', '-o', '--operating-system', '-r', '--region'. None of these switches should be provided as the last arg as they require a value.`); + console.log(`'${args[offset]}' is not a known switch, supported values are: '-l', '--culture', '-c', '--currency', '-o', '--operating-system', '-r', '--region', '-p', '--output-path . 
None of these switches should be provided as the last arg as they require a value.`); break; } } @@ -120,7 +130,8 @@ function timeEvent(eventName: string): void { } timeEvent('chromeStartedAt'); - const browser = await puppeteer.launch({headless: headlessMode}); + // --no-sandbox and --disable-setuid-sandbox are required for running in a Docker container + const browser = await puppeteer.launch({headless: headlessMode, args: ['--no-sandbox', '--disable-setuid-sandbox']}); const page = await browser.newPage(); timeEvent('chromeLaunchedAt'); @@ -229,8 +240,8 @@ function timeEvent(eventName: string): void { console.log(); - writeJson(vmPricing, config.region, config.operatingSystem); - writeCsv(vmPricing, config.culture, config.region, config.operatingSystem); + writeJson(vmPricing, config.region, config.operatingSystem, outputPath); + writeCsv(vmPricing, config.culture, config.region, config.operatingSystem, outputPath); wasSuccessful = true; } catch (e) diff --git a/parser/src/writeFile.ts b/parser/src/writeFile.ts index 49f5f20..5110e72 100644 --- a/parser/src/writeFile.ts +++ b/parser/src/writeFile.ts @@ -1,8 +1,8 @@ const fs = require('fs'); import { VmPricing } from './vmPricing'; -export function writeJson(vmPricing: VmPricing[], region: string, operatingSystem: string): void { - const outFilename = `./out/vm-pricing_${region}_${operatingSystem}.json`; +export function writeJson(vmPricing: VmPricing[], region: string, operatingSystem: string, outputPath: string): void { + const outFilename = `${outputPath}/vm-pricing_${region}_${operatingSystem}.json`; fs.writeFile(outFilename, JSON.stringify(vmPricing, null, 2), function (err) { if (err) { @@ -13,8 +13,8 @@ export function writeJson(vmPricing: VmPricing[], region: string, operatingSyste }); } -export function writeCsv(vmPricing: VmPricing[], culture: string, region: string, operatingSystem: string): void { - const outFilename = `./out/vm-pricing_${region}_${operatingSystem}.csv`; +export function 
writeCsv(vmPricing: VmPricing[], culture: string, region: string, operatingSystem: string, outputPath: string): void { + const outFilename = `${outputPath}/vm-pricing_${region}_${operatingSystem}.csv`; const writer = fs.createWriteStream(outFilename); writer.write('INSTANCE,VCPU,RAM,PAY AS YOU GO,PAY AS YOU GO WITH AZURE HYBRID BENEFIT,ONE YEAR RESERVED,ONE YEAR RESERVED WITH AZURE HYBRID BENEFIT,THREE YEAR RESERVED,THREE YEAR RESERVED WITH AZURE HYBRID BENEFIT,SPOT,SPOT WITH AZURE HYBRID BENEFIT,ONE YEAR SAVINGS PLAN,ONE YEAR SAVINGS PLAN WITH AZURE HYBRID BENEFIT,THREE YEAR SAVINGS PLAN,THREE YEAR SAVINGS PLAN WITH AZURE HYBRID BENEFIT\n'); From 834f760432d97db29fa2524892e0294b9d03164f Mon Sep 17 00:00:00 2001 From: Jan Pelikan Date: Tue, 24 Sep 2024 22:54:49 +0200 Subject: [PATCH 2/2] feat(parser): Add Docker support with new parameter for output path - review suggestions from PR accepted and implemented - **Dockerfile**: - Fixed the obsolete `ENV` syntax used to define environment variables. - Added a source link with more information about the dependencies of Puppeteer, respectively Chromium/Chrome, in Docker. - Dockerfile moved to the `parser` directory. - Updated path for `COPY` command. - Removed the commented-out code for creating a new user in Docker. This is not needed for the current implementation. - **README.md**: - Section for Docker updated with new instructions. - **app.ts**: - Variable `outputPath`: Default value is set to `./out`. Variable is now validated twice. - Variable `outputPath`: Whenever the `-p`/`--output-path` parameter is passed, the path is created if it does not exist. - Added a source link with more information about calling Chrome/Chromium with the `--no-sandbox` flag in Docker while running Puppeteer. 
--- README.md | 3 ++- Dockerfile => parser/Dockerfile | 25 +++++++++---------------- parser/src/app.ts | 8 ++++++-- 3 files changed, 17 insertions(+), 19 deletions(-) rename Dockerfile => parser/Dockerfile (64%) diff --git a/README.md b/README.md index 57e5301..090b7d6 100644 --- a/README.md +++ b/README.md @@ -131,9 +131,10 @@ Fields: You can build a `Docker` image for the `azure-vm-pricing`: ```bash +# Parser Docker image +cd parser # For Linux machines running on x86_64 or in Windows WSL docker build -f ./Dockerfile --platform linux/amd64 --build-arg ARCH=amd64 -t azure-vm-pricing . - # For Linux machines running on arm64, for example Apple Macbooks with Apple Silicon docker build -f ./Dockerfile --platform linux/arm64 --build-arg ARCH=arm64 -t azure-vm-pricing . ``` diff --git a/Dockerfile b/parser/Dockerfile similarity index 64% rename from Dockerfile rename to parser/Dockerfile index 0ed1aea..f8c8845 100644 --- a/Dockerfile +++ b/parser/Dockerfile @@ -5,9 +5,9 @@ ARG ARCH=amd64 FROM $ARCH/node:22-slim # Do not download Chromium, will be installed manually from Debian repositories -ENV PUPPETEER_SKIP_DOWNLOAD true +ENV PUPPETEER_SKIP_DOWNLOAD=true # Set the path to the Chromium executable -ENV PUPPETEER_EXECUTABLE_PATH "/usr/bin/chromium" +ENV PUPPETEER_EXECUTABLE_PATH="/usr/bin/chromium" # Create a directory for the app WORKDIR /app @@ -18,7 +18,9 @@ RUN mkdir /data # Set volume for the app data VOLUME /data -# Install dependencies +# Install dependencies for Puppeteer respectively Chromium/Chrome +# https://github.com/puppeteer/puppeteer/blob/main/docs/troubleshooting.md#running-puppeteer-on-gitlabci +# hadolint ignore=DL3008 RUN apt-get update && apt-get install -y \ wget \ ca-certificates \ @@ -62,18 +64,9 @@ RUN apt-get update && apt-get install -y \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* -# Copy the app files (azure-vm-pricing) to the container -COPY ./parser . 
- -# When use a non-root user, the processes under the user will not have access to mounted volumes for some reason -# # Create a non-root user to run Puppeteer -# RUN groupadd -r pptruser && useradd -r -g pptruser -G audio,video pptruser \ -# && mkdir -p /home/pptruser/Downloads \ -# && chown -R pptruser:pptruser /home/pptruser \ -# && chown -R pptruser:pptruser /app - -# # Switch to the non-root user -# USER pptruser +# Copy the app files (azure-vm-pricing/parser) to the container +COPY ./ . # Install Yarn and dependencies -RUN yarn install +RUN yarn install \ + && yarn cache clean diff --git a/parser/src/app.ts b/parser/src/app.ts index 7949fd0..990ace9 100644 --- a/parser/src/app.ts +++ b/parser/src/app.ts @@ -8,7 +8,8 @@ import { isUrlBlocked } from './isUrlBlocked'; import { writeCsv, writeJson } from './writeFile'; import { AzurePortal, getPrice, getPricing } from './azurePortalExtensions'; -let outputPath: string | undefined; +// If outputPath is not provided, the default value is './out' +let outputPath: string | undefined = './out'; let recordTiming = false; let previousPerformanceNow = 0; @@ -96,10 +97,12 @@ function timeEvent(eventName: string): void { case '-p': case '--output-path': outputPath = args[offset + 1]; - // If the output path is not defined or is an empty string, use the default path + // Assign default if undefined or empty if (outputPath === undefined || outputPath === '') { outputPath = './out'; } + // Ensure the path exists + fs.mkdirSync(outputPath, { recursive: true }); break; default: parsedBinaryArg = false; @@ -131,6 +134,7 @@ function timeEvent(eventName: string): void { timeEvent('chromeStartedAt'); // --no-sandbox and --disable-setuid-sandbox are required for running in a Docker container + // More info: https://github.com/puppeteer/puppeteer/blob/main/docs/troubleshooting.md#running-puppeteer-on-gitlabci const browser = await puppeteer.launch({headless: headlessMode, args: ['--no-sandbox', '--disable-setuid-sandbox']}); const 
page = await browser.newPage(); timeEvent('chromeLaunchedAt');