diff --git a/README.md b/README.md index 5e2cd21..090b7d6 100644 --- a/README.md +++ b/README.md @@ -74,13 +74,13 @@ Scroll down for the list of [supported regions](#supported-regions) and [support ```powershell > cd .\parser\ -> yarn crawl --culture en-us --currency usd --operating-system linux --region us-west +> yarn crawl --culture en-us --currency usd --operating-system linux --region us-west --output-path .\out\ ``` You can also use short names: ```powershell -> yarn crawl -l en-us -c usd -o linux -r us-west +> yarn crawl -l en-us -c usd -o linux -r us-west -p .\out\ ``` Arguments: @@ -99,7 +99,7 @@ In the footer: ### Parser output -Writes `2` output files in the `out\` directory. One is a `CSV`, the other one is `JSON`. Both files contain the same data. +Writes `2` output files in the `out\` directory, or the directory specified by the `--output-path` argument. One is a `CSV`, the other one is `JSON`. Both files contain the same data. ```text .\out\vm-pricing__.csv @@ -124,6 +124,29 @@ Fields: - _Three Year Savings plan - _Three Year Savings plan With Azure Hybrid Benefit_ +### Docker + +#### Build the Docker image + +You can build a `Docker` image for the `azure-vm-pricing` parser: + +```bash +# Parser Docker image +cd parser +# For Linux machines running on x86_64 or in Windows WSL +docker build -f ./Dockerfile --platform linux/amd64 --build-arg ARCH=amd64 -t azure-vm-pricing . +# For machines running on arm64, for example Apple MacBooks with Apple silicon +docker build -f ./Dockerfile --platform linux/arm64 --build-arg ARCH=arm64 -t azure-vm-pricing . 
+``` + +#### Run the Docker image + +You can run the `azure-vm-pricing` image: + +```bash +docker run --rm -it -v ./data:/data/ azure-vm-pricing:latest bash -c "yarn crawl --culture en-us --currency eur --operating-system linux --region europe-west -p /data/" +``` + ### Parser tests The parser has unit tests focusing on edge cases of price formatting: diff --git a/parser/Dockerfile b/parser/Dockerfile new file mode 100644 index 0000000..f8c8845 --- /dev/null +++ b/parser/Dockerfile @@ -0,0 +1,72 @@ +# Default architecture +ARG ARCH=amd64 + +# Use the official lightweight Node.js 22 image +FROM $ARCH/node:22-slim + +# Do not let Puppeteer download Chromium; it is installed from the Debian repositories below +ENV PUPPETEER_SKIP_DOWNLOAD=true +# Set the path to the Chromium executable +ENV PUPPETEER_EXECUTABLE_PATH="/usr/bin/chromium" + +# Create a directory for the app +WORKDIR /app + +# Create a directory for the app data +RUN mkdir /data + +# Set volume for the app data +VOLUME /data + +# Install the system dependencies required by Puppeteer and Chromium/Chrome +# https://github.com/puppeteer/puppeteer/blob/main/docs/troubleshooting.md#running-puppeteer-on-gitlabci +# hadolint ignore=DL3008 +RUN apt-get update && apt-get install -y \ + wget \ + ca-certificates \ + chromium \ + fonts-liberation \ + libasound2 \ + libatk1.0-0 \ + libc6 \ + libcairo2 \ + libcups2 \ + libdbus-1-3 \ + libexpat1 \ + libfontconfig1 \ + libgbm-dev \ + libgcc1 \ + libglib2.0-0 \ + libgdk-pixbuf2.0-0 \ + libgtk-3-0 \ + libnspr4 \ + libnss3 \ + libpango-1.0-0 \ + libpangocairo-1.0-0 \ + libx11-6 \ + libx11-xcb1 \ + libxcb1 \ + libxcomposite1 \ + libxcursor1 \ + libxdamage1 \ + libxext6 \ + libxfixes3 \ + libxi6 \ + libxrandr2 \ + libxrender1 \ + libxss1 \ + libxtst6 \ + lsb-release \ + xdg-utils \ + libu2f-udev \ + libvulkan1 \ + --no-install-recommends \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Copy the app files (azure-vm-pricing/parser) to the container +COPY ./ . 
+ +# Install the app dependencies with Yarn, then clean the Yarn cache +RUN yarn install \ + && yarn cache clean diff --git a/parser/src/app.ts b/parser/src/app.ts index f0562c0..990ace9 100644 --- a/parser/src/app.ts +++ b/parser/src/app.ts @@ -8,6 +8,9 @@ import { isUrlBlocked } from './isUrlBlocked'; import { writeCsv, writeJson } from './writeFile'; import { AzurePortal, getPrice, getPricing } from './azurePortalExtensions'; +// If outputPath is not provided, the default value is './out' +let outputPath: string | undefined = './out'; + let recordTiming = false; let previousPerformanceNow = 0; let wasSuccessful = false; @@ -91,6 +94,16 @@ function timeEvent(eventName: string): void { case '--region': region = args[offset + 1]; break; + case '-p': + case '--output-path': + outputPath = args[offset + 1]; + // Assign default if undefined or empty + if (outputPath === undefined || outputPath === '') { + outputPath = './out'; + } + // Ensure the path exists + fs.mkdirSync(outputPath, { recursive: true }); + break; default: parsedBinaryArg = false; break; @@ -104,7 +117,7 @@ function timeEvent(eventName: string): void { debugMode = true; break; default: - console.log(`'${args[offset]}' is not a known switch, supported values are: '-l', '--culture', '-c', '--currency', '-o', '--operating-system', '-r', '--region'. None of these switches should be provided as the last arg as they require a value.`); + console.log(`'${args[offset]}' is not a known switch, supported values are: '-l', '--culture', '-c', '--currency', '-o', '--operating-system', '-r', '--region', '-p', '--output-path . 
None of these switches should be provided as the last arg as they require a value.`); break; } } @@ -120,7 +133,9 @@ function timeEvent(eventName: string): void { } timeEvent('chromeStartedAt'); - const browser = await puppeteer.launch({headless: headlessMode}); + // --no-sandbox and --disable-setuid-sandbox are required for running in a Docker container + // More info: https://github.com/puppeteer/puppeteer/blob/main/docs/troubleshooting.md#running-puppeteer-on-gitlabci + const browser = await puppeteer.launch({headless: headlessMode, args: ['--no-sandbox', '--disable-setuid-sandbox']}); const page = await browser.newPage(); timeEvent('chromeLaunchedAt'); @@ -229,8 +244,8 @@ function timeEvent(eventName: string): void { console.log(); - writeJson(vmPricing, config.region, config.operatingSystem); - writeCsv(vmPricing, config.culture, config.region, config.operatingSystem); + writeJson(vmPricing, config.region, config.operatingSystem, outputPath); + writeCsv(vmPricing, config.culture, config.region, config.operatingSystem, outputPath); wasSuccessful = true; } catch (e) diff --git a/parser/src/writeFile.ts b/parser/src/writeFile.ts index 49f5f20..5110e72 100644 --- a/parser/src/writeFile.ts +++ b/parser/src/writeFile.ts @@ -1,8 +1,8 @@ const fs = require('fs'); import { VmPricing } from './vmPricing'; -export function writeJson(vmPricing: VmPricing[], region: string, operatingSystem: string): void { - const outFilename = `./out/vm-pricing_${region}_${operatingSystem}.json`; +export function writeJson(vmPricing: VmPricing[], region: string, operatingSystem: string, outputPath: string): void { + const outFilename = `${outputPath}/vm-pricing_${region}_${operatingSystem}.json`; fs.writeFile(outFilename, JSON.stringify(vmPricing, null, 2), function (err) { if (err) { @@ -13,8 +13,8 @@ export function writeJson(vmPricing: VmPricing[], region: string, operatingSyste }); } -export function writeCsv(vmPricing: VmPricing[], culture: string, region: string, operatingSystem: 
string): void { - const outFilename = `./out/vm-pricing_${region}_${operatingSystem}.csv`; +export function writeCsv(vmPricing: VmPricing[], culture: string, region: string, operatingSystem: string, outputPath: string): void { + const outFilename = `${outputPath}/vm-pricing_${region}_${operatingSystem}.csv`; const writer = fs.createWriteStream(outFilename); writer.write('INSTANCE,VCPU,RAM,PAY AS YOU GO,PAY AS YOU GO WITH AZURE HYBRID BENEFIT,ONE YEAR RESERVED,ONE YEAR RESERVED WITH AZURE HYBRID BENEFIT,THREE YEAR RESERVED,THREE YEAR RESERVED WITH AZURE HYBRID BENEFIT,SPOT,SPOT WITH AZURE HYBRID BENEFIT,ONE YEAR SAVINGS PLAN,ONE YEAR SAVINGS PLAN WITH AZURE HYBRID BENEFIT,THREE YEAR SAVINGS PLAN,THREE YEAR SAVINGS PLAN WITH AZURE HYBRID BENEFIT\n');