Skip to content

Commit

Permalink
Added test workflow to run custom crawl tests
Browse files Browse the repository at this point in the history
  • Loading branch information
dev-aravind committed Nov 19, 2024
1 parent 556cead commit 2f7a33f
Showing 1 changed file with 73 additions and 0 deletions.
73 changes: 73 additions & 0 deletions .github/workflows/custom-crawl-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
name: Custom crawl test

# Manually triggered workflow: every value below is supplied on the
# "Run workflow" form (or the workflow_dispatch API call) and is forwarded
# to src/main.rb as a positional argument by the "Run ruby code" step.
on:
  workflow_dispatch:
    inputs:
      page-url:
        description: 'URL of the page to crawl'
        # No default exists, and an omitted value would be forwarded to the
        # crawler as an empty string, so the input is mandatory.
        required: true
      entity-identifier:
        description: 'Identifier of the entity'
        # Mandatory for the same reason as page-url.
        required: true
      is-paginated:
        description: 'Whether the page is paginated'
        required: false
        default: 'false'
      headless:
        description: 'Whether to run in headless mode'
        required: false
        default: 'false'
      fetch-urls-headlessly:
        description: 'Set as true to fetch the entity URLs headlessly'
        required: false
        default: 'false'
      offset:
        description: 'Offset for paginated pages'
        required: false
        default: '1'

jobs:
  # Despite its name, this job runs its steps inline on a hosted runner;
  # it does not reference a reusable workflow via `uses:`.
  call_reusable_workflow:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Runs the action defined at the root of this repository.
      # NOTE(review): every value here is hard-coded; none of the dispatch
      # inputs are used by this step - confirm that is intended.
      - name: Action setup
        uses: ./
        with:
          mode: 'fetch-push'
          page-url: 'https://capitol.nb.ca/en/tickets-events?start='
          entity-identifier: 'div.fc-item-block-standard-wrapper.odd a, div.fc-item-block-standard-wrapper.even a'
          downloadFile: "capitolnb-events.jsonld"
          artifact: capitol-nb-ca
          is-paginated: "0"
          headless: "true"
          offset: "12"
          publisher: ${{ secrets.PUBLISHER_URI_GREGORY }}
          token: ${{ secrets.DEV_PAT }}

      - name: Set up Ruby
        uses: ruby/setup-ruby@v1
        with:
          bundler-cache: true

      # Dispatch inputs are handed to the shell through environment
      # variables instead of being expanded into the script text. Expanding
      # `${{ inputs.* }}` inline would let quotes, `$`, backticks or `;` in a
      # URL or CSS selector break the command line or inject shell commands.
      - name: Run ruby code
        env:
          CRAWL_PAGE_URL: ${{ inputs.page-url }}
          CRAWL_ENTITY_IDENTIFIER: ${{ inputs.entity-identifier }}
          CRAWL_IS_PAGINATED: ${{ inputs.is-paginated || 'false' }}
          CRAWL_HEADLESS: ${{ inputs.headless || 'false' }}
          CRAWL_FETCH_URLS_HEADLESSLY: ${{ inputs.fetch-urls-headlessly || 'false' }}
          CRAWL_OFFSET: ${{ inputs.offset || '1' }}
        run: |
          # Argument order: page URL, entity identifier, output file,
          # is-paginated, headless, fetch-urls-headlessly, offset.
          bundle exec ruby src/main.rb \
            "$CRAWL_PAGE_URL" \
            "$CRAWL_ENTITY_IDENTIFIER" \
            output/test_entity.jsonld \
            "$CRAWL_IS_PAGINATED" \
            "$CRAWL_HEADLESS" \
            "$CRAWL_FETCH_URLS_HEADLESSLY" \
            "$CRAWL_OFFSET"

      # Publishes everything under output/ (including the
      # test_entity.jsonld path passed to the crawler above).
      - name: Upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: jsonld-data
          path: output/

0 comments on commit 2f7a33f

Please sign in to comment.