Skip to content

Commit

Permalink
ci: add scraper
Browse files (browse the repository at this point in the history)
  • Loading branch information
yongenaelf committed Jun 20, 2024
1 parent fb2b1a5 commit a0831f4
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 0 deletions.
32 changes: 32 additions & 0 deletions .github/workflows/typesense.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Rebuilds the Typesense search index for the documentation site.
#
# The job builds the site, serves it locally on port 80, and runs the
# Typesense DocSearch scraper in a container that crawls the locally
# served site (config/typesense.json points at host.docker.internal).
name: Update Typesense

on:
  workflow_dispatch:
  push:
    branches: [develop]

jobs:
  build:
    name: Update Typesense index
    runs-on: ubuntu-latest
    permissions:
      contents: read
    steps:
      - name: Check out the repo
        uses: actions/checkout@v4

      - uses: actions/setup-node@v4
        with:
          node-version: 18
          cache: 'npm'

      - run: npm install
      - run: npm run build
      - name: Serve the site and run the scraper
        # Secrets are handed over through the environment rather than
        # being interpolated into the script text, so their values are
        # never parsed by the shell.
        env:
          TYPESENSE_API_KEY: ${{ secrets.TYPESENSE_API_KEY }}
          TYPESENSE_HOST: ${{ secrets.TYPESENSE_HOST }}
        run: |
          # NOTE(review): binding port 80 may need elevated privileges
          # on the runner -- confirm this works for the runner user.
          npm run serve -- --port 80 &

          # Block until the background server answers; fail the step
          # if it never comes up instead of scraping nothing.
          curl --silent --fail --output /dev/null \
            --retry 30 --retry-delay 1 --retry-connrefused \
            http://localhost:80

          # No -it: CI runners have no TTY, so requesting one makes
          # `docker run` abort. --add-host makes host.docker.internal
          # resolve to the host on Linux Docker Engine. `-e NAME` with
          # no value forwards the variable from the step environment.
          docker run \
            --add-host=host.docker.internal:host-gateway \
            -e TYPESENSE_API_KEY \
            -e TYPESENSE_HOST \
            -e TYPESENSE_PORT=443 \
            -e TYPESENSE_PROTOCOL=https \
            -e "CONFIG=$(jq -r tostring config/typesense.json)" \
            typesense/docsearch-scraper:latest
37 changes: 37 additions & 0 deletions config/typesense.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
  "index_name": "aelf-docs",
  "start_urls": ["http://host.docker.internal"],
  "sitemap_urls": ["http://host.docker.internal/sitemap.xml"],
  "sitemap_alternate_links": true,
  "stop_urls": [],
  "selectors": {
    "lvl0": {
      "selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[last()]",
      "type": "xpath",
      "global": true,
      "default_value": "Documentation"
    },
    "lvl1": "article h1, header h1",
    "lvl2": "article h2",
    "lvl3": "article h3",
    "lvl4": "article h4",
    "lvl5": "article h5, article td:first-child",
    "lvl6": "article h6",
    "text": "article p, article li, article td:last-child"
  },
  "strip_chars": " .,;:#",
  "custom_settings": {
    "separatorsToIndex": "_",
    "attributesForFaceting": ["language", "version", "type", "docusaurus_tag"],
    "attributesToRetrieve": [
      "hierarchy",
      "content",
      "anchor",
      "url",
      "url_without_anchor",
      "type"
    ]
  },
  "conversation_id": ["833762294"],
  "nb_hits": 46250
}

0 comments on commit a0831f4

Please sign in to comment.