Skip to content

Commit

Permalink
Upgrade Algolia search to v3 (#3560)
Browse files Browse the repository at this point in the history
* update dapr publish command

Signed-off-by: Hannah Hunter <[email protected]>
Signed-off-by: Aaron Crawfis <[email protected]>

* Split workflow into two steps

Signed-off-by: Aaron Crawfis <[email protected]>

* Update upload path

Signed-off-by: Aaron Crawfis <[email protected]>

* Add concurrency check

Signed-off-by: Aaron Crawfis <[email protected]>

* Add Algolia workflow script and step

Signed-off-by: Aaron Crawfis <[email protected]>

* Update Algolia box to v3

Signed-off-by: Aaron Crawfis <[email protected]>

* Fix secret name

Signed-off-by: Aaron Crawfis <[email protected]>

* Override default search bar in Docsy v3

Signed-off-by: Aaron Crawfis <[email protected]>

* Remove temporary comment

Signed-off-by: Aaron Crawfis <[email protected]>

* Consolidate build and deploy

Signed-off-by: Aaron Crawfis <[email protected]>

---------

Signed-off-by: Hannah Hunter <[email protected]>
Signed-off-by: Aaron Crawfis <[email protected]>
Co-authored-by: Hannah Hunter <[email protected]>
Co-authored-by: Mark Fussell <[email protected]>
  • Loading branch information
3 people committed Jun 17, 2023
1 parent b975970 commit 0d0d29a
Show file tree
Hide file tree
Showing 5 changed files with 182 additions and 14 deletions.
118 changes: 118 additions & 0 deletions .github/scripts/algolia.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
import os
from re import S
import sys
import json
from bs4 import BeautifulSoup
from algoliasearch.search_client import SearchClient

# Host name of the published site; page URLs are split on it to obtain
# site-relative paths.
url = "docs.dapr.io"

# Root of the scan: the first command-line argument resolved against the
# current working directory, or the working directory itself when no
# argument is given.
starting_directory = (
    os.path.join(os.getcwd(), str(sys.argv[1]))
    if len(sys.argv) > 1
    else os.getcwd()
)

# Algolia credentials and the target index name are read from the
# environment; each value is None when its variable is not set.
ALGOLIA_APP_ID = os.environ.get('ALGOLIA_APP_ID')
ALGOLIA_API_KEY = os.environ.get('ALGOLIA_API_WRITE_KEY')
ALGOLIA_INDEX_NAME = os.environ.get('ALGOLIA_INDEX_NAME')

# Module-level client and index handle shared by the functions below.
client = SearchClient.create(ALGOLIA_APP_ID, ALGOLIA_API_KEY)
index = client.init_index(ALGOLIA_INDEX_NAME)

# File names that are never indexed, wherever they appear in the tree.
excluded_files = ["404.html"]

# Directory base names whose whole subtree is skipped during the scan.
exluded_directories = ["zh-hans"]

# Base rank of each top-level docs section. Sections are spaced 100 apart,
# in the order listed, leaving room for a per-page subrank to be added.
rankings = {
    section: position * 100
    for position, section in enumerate(
        (
            "Getting started",
            "Concepts",
            "Developing applications",
            "Operations",
            "Reference",
            "Contributing",
            "Home",
        )
    )
}

def scan_directory(directory: str, pages: list):
    """Recursively collect the HTML pages under ``directory`` to be indexed.

    A page is collected when its name ends in ``.html``, it is not listed in
    ``excluded_files``, and its content does not contain the
    ``<!-- DISABLE_ALGOLIA -->`` marker. Directories whose base name is in
    ``exluded_directories`` are skipped together with everything below them.

    Args:
        directory: Directory to scan.
        pages: List that the paths of collected pages are appended to
            (mutated in place).

    Returns:
        None. Results are delivered through ``pages``.
    """
    if os.path.basename(directory) in exluded_directories:
        print(f'Skipping directory: {directory}')
        return
    # Sorted so the traversal, and therefore the page order, is reproducible
    # regardless of the order the file system reports entries in.
    for file in sorted(os.listdir(directory)):
        path = os.path.join(directory, file)
        if os.path.isdir(path):
            scan_directory(path, pages)
            continue
        # Anything that is neither a directory nor a regular file (for
        # example a broken symlink) is ignored instead of being listed.
        if not os.path.isfile(path):
            continue
        if not file.endswith(".html") or file in excluded_files:
            continue
        # Undecodable bytes are dropped, as parse_file does, so a stray byte
        # cannot abort the scan; the handle is closed right after reading.
        with open(path, encoding="utf8", errors="ignore") as page:
            hidden = '<!-- DISABLE_ALGOLIA -->' in page.read()
        if hidden:
            print(f'Skipping hidden page: {path}')
        else:
            print(f'Indexing: {path}')
            pages.append(path)

def parse_file(path: str):
    """Build the search record for one rendered HTML page.

    The page's ``<meta>`` tags supply the description, title and URL, the
    first ``breadcrumb-item`` list entry supplies the section used for
    ranking, and the text of all ``<p>`` elements is concatenated into the
    record body.

    Args:
        path: Path of the HTML file to parse.

    Returns:
        A dict with the keys ``hierarchy``, ``rank``, ``subrank``, ``type``,
        ``lvl0``..``lvl3`` and ``text``. ``url`` is added when the page has an
        ``og:url`` meta tag; ``path`` and ``objectID`` are added only when
        that URL contains the site host, so callers can test for
        ``objectID`` to decide whether the page is indexable.
    """
    data = {
        "hierarchy": {},
        "rank": 999,
        "subrank": 99,
        "type": "lvl2",
        "lvl0": "",
        "lvl1": "",
        "lvl2": "",
        "lvl3": "",
    }

    # Explicit UTF-8 keeps decoding independent of the platform default;
    # undecodable bytes are dropped rather than aborting the run.
    with open(path, "r", encoding="utf8", errors='ignore') as file:
        soup = BeautifulSoup(file.read(), "html.parser")

    for meta in soup.find_all("meta"):
        content = meta.get("content")
        if meta.get("name") == "description":
            data["lvl2"] = content
            data["hierarchy"]["lvl1"] = content
        elif meta.get("property") == "og:title":
            data["lvl0"] = content
            data["hierarchy"]["lvl0"] = content
            data["hierarchy"]["lvl2"] = content
        elif meta.get("property") == "og:url":
            data["url"] = content
            # The site-relative path is whatever follows the host name. A
            # missing or foreign URL leaves the record without an objectID
            # instead of raising.
            parts = content.split(url) if content else []
            if len(parts) > 1:
                data["path"] = parts[1]
                data["objectID"] = parts[1]
            else:
                print(f"No {url} path in og:url {content!r} for {path}")

    breadcrumbs = soup.find_all("li", class_="breadcrumb-item")
    # Deeper pages have more breadcrumbs, so depth doubles as the subrank.
    subrank = len(breadcrumbs)
    data["subrank"] = subrank

    if breadcrumbs:
        # Only the first breadcrumb names the section used for ranking.
        section = breadcrumbs[0].text.strip()
        data["lvl1"] = section
        # Deliberately overrides the og:title value stored above.
        data["hierarchy"]["lvl0"] = section
        if section in rankings:
            data["rank"] = rankings[section] + subrank
        else:
            print(f"Rank not found for section {section}")
            data["rank"] = 998

    # Paragraph texts are joined without a separator.
    data["text"] = "".join(p.text for p in soup.find_all("p"))
    return data

def index_payload(payload):
    """Replace every object in the module-level ``index`` with ``payload``.

    Calls ``replace_all_objects`` and then ``wait()`` on the response it
    returns.

    Args:
        payload: Records to store in the index.
    """
    index.replace_all_objects(payload).wait()


if __name__ == "__main__":
    # Script entry point: scan the site, build one record per page, and
    # replace the contents of the search index with those records.
    pages = []
    scan_directory(starting_directory, pages)

    # Records without an objectID cannot be stored, so they are left out.
    payload = []
    for page in pages:
        data = parse_file(page)
        if "objectID" in data:
            payload.append(data)

    # The upload replaces the whole index, so an empty payload (for example
    # from a wrong start directory) would erase it. Fail loudly instead.
    if not payload:
        sys.exit(
            f"No indexable pages found under {starting_directory}; "
            "refusing to replace the index with an empty payload."
        )
    index_payload(payload)
26 changes: 26 additions & 0 deletions .github/workflows/website-root.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,29 @@ jobs:
with:
azure_static_web_apps_api_token: ${{ secrets.AZURE_STATIC_WEB_APPS_API_TOKEN_PROUD_BAY_0E9E0E81E }}
action: "close"

# Job that rebuilds the Algolia search index from the generated site.
algolia_index:
name: Index site for Algolia
# Only runs for push events.
if: github.event_name == 'push'
# Runs after build_and_deploy_job has finished.
needs: ['build_and_deploy_job']
runs-on: ubuntu-latest
env:
# Read by .github/scripts/algolia.py through the environment.
ALGOLIA_APP_ID: ${{ secrets.ALGOLIA_APP_ID }}
ALGOLIA_API_WRITE_KEY: ${{ secrets.ALGOLIA_API_WRITE_KEY }}
ALGOLIA_INDEX_NAME: daprdocs
steps:
# Checkout provides the indexing script; submodules are not fetched.
- name: Checkout docs repo
uses: actions/checkout@v2
with:
submodules: false
# Fetches the hugo_build artifact into site/.
# NOTE(review): presumably uploaded by build_and_deploy_job — that step is not visible here; confirm.
- name: Download Hugo artifacts
uses: actions/download-artifact@v3
with:
name: hugo_build
path: site/
- name: Install Python packages
run: |
pip install --upgrade bs4
# The script imports algoliasearch.search_client.SearchClient; the client is held to the 2.x series here.
pip install --upgrade 'algoliasearch>=2.0,<3.0'
# Passes ./site as the directory for the script to scan.
- name: Index site
run: python ./.github/scripts/algolia.py ./site
20 changes: 7 additions & 13 deletions daprdocs/layouts/partials/hooks/body-end.html
Original file line number Diff line number Diff line change
@@ -1,19 +1,13 @@
<script src="/js/copy-code-button.js"></script>

{{ with .Site.Params.algolia_docsearch }}
<script src="https://cdn.jsdelivr.net/npm/docsearch.js@2.6.3/dist/cdn/docsearch.min.js"></script>
<script>
<script src="https://cdn.jsdelivr.net/npm/@docsearch/js@3"></script>
<script type="text/javascript">
docsearch({
// Your apiKey and indexName will be given to you once
// we create your config
apiKey: '54ae43aa28ce8f00c54c8d5f544d29b9',
indexName: 'crawler_dapr',
container: '#docsearch',
appId: 'O0QLQGNF38',
// Replace inputSelector with a CSS selector
// matching your search input
inputSelector: '.td-search-input',
// Set debug to true to inspect the dropdown
debug: false,
apiKey: '54ae43aa28ce8f00c54c8d5f544d29b9',
indexName: 'daprdocs',
});
</script>
{{ end }}

<script src="/js/copy-code-button.js"></script>
2 changes: 1 addition & 1 deletion daprdocs/layouts/partials/hooks/head-end.html
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{{ with .Site.Params.algolia_docsearch }}
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/docsearch.js@2/dist/cdn/docsearch.min.css" />
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@docsearch/css@3" />
{{ end }}
30 changes: 30 additions & 0 deletions daprdocs/layouts/partials/search-input.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{{ if .Site.Params.gcs_engine_id -}}
<input type="search" class="form-control td-search-input" placeholder="&#xf002; {{ T "ui_search" }}" aria-label="{{ T "ui_search" }}" autocomplete="off">
{{ else if .Site.Params.algolia_docsearch -}}
<div id="docsearch"></div>
{{ else if .Site.Params.offlineSearch -}}
{{ $offlineSearchIndex := resources.Get "json/offline-search-index.json" | resources.ExecuteAsTemplate "offline-search-index.json" . -}}
{{ if hugo.IsProduction -}}
{{/* Use `md5` as finger print hash function to shorten file name to avoid `file name too long` error. */ -}}
{{ $offlineSearchIndex = $offlineSearchIndex | fingerprint "md5" -}}
{{ end -}}
{{ $offlineSearchLink := $offlineSearchIndex.RelPermalink -}}

<input
type="search"
class="form-control td-search-input"
placeholder="&#xf002; {{ T "ui_search" }}"
aria-label="{{ T "ui_search" }}"
autocomplete="off"
{{/*
The data attribute name of the json file URL must end with `src` since
Hugo's absurlreplacer requires `src`, `href`, `action` or `srcset` suffix for the attribute name.
If the absurlreplacer is not applied, the URL will start with `/`.
It causes the json file loading error when relativeURLs is enabled.
https://github.com/google/docsy/issues/181
*/}}
data-offline-search-index-json-src="{{ $offlineSearchLink }}"
data-offline-search-base-href="/"
data-offline-search-max-results="{{ .Site.Params.offlineSearchMaxResults | default 10 }}"
>
{{ end -}}

0 comments on commit 0d0d29a

Please sign in to comment.