From 786350564196c30d1a3e408a4133248e54a0eb9b Mon Sep 17 00:00:00 2001
From: Vladimir Blagojevic
Date: Thu, 4 Jul 2024 10:27:37 +0200
Subject: [PATCH] feat: Add API docs generation (#22)

* Initial API docs

* Use haystack-experimental category slug

* Update category slug to experiments-api

* Add harness api docs

* Remove push branches trigger, leave main only

* Use ReadmeIntegrationRenderer

* Attempt to get haystack version from readme docs and use it

* Output only two last minor versions, current stable and unstable 2.x releases

* Experiment

* More exp

* Update docs/pydoc/config/harness.yml

Co-authored-by: Madeesh Kannan

* Update docs/pydoc/config/harness.yml

Co-authored-by: Madeesh Kannan

* Fix harness.yml

* Parametrize versions to sync

* Temp setting SYNC_LAST_N_HAYSTACK_VERSIONS to 1

---------

Co-authored-by: Madeesh Kannan
---
 .github/utils/delete_outdated_docs.py        | 81 ++++++++++++++++++++
 .github/utils/pydoc-markdown.sh              | 10 +++
 .github/workflows/api_docs.yml               | 67 ++++++++++++++++++
 docs/README.md                               | 57 ++++++++++++++++
 docs/pydoc/config/harness.yml                | 29 ++++++++
 docs/pydoc/config/openai_function_caller.yml | 27 ++++++++
 docs/pydoc/config/openapitool.yml            | 27 ++++++++
 7 files changed, 298 insertions(+)
 create mode 100644 .github/utils/delete_outdated_docs.py
 create mode 100755 .github/utils/pydoc-markdown.sh
 create mode 100644 .github/workflows/api_docs.yml
 create mode 100644 docs/README.md
 create mode 100644 docs/pydoc/config/harness.yml
 create mode 100644 docs/pydoc/config/openai_function_caller.yml
 create mode 100644 docs/pydoc/config/openapitool.yml

diff --git a/.github/utils/delete_outdated_docs.py b/.github/utils/delete_outdated_docs.py
new file mode 100644
index 0000000..1d88c11
--- /dev/null
+++ b/.github/utils/delete_outdated_docs.py
@@ -0,0 +1,81 @@
+import argparse
+import base64
+import os
+import re
+from pathlib import Path
+from typing import List
+
+import requests
+import yaml
+
+VERSION_VALIDATOR = re.compile(r"^[0-9]+\.[0-9]+$")
+
+
+def readme_token():
+    """
+    Returns the base64-encoded value of README_API_KEY followed by a colon, for use in a Basic auth header.
+    """
+    api_key = os.getenv("README_API_KEY", None)
+    if not api_key:
+        raise Exception("README_API_KEY env var is not set")
+
+    # Basic auth encodes "user:password"; the key is the user and the password is left empty.
+    api_key = f"{api_key}:"
+    return base64.b64encode(api_key.encode("utf-8")).decode("utf-8")
+
+
+def create_headers(version: str):
+    """
+    Returns the headers that authenticate a request and select the documentation version.
+    """
+    return {"authorization": f"Basic {readme_token()}", "x-readme-version": version}
+
+
+def get_docs_in_category(category_slug: str, version: str) -> List[str]:
+    """
+    Returns the slugs of all documents in a category for the specific version.
+    """
+    url = f"https://dash.readme.com/api/v1/categories/{category_slug}/docs"
+    headers = create_headers(version)
+    res = requests.get(url, headers=headers, timeout=10)
+    # Fail loudly on an error response instead of trying to parse it as a list of docs.
+    res.raise_for_status()
+    return [doc["slug"] for doc in res.json()]
+
+
+def delete_doc(slug: str, version: str):
+    """
+    Deletes the document with the given slug from the specific version.
+    """
+    url = f"https://dash.readme.com/api/v1/docs/{slug}"
+    headers = create_headers(version)
+    res = requests.delete(url, headers=headers, timeout=10)
+    res.raise_for_status()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Delete outdated documentation from Readme.io. "
+        "It will delete all documents that are not present in the current config files."
+    )
+    parser.add_argument(
+        "-c", "--config-path", help="Path to folder containing YAML documentation configs", required=True, type=Path
+    )
+    parser.add_argument("-v", "--version", help="The version that will have its documents deleted", required=True)
+    args = parser.parse_args()
+
+    configs = [yaml.safe_load(c.read_text()) for c in args.config_path.glob("*.yml")]
+
+    # Slugs the current configs generate, grouped by the category they are published to.
+    local_docs = {}
+    for config in configs:
+        renderer = config["renderer"]
+        local_docs.setdefault(renderer["category_slug"], set()).add(renderer["slug"])
+
+    # A remote document is outdated when no config in its category produces its slug.
+    for category_slug, doc_slugs in local_docs.items():
+        for doc_slug in get_docs_in_category(category_slug, args.version):
+            if doc_slug in doc_slugs:
+                continue
+
+            delete_doc(doc_slug, args.version)
diff --git a/.github/utils/pydoc-markdown.sh b/.github/utils/pydoc-markdown.sh
new file mode 100755
index 0000000..670bd09
--- /dev/null
+++ b/.github/utils/pydoc-markdown.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+set -e  # Fails on any error in the following loop
+cd docs/pydoc
+rm -rf temp && mkdir temp
+cd temp
+for file in ../config/* ; do
+    echo "Converting $file..."
+    pydoc-markdown "$file"
+done
diff --git a/.github/workflows/api_docs.yml b/.github/workflows/api_docs.yml
new file mode 100644
index 0000000..fb48776
--- /dev/null
+++ b/.github/workflows/api_docs.yml
@@ -0,0 +1,67 @@
+name: Sync docs with Readme
+
+on:
+  pull_request:
+    paths:
+      - "docs/pydoc/**"
+  push:
+    branches:
+      - main
+
+env:
+  HATCH_VERSION: "1.9.3"
+  PYTHON_VERSION: "3.10"
+  SYNC_LAST_N_HAYSTACK_VERSIONS: 1
+
+jobs:
+  get-versions:
+    runs-on: ubuntu-latest
+    outputs:
+      versions: ${{ steps.version_finder.outputs.versions }}
+    steps:
+      - name: Get Haystack Docs versions
+        id: version_finder
+        run: |
+          curl -s "https://dash.readme.com/api/v1/version" --header "authorization: Basic ${{ secrets.README_API_KEY }}" > out
+          VERSIONS=$(jq -c '[ .[] | select(.version | startswith("2.")) | .version ] | .[-${{ env.SYNC_LAST_N_HAYSTACK_VERSIONS }}:]' out)
+          {
+            echo 'versions<<EOF'
+            echo "$VERSIONS"
+            echo EOF
+          } >> "$GITHUB_OUTPUT"
+  sync:
+    runs-on: ubuntu-latest
+    needs: get-versions
+    strategy:
+      fail-fast: false
+      max-parallel: 1
+      matrix:
+        hs-docs-version: ${{ fromJSON(needs.get-versions.outputs.versions) }}
+    steps:
+      - name: Checkout this repo
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "${{ env.PYTHON_VERSION }}"
+
+      - name: Install Hatch
+        run: pip install hatch==${{ env.HATCH_VERSION }}
+
+      - name: Generate API docs
+        env:
+          # This is necessary to fetch the documentation categories
+          # from Readme.io as we need them to associate the slug
+          # in config files with their id.
+          README_API_KEY: ${{ secrets.README_API_KEY }}
+          # The command is a bit misleading, we're not actually syncing anything here,
+          # we're just generating the markdown files from the yaml configs.
+          PYDOC_TOOLS_HAYSTACK_DOC_VERSION: ${{ matrix.hs-docs-version }}
+        run: hatch run readme:sync
+
+      - name: Sync docs for ${{ matrix.hs-docs-version }}
+        if: github.event_name == 'push'
+        uses: readmeio/rdme@v8
+        with:
+          rdme: docs ./docs/pydoc/temp --key="${{ secrets.README_API_KEY }}" --version="${{ matrix.hs-docs-version }}"
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 0000000..8c4726d
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,57 @@
+# :ledger: Looking for the docs?
+
+You can find Haystack's documentation at https://docs.haystack.deepset.ai/.
+
+# :computer: How to update docs?
+
+## Overview, Components, Pipeline Nodes, and Guides
+
+You can find these docs on the Haystack Docs page: https://docs.haystack.deepset.ai/docs/get_started. If you want to contribute, and we welcome every contribution, do the following:
+1. Make sure you're on the right version (check the version expanding list in the top left corner).
+2. Use the "Suggest Edits" link you can find in the top right corner of every page.
+3. Suggest a change right in the docs and click **Submit Suggested Edits**.
+4. Optionally, leave us a comment and submit your change.
+
+Once we take care of it, you'll get an email telling you the change's been merged, or not. If not, we'll give you the reason why.
+
+Make sure to check our [Contribution Guidelines](https://github.com/deepset-ai/haystack/blob/main/CONTRIBUTING.md).
+
+## Tutorials
+
+The Tutorials live in a separate repo: https://github.com/deepset-ai/haystack-tutorials. For instructions on how to contribute to tutorials, see [Contributing to Tutorials](https://github.com/deepset-ai/haystack-tutorials/blob/main/Contributing.md#contributing-to-haystack-tutorials).
+
+## API Reference
+
+We use Pydoc-Markdown to create Markdown files from the docstrings in our code. There is a Github Action that regenerates the API pages with each commit.
+
+If you want to generate a new Markdown file for a new Haystack module, create a `.yml` file in `docs/pydoc/config` which configures how Pydoc-Markdown will generate the page and commit it to main.
+
+All the updates to docstrings get pushed to documentation when you commit to the main branch.
+
+### Configuration
+
+Pydoc will read the configuration from a `.yml` file which is located under `/haystack_experimental/docs/pydoc/config`. Our files contain three main sections:
+
+- **loader**: A list of plugins that load API objects from python source files.
+  - **type**: Loader for python source files
+  - **search_path**: Location of source files
+  - **modules**: Modules which are used for generating the markdown file
+  - **ignore_when_discovered**: Define which files should be ignored
+- **processor**: A list of plugins that process API objects to modify their docstrings (e.g. to adapt them from a documentation format to Markdown or to remove items that should not be rendered into the documentation).
+  - **type: filter**: Filter for specific modules
+  - **documented_only**: Only documented API objects
+  - **do_not_filter_modules**: Do not filter module objects
+  - **skip_empty_modules**: Skip modules without content
+- **renderer**: A plugin that produces the output files. We use a custom ReadmeRenderer based on the Markdown renderer. It makes sure the Markdown files comply with ReadMe requirements.
+  - **type**: Define the renderer which you want to use. We are using the ReadmeRenderer to make sure the files display properly in ReadMe.
+  - **excerpt**: Add a short description of the page. It shows up right below the page title.
+  - **category**: This is the ReadMe category ID to make sure the doc lands in the right section of Haystack docs.
+  - **title**: The title of the doc as it will appear on the website. Make sure you always add "API" at the end.
+  - **slug**: The page slug, each word should be separated with a dash.
+  - **order**: Pages are ordered alphabetically. This defines where in the TOC the page lands.
+  - markdown:
+    - **descriptive_class_title**: Remove the word "Object" from class titles.
+    - **descriptive_module_title**: Adding the word “Module” before the module name.
+    - **add_method_class_prefix**: Add the class name as a prefix to method names.
+    - **add_member_class_prefix**: Add the class name as a prefix to member names.
+    - **filename**: File name of the generated file, use underscores to separate each word.
diff --git a/docs/pydoc/config/harness.yml b/docs/pydoc/config/harness.yml
new file mode 100644
index 0000000..890b4de
--- /dev/null
+++ b/docs/pydoc/config/harness.yml
@@ -0,0 +1,29 @@
+loaders:
+  - type: haystack_pydoc_tools.loaders.CustomPythonLoader
+    search_path: [../../../haystack_experimental/evaluation]
+    modules: ["harness/evaluation_harness",
+              "harness/rag/harness",
+              "harness/rag/parameters"]
+    ignore_when_discovered: ["__init__"]
+processors:
+  - type: filter
+    expression:
+    documented_only: true
+    do_not_filter_modules: false
+    skip_empty_modules: true
+  - type: smart
+  - type: crossref
+renderer:
+  type: haystack_pydoc_tools.renderers.ReadmeIntegrationRenderer
+  excerpt: Evaluation harness for Haystack.
+ category_slug: experiments-api + title: Evaluation Harness + slug: evaluation-harness + order: 50 + markdown: + descriptive_class_title: false + classdef_code_block: false + descriptive_module_title: true + add_method_class_prefix: true + add_member_class_prefix: false + filename: evaluation_harness.md diff --git a/docs/pydoc/config/openai_function_caller.yml b/docs/pydoc/config/openai_function_caller.yml new file mode 100644 index 0000000..8f88329 --- /dev/null +++ b/docs/pydoc/config/openai_function_caller.yml @@ -0,0 +1,27 @@ +loaders: + - type: haystack_pydoc_tools.loaders.CustomPythonLoader + search_path: [../../../haystack_experimental/components/tools/openai] + modules: ["function_caller"] + ignore_when_discovered: ["__init__"] +processors: + - type: filter + expression: + documented_only: true + do_not_filter_modules: false + skip_empty_modules: true + - type: smart + - type: crossref +renderer: + type: haystack_pydoc_tools.renderers.ReadmeIntegrationRenderer + excerpt: OpenAIFunctionCaller invokes Python functions from ChatMessage(s) + category_slug: experiments-api + title: OpenAI Function Caller + slug: openai-function-caller-api + order: 90 + markdown: + descriptive_class_title: false + classdef_code_block: false + descriptive_module_title: true + add_method_class_prefix: true + add_member_class_prefix: false + filename: openai_function_caller_api.md diff --git a/docs/pydoc/config/openapitool.yml b/docs/pydoc/config/openapitool.yml new file mode 100644 index 0000000..6a4c3ee --- /dev/null +++ b/docs/pydoc/config/openapitool.yml @@ -0,0 +1,27 @@ +loaders: + - type: haystack_pydoc_tools.loaders.CustomPythonLoader + search_path: [../../../haystack_experimental/components/tools/openapi] + modules: ["openapi_tool"] + ignore_when_discovered: ["__init__"] +processors: + - type: filter + expression: + documented_only: true + do_not_filter_modules: false + skip_empty_modules: true + - type: smart + - type: crossref +renderer: + type: 
haystack_pydoc_tools.renderers.ReadmeIntegrationRenderer + excerpt: Allows Haystack to interact with OpenAPI specified services. + category_slug: experiments-api + title: OpenAPI + slug: openapi-api + order: 100 + markdown: + descriptive_class_title: false + classdef_code_block: false + descriptive_module_title: true + add_method_class_prefix: true + add_member_class_prefix: false + filename: openapi_api.md