Skip to content

Commit

Permalink
feat: Add API docs generation (#22)
Browse files Browse the repository at this point in the history
* Initial API docs

* Use haystack-experimental category slug

* Update category slug to experiments-api

* Add harness api docs

* Remove push branches trigger, leave main only

* Use ReadmeIntegrationRenderer

* Attempt to get haystack version from readme docs and use it

* Output only two last minor versions, current stable and unstable 2.x releases

* Experiment

* More exp

* Update docs/pydoc/config/harness.yml

Co-authored-by: Madeesh Kannan <[email protected]>

* Update docs/pydoc/config/harness.yml

Co-authored-by: Madeesh Kannan <[email protected]>

* Fix harness.yml

* Parametrize versions to sync

* Temp setting SYNC_LAST_N_HAYSTACK_VERSIONS to 1

---------

Co-authored-by: Madeesh Kannan <[email protected]>
  • Loading branch information
vblagoje and shadeMe committed Jul 4, 2024
1 parent f1d85d1 commit 7863505
Show file tree
Hide file tree
Showing 7 changed files with 289 additions and 0 deletions.
72 changes: 72 additions & 0 deletions .github/utils/delete_outdated_docs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import argparse
import base64
import os
import re
from pathlib import Path
from typing import List

import requests
import yaml

# Matches a bare "MAJOR.MINOR" version string such as "2.3" (digits, one dot, digits).
# NOTE(review): not referenced by the code visible in this script; confirm whether it is still needed.
VERSION_VALIDATOR = re.compile(r"^[0-9]+\.[0-9]+$")


def readme_token():
    """
    Builds the credential string used for Basic authentication against Readme.

    The key is read from the `README_API_KEY` environment variable, a trailing
    colon is appended, and the result is returned base64-encoded as text.

    Raises an Exception when the variable is unset or empty.
    """
    key = os.environ.get("README_API_KEY")
    if key is None or key == "":
        raise Exception("README_API_KEY env var is not set")

    credential = (key + ":").encode("utf-8")
    return base64.b64encode(credential).decode("utf-8")


def create_headers(version: str):
    """
    Returns the HTTP headers for a Readme API request that targets `version`.

    The `authorization` header carries the Basic credential from `readme_token()`
    and `x-readme-version` carries the given version string.
    """
    token = readme_token()
    headers = {}
    headers["authorization"] = "Basic " + token
    headers["x-readme-version"] = version
    return headers


def get_docs_in_category(category_slug: str, version: str) -> List[str]:
    """
    Returns the slugs of all documents in a category for the specific version.

    :param category_slug: Slug of the Readme category whose documents are listed.
    :param version: Docs version to query; sent in the `x-readme-version` header.
    :returns: The `slug` field of every document in the response.
    :raises requests.HTTPError: If Readme answers with an error status code.
    """
    url = f"https://dash.readme.com/api/v1/categories/{category_slug}/docs"
    headers = create_headers(version)
    res = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx, like delete_doc does: an error payload is not the
    # expected list of docs and would otherwise break the comprehension below
    # with an unrelated-looking error.
    res.raise_for_status()
    return [doc["slug"] for doc in res.json()]


def delete_doc(slug: str, version: str):
    """
    Deletes the document identified by `slug` from the given docs version on Readme.

    Raises `requests.HTTPError` when the API responds with an error status code.
    """
    response = requests.delete(
        f"https://dash.readme.com/api/v1/docs/{slug}",
        headers=create_headers(version),
        timeout=10,
    )
    response.raise_for_status()


if __name__ == "__main__":
    # Command-line entry point: compare the docs that exist on Readme with the docs
    # the local pydoc configs declare, and delete the remote ones that are left over.
    parser = argparse.ArgumentParser(
        description="Delete outdated documentation from Readme.io. "
        "It will delete all documents that are not present in the current config files."
    )
    parser.add_argument(
        "-c", "--config-path", help="Path to folder containing YAML documentation configs", required=True, type=Path
    )
    parser.add_argument("-v", "--version", help="The version that will have its documents deleted", required=True)
    args = parser.parse_args()

    configs = [yaml.safe_load(c.read_text()) for c in args.config_path.glob("*.yml")]

    # Slugs declared by the local configs, grouped by the category they belong to.
    # Grouping also ensures every category is queried only once.
    expected_docs = {}
    for config in configs:
        renderer = config["renderer"]
        expected_docs.setdefault(renderer["category_slug"], set()).add(renderer["slug"])

    # A remote document is outdated when no local config declares its slug.
    # (Previously the check was inverted: it tried to delete config slugs that were
    # missing on Readme, so outdated remote docs were never removed.)
    for category_slug, expected_slugs in expected_docs.items():
        for doc_slug in get_docs_in_category(category_slug, args.version):
            if doc_slug in expected_slugs:
                continue

            delete_doc(doc_slug, args.version)
10 changes: 10 additions & 0 deletions .github/utils/pydoc-markdown.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash

# Renders every pydoc-markdown config found in docs/pydoc/config.
# Output is produced from inside a freshly recreated docs/pydoc/temp directory.
set -e # Abort as soon as one of the commands below fails
cd docs/pydoc
rm -rf temp && mkdir temp
cd temp
for config in ../config/*
do
    echo "Converting $config..."
    pydoc-markdown "$config"
done
67 changes: 67 additions & 0 deletions .github/workflows/api_docs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
name: Sync docs with Readme

on:
pull_request:
paths:
- "docs/pydoc/**"
push:
branches:
- main

env:
HATCH_VERSION: "1.9.3"
PYTHON_VERSION: "3.10"
SYNC_LAST_N_HAYSTACK_VERSIONS: 1

jobs:
get-versions:
runs-on: ubuntu-latest
outputs:
versions: ${{ steps.version_finder.outputs.versions }}
steps:
- name: Get Haystack Docs versions
id: version_finder
run: |
curl -s "https://dash.readme.com/api/v1/version" --header "authorization: Basic ${{ secrets.README_API_KEY }}" > out
VERSIONS=$(jq -c '[ .[] | select(.version | startswith("2.")) | .version ] | .[-${{ env.SYNC_LAST_N_HAYSTACK_VERSIONS }}:]' out)
{
echo 'versions<<EOF'
echo "$VERSIONS"
echo EOF
} >> "$GITHUB_OUTPUT"
sync:
runs-on: ubuntu-latest
needs: get-versions
strategy:
fail-fast: false
max-parallel: 1
matrix:
hs-docs-version: ${{ fromJSON(needs.get-versions.outputs.versions) }}
steps:
- name: Checkout this repo
uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "${{ env.PYTHON_VERSION }}"

- name: Install Hatch
run: pip install hatch==${{ env.HATCH_VERSION }}

- name: Generate API docs
env:
# This is necessary to fetch the documentation categories
# from Readme.io as we need them to associate the slug
# in config files with their id.
README_API_KEY: ${{ secrets.README_API_KEY }}
# The command is a bit misleading, we're not actually syncing anything here,
# we're just generating the markdown files from the yaml configs.
PYDOC_TOOLS_HAYSTACK_DOC_VERSION: ${{ matrix.hs-docs-version }}
run: hatch run readme:sync

- name: Sync docs for ${{ matrix.hs-docs-version }}
if: github.event_name == 'push'
uses: readmeio/rdme@v8
with:
rdme: docs ./docs/pydoc/temp --key="${{ secrets.README_API_KEY }}" --version="${{ matrix.hs-docs-version }}"
57 changes: 57 additions & 0 deletions docs/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# :ledger: Looking for the docs?

You can find Haystack's documentation at https://docs.haystack.deepset.ai/.

# :computer: How to update docs?

## Overview, Components, Pipeline Nodes, and Guides

You can find these docs on the Haystack Docs page: https://docs.haystack.deepset.ai/docs/get_started. If you want to contribute, and we welcome every contribution, do the following:
1. Make sure you're on the right version (check the version expanding list in the top left corner).
2. Use the "Suggest Edits" link you can find in the top right corner of every page.
3. Suggest a change right in the docs and click **Submit Suggested Edits**.
4. Optionally, leave us a comment and submit your change.

Once we take care of it, you'll get an email telling you the change's been merged, or not. If not, we'll give you the reason why.

Make sure to check our [Contribution Guidelines](https://github.com/deepset-ai/haystack/blob/main/CONTRIBUTING.md).

## Tutorials

The Tutorials live in a separate repo: https://github.com/deepset-ai/haystack-tutorials. For instructions on how to contribute to tutorials, see [Contributing to Tutorials](https://github.com/deepset-ai/haystack-tutorials/blob/main/Contributing.md#contributing-to-haystack-tutorials).

## API Reference

We use Pydoc-Markdown to create Markdown files from the docstrings in our code. There is a GitHub Action that regenerates the API pages with each commit.

If you want to generate a new Markdown file for a new Haystack module, create a `.yml` file in `docs/pydoc/config` which configures how Pydoc-Markdown will generate the page and commit it to main.

All the updates to docstrings get pushed to documentation when you commit to the main branch.

### Configuration

Pydoc will read the configuration from a `.yml` file which is located under `/haystack_experimental/docs/pydoc/config`. Our files contain three main sections:

- **loader**: A list of plugins that load API objects from python source files.
- **type**: Loader for python source files
- **search_path**: Location of source files
- **modules**: Modules which are used for generating the markdown file
- **ignore_when_discovered**: Define which files should be ignored
- **processor**: A list of plugins that process API objects to modify their docstrings (e.g. to adapt them from a documentation format to Markdown or to remove items that should not be rendered into the documentation).
- **type: filter**: Filter for specific modules
- **documented_only**: Only documented API objects
- **do_not_filter_modules**: Do not filter module objects
- **skip_empty_modules**: Skip modules without content
- **renderer**: A plugin that produces the output files. We use a custom ReadmeRenderer based on the Markdown renderer. It makes sure the Markdown files comply with ReadMe requirements.
- **type**: Define the renderer which you want to use. We are using the ReadmeRenderer to make sure the files display properly in ReadMe.
- **excerpt**: Add a short description of the page. It shows up right below the page title.
- **category**: This is the ReadMe category ID to make sure the doc lands in the right section of Haystack docs.
- **title**: The title of the doc as it will appear on the website. Make sure you always add "API" at the end.
- **slug**: The page slug, each word should be separated with a dash.
- **order**: Pages are ordered alphabetically. This defines where in the TOC the page lands.
- markdown:
- **descriptive_class_title**: Remove the word "Object" from class titles.
- **descriptive_module_title**: Add the word “Module” before the module name.
- **add_method_class_prefix**: Add the class name as a prefix to method names.
- **add_member_class_prefix**: Add the class name as a prefix to member names.
- **filename**: File name of the generated file, use underscores to separate each word.
29 changes: 29 additions & 0 deletions docs/pydoc/config/harness.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
loaders:
- type: haystack_pydoc_tools.loaders.CustomPythonLoader
search_path: [../../../haystack_experimental/evaluation]
modules: ["harness/evaluation_harness",
"harness/rag/harness",
"harness/rag/parameters"]
ignore_when_discovered: ["__init__"]
processors:
- type: filter
expression:
documented_only: true
do_not_filter_modules: false
skip_empty_modules: true
- type: smart
- type: crossref
renderer:
type: haystack_pydoc_tools.renderers.ReadmeIntegrationRenderer
excerpt: Evaluation harness for Haystack.
category_slug: experiments-api
title: Evaluation Harness
slug: evaluation-harness
order: 50
markdown:
descriptive_class_title: false
classdef_code_block: false
descriptive_module_title: true
add_method_class_prefix: true
add_member_class_prefix: false
filename: evaluation_harness.md
27 changes: 27 additions & 0 deletions docs/pydoc/config/openai_function_caller.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
loaders:
- type: haystack_pydoc_tools.loaders.CustomPythonLoader
search_path: [../../../haystack_experimental/components/tools/openai]
modules: ["function_caller"]
ignore_when_discovered: ["__init__"]
processors:
- type: filter
expression:
documented_only: true
do_not_filter_modules: false
skip_empty_modules: true
- type: smart
- type: crossref
renderer:
type: haystack_pydoc_tools.renderers.ReadmeIntegrationRenderer
excerpt: OpenAIFunctionCaller invokes Python functions from ChatMessage(s)
category_slug: experiments-api
title: OpenAI Function Caller
slug: openai-function-caller-api
order: 90
markdown:
descriptive_class_title: false
classdef_code_block: false
descriptive_module_title: true
add_method_class_prefix: true
add_member_class_prefix: false
filename: openai_function_caller_api.md
27 changes: 27 additions & 0 deletions docs/pydoc/config/openapitool.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
loaders:
- type: haystack_pydoc_tools.loaders.CustomPythonLoader
search_path: [../../../haystack_experimental/components/tools/openapi]
modules: ["openapi_tool"]
ignore_when_discovered: ["__init__"]
processors:
- type: filter
expression:
documented_only: true
do_not_filter_modules: false
skip_empty_modules: true
- type: smart
- type: crossref
renderer:
type: haystack_pydoc_tools.renderers.ReadmeIntegrationRenderer
excerpt: Allows Haystack to interact with OpenAPI specified services.
category_slug: experiments-api
title: OpenAPI
slug: openapi-api
order: 100
markdown:
descriptive_class_title: false
classdef_code_block: false
descriptive_module_title: true
add_method_class_prefix: true
add_member_class_prefix: false
filename: openapi_api.md

0 comments on commit 7863505

Please sign in to comment.