From aefb872a2ffb496dedaa30840cb012c5ed7cd12f Mon Sep 17 00:00:00 2001 From: Sunish Sheth Date: Thu, 30 Jan 2025 16:20:28 -0800 Subject: [PATCH] Adding sphinx docs to databricks-ai-bridge --- CONTRIBUTING.md | 5 + docs/Makefile | 247 ++++++++++++ docs/README.md | 29 ++ docs/source/_static/clippy.svg | 3 + .../_static/css/sphinx_rtd_theme-tweaks.css | 28 ++ docs/source/_static/icons/important-icon.svg | 2 + docs/source/_static/icons/nav-home.svg | 20 + docs/source/_static/icons/note-icon.svg | 2 + docs/source/_static/icons/tip-icon.svg | 2 + docs/source/_static/icons/warning-icon.svg | 2 + docs/source/_static/js/custom.js | 5 + docs/source/_static/logo/databricks-logo.svg | 9 + docs/source/_templates/layout.html | 13 + docs/source/conf.py | 377 ++++++++++++++++++ docs/source/index.rst | 13 + docs/source/latest/databricks_ai_bridge.rst | 14 + docs/source/latest/databricks_langchain.rst | 8 + docs/source/latest/databricks_openai.rst | 8 + .../src/databricks_langchain/chat_models.py | 38 +- .../src/databricks_langchain/embeddings.py | 5 + .../src/databricks_langchain/vectorstores.py | 38 +- .../vector_search_retriever_tool.py | 67 ++-- requirements/dev-requirements.txt | 8 + 23 files changed, 891 insertions(+), 52 deletions(-) create mode 100644 docs/Makefile create mode 100644 docs/README.md create mode 100644 docs/source/_static/clippy.svg create mode 100644 docs/source/_static/css/sphinx_rtd_theme-tweaks.css create mode 100644 docs/source/_static/icons/important-icon.svg create mode 100644 docs/source/_static/icons/nav-home.svg create mode 100644 docs/source/_static/icons/note-icon.svg create mode 100644 docs/source/_static/icons/tip-icon.svg create mode 100644 docs/source/_static/icons/warning-icon.svg create mode 100644 docs/source/_static/js/custom.js create mode 100644 docs/source/_static/logo/databricks-logo.svg create mode 100644 docs/source/_templates/layout.html create mode 100644 docs/source/conf.py create mode 100644 docs/source/index.rst create mode 
100644 docs/source/latest/databricks_ai_bridge.rst create mode 100644 docs/source/latest/databricks_langchain.rst create mode 100644 docs/source/latest/databricks_openai.rst create mode 100644 requirements/dev-requirements.txt diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2f83adc..c126373 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -9,6 +9,7 @@ conda create --name databricks-ai-dev-env python=3.10 conda activate databricks-ai-dev-env pip install -e ".[dev]" pip install -r requirements/lint-requirements.txt +pip install -r requirements/dev-requirements.txt ``` If you are working with integration packages install them as well @@ -16,3 +17,7 @@ If you are working with integration packages install them as well ```sh pip install -e "integrations/langchain[dev]" ``` + +### Build API docs + +See the documentation in docs/README.md for how to build docs. When releasing a new wheel, please send a pull request to change the API reference published in [docs-api-ref](https://github.com/databricks-eng/docs-api-ref/tree/main/content-publish/python/databricks-agents). diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..da2e460 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,247 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = build + +# User-friendly check for sphinx-build +ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) + $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don\'t have Sphinx installed, grab it from http://sphinx-doc.org/) +endif + +# Internal variables. 
+PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source + +.PHONY: default +default: html + +.PHONY: help +help: + @echo "Please use \`make <target>' where <target> is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " applehelp to make an Apple Help Book" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " epub3 to make an epub3" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " xml to make Docutils-native XML files" + @echo " pseudoxml to make pseudoxml-XML files for display purposes" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + @echo " coverage to run coverage check of the documentation (if enabled)" + @echo " dummy to check syntax errors of document sources" + +.PHONY: clean +clean: + rm -rf $(BUILDDIR)/* + +.PHONY: rst +rst: + $(SPHINXBUILD) -b rst $(ALLSPHINXOPTS) 
$(BUILDDIR)/rst + @echo + @echo "Build finished. The ReST files are in $(BUILDDIR)/rst." + +# Builds only the RST-based documentation (i.e., everything but Java & R docs) +.PHONY: rsthtml +rsthtml: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +.PHONY: html +html: rsthtml + +.PHONY: dirhtml +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +.PHONY: singlehtml +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +.PHONY: livehtml +livehtml: + sphinx-autobuild -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + +.PHONY: pickle +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +.PHONY: json +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +.PHONY: htmlhelp +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +.PHONY: qthelp +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/twitterpandas.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/twitterpandas.qhc" + +.PHONY: applehelp +applehelp: + $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp + @echo + @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." + @echo "N.B. 
You won't be able to view it unless you put it in" \ + "~/Library/Documentation/Help or install it in your application" \ + "bundle." + +.PHONY: devhelp +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/twitterpandas" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/twitterpandas" + @echo "# devhelp" + +.PHONY: epub +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +.PHONY: epub3 +epub3: + $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 + @echo + @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." + +.PHONY: latex +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +.PHONY: latexpdf +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +.PHONY: latexpdfja +latexpdfja: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through platex and dvipdfmx..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +.PHONY: text +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +.PHONY: man +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 
+ +.PHONY: texinfo +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +.PHONY: info +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +.PHONY: gettext +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +.PHONY: changes +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +.PHONY: linkcheck +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +.PHONY: doctest +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." + +.PHONY: coverage +coverage: + $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage + @echo "Testing of coverage in the sources finished, look at the " \ + "results in $(BUILDDIR)/coverage/python.txt." + +.PHONY: xml +xml: + $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml + @echo + @echo "Build finished. The XML files are in $(BUILDDIR)/xml." + +.PHONY: pseudoxml +pseudoxml: + $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml + @echo + @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." + +.PHONY: dummy +dummy: + $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy + @echo + @echo "Build finished. 
Dummy builder generates no files." diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..e33f942 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,29 @@ +# Databricks AI Bridge Documentation + +We generate our API docs with Sphinx, and they get published to [this directory](https://github.com/databricks-eng/docs-api-ref/tree/main/content-publish/python). + +## Setup +Requirements: +- Follow the steps in ../CONTRIBUTING.md to set up the development environment. + +## Develop the docs locally +Once you have activated the conda environment, navigate to this directory and run: + +```sh +make livehtml +``` + +## Build for production +To build for production, run: + +```sh +make html +``` + +This will output a set of static files in build/. + +To check the build, you can use a python http server: + +```sh +python3 -m http.server --directory build/html +``` diff --git a/docs/source/_static/clippy.svg b/docs/source/_static/clippy.svg new file mode 100644 index 0000000..e1b1703 --- /dev/null +++ b/docs/source/_static/clippy.svg @@ -0,0 +1,3 @@ + + + diff --git a/docs/source/_static/css/sphinx_rtd_theme-tweaks.css b/docs/source/_static/css/sphinx_rtd_theme-tweaks.css new file mode 100644 index 0000000..726e8e5 --- /dev/null +++ b/docs/source/_static/css/sphinx_rtd_theme-tweaks.css @@ -0,0 +1,28 @@ +/** css/yourtheme.css **/ + +/* for Alabaster */ +@import 'theme.css'; /* for the Read the Docs theme */ + +/* SVG logo sizing */ +a > img.logo { + width: 100% !important; +} + +/* Uniform background */ +.wy-nav-side, .wy-menu-vertical a:hover { + background-color: #fcfcfc; +} + +.wy-menu-vertical a { + color: #404040; +} + +div.wy-side-nav-search > .version { + color: #0a0a0a; +} + +@media screen and (min-width: 1100px) { + .wy-nav-content-wrap { + background: #fcfcfc; + } +} \ No newline at end of file diff --git a/docs/source/_static/icons/important-icon.svg b/docs/source/_static/icons/important-icon.svg new file mode 100644 index 0000000..3ff8c69 --- 
/dev/null +++ b/docs/source/_static/icons/important-icon.svg @@ -0,0 +1,2 @@ + + \ No newline at end of file diff --git a/docs/source/_static/icons/nav-home.svg b/docs/source/_static/icons/nav-home.svg new file mode 100644 index 0000000..a12863c --- /dev/null +++ b/docs/source/_static/icons/nav-home.svg @@ -0,0 +1,20 @@ + + + + house + Created with Sketch. + + + + + + + + + + + + + + + diff --git a/docs/source/_static/icons/note-icon.svg b/docs/source/_static/icons/note-icon.svg new file mode 100644 index 0000000..69d45cd --- /dev/null +++ b/docs/source/_static/icons/note-icon.svg @@ -0,0 +1,2 @@ + + diff --git a/docs/source/_static/icons/tip-icon.svg b/docs/source/_static/icons/tip-icon.svg new file mode 100644 index 0000000..f220f03 --- /dev/null +++ b/docs/source/_static/icons/tip-icon.svg @@ -0,0 +1,2 @@ + + diff --git a/docs/source/_static/icons/warning-icon.svg b/docs/source/_static/icons/warning-icon.svg new file mode 100644 index 0000000..b9f8b6f --- /dev/null +++ b/docs/source/_static/icons/warning-icon.svg @@ -0,0 +1,2 @@ + + \ No newline at end of file diff --git a/docs/source/_static/js/custom.js b/docs/source/_static/js/custom.js new file mode 100644 index 0000000..b5b5f19 --- /dev/null +++ b/docs/source/_static/js/custom.js @@ -0,0 +1,5 @@ +/* This adds target='_blank' to all external links, + allowing for them to open in a new browser window/tab. 
*/ +$(document).ready(function () { + $('a.external').attr('target', '_blank'); +}); diff --git a/docs/source/_static/logo/databricks-logo.svg b/docs/source/_static/logo/databricks-logo.svg new file mode 100644 index 0000000..187be39 --- /dev/null +++ b/docs/source/_static/logo/databricks-logo.svg @@ -0,0 +1,9 @@ + + + + + + + \ No newline at end of file diff --git a/docs/source/_templates/layout.html b/docs/source/_templates/layout.html new file mode 100644 index 0000000..d95a22b --- /dev/null +++ b/docs/source/_templates/layout.html @@ -0,0 +1,13 @@ +{% extends "!layout.html" %} + +{% block footer %} +{{ super() }} + + +{% endblock %} diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..2ba3f10 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,377 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Databricks AI Bridge documentation build configuration file, created by +# cookiecutter pipproject +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import datetime +import importlib.metadata +import os +import sys + +version = importlib.metadata.version("databricks_ai_bridge") + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. 
+sys.path.insert(0, os.path.abspath("../..")) +sys.path.insert(0, os.path.abspath(".")) +sys.path.insert(0, os.path.abspath("../../src/databricks_ai_bridge")) +sys.path.insert(0, os.path.abspath("../../integrations/langchain/src/databricks_langchain")) +sys.path.insert(0, os.path.abspath("../../integrations/openai/src/databricks_openai")) + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + "sphinx.ext.napoleon", # support for Google and NumPy style docstrings. + "sphinx.ext.autodoc", + "sphinx.ext.autodoc.typehints", + "sphinx.ext.autosummary", + "sphinx.ext.intersphinx", + "sphinx.ext.todo", + "myst_parser", + "sphinx_rtd_theme", +] + +myst_enable_extensions = ["attrs_block", "attrs_inline"] +myst_heading_anchors = 4 + +autodoc_pydantic_model_show_json = False +autodoc_pydantic_field_list_validators = False +autodoc_pydantic_config_members = False +autodoc_pydantic_model_show_field_summary = False +autodoc_pydantic_model_show_config_summary = False +autodoc_pydantic_model_show_validator_members = False +autodoc_pydantic_model_show_validator_summary = False +autodoc_pydantic_model_signature_prefix = "class" +autodoc_pydantic_field_signature_prefix = "param" +autodoc_typehints_format = "short" + + +# Add any paths that contain templates here, relative to this directory. 
+templates_path = ["_templates"] + +intersphinx_mapping = { + "mlflow": ("https://mlflow.org/docs/latest/", None), + "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), + "pyspark": ("https://spark.apache.org/docs/latest/api/python/", None), + "python": ("https://docs.python.org/3", None), + "langchain": ( + "https://api.python.langchain.com/en/latest/", + None, + ), + "pydantic": ("https://docs.pydantic.dev/latest/", None), +} + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# source_suffix = ['.rst', '.md'] +source_suffix = ".rst" + +# The encoding of source files. +# source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = "index" + +# General information about the project. +project = "DatabricksAIBridge" +author = "Databricks" +copyright = f"{datetime.date.today().year}, {author}" + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# + +# The short X.Y version. +version = ".".join(version.split(".")[:-1]) +# The full version, including alpha/beta/rc tags. +release = version + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = "en" + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +# today = '' +# Else, today_fmt is used as the format for a strftime call. +# today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. 
+# This patterns also effect to html_static_path and html_extra_path +exclude_patterns = [] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +default_role = "any" + +# If true, '()' will be appended to :func: etc. cross-reference text. +# add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +# add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +# show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = "sphinx" + +# A list of ignored prefixes for module index sorting. +# modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +# keep_warnings = False + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = False + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. + +html_context = {"use_algolia": os.environ.get("USE_ALGOLIA", "0")} + +html_theme = "sphinx_rtd_theme" + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +html_theme_options = { + "prev_next_buttons_location": "bottom", + "style_external_links": True, + # Toc options + "collapse_navigation": True, + "sticky_navigation": True, + "navigation_depth": -1, + "titles_only": False, + "logo_only": True, + "style_nav_header_background": "#FFF", +} + + +# Add any paths that contain custom themes here, relative to this directory. +# html_theme_path = [] + +# The name for this set of Sphinx documents. +# " v documentation" by default. 
+# html_title = 'Databricks AI Bridge v0.0.1' + +# A shorter title for the navigation bar. Default is the same as html_title. +# html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +html_logo = "./_static/logo/databricks-logo.svg" +html_style = "css/sphinx_rtd_theme-tweaks.css" + +# The name of an image file (relative to this directory) to use as a favicon of +# the docs. This file should be a Windows icon file (.ico) being 16x16 or +# 32x32 pixels large. +html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ["_static"] + +# These paths are either relative to html_static_path +# or fully qualified paths (eg. https://...) +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +# html_extra_path = [] + +# If not None, a 'Last updated on:' timestamp is inserted at every page +# bottom, using the given strftime format. +# The empty string is equivalent to '%b %d, %Y'. +# html_last_updated_fmt = None + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +# html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +# html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +# html_additional_pages = {} + +# If false, no module index is generated. +# html_domain_indices = True + +# If false, no index is generated. +# html_use_index = True + +# If true, the index is split into individual pages for each letter. +# html_split_index = False + +# If true, links to the reST sources are added to the pages. 
+html_show_sourcelink = False +html_copy_source = False + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +html_show_sphinx = False + +html_add_permalinks = " " + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +# html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +# html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +# html_file_suffix = None + +# Language to be used for generating the HTML full-text search index. +# Sphinx supports the following languages: +# 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' +# 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr', 'zh' +# html_search_language = 'en' + +# A dictionary with options for the search language support, empty by default. +# 'ja' uses this config value. +# 'zh' user can custom change `jieba` dictionary path. +# html_search_options = {'type': 'default'} + +# The name of a javascript file (relative to the configuration directory) that +# implements a search results scorer. If empty, the default will be used. +# html_search_scorer = 'scorer.js' + +# Output file base name for HTML help builder. +htmlhelp_basename = "DatabricksAIBridgeDoc" + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # 'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + # 'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + # 'preamble': '', + # Latex figure (float) alignment + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). 
+latex_documents = [ + ( + master_doc, + "DatabricksAIBridge.tex", + "DatabricksAIBridge Documentation", + "Databricks", + "manual", + ), +] + +# Mock torch & fastai imports as per suggestion in +# https://github.com/sphinx-doc/sphinx/issues/6521#issuecomment-505765893 +autodoc_mock_imports = ["torch", "fastai"] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +# latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +# latex_use_parts = False + +# If true, show page references after internal links. +# latex_show_pagerefs = False + +# If true, show URL addresses after external links. +# latex_show_urls = False + +# Documents to append as an appendix to all manuals. +# latex_appendices = [] + +# If false, no module index is generated. +# latex_domain_indices = True + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [(master_doc, "DatabricksAIBridge", "DatabricksAIBridge Documentation", [author], 1)] + +# If true, show URL addresses after external links. +# man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ( + master_doc, + "DatabricksAIBridge", + "DatabricksAIBridge Documentation", + author, + "End-to-end machine learning toolkit.", + "Miscellaneous", + ), +] + +# Documents to append as an appendix to all manuals. +# texinfo_appendices = [] + +# If false, no module index is generated. +# texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. 
+# texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +# texinfo_no_detailmenu = False + +# Enable nitpicky mode to log warnings for broken references +nitpicky = True +nitpick_ignore = [ + # Ignore "parent class reference not found" errors for + # subclasses of ``object`` + ("py:class", "object"), + ("py:class", "enum.Enum"), + ("py:class", "bytes"), + ("py:class", "bytearray"), +] + +linkcheck_ignore = [ + # Ignore local URLs when validating external links + r"http://localhost:\d+/?", +] + +autodoc_member_order = "bysource" diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..d03c495 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,13 @@ +Databricks AI Python API +=============================================== + +This API reference is for the `databricks-ai-bridge `_ Python package. +This package also provides multiple integrations to `databricks-langchain `_ +and `databricks-openai `_. + +.. toctree:: + :maxdepth: 1 + + latest/databricks_ai_bridge + latest/databricks_langchain + latest/databricks_openai diff --git a/docs/source/latest/databricks_ai_bridge.rst b/docs/source/latest/databricks_ai_bridge.rst new file mode 100644 index 0000000..5a86732 --- /dev/null +++ b/docs/source/latest/databricks_ai_bridge.rst @@ -0,0 +1,14 @@ +Databricks AI Bridge Python API +=================================== + +.. automodule:: databricks_ai_bridge.genie + :members: + :undoc-members: + :show-inheritance: + :exclude-members: + +.. automodule:: databricks_ai_bridge.vector_search_retriever_tool + :members: + :undoc-members: + :show-inheritance: + :exclude-members: diff --git a/docs/source/latest/databricks_langchain.rst b/docs/source/latest/databricks_langchain.rst new file mode 100644 index 0000000..50e3b63 --- /dev/null +++ b/docs/source/latest/databricks_langchain.rst @@ -0,0 +1,8 @@ +Databricks Langchain Integrations Python API +============================================ + +.. 
automodule:: databricks_langchain + :members: + :undoc-members: + :show-inheritance: + :exclude-members: diff --git a/docs/source/latest/databricks_openai.rst b/docs/source/latest/databricks_openai.rst new file mode 100644 index 0000000..944ee0e --- /dev/null +++ b/docs/source/latest/databricks_openai.rst @@ -0,0 +1,8 @@ +Databricks OpenAI Integrations Python API +========================================= + +.. automodule:: databricks_openai + :members: + :undoc-members: + :show-inheritance: + :exclude-members: diff --git a/integrations/langchain/src/databricks_langchain/chat_models.py b/integrations/langchain/src/databricks_langchain/chat_models.py index b6d4165..71c3864 100644 --- a/integrations/langchain/src/databricks_langchain/chat_models.py +++ b/integrations/langchain/src/databricks_langchain/chat_models.py @@ -92,6 +92,7 @@ class ChatDatabricks(BaseChatModel): Any extra parameters to pass to the endpoint. Instantiate: + .. code-block:: python from databricks_langchain import ChatDatabricks @@ -103,6 +104,7 @@ class ChatDatabricks(BaseChatModel): ) Invoke: + .. code-block:: python messages = [ @@ -120,6 +122,7 @@ class ChatDatabricks(BaseChatModel): ) Stream: + .. code-block:: python for chunk in llm.stream(messages): @@ -177,6 +180,7 @@ class ChatDatabricks(BaseChatModel): structured_llm = llm.with_structured_output(...) Async: + .. code-block:: python await llm.ainvoke(messages) @@ -196,6 +200,7 @@ class ChatDatabricks(BaseChatModel): ) Tool calling: + .. 
code-block:: python from pydantic import BaseModel, Field @@ -391,12 +396,10 @@ def bind_tools( name of the tool (str): calls corresponding tool; "auto": automatically selects a tool (including no tool); "none": model does not generate any tool calls and instead must - generate a standard assistant message; + generate a standard assistant message "required": the model picks the most relevant tool in tools and - must generate a tool call; - - or a dict of the form: - {"type": "function", "function": {"name": <>}}. + must generate a tool call or a dict of the form: + {"type": "function", "function": {"name": <>}}. **kwargs: Any additional parameters to pass to the :class:`~langchain.runnable.Runnable` constructor. """ @@ -479,7 +482,10 @@ def with_structured_output( - ``"parsed"``: None if there was a parsing error, otherwise the type depends on the ``schema`` as described above. - ``"parsing_error"``: Optional[BaseException] - Example: Function-calling, Pydantic schema (method="function_calling", include_raw=False): + Example: + + Function-calling, Pydantic schema (method="function_calling", include_raw=False) + .. code-block:: python from databricks_langchain import ChatDatabricks @@ -503,7 +509,8 @@ class AnswerWithJustification(BaseModel): # justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.' # ) - Example: Function-calling, Pydantic schema (method="function_calling", include_raw=True): + Function-calling, Pydantic schema (method="function_calling", include_raw=True): + .. code-block:: python from databricks_langchain import ChatDatabricks @@ -527,7 +534,8 @@ class AnswerWithJustification(BaseModel): # 'parsing_error': None # } - Example: Function-calling, dict schema (method="function_calling", include_raw=False): + Function-calling, dict schema (method="function_calling", include_raw=False): + .. 
code-block:: python from databricks_langchain import ChatDatabricks @@ -552,7 +560,8 @@ class AnswerWithJustification(BaseModel): # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.' # } - Example: JSON mode, Pydantic schema (method="json_mode", include_raw=True): + JSON mode, Pydantic schema (method="json_mode", include_raw=True): + .. code-block:: from databricks_langchain import ChatDatabricks @@ -571,27 +580,28 @@ class AnswerWithJustification(BaseModel): structured_llm.invoke( "Answer the following question. " - "Make sure to return a JSON blob with keys 'answer' and 'justification'.\n\n" + "Make sure to return a JSON blob with keys 'answer' and 'justification'." "What's heavier a pound of bricks or a pound of feathers?" ) # -> { - # 'raw': AIMessage(content='{\n "answer": "They are both the same weight.",\n "justification": "Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight." \n}'), + # 'raw': AIMessage(content='{ "answer": "They are both the same weight.", "justification": "Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight." }'), # 'parsed': AnswerWithJustification(answer='They are both the same weight.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight.'), # 'parsing_error': None # } - Example: JSON mode, no schema (schema=None, method="json_mode", include_raw=True): + JSON mode, no schema (schema=None, method="json_mode", include_raw=True): + .. code-block:: structured_llm = llm.with_structured_output(method="json_mode", include_raw=True) structured_llm.invoke( "Answer the following question. 
" - "Make sure to return a JSON blob with keys 'answer' and 'justification'.\n\n" + "Make sure to return a JSON blob with keys 'answer' and 'justification'." "What's heavier a pound of bricks or a pound of feathers?" ) # -> { - # 'raw': AIMessage(content='{\n "answer": "They are both the same weight.",\n "justification": "Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight." \n}'), + # 'raw': AIMessage(content='{ "answer": "They are both the same weight.", "justification": "Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight." }'), # 'parsed': { # 'answer': 'They are both the same weight.', # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight.' diff --git a/integrations/langchain/src/databricks_langchain/embeddings.py b/integrations/langchain/src/databricks_langchain/embeddings.py index 421e45c..1aa41b7 100644 --- a/integrations/langchain/src/databricks_langchain/embeddings.py +++ b/integrations/langchain/src/databricks_langchain/embeddings.py @@ -35,7 +35,9 @@ class DatabricksEmbeddings(Embeddings, BaseModel): The parameters to use for documents. Instantiate: + .. code-block:: python + from databricks_langchain import DatabricksEmbeddings embed = DatabricksEmbeddings( @@ -43,11 +45,14 @@ class DatabricksEmbeddings(Embeddings, BaseModel): ) Embed single text: + .. code-block:: python + input_text = "The meaning of life is 42" embed.embed_query(input_text) .. code-block:: python + [0.01605224609375, -0.0298309326171875, ...] 
""" diff --git a/integrations/langchain/src/databricks_langchain/vectorstores.py b/integrations/langchain/src/databricks_langchain/vectorstores.py index 222629d..ea9f9b2 100644 --- a/integrations/langchain/src/databricks_langchain/vectorstores.py +++ b/integrations/langchain/src/databricks_langchain/vectorstores.py @@ -61,7 +61,7 @@ class DatabricksVectorSearch(VectorStore): index_name: The name of the index to use. Format: "catalog.schema.index". endpoint: The name of the Databricks Vector Search endpoint. If not specified, - the endpoint name is automatically inferred based on the index name. + the endpoint name is automatically inferred based on the index name. .. note:: @@ -123,7 +123,9 @@ class DatabricksVectorSearch(VectorStore): ) Add Documents: + .. code-block:: python + from langchain_core.documents import Document document_1 = Document(page_content="foo", metadata={"baz": "bar"}) @@ -134,7 +136,9 @@ class DatabricksVectorSearch(VectorStore): vector_store.add_documents(documents=documents, ids=ids) Delete Documents: + .. code-block:: python + vector_store.delete(ids=["3"]) .. note:: @@ -142,11 +146,15 @@ class DatabricksVectorSearch(VectorStore): The `delete` method is only supported for direct-access index. Search: + .. code-block:: python - results = vector_store.similarity_search(query="thud",k=1) + + results = vector_store.similarity_search(query="thud", k=1) for doc in results: print(f"* {doc.page_content} [{doc.metadata}]") + .. code-block:: python + *thud[{"id": "2"}] .. note: @@ -167,44 +175,60 @@ class DatabricksVectorSearch(VectorStore): # Output: * thud [{'bar': 'baz', 'baz': None, 'id': '2'}] Search with filter: + .. code-block:: python - results = vector_store.similarity_search(query="thud",k=1,filter={"bar": "baz"}) + + results = vector_store.similarity_search(query="thud", k=1, filter={"bar": "baz"}) for doc in results: print(f"* {doc.page_content} [{doc.metadata}]") + .. code-block:: python + *thud[{"id": "2"}] Search with score: + .. 
code-block:: python - results = vector_store.similarity_search_with_score(query="qux",k=1) + + results = vector_store.similarity_search_with_score(query="qux", k=1) for doc, score in results: print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]") + .. code-block:: python + * [SIM=0.748804] foo [{'id': '1'}] Async: + .. code-block:: python + # add documents await vector_store.aadd_documents(documents=documents, ids=ids) # delete documents await vector_store.adelete(ids=["3"]) # search - results = vector_store.asimilarity_search(query="thud",k=1) + results = await vector_store.asimilarity_search(query="thud", k=1) # search with score - results = await vector_store.asimilarity_search_with_score(query="qux",k=1) - for doc,score in results: + results = await vector_store.asimilarity_search_with_score(query="qux", k=1) + for doc, score in results: print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]") + .. code-block:: python + * [SIM=0.748807] foo [{'id': '1'}] Use as Retriever: + .. code-block:: python + retriever = vector_store.as_retriever( search_type="mmr", search_kwargs={"k": 1, "fetch_k": 2, "lambda_mult": 0.5}, ) retriever.invoke("thud") + .. code-block:: python + [Document(metadata={"id": "2"}, page_content="thud")] """ # noqa: E501 diff --git a/integrations/openai/src/databricks_openai/vector_search_retriever_tool.py b/integrations/openai/src/databricks_openai/vector_search_retriever_tool.py index de5648e..8a098b6 100644 --- a/integrations/openai/src/databricks_openai/vector_search_retriever_tool.py +++ b/integrations/openai/src/databricks_openai/vector_search_retriever_tool.py @@ -28,38 +28,43 @@ class VectorSearchRetrieverTool(VectorSearchRetrieverToolMixin): for tool calling using the OpenAI SDK.
Example: - # Step 1: call model with VectorSearchRetrieverTool defined - dbvs_tool = VectorSearchRetrieverTool(index_name="catalog.schema.my_index_name") - messages = [ - {"role": "system", "content": "You are a helpful assistant."}, - { - "role": "user", - "content": "Using the Databricks documentation, answer what is Spark?" - } - ] - first_response = client.chat.completions.create( - model="gpt-4o", - messages=messages, - tools=[dbvs_tool.tool] - ) + Step 1: Call model with VectorSearchRetrieverTool defined + + .. code-block:: python + + dbvs_tool = VectorSearchRetrieverTool(index_name="catalog.schema.my_index_name") + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + { + "role": "user", + "content": "Using the Databricks documentation, answer what is Spark?", + }, + ] + first_response = client.chat.completions.create( + model="gpt-4o", messages=messages, tools=[dbvs_tool.tool] + ) - # Step 2: Execute function code – parse the model's response and handle function calls. - tool_call = first_response.choices[0].message.tool_calls[0] - args = json.loads(tool_call.function.arguments) - result = dbvs_tool.execute(query=args["query"]) # For self-managed embeddings, optionally pass in openai_client=client - - # Step 3: Supply model with results – so it can incorporate them into its final response. - messages.append(first_response.choices[0].message) - messages.append({ - "role": "tool", - "tool_call_id": tool_call.id, - "content": json.dumps(result) - }) - second_response = client.chat.completions.create( - model="gpt-4o", - messages=messages, - tools=tools - ) + Step 2: Execute function code – parse the model's response and handle function calls. + + .. 
code-block:: python + + tool_call = first_response.choices[0].message.tool_calls[0] + args = json.loads(tool_call.function.arguments) + result = dbvs_tool.execute( + query=args["query"] + ) # For self-managed embeddings, optionally pass in openai_client=client + + Step 3: Supply model with results – so it can incorporate them into its final response. + + .. code-block:: python + + messages.append(first_response.choices[0].message) + messages.append( + {"role": "tool", "tool_call_id": tool_call.id, "content": json.dumps(result)} + ) + second_response = client.chat.completions.create( + model="gpt-4o", messages=messages, tools=tools + ) """ text_column: Optional[str] = Field( diff --git a/requirements/dev-requirements.txt b/requirements/dev-requirements.txt new file mode 100644 index 0000000..b25cbbb --- /dev/null +++ b/requirements/dev-requirements.txt @@ -0,0 +1,8 @@ +docutils==0.21.2 +sphinx==8.1.3 +sphinx-autobuild==2024.10.3 +sphinx_rtd_theme==3.0.1 +jinja2>=3.0.0 +MarkupSafe==3.0.2 +commonmark==0.9.1 +myst-parser==4.0.0 \ No newline at end of file