Skip to content

Commit

Permalink
docs: update component gallery (#987)
Browse files Browse the repository at this point in the history
  • Loading branch information
davidberenstein1957 authored Sep 18, 2024
1 parent e67864e commit 370e5b5
Showing 1 changed file with 77 additions and 33 deletions.
110 changes: 77 additions & 33 deletions src/distilabel/utils/mkdocs/components_gallery.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,37 +76,69 @@
)

_STEPS_CATEGORY_TO_ICON = {
"text-generation": ":material-text-box-edit:",
"chat-generation": ":material-chat:",
"text-classification": ":material-label:",
"text-manipulation": ":material-receipt-text-edit:",
"evol": ":material-dna:",
"critique": ":material-comment-edit:",
"scorer": ":octicons-number-16:",
"preference": ":material-poll:",
"embedding": ":material-vector-line:",
"evol": ":material-dna:",
"clustering": ":material-scatter-plot:",
"columns": ":material-table-column:",
"filtering": ":material-filter:",
"format": ":material-format-list-bulleted:",
"load": ":material-file-download:",
"preference": ":material-poll:",
"save": ":material-content-save:",
"scorer": ":octicons-number-16:",
"text-generation": ":material-text-box-edit:",
"text-manipulation": ":material-receipt-text-edit:",
"columns": ":material-table-column:",
"text-classification": ":material-label:",
"clustering": ":material-scatter-plot:",
}

_STEP_CATEGORY_TO_DESCRIPTION = {
"text-generation": "Text generation steps are used to generate text based on a given prompt.",
"evol": "Evol steps are used to rewrite input text and evolve it to a higher quality.",
"chat-generation": "Chat generation steps are used to generate text based on a conversation.",
"text-classification": "Text classification steps are used to classify text into a category.",
"text-manipulation": "Text manipulation steps are used to manipulate or rewrite an input text.",
"evol": "Evol steps are used to rewrite input text and evolve it to a higher quality.",
"critique": "Critique steps are used to provide feedback on the quality of the data with a written explanation.",
"scorer": "Scorer steps are used to evaluate and score the data with a numerical value.",
"preference": "Preference steps are used to collect preferences on the data with numerical values or ranks.",
"embedding": "Embedding steps are used to generate embeddings for the data.",
"clustering": "Clustering steps are used to group similar data points together.",
"columns": "Columns steps are used to manipulate columns in the data.",
"filtering": "Filtering steps are used to filter the data based on some criteria.",
"format": "Format steps are used to format the data.",
"load": "Load steps are used to load the data.",
"save": "Save steps are used to save the data.",
}

# Import-time sanity check: both category mappings must declare exactly the
# same categories, in the same order.
assert [*_STEP_CATEGORY_TO_DESCRIPTION] == [*_STEPS_CATEGORY_TO_ICON]

# Categories in display order (the insertion order of the description mapping).
_STEP_CATEGORIES = list(_STEP_CATEGORY_TO_DESCRIPTION)

# Markdown table with one row per category: its icon, name and description.
_STEP_CATEGORY_TABLE = pd.DataFrame(
    {
        "Icon": [_STEPS_CATEGORY_TO_ICON[category] for category in _STEP_CATEGORIES],
        "Category": _STEP_CATEGORIES,
        "Description": [
            _STEP_CATEGORY_TO_DESCRIPTION[category] for category in _STEP_CATEGORIES
        ],
    }
).to_markdown(index=False)

# Collapsible "Category Overview" block used as the description of the gallery
# index pages. Everything below the `???` header line must be indented by four
# spaces to be rendered as the body of the collapsible block; with a smaller
# indent the intro text and the table end up outside of it.
_STEP_CATEGORY_TABLE_DESCRIPTION = "\n".join(
    [
        '??? info "Category Overview"',
        "    The gallery page showcases the different types of components within `distilabel`.",
        "",
        *(f"    {row}" for row in _STEP_CATEGORY_TABLE.split("\n")),
    ]
)

# Maps each category name to its position in `_STEP_CATEGORY_TO_DESCRIPTION`;
# used as the sort key when ordering components by their first category.
_CATEGORY_ORDER_INDEX = {
    name: position for position, name in enumerate(_STEP_CATEGORY_TO_DESCRIPTION)
}


class ComponentsGalleryConfig(Config):
enabled = Type(bool, default=True)
Expand Down Expand Up @@ -229,13 +261,30 @@ def _generate_steps_pages(self, src_dir: Path, steps: list) -> List[str]:
steps_gallery_page_path = src_dir / paths[0]
steps_gallery_page_path.parent.mkdir(parents=True, exist_ok=True)

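# Sort steps by the position of their first category in `_CATEGORY_ORDER_INDEX`, in descending order (`reverse=True`); steps with no category, or an unknown one, are keyed as `inf`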
steps = sorted(
steps,
key=lambda step: _CATEGORY_ORDER_INDEX.get(
step["docstring"]["categories"][0]
if step["docstring"]["categories"]
else float("inf"),
float("inf"),
),
reverse=True,
)

# Create detail page for each `Step`
for step in steps:
docstring = step["docstring"]
if docstring["icon"] == "" and docstring["categories"]:
first_category = docstring["categories"][0]
docstring["icon"] = _STEPS_CATEGORY_TO_ICON.get(first_category, "")

if docstring["icon"]:
assert (
docstring["icon"] in _STEPS_CATEGORY_TO_ICON.values()
), f"Icon {docstring['icon']} not found in _STEPS_CATEGORY_TO_ICON"

name = step["name"]

content = _STEP_DETAIL_TEMPLATE.render(
Expand All @@ -254,10 +303,10 @@ def _generate_steps_pages(self, src_dir: Path, steps: list) -> List[str]:

paths.append(step_path)

# Create the `components-gallery/steps.md` file
# Create the `components-gallery/steps/index.md` file
content = _COMPONENTS_LIST_TEMPLATE.render(
title="Steps Gallery",
description="",
description=_STEP_CATEGORY_TABLE_DESCRIPTION,
components=steps,
default_icon=":material-step-forward:",
)
Expand All @@ -282,12 +331,27 @@ def _generate_tasks_pages(self, src_dir: Path, tasks: list) -> List[str]:
tasks_gallery_page_path = src_dir / paths[0]
tasks_gallery_page_path.parent.mkdir(parents=True, exist_ok=True)

# Sort tasks by the position of their first category in `_CATEGORY_ORDER_INDEX`; tasks with no category, or an unknown one, are keyed as `inf` and therefore come last
tasks = sorted(
tasks,
key=lambda task: _CATEGORY_ORDER_INDEX.get(
task["docstring"]["categories"][0]
if task["docstring"]["categories"]
else float("inf"),
float("inf"),
),
)

# Create detail page for each `Task`
for task in tasks:
docstring = task["docstring"]
if docstring["icon"] == "" and docstring["categories"]:
first_category = docstring["categories"][0]
docstring["icon"] = _STEPS_CATEGORY_TO_ICON.get(first_category, "")
if docstring["icon"]:
assert (
docstring["icon"] in _STEPS_CATEGORY_TO_ICON.values()
), f"Icon {docstring['icon']} not found in _STEPS_CATEGORY_TO_ICON"

name = task["name"]

Expand All @@ -307,30 +371,10 @@ def _generate_tasks_pages(self, src_dir: Path, tasks: list) -> List[str]:

paths.append(task_path)

global _STEP_CATEGORY_TO_DESCRIPTION
categories = list(_STEP_CATEGORY_TO_DESCRIPTION.keys())
table = pd.DataFrame(
{
"Category": categories,
"Icon": [_STEPS_CATEGORY_TO_ICON[category] for category in categories],
"Description": [
_STEP_CATEGORY_TO_DESCRIPTION[category] for category in categories
],
}
).to_markdown(index=False)

description = [
'??? info "Task Category Overview"',
" The tasks gallery page showcases the different types of tasks that can be performed with `distilabel`.",
"",
]
for row in table.split("\n"):
description.append(f" {row}")

# Create the `components-gallery/steps/index.md` file
# Create the `components-gallery/tasks/index.md` file
content = _COMPONENTS_LIST_TEMPLATE.render(
title="Tasks Gallery",
description="\n".join(description),
description=_STEP_CATEGORY_TABLE_DESCRIPTION,
components=tasks,
default_icon=":material-check-outline:",
)
Expand Down

0 comments on commit 370e5b5

Please sign in to comment.