From 370e5b56bc5bf5fba7a6783dae92426c80a13fa0 Mon Sep 17 00:00:00 2001 From: David Berenstein Date: Wed, 18 Sep 2024 12:27:46 +0200 Subject: [PATCH] docs: update component gallery (#987) --- .../utils/mkdocs/components_gallery.py | 110 ++++++++++++------ 1 file changed, 77 insertions(+), 33 deletions(-) diff --git a/src/distilabel/utils/mkdocs/components_gallery.py b/src/distilabel/utils/mkdocs/components_gallery.py index c2e9fa5cb1..9d5d9b59ee 100644 --- a/src/distilabel/utils/mkdocs/components_gallery.py +++ b/src/distilabel/utils/mkdocs/components_gallery.py @@ -76,30 +76,34 @@ ) _STEPS_CATEGORY_TO_ICON = { + "text-generation": ":material-text-box-edit:", + "chat-generation": ":material-chat:", + "text-classification": ":material-label:", + "text-manipulation": ":material-receipt-text-edit:", + "evol": ":material-dna:", "critique": ":material-comment-edit:", + "scorer": ":octicons-number-16:", + "preference": ":material-poll:", "embedding": ":material-vector-line:", - "evol": ":material-dna:", + "clustering": ":material-scatter-plot:", + "columns": ":material-table-column:", "filtering": ":material-filter:", "format": ":material-format-list-bulleted:", "load": ":material-file-download:", - "preference": ":material-poll:", "save": ":material-content-save:", - "scorer": ":octicons-number-16:", - "text-generation": ":material-text-box-edit:", - "text-manipulation": ":material-receipt-text-edit:", - "columns": ":material-table-column:", - "text-classification": ":material-label:", - "clustering": ":material-scatter-plot:", } _STEP_CATEGORY_TO_DESCRIPTION = { "text-generation": "Text generation steps are used to generate text based on a given prompt.", - "evol": "Evol steps are used to rewrite input text and evolve it to a higher quality.", + "chat-generation": "Chat generation steps are used to generate text based on a conversation.", + "text-classification": "Text classification steps are used to classify text into a category.", "text-manipulation": "Text manipulation steps are used to manipulate or rewrite an input text.", + "evol": "Evol steps are used to rewrite input text and evolve it to a higher quality.", "critique": "Critique steps are used to provide feedback on the quality of the data with a written explanation.", "scorer": "Scorer steps are used to evaluate and score the data with a numerical value.", "preference": "Preference steps are used to collect preferences on the data with numerical values or ranks.", "embedding": "Embedding steps are used to generate embeddings for the data.", + "clustering": "Clustering steps are used to group similar data points together.", "columns": "Columns steps are used to manipulate columns in the data.", "filtering": "Filtering steps are used to filter the data based on some criteria.", "format": "Format steps are used to format the data.", @@ -107,6 +111,34 @@ "save": "Save steps are used to save the data.", } +assert list(_STEP_CATEGORY_TO_DESCRIPTION.keys()) == list( + _STEPS_CATEGORY_TO_ICON.keys() +) + +_STEP_CATEGORIES = list(_STEP_CATEGORY_TO_DESCRIPTION.keys()) +_STEP_CATEGORY_TABLE = pd.DataFrame( + { + "Icon": [_STEPS_CATEGORY_TO_ICON[category] for category in _STEP_CATEGORIES], + "Category": _STEP_CATEGORIES, + "Description": [ + _STEP_CATEGORY_TO_DESCRIPTION[category] for category in _STEP_CATEGORIES + ], + } +).to_markdown(index=False) +_STEP_CATEGORY_TABLE_DESCRIPTION = [ + '??? info "Category Overview"', + " The gallery page showcases the different types of components within `distilabel`.", + "", +] +for row in _STEP_CATEGORY_TABLE.split("\n"): + _STEP_CATEGORY_TABLE_DESCRIPTION.append(f" {row}") +_STEP_CATEGORY_TABLE_DESCRIPTION = "\n".join(_STEP_CATEGORY_TABLE_DESCRIPTION) + +_CATEGORY_ORDER_INDEX = { + category: idx + for idx, category in enumerate(list(_STEP_CATEGORY_TO_DESCRIPTION.keys())) +} + class ComponentsGalleryConfig(Config): enabled = Type(bool, default=True) @@ -229,6 +261,18 @@ def _generate_steps_pages(self, src_dir: Path, steps: list) -> List[str]: steps_gallery_page_path = src_dir / paths[0] steps_gallery_page_path.parent.mkdir(parents=True, exist_ok=True) + # Sort steps based on the index of their first category in the 'category_order' + steps = sorted( + steps, + key=lambda step: _CATEGORY_ORDER_INDEX.get( + step["docstring"]["categories"][0] + if step["docstring"]["categories"] + else float("inf"), + float("inf"), + ), + reverse=True, + ) + # Create detail page for each `Step` for step in steps: docstring = step["docstring"] @@ -236,6 +280,11 @@ def _generate_steps_pages(self, src_dir: Path, steps: list) -> List[str]: first_category = docstring["categories"][0] docstring["icon"] = _STEPS_CATEGORY_TO_ICON.get(first_category, "") + if docstring["icon"]: + assert ( + docstring["icon"] in _STEPS_CATEGORY_TO_ICON.values() + ), f"Icon {docstring['icon']} not found in _STEPS_CATEGORY_TO_ICON" + name = step["name"] content = _STEP_DETAIL_TEMPLATE.render( @@ -254,10 +303,10 @@ def _generate_steps_pages(self, src_dir: Path, steps: list) -> List[str]: paths.append(step_path) - # Create the `components-gallery/steps.md` file + # Create the `components-gallery/steps/index.md` file content = _COMPONENTS_LIST_TEMPLATE.render( title="Steps Gallery", - description="", + description=_STEP_CATEGORY_TABLE_DESCRIPTION, components=steps, default_icon=":material-step-forward:", ) @@ -282,12 +331,27 @@ def _generate_tasks_pages(self, src_dir: Path, tasks: list) -> List[str]: tasks_gallery_page_path = src_dir / paths[0] tasks_gallery_page_path.parent.mkdir(parents=True, exist_ok=True) + # Sort tasks based on the index of their first category in the 'category_order' + tasks = sorted( + tasks, + key=lambda task: _CATEGORY_ORDER_INDEX.get( + task["docstring"]["categories"][0] + if task["docstring"]["categories"] + else float("inf"), + float("inf"), + ), + ) + # Create detail page for each `Task` for task in tasks: docstring = task["docstring"] if docstring["icon"] == "" and docstring["categories"]: first_category = docstring["categories"][0] docstring["icon"] = _STEPS_CATEGORY_TO_ICON.get(first_category, "") + if docstring["icon"]: + assert ( + docstring["icon"] in _STEPS_CATEGORY_TO_ICON.values() + ), f"Icon {docstring['icon']} not found in _STEPS_CATEGORY_TO_ICON" name = task["name"] @@ -307,30 +371,10 @@ def _generate_tasks_pages(self, src_dir: Path, tasks: list) -> List[str]: paths.append(task_path) - global _STEP_CATEGORY_TO_DESCRIPTION - categories = list(_STEP_CATEGORY_TO_DESCRIPTION.keys()) - table = pd.DataFrame( - { - "Category": categories, - "Icon": [_STEPS_CATEGORY_TO_ICON[category] for category in categories], - "Description": [ - _STEP_CATEGORY_TO_DESCRIPTION[category] for category in categories - ], - } - ).to_markdown(index=False) - - description = [ - '??? info "Task Category Overview"', - " The tasks gallery page showcases the different types of tasks that can be performed with `distilabel`.", - "", - ] - for row in table.split("\n"): - description.append(f" {row}") - - # Create the `components-gallery/steps/index.md` file + # Create the `components-gallery/tasks/index.md` file content = _COMPONENTS_LIST_TEMPLATE.render( title="Tasks Gallery", - description="\n".join(description), + description=_STEP_CATEGORY_TABLE_DESCRIPTION, components=tasks, default_icon=":material-check-outline:", )