Skip to content

Commit

Permalink
feat: v1.2.0
Browse files Browse the repository at this point in the history
  • Loading branch information
Goldziher committed Feb 3, 2025
1 parent f8472f6 commit f501c9d
Show file tree
Hide file tree
Showing 16 changed files with 1,160 additions and 810 deletions.
8 changes: 4 additions & 4 deletions .github/dependabot.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
version: 2
updates:
- package-ecosystem: github-actions
directory: /
schedule:
interval: daily
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "daily"
73 changes: 47 additions & 26 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,39 +3,60 @@ name: CI
on:
pull_request:
branches:
- main
- main
push:
branches:
- main
- main

jobs:
validate:
runs-on: ubuntu-latest
timeout-minutes: 5
steps:
- uses: actions/checkout@v4
- uses: pdm-project/setup-pdm@v4
with:
python-version: '3.9'
cache: true
- name: Install Dependencies
run: pdm install
- name: Load Cached Pre-Commit Dependencies
id: cached-pre-commit-dependencies
uses: actions/cache@v4
with:
path: ~/.cache/pre-commit/
key: pre-commit-4|${{ env.pythonLocation }}|${{ hashFiles('.pre-commit-config.yaml') }}
- name: Lint
run: pdm run pre-commit run --show-diff-on-failure --color=always --all-files
- name: Checkout
uses: actions/checkout@v4

- name: Install uv
uses: astral-sh/setup-uv@v5
with:
enable-cache: true

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version-file: "pyproject.toml"

- name: Install Dependencies
run: uv sync --all-extras --dev

- name: Load Cached Pre-Commit Dependencies
id: cached-pre-commit-dependencies
uses: actions/cache@v4
with:
path: ~/.cache/pre-commit/
key: pre-commit|${{ env.pythonLocation }}|${{ hashFiles('.pre-commit-config.yaml') }}

- name: Execute Pre-Commit
run: uv run pre-commit run --show-diff-on-failure --color=always --all-files
test:
runs-on: ubuntu-latest
timeout-minutes: 5
steps:
- uses: actions/checkout@v4
- uses: pdm-project/setup-pdm@v4
with:
python-version: '3.9'
cache: true
- name: Install
run: pdm install
- name: Test
run: pdm run test
- name: Checkout
uses: actions/checkout@v4

- name: Install uv
uses: astral-sh/setup-uv@v5
with:
enable-cache: true

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version-file: "pyproject.toml"

- name: Install Dependencies
run: uv sync --all-extras --dev

- name: Test
run: uv run pytest
20 changes: 20 additions & 0 deletions .github/workflows/pr-title.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
name: "Check PR Title"

on:
pull_request_target:
types:
- opened
- edited
- synchronize

permissions:
pull-requests: read

jobs:
main:
name: Validate PR title
runs-on: ubuntu-latest
steps:
- uses: amannn/action-semantic-pull-request@v5
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
35 changes: 20 additions & 15 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,26 @@ on:

jobs:
release:
permissions:
id-token: write
runs-on: ubuntu-latest
environment: pypi
permissions:
id-token: write
steps:
- uses: actions/checkout@v4
- uses: pdm-project/setup-pdm@v4
with:
python-version: 3.12
cache: true
- name: Install
run: pdm install
- name: Build
run: pdm build
- name: Publish
uses: pypa/gh-action-pypi-publish@release/v1
with:
password: ${{ secrets.PYPI_API_TOKEN }}
- name: Checkout
uses: actions/checkout@v4

- name: Install uv
uses: astral-sh/setup-uv@v5
with:
enable-cache: true

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version-file: "pyproject.toml"

- name: Build
run: uv build

- name: Publish
uses: pypa/gh-action-pypi-publish@release/v1
107 changes: 59 additions & 48 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,49 +1,60 @@
default_language_version:
python: python3.9
repos:
- repo: https://github.com/alessandrojcm/commitlint-pre-commit-hook
rev: v9.17.0
hooks:
- id: commitlint
stages: [commit-msg]
additional_dependencies: ['@commitlint/config-conventional']
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
hooks:
- id: name-tests-test
args:
- --pytest
exclude: ^tests/factories|^tests/helpers|^tests/data_fixtures.py
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: check-toml
- id: check-json
- id: pretty-format-json
- repo: https://github.com/codespell-project/codespell
rev: v2.3.0
hooks:
- id: codespell
additional_dependencies:
- tomli
- repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks
rev: v2.14.0
hooks:
- id: pretty-format-yaml
args: [--autofix, --indent, '2']
- repo: https://github.com/jsh9/pydoclint
rev: 0.5.7
hooks:
- id: pydoclint
args: [--style=google, --check-return-types=False, --arg-type-hints-in-docstring=False]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.6.4
hooks:
- id: ruff
args: [--fix]
- id: ruff-format
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.11.2 # Use the sha / tag you want to point at
hooks:
- id: mypy
additional_dependencies: [beautifulsoup4, types-beautifulsoup4]
- repo: https://github.com/alessandrojcm/commitlint-pre-commit-hook
rev: "v9.20.0"
hooks:
- id: commitlint
stages: [commit-msg]
additional_dependencies: ["@commitlint/config-conventional"]
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: name-tests-test
args:
- --pytest
exclude: factories|test_utils|completion.py|test_data
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: check-toml
- id: check-case-conflict
- id: detect-private-key
- repo: https://github.com/rbubley/mirrors-prettier
rev: "v3.4.2"
hooks:
- id: prettier
exclude: ^tests|^.idea|^migrations|^.git
- repo: https://github.com/tox-dev/pyproject-fmt
rev: "v2.5.0"
hooks:
- id: pyproject-fmt
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.9.4
hooks:
- id: ruff
args: [--fix]
- id: ruff-format
- repo: https://github.com/codespell-project/codespell
rev: v2.4.1
hooks:
- id: codespell
exclude: ^tests|^scripts
additional_dependencies:
- tomli
- repo: https://github.com/jsh9/pydoclint
rev: 0.6.0
hooks:
- id: pydoclint
args:
[
--style=google,
--check-return-types=False,
--arg-type-hints-in-docstring=False,
]
- repo: local
hooks:
- id: mypy
name: mypy
entry: uv run mypy
require_serial: true
language: system
types: [python]
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,20 +43,20 @@ convert_to_markdown(soup) # > '**Yay** [GitHub](http://github.com)'
The `convert_to_markdown` function accepts the following kwargs:

- autolinks (bool): Automatically convert valid URLs into Markdown links. Defaults to True.
- bullets (str): A string of characters to use for bullet points in lists. Defaults to '*+-'.
- bullets (str): A string of characters to use for bullet points in lists. Defaults to '\*+-'.
- code_language (str): Default language identifier for fenced code blocks. Defaults to an empty string.
- code_language_callback (Callable[[Any], str] | None): Function to dynamically determine the language for code blocks.
- convert (Iterable[str] | None): A list of tag names to convert to Markdown. If None, all supported tags are converted.
- default_title (bool): Use the default title when converting certain elements (e.g., links). Defaults to False.
- escape_asterisks (bool): Escape asterisks (*) to prevent unintended Markdown formatting. Defaults to True.
- escape_asterisks (bool): Escape asterisks (\*) to prevent unintended Markdown formatting. Defaults to True.
- escape_misc (bool): Escape miscellaneous characters to prevent conflicts in Markdown. Defaults to True.
- escape_underscores (bool): Escape underscores (_) to prevent unintended italic formatting. Defaults to True.
- escape_underscores (bool): Escape underscores (\_) to prevent unintended italic formatting. Defaults to True.
- heading_style (Literal["underlined", "atx", "atx_closed"]): The style to use for Markdown headings. Defaults to "underlined".
- keep_inline_images_in (Iterable[str] | None): Tags in which inline images should be preserved. Defaults to None.
- newline_style (Literal["spaces", "backslash"]): Style for handling newlines in text content. Defaults to "spaces".
- strip (Iterable[str] | None): Tags to strip from the output. Defaults to None.
- strong_em_symbol (Literal["*", "_"]): Symbol to use for strong/emphasized text. Defaults to "*".
- strong_em_symbol (Literal["\*", "\_"]): Symbol to use for strong/emphasized text. Defaults to "\*".
- sub_symbol (str): Custom symbol for subscript text. Defaults to an empty string.
- sup_symbol (str): Custom symbol for superscript text. Defaults to an empty string.
- wrap (bool): Wrap text to the specified width. Defaults to False.
Expand Down
4 changes: 3 additions & 1 deletion html_to_markdown/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from html_to_markdown.processing import convert_to_markdown

__all__ = ["convert_to_markdown"]
from .legacy import Markdownify

__all__ = ["Markdownify", "convert_to_markdown"]
12 changes: 8 additions & 4 deletions html_to_markdown/__main__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import sys

from html_to_markdown.cli import cli

if __name__ == "__main__":
result = cli(sys.argv[1:])
print(result) # noqa: T201
from html_to_markdown.cli import main

try:
result = main(sys.argv[1:])
print(result) # noqa: T201
except ValueError as e:
print(str(e), file=sys.stderr) # noqa: T201
sys.exit(1)
16 changes: 8 additions & 8 deletions html_to_markdown/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@

import re
from re import Pattern
from typing import Final, Literal
from typing import Final

convert_heading_re: Final[Pattern[str]] = re.compile(r"convert_h(\d+)")
line_beginning_re: Final[Pattern[str]] = re.compile(r"^", re.MULTILINE)
whitespace_re: Final[Pattern[str]] = re.compile(r"[\t ]+")
html_heading_re: Final[Pattern[str]] = re.compile(r"h[1-6]")

ASTERISK: Final[Literal["*"]] = "*"
ATX: Final[Literal["atx"]] = "atx"
ATX_CLOSED: Final[Literal["atx_closed"]] = "atx_closed"
BACKSLASH: Final[Literal["backslash"]] = "backslash"
UNDERLINED: Final[Literal["underlined"]] = "underlined"
SPACES: Final[Literal["spaces"]] = "spaces"
UNDERSCORE: Final[Literal["_"]] = "_"
ASTERISK: Final = "*"
ATX: Final = "atx"
ATX_CLOSED: Final = "atx_closed"
BACKSLASH: Final = "backslash"
UNDERLINED: Final = "underlined"
SPACES: Final = "spaces"
UNDERSCORE: Final = "_"
16 changes: 9 additions & 7 deletions html_to_markdown/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,17 +55,19 @@
"kbd",
]

ConverterssMap = Mapping[SupportedElements, Callable[[str, Tag], str]]
ConvertersMap = Mapping[SupportedElements, Callable[[str, Tag], str]]

T = TypeVar("T")


def _create_inline_converter(markup_prefix: str) -> Callable[[Tag, str], str]:
"""This abstracts all simple inline tags like b, em, del, ...
Returns a function that wraps the chomped text in a pair of the string
that is returned by markup_fn, with '/' inserted in the string used after
the text if it looks like an HTML tag. markup_fn is necessary to allow for
references to self.strong_em_symbol etc.
"""Create an inline converter for a markup pattern or tag.
Args:
markup_prefix: The markup prefix to insert.
Returns:
A function that can be used to convert HTML to Markdown.
"""

def implementation(*, tag: Tag, text: str) -> str:
Expand Down Expand Up @@ -295,7 +297,7 @@ def create_converters_map(
sup_symbol: str,
wrap: bool,
wrap_width: int,
) -> ConverterssMap:
) -> ConvertersMap:
"""Create a mapping of HTML elements to their corresponding conversion functions.
Args:
Expand Down
Loading

0 comments on commit f501c9d

Please sign in to comment.