Skip to content

Commit

Permalink
Move apache.iceberg provider to new providers structure (apache#45809)
Browse files Browse the repository at this point in the history
This also adds the feature of managing devel-dependencies via
dependency groups in pyproject.toml and adjusts tests to cover
the case where providers can be two levels deeper.
  • Loading branch information
potiuk authored Jan 21, 2025
1 parent d151ab9 commit 4280b83
Show file tree
Hide file tree
Showing 42 changed files with 547 additions and 61 deletions.
5 changes: 1 addition & 4 deletions .github/boring-cyborg.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,7 @@ labelPRBasedOnFilePath:
- providers/tests/system/apache/hive/**/*

provider:apache-iceberg:
- providers/src/airflow/providers/apache/iceberg/**/*
- docs/apache-airflow-providers-apache-iceberg/**/*
- providers/tests/apache/iceberg/**/*
- providers/tests/system/apache/iceberg/**/*
- providers/apache/iceberg/**

provider:apache-impala:
- providers/src/airflow/providers/apache/impala/**/*
Expand Down
14 changes: 9 additions & 5 deletions dev/breeze/src/airflow_breeze/commands/developer_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -722,11 +722,15 @@ def build_docs(
)
rebuild_or_pull_ci_image_if_needed(command_params=build_params)
if clean_build:
docs_dir = AIRFLOW_SOURCES_ROOT / "docs"
for dir_name in ["_build", "_doctrees", "_inventory_cache", "_api"]:
for directory in docs_dir.rglob(dir_name):
get_console().print(f"[info]Removing {directory}")
shutil.rmtree(directory, ignore_errors=True)
directories_to_clean = ["_build", "_doctrees", "_inventory_cache", "_api"]
else:
directories_to_clean = ["_api"]
docs_dir = AIRFLOW_SOURCES_ROOT / "docs"
for dir_name in directories_to_clean:
for directory in docs_dir.rglob(dir_name):
get_console().print(f"[info]Removing {directory}")
shutil.rmtree(directory, ignore_errors=True)

docs_list_as_tuple: tuple[str, ...] = ()
if package_list and len(package_list):
get_console().print(f"\n[info]Populating provider list from PACKAGE_LIST env as {package_list}")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1172,8 +1172,10 @@ def _regenerate_pyproject_toml(context: dict[str, Any], provider_details: Provid
pyproject_toml_content = get_pyproject_toml_path.read_text()
required_dependencies: list[str] = []
optional_dependencies: list[str] = []
dependency_groups: list[str] = []
in_required_dependencies = False
in_optional_dependencies = False
in_dependency_groups = False
for line in pyproject_toml_content.splitlines():
if line == "dependencies = [":
in_required_dependencies = True
Expand All @@ -1187,10 +1189,18 @@ def _regenerate_pyproject_toml(context: dict[str, Any], provider_details: Provid
if in_optional_dependencies and line == "":
in_optional_dependencies = False
continue
if line == "[dependency-groups]":
in_dependency_groups = True
continue
if in_dependency_groups and line == "":
in_dependency_groups = False
continue
if in_required_dependencies:
required_dependencies.append(line)
if in_optional_dependencies:
optional_dependencies.append(line)
if in_dependency_groups:
dependency_groups.append(line)

# For additional providers we want to load the dependencies and see if cross-provider-dependencies are
# present and if not, add them to the optional dependencies
Expand All @@ -1204,6 +1214,7 @@ def _regenerate_pyproject_toml(context: dict[str, Any], provider_details: Provid
optional_dependencies.append(f' "{get_pip_package_name(module)}"')
optional_dependencies.append("]")
context["EXTRAS_REQUIREMENTS"] = "\n".join(optional_dependencies)
context["DEPENDENCY_GROUPS"] = "\n".join(dependency_groups)

get_pyproject_toml_content = render_template(
template_name="pyproject",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,13 @@ dependencies = [
{{ EXTRAS_REQUIREMENTS }}
{%- endif %}

{%- if DEPENDENCY_GROUPS %}
# The dependency groups should be modified in place in the generated file
# Any change in the dependencies is preserved when the file is regenerated
[dependency-groups]
{{ DEPENDENCY_GROUPS }}
{%- endif %}

[project.urls]
"Documentation" = "https://airflow.apache.org/docs/{{ PACKAGE_PIP_NAME }}/{{RELEASE}}"
"Changelog" = "https://airflow.apache.org/docs/{{ PACKAGE_PIP_NAME }}/{{RELEASE}}/changelog.html"
Expand Down
6 changes: 6 additions & 0 deletions dev/breeze/src/airflow_breeze/utils/packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,10 @@ def refresh_provider_metadata_from_yaml_file(provider_yaml_path: Path):
optional_dependencies = toml_content["project"].get("optional-dependencies")
if optional_dependencies:
PROVIDER_METADATA[provider_id]["optional-dependencies"] = optional_dependencies
dependency_groups = toml_content.get("dependency-groups")
if dependency_groups and dependency_groups.get("dev"):
devel_dependencies = dependency_groups.get("dev")
PROVIDER_METADATA[provider_id]["devel-dependencies"] = devel_dependencies


def clear_cache_for_provider_metadata(provider_yaml_path: Path):
Expand Down Expand Up @@ -806,6 +810,8 @@ def get_provider_jinja_context(
"EXTRAS_REQUIREMENTS": get_package_extras_for_old_providers(
provider_id=provider_details.provider_id, version_suffix=version_suffix
),
# TODO(potiuk) - remove when all providers are new-style
"DEPENDENCY_GROUPS": {},
"CHANGELOG_RELATIVE_PATH": os.path.relpath(
provider_details.root_provider_path,
provider_details.documentation_provider_package_path,
Expand Down
10 changes: 7 additions & 3 deletions dev/breeze/tests/test_pytest_args_for_test_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,13 @@
# TODO(potiuk): rename to all_providers when we move all providers to the new structure
def _all_new_providers() -> list[str]:
all_new_providers: list[str] = []
for file in (AIRFLOW_SOURCES_ROOT / "providers").iterdir():
if file.is_dir() and not file.name.startswith(".") and file.name not in ["src", "tests"]:
all_new_providers.append(file.name)
providers_root = AIRFLOW_SOURCES_ROOT / "providers"
for file in providers_root.rglob("provider.yaml"):
# TODO: remove this check when all providers are moved to the new structure
if file.is_relative_to(providers_root / "src"):
continue
provider_path = file.parent.relative_to(providers_root)
all_new_providers.append(provider_path.as_posix())
return sorted(all_new_providers)


Expand Down
1 change: 1 addition & 0 deletions docs/.gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# TODO(potiuk): change it to apache-airflow-providers-* after all providers are migrated to the new structure
# Eventually, when we switch to building docs individually for each provider, we should remove this altogether
apache-airflow-providers-airbyte
apache-airflow-providers-apache-iceberg
apache-airflow-providers-celery
apache-airflow-providers-edge
25 changes: 0 additions & 25 deletions docs/apache-airflow-providers-apache-iceberg/changelog.rst

This file was deleted.

6 changes: 6 additions & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -828,6 +828,9 @@ def filter_ignore(record: logging.LogRecord) -> bool:
)
)

# Here we remove all other providers from the autoapi list, leaving only the current provider;
# otherwise all the other provider indexes will not be found in any TOC.

for p in load_package_data(include_suspended=True):
if p["package-name"] == PACKAGE_NAME:
continue
Expand All @@ -840,6 +843,9 @@ def filter_ignore(record: logging.LogRecord) -> bool:
autoapi_dirs.append(test_dir)

autoapi_ignore.extend(f"{d}/*" for d in test_dir.glob("*") if d.is_dir() and d.name != "system")
print("#### AUTOAPI_IGNORE:")
print(autoapi_ignore)
print("#### END OF AUTOAPI_IGNORE:")
else:
if SYSTEM_TESTS_DIR and os.path.exists(SYSTEM_TESTS_DIR):
autoapi_dirs.append(SYSTEM_TESTS_DIR)
Expand Down
14 changes: 6 additions & 8 deletions docs/exts/provider_yaml_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,15 +129,13 @@ def load_package_data(include_suspended: bool = False) -> list[dict[str, Any]]:
raise RuntimeError(msg)
if provider["state"] == "suspended" and not include_suspended:
continue
package_module = provider["package-name"].replace("-", ".")
package_module_path = package_module.replace(".", "/")
provider_yaml_dir_str = os.path.dirname(new_provider_yaml_path.parent / "src" / package_module_path)
provider["python-module"] = provider["package-name"].replace("-", ".")
provider["package-dir"] = provider_yaml_dir_str
provider_yaml_dir_str = os.path.dirname(new_provider_yaml_path)
module = provider["package-name"][len("apache-") :].replace("-", ".")
module_folder = module[len("airflow-providers-") :].replace(".", "/")
provider["python-module"] = module
provider["package-dir"] = f"{provider_yaml_dir_str}/src/{module.replace('.', '/')}"
provider["docs-dir"] = os.path.dirname(new_provider_yaml_path.parent / "docs")
provider["system-tests-dir"] = (
(Path(provider_yaml_dir_str) / "tests" / "system").relative_to(AIRFLOW_PROVIDERS_DIR).as_posix()
)
provider["system-tests-dir"] = f"{provider_yaml_dir_str}/tests/system/{module_folder}"
# TODO(potiuk) - remove when all providers are new-style
provider["is_new_provider"] = True
result.append(provider)
Expand Down
4 changes: 2 additions & 2 deletions providers/airbyte/docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,14 @@
:maxdepth: 1
:caption: System tests

System Tests <_api/tests/system/providers/airbyte/index>
System Tests <_api/tests/system/airbyte/index>

.. toctree::
:hidden:
:maxdepth: 1
:caption: Resources

Example DAGs <https://github.com/apache/airflow/tree/providers-airbyte/|version|/providers/tests/system/airbyte>
Example DAGs <https://github.com/apache/airflow/tree/providers-airbyte/|version|/providers/airbyte/tests/system/airbyte>
PyPI Repository <https://pypi.org/project/apache-airflow-providers-airbyte/>
Installing from sources <installing-providers-from-sources>

Expand Down
4 changes: 2 additions & 2 deletions providers/airbyte/docs/operators/airbyte.rst
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,14 @@ This Operator will initiate the Airbyte job, and the Operator manages the job st

An example using the synchronous way:

.. exampleinclude:: /../../providers/airbyte/tests/system/providers/airbyte/example_airbyte_trigger_job.py
.. exampleinclude:: /../../providers/airbyte/tests/system/airbyte/example_airbyte_trigger_job.py
:language: python
:start-after: [START howto_operator_airbyte_synchronous]
:end-before: [END howto_operator_airbyte_synchronous]

An example using the async way:

.. exampleinclude:: /../../providers/airbyte/tests/system/providers/airbyte/example_airbyte_trigger_job.py
.. exampleinclude:: /../../providers/airbyte/tests/system/airbyte/example_airbyte_trigger_job.py
:language: python
:start-after: [START howto_operator_airbyte_asynchronous]
:end-before: [END howto_operator_airbyte_asynchronous]
2 changes: 1 addition & 1 deletion providers/airbyte/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ versions:
integrations:
- integration-name: Airbyte
external-doc-url: https://airbyte.com/
logo: /docs/integration-logos/Airbyte.png
logo: /docs/integration-logo/Airbyte.png
how-to-guide:
- /docs/apache-airflow-providers-airbyte/operators/airbyte.rst
tags: [service]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def get_provider_info():
{
"integration-name": "Airbyte",
"external-doc-url": "https://airbyte.com/",
"logo": "/docs/integration-logos/Airbyte.png",
"logo": "/docs/integration-logo/Airbyte.png",
"how-to-guide": ["/docs/apache-airflow-providers-airbyte/operators/airbyte.rst"],
"tags": ["service"],
}
Expand Down
61 changes: 61 additions & 0 deletions providers/apache/iceberg/README.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@

.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
.. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN!
.. IF YOU WANT TO MODIFY TEMPLATE FOR THIS FILE, YOU SHOULD MODIFY THE TEMPLATE
`PROVIDER_README_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY
Package ``apache-airflow-providers-apache-iceberg``

Release: ``1.2.0``


`Iceberg <https://iceberg.apache.org/>`__


Provider package
----------------

This is a provider package for ``apache.iceberg`` provider. All classes for this provider package
are in ``airflow.providers.apache.iceberg`` python package.

You can find package information and changelog for the provider
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-apache-iceberg/1.2.0/>`_.

Installation
------------

You can install this package on top of an existing Airflow 2 installation (see ``Requirements`` below
for the minimum Airflow version supported) via
``pip install apache-airflow-providers-apache-iceberg``

The package supports the following python versions: 3.9,3.10,3.11,3.12

Requirements
------------

================== ==================
PIP package Version required
================== ==================
``apache-airflow`` ``>=2.9.0``
================== ==================

The changelog for the provider package can be found in the
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-apache-iceberg/1.2.0/changelog.html>`_.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
:maxdepth: 1
:caption: Resources

Example DAGs <https://github.com/apache/airflow/tree/providers-apache-iceberg/|version|/providers/tests/system/apache/iceberg>
Example DAGs <https://github.com/apache/airflow/tree/providers-apache-iceberg/|version|/providers/apache/iceberg/tests/system/apache/iceberg>
PyPI Repository <https://pypi.org/project/apache-airflow-providers-apache-iceberg/>
Installing from sources <installing-providers-from-sources>
Python API <_api/airflow/providers/apache/iceberg/index>
Expand Down
File renamed without changes
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,9 @@ versions:
- 1.1.0
- 1.0.0

dependencies:
- apache-airflow>=2.9.0

devel-dependencies:
- pyiceberg>=0.5.0

integrations:
- integration-name: Iceberg
logo: /integration-logos/iceberg/iceberg.png
logo: /docs/integration-logo/iceberg.png
external-doc-url: https://iceberg.apache.org/
tags: [software]

Expand Down
Loading

0 comments on commit 4280b83

Please sign in to comment.