Skip to content

Commit

Permalink
✨ Source Gitlab: spec: start date is optional, groups and projects to…
Browse files Browse the repository at this point in the history
… array (#31375)
  • Loading branch information
darynaishchenko authored Oct 13, 2023
1 parent 31fb8c0 commit efdc945
Show file tree
Hide file tree
Showing 9 changed files with 168 additions and 26 deletions.
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-gitlab/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ COPY main.py ./

ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=1.8.0
LABEL io.airbyte.version=1.8.1
LABEL io.airbyte.name=airbyte/source-gitlab
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ acceptance_tests:
spec:
tests:
- spec_path: "source_gitlab/spec.json"
backward_compatibility_tests_config:
disable_for_version: 1.8.0
connection:
tests:
- config_path: "secrets/config.json"
Expand Down
3 changes: 3 additions & 0 deletions airbyte-integrations/connectors/source-gitlab/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@

from airbyte_cdk.entrypoint import launch
from source_gitlab import SourceGitlab
from source_gitlab.config_migrations import MigrateGroups, MigrateProjects

if __name__ == "__main__":
source = SourceGitlab()
MigrateGroups.migrate(sys.argv[1:], source)
MigrateProjects.migrate(sys.argv[1:], source)
launch(source, sys.argv[1:])
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ data:
connectorSubtype: api
connectorType: source
definitionId: 5e6175e5-68e1-4c17-bff9-56103bbb0d80
dockerImageTag: 1.8.0
dockerImageTag: 1.8.1
dockerRepository: airbyte/source-gitlab
githubIssueLabel: source-gitlab
icon: gitlab.svg
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
import abc
import logging
from abc import ABC
from typing import Any, List, Mapping

from airbyte_cdk.config_observation import create_connector_config_control_message
from airbyte_cdk.entrypoint import AirbyteEntrypoint
from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository

from .source import SourceGitlab

logger = logging.getLogger("airbyte_logger")


class MigrateStringToArray(ABC):
"""
This class stands for migrating the config at runtime,
while providing the backward compatibility when falling back to the previous source version.
Specifically, starting from `1.7.1`, the `groups` and `projects` properties should be like :
> List(["<group1>", "<group2>", ..., "<group3>"])
instead of, in ` 1.7.0`:
> JSON STR: "group1 group2"
"""

message_repository: MessageRepository = InMemoryMessageRepository()

@property
@abc.abstractmethod
def migrate_from_key(self) -> str:
...

@property
@abc.abstractmethod
def migrate_to_key(self) -> str:
...

@classmethod
def _should_migrate(cls, config: Mapping[str, Any]) -> bool:
"""
This method determines whether config require migration.
Returns:
> True, if the transformation is necessary
> False, otherwise.
"""
if cls.migrate_from_key in config and cls.migrate_to_key not in config:
return True
return False

@classmethod
def _transform_to_array(cls, config: Mapping[str, Any], source: SourceGitlab = None) -> Mapping[str, Any]:
# assign old values to new property that will be used within the new version
config[cls.migrate_to_key] = config[cls.migrate_to_key] if cls.migrate_to_key in config else []
data = set(filter(None, config.get(cls.migrate_from_key).split(" ")))
config[cls.migrate_to_key] = list(data | set(config[cls.migrate_to_key]))
return config

@classmethod
def _modify_and_save(cls, config_path: str, source: SourceGitlab, config: Mapping[str, Any]) -> Mapping[str, Any]:
# modify the config
migrated_config = cls._transform_to_array(config, source)
# save the config
source.write_config(migrated_config, config_path)
# return modified config
return migrated_config

@classmethod
def _emit_control_message(cls, migrated_config: Mapping[str, Any]) -> None:
# add the Airbyte Control Message to message repo
cls.message_repository.emit_message(create_connector_config_control_message(migrated_config))
# emit the Airbyte Control Message from message queue to stdout
for message in cls.message_repository._message_queue:
print(message.json(exclude_unset=True))

@classmethod
def migrate(cls, args: List[str], source: SourceGitlab) -> None:
"""
This method checks the input args, should the config be migrated,
transform if necessary and emit the CONTROL message.
"""
# get config path
config_path = AirbyteEntrypoint(source).extract_config(args)
# proceed only if `--config` arg is provided
if config_path:
# read the existing config
config = source.read_config(config_path)
# migration check
if cls._should_migrate(config):
cls._emit_control_message(
cls._modify_and_save(config_path, source, config),
)


class MigrateGroups(MigrateStringToArray):

migrate_from_key: str = "groups"
migrate_to_key: str = "groups_list"


class MigrateProjects(MigrateStringToArray):

migrate_from_key: str = "projects"
migrate_to_key: str = "projects_list"
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def _groups_stream(self, config: MutableMapping[str, Any]) -> Groups:
def _projects_stream(self, config: MutableMapping[str, Any]) -> Union[Projects, GroupProjects]:
if not self.__projects_stream:
auth_params = self._auth_params(config)
project_ids = list(filter(None, config.get("projects", "").split(" ")))
project_ids = config.get("projects_list", [])
groups_stream = self._groups_stream(config)
if groups_stream.group_ids:
self.__projects_stream = GroupProjects(project_ids=project_ids, parent_stream=groups_stream, **auth_params)
Expand All @@ -123,7 +123,7 @@ def _auth_params(self, config: MutableMapping[str, Any]) -> Mapping[str, Any]:
return self.__auth_params

def _get_group_list(self, config: MutableMapping[str, Any]) -> List[str]:
group_ids = list(filter(None, config.get("groups", "").split(" ")))
group_ids = config.get("groups_list")
# Gitlab exposes different APIs to get a list of groups.
# We use https://docs.gitlab.com/ee/api/groups.html#list-groups in case there's no group IDs in the input config.
# This API provides full information about all available groups, including subgroups.
Expand Down Expand Up @@ -198,22 +198,23 @@ def check_connection(self, logger, config) -> Tuple[bool, Any]:
def streams(self, config: MutableMapping[str, Any]) -> List[Stream]:
config = self._ensure_default_values(config)
auth_params = self._auth_params(config)
start_date = config.get("start_date")

groups, projects = self._groups_stream(config), self._projects_stream(config)
pipelines = Pipelines(parent_stream=projects, start_date=config["start_date"], **auth_params)
merge_requests = MergeRequests(parent_stream=projects, start_date=config["start_date"], **auth_params)
pipelines = Pipelines(parent_stream=projects, start_date=start_date, **auth_params)
merge_requests = MergeRequests(parent_stream=projects, start_date=start_date, **auth_params)
epics = Epics(parent_stream=groups, **auth_params)

streams = [
groups,
projects,
Branches(parent_stream=projects, repository_part=True, **auth_params),
Commits(parent_stream=projects, repository_part=True, start_date=config["start_date"], **auth_params),
Commits(parent_stream=projects, repository_part=True, start_date=start_date, **auth_params),
epics,
Deployments(parent_stream=projects, **auth_params),
EpicIssues(parent_stream=epics, **auth_params),
GroupIssueBoards(parent_stream=groups, **auth_params),
Issues(parent_stream=projects, start_date=config["start_date"], **auth_params),
Issues(parent_stream=projects, start_date=start_date, **auth_params),
Jobs(parent_stream=pipelines, **auth_params),
ProjectMilestones(parent_stream=projects, **auth_params),
GroupMilestones(parent_stream=groups, **auth_params),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Source Gitlab Spec",
"type": "object",
"required": ["start_date", "credentials"],
"required": ["credentials"],
"additionalProperties": true,
"properties": {
"credentials": {
Expand Down Expand Up @@ -76,7 +76,7 @@
"start_date": {
"type": "string",
"title": "Start Date",
"description": "The date from which you'd like to replicate data for GitLab API, in the format YYYY-MM-DDT00:00:00Z. All data generated after this date will be replicated.",
"description": "The date from which you'd like to replicate data for GitLab API, in the format YYYY-MM-DDT00:00:00Z. Optional. If not set, all data will be replicated. All data generated after this date will be replicated.",
"examples": ["2021-03-01T00:00:00Z"],
"pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$",
"order": 1,
Expand All @@ -98,13 +98,33 @@
"type": "string",
"examples": ["airbyte.io"],
"title": "Groups",
"description": "Space-delimited list of groups. e.g. airbyte.io.",
"description": "[DEPRECATED] Space-delimited list of groups. e.g. airbyte.io.",
"airbyte_hidden": true
},
"groups_list": {
"type": "array",
"items": {
"type": "string"
},
"examples": ["airbyte.io"],
"title": "Groups",
"description": "List of groups. e.g. airbyte.io.",
"order": 3
},
"projects": {
"type": "string",
"title": "Projects",
"examples": ["airbyte.io/documentation"],
"description": "[DEPRECATED] Space-delimited list of projects. e.g. airbyte.io/documentation meltano/tap-gitlab.",
"airbyte_hidden": true
},
"projects_list": {
"type": "array",
"items": {
"type": "string"
},
"title": "Projects",
"examples": ["airbyte.io/documentation"],
"description": "Space-delimited list of projects. e.g. airbyte.io/documentation meltano/tap-gitlab.",
"order": 4
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,22 +187,31 @@ def stream_slices(
stream_state = stream_state or {}
super_slices = super().stream_slices(sync_mode, cursor_field, stream_state)
for super_slice in super_slices:
start_point = self._start_date
state_project_value = stream_state.get(str(super_slice["id"]))
if state_project_value:
state_value = state_project_value.get(self.cursor_field)
if state_value:
start_point = max(start_point, state_value)
for start_dt, end_dt in self._chunk_date_range(pendulum.parse(start_point)):
if self._start_date or state_project_value:
start_point = self._start_date
if state_project_value:
state_value = state_project_value.get(self.cursor_field)
if state_value and start_point:
start_point = max(start_point, state_value)
else:
start_point = state_value or start_point
for start_dt, end_dt in self._chunk_date_range(pendulum.parse(start_point)):
stream_slice = {key: value for key, value in super_slice.items()}
stream_slice[self.lower_bound_filter] = start_dt
stream_slice[self.upper_bound_filter] = end_dt
yield stream_slice
else:
stream_slice = {key: value for key, value in super_slice.items()}
stream_slice[self.lower_bound_filter] = start_dt
stream_slice[self.upper_bound_filter] = end_dt
yield stream_slice

def request_params(self, stream_state=None, stream_slice: Mapping[str, Any] = None, **kwargs):
params = super().request_params(stream_state, stream_slice, **kwargs)
params[self.lower_bound_filter] = stream_slice[self.lower_bound_filter]
params[self.upper_bound_filter] = stream_slice[self.upper_bound_filter]
lower_bound_filter = stream_slice.get(self.lower_bound_filter)
upper_bound_filter = stream_slice.get(self.upper_bound_filter)
if lower_bound_filter and upper_bound_filter:
params[self.lower_bound_filter] = lower_bound_filter
params[self.upper_bound_filter] = upper_bound_filter
return params


Expand Down
11 changes: 6 additions & 5 deletions docs/integrations/sources/gitlab.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ This page contains the setup guide and reference information for the Gitlab Sour
## Prerequisites

- Gitlab instance or an account at [Gitlab](https://gitlab.com)
- Start date
- Start date (Optional)
- GitLab Groups (Optional)
- GitLab Projects (Optional)

Expand Down Expand Up @@ -49,10 +49,10 @@ Log into [GitLab](https://gitlab.com) and then generate a [personal access token
3. On the source setup page, select **GitLab** from the Source type dropdown and enter a name for this connector.
4. Click `Authenticate your GitLab account` by selecting Oauth or Personal Access Token for Authentication.
5. Log in and Authorize to the GitLab account.
6. **Start date** - The date from which you'd like to replicate data for streams.
7. **API URL** - The URL to access you self-hosted GitLab instance or `gitlab.com` (default).
8. **Groups (Optional)** - Space-delimited list of GitLab group IDs, e.g. `airbytehq` for single group, `airbytehq another-repo` for multiple groups.
9. **Projects (Optional)** - Space-delimited list of GitLab projects to pull data for, e.g. `airbytehq/airbyte`.
6. **API URL** - The URL to access you self-hosted GitLab instance or `gitlab.com` (default).
7. **Start date (Optional)** - The date from which you'd like to replicate data for streams.
8. **Groups (Optional)** - List of GitLab group IDs, e.g. `airbytehq` for single group, `airbytehq another-repo` for multiple groups.
9. **Projects (Optional)** - List of GitLab projects to pull data for, e.g. `airbytehq/airbyte`.
10. Click **Set up source**.

**Note:** You can specify either Group IDs or Project IDs in the source configuration. If both fields are blank, the connector will retrieve a list of all the groups that are accessible to the configured token and ingest as normal.
Expand Down Expand Up @@ -112,6 +112,7 @@ Gitlab has the [rate limits](https://docs.gitlab.com/ee/user/gitlab_com/index.ht

| Version | Date | Pull Request | Subject |
|:--------|:-----------|:---------------------------------------------------------|:-------------------------------------------------------------------------------------------|
| 1.8.1 | 2023-10-12 | [31375](https://github.com/airbytehq/airbyte/pull/31375) | Mark `start_date` as optional, migrate `groups` and `projects` to array |
| 1.8.0 | 2023-10-12 | [31339](https://github.com/airbytehq/airbyte/pull/31339) | Add undeclared fields to streams schemas, validate date/date-time format in stream schemas |
| 1.7.1 | 2023-10-10 | [31210](https://github.com/airbytehq/airbyte/pull/31210) | Added expired `access_token` handling, while checking the connection |
| 1.7.0 | 2023-08-08 | [27869](https://github.com/airbytehq/airbyte/pull/29203) | Add Deployments stream |
Expand Down

0 comments on commit efdc945

Please sign in to comment.