From d3dec00ede56fcc631fecf165133ff42e8c5c0d0 Mon Sep 17 00:00:00 2001 From: mkarmah Date: Wed, 3 Jul 2024 11:42:22 +0000 Subject: [PATCH 01/45] Added support for gitlab user ingestion --- .../gitlab/.port/resources/blueprints.json | 60 ++++++++++++ .../.port/resources/port-app-config.yaml | 18 ++++ integrations/gitlab/.port/spec.yaml | 1 + integrations/gitlab/CHANGELOG.md | 8 ++ .../gitlab_integration/core/async_fetcher.py | 10 +- .../gitlab_integration/gitlab_service.py | 95 +++++++++++++++++++ .../gitlab/gitlab_integration/ocean.py | 13 +++ .../gitlab/gitlab_integration/utils.py | 1 + integrations/gitlab/pyproject.toml | 2 +- 9 files changed, 206 insertions(+), 2 deletions(-) diff --git a/integrations/gitlab/.port/resources/blueprints.json b/integrations/gitlab/.port/resources/blueprints.json index b51ae9d793..7fdd7a389c 100644 --- a/integrations/gitlab/.port/resources/blueprints.json +++ b/integrations/gitlab/.port/resources/blueprints.json @@ -53,5 +53,65 @@ "calculationProperties": {}, "aggregationProperties": {}, "relations": {} + }, + { + "identifier": "member", + "title": "Member", + "icon": "GitLab", + "schema": { + "properties": { + "state": { + "title": "State", + "type": "string", + "icon": "GitLab", + "description": "The current state of the GitLab item (e.g., open, closed)." + }, + "locked": { + "type": "string", + "title": "Locked", + "icon": "GitLab", + "description": "Indicates if the GitLab item is locked." + }, + "link": { + "icon": "Link", + "type": "string", + "title": "Link", + "format": "url", + "description": "URL link to the GitLab item." + }, + "createdBy": { + "type": "string", + "title": "Created By", + "icon": "GitLab", + "description": "The GitLab username of the item's creator." + }, + "email": { + "type": "string", + "title": "Email", + "description": "GitLab primary email address.", + "icon": "User", + "format": "user" + }, + "publicEmail": { + "type": "string", + "title": "Public Email", + "description": "User's GitLab public email.", + "icon": "User", + "format": "user" + } + }, + "required": [] + }, + "mirrorProperties": {}, + "calculationProperties": {}, + "aggregationProperties": {}, + "relations": { + "gitlabGroup": { + "title": "Group", + "target": "gitlabGroup", + "required": false, + "many": true + } + } } ] diff --git a/integrations/gitlab/.port/resources/port-app-config.yaml b/integrations/gitlab/.port/resources/port-app-config.yaml index d1c2882763..a47792bb3a 100644 --- a/integrations/gitlab/.port/resources/port-app-config.yaml +++ b/integrations/gitlab/.port/resources/port-app-config.yaml @@ -15,3 +15,21 @@ resources: readme: file://README.md description: .description language: .__languages | to_entries | max_by(.value) | .key + - kind: member + selector: + query: 'true' + port: + entity: + mappings: + identifier: .username + title: .name + blueprint: '"member"' + properties: + state: .state + locked: .locked + link: .web_url + email: .email + publicEmail: .__public_email + relations: + gitlabGroup: '[.__groups[].full_path]' + createdBy: .created_by.username diff --git a/integrations/gitlab/.port/spec.yaml b/integrations/gitlab/.port/spec.yaml index fc2b9aed25..735d9e2be1 100644 --- a/integrations/gitlab/.port/spec.yaml +++ b/integrations/gitlab/.port/spec.yaml @@ -8,6 +8,7 @@ features: section: Git Providers resources: - kind: projects + - kind: members configurations: - name: tokenMapping required: true diff --git a/integrations/gitlab/CHANGELOG.md b/integrations/gitlab/CHANGELOG.md index edb9e6c586..66f68ba74b 100644 --- a/integrations/gitlab/CHANGELOG.md +++ b/integrations/gitlab/CHANGELOG.md @@ -7,6 +7,14 @@ this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm +0.1.89 (2024-07-03) +=================== + +### Improvements + +- Added support for gitlab user ingestion (0.1.89) + + 0.1.88 (2024-06-23) =================== diff --git a/integrations/gitlab/gitlab_integration/core/async_fetcher.py b/integrations/gitlab/gitlab_integration/core/async_fetcher.py index 4902330259..1028e6546d 100644 --- a/integrations/gitlab/gitlab_integration/core/async_fetcher.py +++ b/integrations/gitlab/gitlab_integration/core/async_fetcher.py @@ -6,7 +6,14 @@ import gitlab.exceptions from gitlab import GitlabList from gitlab.base import RESTObject, RESTObjectList -from gitlab.v4.objects import Project, ProjectPipelineJob, ProjectPipeline, Issue, Group +from gitlab.v4.objects import ( + Project, + ProjectPipelineJob, + ProjectPipeline, + Issue, + Group, + User, +) from loguru import logger from port_ocean.core.models import Entity @@ -28,6 +35,7 @@ async def fetch_single( Issue, Project, Group, + User, ], ], *args, diff --git a/integrations/gitlab/gitlab_integration/gitlab_service.py b/integrations/gitlab/gitlab_integration/gitlab_service.py index d6f48eba9f..b2badaec4d 100644 --- a/integrations/gitlab/gitlab_integration/gitlab_service.py +++ b/integrations/gitlab/gitlab_integration/gitlab_service.py @@ -13,6 +13,8 @@ Issue, Group, ProjectPipeline, + User, + GroupMemberAll, GroupMergeRequest, ProjectPipelineJob, ) @@ -26,6 +28,8 @@ from port_ocean.core.models import Entity PROJECTS_CACHE_KEY = "__cache_all_projects" +GROUPS_CACHE_KEY = "__cache_all_groups" +MAX_CONCURRENT_TASKS = 30 if TYPE_CHECKING: from gitlab_integration.git_integration import ( @@ -178,6 +182,9 @@ def should_run_for_issue( ) return self.should_run_for_path(project_path) + def should_run_for_member(self, member: User): + return not member.username.startswith("group_") + def get_root_groups(self) -> List[Group]: groups = self.gitlab_client.groups.list(iterator=True) return typing.cast( @@ -322,6 +329,15 @@ async def get_group(self, group_id: int) -> Group | None: async def get_all_groups(self) -> typing.AsyncIterator[List[Group]]: logger.info("fetching all groups for the token") + + cached_groups = event.attributes.setdefault(GROUPS_CACHE_KEY, {}).setdefault( + self.gitlab_client.private_token, {} + ) + + if cached_groups: + yield cached_groups.values() + return + async for groups_batch in AsyncFetcher.fetch_batch( fetch_func=self.gitlab_client.groups.list, validation_func=self.should_run_for_group, @@ -333,6 +349,7 @@ async def get_all_groups(self) -> typing.AsyncIterator[List[Group]]: logger.info( f"Queried {len(groups)} groups {[group.path for group in groups]}" ) + cached_groups.update({group.id: group for group in groups}) yield groups async def get_all_projects(self) -> typing.AsyncIterator[List[Project]]: @@ -533,6 +550,84 @@ async def get_all_issues(self, group: Group) -> typing.AsyncIterator[List[Issue] issues: List[Issue] = typing.cast(List[Issue], issues_batch) yield issues + async def get_member_groups( + self, member: User + ) -> typing.AsyncIterator[List[Group]]: + + semaphore = asyncio.Semaphore(MAX_CONCURRENT_TASKS) + + async def check_group_membership(group: Group) -> Group | None: + "check if the user is a member of the group" + async with semaphore: + try: + await AsyncFetcher.fetch_single(group.members.get, member.get_id()) + return group + except GitlabError as err: + if err.response_code != 404: + raise err + return None + + async for groups_batch in self.get_all_groups(): + tasks = [check_group_membership(group) for group in groups_batch] + groups = await asyncio.gather(*tasks) + member_groups: List[Group] = typing.cast( + List[Group], list(filter(None, groups)) + ) + logger.info( + f"Queried {len(member_groups)} groups {[member_group.name for member_group in member_groups]} for user {member.name}" + ) + yield member_groups + + async def get_all_group_members( + self, group: Group + ) -> typing.AsyncIterator[List[GroupMemberAll]]: + + logger.info(f"Fetching all members of group {group.name}") + + async for users_batch in AsyncFetcher.fetch_batch( + fetch_func=group.members_all.list, + validation_func=self.should_run_for_member, + pagination="offset", + order_by="id", + sort="asc", + ): + members: List[GroupMemberAll] = typing.cast( + List[GroupMemberAll], users_batch + ) + logger.info( + f"Queried {len(members)} members {[user.username for user in members]} from {group.name}" + ) + yield members + + async def enrich_member_with_groups_and_public_email( + self, member + ) -> dict[str, Any]: + user: User = await self.get_user(member.id) + + user_groups: List[dict[str, Any]] = [ + {"id": group.id, "full_path": group.full_path} + async for groups in self.get_member_groups(user) + for group in groups + ] + + member_dict: dict[str, Any] = member.asdict() + member_dict.update( + { + "__public_email": user.public_email, + "__groups": user_groups, + } + ) + + return member_dict + + async def get_user(self, user_id: str) -> User: + logger.info(f"fetching user {user_id}") + user_response = await AsyncFetcher.fetch_single( + self.gitlab_client.users.get, user_id + ) + user: User = typing.cast(User, user_response) + return user + def get_entities_diff( self, project: Project, diff --git a/integrations/gitlab/gitlab_integration/ocean.py b/integrations/gitlab/gitlab_integration/ocean.py index b3d8f58fbc..511746251b 100644 --- a/integrations/gitlab/gitlab_integration/ocean.py +++ b/integrations/gitlab/gitlab_integration/ocean.py @@ -207,3 +207,16 @@ async def resync_pipelines(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: {**pipeline.asdict(), "__project": project.asdict()} for pipeline in pipelines_batch ] + + +@ocean.on_resync(ObjectKind.MEMBER) +async def resync_members(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: + for service in get_cached_all_services(): + for group in service.get_root_groups(): + async for members_batch in service.get_all_group_members(group): + tasks = [ + service.enrich_member_with_groups_and_public_email(member) + for member in members_batch + ] + members = await asyncio.gather(*tasks) + yield members diff --git a/integrations/gitlab/gitlab_integration/utils.py b/integrations/gitlab/gitlab_integration/utils.py index ce7803774b..c02b5e576a 100644 --- a/integrations/gitlab/gitlab_integration/utils.py +++ b/integrations/gitlab/gitlab_integration/utils.py @@ -45,3 +45,4 @@ class ObjectKind: PIPELINE = "pipeline" PROJECT = "project" FOLDER = "folder" + MEMBER = "member" diff --git a/integrations/gitlab/pyproject.toml b/integrations/gitlab/pyproject.toml index ae0ea12271..a032e5ff68 100644 --- a/integrations/gitlab/pyproject.toml +++ b/integrations/gitlab/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "gitlab" -version = "0.1.88" +version = "0.1.89" description = "Gitlab integration for Port using Port-Ocean Framework" authors = ["Yair Siman-Tov "] From b88d5306d99f137fb9f73e241fad6eabd6505794 Mon Sep 17 00:00:00 2001 From: mkarmah Date: Wed, 3 Jul 2024 12:06:28 +0000 Subject: [PATCH 02/45] edited changelog --- integrations/gitlab/CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integrations/gitlab/CHANGELOG.md b/integrations/gitlab/CHANGELOG.md index 66f68ba74b..ff5f15750b 100644 --- a/integrations/gitlab/CHANGELOG.md +++ b/integrations/gitlab/CHANGELOG.md @@ -10,9 +10,9 @@ this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm 0.1.89 (2024-07-03) =================== -### Improvements +### Features -- Added support for gitlab user ingestion (0.1.89) +- Added support for gitlab user ingestion (PORT-7708) 0.1.88 (2024-06-23) From cfeddb03c69f5904f2685d35ec868826b28e8335 Mon Sep 17 00:00:00 2001 From: mkarmah Date: Wed, 3 Jul 2024 12:06:52 +0000 Subject: [PATCH 03/45] edited changelog --- integrations/gitlab/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/gitlab/CHANGELOG.md b/integrations/gitlab/CHANGELOG.md index ff5f15750b..6a7fc01029 100644 --- a/integrations/gitlab/CHANGELOG.md +++ b/integrations/gitlab/CHANGELOG.md @@ -12,7 +12,7 @@ this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ### Features -- Added support for gitlab user ingestion (PORT-7708) +- Added support for gitlab member ingestion (PORT-7708) 0.1.88 (2024-06-23) From 81905c5106b5079a3f839ad5c4afded792a18334 Mon Sep 17 00:00:00 2001 From: mkarmah Date: Fri, 12 Jul 2024 15:48:29 +0000 Subject: [PATCH 04/45] updated group -> member relationship --- .../gitlab/.port/resources/blueprints.json | 52 +++++++++--- .../.port/resources/port-app-config.yaml | 2 + integrations/gitlab/.port/spec.yaml | 1 + .../gitlab_integration/git_integration.py | 22 ++++- .../gitlab_integration/gitlab_service.py | 81 +++++++------------ .../gitlab/gitlab_integration/ocean.py | 28 ++++--- 6 files changed, 114 insertions(+), 72 deletions(-) diff --git a/integrations/gitlab/.port/resources/blueprints.json b/integrations/gitlab/.port/resources/blueprints.json index 7fdd7a389c..c5e87566a3 100644 --- a/integrations/gitlab/.port/resources/blueprints.json +++ b/integrations/gitlab/.port/resources/blueprints.json @@ -79,12 +79,6 @@ "format": "url", "description": "URL link to the GitLab item." }, - "createdBy": { - "type": "string", - "title": "Created By", - "icon": "GitLab", - "description": "The GitLab username of the item's creator." - }, "email": { "type": "string", "title": "Email", @@ -105,10 +99,50 @@ "mirrorProperties": {}, "calculationProperties": {}, "aggregationProperties": {}, + "relations": {} + }, + { + "identifier": "gitlabGroup", + "title": "Group", + "icon": "GitLab", + "schema": { + "properties": { + "visibility": { + "icon": "Lock", + "title": "Visibility", + "type": "string", + "enum": [ + "public", + "internal", + "private" + ], + "enumColors": { + "public": "red", + "internal": "yellow", + "private": "green" + } + }, + "url": { + "title": "URL", + "format": "url", + "type": "string", + "icon": "Link" + }, + "description": { + "title": "Description", + "type": "string", + "icon": "BlankPage" + } + }, + "required": [] + }, + "mirrorProperties": {}, + "calculationProperties": {}, + "aggregationProperties": {}, "relations": { - "gitlabGroup": { - "title": "Group", - "target": "gitlabGroup", + "members": { + "title": "Members", + "target": "member", "required": false, "many": true } diff --git a/integrations/gitlab/.port/resources/port-app-config.yaml b/integrations/gitlab/.port/resources/port-app-config.yaml index a47792bb3a..fefa12d466 100644 --- a/integrations/gitlab/.port/resources/port-app-config.yaml +++ b/integrations/gitlab/.port/resources/port-app-config.yaml @@ -18,6 +18,8 @@ resources: - kind: member selector: query: 'true' + publicEmailVisibility: 'false' + filterBots: 'false' port: entity: mappings: diff --git a/integrations/gitlab/.port/spec.yaml b/integrations/gitlab/.port/spec.yaml index 735d9e2be1..7645af4755 100644 --- a/integrations/gitlab/.port/spec.yaml +++ b/integrations/gitlab/.port/spec.yaml @@ -9,6 +9,7 @@ features: resources: - kind: projects - kind: members + - kind: groups configurations: - name: tokenMapping required: true diff --git a/integrations/gitlab/gitlab_integration/git_integration.py b/integrations/gitlab/gitlab_integration/git_integration.py index 568d39f30e..ec0be50a32 100644 --- a/integrations/gitlab/gitlab_integration/git_integration.py +++ b/integrations/gitlab/gitlab_integration/git_integration.py @@ -1,4 +1,4 @@ -from typing import Dict, Any, Tuple, List, Type +from typing import Dict, Any, Tuple, List, Type, Literal from gitlab.v4.objects import Project from loguru import logger @@ -9,6 +9,7 @@ FILE_PROPERTY_PREFIX, SEARCH_PROPERTY_PREFIX, ) +from port_ocean.core.integrations.base import BaseIntegration from gitlab_integration.gitlab_service import PROJECTS_CACHE_KEY from gitlab_integration.utils import get_cached_all_services from port_ocean.context.event import event @@ -122,6 +123,18 @@ class GitlabResourceConfig(ResourceConfig): selector: GitlabSelector +class GitlabMembersResourceConfig(ResourceConfig): + class MembersSelector(Selector): + public_email_visibility: bool | None = Field( + alias="publicEmailVisibility", + default=False, + description="If set to true, the integration will enrich members with public email field. Default value is false", + ) + + kind: Literal["member"] + selector: MembersSelector + + class GitlabPortAppConfig(PortAppConfig): spec_path: str | List[str] = Field(alias="specPath", default="**/port.yml") branch: str | None @@ -131,7 +144,12 @@ class GitlabPortAppConfig(PortAppConfig): project_visibility_filter: str | None = Field( alias="projectVisibilityFilter", default=None ) - resources: list[GitlabResourceConfig] = Field(default_factory=list) # type: ignore + filter_bots: bool | None = Field( + alias="filterBots", + default=True, + description="If set to true, bots will be filtered out from the members list. Default value is true", + ) + resources: list[GitlabMembersResourceConfig | GitlabResourceConfig] = Field(default_factory=list) # type: ignore def _get_project_from_cache(project_id: int) -> Project | None: diff --git a/integrations/gitlab/gitlab_integration/gitlab_service.py b/integrations/gitlab/gitlab_integration/gitlab_service.py index b2badaec4d..ae6a9959a3 100644 --- a/integrations/gitlab/gitlab_integration/gitlab_service.py +++ b/integrations/gitlab/gitlab_integration/gitlab_service.py @@ -14,7 +14,7 @@ Group, ProjectPipeline, User, - GroupMemberAll, + GroupMember, GroupMergeRequest, ProjectPipelineJob, ) @@ -29,12 +29,12 @@ PROJECTS_CACHE_KEY = "__cache_all_projects" GROUPS_CACHE_KEY = "__cache_all_groups" +MEMBERS_CACHE_KEY = "__cache_all_members" + MAX_CONCURRENT_TASKS = 30 if TYPE_CHECKING: - from gitlab_integration.git_integration import ( - GitlabPortAppConfig, - ) + from gitlab_integration.git_integration import GitlabPortAppConfig class GitlabService: @@ -550,74 +550,51 @@ async def get_all_issues(self, group: Group) -> typing.AsyncIterator[List[Issue] issues: List[Issue] = typing.cast(List[Issue], issues_batch) yield issues - async def get_member_groups( - self, member: User - ) -> typing.AsyncIterator[List[Group]]: - - semaphore = asyncio.Semaphore(MAX_CONCURRENT_TASKS) - - async def check_group_membership(group: Group) -> Group | None: - "check if the user is a member of the group" - async with semaphore: - try: - await AsyncFetcher.fetch_single(group.members.get, member.get_id()) - return group - except GitlabError as err: - if err.response_code != 404: - raise err - return None - - async for groups_batch in self.get_all_groups(): - tasks = [check_group_membership(group) for group in groups_batch] - groups = await asyncio.gather(*tasks) - member_groups: List[Group] = typing.cast( - List[Group], list(filter(None, groups)) - ) - logger.info( - f"Queried {len(member_groups)} groups {[member_group.name for member_group in member_groups]} for user {member.name}" - ) - yield member_groups - async def get_all_group_members( self, group: Group - ) -> typing.AsyncIterator[List[GroupMemberAll]]: + ) -> typing.AsyncIterator[List[GroupMember]]: - logger.info(f"Fetching all members of group {group.name}") + port_app_config: GitlabPortAppConfig = typing.cast( + "GitlabPortAppConfig", event.port_app_config + ) + filter_bots = port_app_config.filter_bots + logger.info(f"Fetching all members of group {group.name}") async for users_batch in AsyncFetcher.fetch_batch( - fetch_func=group.members_all.list, - validation_func=self.should_run_for_member, + fetch_func=group.members.list, + validation_func=self.should_run_for_member if filter_bots else None, pagination="offset", order_by="id", sort="asc", ): - members: List[GroupMemberAll] = typing.cast( - List[GroupMemberAll], users_batch - ) + members: List[GroupMember] = typing.cast(List[GroupMember], users_batch) logger.info( f"Queried {len(members)} members {[user.username for user in members]} from {group.name}" ) yield members - async def enrich_member_with_groups_and_public_email( - self, member - ) -> dict[str, Any]: - user: User = await self.get_user(member.id) + async def enrich_group_with_members(self, group: Group) -> List[dict[str, Any]]: - user_groups: List[dict[str, Any]] = [ - {"id": group.id, "full_path": group.full_path} - async for groups in self.get_member_groups(user) - for group in groups + group_members = [ + member + async for members in self.get_all_group_members(group) + for member in members ] - - member_dict: dict[str, Any] = member.asdict() - member_dict.update( + group_dict: dict[str, Any] = group.asdict() + group_dict.update( { - "__public_email": user.public_email, - "__groups": user_groups, + "__members": [ + {"id": group_member.id, "username": group_member.username} + for group_member in group_members + ] } ) + return group_dict + async def enrich_member_with_public_email(self, member) -> dict[str, Any]: + user: User = await self.get_user(member.id) + member_dict: dict[str, Any] = member.asdict() + member_dict.update({"__public_email": user.public_email}) return member_dict async def get_user(self, user_id: str) -> User: diff --git a/integrations/gitlab/gitlab_integration/ocean.py b/integrations/gitlab/gitlab_integration/ocean.py index 511746251b..059486ecb7 100644 --- a/integrations/gitlab/gitlab_integration/ocean.py +++ b/integrations/gitlab/gitlab_integration/ocean.py @@ -12,7 +12,10 @@ WebhookMappingConfig, ) from gitlab_integration.events.setup import setup_application -from gitlab_integration.git_integration import GitlabResourceConfig +from gitlab_integration.git_integration import ( + GitlabResourceConfig, + GitlabMembersResourceConfig, +) from gitlab_integration.utils import ObjectKind, get_cached_all_services from port_ocean.context.event import event from port_ocean.context.ocean import ocean @@ -108,7 +111,9 @@ async def on_start() -> None: async def resync_groups(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: for service in get_cached_all_services(): async for groups_batch in service.get_all_groups(): - yield [group.asdict() for group in groups_batch] + tasks = [service.enrich_group_with_members(group) for group in groups_batch] + enriched_groups = await asyncio.gather(*tasks) + yield enriched_groups @ocean.on_resync(ObjectKind.PROJECT) @@ -211,12 +216,17 @@ async def resync_pipelines(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: @ocean.on_resync(ObjectKind.MEMBER) async def resync_members(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: + gitlab_resource_config: GitlabMembersResourceConfig = typing.cast( + GitlabMembersResourceConfig, event.resource_config + ) + selector = gitlab_resource_config.selector for service in get_cached_all_services(): for group in service.get_root_groups(): - async for members_batch in service.get_all_group_members(group): - tasks = [ - service.enrich_member_with_groups_and_public_email(member) - for member in members_batch - ] - members = await asyncio.gather(*tasks) - yield members + async for members in service.get_all_group_members(group): + if selector.public_email_visibility: + yield [ + await service.enrich_member_with_public_email(member) + for member in members + ] + else: + yield [member.asdict() for member in members] From 2917e45135bce56ee9e62ec3cea199206b5d0067 Mon Sep 17 00:00:00 2001 From: mkarmah Date: Fri, 12 Jul 2024 16:25:43 +0000 Subject: [PATCH 05/45] Lint --- .../gitlab/gitlab_integration/git_integration.py | 1 - .../gitlab/gitlab_integration/gitlab_service.py | 15 ++++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/integrations/gitlab/gitlab_integration/git_integration.py b/integrations/gitlab/gitlab_integration/git_integration.py index ec0be50a32..db9ab4ea64 100644 --- a/integrations/gitlab/gitlab_integration/git_integration.py +++ b/integrations/gitlab/gitlab_integration/git_integration.py @@ -9,7 +9,6 @@ FILE_PROPERTY_PREFIX, SEARCH_PROPERTY_PREFIX, ) -from port_ocean.core.integrations.base import BaseIntegration from gitlab_integration.gitlab_service import PROJECTS_CACHE_KEY from gitlab_integration.utils import get_cached_all_services from port_ocean.context.event import event diff --git a/integrations/gitlab/gitlab_integration/gitlab_service.py b/integrations/gitlab/gitlab_integration/gitlab_service.py index ae6a9959a3..f4e41ab0ca 100644 --- a/integrations/gitlab/gitlab_integration/gitlab_service.py +++ b/integrations/gitlab/gitlab_integration/gitlab_service.py @@ -182,9 +182,6 @@ def should_run_for_issue( ) return self.should_run_for_path(project_path) - def should_run_for_member(self, member: User): - return not member.username.startswith("group_") - def get_root_groups(self) -> List[Group]: groups = self.gitlab_client.groups.list(iterator=True) return typing.cast( @@ -559,10 +556,18 @@ async def get_all_group_members( ) filter_bots = port_app_config.filter_bots + def skip_validation(_: User): + return True + + def should_run_for_member(member: User): + return not member.username.__contains__("bot") + + validation_func = should_run_for_member if filter_bots else skip_validation + logger.info(f"Fetching all members of group {group.name}") async for users_batch in AsyncFetcher.fetch_batch( fetch_func=group.members.list, - validation_func=self.should_run_for_member if filter_bots else None, + validation_func=validation_func, pagination="offset", order_by="id", sort="asc", @@ -573,7 +578,7 @@ async def get_all_group_members( ) yield members - async def enrich_group_with_members(self, group: Group) -> List[dict[str, Any]]: + async def enrich_group_with_members(self, group: Group) -> dict[str, Any]: group_members = [ member From 9820b9a0b77bf38e6ab1a0a6fafd0d570c2eb3ed Mon Sep 17 00:00:00 2001 From: mkarmah Date: Fri, 12 Jul 2024 16:48:35 +0000 Subject: [PATCH 06/45] Added error handling to get_all_group_members --- .../gitlab_integration/gitlab_service.py | 48 +++++++++++-------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/integrations/gitlab/gitlab_integration/gitlab_service.py b/integrations/gitlab/gitlab_integration/gitlab_service.py index f4e41ab0ca..268bad1594 100644 --- a/integrations/gitlab/gitlab_integration/gitlab_service.py +++ b/integrations/gitlab/gitlab_integration/gitlab_service.py @@ -551,32 +551,38 @@ async def get_all_group_members( self, group: Group ) -> typing.AsyncIterator[List[GroupMember]]: - port_app_config: GitlabPortAppConfig = typing.cast( - "GitlabPortAppConfig", event.port_app_config - ) - filter_bots = port_app_config.filter_bots + try: + port_app_config: GitlabPortAppConfig = typing.cast( + "GitlabPortAppConfig", event.port_app_config + ) + filter_bots = port_app_config.filter_bots - def skip_validation(_: User): - return True + def skip_validation(_: User): + return True - def should_run_for_member(member: User): - return not member.username.__contains__("bot") + def should_run_for_member(member: User): + return not member.username.__contains__("bot") - validation_func = should_run_for_member if filter_bots else skip_validation + validation_func = should_run_for_member if filter_bots else skip_validation - logger.info(f"Fetching all members of group {group.name}") - async for users_batch in AsyncFetcher.fetch_batch( - fetch_func=group.members.list, - validation_func=validation_func, - pagination="offset", - order_by="id", - sort="asc", - ): - members: List[GroupMember] = typing.cast(List[GroupMember], users_batch) - logger.info( - f"Queried {len(members)} members {[user.username for user in members]} from {group.name}" + logger.info(f"Fetching all members of group {group.name}") + async for users_batch in AsyncFetcher.fetch_batch( + fetch_func=group.members.list, + validation_func=validation_func, + pagination="offset", + order_by="id", + sort="asc", + ): + members: List[GroupMember] = typing.cast(List[GroupMember], users_batch) + logger.info( + f"Queried {len(members)} members {[user.username for user in members]} from {group.name}" + ) + yield members + except Exception as e: + logger.error( + f"Failed to get members for group={group.name}. error={e}" ) - yield members + return async def enrich_group_with_members(self, group: Group) -> dict[str, Any]: From 24b3551652476ca67a1df90eab5cce1c9321a487 Mon Sep 17 00:00:00 2001 From: mkarmah Date: Fri, 12 Jul 2024 16:49:14 +0000 Subject: [PATCH 07/45] Added error handling to get_all_group_members --- integrations/gitlab/gitlab_integration/gitlab_service.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/integrations/gitlab/gitlab_integration/gitlab_service.py b/integrations/gitlab/gitlab_integration/gitlab_service.py index 268bad1594..21f839caa1 100644 --- a/integrations/gitlab/gitlab_integration/gitlab_service.py +++ b/integrations/gitlab/gitlab_integration/gitlab_service.py @@ -579,9 +579,7 @@ def should_run_for_member(member: User): ) yield members except Exception as e: - logger.error( - f"Failed to get members for group={group.name}. error={e}" - ) + logger.error(f"Failed to get members for group={group.name}. error={e}") return async def enrich_group_with_members(self, group: Group) -> dict[str, Any]: From 12ba8006e186ce0b2aac579a6ca907307e40aae0 Mon Sep 17 00:00:00 2001 From: mkarmah Date: Thu, 25 Jul 2024 19:27:37 +0000 Subject: [PATCH 08/45] updated blueprints --- .../gitlab/.port/resources/blueprints.json | 20 ++++++++----------- .../.port/resources/port-app-config.yaml | 20 ++++++++++++++----- .../gitlab_integration/gitlab_service.py | 8 ++++---- 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/integrations/gitlab/.port/resources/blueprints.json b/integrations/gitlab/.port/resources/blueprints.json index c5e87566a3..e4a4b4e8fe 100644 --- a/integrations/gitlab/.port/resources/blueprints.json +++ b/integrations/gitlab/.port/resources/blueprints.json @@ -64,20 +64,20 @@ "title": "State", "type": "string", "icon": "GitLab", - "description": "The current state of the GitLab item (e.g., open, closed)." + "description": "The current state of the GitLab member (e.g., active)." }, "locked": { "type": "string", "title": "Locked", "icon": "GitLab", - "description": "Indicates if the GitLab item is locked." + "description": "Indicates if the GitLab member is locked." }, "link": { "icon": "Link", "type": "string", "title": "Link", "format": "url", - "description": "URL link to the GitLab item." + "description": "URL link to the GitLab member." }, "email": { "type": "string", @@ -85,13 +85,6 @@ "description": "GitLab primary email address.", "icon": "User", "format": "user" - }, - "publicEmail": { - "type": "string", - "title": "Public Email", - "description": "User's GitLab public email.", - "icon": "User", - "format": "user" } }, "required": [] @@ -111,6 +104,7 @@ "icon": "Lock", "title": "Visibility", "type": "string", + "description": "Visibility status of the group. (e.g public, internal etc. )", "enum": [ "public", "internal", @@ -126,12 +120,14 @@ "title": "URL", "format": "url", "type": "string", - "icon": "Link" + "icon": "Link", + "description": "Link to the gitlab group" }, "description": { "title": "Description", "type": "string", - "icon": "BlankPage" + "icon": "BlankPage", + "description": "A short description of the gitlab group" } }, "required": [] diff --git a/integrations/gitlab/.port/resources/port-app-config.yaml b/integrations/gitlab/.port/resources/port-app-config.yaml index fefa12d466..de4f78e431 100644 --- a/integrations/gitlab/.port/resources/port-app-config.yaml +++ b/integrations/gitlab/.port/resources/port-app-config.yaml @@ -15,11 +15,25 @@ resources: readme: file://README.md description: .description language: .__languages | to_entries | max_by(.value) | .key + - kind: group + selector: + query: 'true' + port: + entity: + mappings: + identifier: .full_path + title: .name + blueprint: '"gitlabGroup"' + properties: + url: .web_url + visibility: .visibility + description: .description + relations: + members: '[.__members[].username]' - kind: member selector: query: 'true' publicEmailVisibility: 'false' - filterBots: 'false' port: entity: mappings: @@ -31,7 +45,3 @@ resources: locked: .locked link: .web_url email: .email - publicEmail: .__public_email - relations: - gitlabGroup: '[.__groups[].full_path]' - createdBy: .created_by.username diff --git a/integrations/gitlab/gitlab_integration/gitlab_service.py b/integrations/gitlab/gitlab_integration/gitlab_service.py index 21f839caa1..0dc151cdea 100644 --- a/integrations/gitlab/gitlab_integration/gitlab_service.py +++ b/integrations/gitlab/gitlab_integration/gitlab_service.py @@ -560,22 +560,22 @@ async def get_all_group_members( def skip_validation(_: User): return True - def should_run_for_member(member: User): + def should_run_for_member(member: GroupMember): return not member.username.__contains__("bot") validation_func = should_run_for_member if filter_bots else skip_validation logger.info(f"Fetching all members of group {group.name}") - async for users_batch in AsyncFetcher.fetch_batch( + async for members_batch in AsyncFetcher.fetch_batch( fetch_func=group.members.list, validation_func=validation_func, pagination="offset", order_by="id", sort="asc", ): - members: List[GroupMember] = typing.cast(List[GroupMember], users_batch) + members: List[GroupMember] = typing.cast(List[GroupMember], members_batch) logger.info( - f"Queried {len(members)} members {[user.username for user in members]} from {group.name}" + f"Queried {len(members)} members {[member.username for member in members]} from {group.name}" ) yield members except Exception as e: From 9f9b706f73a4929e015f485485cae63a172e2ae9 Mon Sep 17 00:00:00 2001 From: mkarmah Date: Fri, 26 Jul 2024 13:20:35 +0000 Subject: [PATCH 09/45] related service to groups --- .../gitlab/.port/resources/blueprints.json | 121 +++++++++--------- .../.port/resources/port-app-config.yaml | 38 +++--- integrations/gitlab/.port/spec.yaml | 2 +- 3 files changed, 86 insertions(+), 75 deletions(-) diff --git a/integrations/gitlab/.port/resources/blueprints.json b/integrations/gitlab/.port/resources/blueprints.json index e4a4b4e8fe..a98f888bb4 100644 --- a/integrations/gitlab/.port/resources/blueprints.json +++ b/integrations/gitlab/.port/resources/blueprints.json @@ -1,61 +1,6 @@ [ { - "identifier": "service", - "title": "Service", - "icon": "GitLab", - "schema": { - "properties": { - "readme": { - "title": "README", - "type": "string", - "format": "markdown", - "icon": "Book" - }, - "url": { - "title": "URL", - "format": "url", - "type": "string", - "icon": "Link" - }, - "language": { - "type": "string", - "title": "Language", - "icon": "Git" - }, - "slack": { - "icon": "Slack", - "type": "string", - "title": "Slack", - "format": "url" - }, - "tier": { - "title": "Tier", - "type": "string", - "description": "How mission-critical the service is", - "enum": [ - "Mission Critical", - "Customer Facing", - "Internal Service", - "Other" - ], - "enumColors": { - "Mission Critical": "turquoise", - "Customer Facing": "green", - "Internal Service": "darkGray", - "Other": "yellow" - }, - "icon": "DefaultProperty" - } - }, - "required": [] - }, - "mirrorProperties": {}, - "calculationProperties": {}, - "aggregationProperties": {}, - "relations": {} - }, - { - "identifier": "member", + "identifier": "gitlabGroupMember", "title": "Member", "icon": "GitLab", "schema": { @@ -138,10 +83,72 @@ "relations": { "members": { "title": "Members", - "target": "member", + "target": "gitlabGroupMember", "required": false, "many": true } } + }, + { + "identifier": "service", + "title": "Service", + "icon": "GitLab", + "schema": { + "properties": { + "readme": { + "title": "README", + "type": "string", + "format": "markdown", + "icon": "Book" + }, + "url": { + "title": "URL", + "format": "url", + "type": "string", + "icon": "Link" + }, + "language": { + "type": "string", + "title": "Language", + "icon": "Git" + }, + "slack": { + "icon": "Slack", + "type": "string", + "title": "Slack", + "format": "url" + }, + "tier": { + "title": "Tier", + "type": "string", + "description": "How mission-critical the service is", + "enum": [ + "Mission Critical", + "Customer Facing", + "Internal Service", + "Other" + ], + "enumColors": { + "Mission Critical": "turquoise", + "Customer Facing": "green", + "Internal Service": "darkGray", + "Other": "yellow" + }, + "icon": "DefaultProperty" + } + }, + "required": [] + }, + "mirrorProperties": {}, + "calculationProperties": {}, + "aggregationProperties": {}, + "relations": { + "group": { + "title": "Group", + "target": "gitlabGroup", + "required": true, + "many": false + } + } } ] diff --git a/integrations/gitlab/.port/resources/port-app-config.yaml b/integrations/gitlab/.port/resources/port-app-config.yaml index de4f78e431..e1718261dd 100644 --- a/integrations/gitlab/.port/resources/port-app-config.yaml +++ b/integrations/gitlab/.port/resources/port-app-config.yaml @@ -1,20 +1,21 @@ createMissingRelatedEntities: true deleteDependentEntities: true resources: - - kind: project + - kind: member selector: - query: "true" + query: 'true' + publicEmailVisibility: 'false' port: entity: mappings: - identifier: .path_with_namespace | gsub(" "; "") + identifier: .username title: .name - blueprint: '"service"' + blueprint: '"gitlabGroupMember"' properties: - url: .web_url - readme: file://README.md - description: .description - language: .__languages | to_entries | max_by(.value) | .key + state: .state + locked: .locked + link: .web_url + email: .email - kind: group selector: query: 'true' @@ -30,18 +31,21 @@ resources: description: .description relations: members: '[.__members[].username]' - - kind: member + - kind: project selector: - query: 'true' - publicEmailVisibility: 'false' + query: "true" port: entity: mappings: - identifier: .username + identifier: .path_with_namespace | gsub(" "; "") title: .name - blueprint: '"member"' + blueprint: '"service"' properties: - state: .state - locked: .locked - link: .web_url - email: .email + url: .web_url + readme: file://README.md + description: .description + language: .__languages | to_entries | max_by(.value) | .key + relations: + group: >- + .path_with_namespace | gsub(" "; "") | split("/") | .[:-1] | + join("/") diff --git a/integrations/gitlab/.port/spec.yaml b/integrations/gitlab/.port/spec.yaml index 7645af4755..534b2875d7 100644 --- a/integrations/gitlab/.port/spec.yaml +++ b/integrations/gitlab/.port/spec.yaml @@ -7,9 +7,9 @@ features: - type: exporter section: Git Providers resources: - - kind: projects - kind: members - kind: groups + - kind: projects configurations: - name: tokenMapping required: true From 9df3202a139d0f4a2204f2a9004b04f1acd10a14 Mon Sep 17 00:00:00 2001 From: mkarmah Date: Fri, 26 Jul 2024 13:24:26 +0000 Subject: [PATCH 10/45] lint --- integrations/gitlab/gitlab_integration/gitlab_service.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/integrations/gitlab/gitlab_integration/gitlab_service.py b/integrations/gitlab/gitlab_integration/gitlab_service.py index 9621def703..5be0b43436 100644 --- a/integrations/gitlab/gitlab_integration/gitlab_service.py +++ b/integrations/gitlab/gitlab_integration/gitlab_service.py @@ -575,7 +575,9 @@ def should_run_for_member(member: GroupMember): order_by="id", sort="asc", ): - members: List[GroupMember] = typing.cast(List[GroupMember], members_batch) + members: List[GroupMember] = typing.cast( + List[GroupMember], members_batch + ) logger.info( f"Queried {len(members)} members {[member.username for member in members]} from {group.name}" ) From 5daceeb0ca5cea75a4b8cdaf9f169ebae89b219b Mon Sep 17 00:00:00 2001 From: mkarmah Date: Fri, 26 Jul 2024 13:43:52 +0000 Subject: [PATCH 11/45] fixed lint issues --- .../gitlab/gitlab_integration/gitlab_service.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/integrations/gitlab/gitlab_integration/gitlab_service.py b/integrations/gitlab/gitlab_integration/gitlab_service.py index 5be0b43436..b9df15c3c6 100644 --- a/integrations/gitlab/gitlab_integration/gitlab_service.py +++ b/integrations/gitlab/gitlab_integration/gitlab_service.py @@ -1,7 +1,7 @@ import asyncio import typing from datetime import datetime, timedelta -from typing import List, Tuple, Any, Union, TYPE_CHECKING +from typing import List, Tuple, Any, Union, TYPE_CHECKING, Callable import anyio.to_thread import yaml @@ -559,13 +559,15 @@ async def get_all_group_members( ) filter_bots = port_app_config.filter_bots - def skip_validation(_: User): + def skip_validation(_: User) -> bool: return True - def should_run_for_member(member: GroupMember): + def should_run_for_member(member: GroupMember) -> bool: return not member.username.__contains__("bot") - validation_func = should_run_for_member if filter_bots else skip_validation + validation_func: Union[ + Callable[[User], bool], Callable[[GroupMember], bool] + ] = (should_run_for_member if filter_bots else skip_validation) logger.info(f"Fetching all members of group {group.name}") async for members_batch in AsyncFetcher.fetch_batch( From 4d98b1d1593e74d9054d5b635bf86d924f3e2c61 Mon Sep 17 00:00:00 2001 From: mkarmah Date: Fri, 26 Jul 2024 17:55:02 +0000 Subject: [PATCH 12/45] change default behavior for filterBots flag --- integrations/gitlab/.port/resources/port-app-config.yaml | 1 + integrations/gitlab/gitlab_integration/git_integration.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/integrations/gitlab/.port/resources/port-app-config.yaml b/integrations/gitlab/.port/resources/port-app-config.yaml index e1718261dd..bcfdb1f938 100644 --- a/integrations/gitlab/.port/resources/port-app-config.yaml +++ b/integrations/gitlab/.port/resources/port-app-config.yaml @@ -1,5 +1,6 @@ createMissingRelatedEntities: true deleteDependentEntities: true +filterBots: false resources: - kind: member selector: diff --git a/integrations/gitlab/gitlab_integration/git_integration.py b/integrations/gitlab/gitlab_integration/git_integration.py index db9ab4ea64..a574129b5c 100644 --- a/integrations/gitlab/gitlab_integration/git_integration.py +++ b/integrations/gitlab/gitlab_integration/git_integration.py @@ -145,8 +145,8 @@ class GitlabPortAppConfig(PortAppConfig): ) filter_bots: bool | None = Field( alias="filterBots", - default=True, - description="If set to true, bots will be filtered out from the members list. Default value is true", + default=False, + description="If set to true, bots will be filtered out from the members list. Default value is false", ) resources: list[GitlabMembersResourceConfig | GitlabResourceConfig] = Field(default_factory=list) # type: ignore From 1b69d29e7061e7b5daece5df97f75b059e74f9d2 Mon Sep 17 00:00:00 2001 From: mkarmah Date: Fri, 26 Jul 2024 19:39:40 +0000 Subject: [PATCH 13/45] made the filterBots and publicEmailVisibility flags unset by default, defaulting to false --- integrations/gitlab/.port/resources/port-app-config.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/integrations/gitlab/.port/resources/port-app-config.yaml b/integrations/gitlab/.port/resources/port-app-config.yaml index bcfdb1f938..45d407f7bb 100644 --- a/integrations/gitlab/.port/resources/port-app-config.yaml +++ b/integrations/gitlab/.port/resources/port-app-config.yaml @@ -1,11 +1,9 @@ createMissingRelatedEntities: true deleteDependentEntities: true -filterBots: false resources: - kind: member selector: query: 'true' - publicEmailVisibility: 'false' port: entity: mappings: From ff737e6cec2a3f54d9b8caaeb9cb7dbc05939533 Mon Sep 17 00:00:00 2001 From: mkarmah Date: Thu, 1 Aug 2024 08:53:29 +0000 Subject: [PATCH 14/45] gitlab member webhook development --- .../gitlab/.port/resources/blueprints.json | 9 +-- .../.port/resources/port-app-config.yaml | 2 - .../gitlab_integration/events/hooks/base.py | 20 +++++- .../gitlab_integration/events/hooks/group.py | 69 ++++++++++++------ .../events/hooks/members.py | 72 +++++++++++++++++++ .../gitlab/gitlab_integration/events/setup.py | 9 ++- .../gitlab_integration/git_integration.py | 23 +++--- .../gitlab_integration/gitlab_service.py | 50 +++++++------ .../gitlab/gitlab_integration/ocean.py | 2 +- 9 files changed, 185 insertions(+), 71 deletions(-) create mode 100644 integrations/gitlab/gitlab_integration/events/hooks/members.py diff --git a/integrations/gitlab/.port/resources/blueprints.json b/integrations/gitlab/.port/resources/blueprints.json index a98f888bb4..1d4f9062d7 100644 --- a/integrations/gitlab/.port/resources/blueprints.json +++ b/integrations/gitlab/.port/resources/blueprints.json @@ -80,14 +80,7 @@ "mirrorProperties": {}, "calculationProperties": {}, "aggregationProperties": {}, - "relations": { - "members": { - "title": "Members", - "target": "gitlabGroupMember", - "required": false, - "many": true - } - } + "relations": {} }, { "identifier": "service", diff --git a/integrations/gitlab/.port/resources/port-app-config.yaml b/integrations/gitlab/.port/resources/port-app-config.yaml index 45d407f7bb..8ff05ed36d 100644 --- a/integrations/gitlab/.port/resources/port-app-config.yaml +++ b/integrations/gitlab/.port/resources/port-app-config.yaml @@ -28,8 +28,6 @@ resources: url: .web_url visibility: .visibility description: .description - relations: - members: '[.__members[].username]' - kind: project selector: query: "true" diff --git a/integrations/gitlab/gitlab_integration/events/hooks/base.py b/integrations/gitlab/gitlab_integration/events/hooks/base.py index 6ed43a3403..e973eb0c37 100644 --- a/integrations/gitlab/gitlab_integration/events/hooks/base.py +++ b/integrations/gitlab/gitlab_integration/events/hooks/base.py @@ -1,8 +1,8 @@ from abc import ABC, abstractmethod from typing import List, Any from loguru import logger -from gitlab.v4.objects import Project - +from gitlab.v4.objects import Project, Group +from gitlab_integration.core.async_fetcher import AsyncFetcher from gitlab_integration.gitlab_service import GitlabService @@ -44,3 +44,19 @@ async def on_hook(self, event: str, body: dict[str, Any]) -> None: @abstractmethod async def _on_hook(self, body: dict[str, Any], gitlab_project: Project) -> None: pass + + +class GroupHandler(HookHandler): + async def on_hook(self, event: str, body: dict[str, Any]) -> None: + event_name = body["event_name"] + group_id = body.get("group_id", body.get("group", {}).get("id")) + logger.info(f"Handling {event_name} for {event} and group {group_id}") + group = await self.gitlab_service.get_group(group_id) + group_path = body.get('full_path',body.get('group_path')) + logger.info(f"Handling hook {event} for group {group_path}") + await self._on_hook(body, group) + logger.info(f"Finished handling {event} for group {group_path}") + + @abstractmethod + async def _on_hook(self, body: dict[str, Any], gitlab_group: Group) -> None: + pass diff --git a/integrations/gitlab/gitlab_integration/events/hooks/group.py b/integrations/gitlab/gitlab_integration/events/hooks/group.py index d4713ac786..278b68161d 100644 --- a/integrations/gitlab/gitlab_integration/events/hooks/group.py +++ b/integrations/gitlab/gitlab_integration/events/hooks/group.py @@ -4,36 +4,61 @@ from gitlab_integration.events.hooks.base import HookHandler from gitlab_integration.utils import ObjectKind +from gitlab_integration.events.hooks.base import GroupHandler from port_ocean.context.ocean import ocean +from gitlab.v4.objects import Group -class GroupHook(HookHandler): - events = ["Subgroup Hook"] - system_events = [ - "group_destroy", - "group_create", - "group_rename", - ] - async def on_hook(self, event: str, body: dict[str, Any]) -> None: - event_name = body["event_name"] +# class GroupHook(HookHandler): +# events = ["Subgroup Hook"] +# system_events = [ +# "group_destroy", +# "group_create", +# "group_rename", +# ] + +# async def on_hook(self, event: str, body: dict[str, Any]) -> None: +# event_name = body["event_name"] + +# logger.info(f"Handling {event_name} for {event}") - logger.info(f"Handling {event_name} for {event}") +# group_id = body["group_id"] if "group_id" in body else body["group"]["id"] - group_id = body["group_id"] if "group_id" in body else body["group"]["id"] +# logger.info(f"Handling hook {event} for group {group_id}") - logger.info(f"Handling hook {event} for group {group_id}") +# group = await self.gitlab_service.get_group(group_id) - group = await self.gitlab_service.get_group(group_id) +# group_full_path = body.get("full_path") +# if group: +# await ocean.register_raw(ObjectKind.GROUP, [self.gitlab_service.enrich_group_with_members(group)]) +# elif ( +# group_full_path +# and self.gitlab_service.should_run_for_path(group_full_path) +# and event_name in ("subgroup_destroy", "group_destroy") +# ): +# await ocean.unregister_raw(ObjectKind.GROUP, [body]) +# else: +# logger.info(f"Group {group_id} was filtered for event {event}. Skipping...") + + +class Groups(GroupHandler): + events = ['Subgroup Hook', "Member Hook"] + system_events = [ + "group_destroy", + "group_create", + "group_rename", + "user_add_to_group", + "user_remove_from_group", + ] - group_full_path = body.get("full_path") - if group: - await ocean.register_raw(ObjectKind.GROUP, [group.asdict()]) - elif ( - group_full_path - and self.gitlab_service.should_run_for_path(group_full_path) - and event_name in ("subgroup_destroy", "group_destroy") - ): + async def _on_hook(self, body: dict[str, Any], gitlab_group: Group) -> None: + print("Event Triggered in group handler", body) + if gitlab_group and self.gitlab_service.should_run_for_group(gitlab_group): + await ocean.register_raw(ObjectKind.GROUP, [await self.gitlab_service.enrich_group_with_members(gitlab_group)]) + + elif body['event_name'] in ("subgroup_destroy", "group_destroy"): await ocean.unregister_raw(ObjectKind.GROUP, [body]) + else: - logger.info(f"Group {group_id} was filtered for event {event}. Skipping...") + logger.warning(f"Group {body['group_id']} was filtered. Skipping ...") diff --git a/integrations/gitlab/gitlab_integration/events/hooks/members.py b/integrations/gitlab/gitlab_integration/events/hooks/members.py new file mode 100644 index 0000000000..ebe977e333 --- /dev/null +++ b/integrations/gitlab/gitlab_integration/events/hooks/members.py @@ -0,0 +1,72 @@ +import typing +from typing import Any +from loguru import logger + +from gitlab_integration.events.hooks.base import HookHandler +from gitlab_integration.utils import ObjectKind +from port_ocean.context.ocean import ocean +from gitlab_integration.events.hooks.base import GroupHandler +from gitlab_integration.git_integration import MembersSelector +from port_ocean.context.event import event +from gitlab.v4.objects import Group, GroupMember +from gitlab_integration.git_integration import GitlabPortAppConfig + + +class Members(GroupHandler): + events = ["Member Hook"] + system_events = [ + "user_remove_from_group", + "user_update_for_group", + "user_add_to_group", + ] + + async def _on_hook(self, body: dict[str, Any], gitlab_group: Group) -> None: + + event_name, user_username = body["event_name"], body["user_username"] + logger.info(f"Handling {event_name} for group member {user_username}") + + if event_name == "user_remove_from_group": + body['username'] = body['user_username'] + await ocean.unregister_raw(ObjectKind.MEMBER, [body]) + + elif group_member := await self.gitlab_service.get_group_member( + gitlab_group, body["user_id"] + ): + await self._register_group_member(group_member) + + else: + logger.warning( + f"Group Member {user_username} was filtered. Skipping ..." + ) + + async def _register_group_member( + self, group_member: GroupMember + ) -> None: + + resource_configs=typing.cast( + GitlabPortAppConfig, event.port_app_config + ).resources + + matching_resource_configs = [ + resource_config + for resource_config in resource_configs + if ( + resource_config.kind == ObjectKind.MEMBER + and isinstance(resource_config.selector, MembersSelector) + ) + ] + if not matching_resource_configs: + logger.info( + "Member resource not found in port app config, update port app config to include the resource type" + ) + return + for resource_config in matching_resource_configs: + enrich_with_public_email = resource_config.selector.enrich_with_public_email + if enrich_with_public_email: + group_member = await self.gitlab_service.enrich_member_with_public_email( + group_member + ) + else: + group_member = group_member.asdict() + + await ocean.register_raw(ObjectKind.MEMBER, [group_member]) diff --git a/integrations/gitlab/gitlab_integration/events/setup.py b/integrations/gitlab/gitlab_integration/events/setup.py index 4c6fb41692..c00a86e308 100644 --- a/integrations/gitlab/gitlab_integration/events/setup.py +++ b/integrations/gitlab/gitlab_integration/events/setup.py @@ -11,7 +11,8 @@ from gitlab_integration.events.hooks.merge_request import MergeRequest from gitlab_integration.events.hooks.pipelines import Pipelines from gitlab_integration.events.hooks.push import PushHook -from gitlab_integration.events.hooks.group import GroupHook +from gitlab_integration.events.hooks.members import Members +from gitlab_integration.events.hooks.group import Groups from gitlab_integration.gitlab_service import GitlabService from gitlab_integration.models.webhook_groups_override_config import ( WebhookMappingConfig, @@ -121,7 +122,8 @@ def setup_listeners(gitlab_service: GitlabService, webhook_id: str) -> None: Job(gitlab_service), Issues(gitlab_service), Pipelines(gitlab_service), - GroupHook(gitlab_service), + Groups(gitlab_service), + Members(gitlab_service) ] for handler in handlers: logger.info( @@ -138,7 +140,8 @@ def setup_system_listeners(gitlab_clients: list[GitlabService]) -> None: Job, Issues, Pipelines, - GroupHook, + Groups, + Members ] for handler in handlers: system_event_handler.on(handler) diff --git a/integrations/gitlab/gitlab_integration/git_integration.py b/integrations/gitlab/gitlab_integration/git_integration.py index a574129b5c..57123a6eda 100644 --- a/integrations/gitlab/gitlab_integration/git_integration.py +++ b/integrations/gitlab/gitlab_integration/git_integration.py @@ -66,7 +66,7 @@ async def _search(self, data: Dict[str, Any], pattern: str) -> Any: match = None if project: if scope == "blobs": - # if the query does not contain a path filter, we add the base path to the query + # if the query does not contain a path ots, we add the base path to the query # this is done to avoid searching the entire project for the file, if the base path is known # having the base path applies to the case where we export a folder as a monorepo if base_path and "path:" not in query: @@ -121,15 +121,15 @@ class GitlabSelector(Selector): class GitlabResourceConfig(ResourceConfig): selector: GitlabSelector +class MembersSelector(Selector): -class GitlabMembersResourceConfig(ResourceConfig): - class MembersSelector(Selector): - public_email_visibility: bool | None = Field( - alias="publicEmailVisibility", - default=False, - description="If set to true, the integration will enrich members with public email field. Default value is false", - ) + enrich_with_public_email: bool | None = Field( + alias="enrichWithPublicEmail", + default=False, + description="If set to true, the integration will enrich members with public email field. Default value is false", + ) +class GitlabMembersResourceConfig(ResourceConfig): kind: Literal["member"] selector: MembersSelector @@ -143,8 +143,11 @@ class GitlabPortAppConfig(PortAppConfig): project_visibility_filter: str | None = Field( alias="projectVisibilityFilter", default=None ) - filter_bots: bool | None = Field( - alias="filterBots", + # The "include bot members" flag affects both the "group" and "member" kinds. + # To prevent inconsistencies, the behavior or value of this parameter should be consistent for both "groups" and "members". + # Therefore, it should be included at the top level of the configuration. + include_bot_members: bool | None = Field( + alias="includeBotMembers", default=False, description="If set to true, bots will be filtered out from the members list. Default value is false", ) diff --git a/integrations/gitlab/gitlab_integration/gitlab_service.py b/integrations/gitlab/gitlab_integration/gitlab_service.py index b9df15c3c6..dc495f745e 100644 --- a/integrations/gitlab/gitlab_integration/gitlab_service.py +++ b/integrations/gitlab/gitlab_integration/gitlab_service.py @@ -7,6 +7,7 @@ import yaml from gitlab import Gitlab, GitlabList, GitlabError from gitlab.base import RESTObject +import gitlab.exceptions from gitlab.v4.objects import ( Project, MergeRequest, @@ -31,7 +32,6 @@ GROUPS_CACHE_KEY = "__cache_all_groups" MEMBERS_CACHE_KEY = "__cache_all_members" -MAX_CONCURRENT_TASKS = 30 if TYPE_CHECKING: from gitlab_integration.git_integration import GitlabPortAppConfig @@ -48,6 +48,7 @@ class GitlabService: "tag_push_events", "subgroup_events", "confidential_issues_events", + "member_events" ] def __init__( @@ -319,12 +320,15 @@ async def get_project(self, project_id: int) -> Project | None: return None async def get_group(self, group_id: int) -> Group | None: - logger.info(f"fetching group {group_id}") - group = await AsyncFetcher.fetch_single(self.gitlab_client.groups.get, group_id) - if isinstance(group, Group) and self.should_run_for_group(group): + try: + logger.info(f"fetching group {group_id}") + group_response = await AsyncFetcher.fetch_single(self.gitlab_client.groups.get, group_id) + group: Group = typing.cast(Group, group_response) return group - else: - return None + except gitlab.exceptions.GitlabGetError as err: + if err.response_code == 404: + return None + async def get_all_groups(self) -> typing.AsyncIterator[List[Group]]: logger.info("fetching all groups for the token") @@ -549,30 +553,22 @@ async def get_all_issues(self, group: Group) -> typing.AsyncIterator[List[Issue] issues: List[Issue] = typing.cast(List[Issue], issues_batch) yield issues + def should_run_for_members(self, member: GroupMember): + port_app_config: GitlabPortAppConfig = typing.cast( + "GitlabPortAppConfig", event.port_app_config + ) + include_bot_members = port_app_config.include_bot_members + return not(member.username.__contains__("bot")) if include_bot_members else True + async def get_all_group_members( self, group: Group ) -> typing.AsyncIterator[List[GroupMember]]: - try: - port_app_config: GitlabPortAppConfig = typing.cast( - "GitlabPortAppConfig", event.port_app_config - ) - filter_bots = port_app_config.filter_bots - - def skip_validation(_: User) -> bool: - return True - - def should_run_for_member(member: GroupMember) -> bool: - return not member.username.__contains__("bot") - - validation_func: Union[ - Callable[[User], bool], Callable[[GroupMember], bool] - ] = (should_run_for_member if filter_bots else skip_validation) logger.info(f"Fetching all members of group {group.name}") async for members_batch in AsyncFetcher.fetch_batch( fetch_func=group.members.list, - validation_func=validation_func, + validation_func=self.should_run_for_members, pagination="offset", order_by="id", sort="asc", @@ -589,7 +585,6 @@ def should_run_for_member(member: GroupMember) -> bool: return async def enrich_group_with_members(self, group: Group) -> dict[str, Any]: - group_members = [ member async for members in self.get_all_group_members(group) @@ -620,6 +615,15 @@ async def get_user(self, user_id: str) -> User: user: User = typing.cast(User, user_response) return user + async def get_group_member(self, group: Group, member_id: int) -> GroupMember: + logger.info(f"fetching group member {member_id} from group {group.id}") + group_member = await AsyncFetcher.fetch_single( + group.members.get, member_id + ) + if self.should_run_for_members(group_member): + return group_member + return None + def get_entities_diff( self, project: Project, diff --git a/integrations/gitlab/gitlab_integration/ocean.py b/integrations/gitlab/gitlab_integration/ocean.py index 059486ecb7..9edb93447e 100644 --- a/integrations/gitlab/gitlab_integration/ocean.py +++ b/integrations/gitlab/gitlab_integration/ocean.py @@ -223,7 +223,7 @@ async def resync_members(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: for service in get_cached_all_services(): for group in service.get_root_groups(): async for members in service.get_all_group_members(group): - if selector.public_email_visibility: + if selector.enrich_with_public_email: yield [ await service.enrich_member_with_public_email(member) for member in members From 47e2e2455f33fbfd8ee6218de1d7da697f5a14f8 Mon Sep 17 00:00:00 2001 From: mkarmah Date: Fri, 2 Aug 2024 09:47:33 +0000 Subject: [PATCH 15/45] support for member webhook --- .../gitlab_integration/core/async_fetcher.py | 2 + .../gitlab_integration/events/hooks/base.py | 21 ++-- .../gitlab_integration/events/hooks/group.py | 62 +++--------- .../events/hooks/members.py | 95 +++++++++++++------ .../gitlab/gitlab_integration/events/setup.py | 4 +- .../gitlab/gitlab_integration/events/utils.py | 13 +++ .../gitlab_integration/git_integration.py | 2 + .../gitlab_integration/gitlab_service.py | 61 +++++++----- .../gitlab/gitlab_integration/utils.py | 8 +- 9 files changed, 155 insertions(+), 113 deletions(-) create mode 100644 integrations/gitlab/gitlab_integration/events/utils.py diff --git a/integrations/gitlab/gitlab_integration/core/async_fetcher.py b/integrations/gitlab/gitlab_integration/core/async_fetcher.py index 1028e6546d..d56a7b29c9 100644 --- a/integrations/gitlab/gitlab_integration/core/async_fetcher.py +++ b/integrations/gitlab/gitlab_integration/core/async_fetcher.py @@ -13,6 +13,7 @@ Issue, Group, User, + GroupMember, ) from loguru import logger @@ -36,6 +37,7 @@ async def fetch_single( Project, Group, User, + GroupMember, ], ], *args, diff --git a/integrations/gitlab/gitlab_integration/events/hooks/base.py b/integrations/gitlab/gitlab_integration/events/hooks/base.py index e973eb0c37..b5c85254fb 100644 --- a/integrations/gitlab/gitlab_integration/events/hooks/base.py +++ b/integrations/gitlab/gitlab_integration/events/hooks/base.py @@ -1,9 +1,10 @@ from abc import ABC, abstractmethod -from typing import List, Any +from typing import List, Any, Optional from loguru import logger from gitlab.v4.objects import Project, Group -from gitlab_integration.core.async_fetcher import AsyncFetcher from gitlab_integration.gitlab_service import GitlabService +from gitlab_integration.utils import ObjectKind +from port_ocean.context.ocean import ocean class HookHandler(ABC): @@ -48,15 +49,21 @@ async def _on_hook(self, body: dict[str, Any], gitlab_project: Project) -> None: class GroupHandler(HookHandler): async def on_hook(self, event: str, body: dict[str, Any]) -> None: - event_name = body["event_name"] group_id = body.get("group_id", body.get("group", {}).get("id")) - logger.info(f"Handling {event_name} for {event} and group {group_id}") group = await self.gitlab_service.get_group(group_id) - group_path = body.get('full_path',body.get('group_path')) - logger.info(f"Handling hook {event} for group {group_path}") await self._on_hook(body, group) + group_path = body.get("full_path", body.get("group_path")) logger.info(f"Finished handling {event} for group {group_path}") @abstractmethod - async def _on_hook(self, body: dict[str, Any], gitlab_group: Group) -> None: + async def _on_hook( + self, body: dict[str, Any], gitlab_group: Optional[Group] + ) -> None: pass + + async def _register_group(self, gitlab_group: Group) -> None: + if self.gitlab_service.should_run_for_group(gitlab_group): + await ocean.register_raw( + ObjectKind.GROUP, + [await self.gitlab_service.enrich_group_with_members(gitlab_group)], + ) diff --git a/integrations/gitlab/gitlab_integration/events/hooks/group.py b/integrations/gitlab/gitlab_integration/events/hooks/group.py index 278b68161d..c8e3d0f424 100644 --- a/integrations/gitlab/gitlab_integration/events/hooks/group.py +++ b/integrations/gitlab/gitlab_integration/events/hooks/group.py @@ -1,64 +1,24 @@ -from typing import Any +from typing import Any, Optional from loguru import logger -from gitlab_integration.events.hooks.base import HookHandler from gitlab_integration.utils import ObjectKind from gitlab_integration.events.hooks.base import GroupHandler from port_ocean.context.ocean import ocean from gitlab.v4.objects import Group - -# class GroupHook(HookHandler): -# events = ["Subgroup Hook"] -# system_events = [ -# "group_destroy", -# "group_create", -# "group_rename", -# ] - -# async def on_hook(self, event: str, body: dict[str, Any]) -> None: -# event_name = body["event_name"] - -# logger.info(f"Handling {event_name} for {event}") - -# group_id = body["group_id"] if "group_id" in body else body["group"]["id"] - -# logger.info(f"Handling hook {event} for group {group_id}") - -# group = await self.gitlab_service.get_group(group_id) - -# group_full_path = body.get("full_path") -# if group: -# await ocean.register_raw(ObjectKind.GROUP, [self.gitlab_service.enrich_group_with_members(group)]) -# elif ( -# group_full_path -# and self.gitlab_service.should_run_for_path(group_full_path) -# and event_name in ("subgroup_destroy", "group_destroy") -# ): -# await ocean.unregister_raw(ObjectKind.GROUP, [body]) -# else: -# logger.info(f"Group {group_id} was filtered for event {event}. Skipping...") - - class Groups(GroupHandler): - events = ['Subgroup Hook', "Member Hook"] - system_events = [ - "group_destroy", - "group_create", - "group_rename", - "user_add_to_group", - "user_remove_from_group", - ] - - async def _on_hook(self, body: dict[str, Any], gitlab_group: Group) -> None: - print("Event Triggered in group handler", body) - if gitlab_group and self.gitlab_service.should_run_for_group(gitlab_group): - await ocean.register_raw(ObjectKind.GROUP, [await self.gitlab_service.enrich_group_with_members(gitlab_group)]) - - elif body['event_name'] in ("subgroup_destroy", "group_destroy"): + events = ["Subgroup Hook"] + system_events = ["group_destroy", "group_create", "group_rename"] + + async def _on_hook( + self, body: dict[str, Any], gitlab_group: Optional[Group] + ) -> None: + logger.info(f"Handling {body['event_name']} for group {body['group_id']}") + if gitlab_group: + await self._register_group(gitlab_group) + elif body["event_name"] in ("subgroup_destroy", "group_destroy"): await ocean.unregister_raw(ObjectKind.GROUP, [body]) - else: logger.warning(f"Group {body['group_id']} was filtered. Skipping ...") diff --git a/integrations/gitlab/gitlab_integration/events/hooks/members.py b/integrations/gitlab/gitlab_integration/events/hooks/members.py index ebe977e333..79b17b9aa6 100644 --- a/integrations/gitlab/gitlab_integration/events/hooks/members.py +++ b/integrations/gitlab/gitlab_integration/events/hooks/members.py @@ -1,8 +1,8 @@ import typing -from typing import Any +from typing import Any, List, Optional from loguru import logger +import asyncio -from gitlab_integration.events.hooks.base import HookHandler from gitlab_integration.utils import ObjectKind from port_ocean.context.ocean import ocean from gitlab_integration.events.hooks.base import GroupHandler @@ -10,6 +10,9 @@ from port_ocean.context.event import event from gitlab.v4.objects import Group, GroupMember from gitlab_integration.git_integration import GitlabPortAppConfig +from gitlab_integration.events.utils import remove_prefix_from_keys + +CONCURENT_TASKS_LIMIT = 10 class Members(GroupHandler): @@ -20,30 +23,47 @@ class Members(GroupHandler): "user_add_to_group", ] - async def _on_hook(self, body: dict[str, Any], gitlab_group: Group) -> None: - - event_name, user_username = body["event_name"], body["user_username"] + async def _on_hook( + self, body: dict[str, Any], gitlab_group: Optional[Group] + ) -> None: + event_name, user_username = (body["event_name"], body["user_username"]) logger.info(f"Handling {event_name} for group member {user_username}") if event_name == "user_remove_from_group": - body['username'] = body['user_username'] - await ocean.unregister_raw(ObjectKind.MEMBER, [body]) - - elif group_member := await self.gitlab_service.get_group_member( - gitlab_group, body["user_id"] - ): - await self._register_group_member(group_member) - + # This event is triggered by GitLab when a group or subgroup is destroyed. + # When a group is deleted, GitLab tries to remove all direct members associated with that group. + # However, to prevent accidental deletion of members who may also be part of other groups, + # we perform a check to determine if the member is associated with any other groups. + # If the member is not associated with any other groups, we proceed to delete the member from Port. + # Otherwise, we skip the deletion process to ensure that members are not inadvertently removed + # from groups they are still part of. + if not (await self._is_root_group_member(body["user_id"])): + body = remove_prefix_from_keys( + "user_", body + ) # Removing user_ prefix from the keys makes the event data close to being consistent with the member api response data. + # Thereby enhancing flexibility in processing custom identifiers. + await ocean.unregister_raw(ObjectKind.MEMBER, [body]) + else: + logger.warning( + f"Group member {user_username} belongs to other groups. Skipping ..." + ) + + elif gitlab_group: + if group_member := await self.gitlab_service.get_group_member( + gitlab_group, body["user_id"] + ): + await self._register_group_member(group_member) + if body["event_name"] == "user_add_to_group": + # This step ensures that when a new user is added to a group, we update the group entities to link the newly created member to the group. + # Note: This event is triggered by Gitlab when a group or subgroup is created. + await self._register_group(gitlab_group) + else: - logger.warning( - f"Group Member {user_username} was filtered. Skipping ..." - ) + logger.warning(f"Group Member {user_username} was filtered. Skipping ...") - async def _register_group_member( - self, group_member: GroupMember - ) -> None: + async def _register_group_member(self, group_member: GroupMember) -> None: - resource_configs=typing.cast( + resource_configs = typing.cast( GitlabPortAppConfig, event.port_app_config ).resources @@ -62,11 +82,32 @@ async def _register_group_member( return for resource_config in matching_resource_configs: enrich_with_public_email = resource_config.selector.enrich_with_public_email - if enrich_with_public_email: - group_member = await self.gitlab_service.enrich_member_with_public_email( - group_member + member = ( + await self.gitlab_service.enrich_member_with_public_email(group_member) + if enrich_with_public_email + else group_member.asdict() + ) + + await ocean.register_raw(ObjectKind.MEMBER, [member]) + + async def _is_root_group_member(self, member_id: int) -> bool: + root_groups: List[Group] = self.gitlab_service.get_root_groups() + semaphore = asyncio.Semaphore(CONCURENT_TASKS_LIMIT) + + async def check_group(group: Group) -> bool: + async with semaphore: + return any( + [await self.gitlab_service.get_group_member(group, member_id)] ) - else: - group_member = group_member.asdict() - - await ocean.register_raw(ObjectKind.MEMBER, [group_member]) + + tasks = [asyncio.create_task(check_group(group)) for group in root_groups] + for completed_task in asyncio.as_completed(tasks): + try: + result = await completed_task + if result: + return True # A single validation is enough + except Exception as e: + logger.error( + f"Error checking group membership for member {member_id}: {e}" + ) + return False diff --git a/integrations/gitlab/gitlab_integration/events/setup.py b/integrations/gitlab/gitlab_integration/events/setup.py index c00a86e308..619371df74 100644 --- a/integrations/gitlab/gitlab_integration/events/setup.py +++ b/integrations/gitlab/gitlab_integration/events/setup.py @@ -123,7 +123,7 @@ def setup_listeners(gitlab_service: GitlabService, webhook_id: str) -> None: Issues(gitlab_service), Pipelines(gitlab_service), Groups(gitlab_service), - Members(gitlab_service) + Members(gitlab_service), ] for handler in handlers: logger.info( @@ -141,7 +141,7 @@ def setup_system_listeners(gitlab_clients: list[GitlabService]) -> None: Issues, Pipelines, Groups, - Members + Members, ] for handler in handlers: system_event_handler.on(handler) diff --git a/integrations/gitlab/gitlab_integration/events/utils.py b/integrations/gitlab/gitlab_integration/events/utils.py new file mode 100644 index 0000000000..6a72e959ab --- /dev/null +++ b/integrations/gitlab/gitlab_integration/events/utils.py @@ -0,0 +1,13 @@ +from typing import Any, Dict + + +def remove_prefix_from_keys(prefix: str, data: Dict[str, Any]) -> Dict[str, Any]: + """ + Removes the prefix from dictionary keys. + Args: + prefix (str): The prefix to remove from the keys + data (dict[str, Any]): The original dictionary with keys that may start with the given prefix. + Returns: + dict[str, Any]: A new dictionary with `prefix` stripped from the keys. + """ + return {key.replace(prefix, "", 1): value for key, value in data.items()} diff --git a/integrations/gitlab/gitlab_integration/git_integration.py b/integrations/gitlab/gitlab_integration/git_integration.py index 57123a6eda..25733f289b 100644 --- a/integrations/gitlab/gitlab_integration/git_integration.py +++ b/integrations/gitlab/gitlab_integration/git_integration.py @@ -121,6 +121,7 @@ class GitlabSelector(Selector): class GitlabResourceConfig(ResourceConfig): selector: GitlabSelector + class MembersSelector(Selector): enrich_with_public_email: bool | None = Field( @@ -129,6 +130,7 @@ class MembersSelector(Selector): description="If set to true, the integration will enrich members with public email field. Default value is false", ) + class GitlabMembersResourceConfig(ResourceConfig): kind: Literal["member"] selector: MembersSelector diff --git a/integrations/gitlab/gitlab_integration/gitlab_service.py b/integrations/gitlab/gitlab_integration/gitlab_service.py index dc495f745e..521ccf5126 100644 --- a/integrations/gitlab/gitlab_integration/gitlab_service.py +++ b/integrations/gitlab/gitlab_integration/gitlab_service.py @@ -1,7 +1,7 @@ import asyncio import typing from datetime import datetime, timedelta -from typing import List, Tuple, Any, Union, TYPE_CHECKING, Callable +from typing import List, Tuple, Any, Union, TYPE_CHECKING, Optional import anyio.to_thread import yaml @@ -27,6 +27,7 @@ from gitlab_integration.core.utils import does_pattern_apply from port_ocean.context.event import event from port_ocean.core.models import Entity +from port_ocean.utils.cache import cache_iterator_result PROJECTS_CACHE_KEY = "__cache_all_projects" GROUPS_CACHE_KEY = "__cache_all_groups" @@ -48,7 +49,7 @@ class GitlabService: "tag_push_events", "subgroup_events", "confidential_issues_events", - "member_events" + "member_events", ] def __init__( @@ -319,28 +320,26 @@ async def get_project(self, project_id: int) -> Project | None: else: return None - async def get_group(self, group_id: int) -> Group | None: + async def get_group(self, group_id: int) -> Optional[Group]: try: - logger.info(f"fetching group {group_id}") - group_response = await AsyncFetcher.fetch_single(self.gitlab_client.groups.get, group_id) + logger.info(f"Fetching group with ID: {group_id}") + group_response = await AsyncFetcher.fetch_single( + self.gitlab_client.groups.get, group_id + ) group: Group = typing.cast(Group, group_response) return group except gitlab.exceptions.GitlabGetError as err: if err.response_code == 404: + logger.warning(f"Group with ID {group_id} not found (404).") return None + else: + logger.error(f"Failed to fetch group with ID {group_id}: {err}") + raise - + @cache_iterator_result() async def get_all_groups(self) -> typing.AsyncIterator[List[Group]]: logger.info("fetching all groups for the token") - cached_groups = event.attributes.setdefault(GROUPS_CACHE_KEY, {}).setdefault( - self.gitlab_client.private_token, {} - ) - - if cached_groups: - yield cached_groups.values() - return - async for groups_batch in AsyncFetcher.fetch_batch( fetch_func=self.gitlab_client.groups.list, validation_func=self.should_run_for_group, @@ -352,7 +351,6 @@ async def get_all_groups(self) -> typing.AsyncIterator[List[Group]]: logger.info( f"Queried {len(groups)} groups {[group.path for group in groups]}" ) - cached_groups.update({group.id: group for group in groups}) yield groups async def get_all_projects(self) -> typing.AsyncIterator[List[Project]]: @@ -558,7 +556,9 @@ def should_run_for_members(self, member: GroupMember): "GitlabPortAppConfig", event.port_app_config ) include_bot_members = port_app_config.include_bot_members - return not(member.username.__contains__("bot")) if include_bot_members else True + return ( + not (member.username.__contains__("bot")) if include_bot_members else True + ) async def get_all_group_members( self, group: Group @@ -601,7 +601,9 @@ async def enrich_group_with_members(self, group: Group) -> dict[str, Any]: ) return group_dict - async def enrich_member_with_public_email(self, member) -> dict[str, Any]: + async def enrich_member_with_public_email( + self, member: GroupMember + ) -> dict[str, Any]: user: User = await self.get_user(member.id) member_dict: dict[str, Any] = member.asdict() member_dict.update({"__public_email": user.public_email}) @@ -615,14 +617,23 @@ async def get_user(self, user_id: str) -> User: user: User = typing.cast(User, user_response) return user - async def get_group_member(self, group: Group, member_id: int) -> GroupMember: - logger.info(f"fetching group member {member_id} from group {group.id}") - group_member = await AsyncFetcher.fetch_single( - group.members.get, member_id - ) - if self.should_run_for_members(group_member): - return group_member - return None + async def get_group_member( + self, group: Group, member_id: int + ) -> Optional[GroupMember]: + try: + + logger.info(f"fetching group member {member_id} from group {group.id}") + result = await AsyncFetcher.fetch_single(group.members.get, member_id) + group_member = typing.cast(GroupMember, result) + return group_member if self.should_run_for_members(group_member) else None + + except gitlab.exceptions.GitlabGetError as err: + if err.response_code == 404: + logger.warning(f"Group Member with ID {member_id} not found (404).") + return None + else: + logger.error(f"Failed to fetch group with ID {member_id}: {err}") + raise def get_entities_diff( self, diff --git a/integrations/gitlab/gitlab_integration/utils.py b/integrations/gitlab/gitlab_integration/utils.py index c02b5e576a..8db703e30e 100644 --- a/integrations/gitlab/gitlab_integration/utils.py +++ b/integrations/gitlab/gitlab_integration/utils.py @@ -7,6 +7,8 @@ from port_ocean.context.ocean import ocean from port_ocean.exceptions.context import EventContextNotFoundError +RETRY_TRANSIENT_ERRORS = True + def get_all_services() -> List[GitlabService]: logic_settings = ocean.integration_config @@ -16,7 +18,11 @@ def get_all_services() -> List[GitlabService]: f"Creating gitlab clients for {len(logic_settings['token_mapping'])} tokens" ) for token, group_mapping in logic_settings["token_mapping"].items(): - gitlab_client = Gitlab(logic_settings["gitlab_host"], token) + gitlab_client = Gitlab( + logic_settings["gitlab_host"], + token, + retry_transient_errors=RETRY_TRANSIENT_ERRORS, + ) gitlab_service = GitlabService( gitlab_client, logic_settings["app_host"], group_mapping ) From 4457ba309d3529d84534ed1de3f1d62471f22e75 Mon Sep 17 00:00:00 2001 From: mkarmah Date: Wed, 7 Aug 2024 01:15:36 +0000 Subject: [PATCH 16/45] updated scripts for syncing members --- .../gitlab/.port/resources/blueprints.json | 92 +------------------ .../.port/resources/port-app-config.yaml | 31 ------- .../gitlab_integration/core/async_fetcher.py | 11 ++- .../events/hooks/members.py | 16 +--- .../gitlab_integration/git_integration.py | 4 +- .../gitlab_integration/gitlab_service.py | 41 ++++++--- .../gitlab/gitlab_integration/ocean.py | 32 +++++-- 7 files changed, 63 insertions(+), 164 deletions(-) diff --git a/integrations/gitlab/.port/resources/blueprints.json b/integrations/gitlab/.port/resources/blueprints.json index 1d4f9062d7..b51ae9d793 100644 --- a/integrations/gitlab/.port/resources/blueprints.json +++ b/integrations/gitlab/.port/resources/blueprints.json @@ -1,87 +1,4 @@ [ - { - "identifier": "gitlabGroupMember", - "title": "Member", - "icon": "GitLab", - "schema": { - "properties": { - "state": { - "title": "State", - "type": "string", - "icon": "GitLab", - "description": "The current state of the GitLab member (e.g., active)." - }, - "locked": { - "type": "string", - "title": "Locked", - "icon": "GitLab", - "description": "Indicates if the GitLab member is locked." - }, - "link": { - "icon": "Link", - "type": "string", - "title": "Link", - "format": "url", - "description": "URL link to the GitLab member." - }, - "email": { - "type": "string", - "title": "Email", - "description": "GitLab primary email address.", - "icon": "User", - "format": "user" - } - }, - "required": [] - }, - "mirrorProperties": {}, - "calculationProperties": {}, - "aggregationProperties": {}, - "relations": {} - }, - { - "identifier": "gitlabGroup", - "title": "Group", - "icon": "GitLab", - "schema": { - "properties": { - "visibility": { - "icon": "Lock", - "title": "Visibility", - "type": "string", - "description": "Visibility status of the group. (e.g public, internal etc. )", - "enum": [ - "public", - "internal", - "private" - ], - "enumColors": { - "public": "red", - "internal": "yellow", - "private": "green" - } - }, - "url": { - "title": "URL", - "format": "url", - "type": "string", - "icon": "Link", - "description": "Link to the gitlab group" - }, - "description": { - "title": "Description", - "type": "string", - "icon": "BlankPage", - "description": "A short description of the gitlab group" - } - }, - "required": [] - }, - "mirrorProperties": {}, - "calculationProperties": {}, - "aggregationProperties": {}, - "relations": {} - }, { "identifier": "service", "title": "Service", @@ -135,13 +52,6 @@ "mirrorProperties": {}, "calculationProperties": {}, "aggregationProperties": {}, - "relations": { - "group": { - "title": "Group", - "target": "gitlabGroup", - "required": true, - "many": false - } - } + "relations": {} } ] diff --git a/integrations/gitlab/.port/resources/port-app-config.yaml b/integrations/gitlab/.port/resources/port-app-config.yaml index 8ff05ed36d..d1c2882763 100644 --- a/integrations/gitlab/.port/resources/port-app-config.yaml +++ b/integrations/gitlab/.port/resources/port-app-config.yaml @@ -1,33 +1,6 @@ createMissingRelatedEntities: true deleteDependentEntities: true resources: - - kind: member - selector: - query: 'true' - port: - entity: - mappings: - identifier: .username - title: .name - blueprint: '"gitlabGroupMember"' - properties: - state: .state - locked: .locked - link: .web_url - email: .email - - kind: group - selector: - query: 'true' - port: - entity: - mappings: - identifier: .full_path - title: .name - blueprint: '"gitlabGroup"' - properties: - url: .web_url - visibility: .visibility - description: .description - kind: project selector: query: "true" @@ -42,7 +15,3 @@ resources: readme: file://README.md description: .description language: .__languages | to_entries | max_by(.value) | .key - relations: - group: >- - .path_with_namespace | gsub(" "; "") | split("/") | .[:-1] | - join("/") diff --git a/integrations/gitlab/gitlab_integration/core/async_fetcher.py b/integrations/gitlab/gitlab_integration/core/async_fetcher.py index d56a7b29c9..cbdbae8e3f 100644 --- a/integrations/gitlab/gitlab_integration/core/async_fetcher.py +++ b/integrations/gitlab/gitlab_integration/core/async_fetcher.py @@ -69,10 +69,13 @@ async def fetch_batch( List[Union[RESTObject, Dict[str, Any]]], ], ], - validation_func: Callable[ - [Any], - bool, - ], + validation_func: ( + Callable[ + [Any], + bool, + ] + | None + ) = None, page_size: int = DEFAULT_PAGINATION_PAGE_SIZE, **kwargs, ) -> AsyncIterator[ diff --git a/integrations/gitlab/gitlab_integration/events/hooks/members.py b/integrations/gitlab/gitlab_integration/events/hooks/members.py index 79b17b9aa6..4facb1b9f7 100644 --- a/integrations/gitlab/gitlab_integration/events/hooks/members.py +++ b/integrations/gitlab/gitlab_integration/events/hooks/members.py @@ -30,18 +30,8 @@ async def _on_hook( logger.info(f"Handling {event_name} for group member {user_username}") if event_name == "user_remove_from_group": - # This event is triggered by GitLab when a group or subgroup is destroyed. - # When a group is deleted, GitLab tries to remove all direct members associated with that group. - # However, to prevent accidental deletion of members who may also be part of other groups, - # we perform a check to determine if the member is associated with any other groups. - # If the member is not associated with any other groups, we proceed to delete the member from Port. - # Otherwise, we skip the deletion process to ensure that members are not inadvertently removed - # from groups they are still part of. if not (await self._is_root_group_member(body["user_id"])): - body = remove_prefix_from_keys( - "user_", body - ) # Removing user_ prefix from the keys makes the event data close to being consistent with the member api response data. - # Thereby enhancing flexibility in processing custom identifiers. + body = remove_prefix_from_keys("user_", body) await ocean.unregister_raw(ObjectKind.MEMBER, [body]) else: logger.warning( @@ -54,8 +44,6 @@ async def _on_hook( ): await self._register_group_member(group_member) if body["event_name"] == "user_add_to_group": - # This step ensures that when a new user is added to a group, we update the group entities to link the newly created member to the group. - # Note: This event is triggered by Gitlab when a group or subgroup is created. await self._register_group(gitlab_group) else: @@ -105,7 +93,7 @@ async def check_group(group: Group) -> bool: try: result = await completed_task if result: - return True # A single validation is enough + return True except Exception as e: logger.error( f"Error checking group membership for member {member_id}: {e}" diff --git a/integrations/gitlab/gitlab_integration/git_integration.py b/integrations/gitlab/gitlab_integration/git_integration.py index 25733f289b..ab42fb0606 100644 --- a/integrations/gitlab/gitlab_integration/git_integration.py +++ b/integrations/gitlab/gitlab_integration/git_integration.py @@ -150,8 +150,8 @@ class GitlabPortAppConfig(PortAppConfig): # Therefore, it should be included at the top level of the configuration. include_bot_members: bool | None = Field( alias="includeBotMembers", - default=False, - description="If set to true, bots will be filtered out from the members list. Default value is false", + default=True, + description="If set to false, bots will be filtered out from the members list. Default value is true", ) resources: list[GitlabMembersResourceConfig | GitlabResourceConfig] = Field(default_factory=list) # type: ignore diff --git a/integrations/gitlab/gitlab_integration/gitlab_service.py b/integrations/gitlab/gitlab_integration/gitlab_service.py index 521ccf5126..3f1bf611c0 100644 --- a/integrations/gitlab/gitlab_integration/gitlab_service.py +++ b/integrations/gitlab/gitlab_integration/gitlab_service.py @@ -32,11 +32,14 @@ PROJECTS_CACHE_KEY = "__cache_all_projects" GROUPS_CACHE_KEY = "__cache_all_groups" MEMBERS_CACHE_KEY = "__cache_all_members" - +USERS_CACHE_KEY = "__cache_all_users" if TYPE_CHECKING: from gitlab_integration.git_integration import GitlabPortAppConfig +MAXIMUM_CONCURRENT_TASK = 10 +semaphore = asyncio.BoundedSemaphore(MAXIMUM_CONCURRENT_TASK) + class GitlabService: all_events_in_webhook: list[str] = [ @@ -337,12 +340,16 @@ async def get_group(self, group_id: int) -> Optional[Group]: raise @cache_iterator_result() - async def get_all_groups(self) -> typing.AsyncIterator[List[Group]]: + async def get_all_groups( + self, skip_validation: bool = False + ) -> typing.AsyncIterator[List[Group]]: logger.info("fetching all groups for the token") async for groups_batch in AsyncFetcher.fetch_batch( fetch_func=self.gitlab_client.groups.list, - validation_func=self.should_run_for_group, + validation_func=( + self.should_run_for_group if not (skip_validation) else None + ), pagination="offset", order_by="id", sort="asc", @@ -556,15 +563,12 @@ def should_run_for_members(self, member: GroupMember): "GitlabPortAppConfig", event.port_app_config ) include_bot_members = port_app_config.include_bot_members - return ( - not (member.username.__contains__("bot")) if include_bot_members else True - ) + return include_bot_members or not member.username.__contains__("bot") async def get_all_group_members( self, group: Group ) -> typing.AsyncIterator[List[GroupMember]]: try: - logger.info(f"Fetching all members of group {group.name}") async for members_batch in AsyncFetcher.fetch_batch( fetch_func=group.members.list, @@ -610,12 +614,23 @@ async def enrich_member_with_public_email( return member_dict async def get_user(self, user_id: str) -> User: - logger.info(f"fetching user {user_id}") - user_response = await AsyncFetcher.fetch_single( - self.gitlab_client.users.get, user_id - ) - user: User = typing.cast(User, user_response) - return user + async with semaphore: + logger.info(f"fetching user {user_id}") + users = event.attributes.setdefault(USERS_CACHE_KEY, {}).setdefault( + self.gitlab_client.private_token, {} + ) + + if cached_user := users.get(user_id): + return cached_user + + user_response = await AsyncFetcher.fetch_single( + self.gitlab_client.users.get, user_id + ) + user: User = typing.cast(User, user_response) + event.attributes[USERS_CACHE_KEY][self.gitlab_client.private_token][ + user_id + ] = user + return user async def get_group_member( self, group: Group, member_id: int diff --git a/integrations/gitlab/gitlab_integration/ocean.py b/integrations/gitlab/gitlab_integration/ocean.py index 9edb93447e..4943e5834b 100644 --- a/integrations/gitlab/gitlab_integration/ocean.py +++ b/integrations/gitlab/gitlab_integration/ocean.py @@ -220,13 +220,27 @@ async def resync_members(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: GitlabMembersResourceConfig, event.resource_config ) selector = gitlab_resource_config.selector + + async def fetch_group_members(service, group): + if selector.enrich_with_public_email: + enriched_member_tasks = [ + service.enrich_member_with_public_email(member) + async for members in service.get_all_group_members(group) + for member in members + ] + enriched_members = await asyncio.gather(*enriched_member_tasks) + return enriched_members + else: + member_dicts = [ + member.asdict() + async for members in service.get_all_group_members(group) + for member in members + ] + return member_dicts + for service in get_cached_all_services(): - for group in service.get_root_groups(): - async for members in service.get_all_group_members(group): - if selector.enrich_with_public_email: - yield [ - await service.enrich_member_with_public_email(member) - for member in members - ] - else: - yield [member.asdict() for member in members] + async for groups in service.get_all_groups(skip_validation=True): + group_tasks = [fetch_group_members(service, group) for group in groups] + for group_task in asyncio.as_completed(group_tasks): + group_members = await group_task + yield group_members From 9721fd966aff307e9ef4ed97e3698a4ffe87ac4e Mon Sep 17 00:00:00 2001 From: Michael Kofi Armah Date: Wed, 7 Aug 2024 09:01:49 +0000 Subject: [PATCH 17/45] Update CHANGELOG.md --- integrations/gitlab/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/gitlab/CHANGELOG.md b/integrations/gitlab/CHANGELOG.md index e2c9fe0249..e5e3cad7f9 100644 --- a/integrations/gitlab/CHANGELOG.md +++ b/integrations/gitlab/CHANGELOG.md @@ -7,7 +7,7 @@ this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm -0.2.0 (2024-08-02) +0.2.0 (2024-08-06) =================== ### Features From 62a11f8f8f07ca0628effe91211a95e4ff4fc22e Mon Sep 17 00:00:00 2001 From: Michael Kofi Armah Date: Wed, 7 Aug 2024 09:03:56 +0000 Subject: [PATCH 18/45] removed groups and members from spec.yaml --- integrations/gitlab/.port/spec.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/integrations/gitlab/.port/spec.yaml b/integrations/gitlab/.port/spec.yaml index 534b2875d7..fc2b9aed25 100644 --- a/integrations/gitlab/.port/spec.yaml +++ b/integrations/gitlab/.port/spec.yaml @@ -7,8 +7,6 @@ features: - type: exporter section: Git Providers resources: - - kind: members - - kind: groups - kind: projects configurations: - name: tokenMapping From 33e9b19eb97d4bcaa9e954f0957de52678e55edc Mon Sep 17 00:00:00 2001 From: mkarmah Date: Wed, 7 Aug 2024 13:34:02 +0000 Subject: [PATCH 19/45] refactored fetch group members function in members resync --- .../gitlab/gitlab_integration/ocean.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/integrations/gitlab/gitlab_integration/ocean.py b/integrations/gitlab/gitlab_integration/ocean.py index 4943e5834b..72bbff41ac 100644 --- a/integrations/gitlab/gitlab_integration/ocean.py +++ b/integrations/gitlab/gitlab_integration/ocean.py @@ -222,21 +222,20 @@ async def resync_members(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: selector = gitlab_resource_config.selector async def fetch_group_members(service, group): + members = [ + member + async for members_batch in service.get_all_group_members(group) + for member in members_batch + ] + if selector.enrich_with_public_email: enriched_member_tasks = [ - service.enrich_member_with_public_email(member) - async for members in service.get_all_group_members(group) - for member in members + service.enrich_member_with_public_email(member) for member in members ] enriched_members = await asyncio.gather(*enriched_member_tasks) return enriched_members - else: - member_dicts = [ - member.asdict() - async for members in service.get_all_group_members(group) - for member in members - ] - return member_dicts + + return [member.asdict() for member in members] for service in get_cached_all_services(): async for groups in service.get_all_groups(skip_validation=True): From d5b4ea179496c28d18d75b562d3ba3f3ea7cb998 Mon Sep 17 00:00:00 2001 From: mkarmah Date: Thu, 8 Aug 2024 16:49:20 +0000 Subject: [PATCH 20/45] renamed fetch_group_members function to process_group_members --- integrations/gitlab/gitlab_integration/ocean.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integrations/gitlab/gitlab_integration/ocean.py b/integrations/gitlab/gitlab_integration/ocean.py index 72bbff41ac..7d01bc7cd9 100644 --- a/integrations/gitlab/gitlab_integration/ocean.py +++ b/integrations/gitlab/gitlab_integration/ocean.py @@ -221,7 +221,7 @@ async def resync_members(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: ) selector = gitlab_resource_config.selector - async def fetch_group_members(service, group): + async def process_group_members(service, group): members = [ member async for members_batch in service.get_all_group_members(group) @@ -239,7 +239,7 @@ async def fetch_group_members(service, group): for service in get_cached_all_services(): async for groups in service.get_all_groups(skip_validation=True): - group_tasks = [fetch_group_members(service, group) for group in groups] + group_tasks = [process_group_members(service, group) for group in groups] for group_task in asyncio.as_completed(group_tasks): group_members = await group_task yield group_members From b15ba21cbac45178d89bf66ac11efba9238edbad Mon Sep 17 00:00:00 2001 From: mkarmah Date: Mon, 12 Aug 2024 17:55:08 +0000 Subject: [PATCH 21/45] bumped ocean version --- integrations/gitlab/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/gitlab/pyproject.toml b/integrations/gitlab/pyproject.toml index 1a9aac434f..a38593dcdf 100644 --- a/integrations/gitlab/pyproject.toml +++ b/integrations/gitlab/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "gitlab" -version = "0.1.100" +version = "0.1.101" description = "Gitlab integration for Port using Port-Ocean Framework" authors = ["Yair Siman-Tov "] From 1458fac88cbf74c192c071de02f4efddd102334f Mon Sep 17 00:00:00 2001 From: mkarmah Date: Wed, 21 Aug 2024 10:49:42 +0000 Subject: [PATCH 22/45] updated resync groups with members --- .../gitlab_integration/gitlab_service.py | 22 +++++++++++++++++++ .../gitlab/gitlab_integration/ocean.py | 9 +++++++- .../gitlab/gitlab_integration/utils.py | 1 + 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/integrations/gitlab/gitlab_integration/gitlab_service.py b/integrations/gitlab/gitlab_integration/gitlab_service.py index 3f1bf611c0..6d03a44b9a 100644 --- a/integrations/gitlab/gitlab_integration/gitlab_service.py +++ b/integrations/gitlab/gitlab_integration/gitlab_service.py @@ -588,6 +588,28 @@ async def get_all_group_members( logger.error(f"Failed to get members for group={group.name}. error={e}") return + async def get_unsynced_group_members( + self, group: Group + ) -> typing.AsyncIterator[List[GroupMember]]: + logger.info(f"Fetching unsynced members of group {group.name}") + + cached_member_ids = event.attributes.setdefault( + MEMBERS_CACHE_KEY, {} + ).setdefault(self.gitlab_client.private_token, []) + async for members_batch in self.get_all_group_members(group): + unsynced_members = [ + member for member in members_batch if member.id not in cached_member_ids + ] + + if unsynced_members: + cached_member_ids.extend(member.id for member in unsynced_members) + + logger.info( + f"Found {len(unsynced_members)} unsynced members " + f"{[member.username for member in unsynced_members]} from {group.name}" + ) + yield unsynced_members + async def enrich_group_with_members(self, group: Group) -> dict[str, Any]: group_members = [ member diff --git a/integrations/gitlab/gitlab_integration/ocean.py b/integrations/gitlab/gitlab_integration/ocean.py index 7d01bc7cd9..31f3d30c73 100644 --- a/integrations/gitlab/gitlab_integration/ocean.py +++ b/integrations/gitlab/gitlab_integration/ocean.py @@ -108,6 +108,13 @@ async def on_start() -> None: @ocean.on_resync(ObjectKind.GROUP) +async def resync_groups(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: + for service in get_cached_all_services(): + async for groups_batch in service.get_all_groups(): + yield [group.asdict() for group in groups_batch] + + +@ocean.on_resync(ObjectKind.GROUPWITHMEMBERS) async def resync_groups(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: for service in get_cached_all_services(): async for groups_batch in service.get_all_groups(): @@ -224,7 +231,7 @@ async def resync_members(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: async def process_group_members(service, group): members = [ member - async for members_batch in service.get_all_group_members(group) + async for members_batch in service.get_unsynced_group_members(group) for member in members_batch ] diff --git a/integrations/gitlab/gitlab_integration/utils.py b/integrations/gitlab/gitlab_integration/utils.py index 8db703e30e..0c7bcb8ebb 100644 --- a/integrations/gitlab/gitlab_integration/utils.py +++ b/integrations/gitlab/gitlab_integration/utils.py @@ -52,3 +52,4 @@ class ObjectKind: PROJECT = "project" FOLDER = "folder" MEMBER = "member" + GROUPWITHMEMBERS = "group-with-members" From 56f7b44835c9528835273a5c7747cd288aa6432a Mon Sep 17 00:00:00 2001 From: mkarmah Date: Wed, 21 Aug 2024 10:51:41 +0000 Subject: [PATCH 23/45] updated resync with groups --- integrations/gitlab/gitlab_integration/ocean.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/integrations/gitlab/gitlab_integration/ocean.py b/integrations/gitlab/gitlab_integration/ocean.py index 31f3d30c73..1c54b86114 100644 --- a/integrations/gitlab/gitlab_integration/ocean.py +++ b/integrations/gitlab/gitlab_integration/ocean.py @@ -115,11 +115,12 @@ async def resync_groups(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: @ocean.on_resync(ObjectKind.GROUPWITHMEMBERS) -async def resync_groups(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: +async def resync_groups_with_members(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: for service in get_cached_all_services(): async for groups_batch in service.get_all_groups(): tasks = [service.enrich_group_with_members(group) for group in groups_batch] enriched_groups = await asyncio.gather(*tasks) + logger.warning(f"Enriched Groups {enriched_groups}") yield enriched_groups From c5344664704b471557a548603fdfcde3ab315e1d Mon Sep 17 00:00:00 2001 From: mkarmah Date: Wed, 21 Aug 2024 12:28:56 +0000 Subject: [PATCH 24/45] removed group hook --- integrations/gitlab/gitlab_integration/events/setup.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/integrations/gitlab/gitlab_integration/events/setup.py b/integrations/gitlab/gitlab_integration/events/setup.py index 3f252c4a76..f17b6d5a91 100644 --- a/integrations/gitlab/gitlab_integration/events/setup.py +++ b/integrations/gitlab/gitlab_integration/events/setup.py @@ -13,7 +13,6 @@ from gitlab_integration.events.hooks.push import PushHook from gitlab_integration.events.hooks.members import Members from gitlab_integration.events.hooks.group import Groups -from gitlab_integration.events.hooks.group import GroupHook from gitlab_integration.events.hooks.project_files import ProjectFiles from gitlab_integration.gitlab_service import GitlabService from gitlab_integration.models.webhook_groups_override_config import ( @@ -126,7 +125,6 @@ def setup_listeners(gitlab_service: GitlabService, webhook_id: str) -> None: Pipelines(gitlab_service), Groups(gitlab_service), Members(gitlab_service), - GroupHook(gitlab_service), ProjectFiles(gitlab_service), ] for handler in handlers: @@ -146,7 +144,6 @@ def setup_system_listeners(gitlab_clients: list[GitlabService]) -> None: Pipelines, Groups, Members, - GroupHook, ProjectFiles, ] for handler in handlers: From 6ae715687f87a739632182ba891f864699fa519e Mon Sep 17 00:00:00 2001 From: mkarmah Date: Fri, 30 Aug 2024 17:36:53 +0000 Subject: [PATCH 25/45] lint --- integrations/gitlab/gitlab_integration/core/async_fetcher.py | 2 +- integrations/gitlab/gitlab_integration/git_integration.py | 4 ++-- integrations/gitlab/gitlab_integration/gitlab_service.py | 1 - 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/integrations/gitlab/gitlab_integration/core/async_fetcher.py b/integrations/gitlab/gitlab_integration/core/async_fetcher.py index bafd9621e6..6bbe27d8bc 100644 --- a/integrations/gitlab/gitlab_integration/core/async_fetcher.py +++ b/integrations/gitlab/gitlab_integration/core/async_fetcher.py @@ -14,7 +14,7 @@ Group, User, GroupMember, - ProjectFile + ProjectFile, ) from loguru import logger diff --git a/integrations/gitlab/gitlab_integration/git_integration.py b/integrations/gitlab/gitlab_integration/git_integration.py index 332b1d69cd..ab530326e1 100644 --- a/integrations/gitlab/gitlab_integration/git_integration.py +++ b/integrations/gitlab/gitlab_integration/git_integration.py @@ -134,8 +134,8 @@ class MembersSelector(Selector): class GitlabMembersResourceConfig(ResourceConfig): kind: Literal["member"] selector: MembersSelector - - + + class FilesSelector(BaseModel): path: str = Field(description="The path to get the files from") repos: List[str] = Field( diff --git a/integrations/gitlab/gitlab_integration/gitlab_service.py b/integrations/gitlab/gitlab_integration/gitlab_service.py index e9ad3bdfd9..15a99e005a 100644 --- a/integrations/gitlab/gitlab_integration/gitlab_service.py +++ b/integrations/gitlab/gitlab_integration/gitlab_service.py @@ -709,7 +709,6 @@ async def get_group_member( logger.error(f"Failed to fetch group with ID {member_id}: {err}") raise - async def get_entities_diff( self, project: Project, From 44c3b7f73bbbe5b474a92ae6360e775b9b591db4 Mon Sep 17 00:00:00 2001 From: mkarmah Date: Mon, 2 Sep 2024 09:15:26 +0000 Subject: [PATCH 26/45] rephrased comments --- integrations/gitlab/gitlab_integration/gitlab_service.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/integrations/gitlab/gitlab_integration/gitlab_service.py b/integrations/gitlab/gitlab_integration/gitlab_service.py index 15a99e005a..e5e90e6d7a 100644 --- a/integrations/gitlab/gitlab_integration/gitlab_service.py +++ b/integrations/gitlab/gitlab_integration/gitlab_service.py @@ -628,7 +628,7 @@ async def get_all_group_members( async def get_unsynced_group_members( self, group: Group ) -> typing.AsyncIterator[List[GroupMember]]: - logger.info(f"Fetching unsynced members of group {group.name}") + logger.info(f"Fetching members unique to group {group.name}") cached_member_ids = event.attributes.setdefault( MEMBERS_CACHE_KEY, {} @@ -642,8 +642,8 @@ async def get_unsynced_group_members( cached_member_ids.extend(member.id for member in unsynced_members) logger.info( - f"Found {len(unsynced_members)} unsynced members " - f"{[member.username for member in unsynced_members]} from {group.name}" + f"Found {len(unsynced_members)} members " + f"{[member.username for member in unsynced_members]} unique to {group.name}" ) yield unsynced_members From fe2e7a2dcdebe55e7f8e80ee9fd529c02ffd06c1 Mon Sep 17 00:00:00 2001 From: mkarmah Date: Thu, 5 Sep 2024 11:33:25 +0000 Subject: [PATCH 27/45] updated group webhook to cater for groupswithmembers kind --- .../gitlab_integration/events/hooks/base.py | 15 +++++++++------ .../gitlab_integration/events/hooks/group.py | 9 ++++++++- .../gitlab/gitlab_integration/gitlab_service.py | 17 ++++++++++++++--- 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/integrations/gitlab/gitlab_integration/events/hooks/base.py b/integrations/gitlab/gitlab_integration/events/hooks/base.py index b5c85254fb..0929bcc294 100644 --- a/integrations/gitlab/gitlab_integration/events/hooks/base.py +++ b/integrations/gitlab/gitlab_integration/events/hooks/base.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import List, Any, Optional +from typing import List, Any, Optional, Dict from loguru import logger from gitlab.v4.objects import Project, Group from gitlab_integration.gitlab_service import GitlabService @@ -61,9 +61,12 @@ async def _on_hook( ) -> None: pass - async def _register_group(self, gitlab_group: Group) -> None: + async def _register_group(self, kind: str, gitlab_group: Dict[str, Any]) -> None: if self.gitlab_service.should_run_for_group(gitlab_group): - await ocean.register_raw( - ObjectKind.GROUP, - [await self.gitlab_service.enrich_group_with_members(gitlab_group)], - ) + await ocean.register_raw(kind, [gitlab_group]) + + async def _register_group_with_members( + self, kind: str, gitlab_group: Group + ) -> None: + gitlab_group = await self.gitlab_service.enrich_group_with_members(gitlab_group) + await self._register_group(kind, gitlab_group) diff --git a/integrations/gitlab/gitlab_integration/events/hooks/group.py b/integrations/gitlab/gitlab_integration/events/hooks/group.py index c8e3d0f424..8aa0dd49fe 100644 --- a/integrations/gitlab/gitlab_integration/events/hooks/group.py +++ b/integrations/gitlab/gitlab_integration/events/hooks/group.py @@ -17,8 +17,15 @@ async def _on_hook( ) -> None: logger.info(f"Handling {body['event_name']} for group {body['group_id']}") if gitlab_group: - await self._register_group(gitlab_group) + await self._register_group( + ObjectKind.GROUP, + gitlab_group.asdict(), + ) + await self._register_group_with_members( + gitlab_group, ObjectKind.GROUPWITHMEMBERS + ) elif body["event_name"] in ("subgroup_destroy", "group_destroy"): await ocean.unregister_raw(ObjectKind.GROUP, [body]) + await ocean.unregister_raw(ObjectKind.GROUPWITHMEMBERS, [body]) else: logger.warning(f"Group {body['group_id']} was filtered. Skipping ...") diff --git a/integrations/gitlab/gitlab_integration/gitlab_service.py b/integrations/gitlab/gitlab_integration/gitlab_service.py index e5e90e6d7a..dc806fd435 100644 --- a/integrations/gitlab/gitlab_integration/gitlab_service.py +++ b/integrations/gitlab/gitlab_integration/gitlab_service.py @@ -603,12 +603,21 @@ def should_run_for_members(self, member: GroupMember): return include_bot_members or not member.username.__contains__("bot") async def get_all_group_members( - self, group: Group + self, group: Group, include_inherited: bool = False ) -> typing.AsyncIterator[List[GroupMember]]: + """ + Fetches all members of a group + :param group: Group object + :param include_inherited: Whether to include members inherited through ancestor groups + :return: List of GroupMember objects + """ try: logger.info(f"Fetching all members of group {group.name}") + fetch_func = ( + group.members_all.list if include_inherited else group.members.list + ) async for members_batch in AsyncFetcher.fetch_batch( - fetch_func=group.members.list, + fetch_func=fetch_func, validation_func=self.should_run_for_members, pagination="offset", order_by="id", @@ -633,7 +642,9 @@ async def get_unsynced_group_members( cached_member_ids = event.attributes.setdefault( MEMBERS_CACHE_KEY, {} ).setdefault(self.gitlab_client.private_token, []) - async for members_batch in self.get_all_group_members(group): + async for members_batch in self.get_all_group_members( + group, include_inherited=True + ): unsynced_members = [ member for member in members_batch if member.id not in cached_member_ids ] From 0a279ae668c1c9b58eaa723f43e26f5ad1acb785 Mon Sep 17 00:00:00 2001 From: mkarmah Date: Wed, 6 Nov 2024 18:38:12 +0000 Subject: [PATCH 28/45] remove debug logs --- integrations/gitlab/gitlab_integration/git_integration.py | 2 +- integrations/gitlab/gitlab_integration/ocean.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/integrations/gitlab/gitlab_integration/git_integration.py b/integrations/gitlab/gitlab_integration/git_integration.py index 4c1d7a20e9..b2aa259226 100644 --- a/integrations/gitlab/gitlab_integration/git_integration.py +++ b/integrations/gitlab/gitlab_integration/git_integration.py @@ -65,7 +65,7 @@ async def _search(self, data: Dict[str, Any], pattern: str) -> Any: match = None if project: if scope == "blobs": - # if the query does not contain a path ots, we add the base path to the query + # if the query does not contain a path filter, we add the base path to the query # this is done to avoid searching the entire project for the file, if the base path is known # having the base path applies to the case where we export a folder as a monorepo if base_path and "path:" not in query: diff --git a/integrations/gitlab/gitlab_integration/ocean.py b/integrations/gitlab/gitlab_integration/ocean.py index 2f0c567c0a..d5d630be64 100644 --- a/integrations/gitlab/gitlab_integration/ocean.py +++ b/integrations/gitlab/gitlab_integration/ocean.py @@ -136,7 +136,6 @@ async def resync_groups_with_members(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: async for groups_batch in service.get_all_groups(): tasks = [service.enrich_group_with_members(group) for group in groups_batch] enriched_groups = await asyncio.gather(*tasks) - logger.warning(f"Enriched Groups {enriched_groups}") yield enriched_groups @@ -330,4 +329,5 @@ async def process_group_members(service, group): group_tasks = [process_group_members(service, group) for group in groups] for group_task in asyncio.as_completed(group_tasks): group_members = await group_task + logger.warning(f"Enriched Members {group_members}") yield group_members From 99e3724b8d838561b09baf11040de0ebbe324b16 Mon Sep 17 00:00:00 2001 From: mkarmah Date: Wed, 6 Nov 2024 18:54:45 +0000 Subject: [PATCH 29/45] test fix --- .../tests/gitlab_integration/test_gitlab_service_webhook.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/integrations/gitlab/tests/gitlab_integration/test_gitlab_service_webhook.py b/integrations/gitlab/tests/gitlab_integration/test_gitlab_service_webhook.py index f3656de006..2fe823cc5d 100644 --- a/integrations/gitlab/tests/gitlab_integration/test_gitlab_service_webhook.py +++ b/integrations/gitlab/tests/gitlab_integration/test_gitlab_service_webhook.py @@ -72,6 +72,7 @@ async def test_create_group_webhook_success( "tag_push_events": False, "subgroup_events": False, "confidential_issues_events": False, + "member_events": False, } ) @@ -103,5 +104,6 @@ async def test_create_group_webhook_failure( "tag_push_events": False, "subgroup_events": False, "confidential_issues_events": False, + "member_events": False, } ) From 82bfc94d7936628e88c2dd1eece517eba44212e0 Mon Sep 17 00:00:00 2001 From: mkarmah Date: Fri, 8 Nov 2024 00:06:13 +0000 Subject: [PATCH 30/45] updated webhook logic --- .../gitlab_integration/events/hooks/base.py | 46 ++++++++-- .../gitlab_integration/events/hooks/group.py | 30 +++--- .../events/hooks/members.py | 91 ++----------------- .../gitlab_integration/git_integration.py | 14 +-- .../gitlab_integration/gitlab_service.py | 55 +++++------ .../gitlab/gitlab_integration/ocean.py | 46 +++------- 6 files changed, 101 insertions(+), 181 deletions(-) diff --git a/integrations/gitlab/gitlab_integration/events/hooks/base.py b/integrations/gitlab/gitlab_integration/events/hooks/base.py index 7cf5943ff8..66a2c1d9a0 100644 --- a/integrations/gitlab/gitlab_integration/events/hooks/base.py +++ b/integrations/gitlab/gitlab_integration/events/hooks/base.py @@ -1,10 +1,15 @@ from abc import ABC, abstractmethod -from typing import List, Any, Optional, Dict +from typing import List, Any, Dict +import typing from loguru import logger from gitlab.v4.objects import Project, Group from gitlab_integration.gitlab_service import GitlabService -from gitlab_integration.utils import ObjectKind from port_ocean.context.ocean import ocean +from port_ocean.context.event import event +from gitlab_integration.git_integration import ( + GitlabPortAppConfig, + GroupWithMembersSelector, +) class HookHandler(ABC): @@ -61,17 +66,42 @@ async def on_hook(self, event: str, body: dict[str, Any]) -> None: logger.info(f"Finished handling {event} for group {group_path}") @abstractmethod - async def _on_hook( - self, body: dict[str, Any], gitlab_group: Optional[Group] - ) -> None: + async def _on_hook(self, body: dict[str, Any], gitlab_group: Group) -> None: pass async def _register_group(self, kind: str, gitlab_group: Dict[str, Any]) -> None: - if self.gitlab_service.should_run_for_group(gitlab_group): + if self.gitlab_service.should_run_for_path(gitlab_group["full_path"]): await ocean.register_raw(kind, [gitlab_group]) async def _register_group_with_members( self, kind: str, gitlab_group: Group ) -> None: - gitlab_group = await self.gitlab_service.enrich_group_with_members(gitlab_group) - await self._register_group(kind, gitlab_group) + + resource_configs = typing.cast( + GitlabPortAppConfig, event.port_app_config + ).resources + + matching_resource_configs = [ + resource_config + for resource_config in resource_configs + if ( + resource_config.kind == kind + and isinstance(resource_config.selector, GroupWithMembersSelector) + ) + ] + + if not matching_resource_configs: + logger.info( + "Group With Member resource not found in port app config, update port app config to include the resource type" + ) + return + for resource_config in matching_resource_configs: + enrich_with_public_email = resource_config.selector.enrich_with_public_email + gitlab_group_result: Dict[str, Any] = ( + await self.gitlab_service.enrich_group_with_members( + gitlab_group, enrich_with_public_email + ) + if enrich_with_public_email + else gitlab_group.asdict() + ) + await self._register_group(kind, gitlab_group_result) diff --git a/integrations/gitlab/gitlab_integration/events/hooks/group.py b/integrations/gitlab/gitlab_integration/events/hooks/group.py index 8aa0dd49fe..086759b529 100644 --- a/integrations/gitlab/gitlab_integration/events/hooks/group.py +++ b/integrations/gitlab/gitlab_integration/events/hooks/group.py @@ -1,4 +1,4 @@ -from typing import Any, Optional +from typing import Any from loguru import logger @@ -12,20 +12,20 @@ class Groups(GroupHandler): events = ["Subgroup Hook"] system_events = ["group_destroy", "group_create", "group_rename"] - async def _on_hook( - self, body: dict[str, Any], gitlab_group: Optional[Group] - ) -> None: + async def _on_hook(self, body: dict[str, Any], gitlab_group: Group) -> None: logger.info(f"Handling {body['event_name']} for group {body['group_id']}") - if gitlab_group: - await self._register_group( - ObjectKind.GROUP, - gitlab_group.asdict(), - ) - await self._register_group_with_members( - gitlab_group, ObjectKind.GROUPWITHMEMBERS - ) - elif body["event_name"] in ("subgroup_destroy", "group_destroy"): + + if body["event_name"] in ("subgroup_destroy", "group_destroy"): await ocean.unregister_raw(ObjectKind.GROUP, [body]) await ocean.unregister_raw(ObjectKind.GROUPWITHMEMBERS, [body]) - else: - logger.warning(f"Group {body['group_id']} was filtered. Skipping ...") + logger.info(f"Unregistered group {body['group_id']}") + return + + await self._register_group( + ObjectKind.GROUP, + gitlab_group.asdict(), + ) + await self._register_group_with_members( + ObjectKind.GROUPWITHMEMBERS, gitlab_group + ) + logger.info(f"Registered group {body['group_id']}") diff --git a/integrations/gitlab/gitlab_integration/events/hooks/members.py b/integrations/gitlab/gitlab_integration/events/hooks/members.py index 4facb1b9f7..f7ab1b3e8b 100644 --- a/integrations/gitlab/gitlab_integration/events/hooks/members.py +++ b/integrations/gitlab/gitlab_integration/events/hooks/members.py @@ -1,18 +1,9 @@ -import typing -from typing import Any, List, Optional +from typing import Any from loguru import logger -import asyncio from gitlab_integration.utils import ObjectKind -from port_ocean.context.ocean import ocean from gitlab_integration.events.hooks.base import GroupHandler -from gitlab_integration.git_integration import MembersSelector -from port_ocean.context.event import event -from gitlab.v4.objects import Group, GroupMember -from gitlab_integration.git_integration import GitlabPortAppConfig -from gitlab_integration.events.utils import remove_prefix_from_keys - -CONCURENT_TASKS_LIMIT = 10 +from gitlab.v4.objects import Group class Members(GroupHandler): @@ -23,79 +14,9 @@ class Members(GroupHandler): "user_add_to_group", ] - async def _on_hook( - self, body: dict[str, Any], gitlab_group: Optional[Group] - ) -> None: + async def _on_hook(self, body: dict[str, Any], gitlab_group: Group) -> None: event_name, user_username = (body["event_name"], body["user_username"]) logger.info(f"Handling {event_name} for group member {user_username}") - - if event_name == "user_remove_from_group": - if not (await self._is_root_group_member(body["user_id"])): - body = remove_prefix_from_keys("user_", body) - await ocean.unregister_raw(ObjectKind.MEMBER, [body]) - else: - logger.warning( - f"Group member {user_username} belongs to other groups. Skipping ..." - ) - - elif gitlab_group: - if group_member := await self.gitlab_service.get_group_member( - gitlab_group, body["user_id"] - ): - await self._register_group_member(group_member) - if body["event_name"] == "user_add_to_group": - await self._register_group(gitlab_group) - - else: - logger.warning(f"Group Member {user_username} was filtered. Skipping ...") - - async def _register_group_member(self, group_member: GroupMember) -> None: - - resource_configs = typing.cast( - GitlabPortAppConfig, event.port_app_config - ).resources - - matching_resource_configs = [ - resource_config - for resource_config in resource_configs - if ( - resource_config.kind == ObjectKind.MEMBER - and isinstance(resource_config.selector, MembersSelector) - ) - ] - if not matching_resource_configs: - logger.info( - "Member resource not found in port app config, update port app config to include the resource type" - ) - return - for resource_config in matching_resource_configs: - enrich_with_public_email = resource_config.selector.enrich_with_public_email - member = ( - await self.gitlab_service.enrich_member_with_public_email(group_member) - if enrich_with_public_email - else group_member.asdict() - ) - - await ocean.register_raw(ObjectKind.MEMBER, [member]) - - async def _is_root_group_member(self, member_id: int) -> bool: - root_groups: List[Group] = self.gitlab_service.get_root_groups() - semaphore = asyncio.Semaphore(CONCURENT_TASKS_LIMIT) - - async def check_group(group: Group) -> bool: - async with semaphore: - return any( - [await self.gitlab_service.get_group_member(group, member_id)] - ) - - tasks = [asyncio.create_task(check_group(group)) for group in root_groups] - for completed_task in asyncio.as_completed(tasks): - try: - result = await completed_task - if result: - return True - except Exception as e: - logger.error( - f"Error checking group membership for member {member_id}: {e}" - ) - return False + await self._register_group_with_members( + ObjectKind.GROUPWITHMEMBERS, gitlab_group + ) diff --git a/integrations/gitlab/gitlab_integration/git_integration.py b/integrations/gitlab/gitlab_integration/git_integration.py index b2aa259226..b3d97970a7 100644 --- a/integrations/gitlab/gitlab_integration/git_integration.py +++ b/integrations/gitlab/gitlab_integration/git_integration.py @@ -121,18 +121,18 @@ class GitlabResourceConfig(ResourceConfig): selector: GitlabSelector -class MembersSelector(Selector): +class GroupWithMembersSelector(Selector): - enrich_with_public_email: bool | None = Field( + enrich_with_public_email: bool = Field( alias="enrichWithPublicEmail", default=False, - description="If set to true, the integration will enrich members with public email field. Default value is false", + description="If set to true, the integration will enrich group members with public email field. Default value is false", ) -class GitlabMembersResourceConfig(ResourceConfig): - kind: Literal["member"] - selector: MembersSelector +class GitlabGroupWithMembersResourceConfig(ResourceConfig): + kind: Literal["group-with-members"] + selector: GroupWithMembersSelector class FilesSelector(BaseModel): @@ -169,7 +169,7 @@ class GitlabPortAppConfig(PortAppConfig): default=True, description="If set to false, bots will be filtered out from the members list. Default value is true", ) - resources: list[GitlabMembersResourceConfig | GitLabFilesResourceConfig | GitlabResourceConfig] = Field(default_factory=list) # type: ignore + resources: list[GitlabGroupWithMembersResourceConfig | GitLabFilesResourceConfig | GitlabResourceConfig] = Field(default_factory=list) # type: ignore def _get_project_from_cache(project_id: int) -> Project | None: diff --git a/integrations/gitlab/gitlab_integration/gitlab_service.py b/integrations/gitlab/gitlab_integration/gitlab_service.py index 76c406b5c4..44a6237406 100644 --- a/integrations/gitlab/gitlab_integration/gitlab_service.py +++ b/integrations/gitlab/gitlab_integration/gitlab_service.py @@ -455,21 +455,13 @@ async def get_project(self, project_id: int) -> Project | None: else: return None - async def get_group(self, group_id: int) -> Optional[Group]: - try: - logger.info(f"Fetching group with ID: {group_id}") - group_response = await AsyncFetcher.fetch_single( - self.gitlab_client.groups.get, group_id - ) - group: Group = typing.cast(Group, group_response) - return group - except gitlab.exceptions.GitlabGetError as err: - if err.response_code == 404: - logger.warning(f"Group with ID {group_id} not found (404).") - return None - else: - logger.error(f"Failed to fetch group with ID {group_id}: {err}") - raise + async def get_group(self, group_id: int) -> Group: + logger.info(f"Fetching group with ID: {group_id}") + group_response = await AsyncFetcher.fetch_single( + self.gitlab_client.groups.get, group_id + ) + group: Group = typing.cast(Group, group_response) + return group @cache_iterator_result() async def get_all_groups( @@ -772,21 +764,21 @@ async def get_unsynced_group_members( ) yield unsynced_members - async def enrich_group_with_members(self, group: Group) -> dict[str, Any]: - group_members = [ - member - async for members in self.get_all_group_members(group) - for member in members - ] - group_dict: dict[str, Any] = group.asdict() - group_dict.update( - { - "__members": [ - {"id": group_member.id, "username": group_member.username} - for group_member in group_members + async def enrich_group_with_members( + self, group: Group, include_public_email: bool = False + ) -> dict[str, Any]: + group_members = [] + async for members in self.get_all_group_members(group): + if include_public_email: + tasks = [ + self.enrich_member_with_public_email(member) for member in members ] - } - ) + group_members.extend(await asyncio.gather(*tasks)) + else: + group_members.extend(member.asdict() for member in members) + + group_dict: dict[str, Any] = group.asdict() + group_dict["__members"] = group_members return group_dict async def enrich_member_with_public_email( @@ -830,9 +822,8 @@ async def get_group_member( if err.response_code == 404: logger.warning(f"Group Member with ID {member_id} not found (404).") return None - else: - logger.error(f"Failed to fetch group with ID {member_id}: {err}") - raise + logger.error(f"Failed to fetch group with ID {member_id}: {err}") + raise async def get_entities_diff( self, diff --git a/integrations/gitlab/gitlab_integration/ocean.py b/integrations/gitlab/gitlab_integration/ocean.py index d5d630be64..4daca5769c 100644 --- a/integrations/gitlab/gitlab_integration/ocean.py +++ b/integrations/gitlab/gitlab_integration/ocean.py @@ -14,7 +14,7 @@ from gitlab_integration.events.setup import setup_application from gitlab_integration.git_integration import ( GitlabResourceConfig, - GitlabMembersResourceConfig, + GitlabGroupWithMembersResourceConfig, GitLabFilesResourceConfig, ) from gitlab_integration.utils import ObjectKind, get_cached_all_services @@ -130,11 +130,21 @@ async def resync_groups(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: yield [group.asdict() for group in groups_batch] +# from memory_profiler import profile +# @profile @ocean.on_resync(ObjectKind.GROUPWITHMEMBERS) async def resync_groups_with_members(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: + gitlab_resource_config: GitlabGroupWithMembersResourceConfig = typing.cast( + GitlabGroupWithMembersResourceConfig, event.resource_config + ) + enrich_with_public_email = gitlab_resource_config.selector.enrich_with_public_email + for service in get_cached_all_services(): async for groups_batch in service.get_all_groups(): - tasks = [service.enrich_group_with_members(group) for group in groups_batch] + tasks = [ + service.enrich_group_with_members(group, enrich_with_public_email) + for group in groups_batch + ] enriched_groups = await asyncio.gather(*tasks) yield enriched_groups @@ -299,35 +309,3 @@ async def resync_pipelines(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: {**pipeline.asdict(), "__project": project.asdict()} for pipeline in pipelines_batch ] - - -@ocean.on_resync(ObjectKind.MEMBER) -async def resync_members(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: - gitlab_resource_config: GitlabMembersResourceConfig = typing.cast( - GitlabMembersResourceConfig, event.resource_config - ) - selector = gitlab_resource_config.selector - - async def process_group_members(service, group): - members = [ - member - async for members_batch in service.get_unsynced_group_members(group) - for member in members_batch - ] - - if selector.enrich_with_public_email: - enriched_member_tasks = [ - service.enrich_member_with_public_email(member) for member in members - ] - enriched_members = await asyncio.gather(*enriched_member_tasks) - return enriched_members - - return [member.asdict() for member in members] - - for service in get_cached_all_services(): - async for groups in service.get_all_groups(skip_validation=True): - group_tasks = [process_group_members(service, group) for group in groups] - for group_task in asyncio.as_completed(group_tasks): - group_members = await group_task - logger.warning(f"Enriched Members {group_members}") - yield group_members From c81ab10b580a07fff27d4dbc1593f25c5813352a Mon Sep 17 00:00:00 2001 From: mkarmah Date: Fri, 8 Nov 2024 09:17:10 +0000 Subject: [PATCH 31/45] revert get_group function, stick with optional response --- .../gitlab_integration/events/hooks/base.py | 9 ++--- .../gitlab_integration/events/hooks/group.py | 34 +++++++++++++------ .../events/hooks/members.py | 21 ++++++++---- .../gitlab_integration/gitlab_service.py | 14 ++++---- .../gitlab/gitlab_integration/ocean.py | 2 -- 5 files changed, 49 insertions(+), 31 deletions(-) diff --git a/integrations/gitlab/gitlab_integration/events/hooks/base.py b/integrations/gitlab/gitlab_integration/events/hooks/base.py index 66a2c1d9a0..daca370827 100644 --- a/integrations/gitlab/gitlab_integration/events/hooks/base.py +++ b/integrations/gitlab/gitlab_integration/events/hooks/base.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import List, Any, Dict +from typing import List, Any, Dict, Optional import typing from loguru import logger from gitlab.v4.objects import Project, Group @@ -66,12 +66,13 @@ async def on_hook(self, event: str, body: dict[str, Any]) -> None: logger.info(f"Finished handling {event} for group {group_path}") @abstractmethod - async def _on_hook(self, body: dict[str, Any], gitlab_group: Group) -> None: + async def _on_hook( + self, body: dict[str, Any], gitlab_group: Optional[Group] + ) -> None: pass async def _register_group(self, kind: str, gitlab_group: Dict[str, Any]) -> None: - if self.gitlab_service.should_run_for_path(gitlab_group["full_path"]): - await ocean.register_raw(kind, [gitlab_group]) + await ocean.register_raw(kind, [gitlab_group]) async def _register_group_with_members( self, kind: str, gitlab_group: Group diff --git a/integrations/gitlab/gitlab_integration/events/hooks/group.py b/integrations/gitlab/gitlab_integration/events/hooks/group.py index 086759b529..cf8c3305d0 100644 --- a/integrations/gitlab/gitlab_integration/events/hooks/group.py +++ b/integrations/gitlab/gitlab_integration/events/hooks/group.py @@ -1,4 +1,4 @@ -from typing import Any +from typing import Any, Optional from loguru import logger @@ -12,20 +12,32 @@ class Groups(GroupHandler): events = ["Subgroup Hook"] system_events = ["group_destroy", "group_create", "group_rename"] - async def _on_hook(self, body: dict[str, Any], gitlab_group: Group) -> None: + async def _on_hook( + self, body: dict[str, Any], gitlab_group: Optional[Group] + ) -> None: logger.info(f"Handling {body['event_name']} for group {body['group_id']}") - if body["event_name"] in ("subgroup_destroy", "group_destroy"): + group_full_path = body.get("full_path") + if gitlab_group: + await self._register_group( + ObjectKind.GROUP, + gitlab_group.asdict(), + ) + await self._register_group_with_members( + ObjectKind.GROUPWITHMEMBERS, gitlab_group + ) + logger.info(f"Registered group {body['group_id']}") + elif ( + group_full_path + and self.gitlab_service.should_run_for_path(group_full_path) + and body["event_name"] in ("subgroup_destroy", "group_destroy") + ): await ocean.unregister_raw(ObjectKind.GROUP, [body]) await ocean.unregister_raw(ObjectKind.GROUPWITHMEMBERS, [body]) logger.info(f"Unregistered group {body['group_id']}") return - await self._register_group( - ObjectKind.GROUP, - gitlab_group.asdict(), - ) - await self._register_group_with_members( - ObjectKind.GROUPWITHMEMBERS, gitlab_group - ) - logger.info(f"Registered group {body['group_id']}") + else: + logger.info( + f"Group {body['group_id']} was filtered for event {body['event_name']}. Skipping..." + ) diff --git a/integrations/gitlab/gitlab_integration/events/hooks/members.py b/integrations/gitlab/gitlab_integration/events/hooks/members.py index f7ab1b3e8b..6e699132bc 100644 --- a/integrations/gitlab/gitlab_integration/events/hooks/members.py +++ b/integrations/gitlab/gitlab_integration/events/hooks/members.py @@ -1,4 +1,4 @@ -from typing import Any +from typing import Any, Optional from loguru import logger from gitlab_integration.utils import ObjectKind @@ -14,9 +14,16 @@ class Members(GroupHandler): "user_add_to_group", ] - async def _on_hook(self, body: dict[str, Any], gitlab_group: Group) -> None: - event_name, user_username = (body["event_name"], body["user_username"]) - logger.info(f"Handling {event_name} for group member {user_username}") - await self._register_group_with_members( - ObjectKind.GROUPWITHMEMBERS, gitlab_group - ) + async def _on_hook( + self, body: dict[str, Any], gitlab_group: Optional[Group] + ) -> None: + if gitlab_group: + event_name, user_username = (body["event_name"], body["user_username"]) + logger.info(f"Handling {event_name} for group member {user_username}") + await self._register_group_with_members( + ObjectKind.GROUPWITHMEMBERS, gitlab_group + ) + else: + logger.info( + f"Group member's group {body['group_id']} was filtered for event {body['event_name']}. Skipping..." + ) diff --git a/integrations/gitlab/gitlab_integration/gitlab_service.py b/integrations/gitlab/gitlab_integration/gitlab_service.py index 44a6237406..ca12783adb 100644 --- a/integrations/gitlab/gitlab_integration/gitlab_service.py +++ b/integrations/gitlab/gitlab_integration/gitlab_service.py @@ -455,13 +455,13 @@ async def get_project(self, project_id: int) -> Project | None: else: return None - async def get_group(self, group_id: int) -> Group: - logger.info(f"Fetching group with ID: {group_id}") - group_response = await AsyncFetcher.fetch_single( - self.gitlab_client.groups.get, group_id - ) - group: Group = typing.cast(Group, group_response) - return group + async def get_group(self, group_id: int) -> Group | None: + logger.info(f"fetching group {group_id}") + group = await AsyncFetcher.fetch_single(self.gitlab_client.groups.get, group_id) + if isinstance(group, Group) and self.should_run_for_group(group): + return group + else: + return None @cache_iterator_result() async def get_all_groups( diff --git a/integrations/gitlab/gitlab_integration/ocean.py b/integrations/gitlab/gitlab_integration/ocean.py index 4daca5769c..c6be6a745a 100644 --- a/integrations/gitlab/gitlab_integration/ocean.py +++ b/integrations/gitlab/gitlab_integration/ocean.py @@ -130,8 +130,6 @@ async def resync_groups(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: yield [group.asdict() for group in groups_batch] -# from memory_profiler import profile -# @profile @ocean.on_resync(ObjectKind.GROUPWITHMEMBERS) async def resync_groups_with_members(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: gitlab_resource_config: GitlabGroupWithMembersResourceConfig = typing.cast( From bb7f1467cffd113ce67c91a35caf73d52d87bf53 Mon Sep 17 00:00:00 2001 From: mkarmah Date: Mon, 11 Nov 2024 15:37:09 +0000 Subject: [PATCH 32/45] added support for project members --- .../gitlab_integration/events/hooks/base.py | 20 ++- .../gitlab_integration/events/hooks/push.py | 2 +- .../gitlab_integration/git_integration.py | 34 ++-- .../gitlab_integration/gitlab_service.py | 170 +++++++++++++----- .../gitlab/gitlab_integration/ocean.py | 80 ++++++++- .../gitlab/gitlab_integration/utils.py | 3 +- 6 files changed, 236 insertions(+), 73 deletions(-) diff --git a/integrations/gitlab/gitlab_integration/events/hooks/base.py b/integrations/gitlab/gitlab_integration/events/hooks/base.py index daca370827..6a0de650e2 100644 --- a/integrations/gitlab/gitlab_integration/events/hooks/base.py +++ b/integrations/gitlab/gitlab_integration/events/hooks/base.py @@ -8,7 +8,7 @@ from port_ocean.context.event import event from gitlab_integration.git_integration import ( GitlabPortAppConfig, - GroupWithMembersSelector, + GitlabMemberSelector, ) @@ -87,7 +87,7 @@ async def _register_group_with_members( for resource_config in resource_configs if ( resource_config.kind == kind - and isinstance(resource_config.selector, GroupWithMembersSelector) + and isinstance(resource_config.selector, GitlabMemberSelector) ) ] @@ -97,12 +97,18 @@ async def _register_group_with_members( ) return for resource_config in matching_resource_configs: - enrich_with_public_email = resource_config.selector.enrich_with_public_email + include_public_email = resource_config.selector.include_public_email + include_bot_members = resource_config.selector.include_bot_members + include_inherited_members = ( + resource_config.selector.include_inherited_members + ) + gitlab_group_result: Dict[str, Any] = ( await self.gitlab_service.enrich_group_with_members( - gitlab_group, enrich_with_public_email + gitlab_group, + include_public_email, + include_bot_members, + include_inherited_members, ) - if enrich_with_public_email - else gitlab_group.asdict() ) - await self._register_group(kind, gitlab_group_result) + await self._register_group(resource_config.kind, gitlab_group_result) diff --git a/integrations/gitlab/gitlab_integration/events/hooks/push.py b/integrations/gitlab/gitlab_integration/events/hooks/push.py index 11291e277b..43c22fc70f 100644 --- a/integrations/gitlab/gitlab_integration/events/hooks/push.py +++ b/integrations/gitlab/gitlab_integration/events/hooks/push.py @@ -103,7 +103,7 @@ async def _on_hook(self, body: dict[str, Any], gitlab_project: Project) -> None: enriched_project = await self.gitlab_service.enrich_project_with_extras( gitlab_project ) - await ocean.register_raw(ObjectKind.PROJECT, [enriched_project]) + await ocean.register_raw(ObjectKind.PROJECT, [enriched_project.asdict()]) else: logger.debug( diff --git a/integrations/gitlab/gitlab_integration/git_integration.py b/integrations/gitlab/gitlab_integration/git_integration.py index b3d97970a7..a7d00f5b34 100644 --- a/integrations/gitlab/gitlab_integration/git_integration.py +++ b/integrations/gitlab/gitlab_integration/git_integration.py @@ -121,18 +121,31 @@ class GitlabResourceConfig(ResourceConfig): selector: GitlabSelector -class GroupWithMembersSelector(Selector): +class GitlabMemberSelector(Selector): - enrich_with_public_email: bool = Field( + include_public_email: bool = Field( alias="enrichWithPublicEmail", default=False, description="If set to true, the integration will enrich group members with public email field. Default value is false", ) + include_inherited_members: bool = Field( + alias="includeInheritedMembers", + default=False, + description="If set to true, the integration will include inherited members in the group members list. Default value is false", + ) + # The "include bot members" flag affects both the "group" and "member" kinds. + # To prevent inconsistencies, the behavior or value of this parameter should be consistent for both "groups" and "members". + # Therefore, it should be included at the top level of the configuration. + include_bot_members: bool = Field( + alias="includeBotMembers", + default=True, + description="If set to false, bots will be filtered out from the members list. Default value is true", + ) -class GitlabGroupWithMembersResourceConfig(ResourceConfig): - kind: Literal["group-with-members"] - selector: GroupWithMembersSelector +class GitlabObjectWithMembersResourceConfig(ResourceConfig): + kind: Literal["project-with-members", "group-with-members"] + selector: GitlabMemberSelector class FilesSelector(BaseModel): @@ -160,16 +173,7 @@ class GitlabPortAppConfig(PortAppConfig): project_visibility_filter: str | None = Field( alias="projectVisibilityFilter", default=None ) - - # The "include bot members" flag affects both the "group" and "member" kinds. - # To prevent inconsistencies, the behavior or value of this parameter should be consistent for both "groups" and "members". - # Therefore, it should be included at the top level of the configuration. - include_bot_members: bool | None = Field( - alias="includeBotMembers", - default=True, - description="If set to false, bots will be filtered out from the members list. Default value is true", - ) - resources: list[GitlabGroupWithMembersResourceConfig | GitLabFilesResourceConfig | GitlabResourceConfig] = Field(default_factory=list) # type: ignore + resources: list[GitlabObjectWithMembersResourceConfig | GitLabFilesResourceConfig | GitlabResourceConfig] = Field(default_factory=list) # type: ignore def _get_project_from_cache(project_id: int) -> Project | None: diff --git a/integrations/gitlab/gitlab_integration/gitlab_service.py b/integrations/gitlab/gitlab_integration/gitlab_service.py index ca12783adb..ec7071d6aa 100644 --- a/integrations/gitlab/gitlab_integration/gitlab_service.py +++ b/integrations/gitlab/gitlab_integration/gitlab_service.py @@ -10,11 +10,11 @@ import yaml from gitlab import Gitlab, GitlabError, GitlabList from gitlab.base import RESTObject -import gitlab.exceptions from gitlab.v4.objects import ( Group, User, GroupMember, + ProjectMember, GroupMergeRequest, Issue, MergeRequest, @@ -33,6 +33,7 @@ from port_ocean.context.event import event from port_ocean.core.models import Entity from port_ocean.utils.cache import cache_iterator_result +import functools PROJECTS_CACHE_KEY = "__cache_all_projects" GROUPS_CACHE_KEY = "__cache_all_groups" @@ -551,21 +552,32 @@ async def async_project_language_wrapper(cls, project: Project) -> dict[str, Any ) return {"__languages": {}} + # @classmethod + # async def enrich_project_with_extras(cls, project: Project) -> dict[str, Any]: + # tasks = [ + # cls.async_project_language_wrapper(project), + # ] + # tasks_extras = await asyncio.gather(*tasks) + # project_with_extras = project.asdict() + # project_with_extras.update( + # **{ + # key: value + # for task_extras in tasks_extras + # for key, value in task_extras.items() + # } + # ) + # return project_with_extras + @classmethod - async def enrich_project_with_extras(cls, project: Project) -> dict[str, Any]: + async def enrich_project_with_extras(cls, project: Project) -> Project: tasks = [ cls.async_project_language_wrapper(project), ] tasks_extras = await asyncio.gather(*tasks) - project_with_extras = project.asdict() - project_with_extras.update( - **{ - key: value - for task_extras in tasks_extras - for key, value in task_extras.items() - } - ) - return project_with_extras + for task_extras in tasks_extras: + for key, value in task_extras.items(): + setattr(project, key, value) # Update the project object + return project @staticmethod def validate_file_is_directory( @@ -701,30 +713,100 @@ async def get_all_issues(self, group: Group) -> typing.AsyncIterator[List[Issue] issues: List[Issue] = typing.cast(List[Issue], issues_batch) yield issues - def should_run_for_members(self, member: GroupMember): - port_app_config: GitlabPortAppConfig = typing.cast( - "GitlabPortAppConfig", event.port_app_config - ) - include_bot_members = port_app_config.include_bot_members - return include_bot_members or not member.username.__contains__("bot") + def should_run_for_members(self, include_bot_members: bool, member: GroupMember): + return ( + include_bot_members or not member.is_bot + ) # member.username.__contains__("bot") + + async def get_all_project_members( + self, + project: Project, + include_inherited_members: bool = False, + include_bot_members: bool = True, + ) -> typing.AsyncIterator[List[ProjectMember]]: + """ + Fetches all members of a project + :param project: Project object + :param include_inherited_members: Whether to include members inherited through ancestor groups + :return: List of ProjectMember objects + """ + try: + logger.info(f"Fetching all members of project {project.name}") + fetch_func = ( + project.members_all.list + if include_inherited_members + else project.members.list + ) + validation_func = functools.partial( + self.should_run_for_members, include_bot_members + ) + + async for members_batch in AsyncFetcher.fetch_batch( + fetch_func=fetch_func, + validation_func=validation_func, + pagination="offset", + order_by="id", + sort="asc", + ): + members: List[ProjectMember] = typing.cast( + List[ProjectMember], members_batch + ) + logger.info( + f"Queried {len(members)} members {[member.username for member in members]} from {project.name}" + ) + yield members + except Exception as e: + logger.error(f"Failed to get members for project={project.name}. error={e}") + return + + async def enrich_project_with_members( + self, + project: Project, + include_inherited_members: bool = False, + include_bot_members: bool = True, + include_public_email: bool = False, + ) -> dict[str, Any]: + project_members = [] + async for members in self.get_all_project_members( + project, include_inherited_members, include_bot_members + ): + if include_public_email: + tasks = [ + self.enrich_member_with_public_email(member) for member in members + ] + project_members.extend(await asyncio.gather(*tasks)) + else: + project_members.extend(member.asdict() for member in members) + + project_dict: dict[str, Any] = project.asdict() + project_dict["__members"] = project_members + return project_dict async def get_all_group_members( - self, group: Group, include_inherited: bool = False + self, + group: Group, + include_inherited_members: bool = False, + include_bot_members: bool = True, ) -> typing.AsyncIterator[List[GroupMember]]: """ Fetches all members of a group :param group: Group object - :param include_inherited: Whether to include members inherited through ancestor groups + :param include_inherited_members: Whether to include members inherited through ancestor groups :return: List of GroupMember objects """ try: logger.info(f"Fetching all members of group {group.name}") fetch_func = ( - group.members_all.list if include_inherited else group.members.list + group.members_all.list + if include_inherited_members + else group.members.list + ) + validation_func = functools.partial( + self.should_run_for_members, include_bot_members ) async for members_batch in AsyncFetcher.fetch_batch( fetch_func=fetch_func, - validation_func=self.should_run_for_members, + validation_func=validation_func, pagination="offset", order_by="id", sort="asc", @@ -749,7 +831,7 @@ async def get_unsynced_group_members( MEMBERS_CACHE_KEY, {} ).setdefault(self.gitlab_client.private_token, []) async for members_batch in self.get_all_group_members( - group, include_inherited=True + group, include_inherited_members=True ): unsynced_members = [ member for member in members_batch if member.id not in cached_member_ids @@ -765,10 +847,16 @@ async def get_unsynced_group_members( yield unsynced_members async def enrich_group_with_members( - self, group: Group, include_public_email: bool = False + self, + group: Group, + include_public_email: bool = False, + include_inherited_members: bool = False, + include_bot_members: bool = True, ) -> dict[str, Any]: group_members = [] - async for members in self.get_all_group_members(group): + async for members in self.get_all_group_members( + group, include_inherited_members, include_bot_members + ): if include_public_email: tasks = [ self.enrich_member_with_public_email(member) for member in members @@ -782,7 +870,7 @@ async def enrich_group_with_members( return group_dict async def enrich_member_with_public_email( - self, member: GroupMember + self, member: GroupMember | ProjectMember ) -> dict[str, Any]: user: User = await self.get_user(member.id) member_dict: dict[str, Any] = member.asdict() @@ -808,22 +896,22 @@ async def get_user(self, user_id: str) -> User: ] = user return user - async def get_group_member( - self, group: Group, member_id: int - ) -> Optional[GroupMember]: - try: - - logger.info(f"fetching group member {member_id} from group {group.id}") - result = await AsyncFetcher.fetch_single(group.members.get, member_id) - group_member = typing.cast(GroupMember, result) - return group_member if self.should_run_for_members(group_member) else None - - except gitlab.exceptions.GitlabGetError as err: - if err.response_code == 404: - logger.warning(f"Group Member with ID {member_id} not found (404).") - return None - logger.error(f"Failed to fetch group with ID {member_id}: {err}") - raise + # async def get_group_member( + # self, group: Group, member_id: int + # ) -> Optional[GroupMember]: + # try: + + # logger.info(f"fetching group member {member_id} from group {group.id}") + # result = await AsyncFetcher.fetch_single(group.members.get, member_id) + # group_member = typing.cast(GroupMember, result) + # return group_member if self.should_run_for_members(group_member) else None + + # except gitlab.exceptions.GitlabGetError as err: + # if err.response_code == 404: + # logger.warning(f"Group Member with ID {member_id} not found (404).") + # return None + # logger.error(f"Failed to fetch group with ID {member_id}: {err}") + # raise async def get_entities_diff( self, diff --git a/integrations/gitlab/gitlab_integration/ocean.py b/integrations/gitlab/gitlab_integration/ocean.py index c6be6a745a..2b5dc5a834 100644 --- a/integrations/gitlab/gitlab_integration/ocean.py +++ b/integrations/gitlab/gitlab_integration/ocean.py @@ -14,8 +14,8 @@ from gitlab_integration.events.setup import setup_application from gitlab_integration.git_integration import ( GitlabResourceConfig, - GitlabGroupWithMembersResourceConfig, GitLabFilesResourceConfig, + GitlabObjectWithMembersResourceConfig, ) from gitlab_integration.utils import ObjectKind, get_cached_all_services from port_ocean.context.event import event @@ -132,15 +132,26 @@ async def resync_groups(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: @ocean.on_resync(ObjectKind.GROUPWITHMEMBERS) async def resync_groups_with_members(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: - gitlab_resource_config: GitlabGroupWithMembersResourceConfig = typing.cast( - GitlabGroupWithMembersResourceConfig, event.resource_config - ) - enrich_with_public_email = gitlab_resource_config.selector.enrich_with_public_email for service in get_cached_all_services(): + group_with_members_resource_config: GitlabObjectWithMembersResourceConfig = ( + typing.cast(GitlabObjectWithMembersResourceConfig, event.resource_config) + ) + group_with_members_selector = group_with_members_resource_config.selector + include_inherited_members = ( + group_with_members_selector.include_inherited_members + ) + include_public_email = group_with_members_selector.include_public_email + include_bot_members = group_with_members_selector.include_bot_members + async for groups_batch in service.get_all_groups(): tasks = [ - service.enrich_group_with_members(group, enrich_with_public_email) + service.enrich_group_with_members( + group, + include_public_email, + include_inherited_members, + include_bot_members, + ) for group in groups_batch ] enriched_groups = await asyncio.gather(*tasks) @@ -148,7 +159,7 @@ async def resync_groups_with_members(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: @ocean.on_resync(ObjectKind.PROJECT) -async def on_resync(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: +async def resync_projects(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: for service in get_cached_all_services(): masked_token = len(str(service.gitlab_client.private_token)[:-4]) * "*" logger.info(f"fetching projects for token {masked_token}") @@ -172,7 +183,60 @@ async def on_resync(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: logger.info( f"Finished Processing extras for {projects_processed_in_full_batch}/{len(projects)} projects in batch" ) - yield enriched_projects + yield [ + enriched_project.asict() for enriched_project in enriched_projects + ] + + +@ocean.on_resync(ObjectKind.PROJECTWITHMEMBERS) +async def resync_project_with_members(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: + + for service in get_cached_all_services(): + + project_with_members_resource_config: GitlabObjectWithMembersResourceConfig = ( + typing.cast(GitlabObjectWithMembersResourceConfig, event.resource_config) + ) + if not isinstance( + project_with_members_resource_config, GitlabObjectWithMembersResourceConfig + ): + return + + project_with_members_selector = project_with_members_resource_config.selector + include_inherited_members = ( + project_with_members_selector.include_inherited_members + ) + include_bot_members = project_with_members_selector.include_bot_members + include_public_email = project_with_members_selector.include_public_email + + async for projects in service.get_all_projects(): + projects_batch_iter = iter(projects) + projects_processed_in_full_batch = 0 + while projects_batch := tuple( + islice(projects_batch_iter, PROJECT_RESYNC_BATCH_SIZE) + ): + projects_processed_in_full_batch += len(projects_batch) + logger.info( + f"Processing extras for {projects_processed_in_full_batch}/{len(projects)} projects in batch" + ) + tasks = [ + service.enrich_project_with_extras(project) + for project in projects_batch + ] + projects_enriched_with_extras = await asyncio.gather(*tasks) + logger.info( + f"Finished Processing extras for {projects_processed_in_full_batch}/{len(projects)} projects in batch" + ) + members_tasks = [ + service.enrich_project_with_members( + project, + include_inherited_members, + include_bot_members, + include_public_email, + ) + for project in projects_enriched_with_extras + ] + projects_enriched_with_members = await asyncio.gather(*members_tasks) + yield projects_enriched_with_members @ocean.on_resync(ObjectKind.FOLDER) diff --git a/integrations/gitlab/gitlab_integration/utils.py b/integrations/gitlab/gitlab_integration/utils.py index af3c550ff9..7621f5a656 100644 --- a/integrations/gitlab/gitlab_integration/utils.py +++ b/integrations/gitlab/gitlab_integration/utils.py @@ -52,5 +52,6 @@ class ObjectKind: PROJECT = "project" FOLDER = "folder" MEMBER = "member" - GROUPWITHMEMBERS = "group-with-members" FILE = "file" + GROUPWITHMEMBERS = "group-with-members" + PROJECTWITHMEMBERS = "project-with-members" From 0056a276c3a604bc2ea51b89b9cb8ada596ec27b Mon Sep 17 00:00:00 2001 From: mkarmah Date: Mon, 11 Nov 2024 16:30:54 +0000 Subject: [PATCH 33/45] checking for bot members in username works for both webhooks and requests responses --- integrations/gitlab/gitlab_integration/gitlab_service.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/integrations/gitlab/gitlab_integration/gitlab_service.py b/integrations/gitlab/gitlab_integration/gitlab_service.py index ec7071d6aa..b8a77682e4 100644 --- a/integrations/gitlab/gitlab_integration/gitlab_service.py +++ b/integrations/gitlab/gitlab_integration/gitlab_service.py @@ -714,9 +714,7 @@ async def get_all_issues(self, group: Group) -> typing.AsyncIterator[List[Issue] yield issues def should_run_for_members(self, include_bot_members: bool, member: GroupMember): - return ( - include_bot_members or not member.is_bot - ) # member.username.__contains__("bot") + return include_bot_members or not member.username.__contains__("bot") async def get_all_project_members( self, From be3dc5af010a8d25ac285e8d946e01b852850c05 Mon Sep 17 00:00:00 2001 From: mkarmah Date: Mon, 11 Nov 2024 17:34:32 +0000 Subject: [PATCH 34/45] clean unused functions --- .../gitlab_integration/gitlab_service.py | 44 +------------------ 1 file changed, 2 insertions(+), 42 deletions(-) diff --git a/integrations/gitlab/gitlab_integration/gitlab_service.py b/integrations/gitlab/gitlab_integration/gitlab_service.py index b8a77682e4..ad95cd4f15 100644 --- a/integrations/gitlab/gitlab_integration/gitlab_service.py +++ b/integrations/gitlab/gitlab_integration/gitlab_service.py @@ -726,6 +726,7 @@ async def get_all_project_members( Fetches all members of a project :param project: Project object :param include_inherited_members: Whether to include members inherited through ancestor groups + :param include_bot_members: Whether to include bot members (tokens, etc.) :return: List of ProjectMember objects """ try: @@ -820,30 +821,6 @@ async def get_all_group_members( logger.error(f"Failed to get members for group={group.name}. error={e}") return - async def get_unsynced_group_members( - self, group: Group - ) -> typing.AsyncIterator[List[GroupMember]]: - logger.info(f"Fetching members unique to group {group.name}") - - cached_member_ids = event.attributes.setdefault( - MEMBERS_CACHE_KEY, {} - ).setdefault(self.gitlab_client.private_token, []) - async for members_batch in self.get_all_group_members( - group, include_inherited_members=True - ): - unsynced_members = [ - member for member in members_batch if member.id not in cached_member_ids - ] - - if unsynced_members: - cached_member_ids.extend(member.id for member in unsynced_members) - - logger.info( - f"Found {len(unsynced_members)} members " - f"{[member.username for member in unsynced_members]} unique to {group.name}" - ) - yield unsynced_members - async def enrich_group_with_members( self, group: Group, @@ -872,7 +849,7 @@ async def enrich_member_with_public_email( ) -> dict[str, Any]: user: User = await self.get_user(member.id) member_dict: dict[str, Any] = member.asdict() - member_dict.update({"__public_email": user.public_email}) + member_dict["__public_email"] = user.public_email return member_dict async def get_user(self, user_id: str) -> User: @@ -894,23 +871,6 @@ async def get_user(self, user_id: str) -> User: ] = user return user - # async def get_group_member( - # self, group: Group, member_id: int - # ) -> Optional[GroupMember]: - # try: - - # logger.info(f"fetching group member {member_id} from group {group.id}") - # result = await AsyncFetcher.fetch_single(group.members.get, member_id) - # group_member = typing.cast(GroupMember, result) - # return group_member if self.should_run_for_members(group_member) else None - - # except gitlab.exceptions.GitlabGetError as err: - # if err.response_code == 404: - # logger.warning(f"Group Member with ID {member_id} not found (404).") - # return None - # logger.error(f"Failed to fetch group with ID {member_id}: {err}") - # raise - async def get_entities_diff( self, project: Project, From c1289c7c376cccdaba9d9ab16016437dc77f1c26 Mon Sep 17 00:00:00 2001 From: mkarmah Date: Mon, 11 Nov 2024 19:34:58 +0000 Subject: [PATCH 35/45] refactored codebase against DRY --- .../gitlab_integration/events/hooks/base.py | 77 +++++---- .../gitlab_integration/events/hooks/group.py | 2 +- .../events/hooks/members.py | 2 +- .../gitlab_integration/events/hooks/push.py | 3 + .../gitlab_integration/gitlab_service.py | 149 ++++-------------- .../gitlab/gitlab_integration/ocean.py | 6 +- 6 files changed, 81 insertions(+), 158 deletions(-) diff --git a/integrations/gitlab/gitlab_integration/events/hooks/base.py b/integrations/gitlab/gitlab_integration/events/hooks/base.py index 6a0de650e2..b1d68681c2 100644 --- a/integrations/gitlab/gitlab_integration/events/hooks/base.py +++ b/integrations/gitlab/gitlab_integration/events/hooks/base.py @@ -3,6 +3,7 @@ import typing from loguru import logger from gitlab.v4.objects import Project, Group +from gitlab.base import RESTObject from gitlab_integration.gitlab_service import GitlabService from port_ocean.context.ocean import ocean from port_ocean.context.event import event @@ -26,6 +27,43 @@ def __init__( async def on_hook(self, event: str, body: dict[str, Any]) -> None: pass + async def _register_object_with_members(self, kind: str, gitlab_object: RESTObject): + resource_configs = typing.cast( + GitlabPortAppConfig, event.port_app_config + ).resources + + matching_resource_configs = [ + resource_config + for resource_config in resource_configs + if ( + resource_config.kind == kind + and isinstance(resource_config.selector, GitlabMemberSelector) + ) + ] + + if not matching_resource_configs: + logger.info( + "Resource not found in port app config, update port app config to include the resource type" + ) + return + + for resource_config in matching_resource_configs: + include_public_email = resource_config.selector.include_public_email + include_bot_members = resource_config.selector.include_bot_members + include_inherited_members = ( + resource_config.selector.include_inherited_members + ) + + object_result: Dict[str, Any] = ( + await self.gitlab_service.enrich_object_with_members( + gitlab_object, + include_public_email, + include_bot_members, + include_inherited_members, + ) + ) + await ocean.register_raw(resource_config.kind, [object_result]) + class ProjectHandler(HookHandler): async def on_hook(self, event: str, body: dict[str, Any]) -> None: @@ -73,42 +111,3 @@ async def _on_hook( async def _register_group(self, kind: str, gitlab_group: Dict[str, Any]) -> None: await ocean.register_raw(kind, [gitlab_group]) - - async def _register_group_with_members( - self, kind: str, gitlab_group: Group - ) -> None: - - resource_configs = typing.cast( - GitlabPortAppConfig, event.port_app_config - ).resources - - matching_resource_configs = [ - resource_config - for resource_config in resource_configs - if ( - resource_config.kind == kind - and isinstance(resource_config.selector, GitlabMemberSelector) - ) - ] - - if not matching_resource_configs: - logger.info( - "Group With Member resource not found in port app config, update port app config to include the resource type" - ) - return - for resource_config in matching_resource_configs: - include_public_email = resource_config.selector.include_public_email - include_bot_members = resource_config.selector.include_bot_members - include_inherited_members = ( - resource_config.selector.include_inherited_members - ) - - gitlab_group_result: Dict[str, Any] = ( - await self.gitlab_service.enrich_group_with_members( - gitlab_group, - include_public_email, - include_bot_members, - include_inherited_members, - ) - ) - await self._register_group(resource_config.kind, gitlab_group_result) diff --git a/integrations/gitlab/gitlab_integration/events/hooks/group.py b/integrations/gitlab/gitlab_integration/events/hooks/group.py index cf8c3305d0..9b44e80c45 100644 --- a/integrations/gitlab/gitlab_integration/events/hooks/group.py +++ b/integrations/gitlab/gitlab_integration/events/hooks/group.py @@ -23,7 +23,7 @@ async def _on_hook( ObjectKind.GROUP, gitlab_group.asdict(), ) - await self._register_group_with_members( + await self._register_object_with_members( ObjectKind.GROUPWITHMEMBERS, gitlab_group ) logger.info(f"Registered group {body['group_id']}") diff --git a/integrations/gitlab/gitlab_integration/events/hooks/members.py b/integrations/gitlab/gitlab_integration/events/hooks/members.py index 6e699132bc..5f7050194b 100644 --- a/integrations/gitlab/gitlab_integration/events/hooks/members.py +++ b/integrations/gitlab/gitlab_integration/events/hooks/members.py @@ -20,7 +20,7 @@ async def _on_hook( if gitlab_group: event_name, user_username = (body["event_name"], body["user_username"]) logger.info(f"Handling {event_name} for group member {user_username}") - await self._register_group_with_members( + await self._register_object_with_members( ObjectKind.GROUPWITHMEMBERS, gitlab_group ) else: diff --git a/integrations/gitlab/gitlab_integration/events/hooks/push.py b/integrations/gitlab/gitlab_integration/events/hooks/push.py index 43c22fc70f..0d597f0aa1 100644 --- a/integrations/gitlab/gitlab_integration/events/hooks/push.py +++ b/integrations/gitlab/gitlab_integration/events/hooks/push.py @@ -104,6 +104,9 @@ async def _on_hook(self, body: dict[str, Any], gitlab_project: Project) -> None: gitlab_project ) await ocean.register_raw(ObjectKind.PROJECT, [enriched_project.asdict()]) + await self._register_object_with_members( + ObjectKind.PROJECTWITHMEMBERS, gitlab_project + ) else: logger.debug( diff --git a/integrations/gitlab/gitlab_integration/gitlab_service.py b/integrations/gitlab/gitlab_integration/gitlab_service.py index ad95cd4f15..ad2a1acd14 100644 --- a/integrations/gitlab/gitlab_integration/gitlab_service.py +++ b/integrations/gitlab/gitlab_integration/gitlab_service.py @@ -9,12 +9,10 @@ import anyio.to_thread import yaml from gitlab import Gitlab, GitlabError, GitlabList -from gitlab.base import RESTObject +from gitlab.base import RESTObject, RESTObjectList from gitlab.v4.objects import ( Group, User, - GroupMember, - ProjectMember, GroupMergeRequest, Issue, MergeRequest, @@ -552,22 +550,6 @@ async def async_project_language_wrapper(cls, project: Project) -> dict[str, Any ) return {"__languages": {}} - # @classmethod - # async def enrich_project_with_extras(cls, project: Project) -> dict[str, Any]: - # tasks = [ - # cls.async_project_language_wrapper(project), - # ] - # tasks_extras = await asyncio.gather(*tasks) - # project_with_extras = project.asdict() - # project_with_extras.update( - # **{ - # key: value - # for task_extras in tasks_extras - # for key, value in task_extras.items() - # } - # ) - # return project_with_extras - @classmethod async def enrich_project_with_extras(cls, project: Project) -> Project: tasks = [ @@ -713,96 +695,59 @@ async def get_all_issues(self, group: Group) -> typing.AsyncIterator[List[Issue] issues: List[Issue] = typing.cast(List[Issue], issues_batch) yield issues - def should_run_for_members(self, include_bot_members: bool, member: GroupMember): + def should_run_for_members(self, include_bot_members: bool, member: RESTObject): return include_bot_members or not member.username.__contains__("bot") - async def get_all_project_members( - self, - project: Project, - include_inherited_members: bool = False, - include_bot_members: bool = True, - ) -> typing.AsyncIterator[List[ProjectMember]]: - """ - Fetches all members of a project - :param project: Project object - :param include_inherited_members: Whether to include members inherited through ancestor groups - :param include_bot_members: Whether to include bot members (tokens, etc.) - :return: List of ProjectMember objects - """ - try: - logger.info(f"Fetching all members of project {project.name}") - fetch_func = ( - project.members_all.list - if include_inherited_members - else project.members.list - ) - validation_func = functools.partial( - self.should_run_for_members, include_bot_members - ) - - async for members_batch in AsyncFetcher.fetch_batch( - fetch_func=fetch_func, - validation_func=validation_func, - pagination="offset", - order_by="id", - sort="asc", - ): - members: List[ProjectMember] = typing.cast( - List[ProjectMember], members_batch - ) - logger.info( - f"Queried {len(members)} members {[member.username for member in members]} from {project.name}" - ) - yield members - except Exception as e: - logger.error(f"Failed to get members for project={project.name}. error={e}") - return - - async def enrich_project_with_members( + async def enrich_object_with_members( self, - project: Project, + obj: RESTObject, include_inherited_members: bool = False, include_bot_members: bool = True, include_public_email: bool = False, ) -> dict[str, Any]: - project_members = [] - async for members in self.get_all_project_members( - project, include_inherited_members, include_bot_members + """ + Enriches an object (e.g., Project or Group) with its members. + """ + members_list = [] + async for members in self.get_all_object_members( + obj, include_inherited_members, include_bot_members ): if include_public_email: tasks = [ self.enrich_member_with_public_email(member) for member in members ] - project_members.extend(await asyncio.gather(*tasks)) + members_list.extend(await asyncio.gather(*tasks)) else: - project_members.extend(member.asdict() for member in members) + members_list.extend(member.asdict() for member in members) - project_dict: dict[str, Any] = project.asdict() - project_dict["__members"] = project_members - return project_dict + obj_dict: dict[str, Any] = obj.asdict() + obj_dict["__members"] = members_list + return obj_dict - async def get_all_group_members( + async def get_all_object_members( self, - group: Group, + obj: RESTObject, include_inherited_members: bool = False, include_bot_members: bool = True, - ) -> typing.AsyncIterator[List[GroupMember]]: + ) -> AsyncIterator[RESTObjectList]: """ - Fetches all members of a group - :param group: Group object - :param include_inherited_members: Whether to include members inherited through ancestor groups - :return: List of GroupMember objects + Fetches all members of an object (e.g., Project or Group) generically. """ try: - logger.info(f"Fetching all members of group {group.name}") - fetch_func = ( - group.members_all.list - if include_inherited_members - else group.members.list - ) + obj_name = getattr(obj, "name", "unknown") + logger.info(f"Fetching all members of {obj_name}") + + members_attr = "members_all" if include_inherited_members else "members" + members_manager = getattr(obj, members_attr, None) + if not members_manager: + raise AttributeError(f"Object does not have attribute '{members_attr}'") + + fetch_func = members_manager.list + validation_func = functools.partial( self.should_run_for_members, include_bot_members ) + async for members_batch in AsyncFetcher.fetch_batch( fetch_func=fetch_func, validation_func=validation_func, @@ -810,42 +755,18 @@ async def get_all_group_members( order_by="id", sort="asc", ): - members: List[GroupMember] = typing.cast( - List[GroupMember], members_batch - ) + members: RESTObjectList = typing.cast(RESTObjectList, members_batch) + logger.info( - f"Queried {len(members)} members {[member.username for member in members]} from {group.name}" + f"Queried {len(members)} members {[member.username for member in members]} from {obj_name}" ) yield members except Exception as e: - logger.error(f"Failed to get members for group={group.name}. error={e}") + logger.error(f"Failed to get members for object='{obj_name}'. Error: {e}") return - async def enrich_group_with_members( - self, - group: Group, - include_public_email: bool = False, - include_inherited_members: bool = False, - include_bot_members: bool = True, - ) -> dict[str, Any]: - group_members = [] - async for members in self.get_all_group_members( - group, include_inherited_members, include_bot_members - ): - if include_public_email: - tasks = [ - self.enrich_member_with_public_email(member) for member in members - ] - group_members.extend(await asyncio.gather(*tasks)) - else: - group_members.extend(member.asdict() for member in members) - - group_dict: dict[str, Any] = group.asdict() - group_dict["__members"] = group_members - return group_dict - async def enrich_member_with_public_email( - self, member: GroupMember | ProjectMember + self, member: RESTObject ) -> dict[str, Any]: user: User = await self.get_user(member.id) member_dict: dict[str, Any] = member.asdict() diff --git a/integrations/gitlab/gitlab_integration/ocean.py b/integrations/gitlab/gitlab_integration/ocean.py index 2b5dc5a834..98bf9c77a1 100644 --- a/integrations/gitlab/gitlab_integration/ocean.py +++ b/integrations/gitlab/gitlab_integration/ocean.py @@ -146,11 +146,11 @@ async def resync_groups_with_members(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: async for groups_batch in service.get_all_groups(): tasks = [ - service.enrich_group_with_members( + service.enrich_object_with_members( group, - include_public_email, include_inherited_members, include_bot_members, + include_public_email, ) for group in groups_batch ] @@ -227,7 +227,7 @@ async def resync_project_with_members(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: f"Finished Processing extras for {projects_processed_in_full_batch}/{len(projects)} projects in batch" ) members_tasks = [ - service.enrich_project_with_members( + service.enrich_object_with_members( project, include_inherited_members, include_bot_members, From 56b031ca282f39c1807ced569c4357f31952eb96 Mon Sep 17 00:00:00 2001 From: mkarmah Date: Tue, 12 Nov 2024 07:38:44 +0000 Subject: [PATCH 36/45] added tests --- .../gitlab_integration/events/hooks/push.py | 2 +- .../gitlab_integration/test_gitlab_service.py | 213 +++++++++++++++++- 2 files changed, 213 insertions(+), 2 deletions(-) diff --git a/integrations/gitlab/gitlab_integration/events/hooks/push.py b/integrations/gitlab/gitlab_integration/events/hooks/push.py index 0d597f0aa1..8cbc804415 100644 --- a/integrations/gitlab/gitlab_integration/events/hooks/push.py +++ b/integrations/gitlab/gitlab_integration/events/hooks/push.py @@ -105,7 +105,7 @@ async def _on_hook(self, body: dict[str, Any], gitlab_project: Project) -> None: ) await ocean.register_raw(ObjectKind.PROJECT, [enriched_project.asdict()]) await self._register_object_with_members( - ObjectKind.PROJECTWITHMEMBERS, gitlab_project + ObjectKind.PROJECTWITHMEMBERS, enriched_project ) else: diff --git a/integrations/gitlab/tests/gitlab_integration/test_gitlab_service.py b/integrations/gitlab/tests/gitlab_integration/test_gitlab_service.py index bd5accf0e3..d6f7c276ec 100644 --- a/integrations/gitlab/tests/gitlab_integration/test_gitlab_service.py +++ b/integrations/gitlab/tests/gitlab_integration/test_gitlab_service.py @@ -1,6 +1,9 @@ from typing import Any -from unittest.mock import MagicMock +from unittest.mock import MagicMock, Mock, AsyncMock from gitlab_integration.gitlab_service import GitlabService +from gitlab.base import RESTObject +from gitlab.v4.objects import User +import pytest def mock_search(page: int, *args: Any, **kwargs: Any) -> Any: @@ -213,3 +216,211 @@ async def test_get_and_parse_single_file_json( # Assert assert expected_parsed_single_file == actual_parsed_single_file + + +class MockMember(RESTObject): + def __init__(self, id, username): + self.id = id + self.username = username + + def asdict(self): + return {"id": self.id, "username": self.username} + + def __setattr__(self, name, value): + self.__dict__[name] = value + + +class MockGroup(RESTObject): + def __init__(self, id, name): + self.id = id + self.name = name + self.members = self.MockMembers() + self.members_all = self.MockMembersAll() + + class MockMembers: + def list(self, page, *args: Any, **kwargs: Any): + if page == 1: + return [ + MockMember(1, "user1"), + MockMember(1, "bot_user1"), + ] + elif page == 2: + return [ + MockMember(2, "user2"), + MockMember(2, "bot_user2"), + ] + elif page == 3: + return [ + MockMember(3, "user3"), + MockMember(3, "bot_user3"), + ] + return + + class MockMembersAll: + def list(self, page, *args: Any, **kwargs: Any): + if page == 1: + return [ + MockMember(1, "user1"), + MockMember(1, "bot_user1"), + MockMember(1, "inherited_member_1"), + ] + elif page == 2: + return [ + MockMember(2, "user2"), + MockMember(2, "bot_user2"), + MockMember(2, "inherited_member_2"), + ] + elif page == 3: + return [ + MockMember(3, "user3"), + MockMember(3, "bot_user3"), + MockMember(3, "inherited_member_3"), + ] + return + + def asdict(self): + return { + "id": self.id, + "name": self.name, + "path": f"get{self.name}-path", + "full_name": self.name, + "full_path": f"get{self.name}-path", + } + + def __setattr__(self, name, value): + self.__dict__[name] = value + + +def test_should_run_for_members( + monkeypatch: Any, mocked_gitlab_service: GitlabService +) -> None: + + bot_member = Mock(spec=RESTObject) + bot_member.username = "bot_user" + + non_bot_member = Mock(spec=RESTObject) + non_bot_member.username = "regular_user" + + assert mocked_gitlab_service.should_run_for_members(True, bot_member) is True + assert mocked_gitlab_service.should_run_for_members(True, non_bot_member) is True + + assert mocked_gitlab_service.should_run_for_members(False, bot_member) is False + assert mocked_gitlab_service.should_run_for_members(False, non_bot_member) is True + + +@pytest.mark.asyncio +async def test_enrich_member_with_public_email( + monkeypatch: Any, mocked_gitlab_service: GitlabService +) -> None: + + # Arrange + member = MockMember(id="123", username="test_user") + mock_user = Mock(spec=User) + mock_user.public_email = "user@example.com" + + monkeypatch.setattr( + mocked_gitlab_service, "get_user", AsyncMock(return_value=mock_user) + ) + + # Act + enriched_member = await mocked_gitlab_service.enrich_member_with_public_email( + member + ) + + # Assert + assert enriched_member == { + "id": "123", + "username": "test_user", + "__public_email": "user@example.com", + } + mocked_gitlab_service.get_user.assert_awaited_once_with("123") # type: ignore + + +@pytest.mark.asyncio +async def test_get_all_object_members( + monkeypatch: Any, mocked_gitlab_service: GitlabService +) -> None: + + # Arrange + obj = MockGroup(123, "test_project") + + # Act + from typing import List + + results_without_inherited_members: List[RESTObject] = [] + async for members in mocked_gitlab_service.get_all_object_members( + obj, include_inherited_members=False, include_bot_members=True + ): + results_without_inherited_members.extend(members) + + results_with_inherited_members: List[RESTObject] = [] + async for members in mocked_gitlab_service.get_all_object_members( + obj, include_inherited_members=True, include_bot_members=True + ): + results_with_inherited_members.extend(members) + + results_without_bot_members: List[RESTObject] = [] + async for members in mocked_gitlab_service.get_all_object_members( + obj, include_inherited_members=True, include_bot_members=False + ): + results_without_bot_members.extend(members) + + # Assert + assert len(results_without_inherited_members) == 6 + assert results_without_inherited_members[0].username == "user1" + assert results_without_inherited_members[1].username == "bot_user1" + assert len(results_with_inherited_members) == 9 + assert len(results_without_bot_members) == 6 + + +@pytest.mark.asyncio +async def test_enrich_object_with_members( + monkeypatch: Any, mocked_gitlab_service: GitlabService +) -> None: + + # Arrange + obj = MockGroup(123, "test_project") + + monkeypatch.setattr( + mocked_gitlab_service, + "enrich_member_with_public_email", + AsyncMock( + side_effect=[ + {"id": 1, "username": "user1", "__public_email": "user1@example.com"}, + {"id": 2, "username": "user2", "__public_email": "user2@example.com"}, + {"id": 3, "username": "user2", "__public_email": "user3@example.com"}, + ] + * 2 + ), + ) + + # Act + enriched_obj_with_public_email = ( + await mocked_gitlab_service.enrich_object_with_members( + obj, + include_inherited_members=False, + include_bot_members=True, + include_public_email=True, + ) + ) + + enriched_obj = await mocked_gitlab_service.enrich_object_with_members( + obj, + include_inherited_members=False, + include_bot_members=True, + include_public_email=False, + ) + + # Assert + assert enriched_obj["name"] == "test_project" + assert len(enriched_obj["__members"]) == 6 + assert enriched_obj["__members"][0] == {"id": 1, "username": "user1"} + + assert enriched_obj_with_public_email["name"] == "test_project" + assert len(enriched_obj_with_public_email["__members"]) == 6 + assert enriched_obj_with_public_email["__members"][0] == { + "id": 1, + "username": "user1", + "__public_email": "user1@example.com", + } + mocked_gitlab_service.enrich_member_with_public_email.assert_awaited() # type: ignore From 7f985005ba43d71d28fabf0839eb5fd3509111aa Mon Sep 17 00:00:00 2001 From: Michael Kofi Armah Date: Wed, 13 Nov 2024 09:59:35 +0000 Subject: [PATCH 37/45] Update integrations/gitlab/gitlab_integration/git_integration.py Co-authored-by: Tom Tankilevitch <59158507+Tankilevitch@users.noreply.github.com> --- integrations/gitlab/gitlab_integration/git_integration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/gitlab/gitlab_integration/git_integration.py b/integrations/gitlab/gitlab_integration/git_integration.py index a7d00f5b34..34089f38bd 100644 --- a/integrations/gitlab/gitlab_integration/git_integration.py +++ b/integrations/gitlab/gitlab_integration/git_integration.py @@ -134,7 +134,7 @@ class GitlabMemberSelector(Selector): description="If set to true, the integration will include inherited members in the group members list. Default value is false", ) # The "include bot members" flag affects both the "group" and "member" kinds. - # To prevent inconsistencies, the behavior or value of this parameter should be consistent for both "groups" and "members". + # To prevent inconsistencies, the behavior or value of this parameter should be consistent for both "groups-with-members" and "project-with-members". # Therefore, it should be included at the top level of the configuration. include_bot_members: bool = Field( alias="includeBotMembers", From 8df02ae8b89b67ef404c254e65fdbc5af88af3cc Mon Sep 17 00:00:00 2001 From: mkarmah Date: Wed, 13 Nov 2024 12:06:08 +0000 Subject: [PATCH 38/45] addressed comments --- .../gitlab_integration/events/hooks/base.py | 2 ++ .../gitlab_integration/events/hooks/group.py | 15 ++++++++++----- .../gitlab/gitlab_integration/events/utils.py | 13 ------------- .../gitlab/gitlab_integration/git_integration.py | 3 --- .../gitlab/gitlab_integration/gitlab_service.py | 4 +--- integrations/gitlab/gitlab_integration/utils.py | 1 - 6 files changed, 13 insertions(+), 25 deletions(-) delete mode 100644 integrations/gitlab/gitlab_integration/events/utils.py diff --git a/integrations/gitlab/gitlab_integration/events/hooks/base.py b/integrations/gitlab/gitlab_integration/events/hooks/base.py index b1d68681c2..3ffbe15013 100644 --- a/integrations/gitlab/gitlab_integration/events/hooks/base.py +++ b/integrations/gitlab/gitlab_integration/events/hooks/base.py @@ -97,6 +97,8 @@ async def _on_hook(self, body: dict[str, Any], gitlab_project: Project) -> None: class GroupHandler(HookHandler): async def on_hook(self, event: str, body: dict[str, Any]) -> None: + logger.info(f"Handling {event}") + group_id = body.get("group_id", body.get("group", {}).get("id")) group = await self.gitlab_service.get_group(group_id) await self._on_hook(body, group) diff --git a/integrations/gitlab/gitlab_integration/events/hooks/group.py b/integrations/gitlab/gitlab_integration/events/hooks/group.py index 9b44e80c45..e2c6ffda12 100644 --- a/integrations/gitlab/gitlab_integration/events/hooks/group.py +++ b/integrations/gitlab/gitlab_integration/events/hooks/group.py @@ -15,9 +15,14 @@ class Groups(GroupHandler): async def _on_hook( self, body: dict[str, Any], gitlab_group: Optional[Group] ) -> None: - logger.info(f"Handling {body['event_name']} for group {body['group_id']}") + group_id = body.get("group_id") group_full_path = body.get("full_path") + event_name = body["event_name"] + + logger.info( + f"Handling event '{event_name}' for group with ID '{group_id}' and full path '{group_full_path}'" + ) if gitlab_group: await self._register_group( ObjectKind.GROUP, @@ -26,18 +31,18 @@ async def _on_hook( await self._register_object_with_members( ObjectKind.GROUPWITHMEMBERS, gitlab_group ) - logger.info(f"Registered group {body['group_id']}") + logger.info(f"Registered group {group_id}") elif ( group_full_path and self.gitlab_service.should_run_for_path(group_full_path) - and body["event_name"] in ("subgroup_destroy", "group_destroy") + and event_name in ("subgroup_destroy", "group_destroy") ): await ocean.unregister_raw(ObjectKind.GROUP, [body]) await ocean.unregister_raw(ObjectKind.GROUPWITHMEMBERS, [body]) - logger.info(f"Unregistered group {body['group_id']}") + logger.info(f"Unregistered group {group_id}") return else: logger.info( - f"Group {body['group_id']} was filtered for event {body['event_name']}. Skipping..." + f"Group {group_id} was filtered for event {event_name}. Skipping..." ) diff --git a/integrations/gitlab/gitlab_integration/events/utils.py b/integrations/gitlab/gitlab_integration/events/utils.py deleted file mode 100644 index 6a72e959ab..0000000000 --- a/integrations/gitlab/gitlab_integration/events/utils.py +++ /dev/null @@ -1,13 +0,0 @@ -from typing import Any, Dict - - -def remove_prefix_from_keys(prefix: str, data: Dict[str, Any]) -> Dict[str, Any]: - """ - Removes the prefix from dictionary keys. - Args: - prefix (str): The prefix to remove from the keys - data (dict[str, Any]): The original dictionary with keys that may start with the given prefix. - Returns: - dict[str, Any]: A new dictionary with `prefix` stripped from the keys. - """ - return {key.replace(prefix, "", 1): value for key, value in data.items()} diff --git a/integrations/gitlab/gitlab_integration/git_integration.py b/integrations/gitlab/gitlab_integration/git_integration.py index 34089f38bd..b1d0d78989 100644 --- a/integrations/gitlab/gitlab_integration/git_integration.py +++ b/integrations/gitlab/gitlab_integration/git_integration.py @@ -133,9 +133,6 @@ class GitlabMemberSelector(Selector): default=False, description="If set to true, the integration will include inherited members in the group members list. Default value is false", ) - # The "include bot members" flag affects both the "group" and "member" kinds. - # To prevent inconsistencies, the behavior or value of this parameter should be consistent for both "groups-with-members" and "project-with-members". - # Therefore, it should be included at the top level of the configuration. include_bot_members: bool = Field( alias="includeBotMembers", default=True, diff --git a/integrations/gitlab/gitlab_integration/gitlab_service.py b/integrations/gitlab/gitlab_integration/gitlab_service.py index ad2a1acd14..4eadd147ff 100644 --- a/integrations/gitlab/gitlab_integration/gitlab_service.py +++ b/integrations/gitlab/gitlab_integration/gitlab_service.py @@ -34,8 +34,6 @@ import functools PROJECTS_CACHE_KEY = "__cache_all_projects" -GROUPS_CACHE_KEY = "__cache_all_groups" -MEMBERS_CACHE_KEY = "__cache_all_members" USERS_CACHE_KEY = "__cache_all_users" MAX_ALLOWED_FILE_SIZE_IN_BYTES = 1024 * 1024 # 1MB @@ -706,7 +704,7 @@ async def enrich_object_with_members( include_public_email: bool = False, ) -> dict[str, Any]: """ - Enriches an object (e.g., Project or Group) with its members. + Enriches an object (e.g., Project or Group) with its members and optionally their public emails. """ members_list = [] async for members in self.get_all_object_members( diff --git a/integrations/gitlab/gitlab_integration/utils.py b/integrations/gitlab/gitlab_integration/utils.py index 7621f5a656..c39b1cc46e 100644 --- a/integrations/gitlab/gitlab_integration/utils.py +++ b/integrations/gitlab/gitlab_integration/utils.py @@ -51,7 +51,6 @@ class ObjectKind: PIPELINE = "pipeline" PROJECT = "project" FOLDER = "folder" - MEMBER = "member" FILE = "file" GROUPWITHMEMBERS = "group-with-members" PROJECTWITHMEMBERS = "project-with-members" From 48329c0f80ff3dd59a23a071607866dab2edcbd5 Mon Sep 17 00:00:00 2001 From: mkarmah Date: Wed, 13 Nov 2024 15:00:59 +0000 Subject: [PATCH 39/45] all enrichments are performed on object level directly --- .../gitlab_integration/events/hooks/base.py | 4 +- .../gitlab_integration/gitlab_service.py | 22 ++++----- .../gitlab/gitlab_integration/ocean.py | 9 ++-- .../gitlab_integration/test_gitlab_service.py | 45 +++++++++---------- 4 files changed, 39 insertions(+), 41 deletions(-) diff --git a/integrations/gitlab/gitlab_integration/events/hooks/base.py b/integrations/gitlab/gitlab_integration/events/hooks/base.py index 3ffbe15013..3821f210a4 100644 --- a/integrations/gitlab/gitlab_integration/events/hooks/base.py +++ b/integrations/gitlab/gitlab_integration/events/hooks/base.py @@ -54,7 +54,7 @@ async def _register_object_with_members(self, kind: str, gitlab_object: RESTObje resource_config.selector.include_inherited_members ) - object_result: Dict[str, Any] = ( + object_result: RESTObject = ( await self.gitlab_service.enrich_object_with_members( gitlab_object, include_public_email, @@ -62,7 +62,7 @@ async def _register_object_with_members(self, kind: str, gitlab_object: RESTObje include_inherited_members, ) ) - await ocean.register_raw(resource_config.kind, [object_result]) + await ocean.register_raw(resource_config.kind, [object_result.asdict()]) class ProjectHandler(HookHandler): diff --git a/integrations/gitlab/gitlab_integration/gitlab_service.py b/integrations/gitlab/gitlab_integration/gitlab_service.py index 4eadd147ff..46db07ec5e 100644 --- a/integrations/gitlab/gitlab_integration/gitlab_service.py +++ b/integrations/gitlab/gitlab_integration/gitlab_service.py @@ -702,7 +702,7 @@ async def enrich_object_with_members( include_inherited_members: bool = False, include_bot_members: bool = True, include_public_email: bool = False, - ) -> dict[str, Any]: + ) -> RESTObject: """ Enriches an object (e.g., Project or Group) with its members and optionally their public emails. """ @@ -711,16 +711,15 @@ async def enrich_object_with_members( obj, include_inherited_members, include_bot_members ): if include_public_email: - tasks = [ + tasks = ( self.enrich_member_with_public_email(member) for member in members - ] + ) members_list.extend(await asyncio.gather(*tasks)) else: - members_list.extend(member.asdict() for member in members) + members_list.extend(members) - obj_dict: dict[str, Any] = obj.asdict() - obj_dict["__members"] = members_list - return obj_dict + setattr(obj, "__members", [member.asdict() for member in members_list]) + return obj async def get_all_object_members( self, @@ -763,13 +762,10 @@ async def get_all_object_members( logger.error(f"Failed to get members for object='{obj_name}'. Error: {e}") return - async def enrich_member_with_public_email( - self, member: RESTObject - ) -> dict[str, Any]: + async def enrich_member_with_public_email(self, member: RESTObject) -> RESTObject: user: User = await self.get_user(member.id) - member_dict: dict[str, Any] = member.asdict() - member_dict["__public_email"] = user.public_email - return member_dict + setattr(member, "__public_email", user.public_email) + return member async def get_user(self, user_id: str) -> User: async with semaphore: diff --git a/integrations/gitlab/gitlab_integration/ocean.py b/integrations/gitlab/gitlab_integration/ocean.py index 98bf9c77a1..c939375a59 100644 --- a/integrations/gitlab/gitlab_integration/ocean.py +++ b/integrations/gitlab/gitlab_integration/ocean.py @@ -155,7 +155,7 @@ async def resync_groups_with_members(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: for group in groups_batch ] enriched_groups = await asyncio.gather(*tasks) - yield enriched_groups + yield [enriched_group.asdict() for enriched_group in enriched_groups] @ocean.on_resync(ObjectKind.PROJECT) @@ -184,7 +184,7 @@ async def resync_projects(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: f"Finished Processing extras for {projects_processed_in_full_batch}/{len(projects)} projects in batch" ) yield [ - enriched_project.asict() for enriched_project in enriched_projects + enriched_project.asdict() for enriched_project in enriched_projects ] @@ -236,7 +236,10 @@ async def resync_project_with_members(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: for project in projects_enriched_with_extras ] projects_enriched_with_members = await asyncio.gather(*members_tasks) - yield projects_enriched_with_members + yield [ + enriched_projects.asdict() + for enriched_projects in projects_enriched_with_members + ] @ocean.on_resync(ObjectKind.FOLDER) diff --git a/integrations/gitlab/tests/gitlab_integration/test_gitlab_service.py b/integrations/gitlab/tests/gitlab_integration/test_gitlab_service.py index d6f7c276ec..9427aa492b 100644 --- a/integrations/gitlab/tests/gitlab_integration/test_gitlab_service.py +++ b/integrations/gitlab/tests/gitlab_integration/test_gitlab_service.py @@ -224,7 +224,8 @@ def __init__(self, id, username): self.username = username def asdict(self): - return {"id": self.id, "username": self.username} + # return {"id": self.id, "username": self.username} + return self.__dict__ def __setattr__(self, name, value): self.__dict__[name] = value @@ -328,11 +329,8 @@ async def test_enrich_member_with_public_email( ) # Assert - assert enriched_member == { - "id": "123", - "username": "test_user", - "__public_email": "user@example.com", - } + member.__public_email = "user@example.com" + assert enriched_member.asdict() == member.asdict() mocked_gitlab_service.get_user.assert_awaited_once_with("123") # type: ignore @@ -379,19 +377,20 @@ async def test_enrich_object_with_members( ) -> None: # Arrange - obj = MockGroup(123, "test_project") + obj = MockGroup(123, "test_group") + obj2 = MockGroup(123, "test_group") + + user_1 = MockMember(1, "user1") + user_1.__setattr__("__public_email", "user1@example.com"), + user_2 = MockMember(2, "user2") + user_2.__setattr__("__public_email", "user2@example.com"), + user_3 = MockMember(3, "user3") + user_3.__setattr__("__public_email", "user3@example.com"), monkeypatch.setattr( mocked_gitlab_service, "enrich_member_with_public_email", - AsyncMock( - side_effect=[ - {"id": 1, "username": "user1", "__public_email": "user1@example.com"}, - {"id": 2, "username": "user2", "__public_email": "user2@example.com"}, - {"id": 3, "username": "user2", "__public_email": "user3@example.com"}, - ] - * 2 - ), + AsyncMock(side_effect=[user_1, user_2, user_3] * 2), ) # Act @@ -404,21 +403,21 @@ async def test_enrich_object_with_members( ) ) - enriched_obj = await mocked_gitlab_service.enrich_object_with_members( - obj, + enriched_obj: RESTObject = await mocked_gitlab_service.enrich_object_with_members( + obj2, include_inherited_members=False, include_bot_members=True, include_public_email=False, ) # Assert - assert enriched_obj["name"] == "test_project" - assert len(enriched_obj["__members"]) == 6 - assert enriched_obj["__members"][0] == {"id": 1, "username": "user1"} + assert enriched_obj.name == "test_group" + assert len(enriched_obj.__members) == 6 + assert enriched_obj.__members[0] == {"id": 1, "username": "user1"} - assert enriched_obj_with_public_email["name"] == "test_project" - assert len(enriched_obj_with_public_email["__members"]) == 6 - assert enriched_obj_with_public_email["__members"][0] == { + assert enriched_obj_with_public_email.name == "test_group" + assert len(enriched_obj_with_public_email.__members) == 6 + assert enriched_obj_with_public_email.__members[0] == { "id": 1, "username": "user1", "__public_email": "user1@example.com", From 986e9fab13fbbc2b110cf2a4bf1396c6145705ea Mon Sep 17 00:00:00 2001 From: mkarmah Date: Wed, 13 Nov 2024 18:07:31 +0000 Subject: [PATCH 40/45] remove unnecessary comment --- .../gitlab/tests/gitlab_integration/test_gitlab_service.py | 1 - 1 file changed, 1 deletion(-) diff --git a/integrations/gitlab/tests/gitlab_integration/test_gitlab_service.py b/integrations/gitlab/tests/gitlab_integration/test_gitlab_service.py index 9427aa492b..d151b53b09 100644 --- a/integrations/gitlab/tests/gitlab_integration/test_gitlab_service.py +++ b/integrations/gitlab/tests/gitlab_integration/test_gitlab_service.py @@ -224,7 +224,6 @@ def __init__(self, id, username): self.username = username def asdict(self): - # return {"id": self.id, "username": self.username} return self.__dict__ def __setattr__(self, name, value): From 57ad257d849e372d94c31ffdf659ea44f7934a75 Mon Sep 17 00:00:00 2001 From: mkarmah Date: Wed, 13 Nov 2024 19:07:12 +0000 Subject: [PATCH 41/45] removed public email querying --- .../gitlab_integration/events/hooks/base.py | 2 - .../gitlab_integration/git_integration.py | 5 -- .../gitlab_integration/gitlab_service.py | 14 +---- .../gitlab/gitlab_integration/ocean.py | 8 +-- .../gitlab_integration/test_gitlab_service.py | 60 +------------------ 5 files changed, 3 insertions(+), 86 deletions(-) diff --git a/integrations/gitlab/gitlab_integration/events/hooks/base.py b/integrations/gitlab/gitlab_integration/events/hooks/base.py index 3821f210a4..91a675fff0 100644 --- a/integrations/gitlab/gitlab_integration/events/hooks/base.py +++ b/integrations/gitlab/gitlab_integration/events/hooks/base.py @@ -48,7 +48,6 @@ async def _register_object_with_members(self, kind: str, gitlab_object: RESTObje return for resource_config in matching_resource_configs: - include_public_email = resource_config.selector.include_public_email include_bot_members = resource_config.selector.include_bot_members include_inherited_members = ( resource_config.selector.include_inherited_members @@ -57,7 +56,6 @@ async def _register_object_with_members(self, kind: str, gitlab_object: RESTObje object_result: RESTObject = ( await self.gitlab_service.enrich_object_with_members( gitlab_object, - include_public_email, include_bot_members, include_inherited_members, ) diff --git a/integrations/gitlab/gitlab_integration/git_integration.py b/integrations/gitlab/gitlab_integration/git_integration.py index b1d0d78989..7c8faca24f 100644 --- a/integrations/gitlab/gitlab_integration/git_integration.py +++ b/integrations/gitlab/gitlab_integration/git_integration.py @@ -123,11 +123,6 @@ class GitlabResourceConfig(ResourceConfig): class GitlabMemberSelector(Selector): - include_public_email: bool = Field( - alias="enrichWithPublicEmail", - default=False, - description="If set to true, the integration will enrich group members with public email field. Default value is false", - ) include_inherited_members: bool = Field( alias="includeInheritedMembers", default=False, diff --git a/integrations/gitlab/gitlab_integration/gitlab_service.py b/integrations/gitlab/gitlab_integration/gitlab_service.py index 46db07ec5e..7179432ed2 100644 --- a/integrations/gitlab/gitlab_integration/gitlab_service.py +++ b/integrations/gitlab/gitlab_integration/gitlab_service.py @@ -701,7 +701,6 @@ async def enrich_object_with_members( obj: RESTObject, include_inherited_members: bool = False, include_bot_members: bool = True, - include_public_email: bool = False, ) -> RESTObject: """ Enriches an object (e.g., Project or Group) with its members and optionally their public emails. @@ -710,13 +709,7 @@ async def enrich_object_with_members( async for members in self.get_all_object_members( obj, include_inherited_members, include_bot_members ): - if include_public_email: - tasks = ( - self.enrich_member_with_public_email(member) for member in members - ) - members_list.extend(await asyncio.gather(*tasks)) - else: - members_list.extend(members) + members_list.extend(members) setattr(obj, "__members", [member.asdict() for member in members_list]) return obj @@ -762,11 +755,6 @@ async def get_all_object_members( logger.error(f"Failed to get members for object='{obj_name}'. Error: {e}") return - async def enrich_member_with_public_email(self, member: RESTObject) -> RESTObject: - user: User = await self.get_user(member.id) - setattr(member, "__public_email", user.public_email) - return member - async def get_user(self, user_id: str) -> User: async with semaphore: logger.info(f"fetching user {user_id}") diff --git a/integrations/gitlab/gitlab_integration/ocean.py b/integrations/gitlab/gitlab_integration/ocean.py index c939375a59..887b0a3bde 100644 --- a/integrations/gitlab/gitlab_integration/ocean.py +++ b/integrations/gitlab/gitlab_integration/ocean.py @@ -141,16 +141,12 @@ async def resync_groups_with_members(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: include_inherited_members = ( group_with_members_selector.include_inherited_members ) - include_public_email = group_with_members_selector.include_public_email include_bot_members = group_with_members_selector.include_bot_members async for groups_batch in service.get_all_groups(): tasks = [ service.enrich_object_with_members( - group, - include_inherited_members, - include_bot_members, - include_public_email, + group, include_inherited_members, include_bot_members ) for group in groups_batch ] @@ -206,7 +202,6 @@ async def resync_project_with_members(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: project_with_members_selector.include_inherited_members ) include_bot_members = project_with_members_selector.include_bot_members - include_public_email = project_with_members_selector.include_public_email async for projects in service.get_all_projects(): projects_batch_iter = iter(projects) @@ -231,7 +226,6 @@ async def resync_project_with_members(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: project, include_inherited_members, include_bot_members, - include_public_email, ) for project in projects_enriched_with_extras ] diff --git a/integrations/gitlab/tests/gitlab_integration/test_gitlab_service.py b/integrations/gitlab/tests/gitlab_integration/test_gitlab_service.py index d151b53b09..595cf22133 100644 --- a/integrations/gitlab/tests/gitlab_integration/test_gitlab_service.py +++ b/integrations/gitlab/tests/gitlab_integration/test_gitlab_service.py @@ -308,31 +308,6 @@ def test_should_run_for_members( assert mocked_gitlab_service.should_run_for_members(False, non_bot_member) is True -@pytest.mark.asyncio -async def test_enrich_member_with_public_email( - monkeypatch: Any, mocked_gitlab_service: GitlabService -) -> None: - - # Arrange - member = MockMember(id="123", username="test_user") - mock_user = Mock(spec=User) - mock_user.public_email = "user@example.com" - - monkeypatch.setattr( - mocked_gitlab_service, "get_user", AsyncMock(return_value=mock_user) - ) - - # Act - enriched_member = await mocked_gitlab_service.enrich_member_with_public_email( - member - ) - - # Assert - member.__public_email = "user@example.com" - assert enriched_member.asdict() == member.asdict() - mocked_gitlab_service.get_user.assert_awaited_once_with("123") # type: ignore - - @pytest.mark.asyncio async def test_get_all_object_members( monkeypatch: Any, mocked_gitlab_service: GitlabService @@ -377,48 +352,15 @@ async def test_enrich_object_with_members( # Arrange obj = MockGroup(123, "test_group") - obj2 = MockGroup(123, "test_group") - - user_1 = MockMember(1, "user1") - user_1.__setattr__("__public_email", "user1@example.com"), - user_2 = MockMember(2, "user2") - user_2.__setattr__("__public_email", "user2@example.com"), - user_3 = MockMember(3, "user3") - user_3.__setattr__("__public_email", "user3@example.com"), - - monkeypatch.setattr( - mocked_gitlab_service, - "enrich_member_with_public_email", - AsyncMock(side_effect=[user_1, user_2, user_3] * 2), - ) # Act - enriched_obj_with_public_email = ( - await mocked_gitlab_service.enrich_object_with_members( - obj, - include_inherited_members=False, - include_bot_members=True, - include_public_email=True, - ) - ) - enriched_obj: RESTObject = await mocked_gitlab_service.enrich_object_with_members( - obj2, + obj, include_inherited_members=False, include_bot_members=True, - include_public_email=False, ) # Assert assert enriched_obj.name == "test_group" assert len(enriched_obj.__members) == 6 assert enriched_obj.__members[0] == {"id": 1, "username": "user1"} - - assert enriched_obj_with_public_email.name == "test_group" - assert len(enriched_obj_with_public_email.__members) == 6 - assert enriched_obj_with_public_email.__members[0] == { - "id": 1, - "username": "user1", - "__public_email": "user1@example.com", - } - mocked_gitlab_service.enrich_member_with_public_email.assert_awaited() # type: ignore From 56aaaef5c72d00bf728a8c98bedff7e438e84cbc Mon Sep 17 00:00:00 2001 From: mkarmah Date: Wed, 13 Nov 2024 19:12:52 +0000 Subject: [PATCH 42/45] lint --- .../gitlab/gitlab_integration/gitlab_service.py | 12 +++++++----- .../tests/gitlab_integration/test_gitlab_service.py | 3 +-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/integrations/gitlab/gitlab_integration/gitlab_service.py b/integrations/gitlab/gitlab_integration/gitlab_service.py index 7179432ed2..1bb80eea33 100644 --- a/integrations/gitlab/gitlab_integration/gitlab_service.py +++ b/integrations/gitlab/gitlab_integration/gitlab_service.py @@ -705,11 +705,13 @@ async def enrich_object_with_members( """ Enriches an object (e.g., Project or Group) with its members and optionally their public emails. """ - members_list = [] - async for members in self.get_all_object_members( - obj, include_inherited_members, include_bot_members - ): - members_list.extend(members) + members_list = [ + member + async for members in self.get_all_object_members( + obj, include_inherited_members, include_bot_members + ) + for member in members + ] setattr(obj, "__members", [member.asdict() for member in members_list]) return obj diff --git a/integrations/gitlab/tests/gitlab_integration/test_gitlab_service.py b/integrations/gitlab/tests/gitlab_integration/test_gitlab_service.py index 595cf22133..b7f5036ede 100644 --- a/integrations/gitlab/tests/gitlab_integration/test_gitlab_service.py +++ b/integrations/gitlab/tests/gitlab_integration/test_gitlab_service.py @@ -1,8 +1,7 @@ from typing import Any -from unittest.mock import MagicMock, Mock, AsyncMock +from unittest.mock import MagicMock, Mock from gitlab_integration.gitlab_service import GitlabService from gitlab.base import RESTObject -from gitlab.v4.objects import User import pytest From f42ee30e3be4f8d6c5f4aad88db2e9ece4131580 Mon Sep 17 00:00:00 2001 From: mkarmah Date: Thu, 14 Nov 2024 12:45:14 +0000 Subject: [PATCH 43/45] control resync batch size to avoid hitting rate limits --- .../gitlab_integration/gitlab_service.py | 22 +------------ .../gitlab/gitlab_integration/ocean.py | 32 ++++++++++++------- 2 files changed, 21 insertions(+), 33 deletions(-) diff --git a/integrations/gitlab/gitlab_integration/gitlab_service.py b/integrations/gitlab/gitlab_integration/gitlab_service.py index 1bb80eea33..fc8e8fa1b1 100644 --- a/integrations/gitlab/gitlab_integration/gitlab_service.py +++ b/integrations/gitlab/gitlab_integration/gitlab_service.py @@ -12,7 +12,6 @@ from gitlab.base import RESTObject, RESTObjectList from gitlab.v4.objects import ( Group, - User, GroupMergeRequest, Issue, MergeRequest, @@ -34,7 +33,7 @@ import functools PROJECTS_CACHE_KEY = "__cache_all_projects" -USERS_CACHE_KEY = "__cache_all_users" + MAX_ALLOWED_FILE_SIZE_IN_BYTES = 1024 * 1024 # 1MB GITLAB_SEARCH_RATE_LIMIT = 100 @@ -757,25 +756,6 @@ async def get_all_object_members( logger.error(f"Failed to get members for object='{obj_name}'. Error: {e}") return - async def get_user(self, user_id: str) -> User: - async with semaphore: - logger.info(f"fetching user {user_id}") - users = event.attributes.setdefault(USERS_CACHE_KEY, {}).setdefault( - self.gitlab_client.private_token, {} - ) - - if cached_user := users.get(user_id): - return cached_user - - user_response = await AsyncFetcher.fetch_single( - self.gitlab_client.users.get, user_id - ) - user: User = typing.cast(User, user_response) - event.attributes[USERS_CACHE_KEY][self.gitlab_client.private_token][ - user_id - ] = user - return user - async def get_entities_diff( self, project: Project, diff --git a/integrations/gitlab/gitlab_integration/ocean.py b/integrations/gitlab/gitlab_integration/ocean.py index 887b0a3bde..c6b8766920 100644 --- a/integrations/gitlab/gitlab_integration/ocean.py +++ b/integrations/gitlab/gitlab_integration/ocean.py @@ -25,7 +25,7 @@ from port_ocean.utils.async_iterators import stream_async_iterators_tasks NO_WEBHOOK_WARNING = "Without setting up the webhook, the integration will not export live changes from the gitlab" -PROJECT_RESYNC_BATCH_SIZE = 10 +RESYNC_BATCH_SIZE = 10 async def start_processors() -> None: @@ -143,15 +143,23 @@ async def resync_groups_with_members(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: ) include_bot_members = group_with_members_selector.include_bot_members - async for groups_batch in service.get_all_groups(): - tasks = [ - service.enrich_object_with_members( - group, include_inherited_members, include_bot_members + async for groups in service.get_all_groups(): + groups_batch_iter = iter(groups) + groups_processed_in_full_batch = 0 + + while groups_batch := tuple(islice(groups_batch_iter, RESYNC_BATCH_SIZE)): + groups_processed_in_full_batch += len(groups_batch) + logger.info( + f"Processing extras for {groups_processed_in_full_batch}/{len(groups)} groups in batch" ) - for group in groups_batch - ] - enriched_groups = await asyncio.gather(*tasks) - yield [enriched_group.asdict() for enriched_group in enriched_groups] + tasks = [ + service.enrich_object_with_members( + group, include_inherited_members, include_bot_members + ) + for group in groups_batch + ] + enriched_groups = await asyncio.gather(*tasks) + yield [enriched_group.asdict() for enriched_group in enriched_groups] @ocean.on_resync(ObjectKind.PROJECT) @@ -166,7 +174,7 @@ async def resync_projects(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: projects_batch_iter = iter(projects) projects_processed_in_full_batch = 0 while projects_batch := tuple( - islice(projects_batch_iter, PROJECT_RESYNC_BATCH_SIZE) + islice(projects_batch_iter, RESYNC_BATCH_SIZE) ): projects_processed_in_full_batch += len(projects_batch) logger.info( @@ -207,7 +215,7 @@ async def resync_project_with_members(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: projects_batch_iter = iter(projects) projects_processed_in_full_batch = 0 while projects_batch := tuple( - islice(projects_batch_iter, PROJECT_RESYNC_BATCH_SIZE) + islice(projects_batch_iter, RESYNC_BATCH_SIZE) ): projects_processed_in_full_batch += len(projects_batch) logger.info( @@ -277,7 +285,7 @@ async def resync_files(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE: projects_batch_iter = iter(projects) projects_processed_in_full_batch = 0 while projects_batch := tuple( - islice(projects_batch_iter, PROJECT_RESYNC_BATCH_SIZE) + islice(projects_batch_iter, RESYNC_BATCH_SIZE) ): projects_processed_in_full_batch += len(projects_batch) logger.info( From 62db938d15860da63415d1136f64570c796ca3b4 Mon Sep 17 00:00:00 2001 From: mkarmah Date: Thu, 14 Nov 2024 14:49:10 +0000 Subject: [PATCH 44/45] Added error handling for group not found --- .../gitlab_integration/gitlab_service.py | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/integrations/gitlab/gitlab_integration/gitlab_service.py b/integrations/gitlab/gitlab_integration/gitlab_service.py index fc8e8fa1b1..0ecaec3671 100644 --- a/integrations/gitlab/gitlab_integration/gitlab_service.py +++ b/integrations/gitlab/gitlab_integration/gitlab_service.py @@ -8,6 +8,7 @@ import aiolimiter import anyio.to_thread import yaml +import gitlab.exceptions from gitlab import Gitlab, GitlabError, GitlabList from gitlab.base import RESTObject, RESTObjectList from gitlab.v4.objects import ( @@ -451,13 +452,23 @@ async def get_project(self, project_id: int) -> Project | None: else: return None - async def get_group(self, group_id: int) -> Group | None: - logger.info(f"fetching group {group_id}") - group = await AsyncFetcher.fetch_single(self.gitlab_client.groups.get, group_id) - if isinstance(group, Group) and self.should_run_for_group(group): - return group - else: - return None + async def get_group(self, group_id: int) -> Optional[Group]: + try: + logger.info(f"Fetching group with ID: {group_id}") + group = await AsyncFetcher.fetch_single( + self.gitlab_client.groups.get, group_id + ) + if isinstance(group, Group) and self.should_run_for_group(group): + return group + else: + return None + except gitlab.exceptions.GitlabGetError as err: + if err.response_code == 404: + logger.warning(f"Group with ID {group_id} not found (404).") + return None + else: + logger.error(f"Failed to fetch group with ID {group_id}: {err}") + raise @cache_iterator_result() async def get_all_groups( From 4718261679b634edf75fec31890768d4cc7de3ec Mon Sep 17 00:00:00 2001 From: Tom Tankilevitch <59158507+Tankilevitch@users.noreply.github.com> Date: Thu, 14 Nov 2024 17:58:41 +0200 Subject: [PATCH 45/45] Update integrations/gitlab/CHANGELOG.md --- integrations/gitlab/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/gitlab/CHANGELOG.md b/integrations/gitlab/CHANGELOG.md index aa21ab7e50..3cff5ced76 100644 --- a/integrations/gitlab/CHANGELOG.md +++ b/integrations/gitlab/CHANGELOG.md @@ -7,7 +7,7 @@ this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm -0.1.41 (2024-11-13) +0.1.141 (2024-11-13) =================== ### Features