Skip to content

Commit

Permalink
feat: okta apps
Browse files Browse the repository at this point in the history
create applications while fetching okta groups
  • Loading branch information
Erez Sharim authored and asaf committed Aug 20, 2024
1 parent 93522e3 commit 058bdc4
Show file tree
Hide file tree
Showing 9 changed files with 77 additions and 22 deletions.
20 changes: 14 additions & 6 deletions app/data_fetching/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from app.data_fetching.okta import DFOktaImpl
from app.data_fetching.utils import prepare_metadata_ids_content
from app.models import Directory
from app.sql import SQLAlchemyTransactionContext
from app.vault_utils import resolve_ws_config_secrets
from app.vector_store import delete_ids

Expand All @@ -30,7 +31,7 @@ def DataFetcherFactory(dir: Directory) -> DataFetcherInterface:
case "_mock_":
return DFMockImpl()
case "okta":
return DFOktaImpl(**resolved_config)
return DFOktaImpl(workspace_id=dir.workspace_id, **resolved_config)

raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
Expand All @@ -39,19 +40,26 @@ def DataFetcherFactory(dir: Directory) -> DataFetcherInterface:


async def background_data_fetch(
data_fetcher: DataFetcherInterface, ovstore, dir: Directory
data_fetcher: DataFetcherInterface, ovstore, dir: Directory, **kwargs
):
try:
docs = await data_fetcher.fetch_content(dir_name=dir.name)
for doc in docs:
doc.external_id = dir.name
docs = await data_fetcher.fetch_content(dir_name=dir.name, **kwargs)
except Exception as e:
log.error(f"failed to fetch directory {dir.name} data: {str(e)}")
return

texts, metadata, ids = prepare_metadata_ids_content(docs)
try:
delete_ids(ovstore=ovstore, ids=[dir.name])
with SQLAlchemyTransactionContext().manage() as tx_context:
docs, _ = ovstore.__list_docs__(
workspace_id=dir.workspace_id,
directory=dir.name,
limit=100000,
projection=["custom_id"],
tx_context=tx_context,
)
ids_to_delete = [d.custom_id for d in docs]
delete_ids(ovstore=ovstore, ids=ids_to_delete)
except NotImplementedError:
log.error(f"could not delete directory {dir.name} data before importing")
return
Expand Down
2 changes: 1 addition & 1 deletion app/data_fetching/iface.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@

class DataFetcherInterface(ABC):
    """Contract for directory content fetchers (Okta, mock, ...).

    Implementations pull group/role documents from an external identity
    provider so they can be embedded into the vector store.
    """

    @abstractmethod
    async def fetch_content(self, dir_name: str, **kwargs) -> List[Doc]:
        """Fetch all documents for the directory named *dir_name*.

        Extra provider-specific options (e.g. ``create_apps`` for Okta)
        are passed through ``**kwargs`` by ``background_data_fetch``.
        """
        pass
2 changes: 1 addition & 1 deletion app/data_fetching/mock.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@


class DFMockImpl(DataFetcherInterface):
    """No-op fetcher used by the ``_mock_`` directory type; returns nothing."""

    # BUG FIX: the method was declared without ``self``, so the caller's
    # ``data_fetcher.fetch_content(dir_name=dir.name, **kwargs)`` would raise
    # ``TypeError: got multiple values for argument 'dir_name'`` (the bound
    # instance fills the first positional slot).
    async def fetch_content(self, dir_name: str, **kwargs) -> List[Doc]:
        return []
56 changes: 50 additions & 6 deletions app/data_fetching/okta.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
import logging
from typing import List

from app.data_fetching.iface import DataFetcherInterface
from app.data_fetching.utils import Doc
from app.models import Application
from app.services import factory_app_store
from app.sql import SQLAlchemyTransactionContext

logger = logging.getLogger(__name__)


def okta_group_to_doc(id, name, description, dir_name, apps) -> Doc:
Expand All @@ -11,11 +17,21 @@ def okta_group_to_doc(id, name, description, dir_name, apps) -> Doc:
f"**role description**: {description}",
]
content = "\n".join(ra)
return Doc(directory=dir_name, content=content, external_id="okta", apps=apps)
return Doc(
directory=dir_name,
display_name=name,
content=content,
external_id=id,
apps=apps,
)


def create_app_name(name: str) -> str:
    """Normalize an Okta application name into a valid ``Application.name``.

    ``Application.name`` is validated by ``must_be_lowercase_alphanumeric_validator``
    (see app/models.py), and the fetch loop notes "apps must be alphanumeric
    only" — so stripping underscores alone is not enough: uppercase letters or
    other symbols (spaces, dashes) in the Okta app name would make the later
    ``Application(...)`` construction raise a validation error. Lowercase the
    name and keep only alphanumeric characters; for names that are already
    lowercase with underscores this returns exactly what the old
    ``name.replace("_", "")`` did.
    """
    return "".join(ch for ch in name.lower() if ch.isalnum())


class DFOktaImpl(DataFetcherInterface):
def __init__(self, tenant, token) -> None:
def __init__(self, workspace_id, tenant, token) -> None:
try:
from okta.client import Client as OktaClient
except ImportError:
Expand All @@ -26,21 +42,29 @@ def __init__(self, tenant, token) -> None:

config = {"orgUrl": f"https://{tenant}", "token": token}
self.client = OktaClient(config)
self.workspace_id = workspace_id

async def fetch_content(self, dir_name: str) -> List[Doc]:
groups, resp, err = await self.client.list_groups()
async def fetch_content(self, dir_name: str, **kwargs) -> List[Doc]:
groups, resp, err = await self.client.list_groups(
query_params={"expand": "stats"}
)
if err is not None:
raise ValueError(f"error while fetching groups: {err}")

groups_md = []
all_apps = set()
while True:
for group in groups:
if group.embedded.get("stats", {}).get("appsCount", 0) == 0:
continue

gapps, _, err = await self.client.list_assigned_applications_for_group(
groupId=group.id
)

# TODO: this is a bit weird, and we'll have to think about this
apps = [a.label.lower().replace(" ", "_") for a in gapps]
# apps must be alphanumeric only
apps = [create_app_name(a.name) for a in gapps]
all_apps.update(gapps)

gmd = okta_group_to_doc(
id=group.id,
Expand All @@ -57,4 +81,24 @@ async def fetch_content(self, dir_name: str) -> List[Doc]:
else:
break

if kwargs.get("create_apps"):
app_store = factory_app_store()
with SQLAlchemyTransactionContext().manage() as tx_context:
known_apps, _ = app_store.list(
workspace_id=self.workspace_id,
limit=1000,
tx_context=tx_context,
projection=["name"],
)
known_apps_names = [ka.name for ka in known_apps]
for gapp in all_apps:
gapp_name = create_app_name(gapp.name)
if gapp_name not in known_apps_names:
a = Application(
name=gapp_name,
aliases=[gapp.label],
workspace_id=self.workspace_id,
)
app_store.insert(app=a, tx_context=tx_context)

return groups_md
2 changes: 2 additions & 0 deletions app/data_fetching/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

class Doc(BaseModel):
id: Optional[str] = None
display_name: Optional[str] = None
apps: Optional[list[str]]
directory: Optional[str]
content: Optional[str]
Expand All @@ -24,6 +25,7 @@ def prepare_metadata_ids_content(docs: List[Doc]):
"directory": doc.directory,
"app": doc.apps,
"created_at": datetime.now().isoformat(),
"display_name": doc.display_name or "",
}
metadatas.append(dmeta)
ids.append(doc.external_id if doc.external_id is not None else generate())
Expand Down
2 changes: 1 addition & 1 deletion app/ext_vector_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def create_projection(proj: Optional[List[str]] = []):
"content": "document",
}

to_get = {translate[item] for item in proj if item in translate}
to_get = {translate.get(item, item) for item in proj}
return ", ".join(to_get)


Expand Down
6 changes: 3 additions & 3 deletions app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,9 @@ class Application(BaseModel):
workspace_id: str
name: str
_normalize_name = field_validator("name")(must_be_lowercase_alphanumeric_validator)
aliases: Optional[list[str]]
extra_instructions: Optional[str]
provision_schema: Optional[dict]
aliases: Optional[list[str]] = []
extra_instructions: Optional[str] = None
provision_schema: Optional[dict] = None
created_at: datetime = Field(default_factory=datetime.now)


Expand Down
5 changes: 3 additions & 2 deletions app/routers/directories.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging
from typing import Annotated, List
from typing import Annotated, Any, List

import jsonpatch
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, status
Expand Down Expand Up @@ -199,6 +199,7 @@ async def list(
"/{dir_id}/.import", response_model=dict, status_code=status.HTTP_202_ACCEPTED
)
async def import_content(
body: dict[str, Any],
dir_id: str,
workspace: Annotated[Workspace, Depends(get_current_workspace)],
directory_store: Annotated[DirectoryStore, Depends(get_service(DirectoryStore))],
Expand All @@ -215,5 +216,5 @@ async def import_content(
raise HTTPException(status_code=404, detail="Directory not found")

data_fetcher = DataFetcherFactory(dir=dir)
background_tasks.add_task(background_data_fetch, data_fetcher, ovstore, dir)
background_tasks.add_task(background_data_fetch, data_fetcher, ovstore, dir, **body)
return {"message": "data import happening in background"}
4 changes: 2 additions & 2 deletions docs/dev/api_exploration.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -706,8 +706,8 @@
"outputs": [],
"source": [
"# import okta roles\n",
"dir_id = \"-Ci2o1lpxL\"\n",
"resp = requests.post(f\"{API_URL}/directories/{dir_id}/.import\", headers=headers)\n",
"dir_id = \"ABCDE11ere\"\n",
"resp = requests.post(f\"{API_URL}/directories/{dir_id}/.import\", headers=headers, json={\"create_apps\": True})\n",
"if resp.status_code != status.HTTP_202_ACCEPTED:\n",
" raise SystemExit(resp.text)\n",
"print(\"content being processed: \", resp.json())"
Expand Down

0 comments on commit 058bdc4

Please sign in to comment.