From 3b65504f09d27349258d282bd04042f31ea5e1a5 Mon Sep 17 00:00:00 2001 From: gruebel Date: Thu, 18 Jul 2024 22:55:40 +0200 Subject: [PATCH] improve IAM Db loading --- policy_sentry/command/initialize.py | 11 +++++++---- policy_sentry/shared/iam_data.py | 22 ++++++++++++++++++++-- requirements.txt | 2 ++ setup.py | 1 + 4 files changed, 30 insertions(+), 6 deletions(-) diff --git a/policy_sentry/command/initialize.py b/policy_sentry/command/initialize.py index 161ad243..828962d5 100755 --- a/policy_sentry/command/initialize.py +++ b/policy_sentry/command/initialize.py @@ -13,10 +13,6 @@ from policy_sentry import set_stream_logger from policy_sentry.querying.all import get_all_service_prefixes -from policy_sentry.shared.awsdocs import ( - create_database, - update_html_docs_directory, -) from policy_sentry.shared.constants import ( BUNDLED_DATA_DIRECTORY, BUNDLED_DATASTORE_FILE_PATH, @@ -90,6 +86,13 @@ def initialize( Initialize the local data file to store AWS IAM information, which can be used to generate IAM policies, and for querying the database. """ + + # importing 'awsdocs' is quite pricey, when it is actually only used for initialize the IAM DB + from policy_sentry.shared.awsdocs import ( + create_database, + update_html_docs_directory, + ) + if not access_level_overrides_file: overrides_file = LOCAL_ACCESS_OVERRIDES_FILE else: diff --git a/policy_sentry/shared/iam_data.py b/policy_sentry/shared/iam_data.py index fb3dcc23..add0c020 100644 --- a/policy_sentry/shared/iam_data.py +++ b/policy_sentry/shared/iam_data.py @@ -3,11 +3,13 @@ from __future__ import annotations import functools -import json +import gc import logging from pathlib import Path from typing import Any, cast +import orjson + from policy_sentry.shared.constants import ( DATASTORE_FILE_PATH, POLICY_SENTRY_SCHEMA_VERSION_NAME, @@ -18,7 +20,23 @@ # On initialization, load the IAM data iam_definition_path = DATASTORE_FILE_PATH logger.debug(f"Leveraging the IAM definition at {iam_definition_path}") -iam_definition = json.loads(Path(iam_definition_path).read_bytes()) + + +def load_iam_definition() -> dict[str, Any]: + gc_enabled = gc.isenabled() + if gc_enabled: + # https://github.com/msgpack/msgpack-python?tab=readme-ov-file#performance-tips + gc.disable() + + data: dict[str, Any] = orjson.loads(Path(iam_definition_path).read_bytes()) + + if gc_enabled: + gc.enable() + + return data + + +iam_definition = load_iam_definition() @functools.lru_cache(maxsize=1) diff --git a/requirements.txt b/requirements.txt index 1981814d..df73bac7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,5 @@ requests==2.32.3 # Config files and schema validation PyYAML==6.0.1 schema==0.7.7 +# IAM DB +orjson==3.10.6 diff --git a/setup.py b/setup.py index 9b23227a..ea4f9406 100644 --- a/setup.py +++ b/setup.py @@ -13,6 +13,7 @@ "requests", "schema", "PyYAML", + "orjson", ] PROJECT_URLS = { "Documentation": "https://policy-sentry.readthedocs.io/",