[Core] Performance measure framework #1176

port-labs · Dec 3, 2024 · 51ef7ab · 51ef7ab
1 parent 741bec3
commit 51ef7ab
Show file tree

Hide file tree

Showing 18 changed files with 453 additions and 69 deletions.
diff --git a/.github/workflows/core-test.yml b/.github/workflows/core-test.yml
@@ -59,6 +59,7 @@ jobs:
           make test/smoke
 
       - name: Cleanup Smoke Test
+        if: always()
         env:
           PYTEST_ADDOPTS: --junitxml=junit/smoke-test-results-ocean/core.xml
           PORT_CLIENT_ID: ${{ secrets.PORT_CLIENT_ID }}

diff --git a/.github/workflows/perf-test.yml b/.github/workflows/perf-test.yml
@@ -0,0 +1,100 @@
+name: 🌊 Ocean Core Performance Tests
+
+on:
+  workflow_dispatch:
+    inputs:
+      batch_size:
+        type: choice
+        description: Batch size of requests against fake 3rd party API
+        default: "1000"
+        options:
+          - "10"
+          - "100"
+          - "200"
+          - "500"
+          - "1000"
+      entity_kb_size:
+        type: choice
+        description: Entity size in kb
+        default: "1"
+        options:
+          - "1"
+          - "5"
+          - "10"
+          - "25"
+          - "100"
+          - "500"
+      third_party_latency_ms:
+        type: choice
+        description: Latency in ms to each 3rd party API call
+        default: "0"
+        options:
+          - "0"
+          - "5"
+          - "10"
+          - "25"
+          - "100"
+          - "200"
+          - "500"
+      entities_amount:
+        type: choice
+        description: Explicit amount of entities created
+        default: "500"
+        options:
+          - "10"
+          - "50"
+          - "200"
+          - "500"
+          - "2500"
+          - "5000"
+          - "10000"
+          - "20000"
+          - "25000"
+          - "35000"
+jobs:
+  test:
+    name: 🌊 Ocean Performance Tests
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Repo
+        uses: actions/checkout@v4
+
+      - name: Install poetry
+        run: pipx install poetry
+
+      - name: Set up Python 3.12
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+          cache: 'poetry'
+
+      - name: Install dependencies
+        run: |
+          make install
+
+      - name: Run Performance Test
+        env:
+          PORT_CLIENT_ID: ${{ secrets.PORT_CLIENT_ID }}
+          PORT_CLIENT_SECRET: ${{ secrets.PORT_CLIENT_SECRET }}
+          PORT_BASE_URL: ${{ secrets.PORT_BASE_URL }}
+          SMOKE_TEST_SUFFIX: ${{ github.run_id }}
+          THIRD_PARTY_BATCH_SIZE: ${{ inputs.batch_size }}
+          THIRD_PARTY_LATENCY_MS: ${{ inputs.third_party_latency_ms }}
+          ENTITY_AMOUNT: ${{ inputs.entities_amount }}
+          ENTITY_KB_SIZE: ${{ inputs.entity_kb_size }}
+        run: |
+          ./scripts/run-local-perf-test.sh
+
+      - name: Cleanup Smoke Test
+        if: always()
+        env:
+          PORT_CLIENT_ID: ${{ secrets.PORT_CLIENT_ID }}
+          PORT_CLIENT_SECRET: ${{ secrets.PORT_CLIENT_SECRET }}
+          PORT_BASE_URL: ${{ secrets.PORT_BASE_URL }}
+          SMOKE_TEST_SUFFIX: ${{ github.run_id }}
+        run: |
+          make clean/smoke
+
+      - name: Publish Performance Test Summary
+        run: |
+          cat ./perf-test-results-${{ github.run_id }}.log.md >> ${GITHUB_STEP_SUMMARY}
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,14 @@ this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
 
 <!-- towncrier release notes start -->
 
+## 0.14.5 (2024-12-03)
+
+
+### Improvements
+
+- Add performance test framework
+
+
 ## 0.14.4 (2024-12-03)
 
 

diff --git a/integrations/_infra/Makefile b/integrations/_infra/Makefile
@@ -11,8 +11,6 @@ define run_checks
 	ruff check . || exit_code=$$?; \
 	echo "Running black"; \
 	black --check . || exit_code=$$?; \
-	echo "Running yamllint"; \
-	yamllint . || exit_code=$$?; \
 	if [ $$exit_code -eq 1 ]; then \
 		echo "\033[0;31mOne or more checks failed with exit code $$exit_code\033[0m"; \
 	else \

diff --git a/integrations/fake-integration/.port/resources/blueprints.json b/integrations/fake-integration/.port/resources/blueprints.json
@@ -40,6 +40,10 @@
                 "age": {
                     "type": "number",
                     "title": "Age"
+                },
+                "bio": {
+                    "type": "string",
+                    "title": "Bio"
                 }
             }
         },

diff --git a/integrations/fake-integration/.port/resources/port-app-config.yml b/integrations/fake-integration/.port/resources/port-app-config.yml
@@ -26,5 +26,6 @@ resources:
             status: .status
             age: .age
             department: .department.name
+            bio: .bio
           relations:
             department: .department.id
diff --git a/integrations/fake-integration/.port/spec.yaml b/integrations/fake-integration/.port/spec.yaml
@@ -7,3 +7,29 @@ features:
     resources:
       - kind: fake-department
       - kind: fake-person
+configurations:
+  - name: entityAmount
+    required: false
+    type: integer
+    description: Amount of fake persons per department created
+    default: -1
+  - name: entityKbSize
+    required: false
+    type: integer
+    description: Factor of size of entity (by making the 'bio' string field)
+    default: -1
+  - name: thirdPartyBatchSize
+    required: false
+    type: integer
+    description: Batch size of requests against fake 3rd party API
+    default: -1
+  - name: thirdPartyLatencyMS
+    required: false
+    type: integer
+    description: latency in milliseconds to each 3rd party API call
+    default: -1
+  - name: singleDepartmentRun
+    required: false
+    type: boolean
+    description: Run only 1 static department instead of the default 5
+    default: False
diff --git a/integrations/fake-integration/fake_org_data/fake_client.py b/integrations/fake-integration/fake_org_data/fake_client.py
@@ -1,21 +1,76 @@
-from faker import Faker
-from typing import List
+from enum import StrEnum, IntEnum
+from typing import List, Tuple, Dict, Any, AsyncGenerator
 from random import randint
 
 from port_ocean.utils import http_async_client
+from port_ocean.context.ocean import ocean
 
-from .types import FakeDepartment, FakePerson
+from .types import FakePerson
+from .static import FAKE_DEPARTMENTS
 
 
-fake = Faker()
-
 API_URL = "http://localhost:8000/integration/department"
 USER_AGENT = "Ocean Framework Fake Integration (https://github.com/port-labs/ocean)"
 
 
-async def get_fake_persons(department: FakeDepartment) -> List[FakePerson]:
-    amount = randint(2, 19)
-    url = f"{API_URL}/{department.name}/employees/{amount}"
+class FakeIntegrationDefaults(IntEnum):
+    # Fallback values used by get_config() for the fake integration settings.
+
+    # Used when the config has no entity amount entry.
+    ENTITY_AMOUNT = 20
+    # Used when the size factor is missing or configured below 1.
+    ENTITY_KB_SIZE_FACTOR = 1
+    # Used when the batch size is missing or configured below 1.
+    THIRD_PARTY_BATCH_SIZE = 1000
+    # Used when the latency is missing or configured below 0.
+    THIRD_PARTY_LATENCY_MS = 0
+
+
+class FakeIntegrationConfigKeys(StrEnum):
+    # Keys looked up in ``ocean.integration_config``.
+    # NOTE(review): presumably the snake_case form of the camelCase names
+    # declared in .port/spec.yaml - confirm how Ocean maps them.
+
+    ENTITY_AMOUNT = "entity_amount"
+    ENTITY_KB_SIZE_FACTOR = "entity_kb_size_factor"
+    THIRD_PARTY_BATCH_SIZE = "third_party_batch_size"
+    THIRD_PARTY_LATENCY_MS = "third_party_latency_ms"
+    # Read by get_departments(); the member name differs from the key text.
+    SINGLE_PERF_RUN = "single_department_run"
+
+
+def get_config() -> Tuple[List[int], int, int]:
+    entity_amount = ocean.integration_config.get(
+        FakeIntegrationConfigKeys.ENTITY_AMOUNT,
+        FakeIntegrationDefaults.ENTITY_AMOUNT,
+    )
+    batch_size = ocean.integration_config.get(
+        FakeIntegrationConfigKeys.THIRD_PARTY_BATCH_SIZE,
+        FakeIntegrationDefaults.THIRD_PARTY_BATCH_SIZE,
+    )
+    if batch_size < 1:
+        batch_size = FakeIntegrationDefaults.THIRD_PARTY_BATCH_SIZE
+
+    entity_kb_size_factor: int = ocean.integration_config.get(
+        FakeIntegrationConfigKeys.ENTITY_KB_SIZE_FACTOR,
+        FakeIntegrationDefaults.ENTITY_KB_SIZE_FACTOR,
+    )
+    if entity_kb_size_factor < 1:
+        entity_kb_size_factor = FakeIntegrationDefaults.ENTITY_KB_SIZE_FACTOR
+
+    latency_ms = ocean.integration_config.get(
+        FakeIntegrationConfigKeys.THIRD_PARTY_LATENCY_MS,
+        FakeIntegrationDefaults.THIRD_PARTY_LATENCY_MS,
+    )
+    if latency_ms < 0:
+        latency_ms = FakeIntegrationDefaults.THIRD_PARTY_LATENCY_MS
+
+    batches = [entity_amount]
+    if entity_amount > batch_size:
+        round_batches = entity_amount // batch_size
+        leftover = entity_amount % batch_size
+
+        batches = [batch_size for _ in range(round_batches)]
+
+        if leftover > 0:
+            batches += [leftover]
+
+    return batches, entity_kb_size_factor, latency_ms
+
+
+async def get_fake_persons_batch(
+    department_id: str, limit: int, entity_kb_size: int, latency_ms: int
+) -> List[Dict[Any, Any]]:
+    url = f"{API_URL}/{department_id}/employees?limit={limit}&entity_kb_size={entity_kb_size}&latency={latency_ms}"
     response = await http_async_client.get(
         url,
         headers={
@@ -30,8 +85,37 @@ async def get_fake_persons(department: FakeDepartment) -> List[FakePerson]:
         FakePerson(
             **{
                 **person,
-                "department": department,
+                "department": [
+                    department
+                    for department in FAKE_DEPARTMENTS
+                    if department_id == department.id
+                ][0],
             }
-        )
+        ).dict()
         for person in raw_persons["results"]
     ]
+
+
+async def get_fake_persons() -> AsyncGenerator[List[Dict[Any, Any]], None]:
+    batches, entity_kb_size, latency_ms = get_config()
+    async for departments_batch in get_departments():
+        for department in departments_batch:
+            for batch in batches:
+                current_result = await get_fake_persons_batch(
+                    department["id"], batch, entity_kb_size, latency_ms
+                )
+                yield current_result
+
+
+async def get_departments() -> AsyncGenerator[List[Dict[Any, Any]], None]:
+    single_department_run = ocean.integration_config.get(
+        FakeIntegrationConfigKeys.SINGLE_PERF_RUN, False
+    )
+
+    departments = (
+        FAKE_DEPARTMENTS
+        if not single_department_run
+        else [FAKE_DEPARTMENTS[randint(0, len(FAKE_DEPARTMENTS) - 1)]]
+    )
+
+    yield [department.dict() for department in departments]
diff --git a/integrations/fake-integration/fake_org_data/fake_router.py b/integrations/fake-integration/fake_org_data/fake_router.py
@@ -5,10 +5,27 @@
 from fake_org_data.generator import generate_fake_persons
 
 
-FAKE_ROUTE = "/department/{department_id}/employees/{limit}"
+FAKE_DEPARTMENT_EMPLOYEES = "/department/{department_id}/employees"
 
 
 def initialize_fake_routes() -> None:
-    @ocean.router.get(FAKE_ROUTE)
-    def get_employees_per_department(department_id: str, limit: int) -> Dict[str, Any]:
-        return generate_fake_persons(department_id, limit)
+    """Register the fake department-employees GET route on the Ocean router."""
+
+    @ocean.router.get(FAKE_DEPARTMENT_EMPLOYEES)
+    async def get_employees_per_department(
+        department_id: str,
+        limit: int = -1,
+        entity_kb_size: int = -1,
+        latency: int = -1,
+    ) -> Dict[str, Any]:
+        """Return generated employees for a department.
+
+        ``limit``, ``entity_kb_size`` and ``latency`` default to -1, which
+        stands for "use the default value". A number is used as the marker
+        because these settings come from the integration config, whose
+        validation fails for an empty value.
+        """
+        result = await generate_fake_persons(
+            department_id, limit, entity_kb_size, latency
+        )
+        return result
diff --git a/integrations/fake-integration/fake_org_data/generator.py b/integrations/fake-integration/fake_org_data/generator.py
@@ -1,3 +1,4 @@
+import asyncio
 from random import randint
 from typing import Any, Dict, Union
 
@@ -8,33 +9,49 @@
 
 fake = Faker()
 
+DEFAULT_ENTITIES_AMOUNT = 400
+DEFAULT_ENTITY_KB_SIZE = 1
+DEFAULT_LATENCY_MS = 0
 
-def generate_fake_persons(
-    department_id: Union[str, None] = None, amount: Union[int, None] = None
+
+async def generate_fake_persons(
+    department_id: Union[str, None],
+    amount: int,
+    entity_kb_size: int,
+    latency: int,
 ) -> Dict[str, Any]:
     departments = [x for x in FAKE_DEPARTMENTS if x.id == department_id]
     department = (
         departments[0]
         if len(departments)
-        else FAKE_DEPARTMENTS[randint(0, len(FAKE_DEPARTMENTS))]
+        else FAKE_DEPARTMENTS[randint(0, len(FAKE_DEPARTMENTS) - 1)]
     )
 
     company_domain = fake.company_email().split("@")[-1]
     results = []
-    for _ in range(amount or 400):
+    for _ in range(amount if amount > 0 else DEFAULT_ENTITIES_AMOUNT):
         results.append(
             FakePerson(
                 id=fake.passport_number(),
                 name=fake.name(),
                 email=fake.email(domain=company_domain),
                 age=randint(20, 100),
                 department=department,
+                bio=fake.text(
+                    max_nb_chars=(
+                        entity_kb_size if entity_kb_size > 0 else DEFAULT_ENTITY_KB_SIZE
+                    )
+                    * 1024
+                ),
                 status=(
                     FakePersonStatus.WORKING
                     if randint(0, 2) % 2 == 0
                     else FakePersonStatus.NOPE
                 ),
             ).dict()
         )
+    latency_to_use = latency / 1000 if latency > 0 else DEFAULT_LATENCY_MS
+    if latency_to_use > 0:
+        await asyncio.sleep(latency_to_use)
 
     return {"results": results}