Commit f0ac3c4

Merge branch 'dev' into docker-compose-rm-postgres

Vasilije1990 authored Feb 9, 2025
2 parents 1ce9e16 + 186b82c commit f0ac3c4
Showing 97 changed files with 1,762 additions and 861 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/approve_dco.yaml
@@ -1,4 +1,4 @@
-name: DCO Check
+name: community | DCO Check

 on:
   pull_request:
14 changes: 11 additions & 3 deletions .github/workflows/clean_stale_pr.yaml
@@ -1,7 +1,7 @@
 name: clean | remove stale PRs

 on:
-  # Run this action periodically (daily at 0:00 UTC in this example).
+  # Run this action periodically (daily at 0:00 UTC).
   schedule:
     - cron: "0 0 * * *"
   # Optionally, also run when pull requests are labeled, unlabeled, synchronized, or reopened
@@ -13,12 +13,20 @@ jobs:
   stale:
     runs-on: ubuntu-latest
     steps:
-      - name: Mark and Close Stale
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Mark and Close Stale PRs
         uses: actions/stale@v6
         with:
           # Number of days of inactivity before the pull request is marked stale
           days-before-stale: 60
           # Number of days of inactivity after being marked stale before the pull request is closed
           days-before-close: 7
           # Comment to post when marking as stale
-          stale-pr-message: "This pull request has been automatically marke
+          stale-pr-message: "This pull request has been automatically marked as stale due to inactivity. It will be closed in 7 days if no further activity occurs."
+          # Comment to post when closing a stale pull request
+          close-pr-message: "This pull request has been closed due to prolonged inactivity."
+          # Labels for stale and closed PRs
+          stale-pr-label: "stale"
+          exempt-pr-labels: "keep-open"
2 changes: 1 addition & 1 deletion .github/workflows/ruff_format.yaml
@@ -1,4 +1,4 @@
-name: ruff format
+name: lint | ruff format
 on: [ pull_request ]

 jobs:
2 changes: 1 addition & 1 deletion .github/workflows/ruff_lint.yaml
@@ -1,4 +1,4 @@
-name: ruff lint
+name: lint | ruff lint
 on: [ pull_request ]

 jobs:
28 changes: 28 additions & 0 deletions .github/workflows/test_descriptive_graph_metrics.yml
@@ -0,0 +1,28 @@
+name: test | descriptive graph metrics
+
+on:
+  workflow_dispatch:
+  pull_request:
+    types: [labeled, synchronize]
+
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  run_networkx_metrics_test:
+    uses: ./.github/workflows/reusable_python_example.yml
+    with:
+      example-location: ./cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py
+    secrets:
+      LLM_MODEL: ${{ secrets.LLM_MODEL }}
+      LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+      LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+      LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+      EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+      EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+      EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+      EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
+      GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
+      GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
10 changes: 2 additions & 8 deletions .github/workflows/test_dynamic_steps_example.yml
@@ -16,13 +16,7 @@ jobs:
     with:
       example-location: ./examples/python/dynamic_steps_example.py
     secrets:
-      LLM_MODEL: ${{ secrets.LLM_MODEL }}
-      LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
-      LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
-      LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
-      EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
-      EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
-      EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
-      EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
+      LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
       GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
61 changes: 46 additions & 15 deletions .github/workflows/test_llama_index_cognee_integration_notebook.yml
@@ -9,20 +9,51 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
   cancel-in-progress: true

+env:
+  RUNTIME__LOG_LEVEL: ERROR
+
 jobs:
   run_notebook_test:
-    uses: ./.github/workflows/reusable_notebook.yml
-    with:
-      notebook-location: notebooks/llama_index_cognee_integration.ipynb
-    secrets:
-      #LLM_MODEL: ${{ secrets.LLM_MODEL }}
-      #LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
-      LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-      #LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
-      EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
-      EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
-      EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
-      EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
-      GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
-      GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
+    name: test
+    runs-on: ubuntu-22.04
+    defaults:
+      run:
+        shell: bash
+    steps:
+      - name: Check out
+        uses: actions/checkout@master
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11.x'
+
+      - name: Install Poetry
+        uses: snok/install-poetry@v1
+        with:
+          virtualenvs-create: true
+          virtualenvs-in-project: true
+          installer-parallel: true
+
+      - name: Install dependencies
+        run: |
+          pip install jupyter
+          pip install llama-index-graph-rag-cognee==0.1.2
+
+      - name: Execute Jupyter Notebook
+        env:
+          ENV: 'dev'
+          LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
+          GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
+          GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
+        run: |
+          poetry run jupyter nbconvert \
+            --to notebook \
+            --execute notebooks/llama_index_cognee_integration.ipynb \
+            --output executed_notebook.ipynb \
+            --ExecutePreprocessor.timeout=1200
2 changes: 1 addition & 1 deletion .github/workflows/test_multimedia_example.yaml
@@ -16,7 +16,7 @@ jobs:
     with:
       example-location: ./examples/python/multimedia_example.py
     secrets:
-      LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} # Use OpenAI until we deploy models to handle multimedia
+      LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
       GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
10 changes: 2 additions & 8 deletions .github/workflows/test_simple_example.yml
@@ -16,13 +16,7 @@ jobs:
     with:
       example-location: ./examples/python/simple_example.py
     secrets:
-      LLM_MODEL: ${{ secrets.LLM_MODEL }}
-      LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
-      LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
-      LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
-      EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
-      EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
-      EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
-      EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
+      LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
       GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
37 changes: 37 additions & 0 deletions .github/workflows/update-contributors.yml
@@ -0,0 +1,37 @@
+name: community | contributors leaderboard
+
+on:
+  schedule:
+    - cron: "0 0 * * 1" # Runs every Monday
+  workflow_dispatch: # Allows manual trigger
+
+jobs:
+  update-contributors:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+
+      - name: Generate Contributor List
+        run: |
+          echo "## 💫 Contributors" > CONTRIBUTORS.md
+          echo "" >> CONTRIBUTORS.md
+          echo "Thanks to our amazing contributors! 💖" >> CONTRIBUTORS.md
+          echo "" >> CONTRIBUTORS.md
+          echo '<a href="https://github.com/topoteretes/cognee/graphs/contributors">' >> CONTRIBUTORS.md
+          echo '  <img src="https://contrib.rocks/image?repo=topoteretes/cognee" />' >> CONTRIBUTORS.md
+          echo '</a>' >> CONTRIBUTORS.md
+          echo "" >> CONTRIBUTORS.md
+          echo "## 🏆 Top Contributors" >> CONTRIBUTORS.md
+          echo "" >> CONTRIBUTORS.md
+          echo "| Rank | Contributor | Contributions |" >> CONTRIBUTORS.md
+          echo "|------|------------|---------------|" >> CONTRIBUTORS.md
+          git shortlog -sne | sort -rn | head -10 | awk '{print "| "NR" | ["$2"](https://github.com/"$2") | "$1" Commits |"}' >> CONTRIBUTORS.md
+
+      - name: Commit and Push Changes
+        run: |
+          git config --global user.name "github-actions[bot]"
+          git config --global user.email "github-actions[bot]@users.noreply.github.com"
+          git add CONTRIBUTORS.md
+          git commit -m "Update contributors list"
+          git push
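Two notes on this new workflow. First, `actions/checkout@v4` performs a shallow clone by default (`fetch-depth: 1`), so `git shortlog` as written would only see the most recent commit; a full leaderboard likely requires `fetch-depth: 0` on the checkout step. Second, for readability, here is a rough Python equivalent of the awk one-liner — a sketch, not part of the commit; like the original, it approximates each contributor's GitHub handle with the first word of the author name:

```python
import subprocess

# Summarize commit counts per author (mirrors `git shortlog -sne HEAD`).
out = subprocess.run(
    ["git", "shortlog", "-sne", "HEAD"], capture_output=True, text=True, check=True
).stdout

rows = []
for line in out.splitlines():
    count, author = line.strip().split("\t", 1)  # lines look like "  42\tName <email>"
    handle = author.split()[0]  # first word of the name, as the awk script does
    rows.append(f"| {len(rows) + 1} | [{handle}](https://github.com/{handle}) | {count} Commits |")

print("\n".join(rows[:10]))  # top 10, like `head -10`
```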
1 change: 1 addition & 0 deletions .gitignore
@@ -179,6 +179,7 @@ cognee/cache/
 # Default cognee system directory, used in development
 .cognee_system/
 .data_storage/
+.artifacts/
 .anon_id

 node_modules/
14 changes: 8 additions & 6 deletions README.md
@@ -12,14 +12,16 @@ We build for developers who need a reliable, production-ready data layer for AI

 ## What is cognee?

-Cognee implements scalable, modular ECL (Extract, Cognify, Load) pipelines that allow you to interconnect and retrieve past conversations, documents, and audio transcriptions while reducing hallucinations, developer effort, and cost.
+Cognee implements scalable, modular ECL (Extract, Cognify, Load) pipelines that allow you to interconnect and retrieve past conversations, documents, and audio transcriptions while reducing hallucinations, developer effort, and cost.

-Cognee merges graph and vector databases to uncover hidden relationships and new patterns in your data. You can automatically model, load and retrieve entities and objects representing your business domain and analyze their relationships, uncovering insights that neither vector stores nor graph stores alone can provide. Learn more about use-cases [here](https://docs.cognee.ai/use_cases)
+Cognee merges graph and vector databases to uncover hidden relationships and new patterns in your data. You can automatically model, load and retrieve entities and objects representing your business domain and analyze their relationships, uncovering insights that neither vector stores nor graph stores alone can provide. Learn more about use-cases [here](https://docs.cognee.ai/use_cases).


-Try it in a Google Colab <a href="https://colab.research.google.com/drive/1g-Qnx6l_ecHZi0IOw23rg0qC4TYvEvWZ?usp=sharing">notebook</a> or have a look at our <a href="https://docs.cognee.ai">documentation</a>
+Try it in a Google Colab <a href="https://colab.research.google.com/drive/1g-Qnx6l_ecHZi0IOw23rg0qC4TYvEvWZ?usp=sharing">notebook</a> or have a look at our <a href="https://docs.cognee.ai">documentation</a>.

-If you have questions, join our <a href="https://discord.gg/NQPKmU5CCg">Discord</a> community
+If you have questions, join our <a href="https://discord.gg/NQPKmU5CCg">Discord</a> community.
+
+Have you seen cognee's <a href="https://github.com/topoteretes/cognee-starter">starter repo</a>? Check it out!

 <div align="center">
   <img src="assets/cognee_benefits.png" alt="why cognee" width="80%" />
@@ -170,7 +172,7 @@ async def main():
     print(f"Searching cognee for insights with query: '{query_text}'")
     # Query cognee for insights on the added text
     search_results = await cognee.search(
-        SearchType.INSIGHTS, query_text=query_text
+        query_text=query_text, query_type=SearchType.INSIGHTS
     )

     print("Search results:")
@@ -200,7 +202,7 @@ cognee framework consists of tasks that can be grouped into pipelines.
 Each task can be an independent part of business logic, that can be tied to other tasks to form a pipeline.
 These tasks persist data into your memory store enabling you to search for relevant context of past conversations, documents, or any other data you have stored.
 <div align="center">
-  <img src="assets/cognee_diagram.png" alt="cognee concept diagram" width="50%" />
+  <img src="assets/cognee_diagram.png" alt="cognee concept diagram" width="80%" />
 </div>
Binary file modified assets/cognee_diagram.png
1 change: 0 additions & 1 deletion cognee/api/v1/cognify/cognify_v2.py
@@ -165,7 +165,6 @@ async def get_default_tasks(
                 task_config={"batch_size": 10},
             ),
             Task(add_data_points, task_config={"batch_size": 10}),
-            Task(store_descriptive_metrics, include_optional=True),
         ]
     except Exception as error:
         send_telemetry("cognee.cognify DEFAULT TASKS CREATION ERRORED", user.id)
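This removes the optional `store_descriptive_metrics` step from the default cognify task list (metrics now run via the dedicated test workflow added above). If you still want the step, a custom task list could re-add it — a minimal sketch; the import paths below are assumptions, not confirmed by this diff:

```python
# Sketch only: these import paths are hypothetical; check the cognee source
# for the real locations of Task, add_data_points, and store_descriptive_metrics.
from cognee.modules.pipelines.tasks.Task import Task
from cognee.tasks.storage import add_data_points
from cognee.tasks.metrics import store_descriptive_metrics

custom_tasks = [
    Task(add_data_points, task_config={"batch_size": 10}),
    # Re-add the optional metrics step that this commit drops from the defaults:
    Task(store_descriptive_metrics, include_optional=True),
]
```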
2 changes: 1 addition & 1 deletion cognee/api/v1/search/search_v2.py
@@ -8,8 +8,8 @@


 async def search(
-    query_type: SearchType,
     query_text: str,
+    query_type: SearchType = SearchType.GRAPH_COMPLETION,
     user: User = None,
     datasets: Union[list[str], str, None] = None,
 ) -> list:
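With this change, `query_type` gains a default of `SearchType.GRAPH_COMPLETION` and moves after `query_text`, which is why the README example above switches to keyword arguments. A usage sketch of the new call shape (the `SearchType` import path is an assumption):

```python
import asyncio

import cognee
from cognee.api.v1.search import SearchType  # import path assumed


async def main():
    # query_type now defaults to SearchType.GRAPH_COMPLETION,
    # so the simplest call passes only the query text:
    results = await cognee.search(query_text="Tell me about NLP")

    # Requesting a specific search type, keyword-first as in the updated README:
    insights = await cognee.search(
        query_text="Tell me about NLP", query_type=SearchType.INSIGHTS
    )
    print(results, insights)


asyncio.run(main())
```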
12 changes: 9 additions & 3 deletions cognee/api/v1/visualize/visualize.py
@@ -9,13 +9,19 @@
 from cognee.shared.utils import setup_logging


-async def visualize_graph():
+async def visualize_graph(destination_file_path: str = None):
     graph_engine = await get_graph_engine()
     graph_data = await graph_engine.get_graph_data()
     logging.info(graph_data)

-    graph = await cognee_network_visualization(graph_data)
-    logging.info("The HTML file has been stored on your home directory! Navigate there with cd ~")
+    graph = await cognee_network_visualization(graph_data, destination_file_path)
+
+    if destination_file_path:
+        logging.info(f"The HTML file has been stored at path: {destination_file_path}")
+    else:
+        logging.info(
+            "The HTML file has been stored on your home directory! Navigate there with cd ~"
+        )

     return graph
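`visualize_graph` now accepts an optional `destination_file_path` and only falls back to the home directory when none is given. A usage sketch — the import path mirrors the changed file's location, and the output path below is an arbitrary example:

```python
import asyncio

from cognee.api.v1.visualize.visualize import visualize_graph


async def main():
    # Default behavior, unchanged: the HTML lands in the home directory.
    await visualize_graph()

    # New in this commit: write the visualization to an explicit path.
    await visualize_graph(destination_file_path="/tmp/cognee_graph.html")


asyncio.run(main())
```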
(The remaining changed file diffs in this commit are not shown.)