From a3fb9e2c1144feb42b66068505c1f234eae64a35 Mon Sep 17 00:00:00 2001 From: Alberto Cattaneo Date: Mon, 17 Jun 2024 09:34:03 +0000 Subject: [PATCH] release kg-topology-toolbox Co-authored-by: Daniel Justus Co-authored-by: Stephen Bonner Co-authored-by: Thomas Martynec --- .flake8 | 13 + .github/workflows/ci.yaml | 39 + .gitignore | 177 ++ LICENSE | 21 + NOTICE.md | 90 + README.md | 32 + dev | 171 ++ docs/Makefile | 22 + docs/make.bat | 36 + docs/source/API_reference.rst | 11 + docs/source/_templates/class.rst | 11 + docs/source/_templates/module.rst | 60 + docs/source/_templates/package.rst | 61 + docs/source/_templates/toc.rst | 12 + docs/source/conf.py | 67 + docs/source/images/edge_patterns.png | Bin 0 -> 32677 bytes docs/source/index.rst | 12 + docs/source/notebooks/ogb_biokg_demo.ipynb | 2269 +++++++++++++++++++ docs/source/user_guide.rst | 27 + pyproject.toml | 84 + requirements-dev.txt | 15 + requirements.txt | 3 + src/kg_topology_toolbox/__init__.py | 8 + src/kg_topology_toolbox/topology_toolbox.py | 588 +++++ src/kg_topology_toolbox/utils.py | 95 + tests/__init__.py | 1 + tests/test_edge_topology_toolbox.py | 84 + tests/test_node_topology_toolbox.py | 43 + tests/test_relation_topology_toolbox.py | 89 + 29 files changed, 4141 insertions(+) create mode 100644 .flake8 create mode 100644 .github/workflows/ci.yaml create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 NOTICE.md create mode 100644 README.md create mode 100755 dev create mode 100644 docs/Makefile create mode 100644 docs/make.bat create mode 100644 docs/source/API_reference.rst create mode 100644 docs/source/_templates/class.rst create mode 100644 docs/source/_templates/module.rst create mode 100644 docs/source/_templates/package.rst create mode 100644 docs/source/_templates/toc.rst create mode 100644 docs/source/conf.py create mode 100644 docs/source/images/edge_patterns.png create mode 100644 docs/source/index.rst create mode 100644 
docs/source/notebooks/ogb_biokg_demo.ipynb create mode 100644 docs/source/user_guide.rst create mode 100644 pyproject.toml create mode 100644 requirements-dev.txt create mode 100644 requirements.txt create mode 100644 src/kg_topology_toolbox/__init__.py create mode 100644 src/kg_topology_toolbox/topology_toolbox.py create mode 100644 src/kg_topology_toolbox/utils.py create mode 100644 tests/__init__.py create mode 100644 tests/test_edge_topology_toolbox.py create mode 100644 tests/test_node_topology_toolbox.py create mode 100644 tests/test_relation_topology_toolbox.py diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..b7d7cb4 --- /dev/null +++ b/.flake8 @@ -0,0 +1,13 @@ +######################### +# Flake8 Configuration # +# (.flake8) # +######################### +[flake8] +ignore = + # line too long + E501 + # line break before binary operator + W503 + # whitespace before ':' + E203 +max-line-length = 90 diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..487b761 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,39 @@ +name: CI + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + workflow_dispatch: + +concurrency: + # Run everything on main, most-recent on PR builds + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + ci: + runs-on: ubuntu-latest + timeout-minutes: 20 + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + - name: Install dev-requirements + run: | + sudo apt install pandoc + python -m pip install --upgrade pip + pip install -r requirements-dev.txt --no-cache-dir + shell: bash + - name: Run CI + run: ./dev ci + shell: bash + - name: Publish docs + if: ${{github.ref == 'refs/heads/main'}} + uses: Cecilapp/GitHub-Pages-deploy@3.2.1 + env: { GITHUB_TOKEN: "${{ github.token }}" } + with: + build_dir: docs/build/html/ diff 
--git a/.gitignore b/.gitignore new file mode 100644 index 0000000..59d7a62 --- /dev/null +++ b/.gitignore @@ -0,0 +1,177 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# ONNX checkpoints +*.onnx + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# Data +data/ +datasets/ +*.pkl +*.pt + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/build/ +docs/source/generated +docs/source/api + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# VSCode +.vscode/ + +#WandB +wandb/ + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. 
+#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv* +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..51ce057 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Graphcore Ltd. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/NOTICE.md b/NOTICE.md new file mode 100644 index 0000000..24bc565 --- /dev/null +++ b/NOTICE.md @@ -0,0 +1,90 @@ +Copyright (c) 2023 Graphcore Ltd. Licensed under the MIT License. + +The included code is released under an MIT license, (see [LICENSE](LICENSE)). + +## Dependencies + +Our dependencies are (see [requirements.txt](requirements.txt)): + +| Component | About | License | +| --- | --- | --- | +| numpy | Array processing library | BSD 3-Clause | +| pandas | Structured data analysis library | BSD 3-Clause | +| scipy | Mathematical routines library | BSD 3-Clause | + +We also use additional Python dependencies for development/testing/documentation (see [requirements-dev.txt](requirements-dev.txt)). 
+ +The [tutorial notebook](docs/source/notebooks/ogb_biokg_demo.ipynb) makes use of the [ogbl-biokg](https://ogb.stanford.edu/docs/linkprop/#ogbl-biokg) dataset, licensed under CC-0. + +## Derived work + +This directory includes derived work from the following: + +--- + +Sphinx: https://github.com/sphinx-doc/sphinx, licensed under: + +> Unless otherwise indicated, all code in the Sphinx project is licenced under the +> two clause BSD licence below. +> +> Copyright (c) 2007-2023 by the Sphinx team (see AUTHORS file). +> All rights reserved. +> +> Redistribution and use in source and binary forms, with or without +> modification, are permitted provided that the following conditions are +> met: +> +> * Redistributions of source code must retain the above copyright +> notice, this list of conditions and the following disclaimer. +> +> * Redistributions in binary form must reproduce the above copyright +> notice, this list of conditions and the following disclaimer in the +> documentation and/or other materials provided with the distribution. +> +> THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +> "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +> LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +> A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +> HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +> SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +> LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +> DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +> THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +> (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +> OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +this applies to: +- `docs/source/_templates/module.rst` (modified) +- `docs/source/_templates/package.rst` (modified) +- `docs/source/_templates/toc.rst` (modified) + +--- + +The Example: Basic Sphinx project for Read the Docs: https://github.com/readthedocs-examples/example-sphinx-basic, licensed under: + +> MIT License +> +> Copyright (c) 2022 Read the Docs Inc +> +> Permission is hereby granted, free of charge, to any person obtaining a copy +> of this software and associated documentation files (the "Software"), to deal +> in the Software without restriction, including without limitation the rights +> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +> copies of the Software, and to permit persons to whom the Software is +> furnished to do so, subject to the following conditions: +> +> The above copyright notice and this permission notice shall be included in all +> copies or substantial portions of the Software. +> +> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +> SOFTWARE. + +this applies to: +- `docs/source/conf.py` (modified) +- `docs/make.bat` +- `docs/Makefile` \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..babb423 --- /dev/null +++ b/README.md @@ -0,0 +1,32 @@ +# KG Topology Toolbox +![Continuous integration](https://github.com/graphcore-research/kg-topology-toolbox/actions/workflows/ci.yaml/badge.svg) + +A Python toolbox to compute topological metrics and statistics for Knowledge Graphs. 
+ +Documentation can be found at https://curly-barnacle-lnejye6.pages.github.io/ + +For a walkthrough of the main functionalities, we provide an introductory [Jupyter notebook](docs/source/notebooks/ogb_biokg_demo.ipynb). + +## Usage + +Tested on Ubuntu 20.04, Python >=3.8 + +To install the `kg-topology-toolbox` library, run + +``` +pip install wheel +pip install git+ssh://git@github.com/graphcore-research/kg-topology-toolbox +``` + +4\. Import and use: +```python +from kg_topology_toolbox import KGTopologyToolbox +``` + +## License + +Copyright (c) 2023 Graphcore Ltd. Licensed under the MIT License. + +The included code is released under the MIT license (see [details of the license](LICENSE)). + +See [notices](NOTICE.md) for dependencies, credits, derived work and further details. diff --git a/dev b/dev new file mode 100755 index 0000000..763e4b9 --- /dev/null +++ b/dev @@ -0,0 +1,171 @@ +#!/usr/bin/env python3 +# Copyright (c) 2023 Graphcore Ltd. All rights reserved. + +# Code derived from +# https://github.com/graphcore-research/poptorch-experimental-addons/blob/main/dev +# Copyright (c) 2023 Graphcore Ltd +# Licensed under the MIT License (credits @DouglasOrr) + +"""Dev task launcher.""" + +import argparse +import datetime +import os +import subprocess +import sys +from pathlib import Path +from typing import Any, Callable, Iterable, List, Optional, TypeVar + +# Utilities + + +def run(command: Iterable[Any], gdb: bool = False) -> None: + """Run a command, terminating on failure.""" + cmd = [str(arg) for arg in command if arg is not None] + if gdb: + cmd = ["gdb", "-ex", "catch throw", "-ex", "run", "--args"] + cmd + print("$ " + " ".join(cmd), file=sys.stderr) + environ = os.environ.copy() + environ["PYTHONPATH"] = f"{os.getcwd()}:{environ.get('PYTHONPATH', '')}" + exit_code = subprocess.call(cmd, env=environ) + if exit_code: + sys.exit(exit_code) + + +T = TypeVar("T") + + +def cli(*args: Any, **kwargs: Any) -> Callable[[T], T]: + """Declare a CLI command / 
arguments for that command.""" + + def wrap(func: T) -> T: + if not hasattr(func, "cli_args"): + setattr(func, "cli_args", []) + if args or kwargs: + getattr(func, "cli_args").append((args, kwargs)) + return func + + return wrap + + +# Commands + +PYTHON_ROOTS = ["src/kg_topology_toolbox", "tests", "dev"] + + +@cli("-k", "--filter") +@cli("--gdb", action="store_true") +def tests(filter: Optional[str], gdb: bool) -> None: + """run Python tests""" + run( + [ + "python", + "-m", + "pytest", + "tests", + None if filter else "--cov=kg_topology_toolbox", + *(["-k", filter] if filter else []), + ], + gdb=gdb, + ) + + +@cli() +def lint() -> None: + """run static analysis""" + run(["python", "-m", "flake8", *PYTHON_ROOTS]) + run(["python", "-m", "mypy", *PYTHON_ROOTS]) + + +@cli("--check", action="store_true") +def format(check: bool) -> None: + """autoformat all sources""" + run(["python", "-m", "black", "--check" if check else None, *PYTHON_ROOTS]) + run(["python", "-m", "isort", "--check" if check else None, *PYTHON_ROOTS]) + + +@cli() +def copyright() -> None: + """check for Graphcore copyright headers on relevant files""" + command = ( + "find " + " ".join(PYTHON_ROOTS) + " -type f -not -name *.pyc" + " | xargs grep -L 'Copyright (c) 202. Graphcore Ltd[.] All rights reserved[.]'" + ) + print(f"$ {command}", file=sys.stderr) + # Note: grep exit codes are not consistent between versions, so we don't use + # check=True + output = ( + subprocess.run( + command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ) + .stdout.decode() + .strip() + ) + if output: + print( + "Error - failed copyright header check in:\n " + + output.replace("\n", "\n "), + file=sys.stderr, + ) + print("Template(s):") + comment_prefixes = { + {".cpp": "//"}.get(Path(f).suffix, "#") for f in output.split("\n") + } + for prefix in comment_prefixes: + print( + f"{prefix} Copyright (c) {datetime.datetime.now().year}" + " Graphcore Ltd. 
All rights reserved.", + file=sys.stderr, + ) + sys.exit(1) + + +@cli() +def doc() -> None: + """generate Sphinx documentation""" + subprocess.call(["rm", "-r", "docs/build"]) + subprocess.call(["rm", "-r", "docs/source/api"]) + subprocess.call(["rm", "-r", "docs/source/generated"]) + run(["make", "clean", "-C", "docs/"]) + run(["make", "html", "-C", "docs/"]) + + +@cli("--skip", nargs="*", default=[], help="commands to skip") +def ci(skip: List[str] = []) -> None: + """run continuous integration tests & checks + doc build""" + if "lint" not in skip: + lint() + if "format" not in skip: + format(check=True) + if "copyright" not in skip: + copyright() + if "tests" not in skip: + tests(filter=None, gdb=False) + if "doc" not in skip: + doc() + + +# Script + + +def _main() -> None: + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.set_defaults(action=ci) + + subs = parser.add_subparsers() + for key, value in globals().items(): + if hasattr(value, "cli_args"): + sub = subs.add_parser(key.replace("_", "-"), help=value.__doc__) + for args, kwargs in value.cli_args: + sub.add_argument(*args, **kwargs) + sub.set_defaults(action=value) + + cli_args = vars(parser.parse_args()) + action = cli_args.pop("action") + action(**cli_args) + + +if __name__ == "__main__": + _main() diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..43eab3f --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,22 @@ +# Copyright (c) 2022 Read the Docs Inc. All rights reserved. + +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". 
+help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..c370eac --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,36 @@ +:: Copyright (c) 2022 Read the Docs Inc. All rights reserved. +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/source/API_reference.rst b/docs/source/API_reference.rst new file mode 100644 index 0000000..eb5d47d --- /dev/null +++ b/docs/source/API_reference.rst @@ -0,0 +1,11 @@ +API Reference +====================================== + +.. autosummary:: + :toctree: generated + :template: module.rst + :recursive: + + kg_topology_toolbox.topology_toolbox + kg_topology_toolbox.utils + \ No newline at end of file diff --git a/docs/source/_templates/class.rst b/docs/source/_templates/class.rst new file mode 100644 index 0000000..bcfcb17 --- /dev/null +++ b/docs/source/_templates/class.rst @@ -0,0 +1,11 @@ +.. + # Copyright (c) 2023 Graphcore Ltd. 
All rights reserved. + # Copyright (c) 2007-2023 by the Sphinx team. All rights reserved. + +{{ fullname | escape | underline}} + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ objname }} + :members: + :inherited-members: Module \ No newline at end of file diff --git a/docs/source/_templates/module.rst b/docs/source/_templates/module.rst new file mode 100644 index 0000000..b3fda69 --- /dev/null +++ b/docs/source/_templates/module.rst @@ -0,0 +1,60 @@ +.. + # Copyright (c) 2023 Graphcore Ltd. All rights reserved. + # Copyright (c) 2007-2023 by the Sphinx team. All rights reserved. + +{{ fullname | escape | underline}} + +.. automodule:: {{ fullname }} + + {% block attributes %} + {% if attributes %} + .. rubric:: {{ _('Module Attributes') }} + + .. autosummary:: + :toctree: + {% for item in attributes %} + {{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + + {% block functions %} + {% if functions %} + .. rubric:: {{ _('Functions') }} + + .. autosummary:: + :toctree: + {% for item in functions %} + {{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + + {% block classes %} + {% if classes %} + .. rubric:: {{ _('Classes') }} + + .. autosummary:: + :toctree: + :template: class.rst + {% for item in classes %} + {{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + +{% block modules %} +{% if modules %} +.. rubric:: Modules + +.. autosummary:: + :toctree: + :template: class.rst + :recursive: +{% for item in modules %} + {% if "test" not in item and "docs" not in item %} + {{ item }} + {% endif %} +{%- endfor %} +{% endif %} +{% endblock %} diff --git a/docs/source/_templates/package.rst b/docs/source/_templates/package.rst new file mode 100644 index 0000000..325848d --- /dev/null +++ b/docs/source/_templates/package.rst @@ -0,0 +1,61 @@ +.. + # Copyright (c) 2023 Graphcore Ltd. All rights reserved. + # Copyright (c) 2007-2023 by the Sphinx team. All rights reserved. + +{%- macro automodule(modname, options) -%} +.. 
automodule:: {{ modname }} +{%- for option in options %} + :{{ option }}: +{%- endfor %} +{%- endmacro %} + +{%- macro toctree(docnames) -%} +.. toctree:: + :maxdepth: {{ maxdepth }} +{% for docname in docnames %} + {{ docname }} +{%- endfor %} +{%- endmacro %} + +{%- if is_namespace %} +{{- [pkgname, "namespace"] | join(" ") | e | heading }} +{% else %} +{{- [pkgname, "package"] | join(" ") | e | heading }} +{% endif %} + +{%- if is_namespace %} +.. py:module:: {{ pkgname }} +{% endif %} + +{%- if modulefirst and not is_namespace %} +{{ automodule(pkgname, automodule_options) }} +{% endif %} + +{%- if subpackages %} +Subpackages +----------- + +{{ toctree(subpackages) }} +{% endif %} + +{%- if submodules %} +Submodules +---------- +{% if separatemodules %} +{{ toctree(submodules) }} +{% else %} +{%- for submodule in submodules %} +{% if show_headings %} +{{- submodule | e | heading(2) }} +{% endif %} +{{ automodule(submodule, automodule_options) }} +{% endfor %} +{%- endif %} +{%- endif %} + +{%- if not modulefirst and not is_namespace %} +Module contents +--------------- + +{{ automodule(pkgname, automodule_options) }} +{% endif %} diff --git a/docs/source/_templates/toc.rst b/docs/source/_templates/toc.rst new file mode 100644 index 0000000..39ab508 --- /dev/null +++ b/docs/source/_templates/toc.rst @@ -0,0 +1,12 @@ +.. + # Copyright (c) 2023 Graphcore Ltd. All rights reserved. + # Copyright (c) 2007-2023 by the Sphinx team. All rights reserved. + +{{ header | heading }} + +.. toctree:: + :maxdepth: {{ maxdepth }} +{% for docname in docnames %} + {{ docname }} +{%- endfor %} + diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..aae352e --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,67 @@ +# Copyright (c) 2023 Graphcore Ltd. All rights reserved. +# Copyright (c) 2022 Read the Docs Inc. All rights reserved. 
+ +import os +import sys + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))) +sys.path.insert(0, os.path.abspath("../../src/")) + +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = "KG Topology Toolbox" +copyright = "(c) 2023 Graphcore Ltd. All rights reserved" +author = "Alberto Cattaneo, Daniel Justus" + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [ + "sphinx.ext.todo", + "sphinx.ext.viewcode", + "sphinx.ext.autodoc", + "sphinx.ext.napoleon", + "sphinx.ext.mathjax", + "sphinx.ext.viewcode", + "sphinx.ext.autosummary", + "sphinx_autodoc_typehints", + "sphinx_automodapi.automodapi", + "sphinx_automodapi.smart_resolver", + "sphinx.ext.intersphinx", + "sphinx.ext.autosectionlabel", + "myst_parser", + "nbsphinx", +] +numpydoc_show_class_members = False +todo_include_todos = True +autosummary_generate = True +autoclass_content = "both" +autodoc_typehints = "both" +napoleon_google_docstring = True +napoleon_numpy_docstring = False + +templates_path = ["_templates"] +exclude_patterns = [] + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = "sphinx_rtd_theme" +html_static_path = ["_static"] + + +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), + "numpy": ("https://numpy.org/doc/stable", None), + "pandas": ("http://pandas.pydata.org/pandas-docs/dev", None), + "scipy": ("https://docs.scipy.org/doc/scipy/", 
None), + "rtd": ("https://docs.readthedocs.io/en/stable/", None), + "sphinx": ("https://www.sphinx-doc.org/en/master/", None), +} diff --git a/docs/source/images/edge_patterns.png b/docs/source/images/edge_patterns.png new file mode 100644 index 0000000000000000000000000000000000000000..85b9dd90bd11594c7e3dc9be2285a24387a142ca GIT binary patch literal 32677 zcmcG#WmHt*9ycl~B_$0~0@4iv(kk6uLrY0_mx6S+v~+j3fON^g3?(2P!!UFWakuB3 zyWX|lZ}-F9Yq6M_we#7}^N(Lds;S80ym!MHJNM>S+y@DMp@)mKa19c#FK0zn^|wWPTxinbLs8 z3sU=m`hp{vh=`e0?%sk(IX)DO}S! z#{cf=`sDvUC2u2JsmtvK#~;Cac_|BtXMqxKuP|E?XW1JT*ihe%LVObHuWip~*SrW` zQjmw$jlBEr^zT%z-DDHqY^0)pC^p9%yP7BF*JJqfvUc5u@fremu0Hfm{F&44=@;En%XfZP z)CS)pMo%$QFL|9CAAtNwM8~y95amb|T2jCx2=bxTOdgwawS?ps&{+ zfzEDh@ju8mdod(GKE?J@3VI0)WBVMcKjbZ`D23`^V(%hISt^d_5J@KO+8Zhk3+jV+ ziWa`32x;bg)-Tg$)8|Xpk6n`WZ^(JUgOJZzl~3+;OrFDjN^O-%CP%$iy@<#6*~DGI z`g228Ig^j*CLF2Ev4>6Y^y>5yED+*jvv2xV+fUUi8O9+{7Oku2K)LR*^RO0hn@b& zmnRWtuCtx~+r7`_ZkWx3v(L18*Pgp;<8e&5H{kFPKuZIw~IDX>n5y3{JrsD<%~e;mz`cZ`=2d4OwIqL!-fiP=-!l&nJ7!W+0bnsIyP!`mY^ki_Ge|?*by7| zq4@seVEB*Z%T|L_8(==CM9C@Hc3|uxRwd7?TL12^$LT}|X{-xbDbQ}69)DbmC8&z6 zUaHq^gRKWloBgpzeFy z@1S(t?$i&F^r!60%=rmki5yXpWy((|MZb0E>OYB-+WN6pUoLCBRbzT^>ClnN zO9HHh#A<9aGS1D)a6})6PDJDgE@?1~KD|`1M>HTE2ojX$3(+&YJ-7E%a&41(_jUh5 z$RaEFT~Ej34mvOr5nwLrO5ryYz?-qC9T+-+1VAN#*F+u<96KUI3H*$^er-$VN*7l< zMfop0+4loaw)Hx$mXWS>{rX@c_}`8?g6&l#_(H+sw)lyJD_vhH40T}n<4t|SjEra$ zf{wq^O#&k$37J0q{rUCtT8~&)cei?p5_#A4Mzph=o8-}-_LJVnd%S6}E6Nu{%wOoF zqkc$55_oUM3aZtAlGdsQ5jWe-%M8a+@wo4)Ddh+gkEe4D=Zg6`AIz~B_6Dw;$NVn@ zwv4~+0|lQ?Wbn;2SSk`HF}$_b8qE^qiX!2NvV~NXKk4;^gACi8`l!D5g)ACLpB-T&c_T)&~51 zdpupX$N$DXkKQNoKIuM7#ETP%n8#~Dj~SP_5lJF`m+YR03ttDLNzGTgd=C8RAG-sP zo_FW7swIB-_{H#PiN|00lHqfqM_fJEPu+GV)E~KmWMyPFhGHp7{0Q)iCkHH~yF<;K1#7X-_HYtrmd)a0}y_ zg-*F$9U0x-!fFrDh2Ol93K(4tdJO2gzg+G>GDw5K4B54Y?cj4}DWUls5wAM;eT|EN zfTQ+Z^)_exxeCa8B+|(J=#L5U4PvR8!gdW-;C;Jq{ky@E&Hn?eB~%jpSm3lTB~C!{6O`3Zo8eE#SG**{)jl*T&7}%GBSv zZeg$udcF&K5M2`-Ybna5^&m1?gL+>s?rMoi9WYlLctNTQ<-~^O=XFsLF=d5vKYG35 
zXJae{$qA}9H#fD_YK*&mqv~#<22S~2h*iBdXIhm!afhXi^(3}!SCsdl4j81ev?GdX z&53+K%;(hN?sS~1q&mVC?ItjWN`ypJ?BeY!0sF725=O!I$e@9E2|9@Dv8iZ@vO(NX zY{1F&rmE@Ih3=ET|0(O zsfrO1%jt{TE1D2UCEnkO3?$1G?dbSYGd|fnXUZs&0PRXjKl8Jl$-AlLA8xx-6^XQk zaz@8nqsb9bQT@PR#w(_Co_HtDj{cDcrrBV!s(3L(9c1f~Qd|+21Qn7#;&WKyt&Rh~ zm=--_mx-r_H~4%Xil>PWyuV$)gfEvEjOesFu8gI45T&ER*mP^k@TPz%o%bfC2bc`k z*PHQ>Ei?VEmcP>?XV^`~HC4rmS&xxd2*H&sW@OSjg{yX}+phRR``Iv&gwj5T-(~i2 zCjwVX!apzIGGl)xM{aO)2PlbxN#$3b#FtFlqz?Wi_))l`N1sT>jI ztC37-tx2sfQ7W@KLA>ZGhH+p!*;oooyx?ltmrsLHl{)?D#=iQ>Z`wc4(EaK1nQdZ7 zgj;YX3pgyohp6iGK?V`?S*Gq1R%cAzUkG z{wsw=Q>%M`xEf?VmEAY1DiI56wk=oUliNR!kiiudg zVvb@YovZiNj&8AnHbnD-?@Qj#$@E*D^wqOQ9p|%w@j(s9>;}7eRS;Ei?`6hh_|4g_ z$i>sSHkZu=HeF64PQ!(xwcnMxwH58jlpw!RMnk&wd8wkTm z?h;_B1~reptv-^PdP7`0mDBm-(_n>OBjd-b@R3{s0)k@g3fp|6PTcF=ydb4%D+#wU zfgmQ`Dm}AXQkeM77NI)~s$5O|`t05J)>N!7P*xw0gE{%GJj+Cdf@X&|Wg?sW!ddP< zS=?s974PK)+vCMEqaBu24yd_UIz4kBtas7to3m)`Zo7j&K7?qG6DD#LalRgq48zX& zp&f|W4}>1FNQR?rl$YlA9Y8Aiq~(%H$_bfN%A%^X^UhhDd3!Wjmwb1LY!<=GGQP+W z0S3i1t>hkD3IXid5@pRkL)K}VZqN4G($Z32TJM|AQjC`Zi+>-Q!J-?sjLvS zc#iHiN(RC=S-oBwZ&Ztjm!3U+`n%iDOB<08nKtQ>4K7wRfH$oE`0&iz+xv}pe7hXg z;1cXz;N7((W`Rv&K$7oLG}q4TflX}F*`BEViZA$pCoP&TnJMVs;@ga5%AE@|5Bqz% zJx+e<_czIuLzkHYugK74lC^raC82Tx9D6^?l&9ohjfzF0r5hg65WNIrZ3hPzdRc zoQXbMZyxk#vp+IbYXBjgcZGga{+d~@MR_Pd&}r2WD{3$~+rUEnMYqB^ zP0*d0TU{Dw0<;Aa7gy!XV?0!9Jitw+ElsD%YYkrV)b;&)Zk4u)j@?1@3WnS`zbKkwyfVwpPqrJcyN{*Ed9O%IZ{yDcrWJXG~$!KpQ7|B+B^!HSR zLu`b-m5k=#(r=st50eX7{7RaeenGK-ttw)yS($a6YymJH{U zG3=e7^F)SgD~o=$3v!WK-dS$EA}BVGrA-d82AJ#M9f_HpRf zwQGY}Q@aPk@ha0CY`W7TwHfRc1=a0{KeX3z8EK*8QCG3ffUD>H;}W%%&Zk&-(~Llq z4BiGUnHkaa>!1T)>1dMKgOBu0?*nwNzAtFV#AZe@<@vv|gI1Zr`1Kh+w7TyLHvJaH ztgo-ZgzWIT?^&d981&URI~+aa+=(!G5RrozKj;i$!o+H@rBm0ScoKJN5RalMI4xUh&-b9*Vo*E^V@$ece04WCb=rqI zyj=C!rQ(;#VHP4s=yrLtaD2NjK;F1`CQ=jgmgr95#|2((_h)N;snjYjm=YvNsczno zV{3H3G3@bg&kT|dc9ltVfk8Xm6w*(crcy7z*$LIkM=zc4S7t*|b;rQPX10_q?9soR zl}F{T5Uko2Hu=5{m&k+b!R+&td`Tu76gX8@N9gz+3e-1Uk3^`~LMpm~pEtz=h}ac1 
zZ&0)t)6<}K<&ArA-H#uGL1TJDjv}4X2CpbV&8M?dovrTiA>G9WtzT_5Z1;^eSyiy0 zuG@2W>k0LdLaBxyl3p}!r-w4L9V}_5O&*l`xw=~J&zLhWwIUL%r1h*|{)q0YB>(yI zD8s|CsPQd%tOQF2y|kw(^*T_{>TIR?aNO|)d?$&sO#>)8YPCge_27;&Ldnl3hN=B1 zDFmtA7Ohol<(yvVhraJo1)jbAxOl!;BWmI!lA8;<$ST$cigG7S{hdVZx~>`r(=~y+ zkGiBDo8oF^u5tH=1w@IH_tETUr9t%xW^-YuZ!Is4+M1}dJBtH^LRjQHHZe`s9B9Q zyV~xORXt2dxkvs4OHpU)%>bi$bwmR}#!0*X&DfGW^Cs)hG-<}y@(wM>b#>ex{E#~; z#Y(yKvMQM=+S;C5+o*Xuz8i(mRcd;fnD>P;)rQpe7X1a`kii_@QrB(PGD|03UahR? ziFZJqybZ5tQec>ASZ-?4Qp8GWIt!;`@`ph`~+(4vsg#EEtAyr{Ml%3`AD(KOAi5Hf@ zv*0SO^zdr?5AFED;+at@tt|)0-TlS|K4bo4ydzYEL2sM@b4T!P<@7CF30&NN*E6E+ zR8aN41Ak|HH&nZ_XI6~XQYvw=-!jL(n{$x&MBSG)EKR!Ohn1zPajG`@R&&$mnf?Pj z9a}fh$jx4=uEOWkZ3&MxH*3-iGegD{bLsCzrJ?RHH}~vp`oa0uMq6e1)g(G|JWI0l z2}--_)MEWcD_i@`fy0u0cHTTG5Xa{-ow8hZ|luFj7b z2EL8n<9;l?#+}#MLgzoz=%zUcxeuz{IK~Z&OJS{+iDBQW=0FbeoG;|46n7Sq+@iC& zs!^d%z4y?A8!1McWUL=NJVEA5)^R2HD>yPq4u+t7tP zdL2W?!^`tso~>C4Z2lm=s9|y1a7b_*M=A8_wzjCKfmXtnce!>|0edhy{U@o3J!Ew` z#)bzQN32+zliKBTiPzX|(FTiAz1Lv`)?^AHO zrir!*FFId)9ei`sd^VKLEiqN5Vfk}lx1a0*QKtp4{=|g z2=C$3!xCj4@0|>5s?2?(c+KEu?C0rtVc!uF%13*P~d{8+Eu^_!T;2-cq#QqAW znHq>8ujC1Ou=0PnVdQTgTBvVaM@VQ@hhP42o8G*b9|yXZL>;!OxTL6>E(vgTGIOKf zQ9Egq+PP#@vf9$%moln+y^hHrXHG4o8IY?;riW4$-v*`7iCmip{G_pUzG=~rkD)Kh zJq+2j%6>3TCXKl~+}(H>G#Lr$OIy9Jb{y@r@bUAjJhBCwmql20hmA&6aW* zRJ+y5Co>kB4;x=wfU)Q%GA#HUsS-6oIDcDe8;8md}ckCY}$5<8P8i({Ymh-;fGf z?iqyRit%>Y1+DcIYF(W*4UI4N1K3ap$tcAf&`-~DkjsL;uKB_o_*WiA5>XbqZrYBr z(m{zd;e8>)`Fi!b%{23%r8Sc*>J=fKYEbGm?NXz4{Ju`Xgsgbw>MDR=_NUQQd9zb3 z(P$eB1P+M=1RCv%U#1>Fy0ry0Y%WsTDIV_5rrq2qjOH0Dr^SV+3w z6%kMQ_wbPJFZEg}e>?Ka_LP4=N87zT%bA!M^mAZ9lY>f1Xcku6n;*b=wXB6HUhhDB z%U@=N{j*kCI)Y?udhgFPw87Kin@%}LRSzKmE_7vb%Z0I7Y*nq`B`dM&6-Z%1Dj3;t z<;W89`5SZ&Ym7SQG(t2U6D`p#86DeSGj*GEc)}~5ex=0Q$7J5sXjij#vb%_s#C(_U z=I0w#l_lYOj&9#Pm7m6G6jh?UY}*TO^^l;s-S%Rq;UR-W`}7a2;85+{#c7sa99jF7 zYZRy5=8g9usO^XD z7YB1EDjv!p17dbPwQgD7g*@iQq;-7WogTK?i^BL>m+?vcnmgfdcy|RO+m~!wt)qHjO`o}v!>bc4Sc~z;h!BX!?0cF5a`3zyM1w*9F}p0;~%S$XhMyY@0|5Hls7kLr92j$zq!lu_sU9TB)W&6 
zpR$^IjhCc1^8_!(ID|%R@S7=w7FE(^{ARKJBf5l0&YK`-FR4(Gt%RV=oJJh^h z9s0ttJF}bXm-vAZcw{qR(j1DEwlV9WqoGwg{=O4kl7&{Mt$%FM4JNFss{Lk5zGjT8 z?5hl!VmYg`=!FjhfvvXV^MLvH#(%(G#c*Wzrn3=#RWT9kdBYe}*KhMYg;ZAai-TtZ zCSSx(CJBfu#8!ZMI(C&z!2XrYq;TYSy=7`9c6ODEV0pySy^RZKEIG7~;jMrT_?LUM z8%T>6qRHZ)~1G(5cWq7a_Lzr?F23x6SPsv4wK zeiSPZGVrp*?eUz%2{B}=EY}bpVWg98~^YRPS*3feQ3U6a1@wt@rq9D%- z&<76#IHj_ml+d6EV(wQIF_IDwDq%R3Nj^K%?qZ~6Glz#l@i;@RX`SUxM2_G$iXqYrSjkb{dy)TxlK+#ju$a7De~9 zgcxS?p$Vz_Pqm*PtMARw(d@O8W=(Qn96Dx%}7T<(PK_K}yE!r$vUvsjMzY&U3=GM}*;cfIMk*^ynD zmdYHYtct$)Vqze1j$s(jaQ0LkDGe8BbtS7`uQAFSHR_xy*Wn7|Maqde-d$&0dLSNo z2qd-Zj$mrK$J64=-_ke&8V5TzQUZBh^pTfhl(^7~rHh#Z=2`BGaR&_lE2=H<6rUqp zVEspqQKUi3VUaKFH2aeXIY^`Ix8ggO{+{Sd{6?j|rLQ%AHCDX~pT<#%?4(4~t0?>I z=)iz4nT|d`r>F6>D|PtqQv<=MT0;SWxBD6^g7oy!-|X}=$8#4q$*NcylSe6*S}x}A z4@*md!Kv;;$G7acmm>2mD|Wcu9RG|2^>@0HM+mv?e7{Vxr`0v3+FwOfnHp7tsG4zw zw%KpaNPj)tr@KoK+NAv=fdzhlb*uUb0}HFHwu3O8!i@mP-i9=yWVM&ksiw&D!$nQy zg>&wZ3!o7zGiXH#@xY}|Htj*y+{1LULM}v}UDAU87+wD|?n)OC6kWOf+c?nbBD^TD z&q5_~ITVbF<*iab090sd1ycLr)KviH848r30%xc&%keaZFj;*sXq7>$V`S?xua1WC z5B6e)uR4fWZ?fW`N0-lYW%V|Avran`83z{<$iv1Gg9zUAzkf|jpTmmGt2XiHMu@87 z7ig+1MX46!`ZB2*cb~&7%&X>=!NJYw3b9e2>H;y|=&9yTm@1R2LO*!Q)Tsy$-Fi)Y z&O~n)Z88n`4FpmgLebKNxISU3Fg%)EvVu~ToG1XbW_aL`F40xx3VJl&n=6k$$~xv& zOlEXA?n9^fe#slas#{ZK90csE)cVqrbL!y5(V^*N&EHETkoS)PSo(ARLJtk4H7ZPS z%^wS>D@(cyvilc^?_!Ru=_WS{OuOyo^*vF&egfbl-v`~}*w2d%{J_X=R@MBS!i65b zaFp18VO(dr4vg4%3QL|)XK)t(=ijg8zqhdO8^L@5jMW?7%-iAgbKqaqQTFu++$|#Z z>C-0ylShCTQEDZybkEjH$^oD zUtn>0uLJ3|M64jYz-kht*W+IsAjW7&RHa*UrvfKs>$}8!@uJ@kfzxPy%F8l^3Edo? 
z{_yzl49L3QoqBH2JYcsL9i~QwGDe4sjl(SY>`cD5`=u^hBm65@0@WK;ruTPeTpIv7 zmtl9xs~B{5f==ysSpSPVqYFq7KiySf#sSUGN!T$U0nd0PZ_Z~KY{|3EulEY$=U@O> z-8wMtiQgVIv6Rha5{P8&1G#9|1>S6bT6(m5{NS0$@FJZ9OZB9ur1TxV9%Ah=tK;RU zg6tIiN*I&M9j_)T?8}$Xy_tc$JXsO)^krN7waiA97eI2=8CpIi+hMn!R&YutxLywr z@{l04A=X8O1*4jLG&H14UvALc9@Cz)#ko8Wc9E%DZQKs%a)nsP#EO7Ul0GtOt(9qX^sZ^ZO0Z$daO=ps zYKj6fU)5K3j+(o^hP-U2aNHbI-L}X~iT9{>tzK7U7nK7&OJ8DnTEwO%GVGepd=#k% zC_e+t)ZW~_7_tMW6Ug%ia|X&`BWhMO8C@3uw%O1K;YOqB%15=k68>IyfvX}rXu7&F zE@0B+2X7XfV|^xCx+Q9Cd{gg*nD)HSZKu;yWMfd4aCie%})kelVuq9}i3)c9;@~H)5UI3~L`qp;Al@-4>FN zDudMp9HP|Cw{lz@i>gW`!*MHy%fxnx-0B?p$9eTkex@uDR8g!aJ3P`thoVJ+8c{3O z8l0Is8@O47?BwHQmJ?vrEUCKmGaG&f`z}dlEFYJrBkNNm&@m-cNM*1bNSetH`vPEw zLJm`CrD(8v^I^uYk)|m3&6!4k`2-+WldD1NAq{{JS!i|2vq27fhWa!!7* zVwX)FZ-6#vQS0#Q#kW??VPs znsW=jQ2)#%=$@a1%ecyyLMek+njhC4zC;Wp7wv9cM%CP^sSm*Fl(w%D+pKEdWZO+< zMl!OUUeQGHm=C%5d*Cvx%`+ zyC`IEvmx;8=3#dhoYjsQUl`X4F#A6qoB%!VUSBKb=PkfRxh)J=_6lwWea0nC&u3=P{@R;c2D`O2Z^n5&y6*JnrQKh${z(b7nEml>br68m%siX z@b)|$`%z5MxW!Q{e+O?yBu|b?dgpsW^xvR|n^GG@W1g-_fcDizX=x0Ngn0e#Xp_zC z@0yiPAe=wCui5FQ0_;l->OH;+9A)#o<}dHV&Nx@E7Oepywu#;KqCd%S9Z($9;xrBn zn(zO9C_8O9IT4<0rG4Y{;-gLL>E21Id7tpxb0UBojo2p^eXU5EVN%*ehi^%I>XXV) zb1se`*Y9xO4+kbv*^3mV@3a4?{);)y$ThZcI*txkX{`=nD<)taHQvZp0?h>m9-g3B z|1$uX)ES>!5P@}81Cta1xw)G+3F3#NY)fgBr|`5JwC2ueYK{K_ zdMoYwLV!E%SgQpxb!9uI14{{(JjC~ zCjunu%DjE`h_uo^t+|g%;v=X2DStI}>tIj5q-RP(0y95Fo6;ZHg!x9&==elYst)TA zuZ#v_l0&CTFNrg_;g9NIl(xs(m$|XbzM{AEJM95%H)bMw*4Vi=+)Ef$e3t z4c|xgqIoUH$^w!UIESCA>(H57SrG8)Gs+^)a`B}*o{L@mIkOctaWykD^M1U)YDTP# z%TF=w(5)mDp--l97|fkw>Ss)ZIV#p2L94Tl{1}To9*fhXHjdXTSyr8NQw|_7Q)rdG zy*`~%=3C6ktk$5iYAAsCz6Z9bFW10KD9a~`r4WSP*T~o6@4w(6J!%;ErVmb@k{l{y z^2>onVe`vBvE&(_qb zS|~ku2)e&Wx2dI0V5c8|zS-Q%(X^c_%jeV?b!uo#^Y+y%;*sS1!4wn=RY>PF(t1|4 zYG>RVD5j8}7KsTr@EwQJc@VX~-#ffGHhNXXG5XzhAK-?RTkI!O*+u|b85;0RgRKP} z_Bn=iJ8V}ZjBd<-xs~>_4krdh@*l1MX)A8QT2&6x7XTrX^J|`I6Dy)2vsf{GBUX@A zw5dkNsn^Iz+jI65-nv#1?FE!>RvTWNK8ZDXU!|__A#Mgk-dd_^YXy(V*C#s+fz9d8{jYWq@c5%0IiE( 
z>q4R5lmxWfPesUAJr)ed(rc@xh;4E-FA;0CQ^eb2sk1nXt_Fiq#3!qMh;m_7dwct| z+Zc-U_lhQ7&Tg=rzE0F5B3V3Wwb7jXZY^ z3=Gui?1mAy*QKvs!B)K1q#LaQIkDb#`^J9E5%#ERaaEssLwGBvXVl8cD>=~YewFOf zuF==LolQrW$zjlMy3~a7VT@3sz)5D~2_l zTPVXfbn-uj{w|Ql;+F`Wv*VrJH9lYlH?VU|gW+P`yF@n+C0-b^yHhzc9Uflkp>{!! z>lNWim1%P0zfBz&t2&GYM!`yo!~p)L1>1T`B2U{vUSilr(6b4ufjJ`E%dc}>Se9Jj0h zgjM+;mXhBpnwTvv6pK`t$g{Y!)PK6DfbC3(hy(N%f+(7Eu*l8m2|!lJ`+WTP@g*VQ z<|!gHThQ`4Vq%2tg9&K<;*zTwbHXBt*Pv6zaiv2$6vm=yLH=g9*$^E)s8D+~PxI`O zTT9dRRH$0NkpvAVGr`RK9v$yN3K?4ob5<<%SDo}Ax`C@!i!>W1r1-{6Y)QCmNHynt zb1#=@yQNzNBmiFlPI?m%&uH(W*^xfi=u~^{roC``pX$s(SVJx5OFzitT;vWk1XvJp zPBBu66jKM!^?p?1itX5Rys<2&w85RpVUM+#xhNq$!Mz2V$t6_U&-o-Zom%|VTH{Bn z6>nf@oQk9NSTfJroywn7sgJ*Z^H(a#L4ylmJ=BWi5`W<(PUtA5P>sl60Fh(p3rY>% zJVC{HijSu9n|1qB`fFfzmn+n;W0@`5F*egByQVO!e{6QXg3YU5v?VH3il*Hy$XVXZ zm1|arq&z1WQw*oVTmG-FQjJ-`?`cyV%8$nq66Y>Ifcmy58N1yrSR}eJ z!w)ebRt>7WH)`?JAvs|1TlddTa)TaFJ$fw5811%1GBjQB{ASCyPr_+lL&}cf z;r)?spGCV|gFoHlvE^JXq&efcy$zg0qo~YCw_&oc1Nt`to%Z%0aP(}M2I~|1QC zn@*3B%?s$I2Wp8!wqH$opO*NYK2()esb){S>+_7D_lYzYzmKs@DDS>7hnP{(Risvoi65Lli}Aum zIa%`s z8f>@!!KLIJ=Yu`wLXZcOXh)yxjGJvG#!@SPFj~9q&}~X(M8>qIVNPZX8C)Mg(A9eU ziQyJ2{E{l(HiE9(rM#L8aqEeA1Z-;=X3Oabc}z{<5k zML7ThL49GG}qHvHdy{;lMvtXfey z2Ck5?z{*(AWEHu;jcsmkNCA<*JC@|nPt$tfm1Is<=xutbSSxB1n{@UWz?e?P*lLUH z2N1*+n}`x09Z^77%)H_7A| zehKBl7qHrN_0*OvCoK@eeILkLz?lUX&5oSZxYjrTvYpHVTW zMnc>EkttJ5wUuVfsuK4xtbh_*ov=z;R<1727d-d;srMX36i@6MaH*X)|lvJ`H=h5vpX&?x}wn-F@5>2msFw!tO%XeF}59CsAQqIWv@{SwBeZE>!JQ-nx%;#2e|`go6y6$x_8xa4QdeeuCCL{ zH3O(RShc^)@b!3^Z*Vq@&zy<@01xA+?ztV7Tvzb?Z5g`zK&=#QMCc(G0LP=<^cy;bqi~u)HS=K0 z9#k8jf6(ne2m#a@ND5!N(DU(GoU5m6U@&W=Iv`z7pZLQ%z*%IFKhe`|+V}M0@w9bf z2vNZ=7s>3$JFjjLtXBSCReOK5(iS%Gt`*mAK+UfX!sh?g6RjE6^KK$i` zJK3%LES3r_NPceDYNp2u(+v+dJ7Sa^L+Z2C_8RLb+o}Fn%P*u_@&d|hXRz88zr?-x z{%H#rwoIwUkSScJM&haN;O4Jp&2XXP4Q&Dt7H{{{e>NyOx-lDG?K+ZXTgVU~6>^uXqTiO%dRkDR;S{Pe z9gswtvG=~Dn5lsB4J|JIx>-b=;62W>4fFn&B-F9j;k~wG_?A2;!vpx#KGgG*E9$k` zGFz+MQrX?Omud`jS%2ETNFw|318%5~%Fn`4dV_jN*ZtzxPlK+0YDU2T5M0db$gs&4 
zA}{#MYUp%Iwcr>zjOk>`B18t_YrrRW|W+CxEJ70DbBXtIn_==l)2Jnv~LxB6$~Vu$&# zy8?CpT6wSJ9i_nXT+?D1_Qh(^Z;Ul0jP{9KJ!WH_xRJiAZDFtK8kQH`#=64 z*{BFgi~r;Qi4CEPj5zc(Q>O>0O^UOi>v%C^UbPZz=%<6ZasUTycPyL4-mY$~_;j){ zSklci8mI%tk=hjfz z(^%~fqyG4Wx}O= zXSyUpHx~I0I3zplpCb$Sc)LD&-Ung_HDTPGZtIR4?4z#6Fh`RjWB_@6z00P|tTDd& z)USW`BdLGJB$a^^2%csd2J~b91>k9@2E|?k*RqWR?8e`nof1?ePDwXy0VSL<9YRNE zz1{!<4ij|xi z>{oBm!f}meaE32ay8VK}dLLf{j-AJUKAqXd|GGHw_s1Lh{@IaG?r$$N`8)vr1#3Jl zbU$qSJS7xM5@?dG0gnheAawkh%BHI{{qet-u`o)*g<74QoLmDL`vh>oY~Jjeya#ZK zCg(ero$0mU$*lOzP6Q_{>w$0_WqTN)7%%Jana*lOkqmnd2+Bxog=!BVcBWI=Le~H> zq8zXA-}9!l*yC4m+yiDAz4}k604wu?CXbO>b4MAF>c0e>CsnMx_=`a2CplX($zwam z3=pg;sBZs_k92+(o&T?6H6`)?HybP9zx@B;jKyQoqj=eAOE%;fDR`d~B~p)ZB!W## zsuWbZ^XYkTZ)W(9@(+=gtHGwMtaC_KmQF|b&oFD+0_>NqmQO%IFRfm*Vq#Ej;d;T5!aS*_90Zr> zKQ4Z%Wvfo($xpazxp~P#RB;)@_O6fLza2&3wzjZ2XGq9MEoxD=;5ml4h}0JMg3BR1 zIybE!e%A+qn`6y}-*26s|2`lOsAr(1`YIYqHc@sI31#ElI0|@17m}GDbM;p0-eKAt ze`dhqI*?@P3VSFL=2Z=#RNv!ee5k*MBM5+6Bv24nmJeHfR~{W$ZeE%D9pE%5pyFiT9uX#3r<@Ga58MxJO@ zzWQR`swDWCj`4Hfk{H~Sqrh5AvRQuEnGMqU>ZO1N^(-mdM$+m`T@WQ}8~WcL)zyj_ z8HQ57r#u$-8SocnvC2{S)dIlkcMf)%Q>kz=y#_+)V|JXvQNBEIAr$27%tlIZO8~6{ z5)=KXZZMLW4e5ik%CSNYn_82p(7yt|a{nFNTq<2hGQ2zYaZ*0~q|}pIeg&a7%fdx{ z;w~A@KuT7{SC5SY)hOpg?8GMk#zcD}v1ZY(j&HB4akl`UY(@g@Q*f6sQmdK&jfp_rE{0B?Yi?P>)GMEBeR_|a`6xK%m^=HMBU5;}CvHREw zHpnz!e3r)K(a~hwN$Ix%^TLIlp#p4t%d{@eWJPCrHK#9=S#h?%#)P^{Z2RSdA{&_j zCbX>sGvKr*Boh5W>Hx3LG)LRQv<}MJ6x&?4Ecn*)>QLk)LXY^HSwOhWAn5~rFn&I1 zk9lyD=}~C?dU>nYGG!Iw&1+xDm$zP5;X$OEXW_ZZi;;hvd9eHFZwG4ACHzG$Wq(&- z{95C=FKi<(?T&kLJsm02Kl<{y`;|Z(Jex4>JWxtV?T@Z<(~}jNO+kI=(=u zgg?w~E$avcL#SLZfgUCQz6mL4{M|PFc#oWT&b-6t#&jvM=mWJNVT_m+whKm%Iggc1 z#$~s**Y7Fx@fbJ$?@oM%5kJSu?|D4OEe+>;mS>SS6^P5a)Y8|n2uy29o>~(-xPa5O zYQXP%!=gVrN7hR^oq6`ij8uovY{i&m?}1i?=poOQc`rej%j%7`pl|EYJ-71m+Fv<% zv+Zu<0zA*`CwhJxC281CYLDhCH_l5qmWqEhFX!9Cs`}+G>-JW@JR@L*JR zE0S<&WwG@cFRYq_V#u{K`z!t@Ilec2tw?TmY_ar)-SHE05I5Hsp3X-VvEu_&mIyOuvJv;-!o%EN6n#Ov$paYNmupO#9OyqwN_ 
z;5RX<-;W2-Cid3QJSaY|U5UHZBL$=XI!4HD*}`3RDF@bOqPgn=#IAAIGQDYEH#^|y zs?h&wg0HOgAGdq<`r`P{a9uOkFa*^v`CUNDzT&M`I_w_DTc(p$Pm%Okd^@>-AihSz zg5++o}QE9kni^({GHyuGZ+h`HLK9)v5dUw<+cnM+4p^wYPw>LMRZ#Nzn#CV#qLEKA^F z^$OV?PP(1F6$lW}ToHH5+81b^=HqFT41iODU))vwkUc;D<__@iII|CNf+9k*rq|=7uX|l=39WGKE8zx4O$U4wB0-Zq zq{MujdU-futvV>@#xJcq`q+Ugt?Mt;w)nDbb^l%zK?OlT=@0=?DFF#- zNs&;bhZaE?VCXKT8>OVARJvh^89JqV$RR{Pa_9l6yNBn#y!ZLs|BE|s8O}L#_TFo) zz1RAE7ZUOM{e7|b7m~bi9ixb*{G@AdUcd@4{#rJj4>CM?H|@_#4T=5?jYLq)h66qhQZ@%J0Pe7Z2;8PR16m zKeCL16!H|V`u(FIrRE`?c^)~Cvh4TaTq`_U&iv{ zgfRssfcPjNZYUl3`f%Ywc_mmT7HWwdYw&&S2Y129MSfl(v+ccDXw*Q&uH#$X91AD- zzRglQZc2xd_c)S*vnf_h(uhGAAM{xLRK| zaDJPt273`Px5sakYjf+_*QN`hcP`u=f6SwES8OLT=>B<&-E}5fl0LurC1mc|4LRBY zxroDw@)Ig87pd9MxrKzho98W4FpD2&XTE$1`l3SKL(-lM*CF$cn+Mr-_rDyN>6EHY z3>#fnp403SahMBj(#ex$C$MsSdf485cQNlf*?sK6t-n4=ws@upQmPVHpy;@7*gpKo^b&VwI{ zjHT!3>Ec(2BFekTi5;OS81hC)OSNcSZ*;tv4hPwd(I-7yhB9RS~TRH?u`Emd-p6Lj{KbY zwY0-~NCPT(iInu9FKP)<`sLKFXBF|q^T~5EvtwJB&_fPx9j}s@is5_uh{TmK#_Ho3 zLay0eeRs#!JwTxtY$-(Cc$ytTS=D2*o9PS3l+7rrn#w)hO~LWXq9P zt@z%;cW(lCAI1valsZjuTQa=emooHd@Q1>eoGdvDp^f&xiexohbl>#+65OZXO|BAB z-R_N!wLLYY9z8eXp`k2X)IQGd-Yhi`S$K;uPe#0=)|RXDm!C~GDA}?HcW56h_TgGH&yEkmjB#!H~i`{rBiiT$0qz0{1&P!O$e5>1OJ-- zWoqe4mUS<^u)>Do(Aitx2f}Z?VH1qm#0>$^^bp$6~a5~jeR7XQz#3)sU&MQ0S+ch z-1nR;yR;~9F4~AG8Z88~GMTuWrKBOh>G8O69AkUbK7pRT8zMR7%E6l&PM2~*?O#5i zFhT7pM>I%k_{}~=`%P_ef34-V`Dx5){VxJOF`a0%ErL1eXxshooE4`Z%OaUMynTwk z@Zfy&lM~^3z?*&T_|3THDf;Xs-mDKV3#wF0Rf(6g&&tRP;YO(aO;R-%Gm_LrF25h4 zKBYcS@h00?x&#M_@-1HtUj137ROQ5*ZcdXtR%2rc=eDgg@T+=lH_W|G%B-WH9EUZ_ zU5kRn_ccRIAz{5m(;oUrUnNhYFcWLxPAzqmISDGEsJ(^ST5poNaOpfx<4FHbVIZPw-@L zRNifsjI^jfffPv7%Wt8BN@AxK2Lr7(`5pdfb{!OI^HEh=?GSNqp9p19T1t zDUTF}iBcc_Yo4lnpKyqdD{EHVV9N2!#|-~LBUG}MGjk3Nl#Xr764@8)mXI}kx~VE`Oc}|MlEeyLZ`Du zbvzeh&qShGda;;#8s9{YL(1OQa&c!#esI0=;(17}4G-SGzQ>B4{9uid`uy;74wuVe z!e72ie;Y*#NkA5bHY3uwN*|SZRz22fOtL1f3O)9;5!azZ3&RP%)80k(GjILUwt7fn z8GSF)&v}y7V35LWghCz^h{#@VQsU8vW~#%m$7{`-D1Z3l8+s9)s92ZMDET+{ZnlDx 
zNvk^BUGBAxt!YhO%#Xw1fKskywqb*U1W07OI-$;2y zuc%bSU3b{nCV4}dJ*2aQPjy#dS(L-nk8NgoykV}ZlgjFW#4`!)#=b7Ffwy4B?(TN} zlZ>%Bx#I`7=d9)3V7IA6J-HC?pfLi6myi?jkt%g6^18Y==j!2Qe`IyXh7_R=#ac@J z4{ru8myH{G#|%(12FtZ-EqKLDKj2mLGwoRtwu1W=*vn~emmqUH_h0eaPhBtEdd_I# z3mq^m;&6jZ&$^Y`K$}HO*6$bnE*4kCaMi^8IWTI{ZiQplws=R});f-M_x-)8%10+X zi3Zr{DpcbP&Z1PrE1#}Ce=c~JXh zeeA)b-7VVFv>K)Nc89$(EGKDBh>;mEJW-arSpszLUQ+-k@i%h~da$ zw~AggOD)qg!tYSD*ofh5n;V6FwV(Eg^|+~C;2aa$*}LCT)=SS_9a+JmWArreFYENf z+o&4yFudbPlR0)fI#mNzsg}SSUa0TSPEiJeun)5 z4={Mu@B`JjC5ud*hm3R$J(W%oq09ua`8cnY_~eXgzUg>9M6#aY6W1E^yj{5&{I^g> zMGm`HWw!#f_TP(A)v5Atv&P;gfxJ3cwA)hiUdls~y}<@duR+zCL31EYmsEf(u}CNj74ytvDqLY+RSiuf7!%HcM1X`%;C@jau@){nkE%}!fNRuI;oTtgRTf0Xn?I-CAL7M*c^p(Py zQgS?-f)=56FvFyvF;98$SuK+i5c*FEj_w9(Gx+vS-^osx-u;5nvhMy_^<&8sK^axh z|I+3+pXDYu!Q9sQlf9Gyem2@GT5YERYz!;5vk!50Z(Mm&vnin+trzJ zMQ%Q`7PU9{X4TtcQR{we0-JDWwWtp4Uwxn2rw=0%51cEqMn18hj)G3Fmv8D%Y88}r z+e}rJizF`WKiKzb728SHGCbhzw@$0^A39Q2>0J(dtLgRB-t(C>Rbs-lLqQlX2X;O# ze$T9>BW-*8rq5GRVKqvs>QWjpBIPz8F4l*~8b%XQWX?pnc0~;eC6z6yt>ZBou)s># z%9BARJ)dgwJGXa2CXp|QmXCmZ&-x+suM9jL|qfU z821lgQK&um+aB`nFtdOX zqj|LjzG*nt0(Y1GH9j@)BF@sCJBxmC*)YQ6xe$cse30Hvcy~>eXVbqt z5Z=k>jSYWB9U&mu)w@h^>SImuP-vjY&s(A~X`<$W^PE?_7=Jy+Oz*cT*XQ&|bX1p5 zAFtP9bSNftp_hC=&v@ejOf2VF+dQ1~TpZaoXv`Jgl<~w6!S7E;PX5{0-f_1v^C9=1 z-(3EvUI9a9nmWfaO}8_eH}`~<75$tA;Ae{GgPg=D)krABztirn_YxN;Ht^p}sMm<$ zl_kiK9=k$6YGq$#Tzce_+&GSy(^V?Zc0RsO-xKg+xk=+^hj;!uvpypqFp(}*%^~+YX_e1p1GzSkE zUrr%pWaFM(BT<0{aC2;0e5!Z9p-$`4d~589H6pUSE`k*GTl!JZ!7XVgkrRbZ8ybQO znxUjdQMh}Zpud3HliznlMwwoy4NpG*29sy`TW@lscD~n$=~~u@DtUC(8QC+qWd1T! 
zLcRZT`)%sM4DM5f-u4OZ;{_HfvLjQ~$uCZ4TjC6z#XouUJ8c*~8S+Oj(8>sqP4<3V zbo^)h{@3f8*x3NDJz?_$y3m6UH2D=N!4!q<-`v^^U%ozZy!yM}{1aJs5WcK0Oh2Kg zXy$2vp|Ox%)@WOLl#(my-PS;zHqo<__lAv-lYS3((|59hMm>{tK5&<)CsQ`wPbgBz z%&tKg7Av(nU!JHJkMZ_fIurXa;83Cc22y^9$mBHj(1n$eqq8aobkXAxPq$_fByY4( z`6JwkczOg|FYlbDbK38Wm$=l_2h;yM6!m|WO#)Z{&*#5zR`nm?*$0EPKSlIOwjMFn zcQQsC{Bw^B{9_!$CB60eH@q)QBvN)8kMXPbq~JG=OwR)FZrl=$-aEzWr zYlnlSqHwu#*+14)+L?X7o}M~N;!U5pH?`J_b$FeSZp}q^2DvU_o+Ms=E6(OW7uRh; z;Zc_^At--*C(~)VwuNr!fS+Vj+q{&L_G>j7mGw-`iSWU*W5p=w*BIv^N1 zn)fH&-sxmA1!%fN0jq>5A5h1sQ(=A^=x74Kca1^GHK;hFzf`gD<3_@~75DJ(nI$9` zfo>pC$d*w)iar$ZC-dLv7ie(-938>+TX&^EmKr3U6Bd>NsSKq8Z{5={#*CLR0kKd7 zKt=L?oDHoDHqz&D!tV-x)@}=jg-zUe3{I0r# zLL6L$r$(U;8Ble!0~u&k44cyLcx}UGAT@{p>OY)r3MaqX9#wyxx5-xL>EW>^;R9uJ zKS~&{2JAcloRG}cRs{pne>qbor;_+Tl$x{>jL9KE(dUyH2_W1-WNMVWoZwQ+!q3+# z0&s_v-=O%m9jHjh78S=gfjWT}?B)( z?ZJbr{dZtpXK}7~$8$i3pC%C(YUI5|G!K=(6j}$`pxk+Jo2h5k(^a=u2GWJZA&2oe zU7k*?;xMEdR)ok~ua|uLg9zbdz$E7O6i7@yfg+9+#y~Lh3QaDry9j=C3($jTXm3={ z`REBzzp{6A)izCGvl{?K#ev-b_13rlh}Gx(_=CXS5#WLb04Cr(P`HhH^ZW$$!#86e zs)vQK7#ne)RRM{DLQ0z+;(UTkm#IE7>7nke(d{&)^5pyc>uzu1v?r?0H_@VWhO zd>mhcStck0a7T~kF3!LHu1`=Lmye*%?1<-o4<-=}FYaAtI~;ABjHw`j*3a$yctssw z^f7tqPdNzc(`>CHI~Xd-5!4^$@e-rXDY(^G;c6fem8w1ck9fTVme>>hdXKTnd$?tJ zIkQQ>RmOE68Cg23GS2?^wW|(TY3)E%F-ESdFY;uutDT4@RszU`>kg-^9!CwaM2lsi zP7-K3D~8)_ar5dDRZSY$`%WCxHjI`S8AXOx5nH~NIA*1NW|4vTsPwtPeJ|URqnqfh<6UN?Nox?)p%UqeN!|jtjT6UB&o3iqCz!??H*cz~ z%dK-QYUHRgC<75LU`+DbyqHvb|NaMrR1~Z>#i$G1It1nh{)Y-Yy36J7SFJGZB-zKz zG0BD9RV4rM0|%`IkVIK!<%reUIQ7%Hi?ejd>pGyeQiPNN+=u1<`Zy^e3B9q9J*X}l z##K!GVjdQ$2WZUEVgpgd&k1jo&yqqePy5u-RJ1K|QdF49$_U9|Mn=jd3#Wj6qmMQ%(z&Iiv`ZmVo3V6|F|4wzP-f8E) z!F%!IMM!kC5IPnZ;=N1%m_je%MSYEsqz$ltVp-n^K}T*D6@7fqmIjE1=W`{p{UNG$T!(EC8s3#jUjR&&yov8)bhfP%W7umZs2^|tGi@mnF*5R){o-=g)zt6(fnm*@QaRv9_8N&cu1hm zhZZ^25B$(S-PzuLQ#7Mptj~)Er7?d*aFdI4s7SAR8))kO16*Z{+MKR#9Um8J|D%Sh z&)He*RKY&*sByu}79}i|pQct>3`rNI!$>A}4a(`ib5Vr>u0_syWJt&_P25=Xg(&y3 zR?wduJ(B^1iU+o{b-!ZSc^G|9Z~p0yC&CeG0f;J^+hA6c3)XnL8*ZsIK3Q$c1lICb 
z)?AHZeVU37107=p+Tf<$`aga`g4lF?V4_ZcGb9E898R-nY`6v3AwrJ5aW=RwosyZP)G;reTe}m zH{f7>JcY5(Ju52)3E-8VQ)Cn(X$2;-R#@3zr?r7fd_U+I)B>}^@(8l82@F03pf2SaAjxKZ zkhB0G8F6ujQ5-Fqlj34w<2Q~qUGD!@V4h!1Jkq5E{QtLMHhu0_U9L#LEXh^xq+9CI zmP+EY%>vLki=nKav=U`NA$@ps|3SA2?tP^yCuHm%76&7oy@9*rMK@VnC5brG)YaX> z;k8_@&jZVH54?^JPctlz(z|V;6re`FU9J4j`rnz_ z^ff?%?t)p*P)Bm^=wxYVdQ>MpIeYNhYu)zfdT;8d?5i z6o5YIPpvJX%irKqh|&i&TbuKXBltqFnWy5WYXJU~ef5*FtT_o4a11EFe;NYzw9n-e z7}n89A)rjnrJQOgT~Aa{xCmTmy*8S75-oY2$lR+sjGpF`)#CsLL=Y!Q%AN;tltPYb zeVNbMf)mqXEc-?K55@2|RU3Ms61gGS^iyX-$fJ|Xp^pJxCC_djyZ~p8ss1NGvDPwe ztK9)6CCp{w;uyX65WV5jdeG1(%ho=Ycu1k>Xg!kI2aJqNbV$f4Ig zTQB(?l;c?pW-xObHCTI4zKRCGE3x(fv%GlVxJCLtb1W_b$w(x?Ig`c*{*qdsz8SQRC|q@b7&IM(c`+3Rt^;0S-F8bjwYbPj z(FW{WdKOQdEJdv-SqRvoK%BQp!v)$zIAcWk?YX0T-#o~PP@-?a3tnNjWPlk(w%OgM z_&ipq%dhD6clc*`vUO-GyO@~1%DEumz2yW$9chGMf}l>mi>7VlgR2W*$hK)EA1ew2U*rZI8gfwxRs~r#>6T&k#1f* z_^}xPk^*=rYB5cNb?%O1Yx4ch=cn#&?n|8;lk56Eb-|?RS?2y9#!aRbUz6w>uK)VN z0*vaXD@B(?zqSG-)9yBKBTzL8q?~~Pz!?HZd^?r3OAbT|0c0!>KTPbv#`~GhPDp?6IFVbjMTLaC z=}84;*EsTZ#J|~>SH5Cvpkr&)l0H!`Uf)K}rXgPhHs-u4uKM}=(-en>l;6a?T+y!H z84!Qm6||Ko)^qRAeZc>!@yvw{$1W!}=6+^7D>j-p@@pxxr<`8$T<8`Rw;vkmx;KFc z!QTXK9jM-Ja_RkB*k6r!-T=7sxu_jnxL3&z99cZhkxf3ne_yH~0_$M`?Cb;nw~`m9 zMEP_awH*MZu0gf!y#4@=6*G;-0uIqA;;;3j69Y|8ny4!bFoyYL8V7*7{SA85-5NZt z#O2f8`Xf{<(PMaIGVg7a+8VRe_L% z9%qOcaES8rv(Bk|l_aXRw_RO<((o}5+jas`oU-Rj>4z?{;nd*8cY>gP^Omu_y*-Z6 z9a)q_KtEH7$8>xk3&=OZ5&G+?>0TL+;i&+y_So$D2PPz_<#(@}7)4z;gNTnO z4q@}WIlPL}@IwO^M=Rn+laZ&;Hje-w-?{9MTmyiIV0e1GlAbLNb~c3}xP?jrHjwge zM+1m^OO;^-eh0Z~>G@?@#3)i$Qq=M=Fc6%&<@iAD&4(UAd|^>A`8%ix6507}rXHqT zvdFK&b2E;ioyTpz7p5d?Tl;*4qNXRNGj%ISpep8!bA&O3R*qjj)`ObrQxNlJdqT!5 zE%|XcNxJ9~t#`LWZW#o~r%l=W*j&|OL#J`eP+Gx>;~F4G-Ntnic;6L+SOU&CTl@1UF47n` z^yF$k11%1$1lj--?^*lV&tI&{9@D=(v$6DV-&a|Q51UO-2?DtGGc?CzU*8K>>47NGh8TY*JTP)pn#!0SeZCxRIY6{6FE5YFol-MyD_{Of|uLa_ZtU=~4;>=YfZ zFARK~M%ancWT}b&mS8XmLpdT6ZiolkLJXlxJPrzZxDF4eZP{x}kVyLnP5TKr+xd4e zQwQn7<#TqReUQlK;(c-6gTS~aw<|Uytj6)6-<*JT$tn!L94LUw5H)mTPG96gF=*X? 
zRu+ikRvOL5xA#4aB!XZ(bgQDWOtwKM!;lSS5%9hzZ@>D6{T3MSI`J zC`8r;aQRw#(V*WzKCHF5mmD%m z&KQfkY;fK?JusXrC~1HS!~qYd?wSH*#(!Zf%~KGiQm6EBQ7s4iwyB+Xfvf|s{UHU! z^j!|E-Zfc65QRF0olL8P&LA0(@*I_vpqw-hS~9YWoRhR1-PF5{WA|CRCFoC(6knIKqo?(w}i``~e15ybVX*mDrl zX!#%fP>gk|o~*Qt-4Yyb5d(W(w2f+-m?c8{EiW&xRy8j*akeMh0SI8(YSX!xPk;_b zdw~7ZIUrPKD+ExMdY9TQ90t$66MxtC9v}<3yVm)sR-KJEb^4M8lw9eO;#6$uZo2vt zlA_Sd+Y7CT>C@cJ47jP;u_0ut!5g_Hh}))(eSGfl=NBs$zI3EjV#vVcbHogG72p0m zi0%Ely;2Toh+ zkX6~CqhwVaFbwv1I^J`DFB0}xol*5}5}V6s;8C zBdF6V3Ysc=wIb5V{<~}koC)fD#njy?z@nu`qc)Fn&G4Kz&@=fNuLntm-`SszJOTQW zY7xWdIpxHtWhtBW6(b1>aZi#eZw9N!YSTk>P`OWdPwAdd>66KiO+4fS3LMa zJQ`bw>&VyBE~>yvuz@9S=G-C(jtgjUvwY*esUXokkFHGPvu?W{W1FU7L-IoOp!X^l zfKYG}YJyG|Ra)#7Kz^4~#|}s(ZGXC|qAFYrS0^2*L5rEr#gvGOC=AeZX!18sdwRW= zWwXFTXPnqt#s}TjGwzD{S!W?F8`pjr@Z@#J{vf|4?X^=ON;u!yC z$JYxXP7G;OAyYimAB25oJs%`&Ab(cW9}H*rAyX&10|j9-A+O@06XOOG%?_tWb5O-c zN$x57riUAo7GwF}UA#6e;Z=>+6LxpO@q)RK9SBu%-OYZfmS>k#S4Y%)3l}9WgWepv zfEd3Br72Zr`#*^6I1^d&HS)4M#y;%tFaquI{@;c4W5}~b(>siyvDtcd_;XisyMJ$l zTOKSLH$3Ke45_512J4ThefI~#M+8JtT(~7lvIfq$r>e07+uSWD*ozyAoj77=5Dl z>CyaF6eII^Fy{Ab`0zk z1SQnUwtNN$!Xdbvgm|~<8ZXy-J$fVmW08SxZUF-fQFu%dAaPYRguv);9CJA`ErL!N z7M-qjJuL@Y=nra@e+zIi1M(x-@v&U)__q5+1$f`TlnM%3hmO_pnzRHcqkrG4F69=R zsW01B;4oR+a8XVdB<1)TDQCd-@wHg)^=BiJW?JAd=tX;&t!dN8XnmJDz3QzfaOP#c zJ8vd+^yu~G5lQ=2<3W_3nBWn%rF(DLm;XLrhKShSiOF$ouV?DT(ITlpt51pH_f*DW zA)BxvY3G|!1;AScT^=+@p`y9L23J0sr2Dg-K{bqM>}^^t@(Z*%zkqP2=jZonY`dn# z=0em*&O3#2US4&G#CI6TGU^hO-C6Zm#C0q6x}P@jRU(h3R!Ft&W@6P2=M{{*_&Uu~|yV+TMSQ!;@UIl0`e#ek!w zEC88(YTV@ZHE`XZa8V6F7(ovtq1a$(Br|cVnb^U$_z`=N0hm;fm%?kPB4qM5bh6wo zIQkfn$>*0euYjZafzwW{RD%Ey$jig?--VQ^D%EUc!P~VXWqkj%-U->8P8>>|U*XdE z@QHVV$!iFlTP=G^D}U`Ko&;D z6ZnNC#~8ciIAyPNLQP1BdGqNX0Hz1y&O?eu!NR=U=@9+e)y9!04lgPyUsaiMB`qva z4d#K&^!c9ZVsFJFZm$e7_Q(oFRT#CNVT%?Lh$hhtFFCj%TE^~Pe zXB=oA@$r#^fcr(dg!j-7MSgHBLIHwuna!zLi|Hx`kb(&XiM++rAu6y zP~?ffIBK*S?Qrwt+K|VaNBwiJR|Kg9jcZ(eYCMmP-%Xg!@Y%Ys;|&!MW+VgUI=W+C zN{~2ihmJ7I@S2gb=q~A7ZzM5q;%o!s+Mz-8+*_4-kOhgO#Dxsq!cN~1B7X_Q2 
z1U<~1HBy|2FMhxm+He2Wkn(fXR2Fr4jddq?Ay&X6H7Y!kyO^u^PQ7_X6ftPX!~YCq z(nl_ndIX`J`7VsPPY*2C4C0Dv%kjj;^O@(jCG!GIuC*voehfy^)yZQ}(lv0;4imLl zl+LkL{|!`-%%|L~g{V+M3?EoDWVM&gU(IgF90}jGE6JD(^I~UQ2oL`L;hHVbt9G`1 zRu?hNrzovYClsUQX?t$fC1xT@;O2&#hNcZF;?LMqqvY$F_k%m^c88&nQhBZ^>^b?1g@sv|~eo zqE1Kj2uyiGoa8`*sDX|an-m5XjQtx=N6Z}W{jI?#Hbz0F(@Q@vnvj$G17t7{aV&T6 zLWNJ!*!pVq-$%u~wIDZ2@}E&}`dIn}GVjb3CVl~6R3L==H2bZTuip*1mc6+ZmGvF_ z!@@pFPluFoYgXszd;B0h4PYs{*3`mV!iM$dOj@ZzHjZrci}~*NeZPeY zZ|AtA+=^LzqvC0eb#NXm8_>57uaBtVYE@H1_7ymi?*FCEfQ3gN6BjM{pc@2(dc%h% zo|c7H%}x1eqN*6m?@YHy-x)1VJoKrtoaTOA_vQ}PnPo3Y!LQpLgdbM+TK>u zWt)WRPn`k&HJ%rg@2ZPi_I>VEc8;a3$6lMV={S*9-Pb5O3gtpU>!m-d= zxvy?;((C(&CN2KhcMDhUe#TDjdyvR8wRZE4jahA!o#%UtlY?bi{`7LvUKvU>d;y7p ze@tX`rnw`LePyPTsq%&?WRUwlR)r>Nv2p81i(V(Zan;2VXhEl_jX_~$et0cmw{LP~ zIKrg!%DrJGr^nH<8PMB84Frn@-&IbgNcB{jwVmB*vsI(23}Q+6cD^%iw$TWB+c7XyT`3EtcL zQh|@lNkd&n4^1*WlqZ85%~rUfB?`!NPh2VTtgd^c!?fLqH~3KwcmDIEAUY_&B+vRl z;SXdJz04xMNg~_DJm{v_gj$7|KvVT_r{mRsi=J@#E*3iZ>tr@PERc3;UhMmxw28y5xg zt_p6R0W}fj-@CmA^_o%wX!FPBh_&R+Wv$t@i1{wLfx=wr4`IgNCTO{s4H$cln<3Jr z#|QM^)S7FUy8PkoKGaM+cwb`I^R{+jfPKsV$11gg-hWf7H0ldhdRkDfR31cxP;9*S zkuH)4=TX$iaZWA{Px-QrUcJ->Q=#HI%;Deu=Uw-?eaC$DDCCZi>vLBb2Cl*!_PO>F z*!MssSGVgAZLdeSW(7@|){?fv1h{9Mn-OC%jx}}p@PD>eCkxCcJJrX}KL*4WL|8dn zeVQydOeHjEWw0zm%@U0*iOyU6u97b&-K8!k#ob=E>pzArq%)|P{B0yUpXq6JhhRqw z6H|b^NeHp*z;#k{B;_*iXkOnDmjP1Ks`xAARfDb+W24nYT2#MbFRkfYzd|vaBRi=+B)mvV9C3O@${Cg`fG#K9bnU3KGN|^g3 zV>72;B4S*+Vxv{F7D$^xV)8O!(0!-l4JW?``)%Y9znP$=P>tqla3I-=y*M*t8C_>) zvFnv6OT6K?`+u%(|B~z&^|OwAL6qLn8E5~?lbgNK_Ub!rQmI(Rb$i2~%GVxf3UifQ zKlCN{&oRN;-7QUhtS_Vk)CyKNbC&HV9>g0L@w@8^a9yFZRWg(0G+&KM)w$B@DskjW zPn~n!a*lA|Cq-eOLEVYX2^|(@g~3eXZ)$xzt3;5pym>PnytJIu0`MpyAu+fLs;mfm ze&SZ5npRh{U7C7|I}&9#!}54rAx$oO(s?9~_MGj~s1DfdSfR3hLMmi^QX!%8NmxEQ z=-t%lef-F61+<8d$ohNs`wkGRyFD0xUXE&$>3sXYqh8cnZL@p?in_rb$95W@o7Pab zMyf2Q=Mp}`Abv{ID^V4$i>J<;RSfBw>OOlm({^j5n`FJs2#0&mV4`Fk(pnEP3ZG`E6<@!EFQR*BSSV(d^5;K&?WMq zTe}_y2bsKsTTy2BSF;VMzG<;BhE&UgcDoc?eXo>ZKvy?SYtj0t21 
zHbNKZhG~jz-l{-iI=ctd-U z6-mf=y0D^^l0_{oA8}mBaTRy(FC0moQF?Js(cz%ydlu&>++44wwMQ(+#ooWl;o7Zvi1xHy8=Fna;mb$(#C%O3-JO~CIA2c literal 0 HcmV?d00001 diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..480c493 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,12 @@ +KG Topology Toolbox +====================================== + +.. automodule:: kg_topology_toolbox + +.. toctree:: + :maxdepth: 3 + :caption: Contents + + User guide + notebooks/ogb_biokg_demo + API reference diff --git a/docs/source/notebooks/ogb_biokg_demo.ipynb b/docs/source/notebooks/ogb_biokg_demo.ipynb new file mode 100644 index 0000000..83729b7 --- /dev/null +++ b/docs/source/notebooks/ogb_biokg_demo.ipynb @@ -0,0 +1,2269 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# KGTopologyToolbox walk-through\n", + "\n", + "Copyright (c) 2024 Graphcore Ltd. All rights reserved.\n", + "\n", + "In this notebook we give a general overview of the classes and methods included in the `kg-topology-toolbox` library and explain how to use them to extract topological data from any knowledge graph. 
As an example, we use the open-source biomedical dataset [ogbl-biokg](https://ogb.stanford.edu/docs/linkprop/#ogbl-biokg).\n", + "\n", + "## Dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found existing installation: kg-topology-toolbox 0.1.0\n", + "Uninstalling kg-topology-toolbox-0.1.0:\n", + " Successfully uninstalled kg-topology-toolbox-0.1.0\n" + ] + } + ], + "source": [ + "import sys\n", + "!{sys.executable} -m pip uninstall -y kg_topology_toolbox\n", + "!pip install -q git+ssh://git@github.com/graphcore-research/kg-topology-toolbox\n", + "!pip install -q jupyter ipywidgets ogb seaborn" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "import ogb.linkproppred\n", + "from kg_topology_toolbox import KGTopologyToolbox\n", + "\n", + "dataset_directory = \"../../../data/ogb-biokg/\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data preparation\n", + "\n", + "We load the OGBL-BioKG dataset using the `ogb.linkproppred.LinkPropPredDataset` class and store all (h, r, t) triples in a `pandas` DataFrame." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
hrt
0171803207
14903013662
25480015999
3314807247
410300016202
............
50884292451505097
50884306456508833
508843194845015873
5088432636550496
508843313860506368
\n", + "

5088434 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " h r t\n", + "0 1718 0 3207\n", + "1 4903 0 13662\n", + "2 5480 0 15999\n", + "3 3148 0 7247\n", + "4 10300 0 16202\n", + "... ... .. ...\n", + "5088429 2451 50 5097\n", + "5088430 6456 50 8833\n", + "5088431 9484 50 15873\n", + "5088432 6365 50 496\n", + "5088433 13860 50 6368\n", + "\n", + "[5088434 rows x 3 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset = ogb.linkproppred.LinkPropPredDataset(\n", + " name=\"ogbl-biokg\", root=dataset_directory\n", + ")\n", + "\n", + "all_triples = []\n", + "for split in dataset.get_edge_split().values():\n", + " all_triples.append(np.stack([split[\"head\"], split[\"relation\"], split[\"tail\"]]).T)\n", + "biokg_df = pd.DataFrame(np.concatenate(all_triples), columns=[\"h\", \"r\", \"t\"])\n", + "biokg_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Based on this representation of the knowledge graph, we can proceed to compute its topological properties using the `KGTopologyToolbox` class." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "kgtt = KGTopologyToolbox()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Node-level analysis\n", + "\n", + "The method `node_degree_summary` provides a summary of the degrees of each individual node in the knowledge graph. The returned dataframe is indexed on the node ID.\n", + "\n", + "- `h_degree` is the number of edges coming out from the node;\n", + "- `t_degree` is the number of edges going into the node;\n", + "- `tot_degree` is the number of edges that use the node as either head or tail;\n", + "- `h_unique_rel` (resp. `t_unique_rel`) is the number of unique relation types coming out of (resp. going into) the node;\n", + "- `n_loops` is the number of loop edges around the node." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
h_degreet_degreetot_degreeh_unique_relt_unique_reln_loops
0277299440
11494108360
220895303570
328999261545515310110
436230266411120
.....................
45080212243110
45081293261110
45082283058110
45083171936110
45084283159110
\n", + "

45085 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " h_degree t_degree tot_degree h_unique_rel t_unique_rel n_loops\n", + "0 27 72 99 4 4 0\n", + "1 14 94 108 3 6 0\n", + "2 208 95 303 5 7 0\n", + "3 28999 26154 55153 10 11 0\n", + "4 362 302 664 11 12 0\n", + "... ... ... ... ... ... ...\n", + "45080 21 22 43 1 1 0\n", + "45081 29 32 61 1 1 0\n", + "45082 28 30 58 1 1 0\n", + "45083 17 19 36 1 1 0\n", + "45084 28 31 59 1 1 0\n", + "\n", + "[45085 rows x 6 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "node_ds = kgtt.node_degree_summary(biokg_df)\n", + "node_ds" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/nethome/albertoc/research/knowledge_graphs/kg-topology-toolbox/.venv38/lib/python3.8/site-packages/pandas/core/arraylike.py:396: RuntimeWarning: divide by zero encountered in log2\n", + " result = getattr(ufunc, method)(*inputs, **kwargs)\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "metrics = [\n", + " \"h_degree\",\n", + " \"t_degree\",\n", + "]\n", + "fig, ax = plt.subplots(1, len(metrics), figsize=(4.5 * len(metrics), 4))\n", + "\n", + "for i, metric in enumerate(metrics):\n", + " x = np.log2(node_ds[metric])\n", + " sns.histplot(\n", + " x=x, stat=\"probability\", binwidth=1, binrange=[0, x.max() + 1], ax=ax[i]\n", + " )\n", + " ax[i].set_xlabel(f\"log2({metric})\")\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "metrics = [\n", + " \"h_unique_rel\",\n", + " \"t_unique_rel\",\n", + "]\n", + "fig, ax = plt.subplots(1, len(metrics), figsize=(4.5 * len(metrics), 4))\n", + "\n", + "for i, metric in enumerate(metrics):\n", + " x = node_ds[metric]\n", + " sns.histplot(\n", + " x=x, stat=\"probability\", binwidth=1, binrange=[0, x.max() + 1], ax=ax[i]\n", + " )\n", + " ax[i].set_xlabel(f\"{metric}\")\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Edge-level analysis\n", + "\n", + "### Edge degrees and cardinality\n", + "\n", + "The method `edge_degree_cardinality_summary` provides, for each edge (h, r, t) in the KG, detailed information on the connectivity patterns of the head and tail nodes:\n", + "\n", + "- `h_unique_rel` (resp. `t_unique_rel`) is the number of unique relation types coming out of the head node (resp. going into the tail node);\n", + "- `h_degree` is the out-degree of the head node and `h_degree_same_rel` is the degree when only considering edges of the same relation type `r`;\n", + "- `t_degree` is the in-degree of the tail node and `t_degree_same_rel` is the degree when only considering edges of the same relation type `r`;\n", + "- `tot_degree` is the total number of edges with either head entity `h` or tail entity `t` (in particular, `tot_degree <= h_degree + t_degree`); `tot_degree_same_rel` is computed only considering edges of the same relation type `r`;\n", + "- `triple_cardinality` is the cardinality type of the edge:\n", + " - _one-to-one_ (1:1) if `h_degree = 1`, `t_degree = 1`;\n", + " - _one-to-many_ (1:M) if `h_degree > 1`, `t_degree = 1`;\n", + " - _many-to-one_ (M:1) if `h_degree = 1`, `t_degree > 1`;\n", + " - _many-to-many_ (M:M) if `h_degree > 1`, `t_degree > 1`.\n", + "- `triple_cardinality_same_rel` is defined as `triple_cardinality` but using `h_degree_same_rel`, `t_degree_same_rel`.\n" + ] + }, + { 
+ "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
hrth_unique_relh_degreeh_degree_same_relt_unique_relt_degreet_degree_same_reltot_degreetot_degree_same_reltriple_cardinalitytriple_cardinality_same_rel
0171803207519111664614236129M:MM:M
149030136628544339197550251882M:MM:M
25480015999310854722217926M:MM:M
331480724741109911673271782369M:MM:M
4103000162024414315614831561345M:MM:M
..........................................
508842924515050975636272108032721437543M:MM:M
5088430645650883310743259103711001111358M:MM:M
508843194845015873865221364861631135375M:MM:M
50884326365504969922277196181731537449M:MM:M
50884331386050636874851758455147939321M:MM:M
\n", + "

5088434 rows × 13 columns

\n", + "
" + ], + "text/plain": [ + " h r t h_unique_rel h_degree h_degree_same_rel \\\n", + "0 1718 0 3207 5 191 116 \n", + "1 4903 0 13662 8 544 33 \n", + "2 5480 0 15999 3 108 5 \n", + "3 3148 0 7247 4 110 99 \n", + "4 10300 0 16202 4 414 315 \n", + "... ... .. ... ... ... ... \n", + "5088429 2451 50 5097 5 636 272 \n", + "5088430 6456 50 8833 10 743 259 \n", + "5088431 9484 50 15873 8 652 213 \n", + "5088432 6365 50 496 9 922 277 \n", + "5088433 13860 50 6368 7 485 175 \n", + "\n", + " t_unique_rel t_degree t_degree_same_rel tot_degree \\\n", + "0 6 46 14 236 \n", + "1 9 1975 50 2518 \n", + "2 4 72 22 179 \n", + "3 11 673 271 782 \n", + "4 6 148 31 561 \n", + "... ... ... ... ... \n", + "5088429 10 803 272 1437 \n", + "5088430 10 371 100 1111 \n", + "5088431 6 486 163 1135 \n", + "5088432 19 618 173 1537 \n", + "5088433 8 455 147 939 \n", + "\n", + " tot_degree_same_rel triple_cardinality triple_cardinality_same_rel \n", + "0 129 M:M M:M \n", + "1 82 M:M M:M \n", + "2 26 M:M M:M \n", + "3 369 M:M M:M \n", + "4 345 M:M M:M \n", + "... ... ... ... \n", + "5088429 543 M:M M:M \n", + "5088430 358 M:M M:M \n", + "5088431 375 M:M M:M \n", + "5088432 449 M:M M:M \n", + "5088433 321 M:M M:M \n", + "\n", + "[5088434 rows x 13 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "edge_dcs = kgtt.edge_degree_cardinality_summary(biokg_df)\n", + "edge_dcs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The data on the distribution of degrees and cardinalities can then be easily visualized." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Edge frequency when binning by head and tail degree\n", + "\n", + "metrics = [(\"h_degree\", \"t_degree\"), (\"h_degree_same_rel\", \"t_degree_same_rel\")]\n", + "fig, ax = plt.subplots(1, len(metrics), figsize=[5 * len(metrics), 4.5])\n", + "\n", + "for i, (group_metric_1, group_metric_2) in enumerate(metrics):\n", + " df_empty = pd.DataFrame(\n", + " columns=np.int32(2 ** np.arange(15)), index=np.int32(2 ** np.arange(15))\n", + " )\n", + " df_tmp = edge_dcs[[group_metric_1, group_metric_2]]\n", + " df_tmp.insert(\n", + " 0,\n", + " f\"log_{group_metric_1}\",\n", + " np.int32(2 ** np.floor(np.log2(df_tmp[group_metric_1]))),\n", + " )\n", + " df_tmp.insert(\n", + " 0,\n", + " f\"log_{group_metric_2}\",\n", + " np.int32(2 ** np.floor(np.log2(df_tmp[group_metric_2]))),\n", + " )\n", + " df_tmp = (\n", + " df_tmp.groupby([f\"log_{group_metric_1}\", f\"log_{group_metric_2}\"])\n", + " .count()\n", + " .reset_index()\n", + " )\n", + " df_tmp[group_metric_1] /= df_tmp[group_metric_1].sum()\n", + " sns.heatmap(\n", + " df_tmp.reset_index()\n", + " .pivot(\n", + " columns=f\"log_{group_metric_2}\",\n", + " index=f\"log_{group_metric_1}\",\n", + " values=group_metric_1,\n", + " )\n", + " .combine_first(df_empty),\n", + " annot=False,\n", + " vmin=0,\n", + " vmax=0.05,\n", + " ax=ax[i],\n", + " )\n", + " ax[i].set_xlabel(group_metric_2)\n", + " ax[i].set_ylabel(group_metric_1)\n", + " ax[i].invert_yaxis()\n", + "fig.suptitle(\"Edge frequency\")\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAxYAAAGMCAYAAABH3DSrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/xnp5ZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA+1klEQVR4nO3de1iUdf7/8dcAMh4QPKAohuAxxXOYhGa6m0UeMN3NXMtQUNOSMtkO2kG0tqjMQ6uupqVkm2mWWa6tqaS5HjqI2eZmlopiHvCUoKhgcP/+8Od8nTg4cA8M4zwf18V1OZ+5D++5Ge83r7kPYzEMwxAAAAAAmODl6gIAAAAAuD+CBQAAAADTCBYAAAAATCNYAAAAADCNYAEAAADANIIFAAAAANMIFgAAAABMI1gAAAAAMI1gAQAAAMA0ggUAlMHGjRtlsVi0ceNG29jw4cMVFhZWrutNSUmRxWLRgQMHbGM9e/ZUz549y3W9JSnqdVssFk2ePLnc113U76Fnz55q27Ztua9bkg4cOCCLxaKUlJQKWR8AVGYECwBuad++fRo9erSaNm2qqlWryt/fX926ddPrr7+uCxcuuLo8lzpy5IgmT56snTt3urqUUlmyZIlmzpzp6jKKVJlrA4DKwsfVBQBAaa1evVqDBg2S1WpVbGys2rZtq7y8PG3evFlPPPGE/ve//2n+/PkVXteCBQtUUFBQ4etdu3at3eMjR45oypQpCgsLU8eOHSu8Hkm6cOGCfHxK12KWLFmiXbt26bHHHnN4nttuu00XLlyQr69vKSssneJqCw0N1YULF1SlSpVyXT8AuAOCBQC3kp6err/85S8KDQ3V559/roYNG9qeGzt2rPbu3avVq1ebXo9hGLp48aKqVavm8Dyu+uOyvP+oLouqVauW6/IvXrwoX19feXl5lfu6SmKxWFy6fgCoTDgVCoBbefXVV3Xu3Dm99dZbdqHiiubNm2vcuHG2x4sWLdIf//hH1a9fX1arVeHh4Zo7d26h+cLCwtSvXz999tln6ty5s6pVq6Y33nhDkvTLL79owIABqlGjhurXr6/x48crNze30DJ+f63BlfPvX3vtNc2fP1/NmjWT1WrVzTffrG+++cZu3v/+978aPny47dSuBg0aKD4+XqdOnbrmNrn6GouNGzfq5ptvliTFxcXJYrHYrgFISkpSlSpVdOLEiULLePDBB1WrVi1dvHixxHWtXLlSbdu2VdWqVdW2bVt99NFHRU73+2sszp49q8cee0xhYWGyWq2qX7++7rjjDu3YscP2GlavXq2DBw/aar6yLa9cR7F06VI9++yzatSokapXr67s7Owir7G4Ii0tTV27dlW1atXUpEkTzZs3z+75oq5XuXp9V5ZZUm3FXWPx+eefq3v37qpRo4Zq1aqlu+++W7t377abZvLkybJYLNq7d6+GDx+uWrVqKSAgQHFxcTp//nzxvwQAqKQ4YgHAraxatUpNmzZV165dHZp+7ty5atOmjfr37y8fHx+tWrVKDz/8sAoKCjR27Fi7affs2aMhQ4Zo9OjRGjVqlG688UZduHBBt99+uzIyMvToo48qODhY77zzjj7//HOHa16yZInOnj2r0aNHy2Kx6NVXX9Wf/vQn7d+/33aUY926ddq/f7/i4uLUoEED2+lc//vf//Tll1/KYrE4tK7WrVvr+eef16RJk/Tggw+qe/fukqSuXbvq1ltv1fPPP69ly5YpISHBNk9eXp4++OAD/fnPfy7x0/e1a9fqz3/+s8LDw5WcnKxTp04pLi5ON9xwwzXrGjNmjD744AMlJCQoPDxcp06d0ubNm7V7927ddNNNeuaZZ5SVlaVffvlFM2bMkCT5+fnZLeOFF16Qr6+vHn/8ceXm5pZ4pObXX39Vnz59dO+992rIkCF6//339dBDD8nX11fx8fHXrPdqjtR2tfXr16t3795q2rSpJk+erAsXLmjWrFnq1q2bduzYUehC93vvvVdNmjRRcnKyduz
YoTfffFP169fXK6+8Uqo6AcDlDABwE1lZWYYk4+6773Z4nvPnzxcai46ONpo2bWo3Fhoaakgy1qxZYzc+c+ZMQ5Lx/vvv28ZycnKM5s2bG5KMDRs22MaHDRtmhIaG2h6np6cbkoy6desap0+fto1//PHHhiRj1apVJdb53nvvGZKMTZs22cYWLVpkSDLS09NtYz169DB69Ohhe/zNN98YkoxFixYVWmZUVJQRGRlpN7ZixYpCr6UoHTt2NBo2bGicOXPGNrZ27VpDkt3rNgzDkGQkJSXZHgcEBBhjx44tcfl9+/YttBzDMIwNGzYYkoymTZsW2k5Xnru69h49ehiSjGnTptnGcnNzjY4dOxr169c38vLyDMMoelsWt8ziarvyO756W19Zz6lTp2xj3333neHl5WXExsbaxpKSkgxJRnx8vN0yBw4caNStW7fQugCgsuNUKABuIzs7W5JUs2ZNh+e5+hqJrKwsnTx5Uj169ND+/fuVlZVlN22TJk0UHR1tN/bpp5+qYcOGuueee2xj1atX14MPPuhwDYMHD1bt2rVtj68cRdi/f3+RdV68eFEnT57ULbfcIkm204WcITY2Vl999ZX27dtnG3v33XcVEhKiHj16FDvf0aNHtXPnTg0bNkwBAQG28TvuuEPh4eHXXG+tWrX01Vdf6ciRI2WufdiwYQ5f8+Lj46PRo0fbHvv6+mr06NE6fvy40tLSylzDtVzZTsOHD1edOnVs4+3bt9cdd9yhTz/9tNA8Y8aMsXvcvXt3nTp1yvZ+BwB3QbAA4Db8/f0lXT5f31FbtmxRr169bOe616tXT08//bQkFRksfu/gwYNq3rx5oVORbrzxRodraNy4sd3jKyHj119/tY2dPn1a48aNU1BQkKpVq6Z69erZ6vl9nWYMHjxYVqtV7777rm3Z//rXv3T//feXeLrVwYMHJUktWrQo9Jwj2+LVV1/Vrl27FBISoi5dumjy5Ml2wcoRRf1+ihMcHKwaNWrYjbVs2VKSCl1T4UxXtlNR26R169Y6efKkcnJy7MYdeX8AgDsgWABwG/7+/goODtauXbscmn7fvn26/fbbdfLkSU2fPl2rV6/WunXrNH78eEkqdGvY0twBqjS8vb2LHDcMw/bve++9VwsWLNCYMWO0YsUKrV27VmvWrCmyTjNq166tfv362YLFBx98oNzcXA0dOtRp6yjKvffeq/3792vWrFkKDg7W1KlT1aZNG/373/92eBnO/v0UF6Ty8/Odup5rceT9AQDugGABwK3069dP+/bt07Zt26457apVq5Sbm6tPPvlEo0ePVp8+fdSrV69S/YEaGhqqffv2Ffojb8+ePaWuvTi//vqrUlNTNWHCBE2ZMkUDBw7UHXfcoaZNm5Zpede60Ds2NlY//fSTvvnmG7377rvq1KmT2rRpU+I8oaGhkqSff/650HOObouGDRvq4Ycf1sqVK5Wenq66devqxRdfdLju0jhy5EihIwM//fSTJNkunr5yZODMmTN201056nA1R2u7sp2K2iY//vijAgMDCx1JAYDrBcECgFt58sknVaNGDY0cOVKZmZmFnt+3b59ef/11Sf/3SfDVoSArK0uLFi1yeH19+vTRkSNH9MEHH9jGzp8/79Qv4CuqTkll/qbnK3+4/v4P5it69+6twMBAvfLKK/riiy8cOlrRsGFDdezYUW+//bbdqVnr1q3TDz/8UOK8+fn5hU7nql+/voKDg+1u21ujRg2nnfb122+/2W4XLF2+89Ubb7yhevXqKSIiQpLUrFkzSdKmTZvsai3qd+tobVdvp6u3/65du7R27Vr16dOnrC8JACo9bjcLwK00a9ZMS5Ys0eDBg9W6dWu7b97eunWrli9fruHDh0uS7rzzTvn6+iomJkajR4/WuXPntGDBAtWvX19Hjx51aH2jRo3S7NmzFRsbq7S0NDVs2FDvvPOOqlev7rTX5O/vr9tuu02vvvqqLl26pEaNGmnt2rVKT08v0/K
aNWumWrVqad68eapZs6Zq1KihyMhI2zUKVapU0V/+8hfNnj1b3t7eGjJkiEPLTU5OVt++fXXrrbcqPj5ep0+f1qxZs9SmTRudO3eu2PnOnj2rG264Qffcc486dOggPz8/rV+/Xt98842mTZtmmy4iIkLLli1TYmKibr75Zvn5+SkmJqZM2yA4OFivvPKKDhw4oJYtW2rZsmXauXOn5s+fb7vFb5s2bXTLLbdo4sSJOn36tOrUqaOlS5fqt99+K7S80tQ2depU9e7dW1FRURoxYoTtdrMBAQF23+0BANcdl96TCgDK6KeffjJGjRplhIWFGb6+vkbNmjWNbt26GbNmzTIuXrxom+6TTz4x2rdvb1StWtUICwszXnnlFWPhwoWFbjMaGhpq9O3bt8h1HTx40Ojfv79RvXp1IzAw0Bg3bpyxZs0ah283O3Xq1ELL1O9ux/rLL78YAwcONGrVqmUEBAQYgwYNMo4cOVJoOkduN2sYl29pGx4ebvj4+BR569mvv/7akGTceeedRb7m4nz44YdG69atDavVaoSHhxsrVqwo9Lp///pyc3ONJ554wujQoYNRs2ZNo0aNGkaHDh2Mf/zjH3bznDt3zrjvvvuMWrVq2d3C9srtX5cvX16onuJuN9umTRtj+/btRlRUlFG1alUjNDTUmD17dqH59+3bZ/Tq1cuwWq1GUFCQ8fTTTxvr1q0rtMziaivqdrOGYRjr1683unXrZlSrVs3w9/c3YmJijB9++MFumiu3mz1x4oTdeHG3wQWAys5iGFwdBgCe5rvvvlPHjh21ePFiPfDAA64uBwBwHeAaCwDwQAsWLJCfn5/+9Kc/uboUAMB1gmssAMCDrFq1Sj/88IPmz5+vhIQE7lAEAHAaToUCAA8SFhamzMxMRUdH65133inVt5gDAFASggUAAAAA07jGAgAAAIBpBAsAAAAAphEsAAAAAJhGsAAAAABgGsECAAAAgGkECwAAAACmESwAAAAAmEawAAAAAGAawQIAAACAaQQLAAAAAKYRLAAAAACYRrAAAAAAYBrBAgAAAIBpBAsAAAAAphEsAAAAAJhGsAAAAABgGsECAAAAgGkECwAAAACmESwAAAAAmObj6gIqWkFBgY4cOaKaNWvKYrG4uhwAqBQMw9DZs2cVHBwsLy/P/cyJHgEA9krTHzwuWBw5ckQhISGuLgMAKqVDhw7phhtucHUZLkOPAICiOdIfPC5Y1KxZU9LljePv7+/iagCgcsjOzlZISIhtH+mp6BEAYK80/cHjgsWVQ9v+/v40DQD4HU8//YceAQBFc6Q/eO6JtAAAAACchmABAAAAwDSCBQAAAADTCBYAAAAATCNYAAAAADCNYAEAqJQ2bdqkmJgYBQcHy2KxaOXKldecZ+PGjbrppptktVrVvHlzpaSklHudAIDLCBYAgEopJydHHTp00Jw5cxyaPj09XX379tUf/vAH7dy5U4899phGjhypzz77rJwrBQBILg4WfBoFAChO79699be//U0DBw50aPp58+apSZMmmjZtmlq3bq2EhATdc889mjFjRjlXCgCQXBws+DQKAOAs27ZtU69evezGoqOjtW3btmLnyc3NVXZ2tt0PAKBsXPrN271791bv3r0dnv7qT6MkqXXr1tq8ebNmzJih6Ojo8ioTAOAGjh07pqCgILuxoKAgZWdn68KFC6pWrVqheZKTkzVlypSKKhEArmtudY0Fn0YBAJxp4sSJysrKsv0cOnTI1SUBgNty6RGL0uLTKABlEfHEYleX4HJpU2NdXUK5a9CggTIzM+3GMjMz5e/vX2R/kCSr1Sqr1VoR5QGopOgRzusRbnXEoiz4NAoAPENUVJRSU1PtxtatW6eoqCgXVQQAnsWtjljwaRQAeI5z585p7969tsfp6enauXOn6tSpo8aNG2vixIk6fPiwFi++/GnjmDFjNHv2bD355JOKj4/X559/rvfff1+rV6921UsAAI/iVkcs+DQKADzH9u3
b1alTJ3Xq1EmSlJiYqE6dOmnSpEmSpKNHjyojI8M2fZMmTbR69WqtW7dOHTp00LRp0/Tmm29ycw8AqCAuPWLBp1EAgOL07NlThmEU+3xR32PUs2dPffvtt+VYFQCgOC49YsGnUQAAAMD1waVHLPg0CgAAALg+uNU1FgAAAAAqJ4IFAAAAANMIFgAAAABMI1gAAAAAMI1gAQAAAMA0ggUAAAAA0wgWAAAAAEwjWAAAAAAwjWABAAAAwDSCBQAAAADTCBYAAAAATCNYAAAAADCNYAEAAADANIIFAAAAANMIFgAAAABMI1gAAAAAMI1gAQAAAMA0ggUAAAAA0wgWAAAAAEwjWAAAAAAwjWABAAAAwDSCBQAAAADTCBYAAAAATCNYAAAAADCNYAEAAADANIIFAAAAANMIFgAAAABMI1gAAAAAMI1gAQAAAMA0ggUAAAAA0wgWAAAAAEwjWAAAAAAwjWABAAAAwDSCBQAAAADTCBYAAAAATCNYAAAAADCNYAEAAADANIIFAAAAANMIFgAAAABMI1gAAAAAMI1gAQAAAMA0ggUAAAAA0wgWAAAAAEwjWAAAAAAwjWABAKi05syZo7CwMFWtWlWRkZH6+uuvS5x+5syZuvHGG1WtWjWFhIRo/PjxunjxYgVVCwCezeXBgqYBACjKsmXLlJiYqKSkJO3YsUMdOnRQdHS0jh8/XuT0S5Ys0YQJE5SUlKTdu3frrbfe0rJly/T0009XcOUA4JlcGixoGgCA4kyfPl2jRo1SXFycwsPDNW/ePFWvXl0LFy4scvqtW7eqW7duuu+++xQWFqY777xTQ4YMKfEDq9zcXGVnZ9v9AADKxqXBoiKaBgDA/eTl5SktLU29evWyjXl5ealXr17atm1bkfN07dpVaWlptp6wf/9+ffrpp+rTp0+x60lOTlZAQIDtJyQkxLkvBAA8iMuCRUU1DT6NAgD3c/LkSeXn5ysoKMhuPCgoSMeOHStynvvuu0/PP/+8br31VlWpUkXNmjVTz549SzyqPXHiRGVlZdl+Dh065NTXAQCexGXBoqKaBp9GAYBn2Lhxo1566SX94x//0I4dO7RixQqtXr1aL7zwQrHzWK1W+fv72/0AAMrG5Rdvl0ZZmgafRgGA+wkMDJS3t7cyMzPtxjMzM9WgQYMi53nuuef0wAMPaOTIkWrXrp0GDhyol156ScnJySooKKiIsgHAo/m4asVmm4YktWvXTjk5OXrwwQf1zDPPyMurcE6yWq2yWq3OfwEAgHLj6+uriIgIpaamasCAAZKkgoICpaamKiEhoch5zp8/X6gPeHt7S5IMwyjXegEALjxicXXTuOJK04iKiipyHpoGAHiOxMRELViwQG+//bZ2796thx56SDk5OYqLi5MkxcbGauLEibbpY2JiNHfuXC1dulTp6elat26dnnvuOcXExNh6BQCg/LjsiIV0uWkMGzZMnTt3VpcuXTRz5sxCTaNRo0ZKTk6WdLlpTJ8+XZ06dVJkZKT27t1L0wCA69TgwYN14sQJTZo0SceOHVPHjh21Zs0a27V5GRkZdh82Pfvss7JYLHr22Wd1+PBh1atXTzExMXrxxRdd9RIAwKO4NFjQNAAAJUlISCj21KeNGzfaPfbx8VFSUpKSkpIqoDIAwO+5NFhINA0AAADgeuBWd4UCAAAAUDkRLAAAAACYRrAAAAAAYBrBAgAAAIBpBAsAAAAAphEsAAAAAJhGsAAAAABgGsECAAAAgGkECwAAAACmESwAAAAAmEawAAAAAGAawQIAAACAaQQLAAAAAKYRLAAAAACYRrAAAAAAYBrBAgAAAIBpBAsAAAAAphEsAAAAAJhGsAAAAABgGsECAAAAgGkECwAAAACmESwAAAAAmEawAAAAAGAawQIAAACAaQQLAAAAAKYRLAAAAACYRrAAAAAAYBrBAgAAAIBpBAsAAAAAphEsAAAAAJhGsAAAAABgGsECAAAAgGkECwAAAACmESwAAAAAmEa
wAAAAAGAawQIAAACAaWUKFjk5Oc6uAwBwnaBHAIBnKlOwCAoKUnx8vDZv3uzsegAAbo4eAQCeqUzB4p///KdOnz6tP/7xj2rZsqVefvllHTlyxNm1AQDcED0CADxTmYLFgAEDtHLlSh0+fFhjxozRkiVLFBoaqn79+mnFihX67bffnF0nAMBN0CMAwDOZuni7Xr16SkxM1H//+19Nnz5d69ev1z333KPg4GBNmjRJ58+fd1adAAA3Q48AAM/iY2bmzMxMvf3220pJSdHBgwd1zz33aMSIEfrll1/0yiuv6Msvv9TatWudVSsAwI3QIwDAs5QpWKxYsUKLFi3SZ599pvDwcD388MMaOnSoatWqZZuma9euat26tbPqBAC4CXoEAHimMgWLuLg4/eUvf9GWLVt08803FzlNcHCwnnnmGVPFAQDcDz0CADxTma6xOHr0qN54441iG4YkVatWTUlJSWUuDADgnpzZI+bMmaOwsDBVrVpVkZGR+vrrr0uc/syZMxo7dqwaNmwoq9Wqli1b6tNPPy31awAAlF6ZgkXNmjV1/PjxQuOnTp2St7d3qZZF0wCA64uzesSyZcuUmJiopKQk7dixQx06dFB0dHSRy5akvLw83XHHHTpw4IA++OAD7dmzRwsWLFCjRo3K/FoAAI4r06lQhmEUOZ6bmytfX1+Hl3OlacybN0+RkZGaOXOmoqOjtWfPHtWvX7/Q9FeaRv369fXBBx+oUaNGOnjwoN15uwAA13JWj5g+fbpGjRqluLg4SdK8efO0evVqLVy4UBMmTCg0/cKFC3X69Glt3bpVVapUkSSFhYWV/gUAAMqkVMHi73//uyTJYrHozTfflJ+fn+25/Px8bdq0Sa1atXJ4eTQNALh+OLNH5OXlKS0tTRMnTrSNeXl5qVevXtq2bVuR83zyySeKiorS2LFj9fHHH6tevXq677779NRTTxV7pCQ3N1e5ubm2x9nZ2Q7VBwAorFTBYsaMGZIufxo1b948ux21r6+vwsLCNG/ePIeWRdMAgOuLM3vEyZMnlZ+fr6CgILvxoKAg/fjjj0XOs3//fn3++ee6//779emnn2rv3r16+OGHdenSpWKv50hOTtaUKVMcqgkAULJSBYv09HRJ0h/+8AetWLFCtWvXLvOKaRoAcH1xZo8oi4KCAtWvX1/z58+Xt7e3IiIidPjwYU2dOrXYHjFx4kQlJibaHmdnZyskJKSiSgaA60qZrrHYsGGDs+twCE0DACo/Z/SIwMBAeXt7KzMz0248MzNTDRo0KHKehg0bqkqVKnZHSlq3bq1jx44pLy+vyOs7rFarrFar6XoBAKUIFomJiXrhhRdUo0YNuz/UizJ9+vRrLo+mAQDXD2f3CF9fX0VERCg1NVUDBgyQdPnDpdTUVCUkJBQ5T7du3bRkyRIVFBTIy+vyTQ9/+uknNWzYsFQXjQMAysbhYPHtt9/q0qVLtn8Xx2KxOLQ8mgYAXD+c3SOky2Fl2LBh6ty5s7p06aKZM2cqJyfHdsOP2NhYNWrUSMnJyZKkhx56SLNnz9a4ceP0yCOP6Oeff9ZLL72kRx991MQrAwA4yuFgcfWhbWedCkXTAIDrQ3n0iMGDB+vEiROaNGmSjh07po4dO2rNmjW2a/MyMjJsHzJJUkhIiD777DONHz9e7du3V6NGjTRu3Dg99dRTTqkHAFCyMl1j4Sw0DQBASRISEoo9ir1x48ZCY1FRUfryyy/LuSoAQFEcDhZ/+tOfHF7oihUrHJ6WpgEA7q+8egQAwH04HCwCAgLKsw4AgBujRwAAHA4WixYtKs86AABujB4BAPC69iQAAAAAUDKHj1jcdNNNSk1NVe3atdWpU6cSbxm4Y8cOpxQHAHAP9AgAgMPB4u6777Z90dyV750AAECiRwAAShEskpKSivw3AAD0CACAqe+x2L59u3bv3i1JCg8PV0REhFOKAgC4P3oEAHiWMgWLX375RUOGDNGWLVtUq1YtSdKZM2fUtWt
XLV26VDfccIMzawQAuBF6BAB4pjLdFWrkyJG6dOmSdu/erdOnT+v06dPavXu3CgoKNHLkSGfXCABwI/QIAPBMZTpi8cUXX2jr1q268cYbbWM33nijZs2ape7duzutOACA+6FHAIBnKtMRi5CQEF26dKnQeH5+voKDg00XBQBwX/QIAPBMZQoWU6dO1SOPPKLt27fbxrZv365x48bptddec1pxAAD3Q48AAM/k8KlQtWvXtvvCo5ycHEVGRsrH5/IifvvtN/n4+Cg+Pp57mAOAh6FHAAAcDhYzZ84sxzIAAO6MHgEAcDhYDBs2rDzrAAC4MXoEAMDUF+RJ0sWLF5WXl2c35u/vb3axAIDrAD0CADxHmS7ezsnJUUJCgurXr68aNWqodu3adj8AAM9FjwAAz1SmYPHkk0/q888/19y5c2W1WvXmm29qypQpCg4O1uLFi51dIwDAjdAjAMAzlelUqFWrVmnx4sXq2bOn4uLi1L17dzVv3lyhoaF69913df/99zu7TgCAm6BHAIBnKtMRi9OnT6tp06aSLp8re/r0aUnSrbfeqk2bNjmvOgCA26FHAIBnKlOwaNq0qdLT0yVJrVq10vvvvy/p8qdUtWrVclpxAAD3Q48AAM9UpmARFxen7777TpI0YcIEzZkzR1WrVtX48eP1xBNPOLVAAIB7oUcAgGcq0zUW48ePt/27V69e2r17t3bs2KHmzZurffv2TisOAOB+6BEA4JlMf4+FJIWFhSksLMwZiwIAXGfoEQDgGcp0KpQkpaamql+/fmrWrJmaNWumfv36af369c6sDQDgpugRAOB5yhQs/vGPf+iuu+5SzZo1NW7cOI0bN07+/v7q06eP5syZ4+waAQBuhB4BAJ6pTKdCvfTSS5oxY4YSEhJsY48++qi6deuml156SWPHjnVagQAA90KPAADPVKYjFmfOnNFdd91VaPzOO+9UVlaW6aIAAO6LHgEAnqlMwaJ///766KOPCo1//PHH6tevn+miAADuix4BAJ7J4VOh/v73v9v+HR4erhdffFEbN25UVFSUJOnLL7/Uli1b9Ne//tX5VQIAKjV6BADAYhiG4ciETZo0cWyBFov2799vqqjylJ2drYCAAGVlZcnf39/V5QCoABFPLHZ1CS6XNjW2xOfN7hvpEQDcFT2i5B5Rmv2iw0cs0tPTHa8OAOBR6BEAgDJ/j8UVhmHIwYMeAAAPQ48AAM9R5mCxePFitWvXTtWqVVO1atXUvn17vfPOO86sDQDgpugRAOB5yvQ9FtOnT9dzzz2nhIQEdevWTZK0efNmjRkzRidPntT48eOdWiQAwH3QIwDAM5UpWMyaNUtz585VbOz/XejRv39/tWnTRpMnT6ZpAIAHo0cAgGcq06lQR48eVdeuXQuNd+3aVUePHjVdFADAfdEjAMAzlSlYNG/eXO+//36h8WXLlqlFixamiwIAuC96BAB4pjKdCjVlyhQNHjxYmzZtsp0/u2XLFqWmphbZTAAAnoMeAQCeqUxHLP785z/r66+/VmBgoFauXKmVK1cqMDBQX3/9tQYOHOjsGgEAboQeAQCeqdRHLC5duqTRo0frueee0z//+c/yqAkA4KboEQDguUp9xKJKlSr68MMPy6MWAICbo0cAgOcq06lQAwYM0MqVK51cCgDgekCPAADPVKaLt1u0aKHnn39eW7ZsUUREhGrUqGH3/KOPPuqU4gAA7oceAQCeqUzB4q233lKtWrWUlpamtLQ0u+csFgtNAwA8mDN7xJw5czR16lQdO3ZMHTp00KxZs9SlS5drzrd06VINGTJEd999N0dPAKCClClYpKen2/5tGIaky82irGgcAHD9cFaPWLZsmRITEzVv3jxFRkZq5syZio6O1p49e1S/fv1i5ztw4IAef/xxde/evfTFAwDKrEzXWEiXP5Fq27atqlatqqpVq6pt27Z68803S72cK40jKSlJO3bsUIcOHRQdHa3jx4+XOB+NAwAqL2f0iOn
Tp2vUqFGKi4tTeHi45s2bp+rVq2vhwoXFzpOfn6/7779fU6ZMUdOmTc2+DABAKZQpWEyaNEnjxo1TTEyMli9fruXLlysmJkbjx4/XpEmTSrUsGgcAXF+c0SPy8vKUlpamXr162ca8vLzUq1cvbdu2rdj5nn/+edWvX18jRoxwaD25ubnKzs62+wEAlE2ZToWaO3euFixYoCFDhtjG+vfvr/bt2+uRRx7R888/79ByrjSOiRMn2sZK2zj+85//lLiO3Nxc5ebm2h7TNACgfDmjR5w8eVL5+fkKCgqyGw8KCtKPP/5Y5DybN2/WW2+9pZ07dzpca3JysqZMmeLw9ACA4pXpiMWlS5fUuXPnQuMRERH67bffHF5OSY3j2LFjRc5zpXEsWLDAoXUkJycrICDA9hMSEuJwfQCA0nNWjyiNs2fP6oEHHtCCBQsUGBjo8HwTJ05UVlaW7efQoUPlUh8AeIIyBYsHHnhAc+fOLTQ+f/583X///aaLKk5ZGgdNAwAqljN6RGBgoLy9vZWZmWk3npmZqQYNGhSaft++fTpw4IBiYmLk4+MjHx8fLV68WJ988ol8fHy0b9++ItdjtVrl7+9v9wMAKJsynQolXb4wb+3atbrlllskSV999ZUyMjIUGxurxMRE23TTp08vdhlmGscVBQUFl1+Ij4/27NmjZs2a2c1jtVpltVpL/wIBAGVmtkf4+voqIiJCqampGjBggKTL+/vU1FQlJCQUmr5Vq1b6/vvv7caeffZZnT17Vq+//jpHqwGgApQpWOzatUs33XSTJNk+BQoMDFRgYKB27dplm+5atxekcQDA9cdZPSIxMVHDhg1T586d1aVLF82cOVM5OTmKi4uTJMXGxqpRo0ZKTk623XnqarVq1ZKkQuMAgPJRpmCxYcMGpxVA4wCA64uzesTgwYN14sQJTZo0SceOHVPHjh21Zs0a23V5GRkZ8vIq813TAQBOVuZToZyFxgEAKE5CQkKRR7AlaePGjSXOm5KS4vyCAADFcnmwkGgcAAAAgLvjUAAAAAAA0wgWAAAAAEwjWAAAAAAwjWABAAAAwDSCBQAAAADTCBYAAAAATCNYAAAAADCNYAEAAADANIIFAAAAANMIFgAAAABMI1gAAAAAMI1gAQAAAMA0ggUAAAAA0wgWAAAAAEwjWAAAAAAwjWABAAAAwDSCBQAAAADTCBYAAAAATCNYAAAAADCNYAEAAADANIIFAAAAANMIFgAAAABMI1gAAAAAMI1gAQAAAMA0ggUAAAAA0wgWAAAAAEwjWAAAAAAwjWABAAAAwDSCBQAAAADTCBYAAAAATCNYAAAAADCNYAEAAADANIIFAAAAANMIFgAAAABMI1gAAAAAMI1gAQAAAMA0ggUAAAAA0wgWAAAAAEwjWAAAAAAwjWABAAAAwDSCBQAAAADTCBYAAAAATCNYAAAAADCNYAEAAADANIIFAAAAANMqRbCYM2eOwsLCVLVqVUVGRurrr78udtoFCxaoe/fuql27tmrXrq1evXqVOD0AwH3RHwDAfbg8WCxbtkyJiYlKSkrSjh071KFDB0VHR+v48eNFTr9x40YNGTJEGzZs0LZt2xQSEqI777xThw8fruDKAQDlif4AAO7FYhiG4coCIiMjdfPNN2v27NmSpIKCAoWEhOiRRx7RhAkTrjl/fn6+ateurdmzZys2Nvaa02dnZysgIEBZWVny9/c3XT+Ayi/iicWuLsHl0qaWvH+sjPvGiugPubm5ys3NtT3Ozs5WSEhIpdoOAMoXPaLkHlGa/uDSIxZ5eXlKS0tTr169bGNeXl7q1auXtm3b5tAyzp8/r0uXLqlOnTpFPp+bm6vs7Gy7HwBA5VYR/UGSkpOTFRAQYPsJCQkxXTsAeCqXBouTJ08qPz9fQUFBduNBQUE6duyYQ8t46qmnFBwcbNd8rkbTAAD3UxH9QZImTpyorKws28+hQ4dM1Q0Anszl11iY8fLLL2vp0qX66KOPVLVq1SK
noWkAgOdxpD9IktVqlb+/v90PAKBsfFy58sDAQHl7eyszM9NuPDMzUw0aNChx3tdee00vv/yy1q9fr/bt2xc7ndVqldVqdUq9AICKURH9AQDgXC49YuHr66uIiAilpqbaxgoKCpSamqqoqKhi53v11Vf1wgsvaM2aNercuXNFlAoAqED0BwBwPy49YiFJiYmJGjZsmDp37qwuXbpo5syZysnJUVxcnCQpNjZWjRo1UnJysiTplVde0aRJk7RkyRKFhYXZzrX18/OTn5+fy14HAMC56A8A4F5cHiwGDx6sEydOaNKkSTp27Jg6duyoNWvW2C7Yy8jIkJfX/x1YmTt3rvLy8nTPPffYLScpKUmTJ0+uyNIBAOWI/gAA7sXl32NR0SrjvdoBlC/uUe6e32PhCmwHwPPQI66T77EAAAAAcH0gWAAAAAAwjWABAAAAwDSCBQAAAADTCBYAAAAATCNYAAAAADCNYAEAAADANIIFAAAAANMIFgAAAABMI1gAAAAAMI1gAQAAAMA0ggUAAAAA0wgWAAAAAEwjWAAAAAAwjWABAAAAwDSCBQAAAADTCBYAAAAATCNYAAAAADCNYAEAAADANIIFAAAAANMIFgAAAABMI1gAAAAAMI1gAQAAAMA0ggUAAAAA0wgWAAAAAEwjWAAAAAAwjWABAAAAwDSCBQAAAADTCBYAAAAATCNYAAAAADCNYAEAAADANIIFAAAAANMIFgAAAABMI1gAAAAAMI1gAQAAAMA0ggUAAAAA0wgWAAAAAEwjWAAAAAAwjWABAAAAwDSCBQAAAADTCBYAAAAATCNYAAAAADCNYAEAAADANIIFAAAAANMIFgAAAABMqxTBYs6cOQoLC1PVqlUVGRmpr7/+usTply9frlatWqlq1apq166dPv300wqqFABQkegPAOA+XB4sli1bpsTERCUlJWnHjh3q0KGDoqOjdfz48SKn37p1q4YMGaIRI0bo22+/1YABAzRgwADt2rWrgisHAJQn+gMAuBeLYRiGKwuIjIzUzTffrNmzZ0uSCgoKFBISokceeUQTJkwoNP3gwYOVk5Ojf/3rX7axW265RR07dtS8efOuub7s7GwFBAQoKytL/v7+znshACqtiCcWu7oEl0ubGlvi85Vx31jR/UGqnNvhesT/ycuu9f8SFYP3Y8nvxdLsF32cXVhp5OXlKS0tTRMnTrSNeXl5qVevXtq2bVuR82zbtk2JiYl2Y9HR0Vq5cmWR0+fm5io3N9f2OCsrS9LljQTAM+TnXnB1CS53rX3eledd/FmTTUX0B4ke4Sr8n7zM7Pvstmffc1Il7mvT34aYXgbvx5Lfi6XpDy4NFidPnlR+fr6CgoLsxoOCgvTjjz8WOc+xY8eKnP7YsWNFTp+cnKwpU6YUGg8JCSlj1QDgfgJmjXFourNnzyogIKCcq7m2iugPEj0CruXo/0sUj23oHI5sR0f6g0uDRUWYOHGi3SdYBQUFOn36tOrWrSuLxeLCyoqXnZ2tkJAQHTp0iEPxJrAdzWMbOoc7bEfDMHT27FkFBwe7upQKRY/wTGxD52A7mucO27A0/cGlwSIwMFDe3t7KzMy0G8/MzFSDBg2KnKdBgwalmt5qtcpqtdqN1apVq+xFVyB/f/9K+yZzJ2xH89iGzlHZt2NlOFJxRUX0B4ke4enYhs7BdjSvsm9DR/uDS+8K5evrq4iICKWmptrGCgoKlJqaqqioqCLniYqKsptektatW1fs9AAA90N/AAD34/JToRITEzVs2DB17txZXbp00cyZM5WTk6O4uDhJUmxsrBo1aqTk5GRJ0rhx49SjRw9NmzZNffv21dKlS7V9+3bNnz/flS8DAOBk9AcAcC8uDxaDBw/WiRMnNGnSJB07dkwdO3bUmjVrbBfgZWRkyMvr/w6sdO3aVUuWLNGzzz6rp59+Wi1atNDKlSvVtm1bV70Ep7NarUpKSip0eB6lw3Y0j23oHGzHsqE/FI33k3lsQ+dgO5p
3vW1Dl3+PBQAAAAD35/Jv3gYAAADg/ggWAAAAAEwjWAAAAAAwjWABAAAAwDSCRQXbtGmTYmJiFBwcLIvFopUrV15znkcffVQRERGyWq3q2LFjuddYmZV2+x04cEAWi0Xe3t46fPiw3XNHjx6Vj4+PLBaLDhw4UH5FV0LDhw+XxWLRmDFjCj03duxYWSwWDR8+vNj5X3zxRXXt2lXVq1d3my8Tcyaz289ischisejLL7+0G8/NzbV94/PGjRudXDXcAT3CHHqEefQHczy9PxAsKlhOTo46dOigOXPmlGq++Ph4DR48uJyqch9l3X6NGjXS4sWL7cbefvttNWrUyJnluZWQkBAtXbpUFy5csI1dvHhRS5YsUePGjUucNy8vT4MGDdJDDz1U3mVWWma235X5Fy1aZDf20Ucfyc/Pz+m1wn3QI8yhRzgH/cEcT+4PBIsK1rt3b/3tb3/TwIEDHZ7n73//u8aOHaumTZuWY2XuoSzbT5KGDRtW6D/pokWLNGzYMGeW51ZuuukmhYSEaMWKFbaxFStWqHHjxurUqVOJ806ZMkXjx49Xu3btyrvMSsvM9pMuvyd/33gWLlzo0e9J0CPMokc4B/3BHE/uDwSLSmby5MkKCwtzdRluq7jt179/f/3666/avHmzJGnz5s369ddfFRMTU8EVVi7x8fF2zXThwoW2bzW+IiUlRRaLpaJLcwtmtl9ERITCwsL04YcfSrr8ZW+bNm3SAw88UL5Fw63RI8yhRziO/mCOp/YHgkUlExgYqGbNmrm6DLdV3ParUqWKhg4dqoULF0q6/B986NChqlKlSkWXWKkMHTpUmzdv1sGDB3Xw4EFt2bJFQ4cOtZsmICBAN954o4sqrNzMbr/4+HjbezIlJUV9+vRRvXr1yr1uuC96hDn0CMfRH8zx1P5AsKhkEhISlJqa6uoy3FZJ2y8+Pl7Lly/XsWPHtHz5csXHx1dwdZVPvXr11LdvX6WkpGjRokXq27evAgMD7aYZOHCgfvzxRxdVWLmZ3X5Dhw7Vtm3btH//fqWkpPCexDXRI8yhRziO/mCOp/YHH1cXAFSUdu3aqVWrVhoyZIhat26ttm3baufOna4uy+Xi4+OVkJAgSaW+4BHmtl/dunXVr18/jRgxQhcvXlTv3r119uzZ8igTwDXQIwqjP5jjif2BIxbwKPHx8dq4caPbJP+KcNdddykvL0+XLl1SdHS0q8txO2a335X3ZGxsrLy9vcuhQgCOokfYoz+Y44n9gWBRwc6dO6edO3faPgVJT0/Xzp07lZGRIUmaPXu2br/9drt59u7dq507d+rYsWO6cOGCbf68vLyKLt/lyrL9rjZq1CidOHFCI0eOrIhy3YK3t7d2796tH374ocgd10cffaRWrVrZjWVkZNi2e35+vu13cu7cuYoqu9Ioy/a72l133aUTJ07o+eefL88y4SboEebQI5yL/mCOJ/YHToWqYNu3b9cf/vAH2+PExERJl28tlpKSopMnT2rfvn1284wcOVJffPGF7fGVW5Wlp6d73N1ByrL9rubj41PoHEdI/v7+xT6XlZWlPXv22I1NmjRJb7/9tu3xlffkhg0b1LNnz3KpsTIr7fa7msVi4T0JG3qEOfQI56M/mONp/cFiGIbh6iIAAAAAuDdOhQIAAABgGsECAAAAgGkECwAAAACmESwAAAAAmEawAAAAAGAawQIAAACAaQQLAAAAAKYRLAAAAACYRrCA25s8ebI6duxYqnl69uypxx57rFzqKU/Dhw/XgAEDbI/L43X8fnv+fp0AcC3slx9z6jrYL5efjRs3ymKx6MyZM64u5brg4+oCgOL07NlTHTt21MyZM0uc7vHHH9cjjzxSMUVVMitWrFCVKlXKdR2vv/66DMOwPXb09wLg+sN++drYL8OTESzgtgzDUH5+vvz8/OTn5+fqcpzq0qVLDjWmOnXqlHstAQEB5b4OANcH9svslyuLK+9FHx/+1K1InAqFSmn48OH64os
v9Prrr8tischisSglJUUWi0X//ve/FRERIavVqs2bNxd7iHjKlCmqV6+e/P39NWbMGOXl5RW7vtzcXD3++ONq1KiRatSoocjISG3cuNHherds2aKePXuqevXqql27tqKjo/Xrr79KktasWaNbb71VtWrVUt26ddWvXz/t27fPNu+BAwdksVi0bNky9ejRQ1WrVtW7776r/Px8JSYm2uZ78skn7T6hkgofcg8LC9NLL72k+Ph41axZU40bN9b8+fPt5nnqqafUsmVLVa9eXU2bNtVzzz2nS5cuFfvarj7kXtTvJT09Xc2bN9drr71mN9/OnTtlsVi0d+9eh7cjgMqL/bLn7JcNw9DkyZPVuHFjWa1WBQcH69FHH7U9/84776hz586qWbOmGjRooPvuu0/Hjx+3PX/l9KLPPvtMnTp1UrVq1fTHP/5Rx48f17///W+1bt1a/v7+uu+++3T+/HnbfAUFBUpOTlaTJk1UrVo1dejQQR988EGJtf5+nb9/L5pZJkqPYIFK6fXXX1dUVJRGjRqlo0eP6ujRowoJCZEkTZgwQS+//LJ2796t9u3bFzl/amqqdu/erY0bN+q9997TihUrNGXKlGLXl5CQoG3btmnp0qX673//q0GDBumuu+7Szz//fM1ad+7cqdtvv13h4eHatm2bNm/erJiYGOXn50uScnJylJiYqO3btys1NVVeXl4aOHCgCgoK7JYzYcIEjRs3Trt371Z0dLSmTZumlJQULVy4UJs3b9bp06f10UcfXbOeadOmqXPnzvr222/18MMP66GHHtKePXtsz9esWVMpKSn64Ycf9Prrr2vBggWaMWPGNZcrFf17ady4seLj47Vo0SK7aRctWqTbbrtNzZs3d2jZACo39sues1/+8MMPNWPGDL3xxhv6+eeftXLlSrVr1872/KVLl/TCCy/ou+++08qVK3XgwAENHz680HImT56s2bNna+vWrTp06JDuvfdezZw5U0uWLNHq1au1du1azZo1yzZ9cnKyFi9erHnz5ul///ufxo8fr6FDh+qLL75waFtIhd+LzlgmSsEAKqkePXoY48aNsz3esGGDIclYuXKl3XRJSUlGhw4dbI+HDRtm1KlTx8jJybGNzZ071/Dz8zPy8/MLLfvgwYOGt7e3cfjwYbvl3n777cbEiROvWeeQIUOMbt26Ofy6Tpw4YUgyvv/+e8MwDCM9Pd2QZMycOdNuuoYNGxqvvvqq7fGlS5eMG264wbj77rttY7/fRqGhocbQoUNtjwsKCoz69esbc+fOLbaeqVOnGhEREbbHRW3PktZpGIZx+PBhw9vb2/jqq68MwzCMvLw8IzAw0EhJSSl2vQDcD/tlz9gvT5s2zWjZsqWRl5d3zWkNwzC++eYbQ5Jx9uxZwzD+732xfv162zTJycmGJGPfvn22sdGjRxvR0dGGYRjGxYsXjerVqxtbt261W/aIESOMIUOGXLOGot6Ljizzyny//vqrQ68VJeOIBdxO586drzlNhw4dVL16ddvjqKgonTt3TocOHSo07ffff6/8/Hy1bNnSdl6wn5+fvvjiC7tD48W58slYcX7++WcNGTJETZs2lb+/v8LCwiRJGRkZxb6urKwsHT16VJGRkbYxHx8fh1771Z8WWiwWNWjQwO4Q9bJly9StWzc1aNBAfn5+evbZZwvVUlrBwcHq27evFi5cKElatWqVcnNzNWjQIFPLBeAe2C+XzN32y4MGDdKFCxfUtGlTjRo1Sh999JF+++032/NpaWmKiYlR48aNVbNmTfXo0UNS4e139esOCgqynep19diV7bB3716dP39ed9xxh93vfPHixQ79zq+4+vfhrGXCcVzRArdTo0YNpy7v3Llz8vb2Vlpamry9ve2ec+Tiw2rVqpX4fExMjEJDQ7VgwQIFBweroKBAbdu2LXRusbNe1+8vLrRYLLbD+9u2bdP999+vKVOmKDo6WgEBAVq6dKmmTZtmer0jR47UAw88oBkzZmj
RokUaPHiw3R8RAK5f7JdL5m775ZCQEO3Zs0fr16/XunXr9PDDD2vq1Kn64osvlJeXp+joaEVHR+vdd99VvXr1lJGRoejo6ELb7+rXbbFYStwO586dkyStXr1ajRo1spvOarU6/Jqv/p05a5lwHMEClZavr6/tfNjS+u6773ThwgVbc/nyyy/l5+dnOx/4ap06dVJ+fr6OHz+u7t27l3pd7du3V2pqapHnCp86dUp79uzRggULbMvevHnzNZcZEBCghg0b6quvvtJtt90mSfrtt9+Ulpamm266qdQ1XrF161aFhobqmWeesY0dPHiwVMso7vfSp08f1ahRQ3PnztWaNWu0adOmMtcJoHJiv+w5++Vq1aopJiZGMTExGjt2rFq1aqXvv/9ehmHo1KlTevnll22/u+3bt5eq3qKEh4fLarUqIyPDdgSkMi4TJSNYoNIKCwvTV199pQMHDsjPz6/QRXUlycvL04gRI/Tss8/qwIEDSkpKUkJCgry8Cp/917JlS91///2KjY3VtGnT1KlTJ504cUKpqalq3769+vbtW+K6Jk6cqHbt2unhhx/WmDFj5Ovrqw0bNmjQoEGqU6eO6tatq/nz56thw4bKyMjQhAkTHHoN48aN08svv6wWLVqoVatWmj59uukv8GnRooUyMjK0dOlS3XzzzVq9erVDFx5e7fe/lzp16sjLy0ve3t4aPny4Jk6cqBYtWigqKspUrQAqH/bLnrFfTklJUX5+viIjI1W9enX985//VLVq1RQaGqqCggL5+vpq1qxZGjNmjHbt2qUXXnihLC/dTs2aNfX4449r/PjxKigo0K233qqsrCxt2bJF/v7+GjZsWKVYJkrGNRaotB5//HF5e3srPDzcdqjVUbfffrtatGih2267TYMHD1b//v01efLkYqdftGiRYmNj9de//lU33nijBgwYoG+++UaNGze+5rpatmyptWvX6rvvvlOXLl0UFRWljz/+WD4+PvLy8tLSpUuVlpamtm3bavz48Zo6dapDr+Gvf/2rHnjgAQ0bNkxRUVGqWbOmBg4c6OgmKFL//v01fvx4JSQkqGPHjtq6dauee+65Ui2jpN/LiBEjlJeXp7i4OFN1Aqic2C97xn65Vq1aWrBggbp166b27dtr/fr1WrVqlerWrat69eopJSVFy5cvV3h4uF5++eVCt7UtqxdeeEHPPfeckpOT1bp1a911111avXq1mjRpUqmWieJZDON3N2AG3Nzw4cN15swZrVy50tWleJz//Oc/uv3223Xo0CEFBQW5uhwAlQT7Zddhv4yKxKlQAEzLzc3ViRMnNHnyZA0aNIjmBQAuxn4ZrsCpUMA19O7d2+42dVf/vPTSS64ur1J47733FBoaqjNnzujVV191dTkArnPsl6+tpP3yu+++W+z2a9OmjYsqLtmYMWOKrXnMmDGuLg//H6dCAddw+PBhXbhwocjn6tSpozp16lRwRQDg2dgvm3P27FllZmYW+VyVKlUUGhpawRVd2/Hjx5WdnV3kc/7+/qpfv34FV4SiECwAAAAAmMapUAAAAABMI1gAAAAAMI1gAQAAAMA0ggUAAAAA0wgWAAAAAEwjWAAAAAAwjWABAAAAwLT/B4B9o3cEomC7AAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "metrics = [\"triple_cardinality\", \"triple_cardinality_same_rel\"]\n", + "fig, ax = plt.subplots(1, len(metrics), figsize=[4 * len(metrics), 4])\n", + "\n", + "for i, metric in enumerate(metrics):\n", + " sns.countplot(\n", + " x=edge_dcs[metric],\n", + " order=[\"1:1\", \"1:M\", \"M:1\", \"M:M\"],\n", + " stat=\"probability\",\n", + " ax=ax[i],\n", + " )\n", + "fig.suptitle(\"Cardinality distribution\")\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Edge topological patterns\n", + "\n", + "The second method provided by `KGTopologyToolbox` for topological analysis at the edge level is `edge_pattern_summary`, which extracts information on several significant edge topological patterns. In particular, it detects whether the edge (h,r,t) is a loop, is symmetric, or has inverse, inference or composition (directed and undirected) patterns:\n", + "\n", + "![image info](../images/edge_patterns.png)\n", + "\n", + "For inverse/inference, the method also provides the number and types of unique relations `r'` realizing the counterpart edges; for composition, the number of triangles supported by the edge is provided (the unique metapaths `[r_1, r_2]` can also be listed by setting `return_metapath_list=True` when calling the method)." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + "
hrtis_loopis_symmetrichas_inversen_inverse_relationsinverse_edge_typeshas_inferencen_inference_relationsinference_edge_typeshas_compositionhas_undirected_compositionn_trianglesn_undirected_triangles
0171803207FalseFalseFalse0[]False0[0]FalseTrue015
14903013662FalseFalseFalse0[]False0[0]TrueTrue44153
25480015999FalseFalseFalse0[]False0[0]FalseTrue01
3314807247FalseFalseFalse0[]False0[0]TrueTrue1029
410300016202FalseFalseFalse0[]False0[0]TrueTrue379
................................................
50884292451505097FalseFalseTrue1[46]True1[46, 50]TrueTrue15325722
50884306456508833FalseFalseTrue2[45, 46]True2[45, 46, 50]TrueTrue234913
508843194845015873FalseFalseTrue1[46]True2[46, 45, 50]TrueTrue13265004
5088432636550496FalseFalseTrue2[45, 46]True2[45, 46, 50]TrueTrue14335554
508843313860506368FalseFalseFalse0[]False0[50]TrueTrue119489
\n", + "

5088434 rows × 15 columns

\n", + "
" + ], + "text/plain": [ + " h r t is_loop is_symmetric has_inverse \\\n", + "0 1718 0 3207 False False False \n", + "1 4903 0 13662 False False False \n", + "2 5480 0 15999 False False False \n", + "3 3148 0 7247 False False False \n", + "4 10300 0 16202 False False False \n", + "... ... .. ... ... ... ... \n", + "5088429 2451 50 5097 False False True \n", + "5088430 6456 50 8833 False False True \n", + "5088431 9484 50 15873 False False True \n", + "5088432 6365 50 496 False False True \n", + "5088433 13860 50 6368 False False False \n", + "\n", + " n_inverse_relations inverse_edge_types has_inference \\\n", + "0 0 [] False \n", + "1 0 [] False \n", + "2 0 [] False \n", + "3 0 [] False \n", + "4 0 [] False \n", + "... ... ... ... \n", + "5088429 1 [46] True \n", + "5088430 2 [45, 46] True \n", + "5088431 1 [46] True \n", + "5088432 2 [45, 46] True \n", + "5088433 0 [] False \n", + "\n", + " n_inference_relations inference_edge_types has_composition \\\n", + "0 0 [0] False \n", + "1 0 [0] True \n", + "2 0 [0] False \n", + "3 0 [0] True \n", + "4 0 [0] True \n", + "... ... ... ... \n", + "5088429 1 [46, 50] True \n", + "5088430 2 [45, 46, 50] True \n", + "5088431 2 [46, 45, 50] True \n", + "5088432 2 [45, 46, 50] True \n", + "5088433 0 [50] True \n", + "\n", + " has_undirected_composition n_triangles n_undirected_triangles \n", + "0 True 0 15 \n", + "1 True 44 153 \n", + "2 True 0 1 \n", + "3 True 10 29 \n", + "4 True 3 79 \n", + "... ... ... ... 
\n", + "5088429 True 1532 5722 \n", + "5088430 True 234 913 \n", + "5088431 True 1326 5004 \n", + "5088432 True 1433 5554 \n", + "5088433 True 119 489 \n", + "\n", + "[5088434 rows x 15 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "edge_eps = kgtt.edge_pattern_summary(biokg_df)\n", + "edge_eps" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fraction of triples with property:\n" + ] + }, + { + "data": { + "text/plain": [ + "is_loop 0.000011\n", + "is_symmetric 0.713743\n", + "has_inverse 0.409704\n", + "has_inference 0.410111\n", + "has_composition 0.997605\n", + "dtype: float64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(\"Fraction of triples with property:\")\n", + "edge_eps[\n", + " [\"is_loop\", \"is_symmetric\", \"has_inverse\", \"has_inference\", \"has_composition\"]\n", + "].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "metrics = [\n", + " \"n_inverse_relations\",\n", + " \"n_inference_relations\",\n", + " \"n_triangles\",\n", + " \"n_undirected_triangles\",\n", + "]\n", + "fig, ax = plt.subplots(2, 2, figsize=(9, 7))\n", + "\n", + "for axn, metric in zip(ax.flatten(), metrics):\n", + " x = np.sqrt(edge_eps[metric])\n", + " sns.histplot(x=x, stat=\"probability\", binwidth=1, binrange=[0, x.max() + 1], ax=axn)\n", + " axn.set_xlabel(f\"sqrt({metric})\")\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Relation-level analysis\n", + "\n", + "The method `aggregate_by_relation` allows the user to aggregate at the relation-level the statistics outputted by the edge-level methods `edge_degree_cardinality_summary` and `edge_pattern_summary`. This converts DataFrames indexed on the KG edges to DataFrames indexed on the IDs of the unique relation types." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
num_triplesfrac_triplesunique_hunique_th_unique_rel_meanh_unique_rel_stdh_unique_rel_quartile1h_unique_rel_quartile2h_unique_rel_quartile3h_degree_mean...tot_degree_same_rel_quartile1tot_degree_same_rel_quartile2tot_degree_same_rel_quartile3triple_cardinality_1:M_fractriple_cardinality_M:1_fractriple_cardinality_M:M_fractriple_cardinality_same_rel_1:1_fractriple_cardinality_same_rel_1:M_fractriple_cardinality_same_rel_M:1_fractriple_cardinality_same_rel_M:M_frac
r
0810660.015931974293378.1102938.2472774.05.08.0569.252202...45.0112.0211.00.00.01.00.0016280.0235860.0649590.909827
156690.001114698153627.04815712.93641017.031.036.02518.765391...14.032.060.00.00.01.00.0028220.1042510.0275180.865408
2669540.01315861261236.4043075.60070633.036.041.04129.511919...332.0404.0482.00.00.01.00.0000000.0002540.0002390.999507
3195850.00384949149137.0959415.54738933.037.041.04527.399592...114.0157.0202.00.00.01.00.0000000.0008680.0009700.998162
4320340.00629552652537.3195675.38452334.038.041.04511.067834...188.0243.0299.00.00.01.00.0000620.0005310.0005930.998814
\n", + "

5 rows × 51 columns

\n", + "
" + ], + "text/plain": [ + " num_triples frac_triples unique_h unique_t h_unique_rel_mean \\\n", + "r \n", + "0 81066 0.015931 9742 9337 8.110293 \n", + "1 5669 0.001114 698 1536 27.048157 \n", + "2 66954 0.013158 612 612 36.404307 \n", + "3 19585 0.003849 491 491 37.095941 \n", + "4 32034 0.006295 526 525 37.319567 \n", + "\n", + " h_unique_rel_std h_unique_rel_quartile1 h_unique_rel_quartile2 \\\n", + "r \n", + "0 8.247277 4.0 5.0 \n", + "1 12.936410 17.0 31.0 \n", + "2 5.600706 33.0 36.0 \n", + "3 5.547389 33.0 37.0 \n", + "4 5.384523 34.0 38.0 \n", + "\n", + " h_unique_rel_quartile3 h_degree_mean ... tot_degree_same_rel_quartile1 \\\n", + "r ... \n", + "0 8.0 569.252202 ... 45.0 \n", + "1 36.0 2518.765391 ... 14.0 \n", + "2 41.0 4129.511919 ... 332.0 \n", + "3 41.0 4527.399592 ... 114.0 \n", + "4 41.0 4511.067834 ... 188.0 \n", + "\n", + " tot_degree_same_rel_quartile2 tot_degree_same_rel_quartile3 \\\n", + "r \n", + "0 112.0 211.0 \n", + "1 32.0 60.0 \n", + "2 404.0 482.0 \n", + "3 157.0 202.0 \n", + "4 243.0 299.0 \n", + "\n", + " triple_cardinality_1:M_frac triple_cardinality_M:1_frac \\\n", + "r \n", + "0 0.0 0.0 \n", + "1 0.0 0.0 \n", + "2 0.0 0.0 \n", + "3 0.0 0.0 \n", + "4 0.0 0.0 \n", + "\n", + " triple_cardinality_M:M_frac triple_cardinality_same_rel_1:1_frac \\\n", + "r \n", + "0 1.0 0.001628 \n", + "1 1.0 0.002822 \n", + "2 1.0 0.000000 \n", + "3 1.0 0.000000 \n", + "4 1.0 0.000062 \n", + "\n", + " triple_cardinality_same_rel_1:M_frac triple_cardinality_same_rel_M:1_frac \\\n", + "r \n", + "0 0.023586 0.064959 \n", + "1 0.104251 0.027518 \n", + "2 0.000254 0.000239 \n", + "3 0.000868 0.000970 \n", + "4 0.000531 0.000593 \n", + "\n", + " triple_cardinality_same_rel_M:M_frac \n", + "r \n", + "0 0.909827 \n", + "1 0.865408 \n", + "2 0.999507 \n", + "3 0.998162 \n", + "4 0.998814 \n", + "\n", + "[5 rows x 51 columns]" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"kgtt.aggregate_by_relation(edge_dcs).head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice on the left the columns `num_triples`, `frac_triples`, `unique_h`, `unique_t` giving additional statistics for relation types (number of edges and relative frequency, number of unique entities used as heads/tails by triples of the relation type).\n", + "\n", + "Similarly, by aggregating the `edge_eps` DataFrame we can look at the distribution of edge topological patterns within each relation type." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
num_triplesfrac_triplesunique_hunique_tis_loop_fracis_symmetric_frachas_inverse_fracn_inverse_relations_meann_inverse_relations_stdn_inverse_relations_quartile1...n_triangles_meann_triangles_stdn_triangles_quartile1n_triangles_quartile2n_triangles_quartile3n_undirected_triangles_meann_undirected_triangles_stdn_undirected_triangles_quartile1n_undirected_triangles_quartile2n_undirected_triangles_quartile3
r
0810660.015931974293370.0000120.0002220.0094740.0187620.3361200.0...49.615572816.7767383.07.016.00136.4528411421.83000818.0036.068.0
156690.00111469815360.0000000.0003530.0615630.5277832.5023230.0...1630.9121546563.52273613.084.0234.002864.1044289520.11681254.00224.0586.0
2669540.0131586126120.0000000.9473670.99825311.0191184.7072468.0...27666.69492515797.64974614990.025934.038868.5032678.99356318619.01605616691.0032647.548637.0
3195850.0038494914910.0000000.9473580.99959213.4172584.58515010.0...30250.85897417053.92541016204.028873.043798.0032696.12535118685.28168616563.0032808.048653.0
4320340.0062955265250.0000000.9473680.99937613.2995884.42789810.0...30942.23119216888.95665617303.030137.544161.2532685.21046418685.26715416645.2532580.048767.0
\n", + "

5 rows × 32 columns

\n", + "
" + ], + "text/plain": [ + " num_triples frac_triples unique_h unique_t is_loop_frac \\\n", + "r \n", + "0 81066 0.015931 9742 9337 0.000012 \n", + "1 5669 0.001114 698 1536 0.000000 \n", + "2 66954 0.013158 612 612 0.000000 \n", + "3 19585 0.003849 491 491 0.000000 \n", + "4 32034 0.006295 526 525 0.000000 \n", + "\n", + " is_symmetric_frac has_inverse_frac n_inverse_relations_mean \\\n", + "r \n", + "0 0.000222 0.009474 0.018762 \n", + "1 0.000353 0.061563 0.527783 \n", + "2 0.947367 0.998253 11.019118 \n", + "3 0.947358 0.999592 13.417258 \n", + "4 0.947368 0.999376 13.299588 \n", + "\n", + " n_inverse_relations_std n_inverse_relations_quartile1 ... \\\n", + "r ... \n", + "0 0.336120 0.0 ... \n", + "1 2.502323 0.0 ... \n", + "2 4.707246 8.0 ... \n", + "3 4.585150 10.0 ... \n", + "4 4.427898 10.0 ... \n", + "\n", + " n_triangles_mean n_triangles_std n_triangles_quartile1 \\\n", + "r \n", + "0 49.615572 816.776738 3.0 \n", + "1 1630.912154 6563.522736 13.0 \n", + "2 27666.694925 15797.649746 14990.0 \n", + "3 30250.858974 17053.925410 16204.0 \n", + "4 30942.231192 16888.956656 17303.0 \n", + "\n", + " n_triangles_quartile2 n_triangles_quartile3 n_undirected_triangles_mean \\\n", + "r \n", + "0 7.0 16.00 136.452841 \n", + "1 84.0 234.00 2864.104428 \n", + "2 25934.0 38868.50 32678.993563 \n", + "3 28873.0 43798.00 32696.125351 \n", + "4 30137.5 44161.25 32685.210464 \n", + "\n", + " n_undirected_triangles_std n_undirected_triangles_quartile1 \\\n", + "r \n", + "0 1421.830008 18.00 \n", + "1 9520.116812 54.00 \n", + "2 18619.016056 16691.00 \n", + "3 18685.281686 16563.00 \n", + "4 18685.267154 16645.25 \n", + "\n", + " n_undirected_triangles_quartile2 n_undirected_triangles_quartile3 \n", + "r \n", + "0 36.0 68.0 \n", + "1 224.0 586.0 \n", + "2 32647.5 48637.0 \n", + "3 32808.0 48653.0 \n", + "4 32580.0 48767.0 \n", + "\n", + "[5 rows x 32 columns]" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"kgtt.aggregate_by_relation(edge_eps).head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Additional methods are provided for the analysis at the relation level: `jaccard_similarity_relation_sets` to compute the Jaccard similarity of the sets of head/tail entities used by each relation; `relational_affinity_ingram` to compute the InGram pairwise relation similarity (see [paper](https://arxiv.org/abs/2305.19987)). " + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
r1r2num_triples_bothfrac_triples_bothnum_entities_bothnum_h_r1num_h_r2num_t_r1num_t_r2jaccard_head_headjaccard_head_tailjaccard_tail_headjaccard_tail_tailjaccard_both
101867350.017046143389742698933715360.0641120.0553010.0373170.0796350.112289
2021480200.02908913934974261293376120.0565310.0565310.0319470.0319470.041768
3031006510.01978013929974249193374910.0450370.0450370.0265300.0265300.033527
4041131000.02222713931974252693375250.0482900.0481880.0276100.0275060.035819
5051322760.02599513931974257693375780.0532870.0534910.0298150.0299160.039624
.............................................
24464749180210.0035422414806188580918860.1311480.1315680.1324090.1323530.135874
244747503741930.0735385592806522480952280.0823910.0825260.0833180.0834530.084764
24974849431220.008475340727281885272918860.3712840.3699520.3719890.3710640.370707
249848503992940.078471620127285224272952280.2873560.2873790.2860610.2860840.289147
254949503880920.076269616918855224188652280.1568760.1567730.1568500.1567480.158373
\n", + "

1275 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " r1 r2 num_triples_both frac_triples_both num_entities_both \\\n", + "1 0 1 86735 0.017046 14338 \n", + "2 0 2 148020 0.029089 13934 \n", + "3 0 3 100651 0.019780 13929 \n", + "4 0 4 113100 0.022227 13931 \n", + "5 0 5 132276 0.025995 13931 \n", + "... .. .. ... ... ... \n", + "2446 47 49 18021 0.003542 2414 \n", + "2447 47 50 374193 0.073538 5592 \n", + "2497 48 49 43122 0.008475 3407 \n", + "2498 48 50 399294 0.078471 6201 \n", + "2549 49 50 388092 0.076269 6169 \n", + "\n", + " num_h_r1 num_h_r2 num_t_r1 num_t_r2 jaccard_head_head \\\n", + "1 9742 698 9337 1536 0.064112 \n", + "2 9742 612 9337 612 0.056531 \n", + "3 9742 491 9337 491 0.045037 \n", + "4 9742 526 9337 525 0.048290 \n", + "5 9742 576 9337 578 0.053287 \n", + "... ... ... ... ... ... \n", + "2446 806 1885 809 1886 0.131148 \n", + "2447 806 5224 809 5228 0.082391 \n", + "2497 2728 1885 2729 1886 0.371284 \n", + "2498 2728 5224 2729 5228 0.287356 \n", + "2549 1885 5224 1886 5228 0.156876 \n", + "\n", + " jaccard_head_tail jaccard_tail_head jaccard_tail_tail jaccard_both \n", + "1 0.055301 0.037317 0.079635 0.112289 \n", + "2 0.056531 0.031947 0.031947 0.041768 \n", + "3 0.045037 0.026530 0.026530 0.033527 \n", + "4 0.048188 0.027610 0.027506 0.035819 \n", + "5 0.053491 0.029815 0.029916 0.039624 \n", + "... ... ... ... ... \n", + "2446 0.131568 0.132409 0.132353 0.135874 \n", + "2447 0.082526 0.083318 0.083453 0.084764 \n", + "2497 0.369952 0.371989 0.371064 0.370707 \n", + "2498 0.287379 0.286061 0.286084 0.289147 \n", + "2549 0.156773 0.156850 0.156748 0.158373 \n", + "\n", + "[1275 rows x 14 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kgtt.jaccard_similarity_relation_sets(biokg_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
h_relationt_relationedge_weight
0015.565931
1020.244410
2030.049564
3040.079068
4050.159787
............
25455045393.082900
25465046421.818843
254750471.194898
2548504818.124874
254950495.420267
\n", + "

2550 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " h_relation t_relation edge_weight\n", + "0 0 1 5.565931\n", + "1 0 2 0.244410\n", + "2 0 3 0.049564\n", + "3 0 4 0.079068\n", + "4 0 5 0.159787\n", + "... ... ... ...\n", + "2545 50 45 393.082900\n", + "2546 50 46 421.818843\n", + "2547 50 47 1.194898\n", + "2548 50 48 18.124874\n", + "2549 50 49 5.420267\n", + "\n", + "[2550 rows x 3 columns]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kgtt.relational_affinity_ingram(biokg_df)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv38", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/source/user_guide.rst b/docs/source/user_guide.rst new file mode 100644 index 0000000..9e6714e --- /dev/null +++ b/docs/source/user_guide.rst @@ -0,0 +1,27 @@ +User guide +================ + +Installation and usage +------------------------ + +1. Pip install :code:`kg-topology-toolbox`: + +.. code-block:: + + pip install git+https://github.com/graphcore-research/kg-topology-toolbox.git + +2. Import and use: + +.. code-block:: + + from kg_topology_toolbox import KGTopologyToolbox + +.. Note:: The library has been tested on Ubuntu 20.04, Python >= 3.8. + + +Getting started +------------------------ + +For a walkthrough of the library functionalities, see the `Jupyter notebook `_. + +For more details, have a look at the `API reference `_ page. 
\ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..6628c79 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,84 @@ +[build-system] +requires = ["setuptools>=61.0.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "kg-topology-toolbox" +version = "0.1.0" +authors = [ + {name = "Alberto Cattaneo"}, + {name = "Daniel Justus"}, + {name = "Thomas Martynec"}, + {name = "Stephen Bonner"}, +] +description = "A Python toolbox for Knowledge Graph topology metrics." +readme = "README.md" +license = {text = "MIT License"} +requires-python = ">=3.8" +dependencies = [ + 'numpy >= 1.24.4', + 'pandas >= 2.0.3', + 'scipy >= 1.10.1', +] + +[project.optional-dependencies] +dev = [ + 'black', + 'flake8', + 'isort', + 'mypy', + 'pandas-stubs >= 2.0.3.230814', + 'pytest >= 8.1.1', + 'pytest-cov', + 'sphinx >= 7.1.2', + 'sphinx_rtd_theme', + 'sphinx_autodoc_typehints', + 'sphinx-automodapi', + 'myst-parser', +] + +[project.urls] +repository = "https://github.com/graphcore-research/kg-topology-toolbox" + +[tool.setuptools.packages.find] +where = ["src"] +exclude = ["tests"] +namespaces = true + +[tool.black] +target-version = ["py38", "py39", "py310", "py311"] + +[tool.isort] +profile = "black" + +[tool.mypy] +pretty = true +show_error_codes = true +strict = true +check_untyped_defs = true +plugins = ["numpy.typing.mypy_plugin"] + +[[tool.mypy.overrides]] +module = "scipy.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "setuptools.*" +ignore_missing_imports = true + +[tool.pytest] +addopts = ["--no-cov-on-fail"] + +[tool.pytest.ini_options] +pythonpath = [ + "src" +] + +[tool.coverage.report] +skip_covered = true +show_missing = true +exclude_lines = [ + "pragma: no cover", + "raise NotImplementedError", + "assert False", +] \ No newline at end of file diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..45a15b1 --- /dev/null +++ 
b/requirements-dev.txt @@ -0,0 +1,15 @@ +-r requirements.txt +black +flake8 +isort +mypy +pandas-stubs>=2.0.3.230814 +pandoc +pytest>=8.1.1 +pytest-cov +sphinx>=7.1.2 +sphinx_rtd_theme +sphinx_autodoc_typehints +sphinx-automodapi +nbsphinx +myst-parser diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..f4eff4d --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +numpy>=1.24.4 +pandas>=2.0.3 +scipy>=1.10.1 diff --git a/src/kg_topology_toolbox/__init__.py b/src/kg_topology_toolbox/__init__.py new file mode 100644 index 0000000..7ff262f --- /dev/null +++ b/src/kg_topology_toolbox/__init__.py @@ -0,0 +1,8 @@ +# Copyright (c) 2023 Graphcore Ltd. All rights reserved. + +""" +A Python toolbox for computing topological metrics and statistics for Knowledge Graphs. +""" + +from . import utils # NOQA:F401,E402,F403 +from .topology_toolbox import * # NOQA:F401,E402,F403 diff --git a/src/kg_topology_toolbox/topology_toolbox.py b/src/kg_topology_toolbox/topology_toolbox.py new file mode 100644 index 0000000..4fffe64 --- /dev/null +++ b/src/kg_topology_toolbox/topology_toolbox.py @@ -0,0 +1,588 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2023 Graphcore Ltd. All rights reserved. + +""" +Topology toolbox main functionalities +""" + +from collections.abc import Iterable + +import numpy as np +import pandas as pd +from scipy.sparse import coo_array + +from kg_topology_toolbox.utils import composition_count, jaccard_similarity + + +class KGTopologyToolbox: + """ + Toolbox class to compute various Knowledge Graph topology statistics. + """ + + def node_degree_summary( + self, df: pd.DataFrame, return_relation_list: bool = False + ) -> pd.DataFrame: + """ + For each entity, this function computes the number of edges having it as a head + (head-degree, or out-degree), as a tail (tail-degree, or in-degree) + or one of the two (total-degree) in the Knowledge Graph. + The in-going and out-going relation types are also identified. 
+ + The output dataframe is indexed on the IDs of the graph entities. + + :param df: A graph represented as a pd.DataFrame. + Must contain at least three columns `h`, `r`, `t`. + :param return_relation_list: If True, return the list of unique relations going + in/out of an entity. WARNING: expensive for large graphs. + + :return: The results dataframe, indexed over the same entity ID `e` used in df, + with columns: + + - **h_degree** (int): Number of triples with head entity `e`. + - **t_degree** (int): Number of triples with tail entity `e`. + - **tot_degree** (int): Number of triples with head entity `e` or tail entity `e`. + - **h_unique_rel** (int): Number of distinct relation types + among edges with head entity `e`. + - **h_rel_list** (list): List of unique relation types among edges + with head entity `e`. + - **t_unique_rel** (int): Number of distinct relation types + among edges with tail entity `e`. + - **t_rel_list** (list): List of unique relation types among edges + with tail entity `e`. + - **n_loops** (int): number of loops around entity `e`. 
+ """ + n_entity = df[["h", "t"]].max().max() + 1 + h_rel_list = {"h_rel_list": ("r", "unique")} if return_relation_list else {} + t_rel_list = {"t_rel_list": ("r", "unique")} if return_relation_list else {} + nodes = pd.DataFrame( + df.groupby("h").agg( + h_degree=("r", "count"), h_unique_rel=("r", "nunique"), **h_rel_list # type: ignore + ), + index=np.arange(n_entity), + ) + nodes = nodes.merge( + df.groupby("t").agg( + t_degree=("r", "count"), t_unique_rel=("r", "nunique"), **t_rel_list # type: ignore + ), + left_index=True, + right_index=True, + how="left", + ) + nodes = nodes.merge( + df[df.h == df.t].groupby("h").agg(n_loops=("r", "count")), + left_index=True, + right_index=True, + how="left", + ) + nodes[["h_degree", "h_unique_rel", "t_degree", "t_unique_rel", "n_loops"]] = ( + nodes[["h_degree", "h_unique_rel", "t_degree", "t_unique_rel", "n_loops"]] + .fillna(0) + .astype(int) + ) + nodes["tot_degree"] = nodes["h_degree"] + nodes["t_degree"] - nodes["n_loops"] + + return nodes[ + ["h_degree", "t_degree", "tot_degree", "h_unique_rel"] + + (["h_rel_list"] if return_relation_list else []) + + ["t_unique_rel"] + + (["t_rel_list"] if return_relation_list else []) + + ["n_loops"] + ] + + def edge_degree_cardinality_summary(self, df: pd.DataFrame) -> pd.DataFrame: + """ + For each triple, this function computes the number of edges with the same head + (head-degree, or out-degree), the same tail (tail-degree, or in-degree) + or one of the two (total-degree) in the Knowledge Graph. + Based on entity degrees, each triple is classified as either one-to-one + (out-degree=in-degree=1), one-to-many (out-degree>1, in-degree=1), + many-to-one(out-degree=1, in-degree>1) or many-to-many + (in-degree>1, out-degree>1). + + The output dataframe maintains the same indexing and ordering of triples + as the input one. + + :param df: A graph represented as a pd.DataFrame. + Must contain at least three columns `h`, `r`, `t`. + + :return: The results dataframe. 
Contains the following columns + (in addition to `h`, `r`, `t` in ``df``): + + - **h_unique_rel** (int): Number of distinct relation types + among edges with head entity h. + - **h_degree** (int): Number of triples with head entity h. + - **h_degree_same_rel** (int): Number of triples with head entity h + and relation type r. + - **t_unique_rel** (int): Number of distinct relation types + among edges with tail entity t. + - **t_degree** (int): Number of triples with tail entity t. + - **t_degree_same_rel** (int): Number of triples with tail entity t + and relation type r. + - **tot_degree** (int): Number of triples with head entity h or + tail entity t. + - **tot_degree_same_rel** (int): Number of triples with head entity h or + tail entity t, and relation type r. + - **triple_cardinality** (str): cardinality type of the edge. + - **triple_cardinality_same_rel** (str): cardinality type of the edge in + the subgraph of edges with relation type r. + """ + gr_by_h_count = df.groupby("h", as_index=False).agg( + h_unique_rel=("r", "nunique"), h_degree=("t", "count") + ) + gr_by_hr_count = df.groupby(["h", "r"], as_index=False).agg( + h_degree_same_rel=("t", "count") + ) + gr_by_t_count = df.groupby("t", as_index=False).agg( + t_unique_rel=("r", "nunique"), t_degree=("h", "count") + ) + gr_by_rt_count = df.groupby(["r", "t"], as_index=False).agg( + t_degree_same_rel=("h", "count") + ) + + df_res = df.merge(gr_by_h_count, left_on=["h"], right_on=["h"], how="left") + df_res = df_res.merge( + gr_by_hr_count, left_on=["h", "r"], right_on=["h", "r"], how="left" + ) + df_res = df_res.merge(gr_by_t_count, left_on=["t"], right_on=["t"], how="left") + df_res = df_res.merge( + gr_by_rt_count, left_on=["t", "r"], right_on=["t", "r"], how="left" + ) + # compute number of parallel edges to avoid double-counting them + # in total degree + num_parallel = df_res.merge( + df.groupby(["h", "t"], as_index=False).agg(n_parallel=("r", "count")), + left_on=["h", "t"], + right_on=["h", "t"], + 
how="left", + ) + df_res["tot_degree"] = ( + df_res.h_degree + df_res.t_degree - num_parallel.n_parallel + ) + # when restricting to the relation type, there is only one edge + # (the edge itself) that is double-counted + df_res["tot_degree_same_rel"] = ( + df_res.h_degree_same_rel + df_res.t_degree_same_rel - 1 + ) + + # check if the values in the pair (h_degree, t_degree) are =1 or >1 + # to determine the edge cardinality + legend = { + 0: "M:M", + 1: "1:M", + 2: "M:1", + 3: "1:1", + } + for suffix in ["", "_same_rel"]: + edge_type = 2 * (df_res["h_degree" + suffix] == 1) + ( + df_res["t_degree" + suffix] == 1 + ) + df_res["triple_cardinality" + suffix] = edge_type.apply(lambda x: legend[x]) + return df_res + + def edge_pattern_summary( + self, + df: pd.DataFrame, + return_metapath_list: bool = False, + composition_chunk_size: int = 2**8, + composition_workers: int = 32, + ) -> pd.DataFrame: + """ + This function analyses the structural properties of each edge in the graph: + symmetry, presence of inverse/inference(=parallel) edges and + triangles supported on the edge. + + The output dataframe maintains the same indexing and ordering of triples + as the input one. + + :param df: A graph represented as a pd.DataFrame. + Must contain at least three columns `h`, `r`, `t`. + :param return_metapath_list: If True, return the list of unique metapaths for all + triangles supported over one edge. WARNING: very expensive for large graphs. + :param composition_chunk_size: Size of column chunks of sparse adjacency matrix + to compute the triangle count. + :param composition_workers: Number of workers to compute the triangle count. + + :return: The results dataframe. Contains the following columns + (in addition to `h`, `r`, `t` in ``df``): + + - **is_loop** (bool): True if the triple is a loop (``h == t``). + - **is_symmetric** (bool): True if the triple (t, r, h) is also contained + in the graph (assuming t and h are different). 
+ - **has_inverse** (bool): True if the graph contains one or more triples + (t, r', h) with ``r' != r``. + - **n_inverse_relations** (int): The number of inverse relations r'. + - **inverse_edge_types** (list): All relations r' (including r if the edge + is symmetric) such that (t, r', h) is in the graph. + - **has_inference** (bool): True if the graph contains one or more triples + (h, r', t) with ``r' != r``. + - **n_inference_relations** (int): The number of inference relations r'. + - **inference_edge_types** (list): All relations r' (including r) such that + (h, r', t) is in the graph. + - **has_composition** (bool): True if the graph contains one or more triangles + supported on the edge: (h, r1, x) + (x, r2, t). + - **n_triangles** (int): The number of triangles. + - **has_undirected_composition** (bool): True if the graph contains one or more + undirected triangles supported on the edge. + - **n_undirected_triangles** (int): The number of undirected triangles + (considering all edges as bidirectional). + - **metapath_list** (list): The list of unique metapaths "r1-r2" + for the directed triangles. 
+ """ + # symmetry-asymmetry + # edges with h/t switched + df_inv = df.reindex(columns=["t", "r", "h"]).rename( + columns={"t": "h", "r": "r", "h": "t"} + ) + df_res = pd.DataFrame({"h": df.h, "r": df.r, "t": df.t, "is_symmetric": False}) + df_res.loc[ + df.reset_index().merge(df_inv)["index"], + "is_symmetric", + ] = True + # loops are treated separately + df_res["is_loop"] = df_res.h == df_res.t + df_res.loc[df_res.h == df_res.t, "is_symmetric"] = False + + # inverse + unique_inv_r_by_ht = df_inv.groupby(["h", "t"], as_index=False).agg( + inverse_edge_types=("r", list), + ) + df_res = df_res.merge( + unique_inv_r_by_ht, left_on=["h", "t"], right_on=["h", "t"], how="left" + ) + df_res["inverse_edge_types"] = df_res["inverse_edge_types"].apply( + lambda agg: agg if isinstance(agg, list) else [] + ) + # if the edge (h,r,t) is symmetric or loop, we do not consider the relation + # r as a proper inverse + df_res["n_inverse_relations"] = ( + df_res.inverse_edge_types.str.len() - df_res.is_symmetric - df_res.is_loop + ) + df_res["n_inverse_relations"] = ( + df_res["n_inverse_relations"].fillna(0).astype(int) + ) + df_res["has_inverse"] = df_res["n_inverse_relations"] > 0 + + # inference + edges_between_ht = unique_inv_r_by_ht.reindex( + columns=["t", "h", "inverse_edge_types"] + ).rename( + columns={"t": "h", "h": "t", "inverse_edge_types": "inference_edge_types"} + ) + df_res = df_res.merge( + edges_between_ht, left_on=["h", "t"], right_on=["h", "t"], how="left" + ) + # inference_edge_types always contains the edge itself, which we need to drop + df_res["n_inference_relations"] = df_res.inference_edge_types.str.len() - 1 + df_res["has_inference"] = df_res["n_inference_relations"] > 0 + + # composition & metapaths + # discard loops as edges of a triangle + df_wo_loops = df[df.h != df.t] + if return_metapath_list: + # 2-hop paths + df_bridges = df_wo_loops.merge( + df_wo_loops, left_on="t", right_on="h", how="inner" + ) + df_triangles = df_wo_loops.merge( + df_bridges, 
def aggregate_by_relation(self, edge_topology_df: pd.DataFrame) -> pd.DataFrame:
    """
    Aggregate topology metrics of all triples of the same relation type.

    To be applied to the output dataframe of either
    :meth:`KGTopologyToolbox.edge_degree_cardinality_summary` or
    :meth:`KGTopologyToolbox.edge_pattern_summary`.

    The returned dataframe is indexed over relation type IDs, with columns
    giving the aggregated statistics of triples of the corresponding relation.
    The name of the columns is of the form ``column_name_in_input_df + suffix``.
    The aggregation is performed by returning:

    - for numerical metrics (any integer or floating dtype): mean, standard
      deviation and quartiles
      (``suffix`` = "_mean", "_std", "_quartile1", "_quartile2", "_quartile3");
    - for boolean metrics: the fraction of triples of the relation type
      with metric = True (``suffix`` = "_frac");
    - for string metrics: for each possible label, the fraction of triples
      of the relation type with that metric value (``suffix`` = "_{label}_frac");
    - for list metrics: the unique metric values across triples of the relation
      type (``suffix`` = "_unique").

    Columns that fit none of the above are skipped and a message is printed.

    :param edge_topology_df: pd.DataFrame of edge topology metrics.
        Must contain at least three columns `h`, `r`, `t`.

    :return: The results dataframe. In addition to the columns with the aggregated
        metrics by relation type, it also contains columns:

        - **num_triples** (int): Number of triples for each relation type.
        - **frac_triples** (float): Fraction of overall triples represented by each
          relation type.
        - **unique_h** (int): Number of unique head entities used by triples of each
          relation type.
        - **unique_t** (int): Number of unique tail entities used by triples of each
          relation type.
    """
    df_by_r = edge_topology_df.groupby("r")
    df_res = df_by_r.agg(num_triples=("r", "count"))
    df_res["frac_triples"] = df_res["num_triples"] / edge_topology_df.shape[0]
    for col, col_dtype in edge_topology_df.drop(columns=["r"]).dtypes.items():
        if col in ("h", "t"):
            df_res[f"unique_{col}"] = df_by_r[col].nunique()
        elif col_dtype == object:
            # Object columns hold either string labels or list-like values; the
            # first row decides which (an empty frame has nothing to inspect).
            sample = edge_topology_df[col].iloc[0] if len(edge_topology_df) else None
            if isinstance(sample, str):
                for label in np.unique(edge_topology_df[col]):
                    label_counts = (
                        edge_topology_df[edge_topology_df[col] == label]
                        .groupby("r")[col]
                        .count()
                    )
                    # relations that never take this label are missing from
                    # label_counts, hence the fillna(0)
                    df_res[f"{col}_{label}_frac"] = (
                        label_counts / df_res["num_triples"]
                    ).fillna(0)
            elif isinstance(sample, Iterable):
                # `or [[]]` keeps np.concatenate happy when every list is empty
                df_res[f"{col}_unique"] = pd.Series(
                    {
                        rel: np.unique(
                            np.concatenate(
                                [lst for lst in lists if len(lst) > 0] or [[]]
                            )
                        ).tolist()
                        for rel, lists in df_by_r[col]
                    },
                    dtype=object,
                )
            else:
                print(f"Skipping column {col}: no known aggregation mode")
                continue
        elif pd.api.types.is_bool_dtype(col_dtype):
            # must be tested before the numeric branch: bool counts as numeric
            df_res[f"{col}_frac"] = df_by_r[col].mean()
        elif pd.api.types.is_numeric_dtype(col_dtype):
            # dtype-family check, so int32 / uint / float32 columns are
            # aggregated too instead of being silently dropped
            df_res[f"{col}_mean"] = df_by_r[col].mean()
            df_res[f"{col}_std"] = df_by_r[col].std()
            for q in range(1, 4):
                df_res[f"{col}_quartile{q}"] = df_by_r[col].quantile(0.25 * q)
        else:
            print(f"Skipping column {col}: no known aggregation mode")
    return df_res


def jaccard_similarity_relation_sets(self, df: pd.DataFrame) -> pd.DataFrame:
    """
    Compute the similarity between relations defined as the Jaccard Similarity
    between sets of entities (heads and tails) for all pairs
    of relations in the graph.

    Relation IDs are used as lookup labels, so they do not need to be
    contiguous integers starting from 0.

    :param df: A graph represented as a pd.DataFrame.
        Must contain at least three columns `h`, `r`, `t`.

    :return: The results dataframe, with one row for each unordered pair of
        distinct relations (``r1 < r2``). Contains the following columns:

        - **r1** (int): Index of the first relation.
        - **r2** (int): Index of the second relation.
        - **num_triples_both** (int): Number of triples with relation r1/r2.
        - **frac_triples_both** (float): Fraction of triples with relation r1/r2.
        - **num_entities_both** (int): Number of unique entities (h or t) for triples
          with relation r1/r2.
        - **num_h_r1** (int): Number of unique head entities for relation r1.
        - **num_h_r2** (int): Number of unique head entities for relation r2.
        - **num_t_r1** (int): Number of unique tail entities for relation r1.
        - **num_t_r2** (int): Number of unique tail entities for relation r2.
        - **jaccard_head_head** (float): Jaccard similarity between the head set of r1
          and the head set of r2.
        - **jaccard_tail_tail** (float): Jaccard similarity between the tail set of r1
          and the tail set of r2.
        - **jaccard_head_tail** (float): Jaccard similarity between the head set of r1
          and the tail set of r2.
        - **jaccard_tail_head** (float): Jaccard similarity between the tail set of r1
          and the head set of r2.
        - **jaccard_both** (float): Jaccard similarity between the full entity set
          of r1 and r2.
    """
    ent_unique = df.groupby("r", as_index=False).agg(
        num_triples=("r", "count"), head=("h", "unique"), tail=("t", "unique")
    )
    ent_unique["both"] = ent_unique.apply(
        lambda x: np.unique(np.concatenate([x["head"], x["tail"]])), axis=1
    )
    ent_unique["num_h"] = ent_unique["head"].str.len()
    ent_unique["num_t"] = ent_unique["tail"].str.len()
    # Entity sets keyed by relation ID: label lookups below must hit the
    # relation itself, not the row position in `ent_unique`.
    ent_by_r = ent_unique.set_index("r")

    r_num = ent_unique[["r", "num_h", "num_t", "num_triples"]]
    # combinations of relations
    df_res = pd.merge(
        r_num.rename(columns={"r": "r1"}),
        r_num.rename(columns={"r": "r2"}),
        suffixes=("_r1", "_r2"),
        how="cross",
    )
    # keep each unordered pair once; copy so that new columns can be added
    df_res = df_res[df_res.r1 < df_res.r2].copy()
    r1_ids = df_res["r1"].to_numpy()
    r2_ids = df_res["r2"].to_numpy()

    df_res["num_triples_both"] = df_res["num_triples_r1"] + df_res["num_triples_r2"]
    df_res["frac_triples_both"] = df_res["num_triples_both"] / df.shape[0]
    both_sets = ent_by_r["both"]
    df_res["num_entities_both"] = [
        len(np.union1d(both_sets.loc[a], both_sets.loc[b]))
        for a, b in zip(r1_ids, r2_ids)
    ]
    df_res = df_res[
        [
            "r1",
            "r2",
            "num_triples_both",
            "frac_triples_both",
            "num_entities_both",
            "num_h_r1",
            "num_h_r2",
            "num_t_r1",
            "num_t_r2",
        ]
    ].copy()
    for r1_ent in ["head", "tail"]:
        for r2_ent in ["head", "tail"]:
            df_res[f"jaccard_{r1_ent}_{r2_ent}"] = [
                jaccard_similarity(a, b)
                for a, b in zip(
                    ent_by_r.loc[r1_ids, r1_ent],
                    ent_by_r.loc[r2_ids, r2_ent],
                )
            ]
    df_res["jaccard_both"] = [
        jaccard_similarity(a, b)
        for a, b in zip(ent_by_r.loc[r1_ids, "both"], ent_by_r.loc[r2_ids, "both"])
    ]
    return df_res


def relational_affinity_ingram(
    self, df: pd.DataFrame, min_max_norm: bool = False
) -> pd.DataFrame:
    """
    Compute the similarity between relations based on the approach proposed in
    InGram: Inductive Knowledge Graph Embedding via Relation Graphs,
    https://arxiv.org/abs/2305.19987.

    Only the pairs of relations with ``affinity > 0`` are shown in the
    returned dataframe.

    :param df: A graph represented as a pd.DataFrame.
        Must contain at least three columns `h`, `r`, `t`, holding
        non-negative integer IDs (they are used as matrix indices).
    :param min_max_norm: min-max normalization of edge weights. Defaults to False.
        Skipped when all affinities are equal (nothing to rescale).

    :return: The results dataframe. Contains the following columns:

        - **h_relation** (int): Index of the head relation.
        - **t_relation** (int): Index of the tail relation.
        - **edge_weight** (float): Weight for the affinity between
          the head and the tail relation.
    """
    n_entities = df[["h", "t"]].max().max() + 1
    n_rels = df.r.max() + 1

    hr_freqs = df.groupby(["h", "r"], as_index=False).count()
    # normalize by global h frequency
    hr_freqs["t"] = hr_freqs["t"] / hr_freqs.groupby("h")["t"].transform("sum")
    rt_freqs = df.groupby(["t", "r"], as_index=False).count()
    # normalize by global t frequency
    rt_freqs["h"] = rt_freqs["h"] / rt_freqs.groupby("t")["h"].transform("sum")

    # entity-by-relation matrices of normalized head / tail frequencies
    E_h = coo_array(
        (hr_freqs.t, (hr_freqs.h, hr_freqs.r)),
        shape=(n_entities, n_rels),
    )
    E_t = coo_array(
        (rt_freqs.h, (rt_freqs.t, rt_freqs.r)),
        shape=(n_entities, n_rels),
    )

    A = (E_h.T @ E_h).toarray() + (E_t.T @ E_t).toarray()
    # the affinity of a relation with itself is not reported
    A[np.diag_indices_from(A)] = 0

    if min_max_norm:
        a_min, a_max = np.min(A), np.max(A)
        # With a constant matrix (e.g. one relation, or no shared entities)
        # the rescaling would be 0/0 and every entry would turn into NaN,
        # which np.nonzero then reports as a spurious affinity.
        if a_max > a_min:
            A = (A - a_min) / (a_max - a_min)

    h_rels, t_rels = np.nonzero(A)
    return pd.DataFrame(
        {
            "h_relation": h_rels,
            "t_relation": t_rels,
            "edge_weight": A[h_rels, t_rels],
        }
    )
def _composition_count_worker(
    adj_csr: csr_array, adj_csc: csc_array, tail_shift: int = 0
) -> pd.DataFrame:
    """Count the 2-hop paths closing each edge in a slice of adjacency columns.

    :param adj_csr: The full adjacency matrix.
    :param adj_csc: A slice of columns of the same adjacency matrix.
    :param tail_shift: Index of the first column of the slice in the full
        matrix; added to the column indices of the result. Defaults to 0.

    :return: A dataframe with columns `h`, `t`, `n_triangles`, with one row for
        each (h, t) pair that is an edge of the slice and is also connected
        by at least one 2-hop path.
    """
    # (adj @ adj_slice)[h, t] is the number of paths h -> x -> t
    adj_2hop = adj_csr @ adj_csc
    # keep the counts only where (h, t) is itself an edge; `multiply` is
    # element-wise for any scipy sparse container
    adj_composition = adj_2hop.tocsc().multiply(adj_csc > 0).tocoo()
    return pd.DataFrame(
        dict(
            h=adj_composition.row,
            t=adj_composition.col + tail_shift,
            n_triangles=adj_composition.data,
        )
    )


def composition_count(
    df: pd.DataFrame, chunk_size: int, workers: int, directed: bool = True
) -> pd.DataFrame:
    """A helper function to compute the composition count of a graph.

    :param df: A graph represented as a pd.DataFrame. Must contain the columns
        `h` and `t`, holding non-negative integer entity IDs; any other column
        is ignored. No self-loops should be present in the graph.
    :param chunk_size: Size of chunks of columns of the adjacency matrix to be
        processed together.
    :param workers: Number of workers processing chunks concurrently
    :param directed: Boolean flag. If false, bidirectional edges are considered for
        triangles by adding the adjacency matrix and its transposed. Defaults to True.

    :return: The results dataframe. Contains the following columns:

        - **h** (int): Index of the head entity.
        - **t** (int): Index of the tail entity.
        - **n_triangles** (int): Number of compositions for the (h, t) edge.
    """
    if len(df) == 0:
        # no edges, no triangles (and no maximum entity ID to size the matrix)
        empty_ids = np.array([], dtype=np.int64)
        return pd.DataFrame(dict(h=empty_ids, t=empty_ids, n_triangles=empty_ids))

    # Size the matrix from the entity columns only: relation IDs or any other
    # column of `df` must not influence (or break) the shape.
    n_nodes = int(df[["h", "t"]].to_numpy().max()) + 1
    # int64 counts: a narrower dtype makes the 2-hop path counts wrap around
    # silently once an edge closes more than 65535 paths.
    adj = coo_array(
        (
            np.ones(len(df), dtype=np.int64),
            (df["h"].to_numpy(), df["t"].to_numpy()),
        ),
        shape=(n_nodes, n_nodes),
    )
    if not directed:
        adj = adj + adj.T
    n_cols = adj.shape[1]
    adj_csr = adj.tocsr()
    adj_csc = adj.tocsc()
    # (offset of the first column, slice of columns) for each chunk
    chunks = [
        (start, adj_csc[:, start : min(start + chunk_size, n_cols)])
        for start in range(0, n_cols, chunk_size)
    ]

    if len(chunks) > 1 and workers > 1:
        with Pool(workers) as pool:
            df_composition_list = pool.starmap(
                _composition_count_worker,
                ((adj_csr, adj_slice, start) for start, adj_slice in chunks),
            )
    else:
        df_composition_list = [
            _composition_count_worker(adj_csr, adj_slice, start)
            for start, adj_slice in chunks
        ]

    return pd.concat(df_composition_list)
@pytest.mark.parametrize("return_metapath_list", [True, False])
def test_small_graph_metrics(return_metapath_list: bool) -> None:
    """Check degree, cardinality and pattern metrics on the small module-level graph."""
    # --- entity degrees statistics ---
    degree_summary = tools.edge_degree_cardinality_summary(df)
    expected_degrees = {
        "h_unique_rel": [2, 2, 2, 1, 2, 2, 1, 2],
        "h_degree": [3, 3, 3, 2, 3, 3, 2, 3],
        "h_degree_same_rel": [2, 1, 2, 2, 2, 1, 2, 2],
        "t_unique_rel": [2, 2, 2, 2, 2, 2, 2, 2],
        "t_degree": [3, 3, 3, 3, 2, 2, 3, 3],
        "t_degree_same_rel": [1, 2, 2, 1, 1, 1, 2, 2],
        "tot_degree": [4, 4, 5, 4, 3, 3, 4, 5],
        "tot_degree_same_rel": [2, 2, 3, 2, 2, 1, 3, 3],
    }
    for column, expected in expected_degrees.items():
        assert np.allclose(degree_summary[column], expected)

    # --- triple cardinality ---
    assert degree_summary["triple_cardinality"].tolist() == ["M:M"] * 8
    assert degree_summary["triple_cardinality_same_rel"].tolist() == [
        "1:M",
        "M:1",
        "M:M",
        "1:M",
        "1:M",
        "1:1",
        "M:M",
        "M:M",
    ]

    # --- relation patterns: symmetry, inverse, inference, composition ---
    pattern_summary = tools.edge_pattern_summary(
        df, return_metapath_list=return_metapath_list
    )
    yes, no = True, False
    expected_patterns = {
        "is_loop": [no, no, no, no, no, no, yes, yes],
        "is_symmetric": [no, no, yes, no, yes, no, no, no],
        "has_inverse": [no, no, yes, no, no, yes, no, no],
        "n_inverse_relations": [0, 0, 1, 0, 0, 1, 0, 0],
        "has_inference": [yes, yes, no, no, yes, yes, no, no],
        "n_inference_relations": [1, 1, 0, 0, 1, 1, 0, 0],
        "has_composition": [no, no, yes, no, no, no, no, no],
        "n_triangles": [0, 0, 2, 0, 0, 0, 0, 0],
        "n_undirected_triangles": [3, 3, 2, 6, 2, 2, 0, 0],
    }
    for column, expected in expected_patterns.items():
        assert np.allclose(pattern_summary[column], expected)

    # --- metapaths are only returned on request ---
    if return_metapath_list:
        assert pattern_summary["metapath_list"][2] == ["0-1", "1-1"]
def test_small_graph_metrics() -> None:
    """Check the per-relation aggregation of edge metrics on the five-node graph."""
    degree_agg = tools.aggregate_by_relation(tools.edge_degree_cardinality_summary(df))
    pattern_agg = tools.aggregate_by_relation(
        tools.edge_pattern_summary(df, return_metapath_list=True)
    )

    # numeric columns of the degree/cardinality aggregation, one value per relation
    expected_degree_agg = {
        "num_triples": [5, 5],
        "frac_triples": [0.5, 0.5],
        "unique_h": [3, 4],
        "unique_t": [4, 4],
        # entity_degree_statistics
        "h_degree_mean": [2.6, 2.2],
        "t_degree_mean": [2.2, 2.2],
        "tot_degree_mean": [3.6, 3.2],
        # triple_relation_cardinality
        "triple_cardinality_1:M_frac": [1 / 5, 0],
        "triple_cardinality_M:1_frac": [0, 2 / 5],
        "triple_cardinality_M:M_frac": [4 / 5, 3 / 5],
        "triple_cardinality_same_rel_1:1_frac": [1 / 5, 2 / 5],
        "triple_cardinality_same_rel_1:M_frac": [2 / 5, 1 / 5],
        "triple_cardinality_same_rel_M:1_frac": [0, 1 / 5],
        "triple_cardinality_same_rel_M:M_frac": [2 / 5, 1 / 5],
    }
    for column, expected in expected_degree_agg.items():
        assert np.allclose(degree_agg[column], expected)

    # numeric columns of the pattern aggregation
    expected_pattern_agg = {
        "is_loop_frac": [0, 0],
        "is_symmetric_frac": [2 / 5, 0],
        "has_inverse_frac": [2 / 5, 2 / 5],
        "has_composition_frac": [2 / 5, 2 / 5],
        "has_undirected_composition_frac": [1, 1],
        "has_inference_frac": [1 / 5, 1 / 5],
    }
    for column, expected in expected_pattern_agg.items():
        assert np.allclose(pattern_agg[column], expected)

    # list-valued columns: (values for relation 0, values for relation 1)
    expected_unique_lists = {
        "inverse_edge_types_unique": ([0, 1], [0]),
        "metapath_list_unique": (["0-1", "1-1"], ["1-0", "1-1"]),
        "inference_edge_types_unique": ([0, 1], [0, 1]),
    }
    for column, per_relation in expected_unique_lists.items():
        for relation, expected in enumerate(per_relation):
            assert pattern_agg[column][relation] == expected


def test_jaccard_similarity() -> None:
    """Check every Jaccard score for the single pair of relations in the graph."""
    similarity = tools.jaccard_similarity_relation_sets(df)
    expected_scores = {
        "jaccard_head_head": [2 / 5],
        "jaccard_tail_tail": [3 / 5],
        "jaccard_head_tail": [2 / 5],
        "jaccard_tail_head": [1],
        "jaccard_both": [1],
    }
    for column, expected in expected_scores.items():
        assert np.allclose(similarity[column], expected)


@pytest.mark.parametrize(
    "min_max_norm,expected", [(True, [1, 1]), (False, [7 / 6, 7 / 6])]
)
def test_ingram_affinity(min_max_norm: bool, expected: List[float]) -> None:
    """Check the InGram affinity weights with and without min-max normalization."""
    affinity = tools.relational_affinity_ingram(df, min_max_norm)
    assert np.allclose(affinity["edge_weight"], expected)