Skip to content

Commit

Permalink
Merge pull request biocypher#321 from mbaric758/neo4j_v5_patch
Browse files Browse the repository at this point in the history
Neo4j version 5 patch
  • Loading branch information
nilskre authored Apr 15, 2024
2 parents d47d0d7 + 85df009 commit bec1703
Show file tree
Hide file tree
Showing 11 changed files with 1,209 additions and 1,302 deletions.
2 changes: 1 addition & 1 deletion .github/actions/setup/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ runs:
with:
version: 1.5.1
virtualenvs-create: true
virtualenvs-in-project: true
virtualenvs-in-project: false
- name: Check Poetry installation
run: poetry --version
shell: bash
15 changes: 8 additions & 7 deletions .github/actions/test/action.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
name: 'Test and code quality'
description: 'Run tests and code quality checks'
inputs:
NEO4J_VERSION:
description: 'Neo4j version'

runs:
using: "composite"
Expand All @@ -12,7 +15,7 @@ runs:
uses: douglascamata/setup-docker-macos-action@v1-alpha
if: ${{ runner.os == 'macOS' }}
- name: Start Neo4j Docker
run: docker run --restart always --publish=7474:7474 --publish=7687:7687 --env NEO4J_AUTH=neo4j/your_password_here --env NEO4J_PLUGINS='["apoc"]' --env=NEO4J_ACCEPT_LICENSE_AGREEMENT=yes -d neo4j:4.4-enterprise
run: docker run --restart always --publish=7474:7474 --publish=7687:7687 --env NEO4J_AUTH=neo4j/your_password_here --env NEO4J_PLUGINS='["apoc"]' --env=NEO4J_ACCEPT_LICENSE_AGREEMENT=yes -d neo4j:${{ inputs.NEO4J_VERSION }}
shell: bash
if: ${{ runner.os != 'Windows' }}
- name: Start Postgres Docker
Expand All @@ -24,16 +27,14 @@ runs:
#----------------------------------------------
- name: Run Tests (Windows)
run: |
source .venv/scripts/activate
pytest --version
pytest --password=your_password_here
poetry run pytest --version
poetry run pytest --password=your_password_here
shell: bash
if: runner.os == 'Windows'
- name: Run tests (Linux and MacOS)
run: |
source .venv/bin/activate
pytest --version
pytest --password=your_password_here
poetry run pytest --version
poetry run pytest --password=your_password_here
shell: bash
if: runner.os != 'Windows'
- name: Check code quality
Expand Down
8 changes: 8 additions & 0 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ jobs:
matrix:
os: ["ubuntu-latest", "macos-latest", "windows-latest"]
python-version: ["3.9", "3.10", "3.11"]
neo4j-version: ["4.4-enterprise", "5.17.0-enterprise"]
exclude:
# Windows tests don't use the PostgreSQL/Neo4j Docker containers
- os: "windows-latest"
neo4j-version: "5.17.0-enterprise"
runs-on: ${{ matrix.os }}
env:
POETRY_VERSION: 1.5.1
Expand All @@ -34,8 +39,11 @@ jobs:
uses: ./.github/actions/install
with:
PYTHON_VERSION: ${{ matrix.python-version }}

#----------------------------------------------
# run tests and code quality checks
#----------------------------------------------
- name: Run tests
uses: ./.github/actions/test
with:
NEO4J_VERSION: ${{ matrix.neo4j-version }}
45 changes: 36 additions & 9 deletions biocypher/_connect.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@
"""
BioCypher 'online' mode. Handles connection and manipulation of a running DBMS.
"""
import subprocess

from ._logger import logger

logger.debug(f"Loading module {__name__}.")

from typing import Optional
from collections.abc import Iterable
import itertools

Expand All @@ -24,7 +25,6 @@
from . import _misc
from ._config import config as _config
from ._create import BioCypherEdge, BioCypherNode
from ._ontology import Ontology
from ._translate import Translator

__all__ = ["_Neo4jDriver"]
Expand Down Expand Up @@ -137,16 +137,43 @@ def _create_constraints(self):

logger.info("Creating constraints for node types in config.")

major_neo4j_version = int(self._get_neo4j_version().split(".")[0])
# get structure
for leaf in self.translator.ontology.mapping.extended_schema.items():
label = _misc.sentencecase_to_pascalcase(leaf[0])
label = _misc.sentencecase_to_pascalcase(leaf[0], sep=r"\s\.")
if leaf[1]["represented_as"] == "node":
s = (
f"CREATE CONSTRAINT `{label}_id` "
f"IF NOT EXISTS ON (n:`{label}`) "
"ASSERT n.id IS UNIQUE"
)
self._driver.query(s)
if major_neo4j_version >= 5:
s = (
f"CREATE CONSTRAINT `{label}_id` "
f"IF NOT EXISTS FOR (n:`{label}`) "
"REQUIRE n.id IS UNIQUE"
)
self._driver.query(s)
else:
s = (
f"CREATE CONSTRAINT `{label}_id` "
f"IF NOT EXISTS ON (n:`{label}`) "
"ASSERT n.id IS UNIQUE"
)
self._driver.query(s)

def _get_neo4j_version(self):
"""Get neo4j version."""
try:
neo4j_version = self._driver.query(
"""
CALL dbms.components()
YIELD name, versions, edition
UNWIND versions AS version
RETURN version AS version
""",
)[0][0]["version"]
return neo4j_version
except Exception as e:
logger.warning(
f"Error detecting Neo4j version: {e} use default version 4.0.0."
)
return "4.0.0"

def add_nodes(self, id_type_tuples: Iterable[tuple]) -> tuple:
"""
Expand Down
8 changes: 6 additions & 2 deletions biocypher/_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def sentencecase_to_snakecase(s: str) -> str:
return stringcase.snakecase(s).lower()


def sentencecase_to_pascalcase(s: str) -> str:
def sentencecase_to_pascalcase(s: str, sep: str = r"\s") -> str:
"""
Convert sentence case to PascalCase.
Expand All @@ -215,7 +215,11 @@ def sentencecase_to_pascalcase(s: str) -> str:
Returns:
string in PascalCase form
"""
return re.sub(r"(?:^| )([a-zA-Z])", lambda match: match.group(1).upper(), s)
return re.sub(
r"(?:^|[" + sep + "])([a-zA-Z])",
lambda match: match.group(1).upper(),
s,
)


def to_lower_sentence_case(s: str) -> str:
Expand Down
2 changes: 1 addition & 1 deletion biocypher/write/_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

from biocypher._config import config as _config

__all__ = ["get_writer"]
__all__ = ["get_writer", "DBMS_TO_CLASS"]

if TYPE_CHECKING:
from biocypher._translate import Translator
Expand Down
47 changes: 44 additions & 3 deletions biocypher/write/graph/_neo4j.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import os
import re
import subprocess

from biocypher._logger import logger
from biocypher.write._batch_writer import parse_label, _BatchWriter
Expand All @@ -22,13 +24,27 @@ class _Neo4jBatchWriter(_BatchWriter):
- _write_array_string
"""

def __init__(self, *args, **kwargs):
    """
    Constructor.

    Forwards all positional and keyword arguments unchanged to the parent
    class constructor. No Neo4j version check or other Neo4j-specific
    setup is performed in this method itself.

    Args:
        *args: Positional arguments passed through to the parent class.
        **kwargs: Keyword arguments passed through to the parent class.
    """

    # NOTE(review): the parent constructor presumably reads the
    # configuration and sets up import_call_bin_prefix -- confirm against
    # the parent class.
    super().__init__(*args, **kwargs)

def _get_default_import_call_bin_prefix(self):
"""
Method to provide the default string for the import call bin prefix.
Returns:
str: The default location for the neo4j admin import location
"""

return "bin/"

def _write_array_string(self, string_list):
Expand Down Expand Up @@ -263,9 +279,32 @@ def _construct_import_call(self) -> str:
Returns:
str: a bash command for neo4j-admin import
"""
import_call_neo4j_v4 = self._get_import_call(
"import", "--database=", "--force="
)
import_call_neo4j_v5 = self._get_import_call(
"database import full", "", "--overwrite-destination="
)
neo4j_version_check = f"version=$({self._get_default_import_call_bin_prefix()}neo4j-admin --version | cut -d '.' -f 1)"

import_script = f"#!/bin/bash\n{neo4j_version_check}\nif [[ $version -ge 5 ]]; then\n\t{import_call_neo4j_v5}\nelse\n\t{import_call_neo4j_v4}\nfi"
return import_script

def _get_import_call(
self, import_cmd: str, database_cmd: str, wipe_cmd: str
) -> str:
"""Get parametrized import call for Neo4j 4 or 5+.
Args:
import_cmd (str): The import command to use.
database_cmd (str): The database command to use.
wipe_cmd (str): The wipe command to use.
Returns:
str: The import call.
"""
import_call = (
f"{self.import_call_bin_prefix}neo4j-admin import "
f"--database={self.db_name} "
f"{self.import_call_bin_prefix}neo4j-admin {import_cmd} "
f'--delimiter="{self.escaped_delim}" '
f'--array-delimiter="{self.escaped_adelim}" '
)
Expand All @@ -276,7 +315,7 @@ def _construct_import_call(self) -> str:
import_call += f"--quote='{self.quote}' "

if self.wipe:
import_call += f"--force=true "
import_call += f"{wipe_cmd}true "
if self.skip_bad_relationships:
import_call += "--skip-bad-relationships=true "
if self.skip_duplicate_nodes:
Expand All @@ -290,4 +329,6 @@ def _construct_import_call(self) -> str:
for header_path, parts_path in self.import_call_edges:
import_call += f'--relationships="{header_path},{parts_path}" '

# Database needs to be at the end starting with Neo4j 5.0+.
import_call += f"{database_cmd}{self.db_name} "
return import_call
17 changes: 9 additions & 8 deletions docs/output/neo4j.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,10 @@ refer the reader to the Neo4j documentation for more details.

## Install Neo4j

```{note}
Neo4j provides a [Neo4j Desktop
application](https://neo4j.com/download-center/#desktop) that can be used to
create a local instance of Neo4j. The desktop application provides information
about the DBMS folder and can open a terminal at the DBMS root location. The
import call generated by BioCypher can then be copied into the terminal of the
Neo4j Desktop application for each corresponding DBMS.
about the DBMS folder and can open a terminal at the DBMS root location.

Neo4j is also available as a command line interface (CLI) tool. To use the CLI
with the BioCypher admin import call, directory structures and permissions need
Expand All @@ -24,7 +21,6 @@ on your system.

Be mindful that different versions of Neo4j may differ in features and thus are
also documented differently.
```

```{note}
We use the APOC library for Neo4j, which is not included automatically, but
Expand Down Expand Up @@ -141,9 +137,14 @@ header and data files for each entity type, the import call conveniently
aggregates this information into one command, detailing the location of all
files on disk, so no data need to be copied around.

```{caution}
The generated import call is currently only compatible with Neo4j version 4.
Version 5 support coming soon!
```{note}
The generated import call differs between Neo4j versions 4 and 5.
Starting with major version 5, the Neo4j ``import`` command requires the
``database`` scope. BioCypher takes care of this:
the generated import script ``neo4j-admin-import-call.sh``
first checks the installed Neo4j version and then uses the correct
import statement for the detected version. Therefore, make sure to run
the script from the root location of the targeted DBMS.
```

Neo4j can manage multiple projects, each with multiple DBMS (database management
Expand Down
Loading

0 comments on commit bec1703

Please sign in to comment.