diff --git a/.flake8 b/.flake8
index 2561054..1ab0476 100644
--- a/.flake8
+++ b/.flake8
@@ -1,5 +1,5 @@
 # flake8 does not support pyproject.toml, see:
 # https://github.com/PyCQA/flake8/issues/234
 [flake8]
-exclude = docs,venv
+exclude = docs,venv,build
 max-line-length = 100
\ No newline at end of file
diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
index ed00528..0f7526b 100644
--- a/.github/workflows/pages.yml
+++ b/.github/workflows/pages.yml
@@ -23,6 +23,7 @@ concurrency:
 env:
   # Directory that will be published on github pages
   PUBLISH_DIR: ./docs/_build/html
+  DEB_PYTHON_INSTALL_LAYOUT: deb_system
 
 jobs:
 
@@ -37,7 +38,6 @@ jobs:
       - name: Upgrade pip and setuptools
         run: |
           python3 -m pip install pip setuptools --upgrade
-          apt-get purge -y python3-setuptools
 
       - name: Install dependencies
         run: python3 -m pip install ".[test,docs]"
diff --git a/.github/workflows/test_package.yml b/.github/workflows/test_package.yml
index f8e1584..10ae2ca 100644
--- a/.github/workflows/test_package.yml
+++ b/.github/workflows/test_package.yml
@@ -13,6 +13,10 @@ on:
   schedule:
     # The CI is executed every day at 8am
     - cron: "0 8 * * *"
+  
+env:
+  DEB_PYTHON_INSTALL_LAYOUT: deb_system
+    
 jobs:
   check-code:
 
@@ -26,9 +30,8 @@ jobs:
       - uses: actions/checkout@v3
 
       - name: Upgrade pip and setuptools
-        run: |
+        run:
           python3 -m pip install pip setuptools --upgrade
-          apt-get purge -y python3-setuptools
 
       - name: "Install code"
         run: python3 -m pip install .[test]
@@ -49,9 +52,8 @@ jobs:
       - uses: actions/checkout@v3
 
       - name: Upgrade pip and setuptools
-        run: |
+        run:
           python3 -m pip install pip setuptools --upgrade
-          apt-get purge -y python3-setuptools
 
       - name: "Install oasisx"
         run: pip install .[test]
diff --git a/Makefile b/Makefile
index ae43c23..94ce5f4 100644
--- a/Makefile
+++ b/Makefile
@@ -1,14 +1,13 @@
 # List of Python demos (without file extenion) from the repo `demo` to include in the jupyterbook. 
 # These files should be listed in `docs/_toc.yml`
-#DEMOS = 
+DEMOS = "assembly_strategies"
 
+# We use --set-kernel with jupytext to make it possible for binder to pick it up
 doc: # Generate Sphinx HTML documentation, including API docs 
+	@for demo in ${DEMOS}; do \
+	jupytext --to=ipynb --set-kernel=python3 demo/$$demo.py --output=docs/$$demo.ipynb ;\
+	done
 	jupyter book build -W docs
-# We use --set-kernel with jupytext to make it possible for binder to pick it up
-# @for demo in ${DEMOS}; do \
-# 	jupytext --to=ipynb --set-kernel=python3 demo/$$demo.py --output=docs/$$demo.ipynb ;\
-# ;\
-#	done
 
 clean-pytest: # Remove output from pytest
 	rm -rf .pytest_cache
diff --git a/demo/assembly_bcs.py b/demo/assembly_bcs.py
new file mode 100644
index 0000000..b5c15a7
--- /dev/null
+++ b/demo/assembly_bcs.py
@@ -0,0 +1,316 @@
+# ---
+# jupyter:
+#   jupytext:
+#     formats: ipynb,py:light
+#     text_representation:
+#       extension: .py
+#       format_name: light
+#       format_version: '1.5'
+#       jupytext_version: 1.14.1
+#   kernelspec:
+#     display_name: Python 3 (ipykernel)
+#     language: python
+#     name: python3
+# ---
+
+# Copyright (C) 2022 Jørgen Schartum Dokken
+#
+# This file is part of Oasisx
+# SPDX-License-Identifier:    MIT
+#
+# # Application of Dirichlet BCs
+# Illustrates how to apply bcs to the component of the tentative velocity equation.
+# We compare two strategies:
+# 1. Using matrix-vector products of pre-assembled matrices to compute the RHS
+# 2. Assemble the matrix and vector separately
+# We start by importing the necessary modules
+
+import matplotlib.pyplot as plt
+import time
+import seaborn
+import pandas
+import dolfinx
+from mpi4py import MPI
+from petsc4py import PETSc
+import ufl
+import numpy as np
+import typing
+
+
+def assembly(mesh, P: int, repeats: int, jit_options: typing.Optional[dict] = None):
+    V = dolfinx.fem.FunctionSpace(mesh, ("CG", int(P)))
+
+    def f(x):
+        return 2*np.sin(x[0])+3+2*x[1]
+    mesh.topology.create_connectivity(mesh.topology.dim-1, mesh.topology.dim)
+    boundary_facets = dolfinx.mesh.exterior_facet_indices(mesh.topology)
+    boundary_dofs = dolfinx.fem.locate_dofs_topological(V, mesh.topology.dim-1, boundary_facets)
+    g = dolfinx.fem.Function(V)
+    g.interpolate(f)
+    bcs = [dolfinx.fem.dirichletbc(g, boundary_dofs)]
+
+    dt = 0.5
+    nu = 0.3
+    u = ufl.TrialFunction(V)
+    v = ufl.TestFunction(V)
+
+    # Solution from previous time step
+    u_1 = dolfinx.fem.Function(V)
+    u_1.interpolate(lambda x: np.sin(x[0])*np.cos(x[1]))
+
+    # Define variational forms
+    mass = ufl.inner(u, v) * ufl.dx
+    stiffness = ufl.inner(ufl.grad(u), ufl.grad(v)) * ufl.dx
+
+    u_ab = [dolfinx.fem.Function(V, name=f"u_ab{i}") for i in range(mesh.geometry.dim)]
+    convection = ufl.inner(ufl.dot(ufl.as_vector(u_ab), ufl.nabla_grad(u)), v) * ufl.dx
+    for u_abi in u_ab:
+        u_abi.interpolate(lambda x: x[0])
+
+    # Compile forms for matrix vector products
+    jit_options = {} if jit_options is None else jit_options
+    mass_form = dolfinx.fem.form(mass, jit_params=jit_options)
+    stiffness_form = dolfinx.fem.form(stiffness, jit_params=jit_options)
+    convection_form = dolfinx.fem.form(convection, jit_params=jit_options)
+
+    # Compile form for vector assembly (action)
+    dt_inv = dolfinx.fem.Constant(mesh, 1./dt)
+    dt_inv.name = "dt_inv"
+    nu_c = dolfinx.fem.Constant(mesh, nu)
+    nu_c.name = "nu"
+    rhs = dt_inv * mass - 0.5 * nu_c * stiffness - 0.5*convection
+    rhs_form = dolfinx.fem.form(ufl.action(rhs, u_1), jit_params=jit_options)
+
+    # Assemble time independent matrices
+    # Mass matrix
+    M = dolfinx.fem.petsc.create_matrix(mass_form)
+    M.setOption(PETSc.Mat.Option.SYMMETRIC, True)
+    M.setOption(PETSc.Mat.Option.SYMMETRY_ETERNAL, True)
+    M.setOption(PETSc.Mat.Option.IGNORE_ZERO_ENTRIES, True)
+    dolfinx.fem.petsc.assemble_matrix(M, mass_form)
+    M.assemble()
+    M.setOption(PETSc.Mat.Option.NEW_NONZERO_LOCATIONS, False)
+
+    # Stiffness matrix
+    K = dolfinx.fem.petsc.create_matrix(stiffness_form)
+    K.setOption(PETSc.Mat.Option.SYMMETRIC, True)
+    K.setOption(PETSc.Mat.Option.SYMMETRY_ETERNAL, True)
+    K.setOption(PETSc.Mat.Option.IGNORE_ZERO_ENTRIES, True)
+    dolfinx.fem.petsc.assemble_matrix(K, stiffness_form)
+    K.assemble()
+    K.setOption(PETSc.Mat.Option.NEW_NONZERO_LOCATIONS, False)
+
+    # RHS vectors
+    b = dolfinx.fem.Function(V)
+    bx = dolfinx.fem.Function(V)
+
+    # Timing vectors
+    oasis_lhs = np.zeros((repeats, mesh.comm.size), dtype=np.float64)
+    oasis_rhs = np.zeros((repeats, mesh.comm.size), dtype=np.float64)
+    new_lhs = np.zeros((repeats, mesh.comm.size), dtype=np.float64)
+    new_rhs = np.zeros((repeats, mesh.comm.size), dtype=np.float64)
+    new_total = np.zeros((repeats, mesh.comm.size), dtype=np.float64)
+    oasis_total = np.zeros((repeats, mesh.comm.size), dtype=np.float64)
+
+    A_sp = dolfinx.fem.create_sparsity_pattern(mass_form)
+    A_sp.assemble()
+
+    A = dolfinx.cpp.la.petsc.create_matrix(mesh.comm, A_sp)
+    Ax = dolfinx.cpp.la.petsc.create_matrix(mesh.comm, A_sp)
+    D = dolfinx.cpp.la.petsc.create_matrix(mesh.comm, A_sp)
+    D.assemble()
+    for i in range(repeats):
+        mesh.comm.Barrier()
+        # --------------Oasis approach-------------------------
+        # Zero out time-dependent matrix
+        start_mat = time.perf_counter()
+        A.zeroEntries()
+        A.setOption(PETSc.Mat.Option.KEEP_NONZERO_PATTERN, True)
+        A.setOption(PETSc.Mat.Option.IGNORE_ZERO_ENTRIES, False)
+        end_mat = time.perf_counter()
+
+        # Add convection term
+        start_lhs = time.perf_counter()
+        dolfinx.fem.petsc.assemble_matrix(A, convection_form)
+        A.assemble()
+        A.scale(-0.5)
+        A.axpy(1./dt, M, PETSc.Mat.Structure.SUBSET_NONZERO_PATTERN)
+        A.axpy(-0.5*nu, K, PETSc.Mat.Structure.SUBSET_NONZERO_PATTERN)
+        end_lhs = time.perf_counter()
+
+        # Do mat-vec operations
+        b.x.array[:] = 0
+        start_rhs = time.perf_counter()
+        A.mult(u_1.vector, b.vector)
+        b.x.scatter_reverse(dolfinx.la.ScatterMode.add)
+        dolfinx.fem.petsc.set_bc(b.vector, bcs)
+        b.x.scatter_forward()
+        end_rhs = time.perf_counter()
+        t_matvec = end_rhs - start_rhs
+
+        mesh.comm.Barrier()
+        # Rescale matrix and apply bc
+        start_rescale = time.perf_counter()
+        A.scale(-1)
+        A.axpy(2/dt, M, PETSc.Mat.Structure.SUBSET_NONZERO_PATTERN)
+        for bc in bcs:
+            A.zeroRowsLocal(bc.dof_indices()[0], 1.)  # type: ignore
+        end_rescale = time.perf_counter()
+        t_matrix = end_rescale - start_rescale + end_lhs - start_lhs
+
+        mesh.comm.Barrier()
+
+        # Gather results
+        oasis_lhs[i, :] = mesh.comm.allgather(t_matrix)
+        oasis_rhs[i, :] = mesh.comm.allgather(t_matvec)
+        oasis_total[i, :] = mesh.comm.allgather(t_matrix + t_matvec)
+        # ---------------------------------------------------------
+        # Zero out time-dependent matrix
+        Ax.zeroEntries()
+        Ax.setOption(PETSc.Mat.Option.KEEP_NONZERO_PATTERN, True)
+        Ax.setOption(PETSc.Mat.Option.IGNORE_ZERO_ENTRIES, False)
+
+        mesh.comm.Barrier()
+
+        # Add convection term
+        start_lhs_new = time.perf_counter()
+        dolfinx.fem.petsc.assemble_matrix(Ax, convection_form)
+        Ax.assemble()
+        Ax.scale(0.5)
+        Ax.axpy(1./dt, M, PETSc.Mat.Structure.SUBSET_NONZERO_PATTERN)
+        Ax.axpy(0.5*nu, K, PETSc.Mat.Structure.SUBSET_NONZERO_PATTERN)
+        for bc in bcs:
+            Ax.zeroRowsLocal(bc.dof_indices()[0], 1.)  # type: ignore
+        end_lhs_new = time.perf_counter()
+
+        mesh.comm.Barrier()
+
+        # Compute the vector without using pre-generated matrices
+        bx.x.array[:] = 0
+        start_rhs_new = time.perf_counter()
+        dolfinx.fem.petsc.assemble_vector(bx.vector, rhs_form)
+        bx.x.scatter_reverse(dolfinx.la.ScatterMode.add)
+        dolfinx.fem.petsc.set_bc(bx.vector, bcs)
+        bx.x.scatter_forward()
+        end_rhs_new = time.perf_counter()
+
+        # Gather results
+        new_total[i, :] = mesh.comm.allgather(end_lhs_new - start_lhs_new
+                                              + end_rhs_new - start_rhs_new)
+        new_lhs[i, :] = mesh.comm.allgather(end_lhs_new - start_lhs_new)
+        new_rhs[i, :] = mesh.comm.allgather(end_rhs_new - start_rhs_new)
+
+        matrix_total = mesh.comm.allgather(end_mat-start_mat)
+        if mesh.comm.rank == 0:
+            print("Oasis Total", oasis_total[i],  "\nNew Total", new_total[i],
+                  "\nMatrix total", matrix_total, flush=True)
+        # Check that vectors are the same
+        if not np.allclose(bx.x.array, b.x.array):
+            print(np.max(np.abs(bx.x.array[:]-b.x.array[:])))
+            raise RuntimeError("Vectors are not equal after assembly")
+
+        # Check that matrices are the same
+        D.zeroEntries()
+        A.copy(D, PETSc.Mat.Structure.DIFFERENT_NONZERO_PATTERN)
+        D.axpy(-1, Ax, PETSc.Mat.Structure.DIFFERENT_NONZERO_PATTERN)
+        if not np.allclose(D.getValuesCSR()[2], 0):
+            print(np.max(np.abs(D.getValuesCSR()[2])))
+            raise RuntimeError("Matrices are not equal after assembly")
+    num_dofs_global = V.dofmap.index_map_bs * V.dofmap.index_map.size_global
+    return num_dofs_global, new_lhs, new_rhs, oasis_lhs, oasis_rhs, oasis_total, new_total
+
+
+# We solve the problem on a unit cube that is split into tetrahedras with `Nx`,`Ny` and `Nx`
+# tetrahera in the x, y and z-direction respectively.
+
+def run_parameter_sweep(Nx: int, Ny: int, Nz: int, repeats: int, min_degree: int,
+                        max_degree: int) -> dict:
+    # Information regarding optimization flags can be found at:
+    # https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html
+    jit_options = {"cffi_extra_compile_args": ["-march=native", "-O3"]}
+
+    mesh = dolfinx.mesh.create_unit_cube(MPI.COMM_WORLD, Nx, Ny, Nz)
+    Ps = np.arange(min_degree, max_degree+1, dtype=np.int32)
+    j = 0
+    results = {}
+    for i, P in enumerate(Ps):
+        if mesh.comm.rank == 0:
+            print(i, P, flush=True)
+        dof, new_lhs, new_rhs, oasis_lhs, oasis_rhs, oasis_total, new_total = assembly(
+            mesh, P, repeats=repeats, jit_options=jit_options)
+        if mesh.comm.rank == 0:
+            print("Writing to dict")
+        for row in new_lhs:
+            for process in row:
+                results[j] = {"P": P, "num_dofs": dof, "method": "new", "side":
+                              "lhs", "time (s)": process, "procs": MPI.COMM_WORLD.size}
+                j += 1
+        for row in new_rhs:
+            for process in row:
+                results[j] = {"P": P, "num_dofs": dof, "method": "new", "side":
+                              "rhs", "time (s)": process, "procs": MPI.COMM_WORLD.size}
+                j += 1
+
+        for row in oasis_lhs:
+            for process in row:
+                results[j] = {"P": P, "num_dofs": dof, "method": "oasis", "side":
+                              "lhs", "time (s)": process, "procs": MPI.COMM_WORLD.size}
+                j += 1
+        for row in oasis_rhs:
+            for process in row:
+                results[j] = {"P": P, "num_dofs": dof, "method": "oasis", "side":
+                              "rhs", "time (s)": process, "procs": MPI.COMM_WORLD.size}
+                j += 1
+        for row in oasis_total:
+            for process in row:
+                results[j] = {"P": P, "num_dofs": dof, "method": "oasis", "side":
+                              "total", "time (s)": process, "procs": MPI.COMM_WORLD.size}
+                j += 1
+        for row in new_total:
+            for process in row:
+                results[j] = {"P": P, "num_dofs": dof, "method": "new", "side":
+                              "total", "time (s)": process, "procs": MPI.COMM_WORLD.size}
+                j += 1
+    return results
+
+
+# We use `pandas` and `seaborn` to visualize the results
+
+# +
+
+
+def create_plot(results: dict, outfile: str):
+    if MPI.COMM_WORLD.rank == 0:
+        df = pandas.DataFrame.from_dict(results, orient="index")
+        df["label"] = "P" + df["P"].astype(str) + " " + \
+            df["num_dofs"].astype(str) + " \n Comms: " + df["procs"].astype(str)
+        plt.figure()
+        df_rhs = df[df["side"] == "rhs"]
+        plot = seaborn.catplot(data=df_rhs, kind="swarm",  x="label",
+                               y="time (s)", hue="method")
+        plot.set(yscale="log")
+        plt.grid()
+        plt.savefig(f"{outfile}_rhs.png")
+        plt.figure()
+        df_lhs = df[df["side"] == "lhs"]
+        plot = seaborn.catplot(data=df_lhs, kind="swarm",  x="label",
+                               y="time (s)", hue="method")
+        plot.set(yscale="log")
+        plt.grid()
+        plt.savefig(f"{outfile}_lhs.png")
+        plt.figure()
+        df_total = df[df["side"] == "total"]
+        plot = seaborn.catplot(data=df_total, kind="swarm",  x="label",
+                               y="time (s)", hue="method")
+        plot.set(yscale="log")
+        plt.grid()
+        plt.savefig(f"{outfile}_total.png")
+
+# -
+
+
+# We start by running the comparison for an increasing number of degrees of freedom on a fixed grid.
+if __name__ == "__main__":
+    N = 100
+    results_p = run_parameter_sweep(N, N, N, repeats=3, min_degree=1, max_degree=3)
+    create_plot(results_p, "results")
diff --git a/demo/assembly_strategies.py b/demo/assembly_strategies.py
new file mode 100644
index 0000000..4c7de06
--- /dev/null
+++ b/demo/assembly_strategies.py
@@ -0,0 +1,210 @@
+# ---
+# jupyter:
+#   jupytext:
+#     formats: ipynb,py:light
+#     text_representation:
+#       extension: .py
+#       format_name: light
+#       format_version: '1.5'
+#       jupytext_version: 1.14.1
+#   kernelspec:
+#     display_name: Python 3 (ipykernel)
+#     language: python
+#     name: python3
+# ---
+
+# Copyright (C) 2022 Jørgen Schartum Dokken
+#
+# This file is part of Oasisx
+# SPDX-License-Identifier:    MIT
+#
+# # Efficient assembly
+# This demo illustrates the performance differences between matrix free and cached assembly
+# for the Crank Nicholson time discretization with the implicit Adams-Bashforth linearization
+# in the tentative velocity step of the Navier-Stokes Equation.
+#
+# We start by importing the necessary modules
+
+
+import seaborn
+import pandas
+import dolfinx
+from mpi4py import MPI
+from petsc4py import PETSc
+import ufl
+import numpy as np
+
+
+# We define a function, that takes in a mesh, the order `P` of the Lagrange function space for the
+# scalar component of the velocity field, the number of times we should time the assembly, and
+# `jit_options` for just in time compilation of the vartiational forms.
+#
+# For the *Matrix-vector* strategy, we do only time the time it takes to modify the pre-assembled
+# convection matrix, adding the scaled mass and stiffness matrices and computing the matrix vector
+# product, as the matrix `A` is needed for the LHS assembly in the fractional step method.
+#
+# For the *Action strategy* we use `ufl.action` to create the variational form for the RHS
+# vector, and use generated code for the assembly.
+#
+# This demo does not consider any Dirichet boundary conditions.
+#
+# We add some arbitrary data to the variables `dt`, `nu`, `u_1` and `u_ab`,
+# as we are not solving a full problem here.
+
+def assembly(mesh, P: int, repeats: int, jit_options: dict = None):
+    V = dolfinx.fem.FunctionSpace(mesh, ("CG", int(P)))
+    dt = 0.5
+    nu = 0.3
+    u = ufl.TrialFunction(V)
+    v = ufl.TestFunction(V)
+
+    # Solution from previous time step
+    u_1 = dolfinx.fem.Function(V)
+    u_1.interpolate(lambda x: np.sin(x[0])*np.cos(x[1]))
+
+    # Define variational forms
+    mass = ufl.inner(u, v) * ufl.dx
+    stiffness = ufl.inner(ufl.grad(u), ufl.grad(v)) * ufl.dx
+
+    u_ab = [dolfinx.fem.Function(V, name=f"u_ab{i}") for i in range(mesh.geometry.dim)]
+    convection = ufl.inner(ufl.dot(ufl.as_vector(u_ab), ufl.nabla_grad(u)), v) * ufl.dx
+    for u_abi in u_ab:
+        u_abi.interpolate(lambda x: x[0])
+
+    # Compile forms for matrix vector products
+    jit_options = {} if jit_options is None else jit_options
+    mass_form = dolfinx.fem.form(mass, jit_params=jit_options)
+    stiffness_form = dolfinx.fem.form(stiffness, jit_params=jit_options)
+    convection_form = dolfinx.fem.form(convection, jit_params=jit_options)
+
+    # Compile form for vector assembly (action)
+    dt_inv = dolfinx.fem.Constant(mesh, 1./dt)
+    dt_inv.name = "dt_inv"
+    nu_c = dolfinx.fem.Constant(mesh, nu)
+    nu_c.name = "nu"
+    lhs = dt_inv * mass - 0.5 * nu_c * stiffness - 0.5*convection
+    lhs = dolfinx.fem.form(ufl.action(lhs, u_1), jit_params=jit_options)
+
+    # Assemble time independent matrices
+    # Mass matrix
+    M = dolfinx.fem.petsc.create_matrix(mass_form)
+    M.setOption(PETSc.Mat.Option.SYMMETRY_ETERNAL, True)
+    M.setOption(PETSc.Mat.Option.IGNORE_ZERO_ENTRIES, True)
+    dolfinx.fem.petsc.assemble_matrix(M, mass_form)
+    M.assemble()
+    M.setOption(PETSc.Mat.Option.NEW_NONZERO_LOCATIONS, False)
+
+    # Stiffness matrix
+    K = dolfinx.fem.petsc.create_matrix(stiffness_form)
+    K.setOption(PETSc.Mat.Option.SYMMETRY_ETERNAL, True)
+    K.setOption(PETSc.Mat.Option.IGNORE_ZERO_ENTRIES, True)
+    dolfinx.fem.petsc.assemble_matrix(K, stiffness_form)
+    K.assemble()
+    K.setOption(PETSc.Mat.Option.NEW_NONZERO_LOCATIONS, False)
+
+    # Create time dependent matrix (convection matrix)
+    A = dolfinx.fem.petsc.create_matrix(mass_form)
+    A.setOption(PETSc.Mat.Option.IGNORE_ZERO_ENTRIES, True)
+
+    # Vector for matrix vector product
+    b = dolfinx.fem.Function(V)
+
+    t_matvec = np.zeros((repeats, mesh.comm.size), dtype=np.float64)
+    t_action = np.zeros((repeats, mesh.comm.size), dtype=np.float64)
+    for i in range(repeats):
+        # Zero out time-dependent matrix
+        A.zeroEntries()
+
+        # Add convection term
+        dolfinx.fem.petsc.assemble_matrix(A, convection_form)
+        A.assemble()
+
+        # Do mat-vec operations
+        with dolfinx.common.Timer(f"~{P} {i} Matvec strategy") as _:
+            A.scale(-0.5)
+            A.axpy(1./dt, M)
+            A.axpy(-0.5*nu, K)
+            A.mult(u_1.vector, b.vector)
+            b.x.scatter_forward()
+
+        # Compute the vector without using pre-generated matrices
+        b_d = dolfinx.fem.Function(V)
+        with dolfinx.common.Timer(f"~{P} {i} Action strategy") as _:
+            dolfinx.fem.petsc.assemble_vector(b_d.vector, lhs)
+            b_d.x.scatter_reverse(dolfinx.la.ScatterMode.add)
+            b_d.x.scatter_forward()
+        # Compare results
+        assert np.allclose(b.x.array, b_d.x.array)
+
+        # Get timings
+        matvec = dolfinx.common.timing(f"~{P} {i} Matvec strategy")
+        action = dolfinx.common.timing(f"~{P} {i} Action strategy")
+        t_matvec[i, :] = mesh.comm.allgather(matvec[1])
+        t_action[i, :] = mesh.comm.allgather(action[1])
+
+    return V.dofmap.index_map_bs * V.dofmap.index_map.size_global, t_matvec, t_action
+
+
+# We solve the problem on a unit cube that is split into tetrahedras with `Nx`,`Ny` and `Nx`
+# tetrahera in the x, y and z-direction respectively.
+
+def run_parameter_sweep(Nx: int, Ny: int, Nz: int, repeats: int, min_degree: int,
+                        max_degree: int) -> dict:
+    # Information regarding optimization flags can be found at:
+    # https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html
+    jit_options = {"cffi_extra_compile_args": ["-march=native", "-O3"]}
+
+    mesh = dolfinx.mesh.create_unit_cube(MPI.COMM_WORLD, Nx, Ny, Nz)
+    Ps = np.arange(min_degree, max_degree+1, dtype=np.int32)
+    j = 0
+    results = {}
+    for i, P in enumerate(Ps):
+        dof, matvec, action = assembly(mesh, P, repeats=repeats, jit_options=jit_options)
+        for row in matvec:
+            for process in row:
+                results[j] = {"P": P, "num_dofs": dof, "method":
+                              "matvec", "time (s)": process, "procs": MPI.COMM_WORLD.size}
+                j += 1
+        for row in action:
+            for process in row:
+                results[j] = {"P": P, "num_dofs": dof, "method":
+                              "action", "time (s)": process, "procs": MPI.COMM_WORLD.size}
+                j += 1
+    return results
+
+
+# We use `pandas` and `seaborn` to visualize the results
+
+# +
+
+def create_plot(results: dict, outfile: str):
+    if MPI.COMM_WORLD.rank == 0:
+        df = pandas.DataFrame.from_dict(results, orient="index")
+        df["label"] = "P" + df["P"].astype(str) + " " + \
+            df["num_dofs"].astype(str) + " \n Comms: " + df["procs"].astype(str)
+        plot = seaborn.catplot(data=df, kind="swarm",  x="label", y="time (s)", hue="method")
+        plot.set(yscale="log")
+        import matplotlib.pyplot as plt
+        plt.grid()
+        plt.savefig(outfile)
+
+
+# -
+
+# We start by running the comparison for an increasing number of degrees of freedom on a fixed grid.
+
+if __name__ == "__main__":
+    results_p = run_parameter_sweep(30, 25, 23, repeats=3, min_degree=1, max_degree=4)
+    create_plot(results_p, "P_sweep.png")
+
+# We observe that for all the runs, independent of the degree $P$, the *Action Strategy* is
+# significantly faster than the
+
+# We note that the run for $P=1$ is relatively small, and therefore run $P=1$ on a larger mesh
+
+if __name__ == "__main__":
+    results_p1 = run_parameter_sweep(100, 100, 80, 3, 1, 1)
+    create_plot(results_p1, "P1.png")
+
+# We observe that the run-time of both strategies for $P=1$ are more or less the
+# same for larger matrices.
diff --git a/docs/_config.yml b/docs/_config.yml
index 4030023..df7fac5 100644
--- a/docs/_config.yml
+++ b/docs/_config.yml
@@ -10,7 +10,7 @@ copyright: "2022"
 # See https://jupyterbook.org/content/execute.html
 execute:
   execute_notebooks: force
-
+  timeout: 600
 
 # Information about where the book exists on the web
 repository:
@@ -38,5 +38,5 @@ sphinx:
   - 'sphinx.ext.napoleon'
   - 'sphinx.ext.viewcode'
 
-
-
+bibtex_bibfiles:
+  - "references.bib"
diff --git a/docs/_toc.yml b/docs/_toc.yml
index 488f1a3..855bfbe 100644
--- a/docs/_toc.yml
+++ b/docs/_toc.yml
@@ -2,6 +2,11 @@ format: jb-book
 root: index
 
 parts:
+  - caption: Mathematical Description
+    chapters:
+    - file: "splitting_schemes"
+    - file: "assembly_strategies"
+
   - caption: Python API
     chapters:
     - file: "api"
diff --git a/docs/references.bib b/docs/references.bib
new file mode 100644
index 0000000..4068718
--- /dev/null
+++ b/docs/references.bib
@@ -0,0 +1,27 @@
+@article{Oasis-2015,
+	author = {Mikael Mortensen and Kristian Valen-Sendstad},
+	doi = {10.1016/j.cpc.2014.10.026},
+	issn = {0010-4655},
+	journal = {Computer Physics Communications},
+	keywords = {CFD, FEniCS, Python, Navier-Stokes},
+	pages = {177--188},
+	title = {{Oasis: A high-level/high-performance open source Navier-Stokes solver}},
+	volume = {188},
+	year = {2015}
+}
+
+
+
+@article{simo-1994,
+	abstract = {This paper examines the long-term behavior, dissipativity and unconditional nonlinear stability properties of time integration algorithms for the incompressible Navier-Stokes equations, including both direct schemes and fractional step/projection methods. The algorithms are termed nonlinear unconditionally stable if, in the absence of a forcing term but for arbitrary initial conditions, the computed kinetic energy decreases for arbitrary step sizes (i.e., L2-stability). Such an a priori stability estimate is a characteristic feature of the Navier-Stokes system. Similarly, the algorithms are said to exhibit asymptotic δt-independent long-term dissipative behavior if, as the continuum flow generated by the Navier-Stokes equations, the algorithmic flow also exhibits an absorbing set (and a maximal attractor). Both direct and fractional step methods are described which inherit these fundamental properties of the Navier-Stokes system for any time step size. Moreover, a subclass of algorithms which retain these strong notions of nonlinear stability and long-term dissipative behavior is identified which, in addition, has the remarkable property of being linear within the time step. The key implication is that no iterations are required to compute the solution within a time step. A specific member of this class is shown to possess the same absorbing set as the continuum Navier-Stokes system. A second order accurate, unconditionally stable method is also identified which retains the property of linearity within a time step. Numerical analysis and computational aspects involved in the implementation of these methods are addressed in detail and illustrated in representative numerical simulations.},
+	author = {J.C. Simo and F. Armero},
+	doi = {10.1016/0045-7825(94)90042-6},
+	issn = {0045-7825},
+	journal = {Computer Methods in Applied Mechanics and Engineering},
+	number = {1},
+	pages = {111--154},
+	title = {{Unconditional stability and long-term behavior of transient algorithms for the incompressible Navier-Stokes and Euler equations}},
+	volume = {111},
+	year = {1994}
+}
+
diff --git a/docs/splitting_schemes.md b/docs/splitting_schemes.md
new file mode 100644
index 0000000..4b22160
--- /dev/null
+++ b/docs/splitting_schemes.md
@@ -0,0 +1,148 @@
+# Splitting schemes
+As described in {cite}`Oasis-2015` Oasisx uses a fractional step method for solving the Navier-Stokes equations.
+This means that we are solving the set of equations:
+
+Find $\mathbf{u}\in \mathbf{V}_h, p \in \mathbf{Q}$ such that over $\Omega\subset \mathbb{R}^d$
+
+$$
+\begin{align}
+\frac{\partial \mathbf{u}}{\partial t} + (\mathbf{u}\cdot \nabla)\mathbf{u} &= \nu \nabla^2 \mathbf{u} - \nabla p + \mathbf{f} &\text{ in } \Omega\\
+\nabla \cdot \mathbf{u} &= 0& \text { in } \Omega \\
+\quad\nu\frac{\partial \mathbf{u}}{\partial\mathbf{n}} - p\mathbf{n} &= h\mathbf{n}& \text{ on }\partial \Omega_N \\
+\quad\mathbf{u}&=\mathbf{g}&\text{ on } \partial \Omega_D
+\end{align}
+$$
+
+where $\mathbf{u} = (u_1(\mathbf{x}, t), \dots, u_d(\mathbf{x}, t))$ is the velocity vector, $\nu$ the kinematic viscosity, $p(\mathbf{x}, t)$ the fluid pressure and $\mathbf{f}(\mathbf{x}, t)$ are the volumetric forces. The fluid density is incorporated with the pressure $p$.
+We use the a pseudo-traction boundary condition on $\partial\Omega_N$, where $h=0$ corresponds to the natural boundary condition. We use $\frac{\partial{\cdot}}{\partial \mathbf{n}}= \mathbf{n}^T(\nabla\cdot)$.
+
+We split these coupled equations into a set of simpler equations by using a fractional step method, described in for instance {cite}`simo-1994`. We arrive at the following scheme
+
+$$
+\begin{align}
+    \frac{u_k^{I}-  u_k^{n-1}}{\Delta t} + B_k^{n-\frac{1}{2}} &= \nu \nabla^2 \tilde u_k - \nabla_k p^\star + f_k^{n-\frac{1}{2}} & \text{for } k=1,\dots d&\text{ in } \Omega\\
+    u_k^I &= g_k&& \text{ on } \partial \Omega_D\\
+    \nu\frac{\partial \tilde u_k }{\partial  n} - p^*n_k &= h^{n-\frac{1}{2}}n_k&&\text{ on }\partial\Omega_N\\
+    \nabla^2\phi &= -\frac{1}{\Delta t} \nabla \cdot \mathbf{u}^I, &&\text{ in } \Omega\\
+    \frac{\partial\phi}{\partial n}&=0&& \text{ on }\partial \Omega_D \\
+    \phi &= 0 &&\text{ on } \partial \Omega_N\\
+    \frac{u_k^n-u_k^I}{\Delta t} &= -\frac{\partial}{\partial x_k}\phi & \text{for } k=1,\dots d,
+\end{align}
+$$
+
+where $u_k^n$ is the $k$th component of the velocity vector at time $t^n$ $\phi = p^{n-\frac{1}{2}}-p^\star$ is a pressure correction, $p^\star$ the tentative pressure.
+
+A consequence of this is that $p^{n+\frac{1}{2}}\vert_{\partial\Omega_N}=p^*\vert_{\partial\Omega_N}$.
+
+The first equation is solved for the tentative velocity $u_k^I$, where $\tilde u_k=\frac{1}{2}(u_k^I+u_k^{n-1})$, and the convective term $B_k^{n-\frac{1}{2}}=\mathbf{\bar{u}}\cdot \nabla \tilde u_k = (1.5 \mathbf{u}^{n-1}-0.5\mathbf{u}^{n-2})\cdot \nabla \tilde u_k$ is the implicit Adams-Bashforth discretization.
+
+## Implementational aspects
+We start by considering the tentative velocity step.
+
+We use integration by parts and multiplication with a test function $v$ to obtain
+
+$$
+\begin{align}
+    \frac{1}{\Delta t}\int_\Omega (u^I_k-u_k^{n-1}) v~\mathrm{d}x +& \int_\Omega \mathbf{\bar u} \cdot \frac{1}{2}\nabla (u_k^I + u_k^{n-1}) v ~\mathrm{d}x\\
+    &+ \frac{\nu}{2}\int_\Omega \nabla (u_k^I + u_k^{n-1})\cdot \nabla v ~\mathrm{d}x \\
+    &= \int_\Omega p^\star \nabla_k v + f_k^{n-\frac{1}{2}}v ~\mathrm{dx} + \int_{\partial\Omega_N}h^{n-\frac{1}{2}}n_kv~\mathrm{d}s.
+\end{align}
+$$
+
+As $u_k^I$ is the unknown, we use $u_k^I=\sum_{i=0}^Mc_{k,i} \phi_i(\mathbf{x})$, where $c_{k,i}$ is the unknown coefficients, $\phi_i$ is the global basis functions of $u_k^I$.
+We have that $u_k^{n-1}, u_k^{n-2}$ can be written as $u_k^{n-l}=\sum_{i=0}^M c_{k_i}i^{n-l} \phi_i$, where $c_i^{n-l}$ are the known coefficients from previous time steps.
+We write $p^* = \sum_{q=0}^Qr_q\psi_q(\mathbf{x})$.
+Summarizing, we have
+
+$$
+\left(\frac{1}{\Delta t} M + \frac{1}{2} C+ \frac{1}{2}\nu K\right) \mathbf{c}_k = \frac{1}{\Delta t} M \mathbf{c}_k^{n-1} -\frac{1}{2} C \mathbf{c}_k^{n-1} - \frac{1}{2}\nu K \mathbf{c}_k^{n-1} + P^k(p^*, f^{n-\frac{1}{2}}, h^{n-\frac{1}{2}})
+$$
+
+where 
+$$
+\begin{align}
+M_{ij} &= \int_\Omega \phi_j \phi_i ~\mathrm{d}x,\\
+K_{ij} &= \int_\Omega \nabla \phi_j \cdot \nabla \phi_i ~\mathrm{d}x,\\
+C_{ij} &= \int_\Omega \mathbf{\bar u}\cdot \nabla \phi_j \phi_i ~\mathrm{d}x,\\
+P^k_j &=\int_\Omega \left(\sum_{q=0}^{Q}r\psi_q \right)\nabla_k\phi_j + f_k^{n-\frac{1}{2}}\phi_j~\mathrm{d}x
++\int_{\partial\Omega_N}h^{n-\frac{1}{2}}\phi_j~\mathrm{d}s
+\end{align}.
+$$
+
+In Oasis {cite}`Oasis-2015`, one uses the fact that $M$, $K$ and $C$ is needed for the LHS of the variational problem, to avoid assembling them as vectors on the right hand side, and simply use matrix vector products and scaling to create the RHS vector from these pre-assembled matrices.
+
+We also note that $M$ and $K$ are time independent, and thus only $C$ has to be assembled at every time step.
+
+A difference between Oasis and the current implementation is that we implement the pressure condition as the natural boundary condition, and any supplied pressure $h$ will be part of the right hand side of the tentative velocity equation.
+
+In Oasis, the choice of not including this term ment that one would have to re-assemble 
+$\int_\Omega \nabla_k p^*v~\mathrm{d}x$ in every inner iteration.
+In the current implementation, we have that $\int_\Omega p^*\nabla_k v~\mathrm{d}x + \int_{\partial\Omega_N}h^{n-\frac{1}{2}}v~\mathrm{d}s$ has to be reassembled. As the boundary integral $\partial\Omega_N$ usually is small compared to the volume of the computational domain, this is a minimal increase in assembly time.
+
+### Matrix-vector product
+An algorithm for computing the matrix vector product follows
+```python
+M = assemble_mass_matrix()
+K = assemble_stiffness_matrix()
+for i in range(num_time_steps):
+    # Compute convecting velocity for C
+    for j in range(components): 
+        bar_u[j][:] = 0
+        bar_u[j]+= 1.5 * u_1[j]
+        bar_u[j]+= -0.5 * u_2[j]
+    # Compute 0.5C
+    A = assemble_C_matrix()
+    A.scale(0.5)
+    # Add mass matrix
+    A.axpy(1/dt, M)
+
+    #Add stiffness matrix
+    A.axpy(-0.5*nu, K)
+
+    # Compute matrix vector product and add to RHS
+    for j in range(components):
+        b[j] = body_forces
+        b_tmp[j] = A * u_1[j]
+        b[j]+= b_tmp[j]    
+
+    # Reset A for LHS
+    A.scale(-1)
+    A.xpy(2/dt, M)
+
+    # 
+```
+where $u_i$, $i=1,2$ is the solution at time-step $n-i$.
+
+### Action method
+As the matrix $A$ is sparse and potentially large (millions of degrees of freedom), we consider assembling `b` directly.
+```python
+M = assemble_mass_matrix()
+K = assemble_stiffness_matrix()
+for i in range(num_time_steps):
+    # Compute convecting veloctiy for C
+    for j in range(components): 
+        bar_u[j][:] = 0
+        bar_u[j]+= 1.5 * u_1[j]
+        bar_u[j]+= -0.5 * u_2[j]
+
+    # Assemble RHS
+    for j in range(components):
+        b[j] = assemble_vector(body_forces[j] + mass_terms[j] \
+            + stiffness_terms[j] + convective_terms[j])
+
+    # Compute -0.5C
+    A = assemble_C_matrix()
+    A.scale(-0.5)
+    # Add mass matrix
+    A.axpy(-1/dt, M)
+
+    #Add stiffness matrix
+    A.axpy(0.5*nu, K)
+```
+
+In the next section, we will consider the performance differences for these two strategies
+
+**References**
+```{bibliography}
+:filter: docname in docnames
+```
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 5677b9a..87953c1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,6 +16,8 @@ dependencies = [
     'numpy'
 ]
 
+[project.scripts]
+oasisx = "oasisx.__main__:main"
 
 [project.optional-dependencies]
 test = [
@@ -47,7 +49,7 @@ files = [  # Folder to which files that should be checked by mypy
 ]
 
 [tool.pytest.ini_options]
-addopts = "--cov=oasisx --cov-report html --cov-report term-missing -v"
+addopts = "--cov=oasisx --cov-report html --cov-append --cov-report term-missing -v"
 testpaths = [
     "test"
 ]
\ No newline at end of file
diff --git a/src/oasisx/__init__.py b/src/oasisx/__init__.py
index 467650a..6a5aa00 100644
--- a/src/oasisx/__init__.py
+++ b/src/oasisx/__init__.py
@@ -1,3 +1,5 @@
+from .bcs import DirichletBC, LocatorMethod
+from .fracstep import FractionalStep_AB_CN
 from .function import Projector
 
-__all__ = ["Projector"]
+__all__ = ["Projector", "FractionalStep_AB_CN", "DirichletBC", "LocatorMethod"]
diff --git a/src/oasisx/__main__.py b/src/oasisx/__main__.py
new file mode 100644
index 0000000..c048bdf
--- /dev/null
+++ b/src/oasisx/__main__.py
@@ -0,0 +1,11 @@
+# Copyright (C) 2022 Jørgen Schartum Dokken
+#
+# This file is part of Oasisx
+# SPDX-License-Identifier:    MIT
+
+
+from oasisx.main import main
+
+if __name__ == "__main__":
+    import sys
+    sys.exit(main())
diff --git a/src/oasisx/bcs.py b/src/oasisx/bcs.py
new file mode 100644
index 0000000..08b2a13
--- /dev/null
+++ b/src/oasisx/bcs.py
@@ -0,0 +1,130 @@
+# Copyright (C) 2022 Jørgen Schartum Dokken
+#
+# This file is part of Oasisx
+# SPDX-License-Identifier:    MIT
+
+
+from enum import Enum
+from typing import Callable, Optional, Tuple, Union
+
+import dolfinx.fem as _fem
+import dolfinx.mesh as _dmesh
+import numpy as np
+import numpy.typing as npt
+from petsc4py import PETSc as _PETSc
+
+__all__ = ["DirichletBC", "LocatorMethod"]
+
+
+class LocatorMethod(Enum):
+    """
+    Search methods for Dirichlet BCs
+    """
+    GEOMETRICAL = 1
+    TOPOLOGICAL = 2
+
+
+LocatorMethod.TOPOLOGICAL.__doc__ = "Topogical search for dofs"
+LocatorMethod.GEOMETRICAL.__doc__ = "Geometrical search for dofs"
+
+
+class DirichletBC():
+    """
+    Create a Dirichlet boundary condition based on topological or geometrical info from the mesh
+
+    Args:
+        value: The value the degrees of freedom should have. It can be a float, a
+            `dolfinx.fem.Constant` or a lambda-function.
+        method: Locator method for marker.
+        marker: If :py:obj:`oasisx.LocatorMethod.TOPOLOGICAL` the input should
+            be a tuple of a mesh tag and the corresponding value for the entities to assign
+            Dirichlet conditions to. If :py:obj:`oasisx.LocatorMethod.GEOMETRICAL` the input
+            is a lambda function taking in x, y, and z coordinates (ordered as
+            `[[x0, x1,...,xn], [y0,...,yn], [z0,...,zn]]`), and return a boolean marker where
+            the ith entry indicates if `[xi, yi, zi]` should have a diriclet bc.
+
+    Example:
+        **Assigning a topological condition**
+
+        .. highlight:: python
+        .. code-block:: python
+
+            entities = np.array([0,3,8],dtype=np.int32)
+            values = np.full_like(entities, 2, dtype=np.int32)
+            mt  = dolfinx.fem.meshtags(mesh, mesh.topology.dim-1, entities, values)
+            bc = DirichletBC(5., LocatorMethod.TOPOLOGICAL, (mt, 2))
+
+        **Assigning a geometrical condition**
+
+        .. highlight:: python
+        .. code-block:: python
+
+            bc = DirchletBC(dolfinx.fem.Constant(mesh, 3.), lambda x: np.isclose(x[0]))
+    """
+    _method: LocatorMethod
+    _entities: npt.NDArray[np.int32]  # List of entities local to process
+    _e_dim: int  # Dimension of entities
+
+    _locator: Callable[[npt.NDArray[np.float64]], npt.NDArray[np.bool_]]
+    _dofs: npt.NDArray[np.int32]
+    _value: Union[np.float64, _fem.Constant, Callable[[
+        npt.NDArray[np.float64]], npt.NDArray[np.float64]]]
+    _bc: _fem.DirichletBCMetaClass
+    _u: Optional[_fem.Function]
+
+    __slots__ = tuple(__annotations__)
+
+    def __init__(
+        self, value: Union[np.float64, _fem.Constant,
+                           Callable[[npt.NDArray[np.float64]], npt.NDArray[np.float64]]],
+        method: LocatorMethod,
+        marker: Union[Tuple[_dmesh.MeshTagsMetaClass, np.int32],
+                      Callable[[npt.NDArray[np.float64]], npt.NDArray[np.bool_]]]
+    ):
+        if method == LocatorMethod.GEOMETRICAL:
+            self._method = method
+            setattr(self, "_locator", marker)
+        elif method == LocatorMethod.TOPOLOGICAL:
+            self._method = method
+            self._entities = marker[0].find(marker[1])  # type: ignore
+            self._e_dim = marker[0].dim  # type:ignore
+        self._value = value
+
+    def set_dofs(self, dofs: npt.NDArray[np.int32]):
+        self._dofs = dofs
+
+    def _locate_dofs(self, V: _fem.FunctionSpace):
+        """
+        Locate all dofs satisfying the criterion
+        """
+        if self._method == LocatorMethod.GEOMETRICAL:
+            self._dofs = _fem.locate_dofs_geometrical(V, self._locator)  # type:ignore
+        elif self._method == LocatorMethod.TOPOLOGICAL:
+            self._dofs = _fem.locate_dofs_topological(V, self._e_dim, self._entities)
+
+    def create_bc(self, V: _fem.FunctionSpace):
+        """
+
+        """
+        if not hasattr(self, "_dofs"):
+            self._locate_dofs(V)
+
+        try:
+            self._bc = _fem.dirichletbc(self._value, self._dofs, V)  # type:ignore
+        except AttributeError:
+            self._u = _fem.Function(V)
+            self._u.interpolate(self._value)  # type:ignore
+            self._bc = _fem.dirichletbc(self._u, self._dofs)
+
+    def update_bc(self):
+        """
+        Update the underlying function if input value is a lambda function
+        """
+        if self._u is not None:
+            self._u.interpolate(self._value)
+
+    def apply(self, x: _PETSc.Vec):
+        """
+        Apply boundary condition to a PETSc vector
+        """
+        _fem.petsc.set_bc(x, [self._bc])
diff --git a/src/oasisx/fracstep.py b/src/oasisx/fracstep.py
new file mode 100644
index 0000000..e1a95d6
--- /dev/null
+++ b/src/oasisx/fracstep.py
@@ -0,0 +1,486 @@
+# Copyright (C) 2022 Jørgen Schartum Dokken
+#
+# This file is part of Oasisx
+# SPDX-License-Identifier:    MIT
+
+from typing import List, Optional, Tuple
+
+import numpy as np
+import numpy.typing as npt
+import ufl
+from dolfinx import cpp as _cpp
+from dolfinx import fem as _fem
+from dolfinx import la as _la
+from dolfinx import mesh as _dmesh
+from petsc4py import PETSc as _PETSc
+
+from .bcs import DirichletBC
+from .ksp import KSPSolver
+
+__all__ = ["FractionalStep_AB_CN"]
+
+
+class FractionalStep_AB_CN():
+    """
+    Create the fractional step solver with Adam-Bashforth linearization
+    of the convective term, and Crank-Nicholson time discretization.
+
+    Args:
+        mesh: The computational domain
+        u_element: A tuple describing the finite element family and
+            degree of the element used for the velocity
+        p_element: A tuple describing the finite element family and
+            degree of the element used for the pressure
+        bcs_u: List of Dirichlet BCs for each component of the velocity
+        bcs_p: List of Dirichlet BCs for the pressure
+        solver_options: Dictionary with keys `'tentative'`, `'pressure'` and `'scalar'`,
+            where each key leads to a dictionary of of PETSc options for each problem
+        jit_options: Just in time parameters, to optimize form compilation
+        options: Options for the Oasis solver.
+            `"low_memory_version"` `True`/`False` changes if
+            :math:`\\int\\nabla_k p^* v~\\mathrm{d}x` is assembled as
+            `True`: directly into a vector or `False`: matrix-vector product.
+            Default value: True
+            '"bc_topological"` `True`/`False`. changes how the Dirichlet dofs are located.
+            If True `facet_markers` has to be supplied.
+        body_force: List of forces acting in each direction (x,y,z)
+    """
+
+    # -----------------------Multi-step variables-------------------------------
+    _mesh: _dmesh.Mesh  # The computational domain
+
+    # Velocity component and mapping to parent space
+    _Vi: List[Tuple[_fem.FunctionSpace, npt.NDArray[np.int32]]]
+    _V: _fem.FunctionSpace  # Velocity function space
+    _Q: _fem.FunctionSpace  # Pressure function space
+
+    _mass_Vi: _fem.FormMetaClass  # Compiled form for mass matrix
+    _M: _PETSc.Mat  # Mass matrix
+    _bcs_u: List[List[DirichletBC]]
+    _bcs_p: List[DirichletBC]
+
+    # -----------------------Tentative velocity step----------------------------
+    _u: List[_fem.Function]  # Velocity at time t
+    _u1: List[_fem.Function]  # Velocity at time t - dt
+    _u2: List[_fem.Function]  # Velocity at time t - 2*dt
+    _uab: List[_fem.Function]  # Explicit part of Adam Bashforth convection term
+    _ps: _fem.Function  # Pressure at time t - 3/2 dt
+    _sol_u: _fem.Function  # Tentative velocity as vector function (for outputting)
+    _solver_u: KSPSolver
+
+    _conv_Vi: _fem.FormMetaClass  # Compiled form of Adam Bashforth convection
+    # Compiled form of the pressure gradient for RHS of tentative velocity
+    _grad_p: List[_fem.FormMetaClass]
+    _stiffness_Vi: _fem.FormMetaClass  # Compiled form for stiffness matrix
+    _body_force: List[_fem.FormMetaClass]
+
+    _b_tmp: List[_fem.Function]  # Working array for previous time step of tenative velocity RHS
+    _b0: List[_fem.Function]  # Working array for constant body force contribution
+    _b_u: List[_fem.Function]  # RHS of tentative velocity step
+    _b_matvec: _fem.Function  # Work array for tentative velocity matrix vector product
+    _b_gp: List[_fem.Function]  # Work array for gradient p term on RHS
+
+    _A: _PETSc.Mat  # Matrix for tentative velocity step
+    _grad_p_M: List[_PETSc.Mat]  # Matrix for grad-p operator
+    _K: _PETSc.Mat  # Stiffness matrix
+
+    # Indicating if grad(p)*v*dx and div(u)*q*dx term is assembled as
+    # vector or matrix-vector product
+    _low_memory: bool
+
+    # ----------------------Pressure correction---------------------------------
+    _p: _fem.Function  # Pressure at time t - 1/2 dt
+    _dp: _fem.Function  # Pressure correction
+    _b_c: _fem.Function  # Function for holding RHS of pressure correction
+    _same_space: bool  # Indicator if V and Q are the same
+
+    _solver_p: KSPSolver
+    _stiffness_Q: _fem.FormMetaClass  # Compiled form for pressure Laplacian
+    _p_rhs: List[_fem.FormMetaClass]  # List of rhs for pressure correction
+    _p_M: List[_PETSc.Mat]  # Matrices for non-low memory version of RHS assembly
+    _Ap: _PETSc.Mat  # Pressure Laplacian
+    _p_tmp: _fem.Function  # Working vector for matrix vector    products in non-low memory version
+
+    # ----------------------Velocity update-------------------------------------
+    _solver_c: KSPSolver
+    _c_rhs: List[_fem.FormMetaClass]  # List of velocity update terms for each component
+    _c_M: List[_PETSc.Mat]  # List of matrices for mat-vec operation in velocity update
+    _p_b: _fem.Function
+    _tmp_update: _fem.Function
+    __slots__ = tuple(__annotations__)
+
+    def __init__(self, mesh: _dmesh.Mesh, u_element: Tuple[str, int],
+                 p_element: Tuple[str, int], bcs_u: List[List[DirichletBC]],
+                 bcs_p: List[DirichletBC],
+                 solver_options: dict = None, jit_options: dict = None,
+                 body_force: Optional[ufl.core.expr.Expr] = None,
+                 options: dict = None):
+        self._mesh = mesh
+
+        v_el = ufl.VectorElement(u_element[0], mesh.ufl_cell(), u_element[1])
+        p_el = ufl.FiniteElement(p_element[0], mesh.ufl_cell(), p_element[1])
+        if v_el.extract_component(0)[1] == p_el:
+            self._same_space = True
+
+        # Initialize velocity functions for variational problem
+        self._V = _fem.FunctionSpace(mesh, v_el)
+        self._sol_u = _fem.Function(self._V)  # Function for outputting vector functions
+
+        self._Vi = [self._V.sub(i).collapse() for i in range(self._V.num_sub_spaces)]
+        self._u = [_fem.Function(Vi[0], name=f"u{i}") for (i, Vi) in enumerate(self._Vi)]
+        self._u1 = [_fem.Function(Vi[0], name=f"u_{i}1") for (i, Vi) in enumerate(self._Vi)]
+        self._u2 = [_fem.Function(Vi[0], name=f"u_{i}2") for (i, Vi) in enumerate(self._Vi)]
+        self._uab = [_fem.Function(Vi[0], name=f"u_{i}ab") for (i, Vi) in enumerate(self._Vi)]
+
+        # Create boundary conditons for velocity spaces
+        self._bcs_u = bcs_u
+        for bc_i, Vi in zip(self._bcs_u, self._Vi):
+            for bc in bc_i:
+                bc.create_bc(Vi[0])
+
+        # Working arrays
+        self._b_tmp = [_fem.Function(Vi[0]) for (i, Vi) in enumerate(
+            self._Vi)]
+        self._b_matvec = _fem.Function(self._Vi[0][0])
+        self._b_gp = [_fem.Function(Vi[0]) for (i, Vi) in enumerate(
+            self._Vi)]
+        # RHS arrays
+        self._b_u = [_fem.Function(Vi[0]) for (i, Vi) in enumerate(
+            self._Vi)]
+        self._b0 = [_fem.Function(Vi[0]) for (i, Vi) in enumerate(
+            self._Vi)]
+
+        # Initialize pressure functions for varitional problem
+        self._Q = _fem.FunctionSpace(mesh, p_el)
+        self._ps = _fem.Function(self._Q)  # Pressure used in tentative velocity scheme
+        self._p = _fem.Function(self._Q)
+        self._dp = _fem.Function(self._Q)
+        self._b_c = _fem.Function(self._Q)
+
+        # Create boundary conditions for pressure space
+        self._bcs_p = bcs_p
+        for bc in self._bcs_p:
+            bc.create_bc(self._Q)
+
+        # Create solvers for each step
+        solver_options = {} if solver_options is None else solver_options
+        self._solver_u = KSPSolver(mesh.comm, solver_options.get("tentative"))
+        self._solver_p = KSPSolver(mesh.comm, solver_options.get("pressure"))
+        self._solver_c = KSPSolver(mesh.comm, solver_options.get("scalar"))
+
+        if options is None:
+            options = {}
+        self._low_memory = options.get("low_memory_version", True)
+
+        # Precompile forms and allocate matrices
+        jit_options = {} if jit_options is None else jit_options
+        if body_force is None:
+            body_force = (0.,) * mesh.geometry.dim
+        self._compile_and_allocate_forms(body_force, jit_options)
+
+        # Assemble constant matrices
+        self._preassemble()
+
+        # Set solver operator
+        self._solver_p.setOperators(self._Ap)
+        self._solver_p.setOptions(self._Ap)
+        self._solver_c.setOperators(self._Ap)
+
+    def _compile_and_allocate_forms(self, body_force: ufl.core.expr.Expr,
+                                    jit_options: dict):
+        dx = ufl.Measure("dx", domain=self._mesh)
+        u = ufl.TrialFunction(self._Vi[0][0])
+        v = ufl.TestFunction(self._Vi[0][0])
+
+        # -----------------Tentative velocity step----------------------
+        self._body_force = []
+        for force in body_force:
+            try:
+                force = _fem.Constant(self._mesh, force)
+            except RuntimeError:
+                pass
+            self._body_force.append(_fem.form(force*v*dx, jit_params=jit_options))
+
+        # Mass matrix for velocity component
+        self._mass_Vi = _fem.form(u*v*dx, jit_params=jit_options)
+        self._M = _fem.petsc.create_matrix(self._mass_Vi)
+
+        self._A = _fem.petsc.create_matrix(self._mass_Vi)
+
+        # Stiffness matrix for velocity component
+        self._stiffness_Vi = _fem.form(ufl.inner(ufl.grad(u), ufl.grad(v))*dx,
+                                       jit_params=jit_options)
+        self._K = _fem.petsc.create_matrix(self._stiffness_Vi)
+
+        # Pressure gradients
+        p = ufl.TrialFunction(self._Q)
+        if self._low_memory:
+            self._grad_p = [_fem.form(self._ps.dx(i)*v*dx, jit_params=jit_options)
+                            for i in range(self._mesh.geometry.dim)]
+        else:
+            self._grad_p = [_fem.form(p.dx(i)*v*dx, jit_params=jit_options)
+                            for i in range(self._mesh.geometry.dim)]
+            self._grad_p_M = [_fem.petsc.create_matrix(grad_p) for grad_p in self._grad_p]
+
+        # -----------------Pressure currection step----------------------
+
+        # Pressure Laplacian/stiffness matrix
+        q = ufl.TestFunction(self._Q)
+        self._stiffness_Q = _fem.form(ufl.inner(ufl.grad(p), ufl.grad(q))*ufl.dx,
+                                      jit_params=jit_options)
+        self._Ap = _fem.petsc.create_matrix(self._stiffness_Q)
+
+        # RHS for pressure correction (unscaled)
+        if self._low_memory:
+            self._p_rhs = [_fem.form(ufl.div(ufl.as_vector(self._u))*q*dx, jit_params=jit_options)]
+        else:
+            self._p_rhs = [_fem.form(u.dx(i)*q*dx, jit_params=jit_options)
+                           for i in range(self._mesh.geometry.dim)]
+            self._p_M = [_fem.petsc.create_matrix(rhs) for rhs in self._p_rhs]
+            self._p_tmp = _fem.Function(self._Q)
+
+        # ---------------------------Velocity update-----------------------
+        # RHS for velocity update
+        self._p_b = _fem.Function(self._Vi[0][0])
+        self._tmp_update = _fem.Function(self._Vi[0][0])
+        if self._low_memory:
+            self._c_rhs = [_fem.form(self._dp.dx(i) * v*dx, jit_params=jit_options)
+                           for i in range(len(self._Vi))]
+        else:
+            self._c_M = self._grad_p_M
+
+        # Convection term for Adams Bashforth step
+        self._conv_Vi = _fem.form(ufl.inner(ufl.dot(ufl.as_vector(self._uab),
+                                                    ufl.nabla_grad(u)), v)*dx,
+                                  jit_params=jit_options)
+
+    def _preassemble(self):
+        _fem.petsc.assemble_matrix(self._M, self._mass_Vi)
+        self._M.assemble()
+        _fem.petsc.assemble_matrix(self._K, self._stiffness_Vi)
+        self._K.assemble()
+
+        # Assemble stiffness matrix with boundary conditions
+        _fem.petsc.assemble_matrix(self._Ap, self._stiffness_Q, bcs=[
+            bc._bc for bc in self._bcs_p])
+        self._Ap.assemble()
+
+        if len(self._bcs_p) == 0:
+            nullspace = _PETSc.NullSpace().create(constant=True, comm=self._mesh.comm)
+            self._Ap.setNullSpace(nullspace)
+
+        if not self._low_memory:
+            for i in range(self._mesh.geometry.dim):
+                # Preassemble pressure matrix for tentantive velocity
+                _fem.petsc.assemble_matrix(self._grad_p_M[i], self._grad_p[i])
+                self._grad_p_M[i].assemble()
+
+                # Assemble pressure RHS matrices
+                _fem.petsc.assemble_matrix(self._p_M[i], self._p_rhs[i])
+                self._p_M[i].assemble()
+
+        # Assemble constant body forces
+        for i in range(len(self._u)):
+            self._b0[i].x.set(0.0)
+            _fem.petsc.assemble_vector(self._b0[i].vector, self._body_force[i])
+            self._b0[i].x.scatter_reverse(_la.ScatterMode.add)
+
+    def assemble_first(self, dt: float, nu: float):
+        """
+        Assembly routine for first iteration of pressure/velocity system.
+
+        .. math::
+            A=\\frac{1}{\\delta t}M  + \\frac{1}{2} C +\\frac{1}{2}\\nu K
+
+        where `M` is the mass matrix, `K` the stiffness matrix and `C` the convective term.
+        We also assemble parts of the right hand side:
+
+        .. math::
+
+            b_k=\\left(\\frac{1}{\\delta t}M  + \\frac{1}{2} C +\\frac{1}{2}\\nu K\\right)u_k^{n-1}
+            + \\int_{\\Omega} f_k^{n-\\frac{1}{2}}v_k~\\mathrm{d}x
+
+        Args:
+            dt: The time step
+            nu: The kinematic viscosity
+        """
+        # Update explicit part of Adam-Bashforth approximation with previous time step
+        for i, (u_ab, u_1, u_2) in enumerate(zip(self._uab, self._u1, self._u2)):
+            u_ab.x.set(0)
+            u_ab.x.array[:] = 1.5 * u_1.x.array - 0.5 * u_2.x.array
+
+        self._A.zeroEntries()
+        _fem.petsc.assemble_matrix(self._A, a=self._conv_Vi)  # type: ignore
+        self._A.assemble()
+        self._A.scale(-0.5)  # Negative convection on the rhs
+        self._A.axpy(1./dt, self._M, _PETSc.Mat.Structure.SUBSET_NONZERO_PATTERN)
+
+        # Add diffusion
+        self._A.axpy(-0.5*nu, self._K, _PETSc.Mat.Structure.SUBSET_NONZERO_PATTERN)
+
+        # Compute rhs for all velocity components
+        for i, ui in enumerate(self._u):
+            # Start with body force
+            self._b_tmp[i].x.array[:] = self._b0[i].x.array[:]
+
+            # Add transient convection and difffusion
+            self._A.mult(self._u1[i].vector, self._b_matvec.vector)
+            self._b_matvec.x.scatter_forward()
+            self._b_tmp[i].x.array[:] += self._b_matvec.x.array[:]
+
+        # Reset matrix for lhs
+        self._A.scale(-1)
+        self._A.axpy(2./dt, self._M,  _PETSc.Mat.Structure.SUBSET_NONZERO_PATTERN)
+        # NOTE: This would not work if we have different DirichletBCs on different components
+        for bc in self._bcs_u[0]:
+            self._A.zeroRowsLocal(bc._bc.dof_indices()[0], 1.)  # type: ignore
+
+    def velocity_tentative_assemble(self):
+        """
+        Assemble RHS of tentative velocity equation computing the :math:`p^*`-dependent term of
+
+        .. math::
+
+            b_k \\mathrel{+}= b(u_k^{n-1}) + \\int_{\\Omega}
+            \\frac{\\partial p^*}{\\partial x_k}v_k~\\mathrm{d}x.
+
+        :math:`b(u_k^{n-1})` is computed in :py:obj:`FractionalStep_AB_CN.assemble_first`.
+        """
+        if self._low_memory:
+            # Using the fact that all the gradient forms has the same coefficient
+            coeffs = _cpp.fem.pack_coefficients(self._grad_p[0])
+            for i in range(self._mesh.geometry.dim):
+                self._b_gp[i].x.set(0.)
+                _fem.petsc.assemble_vector(
+                    self._b_gp[i].vector, self._grad_p[i], coeffs=coeffs, constants=[])
+                self._b_gp[i].x.scatter_reverse(_la.ScatterMode.add)
+                self._b_gp[i].x.scatter_forward()
+        else:
+            for i in range(self._mesh.geometry.dim):
+                self._b_gp[i].x.set(0.)
+                self._grad_p_M[i].mult(self._ps.vector, self._b_gp[i].vector)
+                self._b_gp[i].x.scatter_forward()
+        # Update RHS
+        for i in range(self._mesh.geometry.dim):
+            self._b_u[i].x.array[:] = self._b_tmp[i].x.array - self._b_gp[i].x.array
+
+    def velocity_tentative_solve(self) -> Tuple[float, npt.NDArray[np.int32]]:
+        """
+        Apply Dirichlet boundary condition to RHS vector and solver linear algebra system
+
+        Returns the difference between the two solutions and the solver error codes
+        """
+        self._solver_u.setOperators(self._A)
+        self._solver_u.setOptions(self._A)
+        diff = 0
+        errors = np.zeros(self._mesh.geometry.dim, dtype=np.int32)
+        for i in range(self._mesh.geometry.dim):
+            for bc in self._bcs_u[i]:
+                bc.apply(self._b_u[i].vector)
+            # Use temporary storage, as it is only used in `assemble_first`
+            self._u[i].vector.copy(result=self._b_matvec.vector)
+            errors[i] = self._solver_u.solve(self._b_u[i].vector, self._u[i])
+
+            # Compute difference from last inner iter
+            self._b_matvec.vector.axpy(-1, self._u[i].vector)
+            diff += self._b_matvec.vector.norm(_PETSc.NormType.NORM_2)
+        return diff, errors
+
+    def tenative_velocity(self, dt: float, nu: float, max_error: float = 1e-12,
+                          max_iter: int = 10) -> float:
+        """
+        Propagate the tenative velocity by one step
+
+        Args:
+            dt: The time step
+            nu: The kinematic velocity
+            max_error: The maximal difference for inner iterations of solving `u`
+            max_iter: Maximal number of inner iterations for `u`
+        Returns:
+            The difference between the two last inner iterations
+
+        """
+        inner_it = 0
+        diff = 1e8
+        self.assemble_first(dt, nu)
+        while inner_it < max_iter and diff > max_error:
+            inner_it += 1
+            self.velocity_tentative_assemble()
+            diff, errors = self.velocity_tentative_solve()
+            assert (errors > 0).all()
+        return diff
+
+    def pressure_assemble(self, dt: float):
+        """
+        Assemble RHS for pressure correction term
+
+        .. math::
+
+            b_c = \\int_{\\Omega} \\mathrm{div} u^l q~\\mathrm{d}x.
+
+        """
+        self._b_c.x.set(0.)
+        if self._low_memory:
+            _fem.petsc.assemble_vector(self._b_c.vector, self._p_rhs[0])
+        else:
+            for i in range(self._mesh.geometry.dim):
+                self._p_M[i].mult(self._u[i].vector, self._p_tmp.vector)
+                self._b_c.vector.axpy(-1/dt, self._p_tmp.vector)
+        # Apply boundary conditions to the rhs
+        bc_p = [bc._bc for bc in self._bcs_p]
+        self._b_c.x.scatter_reverse(_la.ScatterMode.add)
+        _fem.petsc.set_bc(self._b_c.vector, bc_p)
+
+        # Set pressure DirichletBC condition for time (t+dt/2)
+        _fem.petsc.set_bc(self._b_c.vector, bc_p)
+
+        self._b_c.x.scatter_forward()
+
+    def pressure_solve(self) -> np.int32:
+        """
+        Solve pressure correction problem
+        """
+
+        # Set difference vector to previous time step
+        self._dp.x.array[:] = -self._ps.x.array[:]
+
+        if len(self._bcs_p) == 0:
+            nullspace = self._Ap.getNullSpace()
+            nullspace.remove(self._b_c.vector)
+        converged = self._solver_p.solve(self._b_c.vector, self._p)
+
+        # Compute phi  = p^(n-1/2) - p*
+        self._dp.vector.axpy(1, self._p.vector)
+        return converged
+
+    def velocity_update(self, dt) -> npt.NDArray[np.int32]:
+        """
+        Compute Velocity update
+        """
+        errors = np.zeros(self._mesh.geometry.dim, dtype=np.int32)
+        if self._low_memory:
+            for i in range(self._mesh.geometry.dim):
+                self._M.mult(self._u[i].vector, self._p_b.vector)
+                self._tmp_update.x.set(0)
+                _fem.petsc.assemble_vector(self._tmp_update.vector, self._c_rhs[i])
+                self._tmp_update.x.scatter_reverse(_la.ScatterMode.add)
+                self._p_b.vector.axpy(-dt, self._tmp_update.vector)
+                errors[i] = self._solver_c.solve(self._p_b.vector, self._u[i])
+        else:
+            for i in range(self._mesh.geometry.dim):
+                self._M.mult(self._u[i].vector, self._p_b.vector)
+                self._tmp_update.x.set(0)
+                self._c_M[i].mult(self._dp.vector, self._tmp_update.vector)
+                self._p_b.vector.axpy(-1, self._tmp_update.vector)
+                errors[i] = self._solver_c.solve(self._p_b.vector, self._u[i])
+
+        return errors
+
+    @property
+    def u(self):
+        """
+        Return the solution to the tentative velocity equation as a vector function
+        """
+        for ui, (Vi, map) in zip(self._u, self._Vi):
+            self._sol_u.x.array[map] = ui.x.array
+        return self._sol_u
diff --git a/src/oasisx/ksp.py b/src/oasisx/ksp.py
new file mode 100644
index 0000000..450a93d
--- /dev/null
+++ b/src/oasisx/ksp.py
@@ -0,0 +1,62 @@
+# Copyright (C) 2022 Jørgen Schartum Dokken
+#
+# This file is part of Oasisx
+# SPDX-License-Identifier:    MIT
+
+from petsc4py import PETSc as _PETSc
+import dolfinx.fem as _fem
+import typing
+from mpi4py import MPI
+import numpy as np
+
+
+class KSPSolver:
+    _ksp: _PETSc.KSP  # Krylov subspace solver
+    __slots__ = tuple(__annotations__)
+
+    def __init__(self, comm: MPI.Comm, petsc_options: dict = None):
+        """Lightweight wrapper around the PETSc KSP solver
+        Args:
+            comm: MPI communicator used in PETSc Solver
+            petsc_options: Options that are passed to the linear
+                algebra backend PETSc. For available choices for the
+                'petsc_options' kwarg, see the `PETSc documentation
+                <https://petsc4py.readthedocs.io/en/stable/manual/ksp/>`_.
+        """
+        petsc_options = {} if petsc_options is None else petsc_options
+        self._ksp = _PETSc.KSP().create(comm)
+        prefix = f"Oasis_solve_{id(self)}"
+        self._ksp.setOptionsPrefix(prefix)
+        opts = _PETSc.Options()
+        opts.prefixPush(prefix)
+        for k, v in petsc_options.items():
+            opts[k] = v
+        opts.prefixPop()
+        self._ksp.setFromOptions()
+
+    def setOptions(self, op: typing.Union[_PETSc.Mat, _PETSc.Vec]):
+        prefix = self._ksp.getOptionsPrefix()
+        assert prefix is not None
+        op.setOptionsPrefix(prefix)
+        op.setFromOptions()
+
+    def setOperators(self, A: _PETSc.Mat, P: typing.Optional[_PETSc.Mat] = None):
+        if P is None:
+            self._ksp.setOperators(A)
+        else:
+            self._ksp.setOperators(A, P)
+
+    def solve(self, b: _PETSc.Vec, x: _fem.Function) -> np.int32:
+        self._ksp.solve(b, x.vector)
+        x.x.scatter_forward()
+        return np.int32(self._ksp.getConvergedReason())
+
+    def __del__(self):
+        """
+        Delete PETSc options manually due to https://gitlab.com/petsc/petsc/-/issues/1201
+        """
+        opts = _PETSc.Options()
+        all_opts = opts.getAll()
+        for key in all_opts.keys():
+            if f"Oasis_solve_{id(self)}" in key:
+                opts.delValue(key)
diff --git a/src/oasisx/main.py b/src/oasisx/main.py
new file mode 100644
index 0000000..41316c4
--- /dev/null
+++ b/src/oasisx/main.py
@@ -0,0 +1,26 @@
+# Copyright (C) 2022 Jørgen Schartum Dokken
+#
+# This file is part of Oasisx
+# SPDX-License-Identifier:    MIT
+
+
+import argparse
+
+from .mesh import import_mesh
+from .fracstep import FractionalStep_AB_CN
+
+desc = "Welcome to the Oasisx Python Package. To run the code, add the" + \
+    " following command-line arguments"
+
+parser = argparse.ArgumentParser(description=desc,
+                                 formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+parser.add_argument("--mesh-path", required=True, type=str, dest="mesh_file",
+                    help="Path to mesh file")
+
+
+def main(args=None):
+
+    xargs = parser.parse_args(args)
+    mesh = import_mesh(xargs.mesh_file)
+    problem = FractionalStep_AB_CN(mesh, ("Lagrange", 2), ("Lagrange", 1))
+    problem.assemble_first(0.1, 0.2)
diff --git a/src/oasisx/mesh.py b/src/oasisx/mesh.py
new file mode 100644
index 0000000..d1659cb
--- /dev/null
+++ b/src/oasisx/mesh.py
@@ -0,0 +1,15 @@
+# Copyright (C) 2022 Jørgen Schartum Dokken
+#
+# This file is part of Oasisx
+# SPDX-License-Identifier:    MIT
+
+
+__all__ = ["import_mesh"]
+
+import dolfinx.mesh as _mesh
+from mpi4py import MPI
+
+
+def import_mesh(filename: str) -> _mesh.Mesh:
+    print(filename)
+    return _mesh.create_unit_square(MPI.COMM_WORLD, 10, 10)
diff --git a/test/test_bcs.py b/test/test_bcs.py
new file mode 100644
index 0000000..4f44963
--- /dev/null
+++ b/test/test_bcs.py
@@ -0,0 +1,158 @@
+# Copyright (C) 2022 Jørgen Schartum Dokken
+#
+# This file is part of Oasisx
+# SPDX-License-Identifier:    MIT
+#
+# Tests for DirichletBC wrapper
+
+import dolfinx
+import numpy as np
+import pytest
+from mpi4py import MPI
+from oasisx import DirichletBC, LocatorMethod
+
+
+@pytest.mark.parametrize("P", np.arange(1, 5))
+def test_function_geometrical(P):
+    """
+    Test assignement of a time dependent function to DirichletBC using geometrical mode
+    """
+    mesh = dolfinx.mesh.create_unit_square(MPI.COMM_WORLD, 10, 10)
+
+    def locator(x):
+        return np.isclose(x[0], 1)
+
+    class TimeDependentBC():
+        def __init__(self, t: float):
+            self.t = t
+
+        def eval(self, x):
+            return np.sin(x[0]) + x[1] * self.t
+
+    condition_0 = TimeDependentBC(0.1)
+
+    bc = DirichletBC(condition_0.eval,  LocatorMethod.GEOMETRICAL, locator)
+
+    V = dolfinx.fem.FunctionSpace(mesh, ("Lagrange", int(P)))
+    bc.create_bc(V)
+
+    for t in [0.1, 0.2, 0.3]:
+        u = dolfinx.fem.Function(V)
+        u.interpolate(lambda x: np.sin(x[0]) + x[1]*t)
+        bc_dx = dolfinx.fem.dirichletbc(u, dolfinx.fem.locate_dofs_geometrical(V, locator))
+
+        u_bcx = dolfinx.fem.Function(V)
+        dolfinx.fem.petsc.set_bc(u_bcx.vector, [bc_dx])
+
+        u_bc = dolfinx.fem.Function(V)
+        condition_0.t = t
+        bc.update_bc()
+        bc.apply(u_bc.vector)
+        assert np.allclose(u_bcx.x.array, u_bc.x.array)
+
+
+@pytest.mark.parametrize("P", np.arange(1, 5))
+@pytest.mark.parametrize("dim", np.arange(0, 3))
+def test_function_topological(P, dim):
+    """
+    Test assignement of a time dependent function to DirichletBC using topological mode
+    """
+    mesh = dolfinx.mesh.create_unit_square(MPI.COMM_WORLD, 10, 10)
+
+    def locator(x):
+        return np.isclose(x[0], 1)
+
+    class TimeDependentBC():
+        def __init__(self, t: float):
+            self.t = t
+
+        def eval(self, x):
+            return np.sin(x[0]) + x[1] * self.t
+
+    condition_0 = TimeDependentBC(0.1)
+    entities = dolfinx.mesh.locate_entities(mesh, dim, locator)
+    value = np.int32(3)
+    et = dolfinx.mesh.meshtags(mesh, dim, entities, np.full(len(entities), value, dtype=np.int32))
+    bc = DirichletBC(condition_0.eval,  LocatorMethod.TOPOLOGICAL, (et, value))
+
+    V = dolfinx.fem.FunctionSpace(mesh, ("Lagrange", int(P)))
+    bc.create_bc(V)
+
+    for t in [0.1, 0.2, 0.3]:
+        u = dolfinx.fem.Function(V)
+        u.interpolate(lambda x: np.sin(x[0]) + x[1]*t)
+        bc_dx = dolfinx.fem.dirichletbc(u, dolfinx.fem.locate_dofs_topological(V, dim, entities))
+
+        u_bcx = dolfinx.fem.Function(V)
+        dolfinx.fem.petsc.set_bc(u_bcx.vector, [bc_dx])
+
+        u_bc = dolfinx.fem.Function(V)
+        condition_0.t = t
+        bc.update_bc()
+        bc.apply(u_bc.vector)
+        assert np.allclose(u_bcx.x.array, u_bc.x.array)
+
+
+@pytest.mark.parametrize("P", np.arange(1, 5))
+def test_constant_geometrical(P):
+    """
+    Test assignement of a time dependent constant to DirichletBC using geometrical mode
+    """
+    mesh = dolfinx.mesh.create_unit_square(MPI.COMM_WORLD, 10, 10)
+
+    def locator(x):
+        return np.isclose(x[0], 1)
+
+    time = dolfinx.fem.Constant(mesh, 1.)
+
+    bc = DirichletBC(time,  LocatorMethod.GEOMETRICAL, locator)
+
+    V = dolfinx.fem.FunctionSpace(mesh, ("Lagrange", int(P)))
+    bc.create_bc(V)
+
+    for t in [0.1, 0.2, 0.3]:
+        time.value += t
+        bc_dx = dolfinx.fem.dirichletbc(time, dolfinx.fem.locate_dofs_geometrical(V, locator), V)
+        u_bcx = dolfinx.fem.Function(V)
+        dolfinx.fem.petsc.set_bc(u_bcx.vector, [bc_dx])
+
+        u_bc = dolfinx.fem.Function(V)
+        bc.apply(u_bc.vector)
+        assert np.allclose(u_bcx.x.array, u_bc.x.array)
+
+
+@pytest.mark.parametrize("P", np.arange(1, 5))
+@pytest.mark.parametrize("dim", np.arange(0, 3))
+def test_constant_topological(P, dim):
+    """
+    Test assignement of a time dependent constant to DirichletBC using geometrical mode
+    """
+
+    mesh = dolfinx.mesh.create_unit_square(MPI.COMM_WORLD, 10, 10)
+
+    def locator(x):
+        return np.isclose(x[0], 1)
+
+    time = dolfinx.fem.Constant(mesh, 1.)
+
+    entities = dolfinx.mesh.locate_entities(mesh, dim, locator)
+    value = np.int32(3)
+    et = dolfinx.mesh.meshtags(mesh, dim, entities, np.full(len(entities), value, dtype=np.int32))
+    bc = DirichletBC(time, LocatorMethod.TOPOLOGICAL, (et, value))
+
+    V = dolfinx.fem.FunctionSpace(mesh, ("Lagrange", int(P)))
+    bc.create_bc(V)
+
+    for t in [0.1, 0.2, 0.3]:
+        time.value += t
+        u = dolfinx.fem.Function(V)
+        u.interpolate(lambda x: np.sin(x[0]) + x[1]*t)
+        bc_dx = dolfinx.fem.dirichletbc(
+            time, dolfinx.fem.locate_dofs_topological(V, dim, entities), V)
+
+        u_bcx = dolfinx.fem.Function(V)
+        dolfinx.fem.petsc.set_bc(u_bcx.vector, [bc_dx])
+
+        u_bc = dolfinx.fem.Function(V)
+        bc.apply(u_bc.vector)
+        assert np.allclose(u_bcx.x.array, u_bc.x.array)
diff --git a/test/test_projector.py b/test/test_projector.py
index bab8cf6..9bf5eee 100644
--- a/test/test_projector.py
+++ b/test/test_projector.py
@@ -1,3 +1,8 @@
+# Copyright (C) 2022 Jørgen Schartum Dokken
+#
+# This file is part of Oasisx
+# SPDX-License-Identifier:    MIT
+
 from oasisx import Projector
 
 import dolfinx
@@ -16,7 +21,9 @@ def test_projector():
 
     # Create gradient projector
     W = dolfinx.fem.VectorFunctionSpace(mesh, ("DG", 1))
-    gradient_projector = Projector(ufl.grad(u), W, [])
+    petsc_options = {"ksp_type": "preonly", "pc_type": "lu",
+                     "pc_factor_mat_solver_type": "mumps"}
+    gradient_projector = Projector(ufl.grad(u), W, [], petsc_options=petsc_options)
     gradient_projector.solve()
 
     # assemle L2 error
diff --git a/test/test_tentative_velocity.py b/test/test_tentative_velocity.py
new file mode 100644
index 0000000..a5d28f0
--- /dev/null
+++ b/test/test_tentative_velocity.py
@@ -0,0 +1,200 @@
+# Copyright (C) 2022 Jørgen Schartum Dokken
+#
+# This file is part of Oasisx
+# SPDX-License-Identifier:    MIT
+
+from typing import List, Tuple, Optional
+
+import dolfinx
+import numpy as np
+import numpy.typing as npt
+import pytest
+import scipy.sparse
+import ufl
+from mpi4py import MPI
+from oasisx import DirichletBC, FractionalStep_AB_CN, LocatorMethod
+
+from petsc4py import PETSc
+
+
+def gather_PETScMatrix(A: PETSc.Mat, comm: MPI.Comm, root=0) -> scipy.sparse.csr_matrix:
+    """
+    Given a distributed PETSc matrix, gather in on process 'root' in
+    a scipy CSR matrix
+    """
+    ai, aj, av = A.getValuesCSR()
+    aj_all = comm.gather(aj, root=root)  # type: ignore
+    av_all = comm.gather(av, root=root)  # type: ignore
+    ai_all = comm.gather(ai, root=root)  # type: ignore
+    if comm.rank == root:
+        ai_cum = [0]
+        for ai in ai_all:  # type: ignore
+            offsets = ai[1:] + ai_cum[-1]
+            ai_cum.extend(offsets)
+        return scipy.sparse.csr_matrix(
+            (np.hstack(av_all), np.hstack(aj_all), ai_cum), shape=A.getSize())  # type: ignore
+
+
+def create_tentative_forms(mesh: dolfinx.mesh.Mesh,
+                           el_u: Tuple[str, int],
+                           el_p: Tuple[str, int], dt: float, nu: float,
+                           f: Optional[npt.NDArray[np.float64]]) -> Tuple[ufl.Form, List[ufl.Form],
+                                                                          dolfinx.fem.Function,
+                                                                          dolfinx.fem.Function]:
+    """
+    Direct implementation of the i-th component of the tentative velocity equation
+    """
+
+    V = dolfinx.fem.FunctionSpace(mesh, el_u)
+    Q = dolfinx.fem.FunctionSpace(mesh, el_p)
+
+    u = ufl.TrialFunction(V)
+    v = ufl.TestFunction(V)
+    p = dolfinx.fem.Function(Q)
+    u_n = dolfinx.fem.Function(V)
+    u_n2 = dolfinx.fem.Function(V)
+
+    dx = ufl.Measure("dx", domain=mesh)
+    u_ab = ufl.as_vector((1.5 * u_n - 0.5 * u_n2, 1.5 * u_n - 0.5 * u_n2))
+    u_avg = 0.5 * (u + u_n)
+    F = 1./dt * (u - u_n) * v * dx
+    F += ufl.dot(u_ab, ufl.grad(u_avg)) * v * dx
+    F += nu * ufl.inner(ufl.grad(u_avg), ufl.grad(v)) * dx
+    a, L = ufl.system(F)
+    Ls = []
+    for i in range(mesh.geometry.dim):
+        if f is None:
+            Ls.append(L - p.dx(i)*v*dx)
+        else:
+            Ls.append(L - p.dx(i)*v*dx + f[i]*v*dx)
+
+    return a, Ls, u_n, u_n2
+
+
+@pytest.mark.parametrize("body_force", [True, False])
+@pytest.mark.parametrize("low_memory", [True, False])
+def test_tentative(low_memory, body_force):
+    mesh = dolfinx.mesh.create_unit_square(MPI.COMM_WORLD, 10, 10)
+    dim = mesh.topology.dim - 1
+    el_u = ("CG", 1)
+    el_p = ("Lagrange", 1)
+
+    solver_options = {"tentative": {"ksp_type": "preonly", "pc_type": "lu"}}
+    options = {"low_memory_version": low_memory}
+
+    if body_force:
+        f = np.array([0.3, -0.1], dtype=np.float64)
+    else:
+        f = None
+
+    def left_edge(x):
+        return np.isclose(x[0], 0)
+
+    def top_and_bottom(x):
+        return np.logical_or(np.isclose(x[1], 0), np.isclose(x[1], 1))
+
+    def outlet(x):
+        return np.isclose(x[0], 1)
+
+    # Locate facets for boundary conditions and create meshtags
+    left_facets = dolfinx.mesh.locate_entities_boundary(mesh, dim, left_edge)
+    left_value = 1
+    tb_facets = dolfinx.mesh.locate_entities_boundary(mesh, dim, top_and_bottom)
+    tb_value = 2
+    right_facets = dolfinx.mesh.locate_entities_boundary(mesh, dim, outlet)
+    right_value = 3
+    facets = np.hstack([left_facets, tb_facets, right_facets])
+    values = np.hstack([np.full_like(left_facets, left_value, dtype=np.int32),
+                        np.full_like(tb_facets, tb_value, dtype=np.int32),
+                        np.full_like(right_facets, right_value, dtype=np.int32)])
+    sort = np.argsort(facets)
+    facet_tags = dolfinx.mesh.meshtags(mesh, dim, facets[sort], values[sort])
+
+    # Create boundary conditions
+    class Inlet():
+        def __init__(self, t):
+            self.t = t
+
+        def eval(self, x):
+            return (1+self.t) * np.sin(np.pi*x[1])
+
+    inlet = Inlet(0)
+
+    bc_tb = DirichletBC(0., LocatorMethod.TOPOLOGICAL, (facet_tags, tb_value))
+    bc_inlet_x = DirichletBC(inlet.eval, LocatorMethod.TOPOLOGICAL, (facet_tags, left_value))
+    bc_inlet_y = DirichletBC(0., LocatorMethod.TOPOLOGICAL, (facet_tags, left_value))
+    bcs_u = [[bc_inlet_x, bc_tb], [bc_inlet_y, bc_tb]]
+    bcs_p = [DirichletBC(0., LocatorMethod.TOPOLOGICAL, (facet_tags, right_value))]
+
+    # Create fractional step solver
+    solver = FractionalStep_AB_CN(
+        mesh, el_u, el_p, bcs_u=bcs_u, bcs_p=bcs_p,
+        solver_options=solver_options, options=options, body_force=f)
+
+    dt = 0.1
+    nu = 0.5
+
+    # Set some almost sensible initial conditons
+    inlet.t = -2*dt
+    solver._u2[0].interpolate(inlet.eval)
+    solver._u2[1].interpolate(inlet.eval)
+    inlet.t = -dt
+    solver._u1[0].interpolate(inlet.eval)
+    solver._u1[1].interpolate(inlet.eval)
+    inlet.t = dt
+    bc_inlet_x.update_bc()
+
+    solver.tenative_velocity(dt, nu)
+    A_oasis = solver._A
+
+    # Reference implementation
+    a, Ls, u_n, u_n2 = create_tentative_forms(mesh, el_u, el_p, dt, nu, f)
+    V = u_n.function_space
+    ux = dolfinx.fem.Function(V)
+    ux.interpolate(inlet.eval)
+    inlet.t = -2*dt
+    u_n2.interpolate(inlet.eval)
+    inlet.t = -dt
+    u_n.interpolate(inlet.eval)
+    bcs_u = [[
+        dolfinx.fem.dirichletbc(ux, dolfinx.fem.locate_dofs_topological(V, dim, left_facets)),
+        dolfinx.fem.dirichletbc(0., dolfinx.fem.locate_dofs_topological(V, dim, tb_facets), V)],
+        [dolfinx.fem.dirichletbc(0., dolfinx.fem.locate_dofs_topological(V, dim, left_facets), V),
+         dolfinx.fem.dirichletbc(0., dolfinx.fem.locate_dofs_topological(V, dim, tb_facets), V)],
+        [dolfinx.fem.dirichletbc(0., dolfinx.fem.locate_dofs_topological(V, dim, left_facets), V),
+         dolfinx.fem.dirichletbc(0., dolfinx.fem.locate_dofs_topological(V, dim, tb_facets), V)]]
+    A = dolfinx.fem.petsc.assemble_matrix(dolfinx.fem.form(a))
+    A.assemble()
+    for bc in bcs_u[0]:
+        A.zeroRowsLocal(bc.dof_indices()[0], 1.)
+    bs = []
+    for i in range(mesh.geometry.dim):
+        b = dolfinx.fem.petsc.assemble_vector(dolfinx.fem.form(Ls[i]))
+        b.ghostUpdate(addv=PETSc.InsertMode.ADD, mode=PETSc.ScatterMode.REVERSE)
+        dolfinx.fem.petsc.set_bc(b, bcs_u[i])
+        b.ghostUpdate(addv=PETSc.InsertMode.INSERT, mode=PETSc.ScatterMode.FORWARD)
+
+        bs.append(b)
+
+    # Compare matrices
+    root = 0
+    scipy_A = gather_PETScMatrix(A, mesh.comm, root=root)
+    scipy_A_oasis = gather_PETScMatrix(A_oasis, mesh.comm, root=root)
+    if mesh.comm == root:
+        diff = np.abs(scipy_A, scipy_A_oasis)
+        assert diff.max() < 1e-14
+
+    # Compare vectors
+    converged_u = solver.velocity_tentative_solve()
+    for i in range(mesh.geometry.dim):
+        assert converged_u[1][i]
+        assert np.allclose(solver._b_u[i].vector.array, bs[i].array)
+
+    solver.pressure_assemble(dt)
+    converged_p = solver.pressure_solve()
+    assert converged_p
+    # converged_update = solver.velocity_update(dt)
+    # with dolfinx.io.VTXWriter(mesh.comm, "u.bp", [solver.u]) as vtx:
+    #     vtx.write(0.)
+    with dolfinx.io.VTXWriter(mesh.comm, "dp.bp", [solver._dp]) as vtx:
+        vtx.write(0.)