From 965ac47b4db875c9c58b5852be4267dd85e87570 Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Thu, 19 Oct 2023 06:56:20 -0600 Subject: [PATCH] updates --- .../LinearOperator_{SCALAR}.pxi | 7 + base/PyNucleus_base/linear_operators.pyx | 60 +- base/PyNucleus_base/performanceLogger.pyx | 10 +- base/PyNucleus_base/tupleDict_{VALUE}.pxi | 10 + base/PyNucleus_base/utilsFem.py | 13 +- docs/example2.rst | 2 +- drivers/runFractional.py | 8 +- drivers/runFractionalHeat.py | 4 +- drivers/testDistOp.py | 7 +- fem/PyNucleus_fem/DoFMaps.pxd | 6 +- fem/PyNucleus_fem/DoFMaps.pyx | 42 +- fem/PyNucleus_fem/lookupFunction.pyx | 4 +- fem/PyNucleus_fem/mesh.py | 7 +- fem/PyNucleus_fem/meshCy.pxd | 12 +- fem/PyNucleus_fem/meshCy.pyx | 183 ++- .../PyNucleus_multilevelSolver/levels.py | 2 +- nl/PyNucleus_nl/bem_{SCALAR}.pxi | 1182 ----------------- nl/PyNucleus_nl/bitset.pxd.in | 4 + nl/PyNucleus_nl/bitset.pyx | 31 +- nl/PyNucleus_nl/clusterMethodCy.pyx | 17 +- nl/PyNucleus_nl/discretizedProblems.py | 35 + nl/PyNucleus_nl/fractionalLaplacian1D.pyx | 92 +- nl/PyNucleus_nl/fractionalLaplacian2D.pyx | 87 +- nl/PyNucleus_nl/fractionalOrders.pyx | 74 +- nl/PyNucleus_nl/helpers.py | 8 +- nl/PyNucleus_nl/interactionDomains.pyx | 54 +- nl/PyNucleus_nl/kernelNormalization.pyx | 87 +- nl/PyNucleus_nl/kernelsCy.pxd | 14 +- nl/PyNucleus_nl/kernelsCy.pyx | 228 ++-- nl/PyNucleus_nl/nonlocalAssembly.pyx | 60 +- .../nonlocalAssembly_decl_{SCALAR}.pxi | 4 +- nl/PyNucleus_nl/nonlocalAssembly_{SCALAR}.pxi | 599 ++++----- .../nonlocalOperator_decl_{SCALAR}.pxi | 17 +- nl/PyNucleus_nl/nonlocalOperator_{SCALAR}.pxi | 120 +- nl/PyNucleus_nl/nonlocalProblems.py | 48 +- nl/PyNucleus_nl/twoPointFunctions.pyx | 353 ++--- .../twoPointFunctions_decl_{SCALAR}.pxi | 7 +- .../twoPointFunctions_{SCALAR}.pxi | 83 +- ...-elementP1--solvergmres-mg--matrixFormatH2 | 12 +- ...-elementP1--solvergmres-mg--matrixFormatH2 | 12 +- ...ution--elementP1--solverlu--matrixFormatH2 | 12 +- 
...st--buildDistributedH2--doSolve--no-write4 | 17 +- ...st--buildDistributedH2--doSolve--no-write4 | 17 +- ...st--buildDistributedH2--doSolve--no-write4 | 14 - ...st--buildDistributedH2--doSolve--no-write4 | 15 + ...st--buildDistributedH2--doSolve--no-write4 | 17 +- ...st--buildDistributedH2--doSolve--no-write4 | 17 +- ...st--buildDistributedH2--doSolve--no-write4 | 17 +- ...st--buildDistributedH2--doSolve--no-write4 | 17 +- ...st--buildDistributedH2--doSolve--no-write4 | 14 - ...st--buildDistributedH2--doSolve--no-write4 | 15 + ...st--buildDistributedH2--doSolve--no-write4 | 17 +- ...st--buildDistributedH2--doSolve--no-write4 | 17 +- tests/test_drivers_intFracLapl.py | 2 + tests/test_fracLapl.py | 2 - tests/test_kernels.py | 108 +- 56 files changed, 1495 insertions(+), 2428 deletions(-) delete mode 100644 nl/PyNucleus_nl/bem_{SCALAR}.pxi delete mode 100644 tests/cache_testDistOp.py--horizoninf--domaininterval--stwoDomain(0.25,0.75,0.5,0.5)--problemconstant--noRef6--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 create mode 100644 tests/cache_testDistOp.py--horizoninf--domaininterval--stwoDomainNonSym(0.25,0.75)--problemconstant--noRef6--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 delete mode 100644 tests/cache_testDistOp.py--horizoninf--domainsquare--stwoDomain(0.25,0.75,0.5,0.5)--problemconstant--noRef3--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 create mode 100644 tests/cache_testDistOp.py--horizoninf--domainsquare--stwoDomainNonSym(0.25,0.75)--problemconstant--noRef3--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 diff --git a/base/PyNucleus_base/LinearOperator_{SCALAR}.pxi b/base/PyNucleus_base/LinearOperator_{SCALAR}.pxi index 2e221f8..1f7adc2 100644 --- a/base/PyNucleus_base/LinearOperator_{SCALAR}.pxi +++ 
b/base/PyNucleus_base/LinearOperator_{SCALAR}.pxi @@ -220,6 +220,13 @@ cdef class {SCALAR_label}LinearOperator: return Dense_LinearOperator.HDF5read(node) elif node.attrs['type'] == 'diagonal': return diagonalOperator.HDF5read(node) + elif node.attrs['type'] == 'interpolationOperator': + return interpolationOperator.HDF5read(node) + elif node.attrs['type'] == 'multiIntervalInterpolationOperator': + return multiIntervalInterpolationOperator.HDF5read(node) + elif node.attrs['type'] == 'h2': + from PyNucleus_nl.clusterMethodCy import H2Matrix + return H2Matrix.HDF5read(node) else: raise NotImplementedError(node.attrs['type']) diff --git a/base/PyNucleus_base/linear_operators.pyx b/base/PyNucleus_base/linear_operators.pyx index 2531cf7..249bbd1 100644 --- a/base/PyNucleus_base/linear_operators.pyx +++ b/base/PyNucleus_base/linear_operators.pyx @@ -1351,12 +1351,37 @@ cdef class interpolationOperator(sumMultiplyOperator): def __repr__(self): return '<%dx%d %s with %d interpolation nodes>' % (self.num_rows, self.num_columns, self.__class__.__name__, self.numInterpolationNodes) + def HDF5write(self, node): + node.attrs['type'] = 'interpolationOperator' + node.attrs['left'] = self.left + node.attrs['right'] = self.right + node.create_dataset('nodes', data=np.array(self.nodes, copy=False)) + for i in range(len(self.ops)): + grp = node.create_group(str(i)) + self.ops[i].HDF5write(grp) + + @staticmethod + def HDF5read(node): + left = node.attrs['left'] + right = node.attrs['right'] + nodes = np.array(node['nodes'], dtype=REAL) + ops = [] + for i in range(nodes.shape[0]): + ops.append(LinearOperator.HDF5read(node[str(i)])) + return interpolationOperator(ops, nodes, left, right) + + cpdef void assure_constructed(self): + for i in range(len(self.ops)): + if isinstance(self.ops[i], delayedConstructionOperator): + self.ops[i].assure_constructed() + cdef class multiIntervalInterpolationOperator(LinearOperator): cdef: public list ops INDEX_t selected - REAL_t left, right + readonly 
REAL_t left + readonly REAL_t right def __init__(self, list intervals, list nodes, list ops): shape = ops[0][0].shape @@ -1371,6 +1396,12 @@ cdef class multiIntervalInterpolationOperator(LinearOperator): self.ops.append(interpolationOperator(ops[k], nodes[k], left, right)) self.selected = -1 + def get(self): + if self.selected != -1: + return self.ops[self.selected].val + else: + return np.nan + def set(self, REAL_t val, BOOL_t derivative=False): cdef: interpolationOperator op @@ -1434,6 +1465,29 @@ cdef class multiIntervalInterpolationOperator(LinearOperator): def isSparse(self): return self.getSelectedOp().isSparse() + def HDF5write(self, node): + node.attrs['type'] = 'multiIntervalInterpolationOperator' + for i in range(len(self.ops)): + grp = node.create_group(str(i)) + self.ops[i].HDF5write(grp) + + @staticmethod + def HDF5read(node): + numOps = len(node) + ops = [] + nodes = [] + intervals = [] + for i in range(numOps): + op = LinearOperator.HDF5read(node[str(i)]) + ops.append(op.ops) + nodes.append(op.nodes) + intervals.append((op.left, op.right)) + return multiIntervalInterpolationOperator(intervals, nodes, ops) + + cpdef void assure_constructed(self): + for i in range(len(self.ops)): + self.ops[i].assure_constructed() + cdef class delayedConstructionOperator(LinearOperator): def __init__(self, INDEX_t numRows, INDEX_t numCols): @@ -1490,3 +1544,7 @@ cdef class delayedConstructionOperator(LinearOperator): def isSparse(self): self.assure_constructed() return self.A.isSparse() + + def HDF5write(self, node): + self.assure_constructed() + self.A.HDF5write(node) diff --git a/base/PyNucleus_base/performanceLogger.pyx b/base/PyNucleus_base/performanceLogger.pyx index f14defd..1e5115e 100644 --- a/base/PyNucleus_base/performanceLogger.pyx +++ b/base/PyNucleus_base/performanceLogger.pyx @@ -30,7 +30,7 @@ cpdef void endMemRegion(str key): cdef class FakeTimer: - def __init__(self): + def __init__(self, str key=''): pass cdef void start(self): @@ -176,7 +176,13 @@ 
cdef class PLogger(FakePLogger): s = '' for key in sorted(self.values.keys()): if totalsOnly: - s += '{}: {} ({} calls)\n'.format(str(key), sum(self.values[key]), len(self.values[key])) + try: + s += '{}: {} ({} calls)\n'.format(str(key), sum(self.values[key]), len(self.values[key])) + except TypeError: + if len(self.values[key]): + s += str(key) +': ' + self.values[key][0].__repr__() + '\n' + else: + s += str(key) +': ' + self.values[key].__repr__() + '\n' else: s += str(key) +': ' + self.values[key].__repr__() + '\n' return s diff --git a/base/PyNucleus_base/tupleDict_{VALUE}.pxi b/base/PyNucleus_base/tupleDict_{VALUE}.pxi index 8276e81..dce0b35 100644 --- a/base/PyNucleus_base/tupleDict_{VALUE}.pxi +++ b/base/PyNucleus_base/tupleDict_{VALUE}.pxi @@ -264,3 +264,13 @@ cdef class tupleDict{VALUE}: print(i, j, self.indexL[i][j], self.indexL[i][j+1]) return False return True + + def toDict(self): + cdef: + INDEX_t e[2] + {VALUE_t} val + dict d = {} + self.startIter() + while self.next(e, &val): + d[(e[0], e[1])] = val + return d diff --git a/base/PyNucleus_base/utilsFem.py b/base/PyNucleus_base/utilsFem.py index 2d48e70..6a40f9e 100644 --- a/base/PyNucleus_base/utilsFem.py +++ b/base/PyNucleus_base/utilsFem.py @@ -102,7 +102,10 @@ def mergeOrdered(a_list, b_list): val = data[key] # (number of calls, min over calls, mean over calls, med over calls, max over calls) # on rank - data2[key] = (len(val), np.min(val), np.mean(val), np.median(val), np.max(val)) + try: + data2[key] = (len(val), np.min(val), np.mean(val), np.median(val), np.max(val)) + except: + pass data = data2 # gather data for all ranks if self.comm is not None: @@ -1475,15 +1478,21 @@ def __call__(self): newValue = getattr(self.baseObj, prop) oldValue = self.cached_args.get(prop, None) args.append(newValue) - cached_args[prop] = newValue # TODO: keep hash? 
try: if isinstance(newValue, np.ndarray): + cached_args[prop] = newValue.copy() if (newValue != oldValue).any(): + dependencyLogger.log(self.logLevel, 'Values for {} differ: \'{}\' != \'{}\', calling \'{}\''.format(prop, oldValue, newValue, self.fun.__name__)) needToBuild = True + else: + dependencyLogger.log(self.logLevel, 'Values for {} are identical: \'{}\' == \'{}\''.format(prop, oldValue, newValue)) elif newValue != oldValue: + cached_args[prop] = newValue dependencyLogger.log(self.logLevel, 'Values for {} differ: \'{}\' != \'{}\', calling \'{}\''.format(prop, oldValue, newValue, self.fun.__name__)) needToBuild = True + else: + dependencyLogger.log(self.logLevel, 'Values for {} are identical: \'{}\' == \'{}\''.format(prop, oldValue, newValue)) except Exception as e: dependencyLogger.log(logging.WARN, 'Cannot compare values {}, {} for property \'{}\', exception {}, force call \'{}\''.format(oldValue, newValue, prop, e, self.fun.__name__)) needToBuild = True diff --git a/docs/example2.rst b/docs/example2.rst index 49d42cd..3090fd5 100644 --- a/docs/example2.rst +++ b/docs/example2.rst @@ -23,7 +23,7 @@ The singularity :math:`\beta` of the kernel depends on the family of kernels: - fractional type: :math:`\beta(x,y)=d+2s(x,y)`, where :math:`d` is the spatial dimension and :math:`s(x,y)` is the fractional order. 
- constant type :math:`\beta(x,y)=0` -- peridynamic type :math:`\beta(x,y)=-1` +- peridynamic type :math:`\beta(x,y)=1` At present, the only implemented interaction regions are balls in the 2-norm: diff --git a/drivers/runFractional.py b/drivers/runFractional.py index 10924ca..ad50828 100755 --- a/drivers/runFractional.py +++ b/drivers/runFractional.py @@ -16,6 +16,7 @@ d = driver(MPI.COMM_WORLD) d.add('saveOperators', False) +d.add('vtkOutput', "") p = fractionalLaplacianProblem(d, False) discrProblem = discretizedNonlocalProblem(d, p) @@ -60,9 +61,12 @@ if p.element != 'P0': plotDefaults['shading'] = 'gouraud' -if d.startPlot('solution'): +if p.dim < 3 and d.startPlot('solution'): mS.plotSolution() -if mS.error is not None and d.startPlot('error'): +if p.dim < 3 and mS.error is not None and d.startPlot('error'): mS.error.plot(**plotDefaults) +if d.vtkOutput != "": + mS.exportVTK(d.vtkOutput) + d.finish() diff --git a/drivers/runFractionalHeat.py b/drivers/runFractionalHeat.py index a044a4e..e8616b1 100755 --- a/drivers/runFractionalHeat.py +++ b/drivers/runFractionalHeat.py @@ -53,9 +53,9 @@ if p.element != 'P0': plotDefaults['shading'] = 'gouraud' -if d.startPlot('solution'): +if p.dim < 3 and d.startPlot('solution'): mS.plotSolution() -if mS.error is not None and d.startPlot('error'): +if p.dim < 3 and mS.error is not None and d.startPlot('error'): mS.error.plot(**plotDefaults) d.finish() diff --git a/drivers/testDistOp.py b/drivers/testDistOp.py index 5a292c0..033a397 100755 --- a/drivers/testDistOp.py +++ b/drivers/testDistOp.py @@ -207,8 +207,7 @@ if d.buildDistributedH2Bcast: with d.timer('distributed, bcast build'): A_distributedH2Bcast = dm.assembleNonlocal(nPP.kernel, matrixFormat='H2', comm=d.comm, - params={'assembleOnRoot': False, - 'forceUnsymmetric': True}) + params={'assembleOnRoot': False}) with d.timer('distributed, bcast matvec'): print('Distributed: ', A_distributedH2Bcast) y_distributedH2Bcast = A_distributedH2Bcast*x @@ -220,7 +219,6 @@ with 
d.timer('distributed, halo build'): A_distributedH2 = dm.assembleNonlocal(nPP.kernel, matrixFormat='H2', comm=d.comm, params={'assembleOnRoot': False, - 'forceUnsymmetric': True, 'localFarFieldIndexing': True}, PLogger=tm.PLogger) t = d.addOutputGroup('TimersH2', timerOutputGroup(driver=d)) @@ -345,11 +343,12 @@ cg.maxIter = 1000 u = lcl_dm.zeros() with d.timer('CG solve'): - cg(b, u) + iterCG = cg(b, u) residuals = cg.residuals solveGroup = d.addOutputGroup('solve', tested=True, rTol=1e-1) solveGroup.add('residual norm', residuals[-1]) + solveGroup.add('CG iterations', iterCG) # pure Neumann condition -> add nullspace components to match analytic solution if nPP.boundaryCondition in (NEUMANN, HOMOGENEOUS_NEUMANN) and nPP.analyticSolution is not None: diff --git a/fem/PyNucleus_fem/DoFMaps.pxd b/fem/PyNucleus_fem/DoFMaps.pxd index 8596c55..864e74f 100644 --- a/fem/PyNucleus_fem/DoFMaps.pxd +++ b/fem/PyNucleus_fem/DoFMaps.pxd @@ -19,7 +19,9 @@ cdef class DoFMap: public meshBase mesh #: The underlying mesh readonly INDEX_t dim #: The spatial dimension of the underlying mesh BOOL_t reordered - public list localShapeFunctions #: List of local shape functions + public list _localShapeFunctions #: List of local shape functions + void** _localShapeFunctionsPtr + BOOL_t vectorValued public REAL_t[:, ::1] nodes #: The barycentric coordinates of the DoFs public REAL_t[:, ::1] dof_dual public INDEX_t num_dofs #: The number of DoFs of the finite element space @@ -46,6 +48,8 @@ cdef class DoFMap: cpdef void getVertexDoFs(self, INDEX_t[:, ::1] v2d) cpdef void resetUsingIndicator(self, function indicator) cpdef void resetUsingFEVector(self, REAL_t[::1] ind) + cdef shapeFunction getLocalShapeFunction(self, INDEX_t dofNo) + cdef vectorShapeFunction getLocalVectorShapeFunction(self, INDEX_t dofNo) cdef class P1_DoFMap(DoFMap): diff --git a/fem/PyNucleus_fem/DoFMaps.pyx b/fem/PyNucleus_fem/DoFMaps.pyx index c3bf5c6..f6e5fb6 100644 --- a/fem/PyNucleus_fem/DoFMaps.pyx +++ 
b/fem/PyNucleus_fem/DoFMaps.pyx @@ -10,6 +10,7 @@ cimport numpy as np from libc.math cimport isnan from PyNucleus_base.myTypes import INDEX, REAL, COMPLEX, BOOL from cpython cimport Py_buffer +from libc.stdlib cimport malloc, free from PyNucleus_base.blas cimport assign, assign3, assignScaled, matmat from PyNucleus_base.ip_norm cimport vector_t, ip_serial, norm_serial, wrapRealInnerToComplex, wrapRealNormToComplex from PyNucleus_base import uninitialized @@ -330,6 +331,42 @@ cdef class DoFMap: ind = self.interpolate(indicator) self.resetUsingFEVector(ind) + @property + def localShapeFunctions(self): + return self._localShapeFunctions + + @localShapeFunctions.setter + def localShapeFunctions(self, list localShapeFunctions): + cdef: + INDEX_t i + shapeFunction sf + vectorShapeFunction vsf + if isinstance(localShapeFunctions[0], shapeFunction): + self.vectorValued = False + self._localShapeFunctions = localShapeFunctions + self._localShapeFunctionsPtr = malloc(len(localShapeFunctions)*sizeof(void*)) + for i in range(len(localShapeFunctions)): + sf = self._localShapeFunctions[i] + self._localShapeFunctionsPtr[i] = sf + elif isinstance(localShapeFunctions[0], vectorShapeFunction): + self.vectorValued = True + self._localShapeFunctions = localShapeFunctions + self._localShapeFunctionsPtr = malloc(len(localShapeFunctions)*sizeof(void*)) + for i in range(len(localShapeFunctions)): + vsf = self._localShapeFunctions[i] + self._localShapeFunctionsPtr[i] = vsf + else: + raise NotImplementedError() + + cdef shapeFunction getLocalShapeFunction(self, INDEX_t dofNo): + return self._localShapeFunctionsPtr[dofNo] + + cdef vectorShapeFunction getLocalVectorShapeFunction(self, INDEX_t dofNo): + return self._localShapeFunctionsPtr[dofNo] + + def __del__(self): + free(self._localShapeFunctionsPtr) + cpdef void resetUsingFEVector(self, REAL_t[::1] ind): cdef: INDEX_t[:, ::1] new_dofs = uninitialized((self.mesh.num_cells, @@ -2506,18 +2543,19 @@ cdef class Product_DoFMap(DoFMap): 
self.num_dofs = self.numComponents*self.scalarDM.num_dofs self.num_boundary_dofs = self.numComponents*self.scalarDM.num_boundary_dofs - self.localShapeFunctions = [] + localShapeFunctions = [] self.nodes = uninitialized((self.dofs_per_element, self.mesh.dim+1), dtype=REAL) self.dof_dual = np.zeros((self.dofs_per_element, numComponents), dtype=REAL) i = 0 for dofNo in range(self.scalarDM.dofs_per_element): for component in range(numComponents): phi = self.scalarDM.localShapeFunctions[dofNo] - self.localShapeFunctions.append(productSpaceShapeFunction(phi, numComponents, component, self.mesh.dim)) + localShapeFunctions.append(productSpaceShapeFunction(phi, numComponents, component, self.mesh.dim)) for j in range(dim+1): self.nodes[i, j] = self.scalarDM.nodes[dofNo, j] self.dof_dual[i, component] = 1. i += 1 + self.localShapeFunctions = localShapeFunctions def __repr__(self): return '({})^{} with {} DoFs and {} boundary DoFs.'.format(type(self.scalarDM).__name__, diff --git a/fem/PyNucleus_fem/lookupFunction.pyx b/fem/PyNucleus_fem/lookupFunction.pyx index dd213a4..9fd8795 100644 --- a/fem/PyNucleus_fem/lookupFunction.pyx +++ b/fem/PyNucleus_fem/lookupFunction.pyx @@ -33,7 +33,7 @@ cdef class lookupFunction(function): for k in range(self.dm.dofs_per_element): dof = self.dm.cell2dof(cellNo, k) if dof >= 0: - shapeFun = self.dm.localShapeFunctions[k] + shapeFun = self.dm.getLocalShapeFunction(k) val += shapeFun.eval(self.cellFinder.bary)*self.u[dof] return val @@ -75,7 +75,7 @@ cdef class vectorLookupFunction(vectorFunction): for k in range(self.dm.dofs_per_element): dof = self.dm.cell2dof(cellNo, k) if dof >= 0: - shapeFun = self.dm.localShapeFunctions[k] + shapeFun = self.dm.getLocalVectorShapeFunction(k) shapeFun.setCell(self.mesh.cells[cellNo, :]) shapeFun.eval(self.cellFinder.bary, self.gradients, self.temp) for componentNo in range(self.mesh.dim): diff --git a/fem/PyNucleus_fem/mesh.py b/fem/PyNucleus_fem/mesh.py index 44bf718..0411c5d 100644 --- 
a/fem/PyNucleus_fem/mesh.py +++ b/fem/PyNucleus_fem/mesh.py @@ -2024,14 +2024,14 @@ class mesh0d(meshNd): class mesh1d(meshNd): - def plot(self, boundary=None, info=False): + def plot(self, vertices=True, boundary=None, info=False): import matplotlib.pyplot as plt X = np.array([v[0] for v in self.vertices]) if self.vertices.shape[1] == 1: Y = np.zeros_like(X) lenX = X.max()-X.min() plt.xlim([X.min()-lenX*0.1, X.max()+lenX*0.1]) - plt.plot(X, Y, 'o-', zorder=1) + plt.plot(X, Y, 'o-' if vertices else '-', zorder=1) else: v = self.vertices_as_array c = self.cells_as_array @@ -2040,7 +2040,8 @@ def plot(self, boundary=None, info=False): [v[c[:, 0], 1], v[c[:, 1], 1]], c='k') - plt.scatter(self.vertices_as_array[:, 0], self.vertices_as_array[:, 1]) + if vertices: + plt.scatter(self.vertices_as_array[:, 0], self.vertices_as_array[:, 1]) lenX = v[:, 0].max()-v[:, 0].min() plt.xlim([v[:, 0].min()-lenX*0.1, v[:, 0].max()+lenX*0.1]) lenY = v[:, 1].max()-v[:, 1].min() diff --git a/fem/PyNucleus_fem/meshCy.pxd b/fem/PyNucleus_fem/meshCy.pxd index 7c5fd17..972391c 100644 --- a/fem/PyNucleus_fem/meshCy.pxd +++ b/fem/PyNucleus_fem/meshCy.pxd @@ -8,6 +8,8 @@ from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, ENCODE_t, BOOL_t from PyNucleus_base.intTuple cimport intTuple from PyNucleus_base.tupleDict cimport tupleDictINDEX +from PyNucleus_base.linear_operators cimport sparseGraph +from PyNucleus_base.intTuple cimport productIterator cimport numpy as np from . 
simplexMapper cimport simplexMapper, simplexMapper1D, simplexMapper2D, simplexMapper3D @@ -117,12 +119,14 @@ cdef class cellFinder2: cdef: meshBase mesh REAL_t[::1] diamInv, x_min - public dict lookup - public dict v2c + faceVals lookup REAL_t[:, ::1] simplex REAL_t[::1] bary - INDEX_t[::1] key - intTuple myKey + INDEX_t[::1] key, key2 + sparseGraph graph + sparseGraph v2c + INDEX_t[::1] candidates + productIterator pit cdef INDEX_t findCell(self, REAL_t[::1] vertex) cdef INDEX_t findCellPtr(self, REAL_t* vertex) diff --git a/fem/PyNucleus_fem/meshCy.pyx b/fem/PyNucleus_fem/meshCy.pyx index 634f6ef..5deda70 100644 --- a/fem/PyNucleus_fem/meshCy.pyx +++ b/fem/PyNucleus_fem/meshCy.pyx @@ -17,7 +17,6 @@ from libc.stdlib cimport qsort from PyNucleus_base.myTypes import INDEX, REAL, ENCODE, TAG from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, ENCODE_t, TAG_t from PyNucleus_base.blas cimport mydot -from PyNucleus_base.intTuple cimport productIterator import warnings cdef INDEX_t MAX_INT = np.iinfo(INDEX).max @@ -2053,6 +2052,7 @@ cdef class faceVals: for m in range(self.counts[f[0]]): if self.indexL[f[0]][m] == f[1] and self.indexR[f[0]][m] == f[2]: # J is already present return self.vals[f[0]][m] + return -1 def __getitem__(self, INDEX_t[::1] face): return self.getValue(face) @@ -2145,11 +2145,16 @@ cdef class cellFinder(object): cdef class cellFinder2: def __init__(self, meshBase mesh): cdef: - INDEX_t L, j, k, cellNo, vertexNo, vertex + INDEX_t L, j, k, cellNo, vertexNo, vertex, idx, maxCellsPerKey, maxIdx, maxKeyPerDim + REAL_t[::1] x_max REAL_t h - intTuple t REAL_t[:, ::1] cellCenters = mesh.getCellCenters() - self.key = uninitialized((mesh.dim), dtype=INDEX) + INDEX_t[::1] rowPtr, indices + self.key = np.zeros((3), dtype=INDEX) + self.key2 = np.zeros((3), dtype=INDEX) + # We are mapping coordinate x to + # floor((x[j]-x_min[j]) * diamInv[j]) \in [0, L/1.01] + # where L is O(1/h). 
L = 1 h = mesh.h while L*h < 0.5: @@ -2161,65 +2166,114 @@ cdef class cellFinder2: self.diamInv[j] = L / (x_max[j]-self.x_min[j]) / 1.01 self.simplex = uninitialized((mesh.dim+1, mesh.dim), dtype=REAL) self.bary = uninitialized((mesh.dim+1), dtype=REAL) - self.lookup = {} + + maxKeyPerDim = 0 + for j in range(mesh.dim): + maxKeyPerDim = max(maxKeyPerDim, ((x_max[j]-self.x_min[j]) * self.diamInv[j])) + self.lookup = faceVals(maxKeyPerDim+1, deleteHits=False) + + maxIdx = 0 for k in range(cellCenters.shape[0]): for j in range(mesh.dim): self.key[j] = ((cellCenters[k, j]-self.x_min[j]) * self.diamInv[j]) - t = intTuple.create(self.key) - try: - self.lookup[t].add(k) - except KeyError: - self.lookup[t] = set([k]) + idx = self.lookup.enterValue(self.key, maxIdx) + if idx == maxIdx: + maxIdx += 1 + + rowPtr = np.zeros((maxIdx+1), dtype=INDEX) + indices = np.zeros((cellCenters.shape[0]), dtype=INDEX) + + for k in range(cellCenters.shape[0]): + for j in range(mesh.dim): + self.key[j] = ((cellCenters[k, j]-self.x_min[j]) * self.diamInv[j]) + idx = self.lookup.getValue(self.key) + rowPtr[idx+1] += 1 + maxCellsPerKey = 0 + for idx in range(rowPtr.shape[0]-1): + maxCellsPerKey = max(maxCellsPerKey, rowPtr[idx]) + rowPtr[idx+1] += rowPtr[idx] + self.candidates = uninitialized(((3**mesh.dim)*maxCellsPerKey), dtype=INDEX) + for k in range(cellCenters.shape[0]): + for j in range(mesh.dim): + self.key[j] = ((cellCenters[k, j]-self.x_min[j]) * self.diamInv[j]) + idx = self.lookup.getValue(self.key) + indices[rowPtr[idx]] = k + rowPtr[idx] += 1 + for idx in range(rowPtr.shape[0]-1, 0, -1): + rowPtr[idx] = rowPtr[idx-1] + rowPtr[0] = 0 + + self.graph = sparseGraph(indices, rowPtr, rowPtr.shape[0]-1, cellCenters.shape[0]) + self.mesh = mesh - self.v2c = {} + rowPtr = np.zeros((mesh.num_vertices+1), dtype=INDEX) for cellNo in range(mesh.num_cells): for vertexNo in range(mesh.dim+1): vertex = mesh.cells[cellNo, vertexNo] - try: - self.v2c[vertex].add(cellNo) - except KeyError: - 
self.v2c[vertex] = set([cellNo]) - self.myKey = intTuple.createNonOwning(self.key) + rowPtr[vertex+1] += 1 + for vertex in range(mesh.num_vertices): + rowPtr[vertex+1] += rowPtr[vertex] + indices = np.zeros((rowPtr[mesh.num_vertices]), dtype=INDEX) + for cellNo in range(mesh.num_cells): + for vertexNo in range(mesh.dim+1): + vertex = mesh.cells[cellNo, vertexNo] + indices[rowPtr[vertex]] = cellNo + rowPtr[vertex] += 1 + for idx in range(rowPtr.shape[0]-1, 0, -1): + rowPtr[idx] = rowPtr[idx-1] + rowPtr[0] = 0 + self.v2c = sparseGraph(indices, rowPtr, mesh.num_vertices, mesh.num_cells) + + self.pit = productIterator(3, self.mesh.dim) cdef INDEX_t findCell(self, REAL_t[::1] vertex): cdef: - INDEX_t j, cellNo, vertexNo, v - set candidates, toCheck = set() - productIterator pit - INDEX_t[::1] keyCenter + INDEX_t j, cellNo, vertexNo, v, idx, jj, kk + set toCheck + INDEX_t numCandidates = 0 for j in range(self.mesh.dim): self.key[j] = ((vertex[j]-self.x_min[j]) * self.diamInv[j]) - try: - candidates = self.lookup[self.myKey] - except KeyError: - keyCenter = np.array(self.key, copy=True) - pit = productIterator(3, self.mesh.dim) - candidates = set() - pit.reset() - while pit.step(): + idx = self.lookup.getValue(self.key) + if idx != -1: + for jj in range(self.graph.indptr[idx], self.graph.indptr[idx+1]): + cellNo = self.graph.indices[jj] + self.candidates[numCandidates] = cellNo + numCandidates += 1 + else: + self.pit.reset() + while self.pit.step(): for j in range(self.mesh.dim): - self.key[j] = keyCenter[j] + pit.idx[j]-1 - try: - candidates |= self.lookup[self.myKey] - except KeyError: - pass + self.key2[j] = self.key[j] + self.pit.idx[j]-1 + idx = self.lookup.getValue(self.key2) + if idx != -1: + for jj in range(self.graph.indptr[idx], self.graph.indptr[idx+1]): + cellNo = self.graph.indices[jj] + self.candidates[numCandidates] = cellNo + numCandidates += 1 # check if the vertex is in any of the cells - for cellNo in candidates: + for jj in range(numCandidates): + 
cellNo = self.candidates[jj] if self.mesh.vertexInCell(vertex, cellNo, self.simplex, self.bary): return cellNo # add neighboring cells of candidate cells - for cellNo in candidates: + toCheck = set() + for jj in range(numCandidates): + cellNo = self.candidates[jj] for vertexNo in range(self.mesh.dim+1): v = self.mesh.cells[cellNo, vertexNo] - toCheck |= self.v2c[v] - toCheck -= candidates + for kk in range(self.v2c.indptr[v], self.v2c.indptr[v+1]): + toCheck.add(self.v2c.indices[kk]) + for jj in range(numCandidates): + cellNo = self.candidates[jj] + toCheck.remove(cellNo) for cellNo in toCheck: if self.mesh.vertexInCell(vertex, cellNo, self.simplex, self.bary): return cellNo # allow for some extra room - for cellNo in candidates: + for jj in range(numCandidates): + cellNo = self.candidates[jj] if self.mesh.vertexInCell(vertex, cellNo, self.simplex, self.bary, 1e-15): return cellNo for cellNo in toCheck: @@ -2229,42 +2283,51 @@ cdef class cellFinder2: cdef INDEX_t findCellPtr(self, REAL_t* vertex): cdef: - INDEX_t j, cellNo, vertexNo, v - set candidates, toCheck = set() - productIterator pit - INDEX_t[::1] keyCenter + INDEX_t j, cellNo, vertexNo, v, idx, jj, kk + set toCheck + INDEX_t numCandidates = 0 for j in range(self.mesh.dim): self.key[j] = ((vertex[j]-self.x_min[j]) * self.diamInv[j]) - try: - candidates = self.lookup[self.myKey] - except KeyError: - keyCenter = np.array(self.key, copy=True) - pit = productIterator(3, self.mesh.dim) - candidates = set() - pit.reset() - while pit.step(): + idx = self.lookup.getValue(self.key) + if idx != -1: + for jj in range(self.graph.indptr[idx], self.graph.indptr[idx+1]): + cellNo = self.graph.indices[jj] + self.candidates[numCandidates] = cellNo + numCandidates += 1 + else: + self.pit.reset() + while self.pit.step(): for j in range(self.mesh.dim): - self.key[j] = keyCenter[j] + pit.idx[j]-1 - try: - candidates |= self.lookup[self.myKey] - except KeyError: - pass + self.key2[j] = self.key[j] + self.pit.idx[j]-1 + idx = 
self.lookup.getValue(self.key2) + if idx != -1: + for jj in range(self.graph.indptr[idx], self.graph.indptr[idx+1]): + cellNo = self.graph.indices[jj] + self.candidates[numCandidates] = cellNo + numCandidates += 1 # check if the vertex is in any of the cells - for cellNo in candidates: + for jj in range(numCandidates): + cellNo = self.candidates[jj] if self.mesh.vertexInCellPtr(vertex, cellNo, self.simplex, self.bary): return cellNo # add neighboring cells of candidate cells - for cellNo in candidates: + toCheck = set() + for jj in range(numCandidates): + cellNo = self.candidates[jj] for vertexNo in range(self.mesh.dim+1): v = self.mesh.cells[cellNo, vertexNo] - toCheck |= self.v2c[v] - toCheck -= candidates + for kk in range(self.v2c.indptr[v], self.v2c.indptr[v+1]): + toCheck.add(self.v2c.indices[kk]) + for jj in range(numCandidates): + cellNo = self.candidates[jj] + toCheck.remove(cellNo) for cellNo in toCheck: if self.mesh.vertexInCellPtr(vertex, cellNo, self.simplex, self.bary): return cellNo # allow for some extra room - for cellNo in candidates: + for jj in range(numCandidates): + cellNo = self.candidates[jj] if self.mesh.vertexInCellPtr(vertex, cellNo, self.simplex, self.bary, 1e-15): return cellNo for cellNo in toCheck: diff --git a/multilevelSolver/PyNucleus_multilevelSolver/levels.py b/multilevelSolver/PyNucleus_multilevelSolver/levels.py index 24f8a1b..58af733 100644 --- a/multilevelSolver/PyNucleus_multilevelSolver/levels.py +++ b/multilevelSolver/PyNucleus_multilevelSolver/levels.py @@ -307,7 +307,7 @@ def clean(self): @classmethod def getKeys(cls): - return ['P', 'R', 'DoFMap', 'algebraicOverlaps'] + return ['P', 'R', 'DoFMap', 'algebraicOverlaps', 'Timer'] def getLevelDict(self): lvl = {} diff --git a/nl/PyNucleus_nl/bem_{SCALAR}.pxi b/nl/PyNucleus_nl/bem_{SCALAR}.pxi deleted file mode 100644 index a5d295f..0000000 --- a/nl/PyNucleus_nl/bem_{SCALAR}.pxi +++ /dev/null @@ -1,1182 +0,0 @@ 
-################################################################################### -# Copyright 2021 National Technology & Engineering Solutions of Sandia, # -# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # -# U.S. Government retains certain rights in this software. # -# If you want to use this code, please refer to the README.rst and LICENSE files. # -################################################################################### - - self.qrId = sQR.qr - self.PHI_id = sQR.PHI3 - elif panel == COMMON_VERTEX: - try: - sQR = self.specialQuadRules[(singularityValue, panel)] - except KeyError: - - qr = singularityCancelationQuadRule1D(panel, - self.singularityCancelationIntegrandAcrossElements+singularityValue, - self.quad_order_diagonal, - 2*dm_order) - PHI = uninitialized((2*dofs_per_element - dofs_per_vertex, - qr.num_nodes, - 2), dtype=REAL) - - for dof in range(dofs_per_vertex): - sf = self.getLocalShapeFunction(dof) - for i in range(qr.num_nodes): - lcl_bary_x[0] = qr.nodes[0, i] - lcl_bary_x[1] = qr.nodes[1, i] - lcl_bary_y[0] = qr.nodes[2, i] - lcl_bary_y[1] = qr.nodes[3, i] - PHI[dof, i, 0] = sf.eval(lcl_bary_x) - PHI[dof, i, 1] = sf.eval(lcl_bary_y) - - for dof in range(dofs_per_vertex, dofs_per_element): - sf = self.getLocalShapeFunction(dof) - for i in range(qr.num_nodes): - lcl_bary_x[0] = qr.nodes[0, i] - lcl_bary_x[1] = qr.nodes[1, i] - lcl_bary_y[0] = qr.nodes[2, i] - lcl_bary_y[1] = qr.nodes[3, i] - PHI[dof, i, 0] = sf.eval(lcl_bary_x) - PHI[dof, i, 1] = 0 - PHI[dofs_per_element+dof-dofs_per_vertex, i, 0] = 0 - PHI[dofs_per_element+dof-dofs_per_vertex, i, 1] = sf.eval(lcl_bary_y) - - sQR = specialQuadRule(qr, PHI3=PHI) - self.specialQuadRules[(singularityValue, panel)] = sQR - if qr.num_nodes > self.temp.shape[0]: - self.temp = uninitialized((qr.num_nodes), dtype={SCALAR}) - self.temp2 = uninitialized((qr.num_nodes), dtype={SCALAR}) - self.qrVertex = sQR.qr - self.PHI_vertex = sQR.PHI3 - else: - raise 
NotImplementedError('Unknown panel type: {}'.format(panel)) - - -cdef class {SCALAR_label}bem3D({SCALAR_label}bem): - def __init__(self, {SCALAR_label}Kernel kernel, meshBase mesh, DoFMap DoFMap, num_dofs=None, manifold_dim2=-1, **kwargs): - super({SCALAR_label}bem3D, self).__init__(kernel, mesh, DoFMap, num_dofs, manifold_dim2, **kwargs) - - cdef REAL_t get_h_simplex(self, const REAL_t[:, ::1] simplex): - cdef: - INDEX_t i, j - REAL_t hmax = 0., h2 - for i in range(2): - for j in range(i+1, 3): - h2 = ((simplex[j, 0]-simplex[i, 0])*(simplex[j, 0]-simplex[i, 0]) + - (simplex[j, 1]-simplex[i, 1])*(simplex[j, 1]-simplex[i, 1]) + - (simplex[j, 2]-simplex[i, 2])*(simplex[j, 2]-simplex[i, 2])) - hmax = max(hmax, h2) - return sqrt(hmax) - - cdef panelType getQuadOrder(self, - const REAL_t h1, - const REAL_t h2, - REAL_t d): - cdef: - panelType panel, panel2 - REAL_t logdh1 = log(d/h1), logdh2 = log(d/h2) - REAL_t c = (0.5*self.target_order+0.5)*log(self.num_dofs*self.H0**2) #-4. - REAL_t logh1H0 = abs(log(h1/self.H0)), logh2H0 = abs(log(h2/self.H0)) - REAL_t loghminH0 = max(logh1H0, logh2H0) - REAL_t s = max(-0.5*(self.kernel.getSingularityValue()+2), 0.) 
- panel = max(ceil((c + (s-1.)*logh2H0 + loghminH0 - s*logdh2) / - (max(logdh1, 0) + 0.4)), - 2) - panel2 = max(ceil((c + (s-1.)*logh1H0 + loghminH0 - s*logdh1) / - (max(logdh2, 0) + 0.4)), - 2) - panel = max(panel, panel2) - if self.distantQuadRulesPtr[panel] == NULL: - self.addQuadRule_nonSym(panel) - return panel - - cdef void getNearQuadRule(self, panelType panel): - cdef: - INDEX_t i - REAL_t singularityValue = self.kernel.getSingularityValue() - specialQuadRule sQR - quadratureRule qr - INDEX_t dofs_per_element = self.DoFMap.dofs_per_element - INDEX_t dofs_per_edge = self.DoFMap.dofs_per_edge - INDEX_t dofs_per_vertex = self.DoFMap.dofs_per_vertex - INDEX_t dm_order = max(self.DoFMap.polynomialOrder, 1) - shapeFunction sf - INDEX_t manifold_dim = 2 - REAL_t lcl_bary_x[3] - REAL_t lcl_bary_y[3] - REAL_t[:, :, ::1] PHI - INDEX_t dof - - if panel == COMMON_FACE: - try: - sQR = self.specialQuadRules[(singularityValue, panel)] - except KeyError: - - qr = singularityCancelationQuadRule2D(panel, - self.singularityCancelationIntegrandWithinElement+singularityValue, - self.quad_order_diagonal, - self.quad_order_diagonalV, - 1) - PHI = uninitialized((dofs_per_element, qr.num_nodes, 2), dtype=REAL) - - for dof in range(dofs_per_element): - sf = self.getLocalShapeFunction(dof) - for i in range(qr.num_nodes): - lcl_bary_x[0] = qr.nodes[0, i] - lcl_bary_x[1] = qr.nodes[1, i] - lcl_bary_x[2] = qr.nodes[2, i] - lcl_bary_y[0] = qr.nodes[3, i] - lcl_bary_y[1] = qr.nodes[4, i] - lcl_bary_y[2] = qr.nodes[5, i] - PHI[dof, i, 0] = sf.eval(lcl_bary_x) - PHI[dof, i, 1] = sf.eval(lcl_bary_y) - - sQR = specialQuadRule(qr, PHI3=PHI) - self.specialQuadRules[(singularityValue, panel)] = sQR - if qr.num_nodes > self.temp.shape[0]: - self.temp = uninitialized((qr.num_nodes), dtype={SCALAR}) - self.temp2 = uninitialized((qr.num_nodes), dtype={SCALAR}) - self.qrId = sQR.qr - self.PHI_id = sQR.PHI3 - elif panel == COMMON_EDGE: - try: - sQR = self.specialQuadRules[(singularityValue, panel)] - 
except KeyError: - - qr = singularityCancelationQuadRule2D(panel, - self.singularityCancelationIntegrandAcrossElements+singularityValue, - self.quad_order_diagonal, - self.quad_order_diagonalV, - 1) - PHI = uninitialized((2*dofs_per_element - 2*dofs_per_vertex - dofs_per_edge, - qr.num_nodes, - 2), dtype=REAL) - - for dof in range(2*dofs_per_vertex): - sf = self.getLocalShapeFunction(dof) - for i in range(qr.num_nodes): - lcl_bary_x[0] = qr.nodes[0, i] - lcl_bary_x[1] = qr.nodes[1, i] - lcl_bary_x[2] = qr.nodes[2, i] - lcl_bary_y[0] = qr.nodes[3, i] - lcl_bary_y[1] = qr.nodes[4, i] - lcl_bary_y[2] = qr.nodes[5, i] - PHI[dof, i, 0] = sf.eval(lcl_bary_x) - PHI[dof, i, 1] = sf.eval(lcl_bary_y) - - for dof in range((manifold_dim+1)*dofs_per_vertex, (manifold_dim+1)*dofs_per_vertex+dofs_per_edge): - sf = self.getLocalShapeFunction(dof) - for i in range(qr.num_nodes): - lcl_bary_x[0] = qr.nodes[0, i] - lcl_bary_x[1] = qr.nodes[1, i] - lcl_bary_x[2] = qr.nodes[2, i] - lcl_bary_y[0] = qr.nodes[3, i] - lcl_bary_y[1] = qr.nodes[4, i] - lcl_bary_y[2] = qr.nodes[5, i] - PHI[dof, i, 0] = sf.eval(lcl_bary_x) - PHI[dof, i, 1] = sf.eval(lcl_bary_y) - - for dof in range(2*dofs_per_vertex, (manifold_dim+1)*dofs_per_vertex): - sf = self.getLocalShapeFunction(dof) - for i in range(qr.num_nodes): - lcl_bary_x[0] = qr.nodes[0, i] - lcl_bary_x[1] = qr.nodes[1, i] - lcl_bary_x[2] = qr.nodes[2, i] - lcl_bary_y[0] = qr.nodes[3, i] - lcl_bary_y[1] = qr.nodes[4, i] - lcl_bary_y[2] = qr.nodes[5, i] - PHI[dof, i, 0] = sf.eval(lcl_bary_x) - PHI[dof, i, 1] = 0 - PHI[dofs_per_element+dof-2*dofs_per_vertex, i, 0] = 0 - PHI[dofs_per_element+dof-2*dofs_per_vertex, i, 1] = sf.eval(lcl_bary_y) - - for dof in range((manifold_dim+1)*dofs_per_vertex+dofs_per_edge, dofs_per_element): - sf = self.getLocalShapeFunction(dof) - for i in range(qr.num_nodes): - lcl_bary_x[0] = qr.nodes[0, i] - lcl_bary_x[1] = qr.nodes[1, i] - lcl_bary_x[2] = qr.nodes[2, i] - lcl_bary_y[0] = qr.nodes[3, i] - lcl_bary_y[1] = 
qr.nodes[4, i] - lcl_bary_y[2] = qr.nodes[5, i] - PHI[dof, i, 0] = sf.eval(lcl_bary_x) - PHI[dof, i, 1] = 0 - PHI[dofs_per_element+dof-2*dofs_per_vertex-dofs_per_edge, i, 0] = 0 - PHI[dofs_per_element+dof-2*dofs_per_vertex-dofs_per_edge, i, 1] = sf.eval(lcl_bary_y) - - sQR = specialQuadRule(qr, PHI3=PHI) - self.specialQuadRules[(singularityValue, panel)] = sQR - if qr.num_nodes > self.temp.shape[0]: - self.temp = uninitialized((qr.num_nodes), dtype={SCALAR}) - self.temp2 = uninitialized((qr.num_nodes), dtype={SCALAR}) - self.qrEdge = sQR.qr - self.PHI_edge = sQR.PHI3 - elif panel == COMMON_VERTEX: - try: - sQR = self.specialQuadRules[(singularityValue, panel)] - except KeyError: - - qr = singularityCancelationQuadRule2D(panel, - self.singularityCancelationIntegrandAcrossElements+singularityValue, - self.quad_order_diagonal, - self.quad_order_diagonalV, - 1) - PHI = uninitialized((2*dofs_per_element - dofs_per_vertex, - qr.num_nodes, - 2), dtype=REAL) - - for dof in range(dofs_per_vertex): - sf = self.getLocalShapeFunction(dof) - for i in range(qr.num_nodes): - lcl_bary_x[0] = qr.nodes[0, i] - lcl_bary_x[1] = qr.nodes[1, i] - lcl_bary_x[2] = qr.nodes[2, i] - lcl_bary_y[0] = qr.nodes[3, i] - lcl_bary_y[1] = qr.nodes[4, i] - lcl_bary_y[2] = qr.nodes[5, i] - PHI[dof, i, 0] = sf.eval(lcl_bary_x) - PHI[dof, i, 1] = sf.eval(lcl_bary_y) - - for dof in range(dofs_per_vertex, dofs_per_element): - sf = self.getLocalShapeFunction(dof) - for i in range(qr.num_nodes): - lcl_bary_x[0] = qr.nodes[0, i] - lcl_bary_x[1] = qr.nodes[1, i] - lcl_bary_x[2] = qr.nodes[2, i] - lcl_bary_y[0] = qr.nodes[3, i] - lcl_bary_y[1] = qr.nodes[4, i] - lcl_bary_y[2] = qr.nodes[5, i] - PHI[dof, i, 0] = sf.eval(lcl_bary_x) - PHI[dof, i, 1] = 0 - PHI[dofs_per_element+dof-dofs_per_vertex, i, 0] = 0 - PHI[dofs_per_element+dof-dofs_per_vertex, i, 1] = sf.eval(lcl_bary_y) - - sQR = specialQuadRule(qr, PHI3=PHI) - self.specialQuadRules[(singularityValue, panel)] = sQR - if qr.num_nodes > self.temp.shape[0]: 
- self.temp = uninitialized((qr.num_nodes), dtype={SCALAR}) - self.temp2 = uninitialized((qr.num_nodes), dtype={SCALAR}) - self.qrVertex = sQR.qr - self.PHI_vertex = sQR.PHI3 - else: - raise NotImplementedError('Unknown panel type: {}'.format(panel)) - - -cdef class {SCALAR_label}bem2D_V({SCALAR_label}bem2D): - """The local stiffness matrix - - .. math:: - - \\int_{K_1}\\int_{K_2} 0.5 * [ u(x) v(y) + u(y) v(x) ] \\gamma(x,y) dy dx - - for the V operator. - """ - def __init__(self, - {SCALAR_label}Kernel kernel, - meshBase mesh, - DoFMap DoFMap, - quad_order_diagonal=None, - target_order=None, - num_dofs=None, - **kwargs): - cdef: - REAL_t smin, smax - super({SCALAR_label}bem2D_V, self).__init__(kernel, mesh, DoFMap, num_dofs, **kwargs) - - # The integrand (excluding the kernel) cancels 2 orders of the singularity within an element. - self.singularityCancelationIntegrandWithinElement = 0. - # The integrand (excluding the kernel) cancels 2 orders of the - # singularity across elements for continuous finite elements. - if isinstance(DoFMap, P0_DoFMap): - assert self.kernel.max_singularity > -1., "Discontinuous finite elements are not conforming for singularity order {} <= -2.".format(self.kernel.max_singularity) - self.singularityCancelationIntegrandAcrossElements = 0. - else: - self.singularityCancelationIntegrandAcrossElements = 0. - - smin = max(-0.5*(self.kernel.min_singularity+1), 0.) - smax = max(-0.5*(self.kernel.max_singularity+1), 0.) 
- - if target_order is None: - # this is the desired local quadrature error - target_order = self.DoFMap.polynomialOrder+1-smin - self.target_order = target_order - if quad_order_diagonal is None: - # measured log(2 rho_2) = 0.43 - quad_order_diagonal = max(np.ceil(((target_order+2.)*log(self.num_dofs*self.H0) + (2.*smax-1.)*abs(log(self.hmin/self.H0)))/0.8), 2) - self.quad_order_diagonal = quad_order_diagonal - - if (self.kernel.kernelType != FRACTIONAL) or (not self.kernel.variableOrder): - self.getNearQuadRule(COMMON_EDGE) - self.getNearQuadRule(COMMON_VERTEX) - - cdef void eval(self, - {SCALAR}_t[::1] contrib, - panelType panel, - MASK_t mask=ALL): - cdef: - INDEX_t k, m, i, j, I, J - REAL_t vol, vol1 = self.vol1, vol2 = self.vol2 - {SCALAR}_t val - REAL_t[:, ::1] simplex1 = self.simplex1 - REAL_t[:, ::1] simplex2 = self.simplex2 - quadratureRule qr - REAL_t[:, :, ::1] PHI - INDEX_t dofs_per_element = self.DoFMap.dofs_per_element - INDEX_t dim = 2 - REAL_t x[2] - REAL_t y[2] - - if panel >= 1: - self.eval_distant_bem_V(contrib, panel, mask) - return - elif panel == COMMON_EDGE: - qr = self.qrId - PHI = self.PHI_id - elif panel == COMMON_VERTEX: - qr = self.qrVertex - PHI = self.PHI_vertex - else: - raise NotImplementedError('Unknown panel type: {}'.format(panel)) - - vol = vol1*vol2 - for m in range(qr.num_nodes): - for j in range(dim): - x[j] = (simplex1[self.perm1[0], j]*qr.nodes[0, m] + - simplex1[self.perm1[1], j]*qr.nodes[1, m]) - y[j] = (simplex2[self.perm2[0], j]*qr.nodes[2, m] + - simplex2[self.perm2[1], j]*qr.nodes[3, m]) - self.temp[m] = qr.weights[m] * self.kernel.evalPtr(dim, &x[0], &y[0]) - - contrib[:] = 0. - for I in range(PHI.shape[0]): - i = self.perm[I] - for J in range(I, PHI.shape[0]): - j = self.perm[J] - if j < i: - k = 2*dofs_per_element*j-(j*(j+1) >> 1) + i - else: - k = 2*dofs_per_element*i-(i*(i+1) >> 1) + j - if mask[k]: - val = 0. 
- for m in range(qr.num_nodes): - val += self.temp[m] * (PHI[I, m, 0] * PHI[J, m, 1] + PHI[I, m, 1] * PHI[J, m, 0]) - contrib[k] = 0.5*val*vol - - -cdef class {SCALAR_label}bem2D_K({SCALAR_label}bem2D): - """The local stiffness matrix - - .. math:: - - \\int_{K_1}\\int_{K_2} 0.5 * [ u(x) v(y) + u(y) v(x) ] \\gamma(x,y) dy dx - - for the V operator. - """ - def __init__(self, - {SCALAR_label}Kernel kernel, - meshBase mesh, - DoFMap DoFMap, - quad_order_diagonal=None, - target_order=None, - num_dofs=None, - **kwargs): - cdef: - REAL_t smin, smax - super({SCALAR_label}bem2D_K, self).__init__(kernel, mesh, DoFMap, num_dofs, **kwargs) - - # The integrand (excluding the kernel) cancels 2 orders of the singularity within an element. - self.singularityCancelationIntegrandWithinElement = 1. - # The integrand (excluding the kernel) cancels 2 orders of the - # singularity across elements for continuous finite elements. - if isinstance(DoFMap, P0_DoFMap): - assert self.kernel.max_singularity > -1., "Discontinuous finite elements are not conforming for singularity order {} <= -2.".format(self.kernel.max_singularity) - self.singularityCancelationIntegrandAcrossElements = 1. - else: - self.singularityCancelationIntegrandAcrossElements = 1. - - smin = max(-0.5*(self.kernel.min_singularity+1), 0.) - smax = max(-0.5*(self.kernel.max_singularity+1), 0.) 
- - if target_order is None: - # this is the desired local quadrature error - target_order = self.DoFMap.polynomialOrder+1-smin - self.target_order = target_order - if quad_order_diagonal is None: - # measured log(2 rho_2) = 0.43 - quad_order_diagonal = max(np.ceil(((target_order+2.)*log(self.num_dofs*self.H0) + (2.*smax-1.)*abs(log(self.hmin/self.H0)))/0.8), 2) - self.quad_order_diagonal = quad_order_diagonal - - if (self.kernel.kernelType != FRACTIONAL) or (not self.kernel.variableOrder): - self.getNearQuadRule(COMMON_EDGE) - self.getNearQuadRule(COMMON_VERTEX) - - cdef void eval(self, - {SCALAR}_t[::1] contrib, - panelType panel, - MASK_t mask=ALL): - cdef: - INDEX_t k, m, i, j, I, J - REAL_t vol, valReal, vol1 = self.vol1, vol2 = self.vol2 - {SCALAR}_t val - REAL_t[:, ::1] simplex1 = self.simplex1 - REAL_t[:, ::1] simplex2 = self.simplex2 - quadratureRule qr - REAL_t[:, :, ::1] PHI - INDEX_t dofs_per_element = self.DoFMap.dofs_per_element - INDEX_t dim = 2 - REAL_t x[2] - REAL_t y[2] - REAL_t normW - - if panel >= 1: - self.eval_distant_bem_K(contrib, panel, mask) - return - elif panel == COMMON_EDGE: - qr = self.qrId - PHI = self.PHI_id - elif panel == COMMON_VERTEX: - qr = self.qrVertex - PHI = self.PHI_vertex - else: - raise NotImplementedError('Unknown panel type: {}'.format(panel)) - - # n is independent of x and y - self.n1[0] = simplex1[1, 1] - simplex1[0, 1] - self.n1[1] = simplex1[0, 0] - simplex1[1, 0] - valReal = 1./sqrt(mydot(self.n1, self.n1)) - self.n1[0] *= valReal - self.n1[1] *= valReal - - self.n2[0] = simplex2[1, 1] - simplex2[0, 1] - self.n2[1] = simplex2[0, 0] - simplex2[1, 0] - valReal = 1./sqrt(mydot(self.n2, self.n2)) - self.n2[0] *= valReal - self.n2[1] *= valReal - - vol = vol1*vol2 - for m in range(qr.num_nodes): - normW = 0. 
- for j in range(dim): - x[j] = (simplex1[self.perm1[0], j]*qr.nodes[0, m] + - simplex1[self.perm1[1], j]*qr.nodes[1, m]) - y[j] = (simplex2[self.perm2[0], j]*qr.nodes[2, m] + - simplex2[self.perm2[1], j]*qr.nodes[3, m]) - self.w[j] = y[j]-x[j] - normW += self.w[j]**2 - normW = 1./sqrt(normW) - for j in range(dim): - self.w[j] *= normW - - val = qr.weights[m] * self.kernel.evalPtr(dim, &x[0], &y[0]) - self.temp[m] = val * mydot(self.n2, self.w) - self.temp2[m] = - val * mydot(self.n1, self.w) - - contrib[:] = 0. - for I in range(PHI.shape[0]): - i = self.perm[I] - for J in range(I, PHI.shape[0]): - j = self.perm[J] - if j < i: - k = 2*dofs_per_element*j-(j*(j+1) >> 1) + i - else: - k = 2*dofs_per_element*i-(i*(i+1) >> 1) + j - if mask[k]: - val = 0. - for m in range(qr.num_nodes): - val += self.temp[m] * PHI[I, m, 0] * PHI[J, m, 1] + self.temp2[m] * PHI[I, m, 1] * PHI[J, m, 0] - contrib[k] = 0.5*val*vol - - -cdef class {SCALAR_label}bem2D_K_prime({SCALAR_label}bem2D): - """The local stiffness matrix - - .. math:: - - \\int_{K_1}\\int_{K_2} 0.5 * [ u(x) v(y) + u(y) v(x) ] \\gamma(x,y) dy dx - - for the V operator. - """ - def __init__(self, - {SCALAR_label}Kernel kernel, - meshBase mesh, - DoFMap DoFMap, - quad_order_diagonal=None, - target_order=None, - num_dofs=None, - **kwargs): - cdef: - REAL_t smin, smax - super({SCALAR_label}bem2D_K_prime, self).__init__(kernel, mesh, DoFMap, num_dofs, **kwargs) - - # The integrand (excluding the kernel) cancels 2 orders of the singularity within an element. - self.singularityCancelationIntegrandWithinElement = 1. - # The integrand (excluding the kernel) cancels 2 orders of the - # singularity across elements for continuous finite elements. - if isinstance(DoFMap, P0_DoFMap): - assert self.kernel.max_singularity > -1., "Discontinuous finite elements are not conforming for singularity order {} <= -2.".format(self.kernel.max_singularity) - self.singularityCancelationIntegrandAcrossElements = 1. 
- else: - self.singularityCancelationIntegrandAcrossElements = 1. - - smin = max(-0.5*(self.kernel.min_singularity+1), 0.) - smax = max(-0.5*(self.kernel.max_singularity+1), 0.) - - if target_order is None: - # this is the desired local quadrature error - target_order = self.DoFMap.polynomialOrder+1-smin - self.target_order = target_order - if quad_order_diagonal is None: - # measured log(2 rho_2) = 0.43 - quad_order_diagonal = max(np.ceil(((target_order+2.)*log(self.num_dofs*self.H0) + (2.*smax-1.)*abs(log(self.hmin/self.H0)))/0.8), 2) - self.quad_order_diagonal = quad_order_diagonal - - if (self.kernel.kernelType != FRACTIONAL) or (not self.kernel.variableOrder): - self.getNearQuadRule(COMMON_EDGE) - self.getNearQuadRule(COMMON_VERTEX) - - cdef void eval(self, - {SCALAR}_t[::1] contrib, - panelType panel, - MASK_t mask=ALL): - cdef: - INDEX_t k, m, i, j, I, J - REAL_t vol, valReal, vol1 = self.vol1, vol2 = self.vol2 - {SCALAR}_t val - REAL_t[:, ::1] simplex1 = self.simplex1 - REAL_t[:, ::1] simplex2 = self.simplex2 - quadratureRule qr - REAL_t[:, :, ::1] PHI - INDEX_t dofs_per_element = self.DoFMap.dofs_per_element - INDEX_t dim = 2 - REAL_t x[2] - REAL_t y[2] - REAL_t normW - - if panel >= 1: - self.eval_distant_bem_K_prime(contrib, panel, mask) - return - elif panel == COMMON_EDGE: - qr = self.qrId - PHI = self.PHI_id - elif panel == COMMON_VERTEX: - qr = self.qrVertex - PHI = self.PHI_vertex - else: - raise NotImplementedError('Unknown panel type: {}'.format(panel)) - - # n is independent of x and y - self.n1[0] = simplex1[1, 1] - simplex1[0, 1] - self.n1[1] = simplex1[0, 0] - simplex1[1, 0] - valReal = 1./sqrt(mydot(self.n1, self.n1)) - self.n1[0] *= valReal - self.n1[1] *= valReal - - self.n2[0] = simplex2[1, 1] - simplex2[0, 1] - self.n2[1] = simplex2[0, 0] - simplex2[1, 0] - valReal = 1./sqrt(mydot(self.n2, self.n2)) - self.n2[0] *= valReal - self.n2[1] *= valReal - - vol = vol1*vol2 - for m in range(qr.num_nodes): - normW = 0. 
- for j in range(dim): - x[j] = (simplex1[self.perm1[0], j]*qr.nodes[0, m] + - simplex1[self.perm1[1], j]*qr.nodes[1, m]) - y[j] = (simplex2[self.perm2[0], j]*qr.nodes[2, m] + - simplex2[self.perm2[1], j]*qr.nodes[3, m]) - self.w[j] = y[j]-x[j] - normW += self.w[j]**2 - normW = 1./sqrt(normW) - for j in range(dim): - self.w[j] *= normW - - val = qr.weights[m] * self.kernel.evalPtr(dim, &x[0], &y[0]) - self.temp[m] = - val * mydot(self.n1, self.w) - self.temp2[m] = val * mydot(self.n2, self.w) - - contrib[:] = 0. - for I in range(PHI.shape[0]): - i = self.perm[I] - for J in range(I, PHI.shape[0]): - j = self.perm[J] - if j < i: - k = 2*dofs_per_element*j-(j*(j+1) >> 1) + i - else: - k = 2*dofs_per_element*i-(i*(i+1) >> 1) + j - if mask[k]: - val = 0. - for m in range(qr.num_nodes): - val += self.temp[m] * PHI[I, m, 0] * PHI[J, m, 1] + self.temp2[m] * PHI[I, m, 1] * PHI[J, m, 0] - contrib[k] = 0.5*val*vol - - -cdef class {SCALAR_label}bem3D_V({SCALAR_label}bem3D): - """The local stiffness matrix - - .. math:: - - \\int_{K_1}\\int_{K_2} 0.5 * [ u(x) v(y) + u(y) v(x) ] \\gamma(x,y) dy dx - - for the V operator. - """ - def __init__(self, - {SCALAR_label}Kernel kernel, - meshBase mesh, - DoFMap DoFMap, - quad_order_diagonal=None, - target_order=None, - num_dofs=None, - **kwargs): - cdef: - REAL_t smin, smax - super({SCALAR_label}bem3D_V, self).__init__(kernel, mesh, DoFMap, num_dofs, **kwargs) - - # The integrand (excluding the kernel) cancels 2 orders of the singularity within an element. - self.singularityCancelationIntegrandWithinElement = 0. - # The integrand (excluding the kernel) cancels 2 orders of the - # singularity across elements for continuous finite elements. - if isinstance(DoFMap, P0_DoFMap): - assert self.kernel.max_singularity > -3., "Discontinuous finite elements are not conforming for singularity order {} <= -3.".format(self.kernel.max_singularity) - self.singularityCancelationIntegrandAcrossElements = 0. 
- else: - self.singularityCancelationIntegrandAcrossElements = 0. - - if target_order is None: - # this is the desired local quadrature error - # target_order = (2.-s)/self.dim - target_order = 0.5 - self.target_order = target_order - - smax = max(-0.5*(self.kernel.max_singularity+2), 0.) - if quad_order_diagonal is None: - # measured log(2 rho_2) = 0.43 - quad_order_diagonal = max(np.ceil((target_order+1.+smax)/(0.43)*abs(np.log(self.hmin/self.H0))), 4) - # measured log(2 rho_2) = 0.7 - quad_order_diagonalV = max(np.ceil((target_order+1.+smax)/(0.7)*abs(np.log(self.hmin/self.H0))), 4) - else: - quad_order_diagonalV = quad_order_diagonal - self.quad_order_diagonal = quad_order_diagonal - self.quad_order_diagonalV = quad_order_diagonalV - - if (self.kernel.kernelType != FRACTIONAL) or (not self.kernel.variableOrder): - self.getNearQuadRule(COMMON_FACE) - self.getNearQuadRule(COMMON_EDGE) - self.getNearQuadRule(COMMON_VERTEX) - - cdef void eval(self, - {SCALAR}_t[::1] contrib, - panelType panel, - MASK_t mask=ALL): - cdef: - INDEX_t k, m, i, j, I, J - REAL_t vol, vol1 = self.vol1, vol2 = self.vol2 - {SCALAR}_t val - REAL_t[:, ::1] simplex1 = self.simplex1 - REAL_t[:, ::1] simplex2 = self.simplex2 - quadratureRule qr - REAL_t[:, :, ::1] PHI - INDEX_t dofs_per_element = self.DoFMap.dofs_per_element - INDEX_t dim = 3 - REAL_t x[3] - REAL_t y[3] - - if panel >= 1: - self.eval_distant_bem_V(contrib, panel, mask) - return - elif panel == COMMON_FACE: - qr = self.qrId - PHI = self.PHI_id - elif panel == COMMON_EDGE: - qr = self.qrEdge - PHI = self.PHI_edge - elif panel == COMMON_VERTEX: - qr = self.qrVertex - PHI = self.PHI_vertex - else: - raise NotImplementedError('Unknown panel type: {}'.format(panel)) - - vol = 4.0*vol1*vol2 - for m in range(qr.num_nodes): - for j in range(dim): - x[j] = (simplex1[self.perm1[0], j]*qr.nodes[0, m] + - simplex1[self.perm1[1], j]*qr.nodes[1, m] + - simplex1[self.perm1[2], j]*qr.nodes[2, m]) - y[j] = (simplex2[self.perm2[0], j]*qr.nodes[3, 
m] + - simplex2[self.perm2[1], j]*qr.nodes[4, m] + - simplex2[self.perm2[2], j]*qr.nodes[5, m]) - self.temp[m] = qr.weights[m] * self.kernel.evalPtr(dim, &x[0], &y[0]) - - contrib[:] = 0. - for I in range(PHI.shape[0]): - i = self.perm[I] - for J in range(I, PHI.shape[0]): - j = self.perm[J] - if j < i: - k = 2*dofs_per_element*j-(j*(j+1) >> 1) + i - else: - k = 2*dofs_per_element*i-(i*(i+1) >> 1) + j - if mask[k]: - val = 0. - for m in range(qr.num_nodes): - val += self.temp[m] * (PHI[I, m, 0] * PHI[J, m, 1] + PHI[I, m, 1] * PHI[J, m, 0]) - contrib[k] = 0.5*val*vol - - -cdef class {SCALAR_label}bem3D_K({SCALAR_label}bem3D): - """The local stiffness matrix - - .. math:: - - \\int_{K_1}\\int_{K_2} 0.5 * [ u(x) v(y) + u(y) v(x) ] \\gamma(x,y) dy dx - - for the V operator. - """ - def __init__(self, - {SCALAR_label}Kernel kernel, - meshBase mesh, - DoFMap DoFMap, - quad_order_diagonal=None, - target_order=None, - num_dofs=None, - **kwargs): - cdef: - REAL_t smin, smax - super({SCALAR_label}bem3D_K, self).__init__(kernel, mesh, DoFMap, num_dofs, **kwargs) - - # The integrand (excluding the kernel) cancels 2 orders of the singularity within an element. - self.singularityCancelationIntegrandWithinElement = 1. - # The integrand (excluding the kernel) cancels 2 orders of the - # singularity across elements for continuous finite elements. - if isinstance(DoFMap, P0_DoFMap): - assert self.kernel.max_singularity > -3., "Discontinuous finite elements are not conforming for singularity order {} <= -3.".format(self.kernel.max_singularity) - self.singularityCancelationIntegrandAcrossElements = 1. - else: - self.singularityCancelationIntegrandAcrossElements = 1. - - if target_order is None: - # this is the desired local quadrature error - # target_order = (2.-s)/self.dim - target_order = 0.5 - self.target_order = target_order - - smax = max(-0.5*(self.kernel.max_singularity+2), 0.) 
- if quad_order_diagonal is None: - # measured log(2 rho_2) = 0.43 - quad_order_diagonal = max(np.ceil((target_order+1.+smax)/(0.43)*abs(np.log(self.hmin/self.H0))), 4) - # measured log(2 rho_2) = 0.7 - quad_order_diagonalV = max(np.ceil((target_order+1.+smax)/(0.7)*abs(np.log(self.hmin/self.H0))), 4) - else: - quad_order_diagonalV = quad_order_diagonal - self.quad_order_diagonal = quad_order_diagonal - self.quad_order_diagonalV = quad_order_diagonalV - - if (self.kernel.kernelType != FRACTIONAL) or (not self.kernel.variableOrder): - self.getNearQuadRule(COMMON_FACE) - self.getNearQuadRule(COMMON_EDGE) - self.getNearQuadRule(COMMON_VERTEX) - - cdef void eval(self, - {SCALAR}_t[::1] contrib, - panelType panel, - MASK_t mask=ALL): - cdef: - INDEX_t k, m, i, j, I, J - REAL_t vol, valReal, vol1 = self.vol1, vol2 = self.vol2 - {SCALAR}_t val - REAL_t[:, ::1] simplex1 = self.simplex1 - REAL_t[:, ::1] simplex2 = self.simplex2 - quadratureRule qr - REAL_t[:, :, ::1] PHI - INDEX_t dofs_per_element = self.DoFMap.dofs_per_element - INDEX_t dim = 3 - REAL_t x[3] - REAL_t y[3] - REAL_t normW - - if panel >= 1: - self.eval_distant_bem_K(contrib, panel, mask) - return - elif panel == COMMON_FACE: - qr = self.qrId - PHI = self.PHI_id - elif panel == COMMON_EDGE: - qr = self.qrEdge - PHI = self.PHI_edge - elif panel == COMMON_VERTEX: - qr = self.qrVertex - PHI = self.PHI_vertex - else: - raise NotImplementedError('Unknown panel type: {}'.format(panel)) - - for j in range(dim): - x[j] = simplex1[1, j]-simplex1[0, j] - for j in range(dim): - y[j] = simplex1[2, j]-simplex1[0, j] - self.n1[0] = x[1]*y[2]-x[2]*y[1] - self.n1[1] = x[2]*y[0]-x[0]*y[2] - self.n1[2] = x[0]*y[1]-x[1]*y[0] - valReal = 1./sqrt(mydot(self.n1, self.n1)) - self.n1[0] *= valReal - self.n1[1] *= valReal - self.n1[2] *= valReal - - for j in range(dim): - x[j] = simplex2[1, j]-simplex2[0, j] - for j in range(dim): - y[j] = simplex2[2, j]-simplex2[0, j] - self.n2[0] = x[1]*y[2]-x[2]*y[1] - self.n2[1] = 
x[2]*y[0]-x[0]*y[2] - self.n2[2] = x[0]*y[1]-x[1]*y[0] - valReal = 1./sqrt(mydot(self.n2, self.n2)) - self.n2[0] *= valReal - self.n2[1] *= valReal - self.n2[2] *= valReal - - vol = 4.0*vol1*vol2 - for m in range(qr.num_nodes): - normW = 0. - for j in range(dim): - x[j] = (simplex1[self.perm1[0], j]*qr.nodes[0, m] + - simplex1[self.perm1[1], j]*qr.nodes[1, m] + - simplex1[self.perm1[2], j]*qr.nodes[2, m]) - y[j] = (simplex2[self.perm2[0], j]*qr.nodes[3, m] + - simplex2[self.perm2[1], j]*qr.nodes[4, m] + - simplex2[self.perm2[2], j]*qr.nodes[5, m]) - self.w[j] = y[j]-x[j] - normW += self.w[j]**2 - - normW = 1./sqrt(normW) - for j in range(dim): - self.w[j] *= normW - - val = qr.weights[m] * self.kernel.evalPtr(dim, &x[0], &y[0]) - self.temp[m] = val * mydot(self.n2, self.w) - self.temp2[m] = - val * mydot(self.n1, self.w) - - contrib[:] = 0. - for I in range(PHI.shape[0]): - i = self.perm[I] - for J in range(I, PHI.shape[0]): - j = self.perm[J] - if j < i: - k = 2*dofs_per_element*j-(j*(j+1) >> 1) + i - else: - k = 2*dofs_per_element*i-(i*(i+1) >> 1) + j - if mask[k]: - val = 0. - for m in range(qr.num_nodes): - val += self.temp[m] * PHI[I, m, 0] * PHI[J, m, 1] + self.temp2[m] * PHI[I, m, 1] * PHI[J, m, 0] - contrib[k] = 0.5*val*vol - - -cdef class {SCALAR_label}bem3D_K_prime({SCALAR_label}bem3D): - """The local stiffness matrix - - .. math:: - - \\int_{K_1}\\int_{K_2} 0.5 * [ u(x) v(y) + u(y) v(x) ] \\gamma(x,y) dy dx - - for the V operator. - """ - def __init__(self, - {SCALAR_label}Kernel kernel, - meshBase mesh, - DoFMap DoFMap, - quad_order_diagonal=None, - target_order=None, - num_dofs=None, - **kwargs): - cdef: - REAL_t smin, smax - super({SCALAR_label}bem3D_K_prime, self).__init__(kernel, mesh, DoFMap, num_dofs, **kwargs) - - # The integrand (excluding the kernel) cancels 2 orders of the singularity within an element. - self.singularityCancelationIntegrandWithinElement = 1. 
- # The integrand (excluding the kernel) cancels 2 orders of the - # singularity across elements for continuous finite elements. - if isinstance(DoFMap, P0_DoFMap): - assert self.kernel.max_singularity > -3., "Discontinuous finite elements are not conforming for singularity order {} <= -3.".format(self.kernel.max_singularity) - self.singularityCancelationIntegrandAcrossElements = 1. - else: - self.singularityCancelationIntegrandAcrossElements = 1. - - if target_order is None: - # this is the desired local quadrature error - # target_order = (2.-s)/self.dim - target_order = 0.5 - self.target_order = target_order - - smax = max(-0.5*(self.kernel.max_singularity+2), 0.) - if quad_order_diagonal is None: - # measured log(2 rho_2) = 0.43 - quad_order_diagonal = max(np.ceil((target_order+1.+smax)/(0.43)*abs(np.log(self.hmin/self.H0))), 4) - # measured log(2 rho_2) = 0.7 - quad_order_diagonalV = max(np.ceil((target_order+1.+smax)/(0.7)*abs(np.log(self.hmin/self.H0))), 4) - else: - quad_order_diagonalV = quad_order_diagonal - self.quad_order_diagonal = quad_order_diagonal - self.quad_order_diagonalV = quad_order_diagonalV - - if (self.kernel.kernelType != FRACTIONAL) or (not self.kernel.variableOrder): - self.getNearQuadRule(COMMON_FACE) - self.getNearQuadRule(COMMON_EDGE) - self.getNearQuadRule(COMMON_VERTEX) - - cdef void eval(self, - {SCALAR}_t[::1] contrib, - panelType panel, - MASK_t mask=ALL): - cdef: - INDEX_t k, m, i, j, I, J - REAL_t vol, valReal, vol1 = self.vol1, vol2 = self.vol2 - {SCALAR}_t val - REAL_t[:, ::1] simplex1 = self.simplex1 - REAL_t[:, ::1] simplex2 = self.simplex2 - quadratureRule qr - REAL_t[:, :, ::1] PHI - INDEX_t dofs_per_element = self.DoFMap.dofs_per_element - INDEX_t dim = 3 - REAL_t x[3] - REAL_t y[3] - REAL_t normW - - if panel >= 1: - self.eval_distant_bem_K_prime(contrib, panel, mask) - return - elif panel == COMMON_FACE: - qr = self.qrId - PHI = self.PHI_id - elif panel == COMMON_EDGE: - qr = self.qrEdge - PHI = self.PHI_edge - elif 
panel == COMMON_VERTEX: - qr = self.qrVertex - PHI = self.PHI_vertex - else: - raise NotImplementedError('Unknown panel type: {}'.format(panel)) - - for j in range(dim): - x[j] = simplex1[1, j]-simplex1[0, j] - for j in range(dim): - y[j] = simplex1[2, j]-simplex1[0, j] - self.n1[0] = x[1]*y[2]-x[2]*y[1] - self.n1[1] = x[2]*y[0]-x[0]*y[2] - self.n1[2] = x[0]*y[1]-x[1]*y[0] - valReal = 1./sqrt(mydot(self.n1, self.n1)) - self.n1[0] *= valReal - self.n1[1] *= valReal - self.n1[2] *= valReal - - for j in range(dim): - x[j] = simplex2[1, j]-simplex2[0, j] - for j in range(dim): - y[j] = simplex2[2, j]-simplex2[0, j] - self.n2[0] = x[1]*y[2]-x[2]*y[1] - self.n2[1] = x[2]*y[0]-x[0]*y[2] - self.n2[2] = x[0]*y[1]-x[1]*y[0] - valReal = 1./sqrt(mydot(self.n2, self.n2)) - self.n2[0] *= valReal - self.n2[1] *= valReal - self.n2[2] *= valReal - - vol = 4.0*vol1*vol2 - for m in range(qr.num_nodes): - normW = 0. - for j in range(dim): - x[j] = (simplex1[self.perm1[0], j]*qr.nodes[0, m] + - simplex1[self.perm1[1], j]*qr.nodes[1, m] + - simplex1[self.perm1[2], j]*qr.nodes[2, m]) - y[j] = (simplex2[self.perm2[0], j]*qr.nodes[3, m] + - simplex2[self.perm2[1], j]*qr.nodes[4, m] + - simplex2[self.perm2[2], j]*qr.nodes[5, m]) - self.w[j] = y[j]-x[j] - normW += self.w[j]**2 - - normW = 1./sqrt(normW) - for j in range(dim): - self.w[j] *= normW - - val = qr.weights[m] * self.kernel.evalPtr(dim, &x[0], &y[0]) - self.temp[m] = - val * mydot(self.n1, self.w) - self.temp2[m] = val * mydot(self.n2, self.w) - - contrib[:] = 0. - for I in range(PHI.shape[0]): - i = self.perm[I] - for J in range(I, PHI.shape[0]): - j = self.perm[J] - if j < i: - k = 2*dofs_per_element*j-(j*(j+1) >> 1) + i - else: - k = 2*dofs_per_element*i-(i*(i+1) >> 1) + j - if mask[k]: - val = 0. 
- for m in range(qr.num_nodes): - val += self.temp[m] * PHI[I, m, 0] * PHI[J, m, 1] + self.temp2[m] * PHI[I, m, 1] * PHI[J, m, 0] - contrib[k] = 0.5*val*vol - - -cdef class {SCALAR_label}boundaryIntegralSingleLayer({function_type}): - cdef: - {SCALAR_label_lc_}fe_vector u - {SCALAR_label}twoPointFunction kernel - REAL_t[:, ::1] simplex - REAL_t[:, ::1] span - simplexQuadratureRule qr - REAL_t[:, ::1] PHI - {SCALAR}_t[::1] fvals - - def __init__(self, {SCALAR_label_lc_}fe_vector u, {SCALAR_label}twoPointFunction kernel): - self.u = u - self.kernel = kernel - dm = u.dm - mesh = dm.mesh - - dim = mesh.dim - dimManifold = mesh.manifold_dim - - self.simplex = uninitialized((dimManifold+1, dim), dtype=REAL) - self.span = uninitialized((dimManifold, dim), dtype=REAL) - self.qr = simplexXiaoGimbutas(dim=dim, manifold_dim=dimManifold, order=3) - - self.PHI = uninitialized((dm.dofs_per_element, self.qr.num_nodes), dtype=REAL) - for i in range(dm.dofs_per_element): - for j in range(self.qr.num_nodes): - self.PHI[i, j] = dm.localShapeFunctions[i](np.ascontiguousarray(self.qr.nodes[:, j])) - - self.fvals = uninitialized((self.qr.num_nodes), dtype={SCALAR}) - - cdef {SCALAR}_t eval(self, REAL_t[::1] x): - cdef: - INDEX_t cellNo - {SCALAR}_t val = 0. - DoFMap dm = self.u.dm - meshBase mesh = dm.mesh - INDEX_t dim = mesh.dim - INDEX_t manifold_dim = mesh.manifold_dim - simplexQuadratureRule qr = self.qr - INDEX_t num_quad_nodes = qr.num_nodes - INDEX_t k, j, I, m - REAL_t vol - {SCALAR}_t[::1] u = self.u - - for cellNo in range(mesh.num_cells): - mesh.getSimplex(cellNo, self.simplex) - - # Calculate volume - vol = qr.getSimplexVolume(self.simplex) - - for j in range(num_quad_nodes): - qr.tempVec[:] = 0. 
- for k in range(manifold_dim+1): - for m in range(dim): - qr.tempVec[m] += qr.nodes[k, j] * self.simplex[k, m] - self.fvals[j] = self.kernel.evalPtr(dim, &x[0], &qr.tempVec[0]) - - for k in range(dm.dofs_per_element): - I = dm.cell2dof(cellNo, k) - if I < 0: - continue - for j in range(num_quad_nodes): - val += vol*qr.weights[j]*self.fvals[j]*self.PHI[k, j]*u[I] - return val - - -cdef class {SCALAR_label}boundaryIntegralDoubleLayer({function_type}): - cdef: - {SCALAR_label_lc_}fe_vector u - {SCALAR_label}twoPointFunction kernel - REAL_t[:, ::1] simplex - REAL_t[:, ::1] span - simplexQuadratureRule qr - REAL_t[:, ::1] PHI - {SCALAR}_t[::1] fvals - REAL_t[::1] n2, w, x, y - - def __init__(self, {SCALAR_label_lc_}fe_vector u, twoPointFunction kernel): - self.u = u - self.kernel = kernel - dm = u.dm - mesh = dm.mesh - - dim = mesh.dim - dimManifold = mesh.manifold_dim - - self.simplex = uninitialized((dimManifold+1, dim), dtype=REAL) - self.span = uninitialized((dimManifold, dim), dtype=REAL) - self.qr = simplexXiaoGimbutas(dim=dim, manifold_dim=dimManifold, order=3) - - self.PHI = uninitialized((dm.dofs_per_element, self.qr.num_nodes), dtype=REAL) - for i in range(dm.dofs_per_element): - for j in range(self.qr.num_nodes): - self.PHI[i, j] = dm.localShapeFunctions[i](np.ascontiguousarray(self.qr.nodes[:, j])) - - self.fvals = uninitialized((self.qr.num_nodes), dtype={SCALAR}) - self.n2 = uninitialized((dim), dtype=REAL) - self.w = uninitialized((dim), dtype=REAL) - self.x = uninitialized((dim), dtype=REAL) - self.y = uninitialized((dim), dtype=REAL) - - cdef {SCALAR}_t eval(self, REAL_t[::1] x): - cdef: - INDEX_t cellNo - {SCALAR}_t val = 0. 
- DoFMap dm = self.u.dm - meshBase mesh = dm.mesh - INDEX_t dim = mesh.dim - INDEX_t manifold_dim = mesh.manifold_dim - simplexQuadratureRule qr = self.qr - INDEX_t num_quad_nodes = qr.num_nodes - INDEX_t k, j, I, m - REAL_t vol, normW - {SCALAR}_t[::1] u = self.u - - for cellNo in range(mesh.num_cells): - mesh.getSimplex(cellNo, self.simplex) - - if dim == 2: - self.n2[0] = self.simplex[1, 1] - self.simplex[0, 1] - self.n2[1] = self.simplex[0, 0] - self.simplex[1, 0] - normW = 1./sqrt(mydot(self.n2, self.n2)) - self.n2[0] *= normW - self.n2[1] *= normW - elif dim == 3: - for j in range(dim): - self.x[j] = self.simplex[1, j]-self.simplex[0, j] - for j in range(dim): - self.y[j] = self.simplex[2, j]-self.simplex[0, j] - self.n2[0] = self.x[1]*self.y[2]-self.x[2]*self.y[1] - self.n2[1] = self.x[2]*self.y[0]-self.x[0]*self.y[2] - self.n2[2] = self.x[0]*self.y[1]-self.x[1]*self.y[0] - normW = 1./sqrt(mydot(self.n2, self.n2)) - self.n2[0] *= normW - self.n2[1] *= normW - self.n2[2] *= normW - - # Calculate volume - vol = qr.getSimplexVolume(self.simplex) - - for j in range(num_quad_nodes): - qr.tempVec[:] = 0. - for k in range(manifold_dim+1): - for m in range(dim): - qr.tempVec[m] += qr.nodes[k, j] * self.simplex[k, m] - normW = 0. 
- for m in range(dim): - self.w[m] = qr.tempVec[m]-x[m] - normW += self.w[m]**2 - normW = 1./sqrt(normW) - for m in range(dim): - self.w[m] *= normW - - self.fvals[j] = self.kernel.evalPtr(dim, &x[0], &qr.tempVec[0]) * mydot(self.n2, self.w) - - for k in range(dm.dofs_per_element): - I = dm.cell2dof(cellNo, k) - if I < 0: - continue - for j in range(num_quad_nodes): - val += vol*qr.weights[j]*self.fvals[j]*self.PHI[k, j]*u[I] - return val diff --git a/nl/PyNucleus_nl/bitset.pxd.in b/nl/PyNucleus_nl/bitset.pxd.in index a2d98f8..f982ed8 100644 --- a/nl/PyNucleus_nl/bitset.pxd.in +++ b/nl/PyNucleus_nl/bitset.pxd.in @@ -7,6 +7,7 @@ cdef extern from "<bitset>" namespace "std" nogil: MASK_t() except + bint test(int) bint operator[](int) + int count() int size() MASK_t operator<<(int) MASK_t operator>>(int) @@ -20,6 +21,9 @@ cdef extern from "<bitset>" namespace "std" nogil: void reset() +cdef str MASK2Str(MASK_t a, INDEX_t length=*) + + cdef class tupleDictMASK: cdef: INDEX_t ** indexL diff --git a/nl/PyNucleus_nl/bitset.pyx b/nl/PyNucleus_nl/bitset.pyx index 82f9472..347254e 100644 --- a/nl/PyNucleus_nl/bitset.pyx +++ b/nl/PyNucleus_nl/bitset.pyx @@ -12,10 +12,15 @@ from libc.stdlib cimport malloc, realloc, free include "malloc.pxi" -cdef str MASK2Str(MASK_t a): +cdef str MASK2Str(MASK_t a, INDEX_t length=-1): + cdef: + INDEX_t i + str s + if length < 0: + length = a.size() s = '' - for i in range(a.size()): - s+= str(int(a[i])) + for i in range(length): + s += str(int(a[i])) return s @@ -198,3 +203,23 @@ cdef class tupleDictMASK: i += 1 self.i = i return True + + def toDict(self, INDEX_t maxLength=-1): + cdef: + INDEX_t e[2] + MASK_t val + dict d = {} + self.startIter() + while self.next(e, &val): + d[(e[0], e[1])] = (MASK2Str(val, maxLength), val.count()) + return d + + def getMaskSum(self): + cdef: + INDEX_t e[2] + MASK_t val + INDEX_t count = 0 + self.startIter() + while self.next(e, &val): + count += val.count() + return count diff --git a/nl/PyNucleus_nl/clusterMethodCy.pyx 
b/nl/PyNucleus_nl/clusterMethodCy.pyx index 9ba0e1e..ebb8411 100644 --- a/nl/PyNucleus_nl/clusterMethodCy.pyx +++ b/nl/PyNucleus_nl/clusterMethodCy.pyx @@ -2007,13 +2007,14 @@ def assembleFarFieldInteractions(Kernel kernel, dict Pfar, INDEX_t m, DoFMap dm, for j in range(kiSize): if kernel_variable: kernel.evalParamsPtr(dim, &x[i, 0], &y[j, 0]) - cP.kernelInterpolant[i, j] = -2.0*kernel.evalPtr(dim, &x[i, 0], &y[j, 0]) + kernel.evalPtr(dim, &x[i, 0], &y[j, 0], &cP.kernelInterpolant[i, j]) + cP.kernelInterpolant[i, j] *= -2.0 else: for i in range(kiSize): for j in range(kiSize): if kernel_variable: kernel.evalParamsPtr(dim, &x[i, 0], &y[j, 0]) - cP.kernelInterpolant[i, j] = kernel.evalPtr(dim, &x[i, 0], &y[j, 0]) + kernel.evalPtr(dim, &x[i, 0], &y[j, 0], &cP.kernelInterpolant[i, j]) cdef class H2Matrix(LinearOperator): @@ -3270,14 +3271,18 @@ def getFractionalOrders(variableFractionalOrder s, meshBase mesh): if s.symmetric: for cellNo1 in range(numCells): for cellNo2 in range(cellNo1, numCells): - orders[cellNo1, cellNo2] = s.eval(centers[cellNo1, :], - centers[cellNo2, :]) + s.evalPtr(centers.shape[1], + &centers[cellNo1, 0], + &centers[cellNo2, 0], + &orders[cellNo1, cellNo2]) orders[cellNo2, cellNo1] = orders[cellNo1, cellNo2] else: for cellNo1 in range(numCells): for cellNo2 in range(numCells): - orders[cellNo1, cellNo2] = s.eval(centers[cellNo1, :], - centers[cellNo2, :]) + s.evalPtr(centers.shape[1], + &centers[cellNo1, 0], + &centers[cellNo2, 0], + &orders[cellNo1, cellNo2]) return orders diff --git a/nl/PyNucleus_nl/discretizedProblems.py b/nl/PyNucleus_nl/discretizedProblems.py index a5c1daf..1cd0e8a 100644 --- a/nl/PyNucleus_nl/discretizedProblems.py +++ b/nl/PyNucleus_nl/discretizedProblems.py @@ -7,6 +7,7 @@ import numpy as np from PyNucleus_base import solverFactory +from PyNucleus_base.performanceLogger import PLogger from PyNucleus_base.utilsFem import (classWithComputedDependencies, problem, generates) @@ -26,6 +27,7 @@ from . 
nonlocalProblems import (DIRICHLET, NEUMANN, HOMOGENEOUS_NEUMANN, transientFractionalProblem) +from . clusterMethodCy import H2Matrix, DistributedH2Matrix_globalData, DistributedH2Matrix_localData import logging import warnings @@ -202,6 +204,17 @@ def plotSolutionComponents(self, plotDefaults={}): pm.add(self.u.getComponent(c), label='u'+str(c)) pm.plot() + def exportVTK(self, filename): + x = [self.u] + labels = ['numerical_solution'] + if self.u_interp is not None: + x.append(self.u_interp) + labels.append('interpolated_analytic_solution') + if self.error is not None: + x.append(self.error) + labels.append('error') + self.u.dm.mesh.exportSolutionVTK(x, filename, labels=labels) + def reportErrors(self, group): if self.L2_error is not None: group.add('L2 error', self.L2_error, rTol=3e-2, aTol=1e-8) @@ -441,10 +454,14 @@ def buildHierarchy(self, if solverType.find('mg') >= 0: for subHierarchy in hM.builtHierarchies: for level in subHierarchy.algebraicLevels: + level.PLogger = level.Timer.PLogger + assemblyParams['PLogger'] = level.PLogger level.params.update(assemblyParams) level.build(ASSEMBLY) else: level = hM.builtHierarchies[-1].algebraicLevels[-1] + level.PLogger = level.Timer.PLogger + assemblyParams['PLogger'] = level.PLogger level.params.update(assemblyParams) level.build(ASSEMBLY) @@ -586,12 +603,30 @@ def solve(self, b, dm, dmInterior, dmBC, P_interior, P_bc, solver, boundaryCondi self.modelSolution = stationaryModelSolution(self, u, **data) def report(self, group): + group.add('kernel',self.continuumProblem.kernel) group.add('h', self.finalMesh.h) group.add('hmin', self.finalMesh.hmin) group.add('mesh quality', self.finalMesh.delta) group.add('DoFMap', str(self.dm)) group.add('Interior DoFMap', str(self.dmInterior)) group.add('Dirichlet DoFMap', str(self.dmBC)) + group.add('matrix', str(self.A)) + if isinstance(self.A, (H2Matrix, + DistributedH2Matrix_globalData, + DistributedH2Matrix_localData)): + for label, key in [('near field matrix', 'Anear'), + 
('min cluster size', 'minSize'), + ('interpolation order', 'interpolation_order'), + ('numAssembledCellPairs', 'numAssembledCellPairs'), + ('numIntegrations', 'numIntegrations')]: + group.add(label, self.hierarchy[-1]['Timer'].PLogger[key][0]) + if isinstance(self.A, (Dense_LinearOperator, + H2Matrix, + DistributedH2Matrix_globalData, + DistributedH2Matrix_localData)): + for label, key in [('useSymmetricCells', 'useSymmetricCells'), + ('useSymmetricLocalMatrix', 'useSymmetricLocalMatrix')]: + group.add(label, self.hierarchy[-1]['Timer'].PLogger[key][0]) group.add('matrix memory size', self.A.getMemorySize()) diff --git a/nl/PyNucleus_nl/fractionalLaplacian1D.pyx b/nl/PyNucleus_nl/fractionalLaplacian1D.pyx index 71ac2f1..6f9683c 100644 --- a/nl/PyNucleus_nl/fractionalLaplacian1D.pyx +++ b/nl/PyNucleus_nl/fractionalLaplacian1D.pyx @@ -203,6 +203,8 @@ cdef class fractionalLaplacian1D(nonlocalLaplacian1D): REAL_t smin, smax super(fractionalLaplacian1D, self).__init__(kernel, mesh, DoFMap, num_dofs, **kwargs) + self.symmetricCells = True + # The integrand (excluding the kernel) cancels 2 orders of the singularity within an element. self.singularityCancelationIntegrandWithinElement = 2. 
# The integrand (excluding the kernel) cancels 2 orders of the @@ -288,7 +290,7 @@ cdef class fractionalLaplacian1D(nonlocalLaplacian1D): sQR = specialQuadRule(qr, PSI) self.specialQuadRules[(singularityValue, panel)] = sQR if qr.num_nodes > self.temp.shape[0]: - self.temp = uninitialized((qr.num_nodes), dtype=REAL) + self.temp = uninitialized((qr.num_nodes, self.kernel.valueSize), dtype=REAL) self.qrId = sQR.qr self.PSI_id = sQR.PSI elif panel == COMMON_VERTEX: @@ -325,7 +327,7 @@ cdef class fractionalLaplacian1D(nonlocalLaplacian1D): sQR = specialQuadRule(qr, PSI) self.specialQuadRules[(singularityValue, panel)] = sQR if qr.num_nodes > self.temp.shape[0]: - self.temp = uninitialized((qr.num_nodes), dtype=REAL) + self.temp = uninitialized((qr.num_nodes, self.kernel.valueSize), dtype=REAL) self.qrVertex = sQR.qr self.PSI_vertex = sQR.PSI else: @@ -340,18 +342,19 @@ cdef class fractionalLaplacian1D(nonlocalLaplacian1D): 'quad_order_off_diagonal: {}\n'.format(list(self.distantQuadRules.keys()))) cdef void eval(self, - REAL_t[::1] contrib, + REAL_t[:, ::1] contrib, panelType panel, MASK_t mask=ALL): cdef: - INDEX_t k, m, i, j, I, J - REAL_t vol, val, vol1 = self.vol1, vol2 = self.vol2 + INDEX_t k, m, i, j, I, J, l + REAL_t vol, vol1 = self.vol1, vol2 = self.vol2, val REAL_t[:, ::1] simplex1 = self.simplex1 REAL_t[:, ::1] simplex2 = self.simplex2 quadratureRule qr REAL_t[:, ::1] PSI INDEX_t dofs_per_element = self.DoFMap.dofs_per_element INDEX_t dim = 1 + INDEX_t valueSize = self.kernel.valueSize REAL_t x[1] REAL_t y[1] @@ -375,9 +378,14 @@ cdef class fractionalLaplacian1D(nonlocalLaplacian1D): simplex1[self.perm1[1], j]*qr.nodes[1, m]) y[j] = (simplex2[self.perm2[0], j]*qr.nodes[2, m] + simplex2[self.perm2[1], j]*qr.nodes[3, m]) - self.temp[m] = qr.weights[m] * self.kernel.evalPtr(dim, &x[0], &y[0]) - - contrib[:] = 0. 
+ self.kernel.evalPtr(dim, + &x[0], + &y[0], + &self.vec[0]) + for l in range(valueSize): + self.temp[m, l] = qr.weights[m] * self.vec[l] + + contrib[:, :] = 0. for I in range(PSI.shape[0]): i = self.perm[I] for J in range(I, PSI.shape[0]): @@ -387,12 +395,11 @@ cdef class fractionalLaplacian1D(nonlocalLaplacian1D): else: k = 2*dofs_per_element*i-(i*(i+1) >> 1) + j if mask[k]: - val = 0. - for m in range(qr.num_nodes): - val += self.temp[m] * PSI[I, m] * PSI[J, m] - contrib[k] = val*vol - - + for l in range(valueSize): + val = 0. + for m in range(qr.num_nodes): + val += self.temp[m, l] * PSI[I, m] * PSI[J, m] + contrib[k, l] = val*vol cdef class fractionalLaplacian1D_nonsym(fractionalLaplacian1D): @@ -466,8 +473,8 @@ cdef class fractionalLaplacian1D_nonsym(fractionalLaplacian1D): sQR = specialQuadRule(qr, PHI3=PHI) self.specialQuadRules[(singularityValue, panel)] = sQR if qr.num_nodes > self.temp.shape[0]: - self.temp = uninitialized((qr.num_nodes), dtype=REAL) - self.temp2 = uninitialized((qr.num_nodes), dtype=REAL) + self.temp = uninitialized((qr.num_nodes, self.kernel.valueSize), dtype=REAL) + self.temp2 = uninitialized((qr.num_nodes, self.kernel.valueSize), dtype=REAL) self.qrId = sQR.qr self.PHI_id = sQR.PHI3 elif panel == COMMON_VERTEX: @@ -508,20 +515,20 @@ cdef class fractionalLaplacian1D_nonsym(fractionalLaplacian1D): sQR = specialQuadRule(qr, PHI3=PHI) self.specialQuadRules[(singularityValue, panel)] = sQR if qr.num_nodes > self.temp.shape[0]: - self.temp = uninitialized((qr.num_nodes), dtype=REAL) - self.temp2 = uninitialized((qr.num_nodes), dtype=REAL) + self.temp = uninitialized((qr.num_nodes, self.kernel.valueSize), dtype=REAL) + self.temp2 = uninitialized((qr.num_nodes, self.kernel.valueSize), dtype=REAL) self.qrVertex = sQR.qr self.PHI_vertex = sQR.PHI3 else: raise NotImplementedError('Unknown panel type: {}'.format(panel)) cdef void eval(self, - REAL_t[::1] contrib, + REAL_t[:, ::1] contrib, panelType panel, MASK_t mask=ALL): cdef: - INDEX_t k, m, 
i, j, I, J - REAL_t vol, val, vol1 = self.vol1, vol2 = self.vol2 + INDEX_t k, m, i, j, I, J, l + REAL_t vol, vol1 = self.vol1, vol2 = self.vol2, val REAL_t[:, ::1] simplex1 = self.simplex1 REAL_t[:, ::1] simplex2 = self.simplex2 quadratureRule qr @@ -530,6 +537,7 @@ cdef class fractionalLaplacian1D_nonsym(fractionalLaplacian1D): INDEX_t dim = 1 REAL_t x[1] REAL_t y[1] + INDEX_t valueSize = self.kernel.valueSize if panel >= 1: self.eval_distant_nonsym(contrib, panel, mask) @@ -551,21 +559,24 @@ cdef class fractionalLaplacian1D_nonsym(fractionalLaplacian1D): simplex1[self.perm1[1], j]*qr.nodes[1, m]) y[j] = (simplex2[self.perm2[0], j]*qr.nodes[2, m] + simplex2[self.perm2[1], j]*qr.nodes[3, m]) - self.temp[m] = qr.weights[m] * self.kernel.evalPtr(dim, &x[0], &y[0]) - self.temp2[m] = qr.weights[m] * self.kernel.evalPtr(dim, &y[0], &x[0]) + self.kernel.evalPtr(dim, &x[0], &y[0], &self.vec[0]) + self.kernel.evalPtr(dim, &y[0], &x[0], &self.vec2[0]) + for l in range(valueSize): + self.temp[m, l] = qr.weights[m] * self.vec[l] + self.temp2[m, l] = qr.weights[m] * self.vec2[l] - contrib[:] = 0. + contrib[:, :] = 0. for I in range(PHI.shape[0]): i = self.perm[I] for J in range(PHI.shape[0]): j = self.perm[J] k = i*(2*dofs_per_element)+j if mask[k]: - val = 0. - for m in range(qr.num_nodes): - val += (self.temp[m] * PHI[I, m, 0] - self.temp2[m] * PHI[I, m, 1]) * (PHI[J, m, 0] - PHI[J, m, 1]) - contrib[k] = val*vol - + for l in range(valueSize): + val = 0. 
+ for m in range(qr.num_nodes): + val += (self.temp[m, l] * PHI[I, m, 0] - self.temp2[m, l] * PHI[I, m, 1]) * (PHI[J, m, 0] - PHI[J, m, 1]) + contrib[k, l] = val*vol cdef class fractionalLaplacian1D_boundary(fractionalLaplacian1DZeroExterior): @@ -653,7 +664,7 @@ cdef class fractionalLaplacian1D_boundary(fractionalLaplacian1DZeroExterior): sQR = specialQuadRule(qr, PHI=PHI) self.specialQuadRules[(singularityValue, panel)] = sQR if qr.num_nodes > self.temp.shape[0]: - self.temp = uninitialized((qr.num_nodes), dtype=REAL) + self.temp = uninitialized((qr.num_nodes, self.kernel.valueSize), dtype=REAL) self.qrVertex = sQR.qr self.PHI_vertex = sQR.PHI else: @@ -668,12 +679,12 @@ cdef class fractionalLaplacian1D_boundary(fractionalLaplacian1DZeroExterior): 'quad_order_off_diagonal: {}\n'.format(list(self.distantQuadRules.keys()))) cdef void eval(self, - REAL_t[::1] contrib, + REAL_t[:, ::1] contrib, panelType panel, MASK_t mask=ALL): cdef: REAL_t vol = self.vol1, val - INDEX_t i, j, k, m + INDEX_t i, j, k, m, l quadratureRule qr REAL_t[:, ::1] PHI REAL_t[:, ::1] simplex1 = self.simplex1 @@ -682,6 +693,7 @@ cdef class fractionalLaplacian1D_boundary(fractionalLaplacian1DZeroExterior): REAL_t x[1] REAL_t y[1] INDEX_t dofs_per_element = self.DoFMap.dofs_per_element + INDEX_t valueSize = self.kernel.valueSize # Kernel: # \Gamma(x,y) = n \dot (x-y) * C(d,s) / (2s) / |x-y|^{d+2s} @@ -709,9 +721,11 @@ cdef class fractionalLaplacian1D_boundary(fractionalLaplacian1DZeroExterior): x[j] = (simplex1[self.perm1[0], j]*qr.nodes[0, m] + simplex1[self.perm1[1], j]*qr.nodes[1, m]) y[j] = simplex2[self.perm2[0], j]*qr.nodes[2, m] - self.temp[m] = qr.weights[m] * self.kernel.evalPtr(dim, &x[0], &y[0]) + self.kernel.evalPtr(dim, &x[0], &y[0], &self.vec[0]) + for l in range(valueSize): + self.temp[m, l] = qr.weights[m] * self.vec[l] - contrib[:] = 0. + contrib[:, :] = 0. 
for I in range(dofs_per_element): i = self.perm[I] @@ -722,12 +736,12 @@ cdef class fractionalLaplacian1D_boundary(fractionalLaplacian1DZeroExterior): else: k = dofs_per_element*i-(i*(i+1) >> 1) + j if mask[k]: - val = 0. - for m in range(qr.num_nodes): - val += self.temp[m] * PHI[I, m] * PHI[J, m] - contrib[k] = val*vol + for l in range(valueSize): + val = 0. + for m in range(qr.num_nodes): + val += self.temp[m, l] * PHI[I, m] * PHI[J, m] + contrib[k, l] = val*vol else: raise NotImplementedError('Unknown panel type: {}'.format(panel)) - diff --git a/nl/PyNucleus_nl/fractionalLaplacian2D.pyx b/nl/PyNucleus_nl/fractionalLaplacian2D.pyx index 577bbcd..93cdd78 100644 --- a/nl/PyNucleus_nl/fractionalLaplacian2D.pyx +++ b/nl/PyNucleus_nl/fractionalLaplacian2D.pyx @@ -584,6 +584,8 @@ cdef class fractionalLaplacian2D(nonlocalLaplacian2D): **kwargs): super(fractionalLaplacian2D, self).__init__(kernel, mesh, DoFMap, num_dofs, **kwargs) + self.symmetricCells = True + # The integrand (excluding the kernel) cancels 2 orders of the singularity within an element. self.singularityCancelationIntegrandWithinElement = 2. 
# The integrand (excluding the kernel) cancels 2 orders of the @@ -681,7 +683,7 @@ cdef class fractionalLaplacian2D(nonlocalLaplacian2D): sQR = specialQuadRule(qr, PSI) self.specialQuadRules[(singularityValue, panel)] = sQR if qr.num_nodes > self.temp.shape[0]: - self.temp = uninitialized((qr.num_nodes), dtype=REAL) + self.temp = uninitialized((qr.num_nodes, self.kernel.valueSize), dtype=REAL) self.qrId = sQR.qr self.PSI_id = sQR.PSI elif panel == COMMON_EDGE: @@ -746,7 +748,7 @@ cdef class fractionalLaplacian2D(nonlocalLaplacian2D): sQR = specialQuadRule(qr, PSI) self.specialQuadRules[(singularityValue, panel)] = sQR if qr.num_nodes > self.temp.shape[0]: - self.temp = uninitialized((qr.num_nodes), dtype=REAL) + self.temp = uninitialized((qr.num_nodes, self.kernel.valueSize), dtype=REAL) self.qrEdge = sQR.qr self.PSI_edge = sQR.PSI elif panel == COMMON_VERTEX: @@ -788,7 +790,7 @@ cdef class fractionalLaplacian2D(nonlocalLaplacian2D): sQR = specialQuadRule(qr, PSI) self.specialQuadRules[(singularityValue, panel)] = sQR if qr.num_nodes > self.temp.shape[0]: - self.temp = uninitialized((qr.num_nodes), dtype=REAL) + self.temp = uninitialized((qr.num_nodes, self.kernel.valueSize), dtype=REAL) self.qrVertex = sQR.qr self.PSI_vertex = sQR.PSI else: @@ -803,11 +805,12 @@ cdef class fractionalLaplacian2D(nonlocalLaplacian2D): 'quad_order_off_diagonal: {}\n'.format(list(self.distantQuadRules.keys()))) cdef void eval(self, - REAL_t[::1] contrib, + REAL_t[:, ::1] contrib, panelType panel, MASK_t mask=ALL): cdef: - INDEX_t k, m, i, j, I, J, dofs_per_element, dim = 2 + INDEX_t k, m, i, j, I, J, dofs_per_element, dim = 2, l + INDEX_t valueSize = self.kernel.valueSize REAL_t vol, val quadratureRule qr REAL_t[:, ::1] PSI @@ -842,7 +845,12 @@ cdef class fractionalLaplacian2D(nonlocalLaplacian2D): y[j] = (self.simplex2[self.perm2[0], j]*qr.nodes[3, m] + self.simplex2[self.perm2[1], j]*qr.nodes[4, m] + self.simplex2[self.perm2[2], j]*qr.nodes[5, m]) - self.temp[m] = qr.weights[m] * 
self.kernel.evalPtr(dim, &x[0], &y[0]) + self.kernel.evalPtr(dim, + &x[0], + &y[0], + &self.vec[0]) + for l in range(valueSize): + self.temp[m, l] = qr.weights[m] * self.vec[l] # "perm" maps from dofs on the reordered simplices (matching # vertices first) to the dofs in the usual ordering. @@ -860,10 +868,11 @@ cdef class fractionalLaplacian2D(nonlocalLaplacian2D): k = 2*dofs_per_element*i-(i*(i+1) >> 1) + j # Check if that entry has been requested. if mask[k]: - val = 0. - for m in range(qr.num_nodes): - val += self.temp[m] * PSI[I, m] * PSI[J, m] - contrib[k] = val*vol + for l in range(valueSize): + val = 0. + for m in range(qr.num_nodes): + val += self.temp[m, l] * PSI[I, m] * PSI[J, m] + contrib[k, l] = val*vol cdef class fractionalLaplacian2D_nonsym(fractionalLaplacian2D): @@ -941,8 +950,8 @@ cdef class fractionalLaplacian2D_nonsym(fractionalLaplacian2D): sQR = specialQuadRule(qr, PHI3=PHI) self.specialQuadRules[(singularityValue, panel)] = sQR if qr.num_nodes > self.temp.shape[0]: - self.temp = uninitialized((qr.num_nodes), dtype=REAL) - self.temp2 = uninitialized((qr.num_nodes), dtype=REAL) + self.temp = uninitialized((qr.num_nodes, self.kernel.valueSize), dtype=REAL) + self.temp2 = uninitialized((qr.num_nodes, self.kernel.valueSize), dtype=REAL) self.qrId = sQR.qr self.PHI_id = sQR.PHI3 elif panel == COMMON_EDGE: @@ -1014,8 +1023,8 @@ cdef class fractionalLaplacian2D_nonsym(fractionalLaplacian2D): sQR = specialQuadRule(qr, PHI3=PHI) self.specialQuadRules[(singularityValue, panel)] = sQR if qr.num_nodes > self.temp.shape[0]: - self.temp = uninitialized((qr.num_nodes), dtype=REAL) - self.temp2 = uninitialized((qr.num_nodes), dtype=REAL) + self.temp = uninitialized((qr.num_nodes, self.kernel.valueSize), dtype=REAL) + self.temp2 = uninitialized((qr.num_nodes, self.kernel.valueSize), dtype=REAL) self.qrEdge = sQR.qr self.PHI_edge = sQR.PHI3 elif panel == COMMON_VERTEX: @@ -1061,24 +1070,25 @@ cdef class fractionalLaplacian2D_nonsym(fractionalLaplacian2D): sQR = 
specialQuadRule(qr, PHI3=PHI) self.specialQuadRules[(singularityValue, panel)] = sQR if qr.num_nodes > self.temp.shape[0]: - self.temp = uninitialized((qr.num_nodes), dtype=REAL) - self.temp2 = uninitialized((qr.num_nodes), dtype=REAL) + self.temp = uninitialized((qr.num_nodes, self.kernel.valueSize), dtype=REAL) + self.temp2 = uninitialized((qr.num_nodes, self.kernel.valueSize), dtype=REAL) self.qrVertex = sQR.qr self.PHI_vertex = sQR.PHI3 else: raise NotImplementedError('Unknown panel type: {}'.format(panel)) cdef void eval(self, - REAL_t[::1] contrib, + REAL_t[:, ::1] contrib, panelType panel, MASK_t mask=ALL): cdef: - INDEX_t k, m, i, j, I, J, dofs_per_element, dim = 2 + INDEX_t k, m, i, j, I, J, dofs_per_element, dim = 2, l REAL_t vol, val quadratureRule qr REAL_t[:, :, ::1] PHI REAL_t x[2] REAL_t y[2] + INDEX_t valueSize = self.kernel.valueSize if panel >= 1: self.eval_distant_nonsym(contrib, panel, mask) @@ -1108,8 +1118,11 @@ cdef class fractionalLaplacian2D_nonsym(fractionalLaplacian2D): y[j] = (self.simplex2[self.perm2[0], j]*qr.nodes[3, m] + self.simplex2[self.perm2[1], j]*qr.nodes[4, m] + self.simplex2[self.perm2[2], j]*qr.nodes[5, m]) - self.temp[m] = qr.weights[m] * self.kernel.evalPtr(dim, &x[0], &y[0]) - self.temp2[m] = qr.weights[m] * self.kernel.evalPtr(dim, &y[0], &x[0]) + self.kernel.evalPtr(dim, &x[0], &y[0], &self.vec[0]) + self.kernel.evalPtr(dim, &y[0], &x[0], &self.vec2[0]) + for l in range(valueSize): + self.temp[m, l] = qr.weights[m] * self.vec[l] + self.temp2[m, l] = qr.weights[m] * self.vec2[l] # "perm" maps from dofs on the reordered simplices (matching # vertices first) to the dofs in the usual ordering. @@ -1121,10 +1134,11 @@ cdef class fractionalLaplacian2D_nonsym(fractionalLaplacian2D): k = i*(2*dofs_per_element)+j # Check if that entry has been requested. if mask[k]: - val = 0. 
- for m in range(qr.num_nodes): - val += (self.temp[m] * PHI[I, m, 0] - self.temp2[m] * PHI[I, m, 1]) * (PHI[J, m, 0] - PHI[J, m, 1]) - contrib[k] = val*vol + for l in range(valueSize): + val = 0. + for m in range(qr.num_nodes): + val += (self.temp[m, l] * PHI[I, m, 0] - self.temp2[m, l] * PHI[I, m, 1]) * (PHI[J, m, 0] - PHI[J, m, 1]) + contrib[k, l] = val*vol cdef class fractionalLaplacian2D_boundary(fractionalLaplacian2DZeroExterior): @@ -1222,7 +1236,7 @@ cdef class fractionalLaplacian2D_boundary(fractionalLaplacian2DZeroExterior): sQR = specialQuadRule(qr, PHI=PHI) self.specialQuadRules[(singularityValue, panel)] = sQR if qr.num_nodes > self.temp.shape[0]: - self.temp = uninitialized((qr.num_nodes), dtype=REAL) + self.temp = uninitialized((qr.num_nodes, self.kernel.valueSize), dtype=REAL) self.qrEdge = sQR.qr self.PHI_edge2 = sQR.PHI @@ -1244,7 +1258,7 @@ cdef class fractionalLaplacian2D_boundary(fractionalLaplacian2DZeroExterior): sQR = specialQuadRule(qr, PHI=PHI) self.specialQuadRules[(singularityValue, panel)] = sQR if qr.num_nodes > self.temp.shape[0]: - self.temp = uninitialized((qr.num_nodes), dtype=REAL) + self.temp = uninitialized((qr.num_nodes, self.kernel.valueSize), dtype=REAL) self.qrVertex = sQR.qr self.PHI_vertex2 = sQR.PHI @@ -1257,21 +1271,22 @@ cdef class fractionalLaplacian2D_boundary(fractionalLaplacian2DZeroExterior): 'quad_order_off_diagonal {}\n'.format(list(self.distantQuadRules.keys()))) cdef void eval(self, - REAL_t[::1] contrib, + REAL_t[:, ::1] contrib, panelType panel, MASK_t mask=ALL): cdef: REAL_t vol1 = self.vol1, vol2 = self.vol2, vol, val - INDEX_t i, j, k, I, J, m + INDEX_t i, j, k, I, J, m, l REAL_t[:, ::1] simplex1 = self.simplex1 REAL_t[:, ::1] simplex2 = self.simplex2 - REAL_t normW + REAL_t normW, nw quadratureRule qr REAL_t[:, ::1] PHI INDEX_t dofs_per_element = self.DoFMap.dofs_per_element INDEX_t dim = 2 REAL_t x[2] REAL_t y[2] + INDEX_t valueSize = self.kernel.valueSize if panel >= 1: 
self.eval_distant_boundary(contrib, panel, mask) @@ -1321,7 +1336,10 @@ cdef class fractionalLaplacian2D_boundary(fractionalLaplacian2DZeroExterior): normW = 1./sqrt(normW) for j in range(dim): self.w[j] *= normW - self.temp[m] = qr.weights[m] * mydot(self.n, self.w) * self.kernel.evalPtr(dim, &x[0], &y[0]) + self.kernel.evalPtr(dim, &x[0], &y[0], &self.vec[0]) + nw = mydot(self.n, self.w) + for l in range(valueSize): + self.temp[m, l] = qr.weights[m] * nw * self.vec[l] for I in range(dofs_per_element): i = self.perm[I] for J in range(I, dofs_per_element): @@ -1331,9 +1349,10 @@ cdef class fractionalLaplacian2D_boundary(fractionalLaplacian2DZeroExterior): else: k = dofs_per_element*i-(i*(i+1) >> 1) + j if mask[k]: - val = 0. - for m in range(qr.num_nodes): - val += self.temp[m] * PHI[I, m] * PHI[J, m] - contrib[k] = val*vol + for l in range(valueSize): + val = 0. + for m in range(qr.num_nodes): + val += self.temp[m, l] * PHI[I, m] * PHI[J, m] + contrib[k, l] = val*vol diff --git a/nl/PyNucleus_nl/fractionalOrders.pyx b/nl/PyNucleus_nl/fractionalOrders.pyx index b4a1f62..c4a8cc2 100644 --- a/nl/PyNucleus_nl/fractionalOrders.pyx +++ b/nl/PyNucleus_nl/fractionalOrders.pyx @@ -48,13 +48,17 @@ cdef enum fracOrderParams: cdef class fractionalOrderBase(twoPointFunction): - def __init__(self, REAL_t smin, REAL_t smax, BOOL_t symmetric, INDEX_t numParameters=0): - super(fractionalOrderBase, self).__init__(symmetric) + def __init__(self, REAL_t smin, REAL_t smax, BOOL_t symmetric, INDEX_t numParameters=1): + super(fractionalOrderBase, self).__init__(symmetric, 1) self.min = smin self.max = smax self.numParameters = numParameters + assert self.numParameters >= 1 - cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] value): + raise NotImplementedError() + + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, REAL_t* value): raise NotImplementedError() cdef void evalGrad(self, REAL_t[::1] x, REAL_t[::1] 
y, REAL_t[::1] grad): @@ -84,30 +88,36 @@ cdef class constFractionalOrder(fractionalOrderBase): def __setstate__(self, state): constFractionalOrder.__init__(self, state) - cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): - return self.value + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] value): + value[0] = self.value - cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): - return self.value + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, REAL_t* value): + value[0] = self.value + + cdef void evalGrad(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] grad): + grad[0] = 1. + + cdef REAL_t evalGradPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, INDEX_t vectorSize, REAL_t* grad): + grad[0] = 1. def __repr__(self): return '{}'.format(self.value) cdef class variableFractionalOrder(fractionalOrderBase): - def __init__(self, REAL_t smin, REAL_t smax, BOOL_t symmetric, INDEX_t numParameters=0): + def __init__(self, REAL_t smin, REAL_t smax, BOOL_t symmetric, INDEX_t numParameters=1): super(variableFractionalOrder, self).__init__(smin, smax, symmetric, numParameters) self.c_params = malloc(NUM_FRAC_ORDER_PARAMS*OFFSET) - cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] value): cdef: fun_t sFun = getFun(self.c_params, fSFUN) - return sFun(&x[0], &y[0], self.c_params) + value[0] = sFun(&x[0], &y[0], self.c_params) - cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, REAL_t* value): cdef: fun_t sFun = getFun(self.c_params, fSFUN) - return sFun(x, y, self.c_params) + value[0] = sFun(x, y, self.c_params) cdef void setFractionalOrderFun(self, void* params): memcpy(params, self.c_params, NUM_FRAC_ORDER_PARAMS*OFFSET) @@ -135,14 +145,14 @@ cdef class singleVariableTwoPointFunction(twoPointFunction): extendedFunction fun def __init__(self, extendedFunction fun): - super(singleVariableTwoPointFunction, 
self).__init__(False) + super(singleVariableTwoPointFunction, self).__init__(False, 1) self.fun = fun - cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): - return self.fun.eval(x) + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] value): + value[0] = self.fun.eval(x) - cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): - return self.fun.evalPtr(dim, x) + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, REAL_t* value): + value[0] = self.fun.evalPtr(dim, x) def __repr__(self): return '{}'.format(self.fun) @@ -159,11 +169,11 @@ cdef class singleVariableUnsymmetricFractionalOrder(variableFractionalOrder): super(singleVariableUnsymmetricFractionalOrder, self).__init__(smin, smax, False, numParameters) self.sFun = sFun - cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): - return self.sFun.eval(x) + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] value): + value[0] = self.sFun.eval(x) - cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): - return self.sFun.evalPtr(dim, x) + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, REAL_t* value): + value[0] = self.sFun.evalPtr(dim, x) cdef void evalGrad(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] grad): self.sFun.evalGrad(x, grad) @@ -583,7 +593,7 @@ cdef class lookupExtended(extendedFunction): for k in range(self.dm.dofs_per_element): dof = self.dm.cell2dof(cellNo, k) if dof >= 0: - shapeFun = self.dm.localShapeFunctions[k] + shapeFun = self.dm.getLocalShapeFunction(k) val += shapeFun.eval(self.cellFinder.bary)*self.u[dof] return val @@ -599,7 +609,7 @@ cdef class lookupExtended(extendedFunction): for k in range(self.dm.dofs_per_element): dof = self.dm.cell2dof(cellNo, k) if dof >= 0: - shapeFun = self.dm.localShapeFunctions[k] + shapeFun = self.dm.getLocalShapeFunction(k) val += shapeFun.eval(self.cellFinder.bary)*self.u[dof] return val @@ -615,7 +625,7 @@ cdef class lookupExtended(extendedFunction): for k in range(self.dm.dofs_per_element): dof = 
self.dm.cell2dof(cellNo, k) if dof >= 0: - shapeFun = self.dm.localShapeFunctions[k] + shapeFun = self.dm.getLocalShapeFunction(k) grad[dof] += shapeFun.eval(self.cellFinder.bary) cdef void evalGradPtr(self, INDEX_t dim, REAL_t* x, INDEX_t vectorSize, REAL_t* grad): @@ -630,7 +640,7 @@ cdef class lookupExtended(extendedFunction): for k in range(self.dm.dofs_per_element): dof = self.dm.cell2dof(cellNo, k) if dof >= 0: - shapeFun = self.dm.localShapeFunctions[k] + shapeFun = self.dm.getLocalShapeFunction(k) grad[dof] += shapeFun.eval(self.cellFinder.bary) def __getstate__(self): @@ -639,6 +649,12 @@ cdef class lookupExtended(extendedFunction): def __setstate__(self, state): lookupExtended.__init__(self, state[0], state[1], state[2]) + def __repr__(self): + if self.dm.num_dofs < 10: + return str(np.array(self.u).tolist()) + else: + return repr(self.dm) + cdef class constantNonSymFractionalOrder(singleVariableUnsymmetricFractionalOrder): cdef: @@ -762,8 +778,12 @@ cdef class sumFractionalOrder(variableFractionalOrder): self.s2 = s2 self.fac2 = fac2 - cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): - return self.s1.eval(x, y) + self.s2.eval(x, y) + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] value): + cdef: + REAL_t val1, val2 + self.s1.evalPtr(x.shape[0], &x[0], &y[0], &val1) + self.s2.evalPtr(x.shape[0], &x[0], &y[0], &val2) + value[0] = val1*val2 cdef REAL_t islandsFractionalOrderFun(REAL_t *x, REAL_t *y, void *c_params): diff --git a/nl/PyNucleus_nl/helpers.py b/nl/PyNucleus_nl/helpers.py index e43f0a0..af31828 100644 --- a/nl/PyNucleus_nl/helpers.py +++ b/nl/PyNucleus_nl/helpers.py @@ -7,6 +7,7 @@ from PyNucleus_base import INDEX from PyNucleus_base.utilsFem import getLoggingTimer +from PyNucleus_base.performanceLogger import FakeTimer from PyNucleus_base.linear_operators import (LinearOperator, multiIntervalInterpolationOperator, delayedConstructionOperator) @@ -147,8 +148,9 @@ def getFracLapl(mesh, DoFMap, kernel=None, rangedOpParams={}, 
**kwargs): forceRebuild = kwargs.get('forceRebuild', True) if timer is None: - timer = getLoggingTimer(LOGGER, comm=comm, rootOutput=True) - kwargs['timer'] = timer + # timer = getLoggingTimer(LOGGER, comm=comm, rootOutput=True) + # kwargs['timer'] = timer + timer = FakeTimer if kernel is None: s = rangedOpParams['s'] @@ -336,7 +338,7 @@ def build(self, buildType): DoFMap = self.DoFMap mesh = self.meshLevel.mesh self.fullyAssembled = True - with self.Timer('Assembled matrices'): + with self.Timer('Assembled matrices on level {}'.format(self.levelNo)): self.params.pop('mesh', None) if self.comm is not None and self.comm.size > 1: self.params['assemblyComm'] = self.comm diff --git a/nl/PyNucleus_nl/interactionDomains.pyx b/nl/PyNucleus_nl/interactionDomains.pyx index 234e9a0..d500a94 100644 --- a/nl/PyNucleus_nl/interactionDomains.pyx +++ b/nl/PyNucleus_nl/interactionDomains.pyx @@ -25,7 +25,7 @@ cdef class interactionDomain(parametrizedTwoPointFunction): """Base class for all interaction domains.""" def __init__(self, BOOL_t isComplement): - super(interactionDomain, self).__init__(True) + super(interactionDomain, self).__init__(True, 1) self.complement = isComplement self.intervals1 = uninitialized((4), dtype=REAL) self.intervals2 = uninitialized((3), dtype=REAL) @@ -633,11 +633,11 @@ cdef class fullSpace(interactionDomain): cdef RELATIVE_POSITION_t getRelativePosition(self, REAL_t[:, ::1] simplex1, REAL_t[:, ::1] simplex2): return INTERACT - cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): - return 1. + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] value): + value[0] = 1. - cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): - return 1. + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, REAL_t* value): + value[0] = 1. 
def __repr__(self): dim = getINDEX(self.params, fKDIM) @@ -716,7 +716,7 @@ cdef class ball2(interactionDomain): numIntersections += 1 return numIntersections - cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] value): cdef: REAL_t s = 0. INDEX_t i @@ -724,11 +724,11 @@ cdef class ball2(interactionDomain): for i in range(x.shape[0]): s += (x[i]-y[i])**2 if s <= horizon2: - return 1. + value[0] = 1. else: - return 0. + value[0] = 0. - cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, REAL_t* value): cdef: REAL_t s = 0. INDEX_t i @@ -736,9 +736,9 @@ cdef class ball2(interactionDomain): for i in range(dim): s += (x[i]-y[i])**2 if s <= horizon2: - return 1. + value[0] = 1. else: - return 0. + value[0] = 0. def __repr__(self): horizon2 = getREAL(self.params, fHORIZON2) @@ -849,7 +849,7 @@ cdef class ballInf(interactionDomain): intersections[0], intersections[1] = intersections[1], intersections[0] return numIntersections - cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] value): cdef: REAL_t s = 0. INDEX_t i @@ -857,11 +857,11 @@ cdef class ballInf(interactionDomain): for i in range(x.shape[0]): s = max(s, (x[i]-y[i])**2) if s <= horizon2: - return 1. + value[0] = 1. else: - return 0. + value[0] = 0. - cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, REAL_t* value): cdef: REAL_t s = 0. INDEX_t i @@ -869,9 +869,9 @@ cdef class ballInf(interactionDomain): for i in range(dim): s = max(s, (x[i]-y[i])**2) if s <= horizon2: - return 1. + value[0] = 1. else: - return 0. + value[0] = 0. 
def __repr__(self): horizon2 = getREAL(self.params, fHORIZON2) @@ -933,7 +933,7 @@ cdef class ball2Complement(interactionDomain): self.relPos = CUT return self.relPos - cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] value): cdef: REAL_t s = 0. INDEX_t i @@ -941,11 +941,11 @@ cdef class ball2Complement(interactionDomain): for i in range(x.shape[0]): s += (x[i]-y[i])**2 if s > horizon2: - return 1. + value[0] = 1. else: - return 0. + value[0] = 0. - cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, REAL_t* value): cdef: REAL_t s = 0. INDEX_t i @@ -953,9 +953,9 @@ cdef class ball2Complement(interactionDomain): for i in range(dim): s += (x[i]-y[i])**2 if s > horizon2: - return 1. + value[0] = 1. else: - return 0. + value[0] = 0. def __repr__(self): horizon2 = getREAL(self.params, fHORIZON2) @@ -979,18 +979,18 @@ cdef class linearTransformInteraction(interactionDomain): self.simplex1 = uninitialized((dim+1, dim), dtype=REAL) self.simplex2 = uninitialized((dim+1, dim), dtype=REAL) - cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] value): self.transformVectorForward(x, self.vec) self.transformVectorForward(y, self.vec2) - return self.baseInteraction.eval(self.vec, self.vec2) + self.baseInteraction.eval(self.vec, self.vec2, value) - cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, REAL_t* value): cdef: REAL_t[::1] xA = x REAL_t[::1] yA = y self.transformVectorForward(xA, self.vec) self.transformVectorForward(yA, self.vec2) - return self.baseInteraction.evalPtr(dim, &self.vec[0], &self.vec2[0]) + self.baseInteraction.evalPtr(dim, &self.vec[0], &self.vec2[0], value) def __getstate__(self): return self.A diff --git a/nl/PyNucleus_nl/kernelNormalization.pyx 
b/nl/PyNucleus_nl/kernelNormalization.pyx index 9dfdc97..6057e90 100644 --- a/nl/PyNucleus_nl/kernelNormalization.pyx +++ b/nl/PyNucleus_nl/kernelNormalization.pyx @@ -71,9 +71,7 @@ cdef class constantFractionalLaplacianScaling(constantTwoPoint): value = np.nan else: if horizon < inf: - value = (2.-2*s) * pow(horizon, 2*s-2.) * gamma(0.5*dim)/pow(pi, 0.5*dim) * 0.5 - if dim > 1: - value *= 2. + value = (2.-2*s) * pow(horizon, 2*s-2.) * dim * gamma(0.5*dim)/pow(pi, 0.5*dim) * 0.5 else: if (tempered == 0.) or (s == 0.5): value = 2.0**(2.0*s) * s * gamma(s+0.5*dim)/pow(pi, 0.5*dim)/gamma(1.0-s) * 0.5 @@ -93,7 +91,7 @@ cdef class constantFractionalLaplacianScaling(constantTwoPoint): cdef class constantFractionalLaplacianScalingDerivative(twoPointFunction): def __init__(self, INDEX_t dim, REAL_t s, REAL_t horizon, BOOL_t normalized, BOOL_t boundary, INDEX_t derivative, REAL_t tempered): - super(constantFractionalLaplacianScalingDerivative, self).__init__(True) + super(constantFractionalLaplacianScalingDerivative, self).__init__(True, 1) self.dim = dim self.s = s @@ -108,9 +106,7 @@ cdef class constantFractionalLaplacianScalingDerivative(twoPointFunction): if self.normalized: if horizon2 < inf: - self.C = (2.-2*s) * pow(horizon2, s-1.) * gamma(0.5*dim)/pow(pi, 0.5*dim) * 0.5 - if dim > 1: - self.C *= 2. + self.C = (2.-2*s) * pow(horizon2, s-1.) * dim*gamma(0.5*dim)/pow(pi, 0.5*dim) * 0.5 else: if (tempered == 0.) or (s == 0.5): self.C = 2.0**(2.0*s) * s * gamma(s+0.5*dim) * pow(pi, -0.5*dim) / gamma(1.0-s) * 0.5 @@ -139,45 +135,45 @@ cdef class constantFractionalLaplacianScalingDerivative(twoPointFunction): else: raise NotImplementedError(self.derivative) - cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] value): cdef: REAL_t d2 INDEX_t i if self.derivative == 0: - return self.C + value[0] = self.C elif self.derivative == 1: d2 = 0. 
for i in range(self.dim): d2 += (x[i]-y[i])*(x[i]-y[i]) if self.normalized: if self.horizon2 < inf: - return self.C*(-log(d2/self.horizon2) + self.fac) + value[0] = self.C*(-log(d2/self.horizon2) + self.fac) else: - return self.C*(-log(0.25*d2) + self.fac) + value[0] = self.C*(-log(0.25*d2) + self.fac) else: - return self.C*(-log(d2) + self.fac) + value[0] = self.C*(-log(d2) + self.fac) else: raise NotImplementedError() - cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, REAL_t* value): cdef: REAL_t d2 INDEX_t i if self.derivative == 0: - return self.C + value[0] = self.C elif self.derivative == 1: d2 = 0. for i in range(self.dim): d2 += (x[i]-y[i])*(x[i]-y[i]) if self.normalized: if self.horizon2 < inf: - return self.C*(-log(d2/self.horizon2) + self.fac) + value[0] = self.C*(-log(d2/self.horizon2) + self.fac) else: - return self.C*(-log(0.25*d2) + self.fac) + value[0] = self.C*(-log(0.25*d2) + self.fac) else: - return self.C*(-log(d2) + self.fac) + value[0] = self.C*(-log(d2) + self.fac) else: raise NotImplementedError() @@ -187,6 +183,9 @@ cdef class constantFractionalLaplacianScalingDerivative(twoPointFunction): def __setstate__(self, state): constantFractionalLaplacianScalingDerivative.__init__(self, state[0], state[1], state[2], state[3], state[4], state[5], state[6]) + def __repr__(self): + return "{}({},{} -> {})".format(self.__class__.__name__, self.s, self.horizon, self.fac) + cdef class constantIntegrableScaling(constantTwoPoint): def __init__(self, kernelType kType, interactionDomain interaction, INDEX_t dim, REAL_t horizon): @@ -247,7 +246,7 @@ cdef class constantIntegrableScaling(constantTwoPoint): cdef class variableFractionalLaplacianScaling(parametrizedTwoPointFunction): def __init__(self, BOOL_t symmetric, BOOL_t normalized=True, BOOL_t boundary=False, INDEX_t derivative=0): - super(variableFractionalLaplacianScaling, self).__init__(symmetric) + 
super(variableFractionalLaplacianScaling, self).__init__(symmetric, 1) self.normalized = normalized self.boundary = boundary self.derivative = derivative @@ -257,7 +256,7 @@ cdef class variableFractionalLaplacianScaling(parametrizedTwoPointFunction): parametrizedTwoPointFunction.setParams(self, params) self.dim = getINDEX(self.params, fKDIM) - cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] value): cdef: REAL_t s = getREAL(self.params, fS) REAL_t horizon2 = getREAL(self.params, fHORIZON2) @@ -271,7 +270,7 @@ cdef class variableFractionalLaplacianScaling(parametrizedTwoPointFunction): elif self.dim == 2: C = (2.-2*s) * pow(horizon2, s-1.) * 2./pi * 0.5 elif self.dim == 3: - C = (2.-2*s) * pow(horizon2, s-1.) * 1./pi * 0.5 + C = (2.-2*s) * pow(horizon2, s-1.) * 1.5/pi * 0.5 else: raise NotImplementedError() else: @@ -280,7 +279,7 @@ cdef class variableFractionalLaplacianScaling(parametrizedTwoPointFunction): C = 0.5 if self.derivative == 0: - return C + value[0] = C elif self.derivative == 1: d2 = 0. 
for i in range(self.dim): @@ -288,23 +287,23 @@ cdef class variableFractionalLaplacianScaling(parametrizedTwoPointFunction): if self.normalized: if horizon2 < inf: if not self.boundary: - return C*(-log(d2/horizon2) - 1./(1.-s)) + value[0] = C*(-log(d2/horizon2) - 1./(1.-s)) else: - return C*(-log(d2/horizon2) - 1./(1.-s) - 1./s) + value[0] = C*(-log(d2/horizon2) - 1./(1.-s) - 1./s) else: if not self.boundary: - return C*(-log(0.25*d2) + self.digamma.eval(s+0.5*self.dim) + self.digamma.eval(-s)) + value[0] = C*(-log(0.25*d2) + self.digamma.eval(s+0.5*self.dim) + self.digamma.eval(-s)) else: - return C*(-log(0.25*d2) + self.digamma.eval(s+0.5*self.dim) + self.digamma.eval(1.-s)) + value[0] = C*(-log(0.25*d2) + self.digamma.eval(s+0.5*self.dim) + self.digamma.eval(1.-s)) else: if not self.boundary: - return C*(-log(d2)) + value[0] = C*(-log(d2)) else: - return C*(-log(d2)-1./s) + value[0] = C*(-log(d2)-1./s) else: raise NotImplementedError() - cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, REAL_t* value): cdef: REAL_t s = getREAL(self.params, fS) REAL_t horizon2 = getREAL(self.params, fHORIZON2) @@ -318,7 +317,7 @@ cdef class variableFractionalLaplacianScaling(parametrizedTwoPointFunction): elif self.dim == 2: C = (2.-2*s) * pow(horizon2, s-1.) * 2./pi * 0.5 elif self.dim == 3: - C = (2.-2*s) * pow(horizon2, s-1.) * 1./pi * 0.5 + C = (2.-2*s) * pow(horizon2, s-1.) * 1.5/pi * 0.5 else: raise NotImplementedError() else: @@ -327,7 +326,7 @@ cdef class variableFractionalLaplacianScaling(parametrizedTwoPointFunction): C = 0.5 if self.derivative == 0: - return C + value[0] = C elif self.derivative == 1: d2 = 0. 
for i in range(self.dim): @@ -335,19 +334,19 @@ cdef class variableFractionalLaplacianScaling(parametrizedTwoPointFunction): if self.normalized: if horizon2 < inf: if not self.boundary: - return C*(-log(d2/horizon2) - 1./(1.-s)) + value[0] = C*(-log(d2/horizon2) - 1./(1.-s)) else: - return C*(-log(d2/horizon2) - 1./(1.-s) - 1./s) + value[0] = C*(-log(d2/horizon2) - 1./(1.-s) - 1./s) else: if not self.boundary: - return C*(-log(0.25*d2) + self.digamma.eval(s+0.5*self.dim) + self.digamma.eval(-s)) + value[0] = C*(-log(0.25*d2) + self.digamma.eval(s+0.5*self.dim) + self.digamma.eval(-s)) else: - return C*(-log(0.25*d2) + self.digamma.eval(s+0.5*self.dim) + self.digamma.eval(1.-s)) + value[0] = C*(-log(0.25*d2) + self.digamma.eval(s+0.5*self.dim) + self.digamma.eval(1.-s)) else: if not self.boundary: - return C*(-log(d2)) + value[0] = C*(-log(d2)) else: - return C*(-log(d2)-1./s) + value[0] = C*(-log(d2)-1./s) else: raise NotImplementedError() @@ -382,34 +381,34 @@ cdef class variableFractionalLaplacianScalingWithDifferentHorizon(variableFracti super(variableFractionalLaplacianScalingWithDifferentHorizon, self).__init__(symmetric, normalized, boundary, derivative) self.horizonFun = horizonFun - cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] value): cdef: void* params void* paramsModified = malloc(NUM_KERNEL_PARAMS*OFFSET) - REAL_t horizon + REAL_t horizon, scalingValue horizon = self.horizonFun.eval(x) params = self.getParams() memcpy(paramsModified, params, NUM_KERNEL_PARAMS*OFFSET) setREAL(paramsModified, fHORIZON2, horizon**2) self.setParams(paramsModified) - scalingValue = variableFractionalLaplacianScaling.eval(self, x, y) + variableFractionalLaplacianScaling.evalPtr(self, x.shape[0], &x[0], &y[0], &scalingValue) self.setParams(params) - return scalingValue + value[0] = scalingValue - cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + cdef void evalPtr(self, INDEX_t dim, REAL_t* 
x, REAL_t* y, REAL_t* value): cdef: void* params void* paramsModified = malloc(NUM_KERNEL_PARAMS*OFFSET) - REAL_t horizon + REAL_t horizon, scalingValue REAL_t[::1] xA = x horizon = self.horizonFun.eval(xA) params = self.getParams() memcpy(paramsModified, params, NUM_KERNEL_PARAMS*OFFSET) setREAL(paramsModified, fHORIZON2, horizon**2) self.setParams(paramsModified) - scalingValue = variableFractionalLaplacianScaling.evalPtr(self, dim, x, y) + variableFractionalLaplacianScaling.evalPtr(self, dim, x, y, &scalingValue) self.setParams(params) - return scalingValue + value[0] = scalingValue def __getstate__(self): return (self.symmetric, self.normalized, self.boundary, self.derivative, self.horizonFun) diff --git a/nl/PyNucleus_nl/kernelsCy.pxd b/nl/PyNucleus_nl/kernelsCy.pxd index 87c2202..1d2256c 100644 --- a/nl/PyNucleus_nl/kernelsCy.pxd +++ b/nl/PyNucleus_nl/kernelsCy.pxd @@ -36,7 +36,6 @@ cdef class Kernel(twoPointFunction): public BOOL_t variable public BOOL_t piecewise public BOOL_t boundary - public INDEX_t vectorSize kernel_fun_t kernelFun void *c_kernel_params cdef REAL_t getSingularityValue(self) @@ -49,10 +48,8 @@ cdef class Kernel(twoPointFunction): cdef void evalParamsOnSimplices(self, REAL_t[::1] center1, REAL_t[::1] center2, REAL_t[:, ::1] simplex1, REAL_t[:, ::1] simplex2) cdef void evalParams(self, REAL_t[::1] x, REAL_t[::1] y) cdef void evalParamsPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y) - cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y) - cdef void evalVector(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] vec) - cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y) - cdef void evalVectorPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, INDEX_t vectorSize, REAL_t* vec) + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] value) + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, REAL_t* value) cdef class ComplexKernel(ComplextwoPointFunction): @@ -73,7 +70,6 @@ cdef class ComplexKernel(ComplextwoPointFunction): public 
BOOL_t variable public BOOL_t piecewise public BOOL_t boundary - public INDEX_t vectorSize complex_kernel_fun_t kernelFun void *c_kernel_params cdef REAL_t getSingularityValue(self) @@ -86,10 +82,8 @@ cdef class ComplexKernel(ComplextwoPointFunction): cdef void evalParamsOnSimplices(self, REAL_t[::1] center1, REAL_t[::1] center2, REAL_t[:, ::1] simplex1, REAL_t[:, ::1] simplex2) cdef void evalParams(self, REAL_t[::1] x, REAL_t[::1] y) cdef void evalParamsPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y) - cdef COMPLEX_t eval(self, REAL_t[::1] x, REAL_t[::1] y) - cdef void evalVector(self, REAL_t[::1] x, REAL_t[::1] y, COMPLEX_t[::1] vec) - cdef COMPLEX_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y) - cdef void evalVectorPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, INDEX_t vectorSize, COMPLEX_t* vec) + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, COMPLEX_t[::1] value) + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, COMPLEX_t* value) cdef class FractionalKernel(Kernel): diff --git a/nl/PyNucleus_nl/kernelsCy.pyx b/nl/PyNucleus_nl/kernelsCy.pyx index 36e1252..4d75df0 100644 --- a/nl/PyNucleus_nl/kernelsCy.pyx +++ b/nl/PyNucleus_nl/kernelsCy.pyx @@ -63,9 +63,10 @@ def getKernelEnum(str kernelTypeString): cdef REAL_t fracKernelFinite1D(REAL_t *x, REAL_t *y, void *c_params): cdef: - REAL_t s, C, d2 + REAL_t s, C, d2, inter twoPointFunction interaction = (((c_params+fINTERACTION))[0]) - if interaction.evalPtr(1, x, y) != 0.: + interaction.evalPtr(1, x, y, &inter) + if inter != 0.: s = getREAL(c_params, fS) C = getREAL(c_params, fSCALING) d2 = (x[0]-y[0])*(x[0]-y[0]) @@ -76,9 +77,10 @@ cdef REAL_t fracKernelFinite1D(REAL_t *x, REAL_t *y, void *c_params): cdef REAL_t fracKernelFinite2D(REAL_t *x, REAL_t *y, void *c_params): cdef: - REAL_t s, C, d2 + REAL_t s, C, d2, inter twoPointFunction interaction = (((c_params+fINTERACTION))[0]) - if interaction.evalPtr(2, x, y) != 0.: + interaction.evalPtr(2, x, y, &inter) + if inter != 0.: s = getREAL(c_params, fS) C 
= getREAL(c_params, fSCALING) d2 = (x[0]-y[0])*(x[0]-y[0]) + (x[1]-y[1])*(x[1]-y[1]) @@ -89,22 +91,24 @@ cdef REAL_t fracKernelFinite2D(REAL_t *x, REAL_t *y, void *c_params): cdef REAL_t fracKernelFinite3D(REAL_t *x, REAL_t *y, void *c_params): cdef: - REAL_t s, C, d2 + REAL_t s, C, d2, inter twoPointFunction interaction = (((c_params+fINTERACTION))[0]) - if interaction.evalPtr(3, x, y) != 0.: + interaction.evalPtr(3, x, y, &inter) + if inter != 0.: s = getREAL(c_params, fS) C = getREAL(c_params, fSCALING) d2 = (x[0]-y[0])*(x[0]-y[0]) + (x[1]-y[1])*(x[1]-y[1]) + (x[2]-y[2])*(x[2]-y[2]) - return C*pow(d2, -1.-s) + return C*pow(d2, -1.5-s) else: return 0. cdef REAL_t fracKernelFinite1Dboundary(REAL_t *x, REAL_t *y, void *c_params): cdef: - REAL_t s, C, d2 + REAL_t s, C, d2, inter twoPointFunction interaction = (((c_params+fINTERACTION))[0]) - if interaction.evalPtr(1, x, y) != 0.: + interaction.evalPtr(1, x, y, &inter) + if inter != 0.: s = getREAL(c_params, fS) C = getREAL(c_params, fSCALING) d2 = (x[0]-y[0])*(x[0]-y[0]) @@ -115,9 +119,10 @@ cdef REAL_t fracKernelFinite1Dboundary(REAL_t *x, REAL_t *y, void *c_params): cdef REAL_t fracKernelFinite2Dboundary(REAL_t *x, REAL_t *y, void *c_params): cdef: - REAL_t s, C, d2 + REAL_t s, C, d2, inter twoPointFunction interaction = (((c_params+fINTERACTION))[0]) - if interaction.evalPtr(2, x, y) != 0.: + interaction.evalPtr(2, x, y, &inter) + if inter != 0.: s = getREAL(c_params, fS) C = getREAL(c_params, fSCALING) d2 = (x[0]-y[0])*(x[0]-y[0]) + (x[1]-y[1])*(x[1]-y[1]) @@ -128,9 +133,10 @@ cdef REAL_t fracKernelFinite2Dboundary(REAL_t *x, REAL_t *y, void *c_params): cdef REAL_t fracKernelFinite3Dboundary(REAL_t *x, REAL_t *y, void *c_params): cdef: - REAL_t s, C, d2 + REAL_t s, C, d2, inter twoPointFunction interaction = (((c_params+fINTERACTION))[0]) - if interaction.evalPtr(3, x, y) != 0.: + interaction.evalPtr(3, x, y, &inter) + if inter != 0.: s = getREAL(c_params, fS) C = getREAL(c_params, fSCALING) d2 = 
(x[0]-y[0])*(x[0]-y[0]) + (x[1]-y[1])*(x[1]-y[1]) + (x[2]-y[2])*(x[2]-y[2]) @@ -256,8 +262,9 @@ cdef REAL_t temperedFracKernelInfinite3Dboundary(REAL_t *x, REAL_t *y, void *c_p cdef REAL_t indicatorKernel1D(REAL_t *x, REAL_t *y, void *c_params): cdef: twoPointFunction interaction = (((c_params+fINTERACTION))[0]) - REAL_t C - if interaction.evalPtr(1, x, y) != 0.: + REAL_t C, inter + interaction.evalPtr(1, x, y, &inter) + if inter != 0.: C = getREAL(c_params, fSCALING) return C else: @@ -267,8 +274,9 @@ cdef REAL_t indicatorKernel1D(REAL_t *x, REAL_t *y, void *c_params): cdef REAL_t indicatorKernel2D(REAL_t *x, REAL_t *y, void *c_params): cdef: twoPointFunction interaction = (((c_params+fINTERACTION))[0]) - REAL_t C - if interaction.evalPtr(2, x, y) != 0.: + REAL_t C, inter + interaction.evalPtr(2, x, y, &inter) + if inter != 0.: C = getREAL(c_params, fSCALING) return C else: @@ -278,8 +286,9 @@ cdef REAL_t indicatorKernel2D(REAL_t *x, REAL_t *y, void *c_params): cdef REAL_t indicatorKernel1Dboundary(REAL_t *x, REAL_t *y, void *c_params): cdef: twoPointFunction interaction = (((c_params+fINTERACTION))[0]) - REAL_t C - if interaction.evalPtr(1, x, y) != 0.: + REAL_t C, inter + interaction.evalPtr(1, x, y, &inter) + if inter != 0.: C = getREAL(c_params, fSCALING) return -C*2.0*sqrt((x[0]-y[0])*(x[0]-y[0])) else: @@ -289,8 +298,9 @@ cdef REAL_t indicatorKernel1Dboundary(REAL_t *x, REAL_t *y, void *c_params): cdef REAL_t indicatorKernel2Dboundary(REAL_t *x, REAL_t *y, void *c_params): cdef: twoPointFunction interaction = (((c_params+fINTERACTION))[0]) - REAL_t C - if interaction.evalPtr(2, x, y) != 0.: + REAL_t C, inter + interaction.evalPtr(2, x, y, &inter) + if inter != 0.: C = getREAL(c_params, fSCALING) return -C*sqrt((x[0]-y[0])*(x[0]-y[0])+(x[1]-y[1])*(x[1]-y[1])) else: @@ -301,8 +311,9 @@ cdef REAL_t peridynamicKernel1D(REAL_t *x, REAL_t *y, void *c_params): cdef: interactionDomain interaction = (((c_params+fINTERACTION))[0]) REAL_t C - REAL_t d2 - if 
interaction.evalPtr(1, x, y) != 0.: + REAL_t d2, inter + interaction.evalPtr(1, x, y, &inter) + if inter != 0.: d2 = (x[0]-y[0])*(x[0]-y[0]) C = getREAL(c_params, fSCALING) return C/sqrt(d2) @@ -314,8 +325,9 @@ cdef REAL_t peridynamicKernel2D(REAL_t *x, REAL_t *y, void *c_params): cdef: interactionDomain interaction = (((c_params+fINTERACTION))[0]) REAL_t C - REAL_t d2 - if interaction.evalPtr(2, x, y) != 0.: + REAL_t d2, inter + interaction.evalPtr(2, x, y, &inter) + if inter != 0.: d2 = (x[0]-y[0])*(x[0]-y[0]) + (x[1]-y[1])*(x[1]-y[1]) C = getREAL(c_params, fSCALING) return C/sqrt(d2) @@ -327,8 +339,9 @@ cdef REAL_t peridynamicKernel3D(REAL_t *x, REAL_t *y, void *c_params): cdef: interactionDomain interaction = (((c_params+fINTERACTION))[0]) REAL_t C - REAL_t d2 - if interaction.evalPtr(3, x, y) != 0.: + REAL_t d2, inter + interaction.evalPtr(3, x, y, &inter) + if inter != 0.: d2 = (x[0]-y[0])*(x[0]-y[0]) + (x[1]-y[1])*(x[1]-y[1]) + (x[2]-y[2])*(x[2]-y[2]) C = getREAL(c_params, fSCALING) return C/sqrt(d2) @@ -340,8 +353,9 @@ cdef REAL_t peridynamicKernel1Dboundary(REAL_t *x, REAL_t *y, void *c_params): cdef: interactionDomain interaction = (((c_params+fINTERACTION))[0]) REAL_t C - REAL_t d - if interaction.evalPtr(1, x, y) != 0.: + REAL_t d, inter + interaction.evalPtr(1, x, y, &inter) + if inter != 0.: d = abs(x[0]-y[0]) C = getREAL(c_params, fSCALING) return -2.0*C*log(d) @@ -352,8 +366,9 @@ cdef REAL_t peridynamicKernel1Dboundary(REAL_t *x, REAL_t *y, void *c_params): cdef REAL_t peridynamicKernel2Dboundary(REAL_t *x, REAL_t *y, void *c_params): cdef: interactionDomain interaction = (((c_params+fINTERACTION))[0]) - REAL_t C - if interaction.evalPtr(2, x, y) != 0.: + REAL_t C, inter + interaction.evalPtr(2, x, y, &inter) + if inter != 0.: C = getREAL(c_params, fSCALING) return -2.0*C else: @@ -363,8 +378,9 @@ cdef REAL_t gaussianKernel1D(REAL_t *x, REAL_t *y, void *c_params): cdef: interactionDomain interaction = (((c_params+fINTERACTION))[0]) REAL_t C, invD - 
REAL_t d2 - if interaction.evalPtr(1, x, y) != 0.: + REAL_t d2, inter + interaction.evalPtr(1, x, y, &inter) + if inter != 0.: d2 = (x[0]-y[0])*(x[0]-y[0]) C = getREAL(c_params, fSCALING) invD = getREAL(c_params, fEXPONENTINVERSE) @@ -377,8 +393,9 @@ cdef REAL_t gaussianKernel2D(REAL_t *x, REAL_t *y, void *c_params): cdef: interactionDomain interaction = (((c_params+fINTERACTION))[0]) REAL_t C, invD - REAL_t d2 - if interaction.evalPtr(2, x, y) != 0.: + REAL_t d2, inter + interaction.evalPtr(2, x, y, &inter) + if inter != 0.: d2 = (x[0]-y[0])*(x[0]-y[0]) + (x[1]-y[1])*(x[1]-y[1]) C = getREAL(c_params, fSCALING) invD = getREAL(c_params, fEXPONENTINVERSE) @@ -391,8 +408,9 @@ cdef REAL_t gaussianKernel1Dboundary(REAL_t *x, REAL_t *y, void *c_params): cdef: interactionDomain interaction = (((c_params+fINTERACTION))[0]) REAL_t C, invD - REAL_t d2 - if interaction.evalPtr(1, x, y) != 0.: + REAL_t d2, inter + interaction.evalPtr(1, x, y, &inter) + if inter != 0.: d2 = (x[0]-y[0])*(x[0]-y[0]) C = getREAL(c_params, fSCALING) invD = getREAL(c_params, fEXPONENTINVERSE) @@ -405,8 +423,9 @@ cdef REAL_t gaussianKernel2Dboundary(REAL_t *x, REAL_t *y, void *c_params): cdef: interactionDomain interaction = (((c_params+fINTERACTION))[0]) REAL_t C, invD - REAL_t d2 - if interaction.evalPtr(2, x, y) != 0.: + REAL_t d2, inter + interaction.evalPtr(2, x, y, &inter) + if inter != 0.: d2 = (x[0]-y[0])*(x[0]-y[0]) + (x[1]-y[1])*(x[1]-y[1]) C = getREAL(c_params, fSCALING) invD = getREAL(c_params, fEXPONENTINVERSE) @@ -473,7 +492,7 @@ cdef REAL_t updateAndEvalIntegrable(REAL_t *x, REAL_t *y, void *c_params): setREAL(c_params, fHORIZON2, horizon*horizon) if not scalingFunNull: scalingFun = (((c_params+fSCALINGFUN))[0]) - C = scalingFun.evalPtr(dim, x, y) + scalingFun.evalPtr(dim, x, y, &C) setREAL(c_params, fSCALING, C) return kernel(x, y, c_params) @@ -496,7 +515,7 @@ cdef COMPLEX_t updateAndEvalIntegrableComplex(REAL_t *x, REAL_t *y, void *c_para setREAL(c_params, fHORIZON2, 
horizon*horizon) if not scalingFunNull: scalingFun = (((c_params+fSCALINGFUN))[0]) - C = scalingFun.evalPtr(dim, x, y) + scalingFun.evalPtr(dim, x, y, &C) setREAL(c_params, fSCALING, C) return kernel(x, y, c_params) @@ -515,7 +534,7 @@ cdef REAL_t updateAndEvalFractional(REAL_t *x, REAL_t *y, void *c_params): if not isNull(c_params, fORDERFUN): sFun = (((c_params+fORDERFUN))[0]) - s = sFun.evalPtr(dim, x, y) + sFun.evalPtr(dim, x, y, &s) setREAL(c_params, fS, s) if not isNull(c_params, fHORIZONFUN): xA = x @@ -524,7 +543,7 @@ cdef REAL_t updateAndEvalFractional(REAL_t *x, REAL_t *y, void *c_params): setREAL(c_params, fHORIZON2, horizon*horizon) if not isNull(c_params, fSCALINGFUN): scalingFun = (((c_params+fSCALINGFUN))[0]) - C = scalingFun.evalPtr(dim, x, y) + scalingFun.evalPtr(dim, x, y, &C) setREAL(c_params, fSCALING, C) return kernel(x, y, c_params) @@ -532,13 +551,14 @@ cdef REAL_t updateAndEvalFractional(REAL_t *x, REAL_t *y, void *c_params): cdef class Kernel(twoPointFunction): """A kernel functions that can be used to define a nonlocal operator.""" - def __init__(self, INDEX_t dim, kernelType kType, function horizon, interactionDomain interaction, twoPointFunction scaling, twoPointFunction phi, BOOL_t piecewise=True, BOOL_t boundary=False, INDEX_t vectorSize=1, **kwargs): + def __init__(self, INDEX_t dim, kernelType kType, function horizon, interactionDomain interaction, twoPointFunction scaling, twoPointFunction phi, BOOL_t piecewise=True, BOOL_t boundary=False, INDEX_t valueSize=1, **kwargs): cdef: parametrizedTwoPointFunction parametrizedScaling int i self.dim = dim - self.vectorSize = vectorSize + assert valueSize >= 1, "Creation of kernel with valueSize = {}".format(valueSize) + self.valueSize = valueSize self.kernelType = kType self.piecewise = piecewise self.boundary = boundary @@ -549,7 +569,7 @@ cdef class Kernel(twoPointFunction): setINDEX(self.c_kernel_params, fKDIM, dim) symmetric = isinstance(horizon, constant) and scaling.symmetric - 
super(Kernel, self).__init__(symmetric) + super(Kernel, self).__init__(symmetric, valueSize) if self.kernelType == INDICATOR: self.min_singularity = 0. @@ -759,16 +779,21 @@ cdef class Kernel(twoPointFunction): self.horizonValue = self.horizon.eval(center1) cdef void evalParams(self, REAL_t[::1] x, REAL_t[::1] y): + cdef: + REAL_t scalingValue if self.piecewise: if self.variableHorizon: self.horizonValue = self.horizon.eval(x) if self.kernelType == GAUSSIAN: setREAL(self.c_kernel_params, fEXPONENTINVERSE, 1.0/(self.horizonValue/3.)**2) if self.variableScaling: - self.scalingValue = self.scaling.eval(x, y) + self.scaling.evalPtr(x.shape[0], &x[0], &y[0], &scalingValue) + self.scalingValue = scalingValue def evalParams_py(self, REAL_t[::1] x, REAL_t[::1] y): "Evaluate the kernel parameters." + cdef: + REAL_t scalingValue if self.piecewise: self.evalParams(x, y) else: @@ -777,11 +802,13 @@ cdef class Kernel(twoPointFunction): if self.kernelType == GAUSSIAN: setREAL(self.c_kernel_params, fEXPONENTINVERSE, 1.0/(self.horizonValue/3.)**2) if self.variableScaling: - self.scalingValue = self.scaling.eval(x, y) + self.scaling.evalPtr(x.shape[0], &x[0], &y[0], &scalingValue) + self.scalingValue = scalingValue cdef void evalParamsPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): cdef: REAL_t[::1] xA + REAL_t scalingValue if self.piecewise: if self.variableHorizon: xA = x @@ -789,18 +816,13 @@ cdef class Kernel(twoPointFunction): if self.kernelType == GAUSSIAN: setREAL(self.c_kernel_params, fEXPONENTINVERSE, 1.0/(self.horizonValue/3.)**2) if self.variableScaling: - self.scalingValue = self.scaling.evalPtr(dim, x, y) - - cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): - return self.kernelFun(&x[0], &y[0], self.c_kernel_params) + self.scaling.evalPtr(dim, x, y, &scalingValue) + self.scalingValue = scalingValue - cdef void evalVector(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] vec): + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] vec): vec[0] = 
self.kernelFun(&x[0], &y[0], self.c_kernel_params) - cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): - return self.kernelFun(x, y, self.c_kernel_params) - - cdef void evalVectorPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, INDEX_t vectorSize, REAL_t* vec): + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, REAL_t* vec): vec[0] = self.kernelFun(x, y, self.c_kernel_params) def __call__(self, REAL_t[::1] x, REAL_t[::1] y, BOOL_t callEvalParams=True): @@ -809,11 +831,11 @@ cdef class Kernel(twoPointFunction): self.evalParams(x, y) return self.kernelFun(&x[0], &y[0], self.c_kernel_params) - def evalVector_py(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] vec, BOOL_t callEvalParams=True): + def eval_py(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] vec, BOOL_t callEvalParams=True): "Evaluate the kernel." if self.piecewise and callEvalParams: self.evalParams(x, y) - self.evalVector(x, y, vec) + self.eval(x, y, vec) def getModifiedKernel(self, function horizon=None, @@ -857,7 +879,7 @@ cdef class Kernel(twoPointFunction): return "{}({}{}, {}, {})".format(self.__class__.__name__, kernelName, '' if not self.boundary else '-boundary', repr(self.interaction), self.scaling) def __getstate__(self): - return (self.dim, self.kernelType, self.horizon, self.interaction, self.scaling, self.phi, self.piecewise, self.boundary, self.singularityValue) + return (self.dim, self.kernelType, self.horizon, self.interaction, self.scaling, self.phi, self.piecewise, self.boundary, self.valueSize) def __setstate__(self, state): Kernel.__init__(self, state[0], state[1], state[2], state[3], state[4], state[5], state[6], state[7], state[8]) @@ -939,13 +961,13 @@ cdef class Kernel(twoPointFunction): cdef class ComplexKernel(ComplextwoPointFunction): """A kernel functions that can be used to define a nonlocal operator.""" - def __init__(self, INDEX_t dim, kernelType kType, function horizon, interactionDomain interaction, twoPointFunction scaling, twoPointFunction phi, BOOL_t 
piecewise=True, BOOL_t boundary=False, INDEX_t vectorSize=1, **kwargs): + def __init__(self, INDEX_t dim, kernelType kType, function horizon, interactionDomain interaction, twoPointFunction scaling, twoPointFunction phi, BOOL_t piecewise=True, BOOL_t boundary=False, INDEX_t valueSize=1, **kwargs): cdef: parametrizedTwoPointFunction parametrizedScaling int i self.dim = dim - self.vectorSize = vectorSize + self.valueSize = valueSize self.kernelType = kType self.piecewise = piecewise self.boundary = boundary @@ -956,7 +978,7 @@ cdef class ComplexKernel(ComplextwoPointFunction): setINDEX(self.c_kernel_params, fKDIM, dim) symmetric = isinstance(horizon, constant) and scaling.symmetric - super(ComplexKernel, self).__init__(symmetric) + super(ComplexKernel, self).__init__(symmetric, valueSize) if self.kernelType == GREENS_2D: greensLambda = kwargs.get('greens2D_lambda', np.nan) @@ -1093,16 +1115,21 @@ cdef class ComplexKernel(ComplextwoPointFunction): self.horizonValue = self.horizon.eval(center1) cdef void evalParams(self, REAL_t[::1] x, REAL_t[::1] y): + cdef: + REAL_t scalingValue if self.piecewise: if self.variableHorizon: self.horizonValue = self.horizon.eval(x) if self.kernelType == GAUSSIAN: setREAL(self.c_kernel_params, fEXPONENTINVERSE, 1.0/(self.horizonValue/3.)**2) if self.variableScaling: - self.scalingValue = self.scaling.eval(x, y) + self.scaling.evalPtr(x.shape[0], &x[0], &y[0], &scalingValue) + self.scalingValue = scalingValue def evalParams_py(self, REAL_t[::1] x, REAL_t[::1] y): "Evaluate the kernel parameters." 
+ cdef: + REAL_t scalingValue if self.piecewise: self.evalParams(x, y) else: @@ -1111,11 +1138,13 @@ cdef class ComplexKernel(ComplextwoPointFunction): if self.kernelType == GAUSSIAN: setREAL(self.c_kernel_params, fEXPONENTINVERSE, 1.0/(self.horizonValue/3.)**2) if self.variableScaling: - self.scalingValue = self.scaling.eval(x, y) + self.scaling.evalPtr(x.shape[0], &x[0], &y[0], &scalingValue) + self.scalingValue = scalingValue cdef void evalParamsPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): cdef: REAL_t[::1] xA + REAL_t scalingValue if self.piecewise: if self.variableHorizon: xA = x @@ -1123,19 +1152,14 @@ cdef class ComplexKernel(ComplextwoPointFunction): if self.kernelType == GAUSSIAN: setREAL(self.c_kernel_params, fEXPONENTINVERSE, 1.0/(self.horizonValue/3.)**2) if self.variableScaling: - self.scalingValue = self.scaling.evalPtr(dim, x, y) - - cdef COMPLEX_t eval(self, REAL_t[::1] x, REAL_t[::1] y): - return self.kernelFun(&x[0], &y[0], self.c_kernel_params) + self.scaling.evalPtr(dim, x, y, &scalingValue) + self.scalingValue = scalingValue - cdef void evalVector(self, REAL_t[::1] x, REAL_t[::1] y, COMPLEX_t[::1] vec): - vec[0] = self.kernelFun(&x[0], &y[0], self.c_kernel_params) - - cdef COMPLEX_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): - return self.kernelFun(x, y, self.c_kernel_params) + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, COMPLEX_t[::1] value): + value[0] = self.kernelFun(&x[0], &y[0], self.c_kernel_params) - cdef void evalVectorPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, INDEX_t vectorSize, COMPLEX_t* vec): - vec[0] = self.kernelFun(x, y, self.c_kernel_params) + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, COMPLEX_t* value): + value[0] = self.kernelFun(x, y, self.c_kernel_params) def __call__(self, REAL_t[::1] x, REAL_t[::1] y, BOOL_t callEvalParams=True): "Evaluate the kernel." 
@@ -1143,11 +1167,11 @@ cdef class ComplexKernel(ComplextwoPointFunction): self.evalParams(x, y) return self.kernelFun(&x[0], &y[0], self.c_kernel_params) - def evalVector_py(self, REAL_t[::1] x, REAL_t[::1] y, COMPLEX_t[::1] vec, BOOL_t callEvalParams=True): + def eval_py(self, REAL_t[::1] x, REAL_t[::1] y, COMPLEX_t[::1] vec, BOOL_t callEvalParams=True): "Evaluate the kernel." if self.piecewise and callEvalParams: self.evalParams(x, y) - self.evalVector(x, y, vec) + self.eval(x, y, vec) def getModifiedKernel(self, function horizon=None, @@ -1282,13 +1306,13 @@ cdef class FractionalKernel(Kernel): cdef: parametrizedTwoPointFunction parametrizedScaling if derivative == 0: - vectorSize = 1 + valueSize = 1 elif derivative == 1: - vectorSize = s.numParameters + valueSize = s.numParameters else: - vectorSize = 1 + valueSize = 1 - super(FractionalKernel, self).__init__(dim, FRACTIONAL, horizon, interaction, scaling, phi, piecewise, boundary, vectorSize) + super(FractionalKernel, self).__init__(dim, FRACTIONAL, horizon, interaction, scaling, phi, piecewise, boundary, valueSize) self.symmetric = s.symmetric and isinstance(horizon, constant) and scaling.symmetric self.derivative = derivative @@ -1472,18 +1496,22 @@ cdef class FractionalKernel(Kernel): cdef void evalParamsOnSimplices(self, REAL_t[::1] center1, REAL_t[::1] center2, REAL_t[:, ::1] simplex1, REAL_t[:, ::1] simplex2): # Set the max singularity and the horizon. cdef: - REAL_t sValue + REAL_t sValue, sValue2 if self.variableOrder: if self.s.symmetric: - sValue = self.s.eval(center1, center2) + self.s.evalPtr(center1.shape[0], ¢er1[0], ¢er2[0], &sValue) else: sValue = 0. 
- sValue = max(sValue, self.s.eval(center1, center2)) - sValue = max(sValue, self.s.eval(center2, center1)) + self.s.evalPtr(center1.shape[0], ¢er1[0], ¢er2[0], &sValue2) + sValue = max(sValue, sValue2) + self.s.evalPtr(center1.shape[0], ¢er2[0], ¢er1[0], &sValue2) + sValue = max(sValue, sValue2) for i in range(simplex1.shape[0]): - sValue = max(sValue, self.s.eval(simplex1[i,:], center2)) + self.s.evalPtr(center1.shape[0], &simplex1[i, 0], ¢er2[0], &sValue2) + sValue = max(sValue, sValue2) for i in range(simplex2.shape[0]): - sValue = max(sValue, self.s.eval(simplex2[i,:], center1)) + self.s.evalPtr(center1.shape[0], &simplex2[i, 0], ¢er1[0], &sValue2) + sValue = max(sValue, sValue2) if not self.boundary: self.setSingularityValue(-self.dim-2*sValue) else: @@ -1496,7 +1524,7 @@ cdef class FractionalKernel(Kernel): REAL_t sValue, scalingValue if self.piecewise: if self.variableOrder: - sValue = self.s.eval(x, y) + self.s.evalPtr(x.shape[0], &x[0], &y[0], &sValue) if not self.boundary: self.setSingularityValue(-self.dim-2*sValue) else: @@ -1505,7 +1533,7 @@ cdef class FractionalKernel(Kernel): if self.variableHorizon: self.horizonValue = self.horizon.eval(x) if self.variableScaling: - scalingValue = self.scaling.eval(x, y) + self.scaling.evalPtr(x.shape[0], &x[0], &y[0], &scalingValue) self.setScalingValue(scalingValue) cdef void evalParamsPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): @@ -1514,7 +1542,7 @@ cdef class FractionalKernel(Kernel): REAL_t sValue, scalingValue if self.piecewise: if self.variableOrder: - sValue = self.s.evalPtr(dim, x, y) + self.s.evalPtr(dim, x, y, &sValue) if not self.boundary: self.setSingularityValue(-self.dim-2*sValue) else: @@ -1524,7 +1552,7 @@ cdef class FractionalKernel(Kernel): xA = x self.horizonValue = self.horizon.eval(xA) if self.variableScaling: - scalingValue = self.scaling.evalPtr(dim, x, y) + self.scaling.evalPtr(dim, x, y, &scalingValue) self.setScalingValue(scalingValue) def evalParams_py(self, REAL_t[::1] x, REAL_t[::1] 
y): @@ -1534,7 +1562,7 @@ cdef class FractionalKernel(Kernel): self.evalParams(x, y) else: if self.variableOrder: - sValue = self.s.eval(x, y) + self.s.evalPtr(x.shape[0], &x[0], &y[0], &sValue) self.setsValue(sValue) if not self.boundary: self.setSingularityValue(-self.dim-2*sValue) @@ -1543,10 +1571,10 @@ cdef class FractionalKernel(Kernel): if self.variableHorizon: self.horizonValue = self.horizon.eval(x) if self.variableScaling: - scalingValue = self.scaling.eval(x, y) + self.scaling.evalPtr(x.shape[0], &x[0], &y[0], & scalingValue) self.setScalingValue(scalingValue) - cdef void evalVector(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] vec): + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] vec): cdef: INDEX_t i REAL_t fac @@ -1555,10 +1583,10 @@ cdef class FractionalKernel(Kernel): elif self.derivative == 1: fac = self.kernelFun(&x[0], &y[0], self.c_kernel_params) self.s.evalGrad(x, y, vec) - for i in range(self.vectorSize): + for i in range(self.valueSize): vec[i] *= fac - cdef void evalVectorPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, INDEX_t vectorSize, REAL_t* vec): + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, REAL_t* vec): cdef: INDEX_t i REAL_t fac @@ -1566,10 +1594,8 @@ cdef class FractionalKernel(Kernel): vec[0] = self.kernelFun(x, y, self.c_kernel_params) elif self.derivative == 1: fac = self.kernelFun(x, y, self.c_kernel_params) - # print(fac) - self.s.evalGradPtr(dim, x, y, vectorSize, vec) - # print(vec[0]) - for i in range(vectorSize): + self.s.evalGradPtr(dim, x, y, self.valueSize, vec) + for i in range(self.valueSize): vec[i] *= fac def getModifiedKernel(self, diff --git a/nl/PyNucleus_nl/nonlocalAssembly.pyx b/nl/PyNucleus_nl/nonlocalAssembly.pyx index b0fa56d..ba420fe 100644 --- a/nl/PyNucleus_nl/nonlocalAssembly.pyx +++ b/nl/PyNucleus_nl/nonlocalAssembly.pyx @@ -31,6 +31,7 @@ from PyNucleus_base.linear_operators cimport (CSR_LinearOperator, VectorLinearOperator, ComplexVectorLinearOperator, 
Dense_VectorLinearOperator, + ComplexDense_VectorLinearOperator, Dense_SubBlock_LinearOperator, diagonalOperator, TimeStepperLinearOperator, @@ -167,7 +168,7 @@ cdef class horizonSurfaceIntegral(function): for j in range(dim): self.y[j] = x[j]+self.quadNodes[k, j] self.kernel.evalParams(x, self.y) - val = self.kernel.eval(x, self.y) + self.kernel.evalPtr(x.shape[0], &x[0], &self.y[0], &val) # val = self.kernel.scalingValue*pow(self.horizon, 1-dim-2*s)/s fac -= val * self.quadWeights[k] return fac @@ -367,63 +368,6 @@ def assembleNonlocalOperator(meshBase mesh, return builder.getDense() -cdef LinearOperator getSparseNearField(DoFMap DoFMap, list Pnear, bint symmetric=False, tree_node myRoot=None): - cdef: - sparsityPattern sP - INDEX_t I = -1, J = -1 - nearFieldClusterPair clusterPair - indexSet dofs1, dofs2 - indexSetIterator it1 = arrayIndexSetIterator(), it2 = arrayIndexSetIterator() - sP = sparsityPattern(DoFMap.num_dofs) - if symmetric: - for clusterPair in Pnear: - dofs1 = clusterPair.n1.get_dofs() - dofs2 = clusterPair.n2.get_dofs() - it1.setIndexSet(dofs1) - it2.setIndexSet(dofs2) - while it1.step(): - I = it1.i - it2.reset() - while it2.step(): - J = it2.i - if I > J: - sP.add(I, J) - elif myRoot is not None: - for clusterPair in Pnear: - if clusterPair.n1.getParent(1).id != myRoot.id: - continue - dofs1 = clusterPair.n1.get_dofs() - dofs2 = clusterPair.n2.get_dofs() - it1.setIndexSet(dofs1) - it2.setIndexSet(dofs2) - while it1.step(): - I = it1.i - it2.reset() - while it2.step(): - J = it2.i - sP.add(I, J) - else: - for clusterPair in Pnear: - dofs1 = clusterPair.n1.get_dofs() - dofs2 = clusterPair.n2.get_dofs() - it1.setIndexSet(dofs1) - it2.setIndexSet(dofs2) - while it1.step(): - I = it1.i - it2.reset() - while it2.step(): - J = it2.i - sP.add(I, J) - indptr, indices = sP.freeze() - data = np.zeros((indices.shape[0]), dtype=REAL) - if symmetric: - diagonal = np.zeros((DoFMap.num_dofs), dtype=REAL) - A = SSS_LinearOperator(indices, indptr, data, 
diagonal) - else: - A = CSR_LinearOperator(indices, indptr, data) - return A - - cdef class nearFieldClusterPair: def __init__(self, tree_node n1, tree_node n2): self.n1 = n1 diff --git a/nl/PyNucleus_nl/nonlocalAssembly_decl_{SCALAR}.pxi b/nl/PyNucleus_nl/nonlocalAssembly_decl_{SCALAR}.pxi index c719981..19ab7f7 100644 --- a/nl/PyNucleus_nl/nonlocalAssembly_decl_{SCALAR}.pxi +++ b/nl/PyNucleus_nl/nonlocalAssembly_decl_{SCALAR}.pxi @@ -15,7 +15,7 @@ cdef class {SCALAR_label}nonlocalBuilder: public {SCALAR_label}double_local_matrix_t local_matrix_zeroExterior public {SCALAR_label}double_local_matrix_t local_matrix_surface BOOL_t zeroExterior - {SCALAR}_t[::1] contrib, contribZeroExterior + {SCALAR}_t[:, ::1] contrib, contribZeroExterior list _d2c public MPI.Comm comm public FakePLogger PLogger @@ -24,4 +24,4 @@ cdef class {SCALAR_label}nonlocalBuilder: cdef inline {SCALAR_label}double_local_matrix_t getLocalMatrixBoundaryZeroExterior(self, dict params, BOOL_t infHorizon) cpdef {SCALAR}_t getEntry(self, INDEX_t I, INDEX_t J) cpdef {SCALAR}_t getEntryCluster(self, INDEX_t I, INDEX_t J) - cpdef LinearOperator assembleClusters(self, list Pnear, bint forceUnsymmetric=*, LinearOperator Anear=*, dict jumps=*, str prefix=*, tree_node myRoot=*, BOOL_t doDistributedAssembly=*) + cpdef LinearOperator assembleClusters(self, list Pnear, bint forceUnsymmetricMatrix=*, LinearOperator Anear=*, dict jumps=*, str prefix=*, tree_node myRoot=*, BOOL_t doDistributedAssembly=*) diff --git a/nl/PyNucleus_nl/nonlocalAssembly_{SCALAR}.pxi b/nl/PyNucleus_nl/nonlocalAssembly_{SCALAR}.pxi index cc3e030..9708988 100644 --- a/nl/PyNucleus_nl/nonlocalAssembly_{SCALAR}.pxi +++ b/nl/PyNucleus_nl/nonlocalAssembly_{SCALAR}.pxi @@ -148,25 +148,25 @@ cdef class {SCALAR_label}IndexManager: canSkip = canSkip and dof < 0 return canSkip - cdef inline void addToMatrixElemSym(self, const {SCALAR}_t[::1] contrib, REAL_t fac): + cdef void addToMatrixElemSym(self, const {SCALAR}_t[:, ::1] contrib, REAL_t fac): 
cdef: INDEX_t k, p, q, I, J k = 0 for p in range(self.dm.dofs_per_element): I = self.localDoFs[p] if I >= 0: - self.A.addToEntry(I, I, fac*contrib[k]) + self.A.addToEntry(I, I, fac*contrib[k, 0]) k += 1 for q in range(p+1, self.dm.dofs_per_element): J = self.localDoFs[q] if J >= 0: - self.A.addToEntry(I, J, fac*contrib[k]) - self.A.addToEntry(J, I, fac*contrib[k]) + self.A.addToEntry(I, J, fac*contrib[k, 0]) + self.A.addToEntry(J, I, fac*contrib[k, 0]) k += 1 else: k += self.dm.dofs_per_element-p - cdef inline void addToMatrixElem(self, const {SCALAR}_t[::1] contrib, REAL_t fac): + cdef void addToMatrixElem(self, const {SCALAR}_t[:, ::1] contrib, REAL_t fac): cdef: INDEX_t k, p, q, I, J k = 0 @@ -176,7 +176,7 @@ cdef class {SCALAR_label}IndexManager: for q in range(self.dm.dofs_per_element): J = self.localDoFs[q] if J >= 0: - self.A.addToEntry(I, J, fac*contrib[k]) + self.A.addToEntry(I, J, fac*contrib[k, 0]) k += 1 else: k += self.dm.dofs_per_element @@ -200,7 +200,7 @@ cdef class {SCALAR_label}IndexManager: else: k += 2*self.dm.dofs_per_element-p - cdef inline void addToMatrixElemElemSym(self, const {SCALAR}_t[::1] contrib, REAL_t fac): + cdef void addToMatrixElemElemSym(self, const {SCALAR}_t[:, ::1] contrib, REAL_t fac): # Add symmetric 'contrib' to elements i and j in symmetric fashion cdef: INDEX_t k, p, q, I, J @@ -208,13 +208,13 @@ cdef class {SCALAR_label}IndexManager: for p in range(2*self.dm.dofs_per_element): I = self.localDoFs[p] if I >= 0: - self.A.addToEntry(I, I, fac*contrib[k]) + self.A.addToEntry(I, I, fac*contrib[k, 0]) k += 1 for q in range(p+1, 2*self.dm.dofs_per_element): J = self.localDoFs[q] if J >= 0: - self.A.addToEntry(I, J, fac*contrib[k]) - self.A.addToEntry(J, I, fac*contrib[k]) + self.A.addToEntry(I, J, fac*contrib[k, 0]) + self.A.addToEntry(J, I, fac*contrib[k, 0]) k += 1 else: k += 2*self.dm.dofs_per_element-p @@ -235,7 +235,7 @@ cdef class {SCALAR_label}IndexManager: else: k += 2*self.dm.dofs_per_element - cdef inline void 
addToMatrixElemElem(self, const {SCALAR}_t[::1] contrib, REAL_t fac): + cdef void addToMatrixElemElem(self, const {SCALAR}_t[:, ::1] contrib, REAL_t fac): # Add general 'contrib' to elements i and j cdef: INDEX_t k, p, q, I, J @@ -246,26 +246,25 @@ cdef class {SCALAR_label}IndexManager: for q in range(2*self.dm.dofs_per_element): J = self.localDoFs[q] if J >= 0: - self.A.addToEntry(I, J, fac*contrib[k]) + self.A.addToEntry(I, J, fac*contrib[k, 0]) k += 1 else: k += 2*self.dm.dofs_per_element - def buildMasksForClusters_py(self, list clusterList, bint useSymmetricCells): + def buildMasksForClusters_py(self, list clusterList, bint useSymmetricCells, bint symmetricLocalMatrix): cdef: INDEX_t startCluster = 0 - return self.buildMasksForClusters(clusterList, useSymmetricCells, &startCluster) + return self.buildMasksForClusters(clusterList, useSymmetricCells, symmetricLocalMatrix, &startCluster) - cdef tupleDictMASK buildMasksForClusters(self, list clusterList, bint useSymmetricCells, INDEX_t *startCluster): + cdef tupleDictMASK buildMasksForClusters(self, list clusterList, bint useSymmetricCells, bint symmetricLocalMatrix, INDEX_t *startCluster): cdef: nearFieldClusterPair cluster = clusterList[0] MASK_t cellMask1, cellMask2 indexSet cellsUnion = cluster.cellsUnion indexSetIterator it = cellsUnion.getIter(), it2 = cellsUnion.getIter() indexSet clusterDofs1, clusterDofs2 - INDEX_t cellNo1 = -1, cellNo2 = -1 + INDEX_t cellNo1 = -1, cellNo2 = -1, cellNo = -1 INDEX_t[::1] cellPair = uninitialized((2), dtype=INDEX) - INDEX_t[::1] cellPair2 = uninitialized((2), dtype=INDEX) tupleDictMASK masks = tupleDictMASK(self.dm.mesh.num_cells, deleteHits=False, logicalAndHits=True, length_inc=20) INDEX_t p, I # dict cellMasks1, cellMasks2 @@ -278,86 +277,112 @@ cdef class {SCALAR_label}IndexManager: cellMask2.reset() for cluster in clusterList[startCluster[0]:]: startCluster[0] += 1 - cellsUnion = cluster.cellsUnion - # cellMasks1 = {} - # cellMasks2 = {} + clusterDofs1 = 
cluster.n1.get_dofs() clusterDofs2 = cluster.n2.get_dofs() + # loop over cells in the union of the two clusters + cellsUnion = cluster.cellsUnion it.setIndexSet(cellsUnion) - while it.step(): - cellNo1 = it.i + # Record separately for the two clusters if local + # dofs on the cell. + cellNo = it.i mask1.reset() mask2.reset() k = 1 for p in range(dofs_per_element): - I = self.dm.cell2dof(cellNo1, p) + I = self.dm.cell2dof(cellNo, p) if I >= 0: if clusterDofs1.inSet(I): mask1 |= k if clusterDofs2.inSet(I): mask2 |= k k = k << 1 - cellMasks1[cellNo1] = mask1 - cellMasks2[cellNo1] = mask2 + cellMasks1[cellNo] = mask1 + cellMasks2[cellNo] = mask2 if not useSymmetricCells: - # TODO: Think some more about this branch, maybe this can be improved. - it.reset() + + # loop over cells in (cellsUnion x cellsUnion) it2.setIndexSet(cellsUnion) - # it.setIndexSet(cluster.n1.cells) - # it2.setIndexSet(cluster.n2.cells) + + it.reset() while it.step(): cellNo1 = it.i cellPair[0] = cellNo1 cellMask11 = cellMasks1[cellNo1] cellMask12 = cellMasks2[cellNo1] + it2.reset() while it2.step(): cellNo2 = it2.i cellMask21 = cellMasks1[cellNo2] cellMask22 = cellMasks2[cellNo2] + + # create the masks on (cellNo1, cellNo2) cellMask1 = cellMask11 | (cellMask21 << dofs_per_element) cellMask2 = cellMask12 | (cellMask22 << dofs_per_element) + if (cellMask1.none()) or (cellMask2.none()): continue + cellPair[1] = cellNo2 - mask = self.getElemElemMask(cellMask1, cellMask2) - # does a logical "and" if there already is an entry + + # Create mask for local matrix. + # This is the tensor product of the two masks, but also accounts for potential symmetry of the local matrix. + if symmetricLocalMatrix: + mask = self.getElemElemSymMask(cellMask1, cellMask2) + else: + mask = self.getElemElemMask(cellMask1, cellMask2) + + # Enter/update the mask for (cellNo1, cellNo2). + # This does a logical "and" if there already is an entry from another cluster pair. 
masks.enterValue(cellPair, mask) else: + + # loop over cells in (n1.cells x n2.cells) it.setIndexSet(cluster.n1.cells) it2.setIndexSet(cluster.n2.cells) while it.step(): cellNo1 = it.i - cellPair[0] = cellNo1 - cellPair2[1] = cellNo1 cellMask11 = cellMasks1[cellNo1] cellMask12 = cellMasks2[cellNo1] + it2.reset() while it2.step(): cellNo2 = it2.i cellMask21 = cellMasks1[cellNo2] cellMask22 = cellMasks2[cellNo2] + + # Create the masks on (cellNo1, cellNo2). + # Due to symmetry we can record everything as ordered cell pairs. if cellNo1 > cellNo2: + # We swap cellNo1 and cellNo2. cellMask1 = cellMask21 | (cellMask11 << dofs_per_element) cellMask2 = cellMask22 | (cellMask12 << dofs_per_element) if (cellMask1.none()) or (cellMask2.none()): continue - cellPair2[0] = cellNo2 - mask = self.getElemElemSymMask(cellMask1, cellMask2) - # does a logical "and" if there already is an entry - masks.enterValue(cellPair2, mask) + cellPair[0] = cellNo2 + cellPair[1] = cellNo1 else: cellMask1 = cellMask11 | (cellMask21 << dofs_per_element) cellMask2 = cellMask12 | (cellMask22 << dofs_per_element) if (cellMask1.none()) or (cellMask2.none()): continue + cellPair[0] = cellNo1 cellPair[1] = cellNo2 + + # Create mask for local matrix. + # This is the tensor product of the two masks, but also accounts for potential symmetry of the local matrix. + if symmetricLocalMatrix: mask = self.getElemElemSymMask(cellMask1, cellMask2) - # does a logical "and" if there already is an entry - masks.enterValue(cellPair, mask) + else: + mask = self.getElemElemMask(cellMask1, cellMask2) + + # Enter/update the mask for (cellNo1, cellNo2). + # This does a logical "and" if there already is an entry from another cluster pair. 
+ masks.enterValue(cellPair, mask) if masks.nnz > 10000000: break @@ -453,7 +478,7 @@ cdef class {SCALAR_label}IndexManager: k = k << 1 return mask - cdef inline void addToMatrixElemElemSymMasked(self, const {SCALAR}_t[::1] contrib, REAL_t fac, MASK_t mask): + cdef inline void addToMatrixElemElemSymMasked(self, const {SCALAR}_t[:, ::1] contrib, REAL_t fac, MASK_t mask): # Add symmetric 'contrib' to elements i and j in symmetric fashion cdef: INDEX_t k, p, q, I, J @@ -462,16 +487,16 @@ cdef class {SCALAR_label}IndexManager: for p in range(2*self.dm.dofs_per_element): I = self.localDoFs[p] if mask[k]: - self.A.addToEntry(I, I, fac*contrib[k]) + self.A.addToEntry(I, I, fac*contrib[k, 0]) k += 1 for q in range(p+1, 2*self.dm.dofs_per_element): if mask[k]: J = self.localDoFs[q] - self.A.addToEntry(I, J, fac*contrib[k]) - self.A.addToEntry(J, I, fac*contrib[k]) + self.A.addToEntry(I, J, fac*contrib[k, 0]) + self.A.addToEntry(J, I, fac*contrib[k, 0]) k += 1 - cdef inline void addToMatrixElemElemMasked(self, const {SCALAR}_t[::1] contrib, REAL_t fac, MASK_t mask): + cdef inline void addToMatrixElemElemMasked(self, const {SCALAR}_t[:, ::1] contrib, REAL_t fac, MASK_t mask): # Add unsymmetric 'contrib' to elements i and j in unsymmetric fashion cdef: INDEX_t k, p, q, I, J @@ -482,20 +507,20 @@ cdef class {SCALAR_label}IndexManager: for q in range(2*self.dm.dofs_per_element): if mask[k]: J = self.localDoFs[q] - self.A.addToEntry(I, J, fac*contrib[k]) + self.A.addToEntry(I, J, fac*contrib[k, 0]) k += 1 - cdef void addToCache(self, {SCALAR}_t[::1] contrib, INDEX_t[::1] ID, INDEX_t perm, BOOL_t inv=False): + cdef void addToCache(self, {SCALAR}_t[:, ::1] contrib, INDEX_t[::1] ID, INDEX_t perm, BOOL_t inv=False): cdef: intTuple hv = intTuple.create(ID) - contribNew = uninitialized((contrib.shape[0]), dtype=REAL) + contribNew = uninitialized((contrib.shape[0], contrib.shape[1]), dtype=REAL) self.permute(contrib, contribNew, perm, inv) self.cache[hv] = contribNew - cdef void 
permute(self, {SCALAR}_t[::1] contrib, {SCALAR}_t[::1] contribNew, INDEX_t perm, BOOL_t inv=False): + cdef void permute(self, {SCALAR}_t[:, ::1] contrib, {SCALAR}_t[:, ::1] contribNew, INDEX_t perm, BOOL_t inv=False): cdef: INDEX_t K, p, q - INDEX_t k, i, j + INDEX_t k, i, j, l INDEX_t dofs_per_element = self.dm.dofs_per_element INDEX_t dofs_per_element2 = 2*dofs_per_element BOOL_t perm0 = perm & 1 @@ -533,7 +558,8 @@ cdef class {SCALAR_label}IndexManager: i = permutedDoFsLocal[p] k = 2*dofs_per_element*i-(i*(i+1) >> 1) + i - contribNew[K] = contrib[k] + for l in range(contrib.shape[1]): + contribNew[K, l] = contrib[k, l] K += 1 for q in range(p+1, dofs_per_element2): @@ -543,7 +569,8 @@ cdef class {SCALAR_label}IndexManager: k = dofs_per_element2*j-(j*(j+1) >> 1) + i else: k = dofs_per_element2*i-(i*(i+1) >> 1) + j - contribNew[K] = contrib[k] + for l in range(contrib.shape[1]): + contribNew[K, l] = contrib[k, l] K += 1 def __repr__(self): @@ -552,7 +579,7 @@ cdef class {SCALAR_label}IndexManager: return s -cdef inline {SCALAR}_t extractElemSymMasked{SCALAR_label}(DoFMap DoFMap, const {SCALAR}_t[::1] contrib, REAL_t fac, MASK_t mask): +cdef inline {SCALAR}_t extractElemSymMasked{SCALAR_label}(DoFMap DoFMap, const {SCALAR}_t[:, ::1] contrib, REAL_t fac, MASK_t mask): # Add symmetric 'contrib' to elements i and j in symmetric fashion cdef: INDEX_t k, p, q @@ -561,12 +588,12 @@ cdef inline {SCALAR}_t extractElemSymMasked{SCALAR_label}(DoFMap DoFMap, const { for p in range(DoFMap.dofs_per_element): for q in range(p, DoFMap.dofs_per_element): if mask[k]: - s += fac*contrib[k] + s += fac*contrib[k, 0] k += 1 return s -cdef inline {SCALAR}_t extractElemElemSymMasked{SCALAR_label}(DoFMap DoFMap, const {SCALAR}_t[::1] contrib, REAL_t fac, MASK_t mask): +cdef inline {SCALAR}_t extractElemElemSymMasked{SCALAR_label}(DoFMap DoFMap, const {SCALAR}_t[:, ::1] contrib, REAL_t fac, MASK_t mask): # Add symmetric 'contrib' to elements i and j in symmetric fashion cdef: INDEX_t k, p, 
q @@ -575,7 +602,7 @@ cdef inline {SCALAR}_t extractElemElemSymMasked{SCALAR_label}(DoFMap DoFMap, con for p in range(2*DoFMap.dofs_per_element): for q in range(p, 2*DoFMap.dofs_per_element): if mask[k]: - s += fac*contrib[k] + s += fac*contrib[k, 0] k += 1 return s @@ -583,18 +610,18 @@ cdef inline {SCALAR}_t extractElemElemSymMasked{SCALAR_label}(DoFMap DoFMap, con cdef class {SCALAR_label}IndexManagerVector({SCALAR_label}IndexManager): cdef: {SCALAR_label}VectorLinearOperator vecA - INDEX_t vectorSize + INDEX_t valueSize def __init__(self, DoFMap dm, {SCALAR_label}VectorLinearOperator A=None, cellPairIdentifierSize=1, indexSet myDofs=None, sparsityPattern sP=None): super({SCALAR_label}IndexManagerVector, self).__init__(dm, None, cellPairIdentifierSize, myDofs, sP) self.vecA = A - self.vectorSize = A.vectorSize + self.valueSize = A.vectorSize - cdef inline void addToMatrixElemSymVector(self, {SCALAR}_t[:, ::1] contrib, REAL_t fac): + cdef void addToMatrixElemSym(self, {SCALAR}_t[:, ::1] contrib, REAL_t fac): cdef: INDEX_t k, p, q, I, J for p in range(contrib.shape[0]): - for q in range(self.vectorSize): + for q in range(self.valueSize): contrib[p, q] *= fac k = 0 for p in range(self.dm.dofs_per_element): @@ -611,11 +638,11 @@ cdef class {SCALAR_label}IndexManagerVector({SCALAR_label}IndexManager): else: k += self.dm.dofs_per_element-p - cdef inline void addToMatrixElemVector(self, {SCALAR}_t[:, ::1] contrib, REAL_t fac): + cdef void addToMatrixElem(self, {SCALAR}_t[:, ::1] contrib, REAL_t fac): cdef: INDEX_t k, p, q, I, J for p in range(contrib.shape[0]): - for q in range(self.vectorSize): + for q in range(self.valueSize): contrib[p, q] *= fac k = 0 for p in range(self.dm.dofs_per_element): @@ -629,12 +656,12 @@ cdef class {SCALAR_label}IndexManagerVector({SCALAR_label}IndexManager): else: k += self.dm.dofs_per_element - cdef inline void addToMatrixElemElemSymVector(self, {SCALAR}_t[:, ::1] contrib, REAL_t fac): + cdef void addToMatrixElemElemSym(self, 
{SCALAR}_t[:, ::1] contrib, REAL_t fac): # Add symmetric 'contrib' to elements i and j in symmetric fashion cdef: INDEX_t k, p, q, I, J for p in range(contrib.shape[0]): - for q in range(self.vectorSize): + for q in range(self.valueSize): contrib[p, q] *= fac k = 0 for p in range(2*self.dm.dofs_per_element): @@ -651,12 +678,12 @@ cdef class {SCALAR_label}IndexManagerVector({SCALAR_label}IndexManager): else: k += 2*self.dm.dofs_per_element-p - cdef inline void addToMatrixElemElemVector(self, {SCALAR}_t[:, ::1] contrib, REAL_t fac): + cdef void addToMatrixElemElem(self, {SCALAR}_t[:, ::1] contrib, REAL_t fac): # Add general 'contrib' to elements i and j cdef: INDEX_t k, p, q, I, J for p in range(contrib.shape[0]): - for q in range(self.vectorSize): + for q in range(self.valueSize): contrib[p, q] *= fac k = 0 for p in range(2*self.dm.dofs_per_element): @@ -711,9 +738,9 @@ cdef class {SCALAR_label}nonlocalBuilder: self.local_matrix = self.getLocalMatrix(params) if self.local_matrix.symmetricLocalMatrix: - self.contrib = uninitialized(((2*self.dm.dofs_per_element)*(2*self.dm.dofs_per_element+1)//2), dtype={SCALAR}) + self.contrib = uninitialized(((2*self.dm.dofs_per_element)*(2*self.dm.dofs_per_element+1)//2, self.kernel.valueSize), dtype={SCALAR}) else: - self.contrib = uninitialized(((2*self.dm.dofs_per_element)**2), dtype={SCALAR}) + self.contrib = uninitialized(((2*self.dm.dofs_per_element)**2, self.kernel.valueSize), dtype={SCALAR}) assert self.contrib.shape[0] <= PyLong_FromSsize_t(mask.size()), "Mask type size = {} is not large enough for {} entries. 
Please set a larger size and recompile.".format(mask.size(), self.contrib.shape[0]) self.local_matrix.setMesh1(self.dm.mesh) @@ -733,14 +760,13 @@ cdef class {SCALAR_label}nonlocalBuilder: self.local_matrix_zeroExterior.setMesh1(self.dm.mesh) self.local_matrix_surface.setMesh1(self.dm.mesh) if self.local_matrix_zeroExterior.symmetricLocalMatrix: - self.contribZeroExterior = uninitialized((self.dm.dofs_per_element*(self.dm.dofs_per_element+1)//2), dtype={SCALAR}) + self.contribZeroExterior = uninitialized((self.dm.dofs_per_element*(self.dm.dofs_per_element+1)//2, self.kernel.valueSize), dtype={SCALAR}) else: - self.contribZeroExterior = uninitialized(((self.dm.dofs_per_element)**2), dtype={SCALAR}) + self.contribZeroExterior = uninitialized(((self.dm.dofs_per_element)**2, self.kernel.valueSize), dtype={SCALAR}) LOGGER.debug(self.local_matrix_zeroExterior) LOGGER.debug(self.local_matrix_surface) else: - self.contribZeroExterior = uninitialized((0), dtype={SCALAR}) - + self.contribZeroExterior = uninitialized((0, 0), dtype={SCALAR}) if PLogger is not None: self.PLogger = PLogger @@ -758,6 +784,7 @@ cdef class {SCALAR_label}nonlocalBuilder: BOOL_t symmetric, forceNonSym fractionalOrderBase s target_order = params.get('target_order', None) + opType = params.get('opType', 'Laplacian') quadType = params.get('quadType', 'classical-refactored') assert quadType in ( 'classical-refactored' @@ -765,39 +792,39 @@ cdef class {SCALAR_label}nonlocalBuilder: forceNonSym = params.get('forceNonSym', False) symmetric = not forceNonSym and self.kernel.symmetric - if quadType == 'classical-refactored': - if self.mesh.manifold_dim == 1: - if symmetric: - local_matrix = fractionalLaplacian1D(self.kernel, - mesh=self.mesh, - DoFMap=self.dm, - target_order=target_order) - else: - local_matrix = fractionalLaplacian1D_nonsym(self.kernel, - mesh=self.mesh, - DoFMap=self.dm, - target_order=target_order) - elif self.mesh.manifold_dim == 2: - if symmetric: - if not isinstance(self.dm, 
Product_DoFMap): + # nonlocal Laplacians + if opType == 'Laplacian': + if quadType == 'classical-refactored': + if self.mesh.manifold_dim == 1: + if symmetric: + local_matrix = fractionalLaplacian1D(self.kernel, + mesh=self.mesh, + DoFMap=self.dm, + target_order=target_order) + else: + local_matrix = fractionalLaplacian1D_nonsym(self.kernel, + mesh=self.mesh, + DoFMap=self.dm, + target_order=target_order) + elif self.mesh.manifold_dim == 2: + if symmetric: local_matrix = fractionalLaplacian2D(self.kernel, mesh=self.mesh, DoFMap=self.dm, target_order=target_order) else: - raise NotImplementedError() + local_matrix = fractionalLaplacian2D_nonsym(self.kernel, + mesh=self.mesh, + DoFMap=self.dm, + target_order=target_order) else: - local_matrix = fractionalLaplacian2D_nonsym(self.kernel, - mesh=self.mesh, - DoFMap=self.dm, - target_order=target_order) - else: - raise NotImplementedError() + raise NotImplementedError() return local_matrix cdef inline {SCALAR_label}double_local_matrix_t getLocalMatrixBoundaryZeroExterior(self, dict params, BOOL_t infHorizon): cdef: fractionalOrderBase s + opType = params.get('opType', 'Laplacian') target_order = params.get('target_order', None) if 'quadTypeBoundary' in params: quadType = params['quadTypeBoundary'] @@ -807,68 +834,39 @@ cdef class {SCALAR_label}nonlocalBuilder: 'classical-refactored' ) - if isinstance(self.kernel, FractionalKernel): - s = self.kernel.s - assert ((s.min < 1.) and (s.max < 1.)) or ((s.min > 1.) 
and (s.max > 1.)) + # nonlocal Laplacians + if opType == 'Laplacian': assert isinstance(self.kernel.horizon, constant) if infHorizon: kernelInfHorizon = self.kernel.getModifiedKernel(horizon=constant(np.inf)) else: kernelInfHorizon = self.kernel + kernelBoundary = kernelInfHorizon.getBoundaryKernel() if quadType == 'classical-refactored': - kernelBoundary = kernelInfHorizon.getBoundaryKernel() if self.mesh.manifold_dim == 1: local_matrix = fractionalLaplacian1D_boundary(kernelBoundary, mesh=self.mesh, DoFMap=self.dm, target_order=target_order) elif self.mesh.manifold_dim == 2: - if not isinstance(self.dm, Product_DoFMap): - local_matrix = fractionalLaplacian2D_boundary(kernelBoundary, - mesh=self.mesh, - DoFMap=self.dm, - target_order=target_order) - else: - raise NotImplementedError() - else: - raise NotImplementedError() - else: - raise NotImplementedError() - else: - assert isinstance(self.kernel.horizon, constant) - if infHorizon: - kernelInfHorizon = self.kernel.getModifiedKernel(horizon=constant(np.inf)) - else: - kernelInfHorizon = self.kernel - if quadType == 'classical-refactored': - kernelBoundary = kernelInfHorizon.getBoundaryKernel() - if self.mesh.manifold_dim == 1: - local_matrix = fractionalLaplacian1D_boundary(kernelBoundary, + local_matrix = fractionalLaplacian2D_boundary(kernelBoundary, mesh=self.mesh, DoFMap=self.dm, target_order=target_order) - elif self.mesh.manifold_dim == 2: - if not isinstance(self.dm, Product_DoFMap): - local_matrix = fractionalLaplacian2D_boundary(kernelBoundary, - mesh=self.mesh, - DoFMap=self.dm, - target_order=target_order) - else: - raise NotImplementedError() else: raise NotImplementedError() else: - local_matrix = None + raise NotImplementedError() + else: + raise NotImplementedError() return local_matrix def getSparse(self, BOOL_t returnNearField=False, str prefix=''): cdef: INDEX_t cellNo1, cellNo2 - {SCALAR}_t[::1] contrib = self.contrib + {SCALAR}_t[:, ::1] contrib = self.contrib {SCALAR_label}IndexManager iM 
REAL_t fac - BOOL_t symmetricLocalMatrix = self.local_matrix.symmetricLocalMatrix - BOOL_t symmetricCells = self.local_matrix.symmetricCells panelType panel BOOL_t ignoreDiagonalBlocks = False BOOL_t doDistributedAssembly @@ -955,9 +953,9 @@ cdef class {SCALAR_label}nonlocalBuilder: with self.PLogger.Timer(prefix+'build near field sparsity pattern'): if myRoot is not None and doDistributedAssembly: - A = getSparseNearField(self.dm, Pnear, symmetric=useSymmetricMatrix, myRoot=myRoot) + A = getSparseNearField{SCALAR_label}(self.dm, Pnear, symmetric=useSymmetricMatrix, myRoot=myRoot) else: - A = getSparseNearField(self.dm, Pnear, symmetric=useSymmetricMatrix) + A = getSparseNearField{SCALAR_label}(self.dm, Pnear, symmetric=useSymmetricMatrix) # We are not using assembleClusters because we don't want to use surface integration with self.PLogger.Timer(prefix+'interior - compute'): @@ -987,24 +985,24 @@ cdef class {SCALAR_label}nonlocalBuilder: if cellNo1 == cellNo2: if panel != IGNORED: self.local_matrix.eval(contrib, panel) - if symmetricLocalMatrix: + if self.local_matrix.symmetricLocalMatrix: iM.addToMatrixElemElemSym(contrib, 1.) else: iM.addToMatrixElemElem(contrib, 1.) else: - if symmetricCells: + if self.local_matrix.symmetricCells: if panel != IGNORED: self.local_matrix.eval(contrib, panel) # If the kernel is symmetric, the contributions from (cellNo1, cellNo2) and (cellNo2, cellNo1) # are the same. We multiply by 2 to account for the contribution from cells (cellNo2, cellNo1). - if symmetricLocalMatrix: + if self.local_matrix.symmetricLocalMatrix: iM.addToMatrixElemElemSym(contrib, 2.) else: iM.addToMatrixElemElem(contrib, 2.) else: if panel != IGNORED: self.local_matrix.eval(contrib, panel) - if symmetricLocalMatrix: + if self.local_matrix.symmetricLocalMatrix: iM.addToMatrixElemElemSym(contrib, 1.) else: iM.addToMatrixElemElem(contrib, 1.) 
@@ -1014,7 +1012,7 @@ cdef class {SCALAR_label}nonlocalBuilder: if iM.getDoFsElemElem(cellNo2, cellNo1): continue self.local_matrix.eval(contrib, panel) - if symmetricLocalMatrix: + if self.local_matrix.symmetricLocalMatrix: iM.addToMatrixElemElemSym(contrib, 1.) else: iM.addToMatrixElemElem(contrib, 1.) @@ -1051,7 +1049,8 @@ cdef class {SCALAR_label}nonlocalBuilder: cdef: INDEX_t cellNo1, cellNo2 {SCALAR_label}LinearOperator A = None - {SCALAR}_t[::1] contrib = self.contrib, contribZeroExterior = self.contribZeroExterior + {SCALAR_label}VectorLinearOperator vecA = None + {SCALAR}_t[:, ::1] contrib = self.contrib, contribZeroExterior = self.contribZeroExterior INDEX_t start, end meshBase surface {SCALAR_label}IndexManager iM @@ -1059,10 +1058,11 @@ cdef class {SCALAR_label}nonlocalBuilder: np.int64_t explicitZeros REAL_t[:, ::1] data REAL_t sparsificationThreshold = 0.8 - BOOL_t symmetricLocalMatrix = self.local_matrix.symmetricLocalMatrix - BOOL_t symmetricCells = self.local_matrix.symmetricCells MASK_t mask + self.PLogger.addValue('useSymmetricCells', self.local_matrix.symmetricCells) + self.PLogger.addValue('useSymmetricLocalMatrix', self.local_matrix.symmetricLocalMatrix) + if self.comm: start = np.ceil(self.mesh.num_cells*self.comm.rank/self.comm.size) end = np.ceil(self.mesh.num_cells*(self.comm.rank+1)/self.comm.size) @@ -1119,7 +1119,7 @@ cdef class {SCALAR_label}nonlocalBuilder: iM.addToSparsityElemElem() self.local_matrix.swapCells() indptr, indices = sP.freeze() - useSymmetricMatrix = self.local_matrix.symmetricLocalMatrix and self.local_matrix.symmetricCells + useSymmetricMatrix = self.local_matrix.symmetricLocalMatrix if useSymmetricMatrix: A = {SCALAR_label}SSS_LinearOperator(indices, indptr, np.zeros((indices.shape[0]), dtype={SCALAR}), @@ -1134,19 +1134,34 @@ cdef class {SCALAR_label}nonlocalBuilder: trySparsification = False else: if self.dm2 is None: - A = {SCALAR_label}Dense_LinearOperator(np.zeros((self.dm.num_dofs, self.dm.num_dofs), 
dtype={SCALAR})) + if self.kernel.valueSize == 1: + A = {SCALAR_label}Dense_LinearOperator(np.zeros((self.dm.num_dofs, self.dm.num_dofs), dtype={SCALAR})) + else: + vecA = {SCALAR_label}Dense_VectorLinearOperator(np.zeros((self.dm.num_dofs, self.dm.num_dofs, self.kernel.valueSize), dtype=REAL)) else: - A = {SCALAR_label}Dense_LinearOperator(np.zeros((self.dm.num_dofs, self.dm2.num_dofs), dtype={SCALAR})) + if self.kernel.valueSize == 1: + A = {SCALAR_label}Dense_LinearOperator(np.zeros((self.dm.num_dofs, self.dm2.num_dofs), dtype={SCALAR})) + else: + vecA = {SCALAR_label}Dense_VectorLinearOperator(np.zeros((self.dm.num_dofs, self.dm2.num_dofs, self.kernel.valueSize), dtype=REAL)) if self.dm2 is None: - iM = {SCALAR_label}IndexManager(self.dm, A) + if self.kernel.valueSize == 1: + iM = {SCALAR_label}IndexManager(self.dm, A) + else: + iM = {SCALAR_label}IndexManagerVector(self.dm, vecA) else: LOGGER.warning('Efficiency of assembly with 2 DoFMaps is bad.') dmCombined = self.dm.combine(self.dm2) - B = SubMatrixAssemblyOperator(A, + if self.kernel.valueSize == 1: + B = SubMatrixAssemblyOperator(A, np.arange(self.dm.num_dofs, dtype=INDEX), np.arange(self.dm.num_dofs, self.dm.num_dofs+self.dm2.num_dofs, dtype=INDEX)) - iM = {SCALAR_label}IndexManager(dmCombined, B) + iM = {SCALAR_label}IndexManager(dmCombined, B) + else: + vecB = SubMatrixAssemblyOperator(vecA, + np.arange(self.dm.num_dofs, dtype=INDEX), + np.arange(self.dm.num_dofs, self.dm.num_dofs+self.dm2.num_dofs, dtype=INDEX)) + iM = {SCALAR_label}IndexManagerVector(dmCombined, vecB) # Omega x Omega with self.PLogger.Timer('interior'): @@ -1160,24 +1175,24 @@ cdef class {SCALAR_label}nonlocalBuilder: if cellNo1 == cellNo2: if panel != IGNORED: self.local_matrix.eval(contrib, panel) - if symmetricLocalMatrix: + if self.local_matrix.symmetricLocalMatrix: iM.addToMatrixElemElemSym(contrib, 1.) else: iM.addToMatrixElemElem(contrib, 1.) 
else: - if symmetricCells: + if self.local_matrix.symmetricCells: if panel != IGNORED: self.local_matrix.eval(contrib, panel) # If the kernel is symmetric, the contributions from (cellNo1, cellNo2) and (cellNo2, cellNo1) # are the same. We multiply by 2 to account for the contribution from cells (cellNo2, cellNo1). - if symmetricLocalMatrix: + if self.local_matrix.symmetricLocalMatrix: iM.addToMatrixElemElemSym(contrib, 2.) else: iM.addToMatrixElemElem(contrib, 2.) else: if panel != IGNORED: self.local_matrix.eval(contrib, panel) - if symmetricLocalMatrix: + if self.local_matrix.symmetricLocalMatrix: iM.addToMatrixElemElemSym(contrib, 1.) else: iM.addToMatrixElemElem(contrib, 1.) @@ -1187,7 +1202,7 @@ cdef class {SCALAR_label}nonlocalBuilder: if iM.getDoFsElemElem(cellNo2, cellNo1): continue self.local_matrix.eval(contrib, panel) - if symmetricLocalMatrix: + if self.local_matrix.symmetricLocalMatrix: iM.addToMatrixElemElemSym(contrib, 1.) else: iM.addToMatrixElemElem(contrib, 1.) @@ -1233,114 +1248,10 @@ cdef class {SCALAR_label}nonlocalBuilder: return CSR_LinearOperator.from_dense(A) else: LOGGER.warning('Not converting dense to sparse matrix, since only {}% of entries are zero.'.format(100.*ratio)) - return A - - def getDenseVector(self, BOOL_t trySparsification=False): - cdef: - INDEX_t cellNo1, cellNo2 - VectorLinearOperator A = None - {SCALAR}_t[:, ::1] contrib = self.contribVector, contribZeroExterior = self.contribZeroExteriorVector - INDEX_t start, end - meshBase surface - {SCALAR_label}IndexManagerVector iM - INDEX_t i, j, explicitZerosRow - np.int64_t explicitZeros - REAL_t[:, ::1] data - REAL_t sparsificationThreshold = 0.8 - BOOL_t symmetricLocalMatrix = self.local_matrix.symmetricLocalMatrix - BOOL_t symmetricCells = self.local_matrix.symmetricCells - MASK_t mask - - if self.comm: - start = np.ceil(self.mesh.num_cells*self.comm.rank/self.comm.size) - end = np.ceil(self.mesh.num_cells*(self.comm.rank+1)/self.comm.size) - else: - start = 0 - end = 
self.mesh.num_cells - - - if self.dm2 is None: - A = Dense_VectorLinearOperator(np.zeros((self.dm.num_dofs, self.dm.num_dofs, self.kernel.vectorSize), dtype=REAL)) - else: - A = Dense_VectorLinearOperator(np.zeros((self.dm.num_dofs, self.dm2.num_dofs, self.kernel.vectorSize), dtype=REAL)) - - if self.dm2 is None: - iM = {SCALAR_label}IndexManagerVector(self.dm, A) + if self.kernel.valueSize == 1: + return A else: - LOGGER.warning('Efficiency of assembly with 2 DoFMaps is bad.') - dmCombined = self.dm.combine(self.dm2) - B = SubMatrixAssemblyOperator(A, - np.arange(self.dm.num_dofs, dtype=INDEX), - np.arange(self.dm.num_dofs, self.dm.num_dofs+self.dm2.num_dofs, dtype=INDEX)) - iM = {SCALAR_label}IndexManagerVector(dmCombined, B) - - # Omega x Omega - with self.PLogger.Timer('interior'): - for cellNo1 in range(start, end): - self.local_matrix.setCell1(cellNo1) - for cellNo2 in range(cellNo1, self.mesh.num_cells): - self.local_matrix.setCell2(cellNo2) - if iM.getDoFsElemElem(cellNo1, cellNo2): - continue - panel = self.local_matrix.getPanelType() - if cellNo1 == cellNo2: - if panel != IGNORED: - self.local_matrix.evalVector(contrib, panel) - if symmetricLocalMatrix: - iM.addToMatrixElemElemSymVector(contrib, 1.) - else: - iM.addToMatrixElemElemVector(contrib, 1.) - else: - if symmetricCells: - if panel != IGNORED: - self.local_matrix.evalVector(contrib, panel) - # If the kernel is symmetric, the contributions from (cellNo1, cellNo2) and (cellNo2, cellNo1) - # are the same. We multiply by 2 to account for the contribution from cells (cellNo2, cellNo1). - if symmetricLocalMatrix: - iM.addToMatrixElemElemSymVector(contrib, 2.) - else: - iM.addToMatrixElemElemVector(contrib, 2.) - else: - if panel != IGNORED: - self.local_matrix.evalVector(contrib, panel) - if symmetricLocalMatrix: - iM.addToMatrixElemElemSymVector(contrib, 1.) - else: - iM.addToMatrixElemElemVector(contrib, 1.) 
- self.local_matrix.swapCells() - panel = self.local_matrix.getPanelType() - if panel != IGNORED: - if iM.getDoFsElemElem(cellNo2, cellNo1): - continue - self.local_matrix.evalVector(contrib, panel) - if symmetricLocalMatrix: - iM.addToMatrixElemElemSymVector(contrib, 1.) - else: - iM.addToMatrixElemElemVector(contrib, 1.) - self.local_matrix.swapCells() - - # Omega x Omega^C - if self.zeroExterior: - with self.PLogger.Timer('zeroExterior'): - surface = self.mesh.get_surface_mesh() - - self.local_matrix_zeroExterior.setMesh2(surface) - - for cellNo1 in range(start, end): - iM.getDoFsElem(cellNo1) - mask = iM.getElemSymMask() - self.local_matrix_zeroExterior.setCell1(cellNo1) - for cellNo2 in range(surface.num_cells): - self.local_matrix_zeroExterior.setCell2(cellNo2) - panel = self.local_matrix_zeroExterior.getPanelType() - self.local_matrix_zeroExterior.evalVector(contribZeroExterior, panel, mask) - # if local_matrix_zeroExterior.symmetricLocalMatrix: - iM.addToMatrixElemSymVector(contribZeroExterior, 1.) 
- # else: - # raise NotImplementedError() - if self.comm: - self.comm.Allreduce(MPI.IN_PLACE, A.data) - return A + return vecA cpdef {SCALAR}_t getEntryCluster(self, INDEX_t I, INDEX_t J): cdef: @@ -1527,11 +1438,11 @@ cdef class {SCALAR_label}nonlocalBuilder: entry += extractElemSymMasked{SCALAR_label}(dm, self.contribZeroExterior, 1., mask) return entry - cpdef LinearOperator assembleClusters(self, list Pnear, bint forceUnsymmetric=False, LinearOperator Anear=None, dict jumps={}, str prefix='', tree_node myRoot=None, BOOL_t doDistributedAssembly=False): + cpdef LinearOperator assembleClusters(self, list Pnear, bint forceUnsymmetricMatrix=False, LinearOperator Anear=None, dict jumps={}, str prefix='', tree_node myRoot=None, BOOL_t doDistributedAssembly=False): cdef: INDEX_t cellNo1, cellNo2, cellNo3 REAL_t fac - {SCALAR}_t[::1] contrib = self.contrib, contribZeroExterior = self.contribZeroExterior + {SCALAR}_t[:, ::1] contrib = self.contrib, contribZeroExterior = self.contribZeroExterior meshBase surface INDEX_t[:, ::1] cells = self.mesh.cells, surface_cells, fake_cells indexSet cellsInter @@ -1543,45 +1454,40 @@ cdef class {SCALAR_label}nonlocalBuilder: tupleDictMASK masks = None ENCODE_t hv, hv2 MASK_t mask - # INDEX_t vertex1, vertex2 bint useSymmetricMatrix - bint useSymmetricCells INDEX_t vertexNo, i INDEX_t[::1] edge = uninitialized((2), dtype=INDEX) REAL_t evalShift = 1e-9 local_matrix_t mass indexSetIterator it = arrayIndexSetIterator() INDEX_t startCluster - INDEX_t numAssembledCells + INDEX_t numCellPairs, numAssembledCellPairs, numIntegrations indexSet myDofs = None REAL_t sValuePre, sValuePost BOOL_t surfaceIntegralNeedsShift + self.PLogger.addValue('useSymmetricCells', self.local_matrix.symmetricCells) + self.PLogger.addValue('useSymmetricLocalMatrix', self.local_matrix.symmetricLocalMatrix) + mask.reset() if myRoot is not None: myDofs = myRoot.get_dofs() if Anear is None: - useSymmetricMatrix = self.local_matrix.symmetricLocalMatrix and 
self.local_matrix.symmetricCells and not forceUnsymmetric + useSymmetricMatrix = self.local_matrix.symmetricLocalMatrix and not forceUnsymmetricMatrix with self.PLogger.Timer(prefix+'build near field sparsity pattern'): # TODO: double check that this should not be if myRoot is not None and doDistributedAssembly: - Anear = getSparseNearField(self.dm, Pnear, symmetric=useSymmetricMatrix, myRoot=myRoot) + Anear = getSparseNearField{SCALAR_label}(self.dm, Pnear, symmetric=useSymmetricMatrix, myRoot=myRoot) else: - Anear = getSparseNearField(self.dm, Pnear, symmetric=useSymmetricMatrix) - LOGGER.info('Anear: {}'.format(Anear)) - - if self.comm is not None and self.comm.size > 1: - nnz = Anear.nnz - counts = np.zeros((self.comm.size), dtype=INDEX) - self.comm.Gather(np.array([nnz], dtype=INDEX), counts) - LOGGER.info('Near field entries per rank: {} ({}) / {} / {} ({})'.format(counts.min(), counts.argmin(), counts.mean(), counts.max(), counts.argmax())) + Anear = getSparseNearField{SCALAR_label}(self.dm, Pnear, symmetric=useSymmetricMatrix) + self.PLogger.addValue('Anear', repr(Anear)) + if hasattr(Anear, 'nnz'): + self.PLogger.addValue('numLocalNearFieldEntries', Anear.nnz) Anear_filtered = FilteredAssemblyOperator(Anear) - useSymmetricCells = self.local_matrix.symmetricCells - iM = {SCALAR_label}IndexManager(self.dm, Anear) use_masks = self.params.get('use_masks', True) @@ -1593,6 +1499,10 @@ cdef class {SCALAR_label}nonlocalBuilder: # D = (supp u) \cup (supp v)., # We only update unknowns that are in the cluster pair. + numCellPairs = 0 + numIntegrations = 0 + numAssembledCellPairs = 0 + if not use_masks: # This loop does the correct thing, but we are wasting a lot of # element x element evaluations. 
@@ -1607,17 +1517,19 @@ cdef class {SCALAR_label}nonlocalBuilder: for cellNo1 in cellsUnion: self.local_matrix.setCell1(cellNo1) for cellNo2 in cellsUnion: + numCellPairs += 1 self.local_matrix.setCell2(cellNo2) panel = self.local_matrix.getPanelType() if panel != IGNORED: - if useSymmetricCells and (cellNo1 != cellNo2): + if self.local_matrix.symmetricCells and (cellNo1 != cellNo2): fac = 2. else: fac = 1. if iM.getDoFsElemElem(cellNo1, cellNo2): continue + numAssembledCellPairs += 1 self.local_matrix.eval(contrib, panel) - if useSymmetricCells: + if self.local_matrix.symmetricLocalMatrix: iM.addToMatrixElemElemSym(contrib, fac) else: iM.addToMatrixElemElem(contrib, fac) @@ -1626,10 +1538,12 @@ cdef class {SCALAR_label}nonlocalBuilder: # This way, we only assembly over each element x element pair once. # We load balance the cells and only get the list for the local rank. startCluster = 0 - numAssembledCells = 0 + while startCluster < len(Pnear): with self.PLogger.Timer(prefix+'interior - build masks'): - masks = iM.buildMasksForClusters(Pnear, useSymmetricCells, &startCluster) + masks = iM.buildMasksForClusters(Pnear, self.local_matrix.symmetricCells, self.local_matrix.symmetricLocalMatrix, &startCluster) + + numCellPairs += masks.nnz if (masks.getSizeInBytes() >> 20) > 20: LOGGER.info('element x element pairs {}, {} MB'.format(masks.nnz, masks.getSizeInBytes() >> 20)) @@ -1643,24 +1557,23 @@ cdef class {SCALAR_label}nonlocalBuilder: self.local_matrix.setCell2(cellNo2) panel = self.local_matrix.getPanelType() if panel != IGNORED: - numAssembledCells += 1 - if useSymmetricCells and (cellNo1 != cellNo2): + numAssembledCellPairs += 1 + numIntegrations += mask.count() + if self.local_matrix.symmetricCells and (cellNo1 != cellNo2): fac = 2. else: fac = 1. 
if iM.getDoFsElemElem(cellNo1, cellNo2): continue self.local_matrix.eval(contrib, panel, mask) - if useSymmetricCells: + if self.local_matrix.symmetricLocalMatrix: iM.addToMatrixElemElemSymMasked(contrib, fac, mask) else: iM.addToMatrixElemElemMasked(contrib, fac, mask) masks = None - if self.comm is not None and self.comm.size > 1: - counts = np.zeros((self.comm.size), dtype=INDEX) - self.comm.Gather(np.array([numAssembledCells], dtype=INDEX), counts) - if self.comm.rank == 0: - LOGGER.info('Num assembled cells per rank: {} ({}) / {} / {} ({}) imbalance: {}'.format(counts.min(), counts.argmin(), counts.mean(), counts.max(), counts.argmax(), counts.max()/counts.min())) + self.PLogger.addValue('numCellPairs', numCellPairs) + self.PLogger.addValue('numAssembledCellPairs', numAssembledCellPairs) + self.PLogger.addValue('numIntegrations', numIntegrations) if not self.kernel.variable: if not self.kernel.complement: @@ -1969,10 +1882,8 @@ cdef class {SCALAR_label}nonlocalBuilder: LinearOperator Aother INDEX_t I, nnz indexSetIterator it = myDofs.getIter() - counts = np.zeros((self.comm.size), dtype=INDEX) - self.comm.Gather(np.array([Anear.nnz], dtype=INDEX), counts) - if self.comm.rank == 0: - LOGGER.info('Near field entries per rank: {} ({}) / {} / {} ({}) imbalance: {}'.format(counts.min(), counts.argmin(), counts.mean(), counts.max(), counts.argmax(), counts.max()/counts.min())) + self.PLogger.addValue('numLocalNearFieldEntries', Anear.nnz) + # drop entries that are not in rows of myRoot.dofs Anear = self.dropOffRank(Anear, myDofs) @@ -2029,9 +1940,6 @@ cdef class {SCALAR_label}nonlocalBuilder: if self.comm.rank != 0: Anear = None - else: - LOGGER.info('Anear reduced: {}'.format(Anear)) - # Anear = self.comm.bcast(Anear, root=0) return Anear def dropOffRank(self, LinearOperator Anear, indexSet myDofs): @@ -2251,7 +2159,9 @@ cdef class {SCALAR_label}nonlocalBuilder: else: for n in root.leaves(): n.canBeAssembled = True - LOGGER.info('Jumps: {}, Block sizes: {}, Leaf 
nodes: {}'.format(len(jumps), str({key: len(blocks[key]) for key in blocks}), len(list(root.leaves())))) + self.PLogger.addValue('jumps', len(jumps)) + self.PLogger.addValue('block sizes', str({key: len(blocks[key]) for key in blocks})) + self.PLogger.addValue('leaf nodes', len(list(root.leaves()))) if doDistributedAssembly: if self.kernel.variable: @@ -2307,12 +2217,8 @@ cdef class {SCALAR_label}nonlocalBuilder: symmetrizeNearFieldClusters(Pnear) - counts = np.zeros((self.comm.size), dtype=INDEX) - self.comm.Gather(np.array([myRoot.num_dofs], dtype=INDEX), counts) - LOGGER.info('Unknowns per rank: {} ({}) / {} / {} ({})'.format(counts.min(), counts.argmin(), counts.mean(), counts.max(), counts.argmax())) - - self.comm.Gather(np.array([len(Pnear)], dtype=INDEX), counts) - LOGGER.info('Near field cluster pairs per rank: {} ({}) / {} / {} ({})'.format(counts.min(), counts.argmin(), counts.mean(), counts.max(), counts.argmax())) + self.PLogger.addValue('numLocalUnknowns', myRoot.num_dofs) + self.PLogger.addValue('numLocalNearFieldClusterPairs', len(Pnear)) if assembleOnRoot: # collect far field on rank 0 @@ -2322,9 +2228,10 @@ cdef class {SCALAR_label}nonlocalBuilder: # "lvl+1", since the ranks are children of the global root farField.append((lvl+1, cP.n1.id, cP.n2.id)) farField = np.array(farField, dtype=INDEX) + self.PLogger.addValue('numLocalFarFieldClusterPairs', farField.shape[0]) + counts = uninitialized((self.comm.size), dtype=INDEX) self.comm.Gather(np.array([farField.shape[0]], dtype=INDEX), counts) if self.comm.rank == 0: - LOGGER.info('Far field cluster pairs per rank: {} ({}) / {} / {} ({})'.format(counts.min(), counts.argmin(), counts.mean(), counts.max(), counts.argmax())) N = 0 for rank in range(self.comm.size): N += counts[rank] @@ -2421,13 +2328,8 @@ cdef class {SCALAR_label}nonlocalBuilder: symmetrizeNearFieldClusters(Pnear) - counts = np.zeros((self.comm.size), dtype=INDEX) - self.comm.Gather(np.array([myRoot.num_dofs], dtype=INDEX), counts) - 
LOGGER.info('Unknowns per rank: {} ({}) / {} / {} ({})'.format(counts.min(), counts.argmin(), counts.mean(), counts.max(), counts.argmax())) - - self.comm.Gather(np.array([len(Pnear)], dtype=INDEX), counts) - LOGGER.info('Near field cluster pairs per rank: {} ({}) / {} / {} ({})'.format(counts.min(), counts.argmin(), counts.mean(), counts.max(), counts.argmax())) - + self.PLogger.addValue('numLocalUnknowns', myRoot.num_dofs) + self.PLogger.addValue('numLocalNearFieldClusterPairs', len(Pnear)) else: getCoveringClusters(self.kernel, root, root, refParams, @@ -2570,19 +2472,19 @@ cdef class {SCALAR_label}nonlocalBuilder: dict Pfar list Pnear LinearOperator h2 = None, Anear = None - BOOL_t forceUnsymmetric, doDistributedAssembly = False, assembleOnRoot = True, localFarFieldIndexing = False + BOOL_t forceUnsymmetricMatrix, doDistributedAssembly = False, assembleOnRoot = True, localFarFieldIndexing = False refinementParams refParams CSR_LinearOperator lclR refParams = self.getH2RefinementParams() - forceUnsymmetric = self.params.get('forceUnsymmetric', False) doDistributedAssembly = self.comm is not None and self.comm.size > 1 and DoFMap.num_dofs > self.comm.size assembleOnRoot = self.params.get('assembleOnRoot', True) + forceUnsymmetricMatrix = self.params.get('forceUnsymmetric', doDistributedAssembly and not assembleOnRoot) localFarFieldIndexing = self.params.get('localFarFieldIndexing', False) localFarFieldIndexing = doDistributedAssembly and not assembleOnRoot and localFarFieldIndexing if doDistributedAssembly and not assembleOnRoot: - assert forceUnsymmetric + assert forceUnsymmetricMatrix with self.PLogger.Timer('boxes, cells, coords'): boxes, cells = getDoFBoxesAndCells(self.dm.mesh, self.dm, self.comm) @@ -2599,16 +2501,16 @@ cdef class {SCALAR_label}nonlocalBuilder: lenPfar = self.comm.bcast(lenPfar) if lenPfar > 0: - LOGGER.info('interpolation_order: {}, maxLevels: {}, minClusterSize: {}, minMixedClusterSize: {}, minFarFieldBlockSize: {}, eta: 
{}'.format(refParams.interpolation_order, - refParams.maxLevels, - refParams.minSize, - refParams.minMixedSize, - refParams.farFieldInteractionSize, - refParams.eta)) + self.PLogger.addValue('interpolation_order', refParams.interpolation_order) + self.PLogger.addValue('maxLevels', refParams.maxLevels) + self.PLogger.addValue('minSize', refParams.minSize) + self.PLogger.addValue('minMixedSize', refParams.minMixedSize) + self.PLogger.addValue('farFieldInteractionSize', refParams.farFieldInteractionSize) + self.PLogger.addValue('eta', refParams.eta) # get near field matrix with self.PLogger.Timer('near field'): - Anear = self.assembleClusters(Pnear, jumps=jumps, forceUnsymmetric=forceUnsymmetric, myRoot=myRoot, doDistributedAssembly=doDistributedAssembly) + Anear = self.assembleClusters(Pnear, jumps=jumps, forceUnsymmetricMatrix=forceUnsymmetricMatrix, myRoot=myRoot, doDistributedAssembly=doDistributedAssembly) if doDistributedAssembly and assembleOnRoot: with self.PLogger.Timer('reduceNearOp'): Anear = self.reduceNearOp(Anear, myRoot.get_dofs()) @@ -2652,7 +2554,7 @@ cdef class {SCALAR_label}nonlocalBuilder: h2 = DistributedH2Matrix_localData(local_h2, Pnear, self.comm, self.dm, local_dm, lclR, lclP) else: h2 = nullOperator(self.dm.num_dofs, self.dm.num_dofs) - LOGGER.info('{}'.format(h2)) + self.PLogger.addValue('H2', repr(h2)) elif len(Pnear) == 0: h2 = nullOperator(self.dm.num_dofs, self.dm.num_dofs) else: @@ -2673,3 +2575,60 @@ cdef class {SCALAR_label}nonlocalBuilder: def getH2FiniteHorizon(self, LinearOperator Ainf=None): A = horizonCorrected(self.mesh, self.dm, self.kernel, self.comm, Ainf, logging=isinstance(self.PLogger, (PLogger, LoggingPLogger))) return A + + +cdef LinearOperator getSparseNearField{SCALAR_label}(DoFMap DoFMap, list Pnear, bint symmetric=False, tree_node myRoot=None): + cdef: + sparsityPattern sP + INDEX_t I = -1, J = -1 + nearFieldClusterPair clusterPair + indexSet dofs1, dofs2 + indexSetIterator it1 = arrayIndexSetIterator(), it2 = 
arrayIndexSetIterator() + sP = sparsityPattern(DoFMap.num_dofs) + if symmetric: + for clusterPair in Pnear: + dofs1 = clusterPair.n1.get_dofs() + dofs2 = clusterPair.n2.get_dofs() + it1.setIndexSet(dofs1) + it2.setIndexSet(dofs2) + while it1.step(): + I = it1.i + it2.reset() + while it2.step(): + J = it2.i + if I > J: + sP.add(I, J) + elif myRoot is not None: + for clusterPair in Pnear: + if clusterPair.n1.getParent(1).id != myRoot.id: + continue + dofs1 = clusterPair.n1.get_dofs() + dofs2 = clusterPair.n2.get_dofs() + it1.setIndexSet(dofs1) + it2.setIndexSet(dofs2) + while it1.step(): + I = it1.i + it2.reset() + while it2.step(): + J = it2.i + sP.add(I, J) + else: + for clusterPair in Pnear: + dofs1 = clusterPair.n1.get_dofs() + dofs2 = clusterPair.n2.get_dofs() + it1.setIndexSet(dofs1) + it2.setIndexSet(dofs2) + while it1.step(): + I = it1.i + it2.reset() + while it2.step(): + J = it2.i + sP.add(I, J) + indptr, indices = sP.freeze() + data = np.zeros((indices.shape[0]), dtype={SCALAR}) + if symmetric: + diagonal = np.zeros((DoFMap.num_dofs), dtype={SCALAR}) + A = {SCALAR_label}SSS_LinearOperator(indices, indptr, data, diagonal) + else: + A = {SCALAR_label}CSR_LinearOperator(indices, indptr, data) + return A diff --git a/nl/PyNucleus_nl/nonlocalOperator_decl_{SCALAR}.pxi b/nl/PyNucleus_nl/nonlocalOperator_decl_{SCALAR}.pxi index 780a501..c6cc6f6 100644 --- a/nl/PyNucleus_nl/nonlocalOperator_decl_{SCALAR}.pxi +++ b/nl/PyNucleus_nl/nonlocalOperator_decl_{SCALAR}.pxi @@ -47,13 +47,9 @@ cdef class {SCALAR_label}double_local_matrix_t: cdef void setSimplex2(self, REAL_t[:, ::1] simplex2) cdef void swapCells(self) cdef void eval(self, - {SCALAR}_t[::1] contrib, + {SCALAR}_t[:, ::1] contrib, panelType panel, MASK_t mask=*) - cdef void evalVector(self, - {SCALAR}_t[:, ::1] contrib, - panelType panel, - MASK_t mask=*) cdef panelType getQuadOrder(self, const REAL_t h1, const REAL_t h2, @@ -78,7 +74,10 @@ cdef class 
{SCALAR_label}nonlocalOperator({SCALAR_label}double_local_matrix_t): public {SCALAR_label}Kernel kernel REAL_t[:, ::1] x, y void** distantQuadRulesPtr - {SCALAR}_t[::1] temp, temp2 + {SCALAR}_t[::1] vec + {SCALAR}_t[::1] vec2 + {SCALAR}_t[:, ::1] temp + {SCALAR}_t[:, ::1] temp2 public REAL_t[::1] n, w cdef void getNearQuadRule(self, panelType panel) cdef inline shapeFunction getLocalShapeFunction(self, INDEX_t local_dof) @@ -86,6 +85,6 @@ cdef class {SCALAR_label}nonlocalOperator({SCALAR_label}double_local_matrix_t): cdef void addQuadRule_nonSym(self, panelType panel) cdef void addQuadRule_boundary(self, panelType panel) cdef void getNonSingularNearQuadRule(self, panelType panel) - cdef void eval_distant(self, {SCALAR}_t[::1] contrib, panelType panel, MASK_t mask=*) - cdef void eval_distant_nonsym(self, {SCALAR}_t[::1] contrib, panelType panel, MASK_t mask=*) - cdef void eval_distant_boundary(self, {SCALAR}_t[::1] contrib, panelType panel, MASK_t mask=*) + cdef void eval_distant(self, {SCALAR}_t[:, ::1] contrib, panelType panel, MASK_t mask=*) + cdef void eval_distant_nonsym(self, {SCALAR}_t[:, ::1] contrib, panelType panel, MASK_t mask=*) + cdef void eval_distant_boundary(self, {SCALAR}_t[:, ::1] contrib, panelType panel, MASK_t mask=*) diff --git a/nl/PyNucleus_nl/nonlocalOperator_{SCALAR}.pxi b/nl/PyNucleus_nl/nonlocalOperator_{SCALAR}.pxi index f94cd47..22b53ec 100644 --- a/nl/PyNucleus_nl/nonlocalOperator_{SCALAR}.pxi +++ b/nl/PyNucleus_nl/nonlocalOperator_{SCALAR}.pxi @@ -230,32 +230,21 @@ cdef class {SCALAR_label}double_local_matrix_t: self.vol2 = self.volume2(self.simplex2) def __call__(self, - {SCALAR}_t[::1] contrib, + {SCALAR}_t[:, ::1] contrib, panelType panel): return self.eval(contrib, panel) cdef void eval(self, - {SCALAR}_t[::1] contrib, + {SCALAR}_t[:, ::1] contrib, panelType panel, MASK_t mask=ALL): raise NotImplementedError() - cdef void evalVector(self, - {SCALAR}_t[:, ::1] contrib, - panelType panel, - MASK_t mask=ALL): - raise 
NotImplementedError() - def eval_py(self, - {SCALAR}_t[::1] contrib, + {SCALAR}_t[:, ::1] contrib, panel): self.eval(contrib, panel, ALL) - def evalVector_py(self, - {SCALAR}_t[:, ::1] contrib, - panel): - self.evalVector(contrib, panel, ALL) - cdef panelType getQuadOrder(self, const REAL_t h1, const REAL_t h2, @@ -449,7 +438,10 @@ cdef class {SCALAR_label}nonlocalOperator({SCALAR_label}double_local_matrix_t): self.x = uninitialized((0, self.dim), dtype=REAL) self.y = uninitialized((0, self.dim), dtype=REAL) - self.temp = uninitialized((0), dtype={SCALAR}) + + self.vec = uninitialized((kernel.valueSize), dtype={SCALAR}) + self.vec2 = uninitialized((kernel.valueSize), dtype={SCALAR}) + self.temp = uninitialized((0, kernel.valueSize), dtype={SCALAR}) self.n = uninitialized((self.dim), dtype=REAL) self.w = uninitialized((self.dim), dtype=REAL) @@ -587,7 +579,7 @@ cdef class {SCALAR_label}nonlocalOperator({SCALAR_label}double_local_matrix_t): if numQuadNodes1 > self.y.shape[0]: self.y = uninitialized((numQuadNodes1, self.dim), dtype=REAL) if numQuadNodes0*numQuadNodes1 > self.temp.shape[0]: - self.temp = uninitialized((numQuadNodes0*numQuadNodes1), dtype={SCALAR}) + self.temp = uninitialized((numQuadNodes0*numQuadNodes1, self.kernel.valueSize), dtype={SCALAR}) cdef void addQuadRule_nonSym(self, panelType panel): cdef: @@ -639,8 +631,8 @@ cdef class {SCALAR_label}nonlocalOperator({SCALAR_label}double_local_matrix_t): if numQuadNodes1 > self.y.shape[0]: self.y = uninitialized((numQuadNodes1, self.dim), dtype=REAL) if numQuadNodes0*numQuadNodes1 > self.temp.shape[0]: - self.temp = uninitialized((numQuadNodes0*numQuadNodes1), dtype={SCALAR}) - self.temp2 = uninitialized((numQuadNodes0*numQuadNodes1), dtype={SCALAR}) + self.temp = uninitialized((numQuadNodes0*numQuadNodes1, self.kernel.valueSize), dtype={SCALAR}) + self.temp2 = uninitialized((numQuadNodes0*numQuadNodes1, self.kernel.valueSize), dtype={SCALAR}) cdef void getNonSingularNearQuadRule(self, panelType panel): 
cdef: @@ -689,14 +681,14 @@ cdef class {SCALAR_label}nonlocalOperator({SCALAR_label}double_local_matrix_t): if numQuadNodes1 > self.y.shape[0]: self.y = uninitialized((numQuadNodes1, self.dim), dtype=REAL) if qr2.num_nodes > self.temp.shape[0]: - self.temp = uninitialized((qr2.num_nodes), dtype={SCALAR}) + self.temp = uninitialized((qr2.num_nodes, self.kernel.valueSize), dtype={SCALAR}) cdef void eval_distant(self, - {SCALAR}_t[::1] contrib, + {SCALAR}_t[:, ::1] contrib, panelType panel, MASK_t mask=ALL): cdef: - INDEX_t k, i, j, I, J + INDEX_t k, i, j, I, J, l REAL_t vol, vol1 = self.vol1, vol2 = self.vol2 {SCALAR}_t val doubleSimplexQuadratureRule qr2 @@ -713,6 +705,7 @@ cdef class {SCALAR_label}nonlocalOperator({SCALAR_label}double_local_matrix_t): REAL_t[::1] b1 REAL_t[:, ::1] A1, A2 BOOL_t cutElements = False + INDEX_t valueSize = self.kernel.valueSize if self.kernel.finiteHorizon: # check if the horizon might cut the elements @@ -740,19 +733,23 @@ cdef class {SCALAR_label}nonlocalOperator({SCALAR_label}double_local_matrix_t): k = 0 for i in range(qr2.rule1.num_nodes): for j in range(qr2.rule2.num_nodes): - self.temp[k] = qr2.weights[k]*self.kernel.evalPtr(dim, - &self.x[i, 0], - &self.y[j, 0]) + self.kernel.evalPtr(dim, + &self.x[i, 0], + &self.y[j, 0], + &self.vec[0]) + for l in range(valueSize): + self.temp[k, l] = qr2.weights[k]*self.vec[l] k += 1 k = 0 for I in range(2*self.DoFMap.dofs_per_element): for J in range(I, 2*self.DoFMap.dofs_per_element): if mask[k]: - val = 0. - for i in range(qr2.num_nodes): - val += self.temp[i] * PSI[I, i] * PSI[J, i] - contrib[k] = val*vol + for l in range(valueSize): + val = 0. 
+ for i in range(qr2.num_nodes): + val += self.temp[i, l] * PSI[I, i] * PSI[J, i] + contrib[k, l] = val*vol k += 1 else: if panel < 0: @@ -793,7 +790,8 @@ cdef class {SCALAR_label}nonlocalOperator({SCALAR_label}double_local_matrix_t): qr1trans.setLinearBaryTransform(A2) qr1trans.nodesInGlobalCoords(simplex2, self.y) for j in range(qr1trans.num_nodes): - val = qr0trans.weights[i]*qr1trans.weights[j]*self.kernel.evalPtr(dim, &self.x[i, 0], &self.y[j, 0]) + self.kernel.evalPtr(dim, &self.x[i, 0], &self.y[j, 0], &self.vec[0]) + val = qr0trans.weights[i]*qr1trans.weights[j]*self.vec[0] val *= c1 * c2 * vol k = 0 for I in range(2*dofs_per_element): @@ -807,16 +805,16 @@ cdef class {SCALAR_label}nonlocalOperator({SCALAR_label}double_local_matrix_t): PSI_J = self.getLocalShapeFunction(J).evalStrided(&qr0trans.nodes[0, i], numQuadNodes0) else: PSI_J = -self.getLocalShapeFunction(J-dofs_per_element).evalStrided(&qr1trans.nodes[0, j], numQuadNodes1) - contrib[k] += val * PSI_I*PSI_J + contrib[k, 0] += val * PSI_I*PSI_J k += 1 cdef void eval_distant_nonsym(self, - {SCALAR}_t[::1] contrib, + {SCALAR}_t[:, ::1] contrib, panelType panel, MASK_t mask=ALL): cdef: - INDEX_t k, i, j, I, J + INDEX_t k, i, j, I, J, l REAL_t vol, vol1 = self.vol1, vol2 = self.vol2 {SCALAR}_t val, val2 doubleSimplexQuadratureRule qr2 @@ -835,6 +833,7 @@ cdef class {SCALAR_label}nonlocalOperator({SCALAR_label}double_local_matrix_t): REAL_t a_A2[3][3] REAL_t[::1] b1 REAL_t[:, ::1] A1, A2 + INDEX_t valueSize = self.kernel.valueSize if self.kernel.finiteHorizon: # check if the horizon might cut the elements @@ -846,7 +845,7 @@ cdef class {SCALAR_label}nonlocalOperator({SCALAR_label}double_local_matrix_t): # need to figure out the element # transformation. - contrib[:] = 0. + contrib[:, :] = 0. 
if not cutElements: vol = vol1*vol2 @@ -864,22 +863,28 @@ cdef class {SCALAR_label}nonlocalOperator({SCALAR_label}double_local_matrix_t): for i in range(qr2.rule1.num_nodes): for j in range(qr2.rule2.num_nodes): w = qr2.weights[k] - self.temp[k] = w * self.kernel.evalPtr(dim, - &self.x[i, 0], - &self.y[j, 0]) - self.temp2[k] = w * self.kernel.evalPtr(dim, - &self.y[j, 0], - &self.x[i, 0]) + self.kernel.evalPtr(dim, + &self.x[i, 0], + &self.y[j, 0], + &self.vec[0]) + self.kernel.evalPtr(dim, + &self.y[j, 0], + &self.x[i, 0], + &self.vec2[0]) + for l in range(valueSize): + self.temp[k, l] = w * self.vec[l] + self.temp2[k, l] = w * self.vec2[l] k += 1 k = 0 for I in range(2*self.DoFMap.dofs_per_element): for J in range(2*self.DoFMap.dofs_per_element): if mask[k]: - val = 0. - for i in range(qr2.num_nodes): - val += (self.temp[i] * PHI[0, I, i] - self.temp2[i] * PHI[1, I, i]) * PSI[J, i] - contrib[k] = val*vol + for l in range(valueSize): + val = 0. + for i in range(qr2.num_nodes): + val += (self.temp[i, l] * PHI[0, I, i] - self.temp2[i, l] * PHI[1, I, i]) * PSI[J, i] + contrib[k, l] = val*vol k += 1 else: if panel < 0: @@ -921,8 +926,10 @@ cdef class {SCALAR_label}nonlocalOperator({SCALAR_label}double_local_matrix_t): qr1trans.nodesInGlobalCoords(simplex2, self.y) for j in range(qr1trans.num_nodes): w = qr0trans.weights[i]*qr1trans.weights[j]*c1 * c2 * vol - val = w*self.kernel.evalPtr(dim, &self.x[i, 0], &self.y[j, 0]) - val2 = w*self.kernel.evalPtr(dim, &self.y[j, 0], &self.x[i, 0]) + self.kernel.evalPtr(dim, &self.x[i, 0], &self.y[j, 0], &self.vec[0]) + self.kernel.evalPtr(dim, &self.y[j, 0], &self.x[i, 0], & self.vec2[0]) + val = w*self.vec[0] + val2 = w*self.vec2[0] k = 0 for I in range(2*dofs_per_element): if I < dofs_per_element: @@ -937,7 +944,7 @@ cdef class {SCALAR_label}nonlocalOperator({SCALAR_label}double_local_matrix_t): PSI_J = self.getLocalShapeFunction(J).evalStrided(&qr0trans.nodes[0, i], numQuadNodes0) else: PSI_J = 
-self.getLocalShapeFunction(J-dofs_per_element).evalStrided(&qr1trans.nodes[0, j], numQuadNodes1) - contrib[k] += (val * PHI_I_0 - val2 * PHI_I_1) * PSI_J + contrib[k, 0] += (val * PHI_I_0 - val2 * PHI_I_1) * PSI_J k += 1 cdef void addQuadRule_boundary(self, panelType panel): @@ -967,14 +974,14 @@ cdef class {SCALAR_label}nonlocalOperator({SCALAR_label}double_local_matrix_t): if qr2.rule2.num_nodes > self.y.shape[0]: self.y = uninitialized((qr2.rule2.num_nodes, self.dim), dtype=REAL) if qr2.num_nodes > self.temp.shape[0]: - self.temp = uninitialized((qr2.num_nodes), dtype=REAL) + self.temp = uninitialized((qr2.num_nodes, self.kernel.valueSize), dtype=REAL) cdef void eval_distant_boundary(self, - {SCALAR}_t[::1] contrib, + {SCALAR}_t[:, ::1] contrib, panelType panel, MASK_t mask=ALL): cdef: - INDEX_t k, m, i, j, I, J + INDEX_t k, m, i, j, I, J, l REAL_t vol, valReal, vol1 = self.vol1, vol2 = self.vol2 {SCALAR}_t val doubleSimplexQuadratureRule qr2 @@ -983,6 +990,7 @@ cdef class {SCALAR_label}nonlocalOperator({SCALAR_label}double_local_matrix_t): REAL_t[:, ::1] simplex2 = self.simplex2 INDEX_t dim = simplex1.shape[1] REAL_t normW, nw + INDEX_t valueSize = self.kernel.valueSize # Kernel: # \Gamma(x,y) = n \dot (x-y) * C(d,s) / (2s) / |x-y|^{d+2s} @@ -1041,15 +1049,17 @@ cdef class {SCALAR_label}nonlocalOperator({SCALAR_label}double_local_matrix_t): self.w[j] *= normW nw = mydot(self.n, self.w) i = k*qr2.rule2.num_nodes+m - self.temp[i] = qr2.weights[i] * nw * self.kernel.evalPtr(dim, &self.x[k, 0], &self.y[m, 0]) + self.kernel.evalPtr(dim, &self.x[k, 0], &self.y[m, 0], &self.vec[0]) + for l in range(valueSize): + self.temp[i, l] = qr2.weights[i] * nw * self.vec[l] k = 0 for I in range(self.DoFMap.dofs_per_element): for J in range(I, self.DoFMap.dofs_per_element): if mask[k]: - val = 0. - for i in range(qr2.num_nodes): - val += self.temp[i] * PHI[I, i] * PHI[J, i] - contrib[k] = val*vol + for m in range(valueSize): + val = 0. 
+ for i in range(qr2.num_nodes): + val += self.temp[i, m] * PHI[I, i] * PHI[J, i] + contrib[k, m] = val*vol k += 1 - diff --git a/nl/PyNucleus_nl/nonlocalProblems.py b/nl/PyNucleus_nl/nonlocalProblems.py index 9530288..d657245 100644 --- a/nl/PyNucleus_nl/nonlocalProblems.py +++ b/nl/PyNucleus_nl/nonlocalProblems.py @@ -160,7 +160,7 @@ def build(self, name, kernel, boundaryCondition, noRef=0, useMulti=False, **kwar tag = NO_BOUNDARY else: tag = PHYSICAL - raise NotImplementedError() + raise NotImplementedError("Non-homogeneous Dirichlet conditions for infinite horizon kernels are not implemented.") else: tag = NO_BOUNDARY zeroExterior = False @@ -286,8 +286,9 @@ def __init__(self, driver): self.addProperty('phiType') self.addProperty('phiArgs') self.addProperty('admissibleParams') + self.addProperty('feOrder') self.admissibleParams = None - self.feFractionalOrder = None + self.feOrder = None def setDriverArgs(self): p = self.driver.addGroup('kernel') @@ -384,7 +385,7 @@ def constructAuxiliarySpace(self): @generates(['kernel', 'rangedKernel']) def processKernel(self, dim, kernelType, sType, sArgs, phiType, phiArgs, horizon, interaction, normalized, admissibleParams, - discretizedOrder, dmAux, feFractionalOrder): + discretizedOrder, dmAux, feOrder): if kernelType == 'local': self.kernel = None @@ -399,16 +400,15 @@ def processKernel(self, dim, kernelType, sType, sArgs, phiType, phiArgs, horizon rangedKernel = self.directlyGetWithoutChecks('rangedKernel') if rangedKernel is None or not isinstance(rangedKernel, RangedFractionalKernel): self.rangedKernel = RangedFractionalKernel(dim, - admissibleParams.subset({'sArgs'}), + admissibleParams.subset({('sArgs', 's')}), functionFactory('constant', horizon), normalized) else: self.rangedKernel = rangedKernel try: - self.rangedKernel.setOrder(*sArgs) - self.kernel = self.rangedKernel.getFrozenKernel(*sArgs) + self.rangedKernel.setOrder(sArgs['s']) + self.kernel = self.rangedKernel.getFrozenKernel(sArgs['s']) except TypeError: 
- sArgs = (sArgs, ) self.rangedKernel.setOrder(*sArgs) self.kernel = self.rangedKernel.getFrozenKernel(*sArgs) return @@ -416,7 +416,7 @@ def processKernel(self, dim, kernelType, sType, sArgs, phiType, phiArgs, horizon self.rangedKernel = None if kType == FRACTIONAL: - if feFractionalOrder is None: + if feOrder is None: if isinstance(sArgs, dict): if discretizedOrder: sFun = fractionalOrderFactory(sType, dm=dmAux, **sArgs) @@ -433,9 +433,17 @@ def processKernel(self, dim, kernelType, sType, sArgs, phiType, phiArgs, horizon if discretizedOrder: sFun = fractionalOrderFactory(sType, *sArgs, dm=dmAux) else: - sFun = fractionalOrderFactory(sType, *sArgs) + sFun = fractionalOrderFactory(sType, *sArgs) + if discretizedOrder: + self.directlySetWithoutChecks('feFractionalOrder', sFun) else: - sFun = deepcopy(feFractionalOrder) + if isinstance(feOrder, feFractionalOrder): + sFun = deepcopy(feOrder) + elif isinstance(feOrder, np.ndarray): + t = dmAux.fromArray(feOrder) + sFun = feFractionalOrder(t, feOrder.min(), feOrder.max()) + else: + raise NotImplementedError() else: sFun = None @@ -494,6 +502,7 @@ def setDriverArgs(self): 'residual', 'hierarchical', 'knownSolution', None], argInterpreter=lambda v: None if v == 'None' else v, group=p) self.setDriverFlag('noRef', -1, group=p) + self.setDriverFlag('targetDoFsAux', 0) def processCmdline(self, params): noRef = params['noRef'] @@ -582,11 +591,16 @@ def processProblem(self, kernel, dim, domain, domainParams, problem, normalized) if problem == 'constant': self.rhs = constant(1.) 
- if isinstance(s, (constFractionalOrder, variableConstFractionalOrder, constantNonSymFractionalOrder)): - C = 2.**(-2.*s.value)*Gamma(dim/2.)/Gamma((dim+2.*s.value)/2.)/Gamma(1.+s.value) - self.exactHsSquared = C * np.sqrt(np.pi)*Gamma(s.value+1)/Gamma(s.value+3/2) - L2_ex = np.sqrt(C**2 * np.sqrt(np.pi) * Gamma(1+2*s.value)/Gamma(3/2+2*s.value) * radius**2) - self.analyticSolution = solFractional(s.value, dim, radius) + if (isinstance(s, (constFractionalOrder, variableConstFractionalOrder, constantNonSymFractionalOrder)) or + (isinstance(s, feFractionalOrder) and np.array(s.vec).min() == np.array(s.vec).max())): + if isinstance(s, feFractionalOrder): + sValue = s.vec[0] + else: + sValue = s.value + C = 2.**(-2.*sValue)*Gamma(dim/2.)/Gamma((dim+2.*sValue)/2.)/Gamma(1.+sValue) + self.exactHsSquared = C * np.sqrt(np.pi)*Gamma(sValue+1)/Gamma(sValue+3/2) + L2_ex = np.sqrt(C**2 * np.sqrt(np.pi) * Gamma(1+2*sValue)/Gamma(3/2+2*sValue) * radius**2) + self.analyticSolution = solFractional(sValue, dim, radius) elif problem == 'sin': self.rhs = Lambda(lambda x: np.sin(np.pi*x[0])) elif problem == 'cos': @@ -794,7 +808,7 @@ def buildMesh(self, mesh_domain, mesh_params): self.mesh, _ = nonlocalMeshFactory.build(mesh_domain, **mesh_params) @generates('dmAux') - def constructAuxiliarySpace(self, dim, domain, domainParams, kernelType, horizon): + def constructAuxiliarySpace(self, dim, domain, domainParams, kernelType, horizon, targetDoFsAux): # This is not the actual kernel that we use. # We just need something to get a mesh to support the fractional order. 
kType = getKernelEnum(kernelType) @@ -804,6 +818,8 @@ def constructAuxiliarySpace(self, dim, domain, domainParams, kernelType, horizon else: kernel = getKernel(dim=dim, kernel=kType, horizon=horizon) mesh, _ = nonlocalMeshFactory(domain, kernel=kernel, boundaryCondition=HOMOGENEOUS_DIRICHLET, **domainParams) + while mesh.num_vertices < targetDoFsAux: + mesh = mesh.refine() self.dmAux = dofmapFactory('P1', mesh, NO_BOUNDARY) def getIdentifier(self, params): diff --git a/nl/PyNucleus_nl/twoPointFunctions.pyx b/nl/PyNucleus_nl/twoPointFunctions.pyx index c1ccf28..54aedff 100644 --- a/nl/PyNucleus_nl/twoPointFunctions.pyx +++ b/nl/PyNucleus_nl/twoPointFunctions.pyx @@ -13,6 +13,8 @@ import numpy as np cimport numpy as np from libc.math cimport sqrt, exp, atan from PyNucleus_base.myTypes import INDEX, REAL, COMPLEX, ENCODE, BOOL +from PyNucleus_base.blas import uninitialized + cdef enum fixed_type: FIXED_X @@ -22,113 +24,23 @@ cdef enum fixed_type: include "twoPointFunctions_REAL.pxi" include "twoPointFunctions_COMPLEX.pxi" -# cdef class fixedTwoPointFunction(function): -# cdef: -# twoPointFunction f -# REAL_t[::1] point -# fixed_type fixedType - -# def __init__(self, twoPointFunction f, REAL_t[::1] point, fixed_type fixedType): -# self.f = f -# self.point = point -# self.fixedType = fixedType - -# cdef REAL_t eval(self, REAL_t[::1] x): -# if self.fixedType == FIXED_X: -# return self.f(self.point, x) -# if self.fixedType == FIXED_Y: -# return self.f(x, self.point) -# else: -# return self.f(x, x) - - -# cdef class twoPointFunction: -# def __init__(self, BOOL_t symmetric): -# self.symmetric = symmetric - -# def __call__(self, REAL_t[::1] x, REAL_t[::1] y): -# return self.eval(x, y) - -# cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): -# raise NotImplementedError() - -# cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): -# raise NotImplementedError() - -# def __getstate__(self): -# return self.symmetric - -# def __setstate__(self, state): -# 
twoPointFunction.__init__(self, state) - -# def fixedX(self, REAL_t[::1] x): -# return fixedTwoPointFunction(self, x, FIXED_X) - -# def fixedY(self, REAL_t[::1] y): -# return fixedTwoPointFunction(self, y, FIXED_Y) - -# def diagonal(self): -# return fixedTwoPointFunction(self, None, DIAGONAL) - -# def plot(self, mesh, **kwargs): -# cdef: -# INDEX_t i, j -# REAL_t[:, ::1] S -# REAL_t[::1] S2 -# REAL_t[::1] x, y -# import matplotlib.pyplot as plt -# c = np.array(mesh.getCellCenters()) -# if mesh.dim == 1: -# X, Y = np.meshgrid(c[:, 0], c[:, 0]) -# x = np.empty((mesh.dim), dtype=REAL) -# y = np.empty((mesh.dim), dtype=REAL) -# S = np.zeros((mesh.num_cells, mesh.num_cells)) -# for i in range(mesh.num_cells): -# for j in range(mesh.num_cells): -# x[0] = X[i, j] -# y[0] = Y[i, j] -# S[i, j] = self.eval(x, y) -# plt.pcolormesh(X, Y, S, **kwargs) -# plt.colorbar() -# plt.xlabel(r'$x$') -# plt.ylabel(r'$y$') -# elif mesh.dim == 2: -# S2 = np.zeros(mesh.num_cells) -# for i in range(mesh.num_cells): -# S2[i] = self(c[i, :], c[i, :]) -# mesh.plotFunction(S2, flat=True) -# else: -# raise NotImplementedError() - -# def __mul__(self, twoPointFunction other): -# if isinstance(self, constantTwoPoint) and isinstance(other, constantTwoPoint): -# return constantTwoPoint(self.value*other.value) -# elif isinstance(self, parametrizedTwoPointFunction) or isinstance(other, parametrizedTwoPointFunction): -# return productParametrizedTwoPoint(self, other) -# elif isinstance(self, constantTwoPoint) and isinstance(other, (float, REAL)): -# return constantTwoPoint(self.value*other) -# elif isinstance(other, constantTwoPoint) and isinstance(self, (float, REAL)): -# return constantTwoPoint(self*other.value) -# else: -# return productTwoPoint(self, other) - cdef class lambdaTwoPoint(twoPointFunction): cdef: object fun def __init__(self, fun, BOOL_t symmetric): - super(lambdaTwoPoint, self).__init__(symmetric) + super(lambdaTwoPoint, self).__init__(symmetric, 1) self.fun = fun - cdef REAL_t 
eval(self, REAL_t[::1] x, REAL_t[::1] y): - return self.fun(x, y) + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] value): + value[0] = self.fun(x, y) - cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, REAL_t* value): cdef: REAL_t[::1] xA = x REAL_t[::1] yA = y - return self.fun(xA, yA) + value[0] = self.fun(xA, yA) def __repr__(self): return 'Lambda({})'.format(self.fun) @@ -140,49 +52,6 @@ cdef class lambdaTwoPoint(twoPointFunction): lambdaTwoPoint.__init__(self, state[0], state[1]) -# cdef class productTwoPoint(twoPointFunction): -# def __init__(self, twoPointFunction f1, twoPointFunction f2): -# super(productTwoPoint, self).__init__(f1.symmetric and f2.symmetric) -# self.f1 = f1 -# self.f2 = f2 - -# cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): -# return self.f1.eval(x, y)*self.f2.eval(x, y) - -# cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): -# return self.f1.evalPtr(dim, x, y)*self.f2.evalPtr(dim, x, y) - -# def __repr__(self): -# return '{}*{}'.format(self.f1, self.f2) - -# def __getstate__(self): -# return self.f1, self.f2 - -# def __setstate__(self, state): -# productTwoPoint.__init__(self, state[0], state[1]) - - -# cdef class constantTwoPoint(twoPointFunction): -# def __init__(self, REAL_t value): -# super(constantTwoPoint, self).__init__(True) -# self.value = value - -# cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): -# return self.value - -# cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): -# return self.value - -# def __repr__(self): -# return '{}'.format(self.value) - -# def __getstate__(self): -# return self.value - -# def __setstate__(self, state): -# constantTwoPoint.__init__(self, state) - - cdef class matrixTwoPoint(twoPointFunction): def __init__(self, REAL_t[:, ::1] mat): self.mat = mat @@ -192,7 +61,7 @@ cdef class matrixTwoPoint(twoPointFunction): for j in range(i, mat.shape[0]): if abs(mat[i, j]-mat[j, i]) > 
1e-12: symmetric = False - super(matrixTwoPoint, self).__init__(symmetric) + super(matrixTwoPoint, self).__init__(symmetric, 1) self.n = np.zeros((mat.shape[0]), dtype=REAL) def __getstate__(self): @@ -204,7 +73,7 @@ cdef class matrixTwoPoint(twoPointFunction): def __repr__(self): return '{}({},sym={})'.format(self.__class__.__name__, np.array(self.mat), self.symmetric) - cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] value): cdef: INDEX_t dim = x.shape[0] INDEX_t i, j @@ -219,9 +88,9 @@ cdef class matrixTwoPoint(twoPointFunction): for i in range(dim): for j in range(dim): d += self.n[i]*self.mat[i, j]*self.n[j] - return d + value[0] = d - cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, REAL_t* value): cdef: INDEX_t i, j REAL_t d = 0. @@ -236,8 +105,9 @@ cdef class matrixTwoPoint(twoPointFunction): for i in range(dim): for j in range(dim): d += self.n[i]*self.mat[i, j]*self.n[j] - return d - return 1. + value[0] = d + return + value[0] = 1. 
cdef class leftRightTwoPoint(twoPointFunction): @@ -246,7 +116,7 @@ cdef class leftRightTwoPoint(twoPointFunction): lr = 0.5*(ll+rr) if not np.isfinite(rl): rl = 0.5*(ll+rr) - super(leftRightTwoPoint, self).__init__(rl == lr) + super(leftRightTwoPoint, self).__init__(rl == lr, 1) self.ll = ll self.lr = lr self.rl = rl @@ -262,34 +132,34 @@ cdef class leftRightTwoPoint(twoPointFunction): def __repr__(self): return '{}(ll={},rr={},lr={},rl={},interface={},sym={})'.format(self.__class__.__name__, self.ll, self.rr, self.lr, self.rl, self.interface, self.symmetric) - cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] value): if x[0] < self.interface: if y[0] < self.interface: - return self.ll + value[0] = self.ll else: - return self.lr + value[0] = self.lr else: if y[0] < self.interface: - return self.rl + value[0] = self.rl else: - return self.rr + value[0] = self.rr - cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, REAL_t* value): if x[0] < self.interface: if y[0] < self.interface: - return self.ll + value[0] = self.ll else: - return self.lr + value[0] = self.lr else: if y[0] < self.interface: - return self.rl + value[0] = self.rl else: - return self.rr + value[0] = self.rr cdef class interfaceTwoPoint(twoPointFunction): def __init__(self, REAL_t horizon1, REAL_t horizon2, BOOL_t left, REAL_t interface=0.): - super(interfaceTwoPoint, self).__init__(True) + super(interfaceTwoPoint, self).__init__(True, 1) self.horizon1 = horizon1 self.horizon2 = horizon2 self.left = left @@ -304,88 +174,88 @@ cdef class interfaceTwoPoint(twoPointFunction): def __repr__(self): return '{}(horizon1={},horizon2={},left={},interface={})'.format(self.__class__.__name__, self.horizon1, self.horizon2, self.left, self.interface) - cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): - return self.evalPtr(x.shape[0], &x[0], &y[0]) + cdef void eval(self, 
REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] value): + self.evalPtr(x.shape[0], &x[0], &y[0], &value[0]) - cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, REAL_t* value): if dim == 1: if self.left: if ((x[0] <= self.interface) and (y[0] <= self.interface)): - return 1. + value[0] = 1. elif ((x[0] > self.interface) and (y[0] > self.interface)): - return 0. + value[0] = 0. elif ((x[0] <= self.interface-self.horizon2) and (y[0] > self.interface)): - return 1. + value[0] = 1. elif ((x[0] > self.interface) and (y[0] <= self.interface-self.horizon2)): - return 1. + value[0] = 1. else: - return 0.5 + value[0] = 0.5 else: if ((x[0] >= self.interface) and (y[0] >= self.interface)): - return 1. + value[0] = 1. elif ((x[0] < self.interface) and (y[0] < self.interface)): - return 0. + value[0] = 0. elif ((x[0] >= self.interface+self.horizon1) and (y[0] < self.interface)): - return 1. + value[0] = 1. elif ((x[0] < self.interface) and (y[0] >= self.interface+self.horizon1)): - return 1. + value[0] = 1. else: - return 0.5 + value[0] = 0.5 elif dim == 2: if self.left: if (x[0] <= self.interface) and ((x[1] > 0.) and (x[1] < 1.)): if (y[0] <= self.interface) and ((y[1] > 0.) and (y[1] < 1.)): - return 1. + value[0] = 1. elif (y[0] > self.interface) and ((y[1] > 0.) and (y[1] < 1.)): if x[0] <= self.interface-self.horizon2: - return 1. + value[0] = 1. else: - return 0.5 + value[0] = 0.5 else: - return 1. + value[0] = 1. elif (x[0] > self.interface) and ((x[1] > 0.) and (x[1] < 1.)): if (y[0] <= self.interface) and ((y[1] > 0.) and (y[1] < 1.)): - return 0.5 + value[0] = 0.5 elif (y[0] > self.interface) and ((y[1] > 0.) and (y[1] < 1.)): - return 0. + value[0] = 0. else: - return 0. + value[0] = 0. else: if (y[0] <= self.interface) and ((y[1] > 0.) and (y[1] < 1.)): - return 1. + value[0] = 1. else: - return 0. + value[0] = 0. else: if (x[0] >= self.interface) and ((x[1] > 0.) 
and (x[1] < 1.)): if (y[0] >= self.interface) and ((y[1] > 0.) and (y[1] < 1.)): - return 1. + value[0] = 1. elif (y[0] < self.interface) and ((y[1] > 0.) and (y[1] < 1.)): if x[0] >= self.interface+self.horizon1: - return 1. + value[0] = 1. else: - return 0.5 + value[0] = 0.5 else: - return 1. + value[0] = 1. elif (x[0] < self.interface) and ((x[1] > 0.) and (x[1] < 1.)): if (y[0] >= self.interface) and ((y[1] > 0.) and (y[1] < 1.)): if y[0] <= self.interface+self.horizon1: - return 0.5 + value[0] = 0.5 else: - return 1. + value[0] = 1. elif (y[0] < self.interface) and ((y[1] > 0.) and (y[1] < 1.)): - return 0. + value[0] = 0. else: - return 0. + value[0] = 0. else: if (y[0] >= self.interface) and ((y[1] > 0.) and (y[1] < 1.)): - return 1. + value[0] = 1. else: - return 0. + value[0] = 0. cdef class temperedTwoPoint(twoPointFunction): def __init__(self, REAL_t lambdaCoeff, INDEX_t dim): - super(temperedTwoPoint, self).__init__(True) + super(temperedTwoPoint, self).__init__(True, 1) self.lambdaCoeff = lambdaCoeff self.dim = dim @@ -398,21 +268,21 @@ cdef class temperedTwoPoint(twoPointFunction): def __repr__(self): return '{}(lambda={})'.format(self.__class__.__name__, self.lambdaCoeff) - cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] value): cdef: INDEX_t i REAL_t r = 0. for i in range(self.dim): r += (x[i]-y[i])*(x[i]-y[i]) - return exp(-self.lambdaCoeff*sqrt(r)) + value[0] = exp(-self.lambdaCoeff*sqrt(r)) - cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, REAL_t* value): cdef: INDEX_t i REAL_t r = 0. 
for i in range(dim): r += (x[i]-y[i])*(x[i]-y[i]) - return exp(-self.lambdaCoeff*sqrt(r)) + value[0] = exp(-self.lambdaCoeff*sqrt(r)) cdef class tensorTwoPoint(twoPointFunction): @@ -420,7 +290,7 @@ cdef class tensorTwoPoint(twoPointFunction): INDEX_t i, j, dim def __init__(self, INDEX_t i, INDEX_t j, INDEX_t dim): - super(tensorTwoPoint, self).__init__(True) + super(tensorTwoPoint, self).__init__(True, 1) self.dim = dim self.i = i self.j = j @@ -434,7 +304,7 @@ cdef class tensorTwoPoint(twoPointFunction): def __repr__(self): return '{}(i={},j={})'.format(self.__class__.__name__, self.i, self.j) - cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] value): cdef: INDEX_t i REAL_t n2 = 0., ExE @@ -444,9 +314,9 @@ cdef class tensorTwoPoint(twoPointFunction): ExE = (x[self.i]-y[self.i])*(x[self.j]-y[self.j])/n2 else: ExE = 1. - return ExE + value[0] = ExE - cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, REAL_t* value): cdef: INDEX_t i REAL_t n2 = 0., ExE @@ -456,12 +326,12 @@ cdef class tensorTwoPoint(twoPointFunction): ExE = (x[self.i]-y[self.i])*(x[self.j]-y[self.j])/n2 else: ExE = 1. 
- return ExE + value[0] = ExE cdef class smoothedLeftRightTwoPoint(twoPointFunction): def __init__(self, REAL_t vl, REAL_t vr, REAL_t r=0.1, REAL_t slope=200.): - super(smoothedLeftRightTwoPoint, self).__init__(False) + super(smoothedLeftRightTwoPoint, self).__init__(False, 1) self.vl = vl self.vr = vr self.r = r @@ -477,24 +347,24 @@ cdef class smoothedLeftRightTwoPoint(twoPointFunction): def __repr__(self): return '{}(vl={},vr={},r={},slope={})'.format(self.__class__.__name__, self.vl, self.vr, self.r, self.slope) - cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] value): if x[0] < -self.r: - return self.vl + value[0] = self.vl elif x[0] > self.r: - return self.vr - return 0.5*(self.vl+self.vr)+0.5*(self.vr-self.vl)*atan(x[0]*self.slope) * self.fac + value[0] = self.vr + value[0] = 0.5*(self.vl+self.vr)+0.5*(self.vr-self.vl)*atan(x[0]*self.slope) * self.fac - cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, REAL_t* value): if x[0] < -self.r: - return self.vl + value[0] = self.vl elif x[0] > self.r: - return self.vr - return 0.5*(self.vl+self.vr)+0.5*(self.vr-self.vl)*atan(x[0]*self.slope) * self.fac + value[0] = self.vr + value[0] = 0.5*(self.vl+self.vr)+0.5*(self.vr-self.vl)*atan(x[0]*self.slope) * self.fac cdef class unsymTwoPoint(twoPointFunction): def __init__(self, REAL_t l, REAL_t r): - super(unsymTwoPoint, self).__init__(l == r) + super(unsymTwoPoint, self).__init__(l == r, 1) self.l = l self.r = r @@ -507,40 +377,31 @@ cdef class unsymTwoPoint(twoPointFunction): def __repr__(self): return '{}(l={},r={})'.format(self.__class__.__name__, self.l, self.r) - cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] value): if x[0] < y[0]: - return self.l + value[0] = self.l else: - return self.r + value[0] = self.r - cdef REAL_t evalPtr(self, INDEX_t dim, 
REAL_t* x, REAL_t* y): + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, REAL_t* value): if x[0] < y[0]: - return self.l + value[0] = self.l else: - return self.r - - -# cdef class parametrizedTwoPointFunction(twoPointFunction): -# def __init__(self, BOOL_t symmetric): -# super(parametrizedTwoPointFunction, self).__init__(symmetric) - -# cdef void setParams(self, void *params): -# self.params = params - -# cdef void* getParams(self): -# return self.params + value[0] = self.r cdef class inverseTwoPoint(twoPointFunction): def __init__(self, twoPointFunction f): - super(inverseTwoPoint, self).__init__(f.symmetric) + super(inverseTwoPoint, self).__init__(f.symmetric, 1) self.f = f - cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): - return 1./self.f.eval(x, y) + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] value): + self.f.eval(x, y, value) + value[0] = 1./value[0] - cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): - return 1./self.f.evalPtr(dim, x, y) + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, REAL_t* value): + self.f.evalPtr(dim, x, y, value) + value[0] = 1./value[0] def __repr__(self): return '1/{}'.format(self.f) @@ -550,35 +411,3 @@ cdef class inverseTwoPoint(twoPointFunction): def __setstate__(self, state): inverseTwoPoint.__init__(self, state) - - -# cdef class productParametrizedTwoPoint(parametrizedTwoPointFunction): -# def __init__(self, twoPointFunction f1, twoPointFunction f2): -# super(productParametrizedTwoPoint, self).__init__(f1.symmetric and f2.symmetric) -# self.f1 = f1 -# self.f2 = f2 - -# cdef void setParams(self, void *params): -# cdef: -# parametrizedTwoPointFunction f -# if isinstance(self.f1, parametrizedTwoPointFunction): -# f = self.f1 -# f.setParams(params) -# if isinstance(self.f2, parametrizedTwoPointFunction): -# f = self.f2 -# f.setParams(params) - -# cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): -# return self.f1.eval(x, y)*self.f2.eval(x, y) - -# cdef REAL_t 
evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): -# return self.f1.evalPtr(dim, x, y)*self.f2.evalPtr(dim, x, y) - -# def __repr__(self): -# return '{}*{}'.format(self.f1, self.f2) - -# def __getstate__(self): -# return self.f1, self.f2 - -# def __setstate__(self, state): -# productParametrizedTwoPoint.__init__(self, state[0], state[1]) diff --git a/nl/PyNucleus_nl/twoPointFunctions_decl_{SCALAR}.pxi b/nl/PyNucleus_nl/twoPointFunctions_decl_{SCALAR}.pxi index 35d49a0..0cb50fe 100644 --- a/nl/PyNucleus_nl/twoPointFunctions_decl_{SCALAR}.pxi +++ b/nl/PyNucleus_nl/twoPointFunctions_decl_{SCALAR}.pxi @@ -8,13 +8,14 @@ cdef class {SCALAR_label}twoPointFunction: cdef: public BOOL_t symmetric - cdef {SCALAR}_t eval(self, REAL_t[::1] x, REAL_t[::1] y) - cdef {SCALAR}_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y) + public INDEX_t valueSize + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, {SCALAR}_t[::1] value) + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, {SCALAR}_t* value) cdef class {SCALAR_label}productTwoPoint({SCALAR_label}twoPointFunction): cdef: - public twoPointFunction f1, f2 + public {SCALAR_label}twoPointFunction f1, f2 cdef class {SCALAR_label}constantTwoPoint({SCALAR_label}twoPointFunction): diff --git a/nl/PyNucleus_nl/twoPointFunctions_{SCALAR}.pxi b/nl/PyNucleus_nl/twoPointFunctions_{SCALAR}.pxi index c8da7c3..7b17906 100644 --- a/nl/PyNucleus_nl/twoPointFunctions_{SCALAR}.pxi +++ b/nl/PyNucleus_nl/twoPointFunctions_{SCALAR}.pxi @@ -6,16 +6,23 @@ ################################################################################### cdef class {SCALAR_label}twoPointFunction: - def __init__(self, BOOL_t symmetric): + def __init__(self, BOOL_t symmetric, INDEX_t valueSize): self.symmetric = symmetric + self.valueSize = valueSize def __call__(self, REAL_t[::1] x, REAL_t[::1] y): - return self.eval(x, y) + cdef: + {SCALAR}_t[::1] value = uninitialized((self.valueSize), dtype={SCALAR}) + self.eval(x, y, value) + if self.valueSize == 1: + 
return value[0] + else: + return np.array(value, copy=False) - cdef {SCALAR}_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, {SCALAR}_t[::1] value): raise NotImplementedError() - cdef {SCALAR}_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, {SCALAR}_t* value): raise NotImplementedError() def __getstate__(self): @@ -50,7 +57,7 @@ cdef class {SCALAR_label}twoPointFunction: for j in range(mesh.num_cells): x[0] = X[i, j] y[0] = Y[i, j] - S[i, j] = self.eval(x, y) + self.evalPtr(x.shape[0], &x[0], &y[0], &S[i, j]) plt.pcolormesh(X, Y, S, **kwargs) plt.colorbar() plt.xlabel(r'$x$') @@ -83,30 +90,44 @@ cdef class {SCALAR_label}fixedTwoPointFunction({function_type}): fixed_type fixedType def __init__(self, {SCALAR_label}twoPointFunction f, REAL_t[::1] point, fixed_type fixedType): + assert f.valueSize == 1 self.f = f self.point = point self.fixedType = fixedType cdef {SCALAR}_t eval(self, REAL_t[::1] x): + cdef: + {SCALAR}_t val if self.fixedType == FIXED_X: - return self.f(self.point, x) - if self.fixedType == FIXED_Y: - return self.f(x, self.point) + self.f.evalPtr(x.shape[0], &self.point[0], &x[0], &val) + elif self.fixedType == FIXED_Y: + self.f.evalPtr(x.shape[0], &x[0], &self.point[0], &val) else: - return self.f(x, x) + self.f.evalPtr(x.shape[0], &x[0], &x[0], &val) + return val cdef class {SCALAR_label}productTwoPoint({SCALAR_label}twoPointFunction): def __init__(self, {SCALAR_label}twoPointFunction f1, {SCALAR_label}twoPointFunction f2): - super(productTwoPoint, self).__init__(f1.symmetric and f2.symmetric) + assert f1.valueSize == 1 + assert f2.valueSize == 1 + super({SCALAR_label}productTwoPoint, self).__init__(f1.symmetric and f2.symmetric, 1) self.f1 = f1 self.f2 = f2 - cdef {SCALAR}_t eval(self, REAL_t[::1] x, REAL_t[::1] y): - return self.f1.eval(x, y)*self.f2.eval(x, y) + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, {SCALAR}_t[::1] value): 
+ cdef: + {SCALAR}_t val1, val2 + self.f1.evalPtr(x.shape[0], &x[0], &y[0], &val1) + self.f2.evalPtr(x.shape[0], &x[0], &y[0], &val2) + value[0] = val1*val2 - cdef {SCALAR}_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): - return self.f1.evalPtr(dim, x, y)*self.f2.evalPtr(dim, x, y) + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, {SCALAR}_t* value): + cdef: + {SCALAR}_t val1, val2 + self.f1.evalPtr(dim, x, y, &val1) + self.f2.evalPtr(dim, x, y, &val2) + value[0] = val1*val2 def __repr__(self): return '{}*{}'.format(self.f1, self.f2) @@ -120,14 +141,14 @@ cdef class {SCALAR_label}productTwoPoint({SCALAR_label}twoPointFunction): cdef class {SCALAR_label}constantTwoPoint({SCALAR_label}twoPointFunction): def __init__(self, {SCALAR}_t value): - super(constantTwoPoint, self).__init__(True) + super({SCALAR_label}constantTwoPoint, self).__init__(True, 1) self.value = value - cdef {SCALAR}_t eval(self, REAL_t[::1] x, REAL_t[::1] y): - return self.value + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, {SCALAR}_t[::1] value): + value[0] = self.value - cdef {SCALAR}_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): - return self.value + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, {SCALAR}_t* value): + value[0] = self.value def __repr__(self): return '{}'.format(self.value) @@ -140,8 +161,8 @@ cdef class {SCALAR_label}constantTwoPoint({SCALAR_label}twoPointFunction): cdef class {SCALAR_label}parametrizedTwoPointFunction({SCALAR_label}twoPointFunction): - def __init__(self, BOOL_t symmetric): - super({SCALAR_label}parametrizedTwoPointFunction, self).__init__(symmetric) + def __init__(self, BOOL_t symmetric, INDEX_t valueSize): + super({SCALAR_label}parametrizedTwoPointFunction, self).__init__(symmetric, valueSize) cdef void setParams(self, void *params): self.params = params @@ -152,7 +173,9 @@ cdef class {SCALAR_label}parametrizedTwoPointFunction({SCALAR_label}twoPointFunc cdef class 
{SCALAR_label}productParametrizedTwoPoint({SCALAR_label}parametrizedTwoPointFunction): def __init__(self, {SCALAR_label}twoPointFunction f1, {SCALAR_label}twoPointFunction f2): - super({SCALAR_label}productParametrizedTwoPoint, self).__init__(f1.symmetric and f2.symmetric) + assert f1.valueSize == 1 + assert f2.valueSize == 1 + super({SCALAR_label}productParametrizedTwoPoint, self).__init__(f1.symmetric and f2.symmetric, 1) self.f1 = f1 self.f2 = f2 @@ -166,11 +189,19 @@ cdef class {SCALAR_label}productParametrizedTwoPoint({SCALAR_label}parametrizedT f = self.f2 f.setParams(params) - cdef {SCALAR}_t eval(self, REAL_t[::1] x, REAL_t[::1] y): - return self.f1.eval(x, y)*self.f2.eval(x, y) + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y, {SCALAR}_t[::1] value): + cdef: + {SCALAR}_t val1, val2 + self.f1.evalPtr(x.shape[0], &x[0], &y[0], &val1) + self.f2.evalPtr(x.shape[0], &x[0], &y[0], &val2) + value[0] = val1*val2 - cdef {SCALAR}_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): - return self.f1.evalPtr(dim, x, y)*self.f2.evalPtr(dim, x, y) + cdef void evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, {SCALAR}_t* value): + cdef: + {SCALAR}_t val1, val2 + self.f1.evalPtr(dim, x, y, &val1) + self.f2.evalPtr(dim, x, y, &val2) + value[0] = val1*val2 def __repr__(self): return '{}*{}'.format(self.f1, self.f2) diff --git a/tests/cache_runFractional.py--domaindisc--stwoDomainNonSym(0.25,0.75)--problemknownSolution--elementP1--solvergmres-mg--matrixFormatH2 b/tests/cache_runFractional.py--domaindisc--stwoDomainNonSym(0.25,0.75)--problemknownSolution--elementP1--solvergmres-mg--matrixFormatH2 index 3ba67fd..fa4862a 100644 --- a/tests/cache_runFractional.py--domaindisc--stwoDomainNonSym(0.25,0.75)--problemknownSolution--elementP1--solvergmres-mg--matrixFormatH2 +++ b/tests/cache_runFractional.py--domaindisc--stwoDomainNonSym(0.25,0.75)--problemknownSolution--elementP1--solvergmres-mg--matrixFormatH2 @@ -1,11 +1,11 @@ Timers: {} errors: - L2 error: 0.005777344730945286 - 
L2 error interpolated: 0.0032309065559272816 - Linf error interpolated: 0.010836872679943266 - relative L2 error: 0.005049623135026952 - relative interpolated L2 error: 0.002825578067099134 - relative interpolated Linf error: 0.010836872679943266 + L2 error: 0.005523848294663783 + L2 error interpolated: 0.002949427025611149 + Linf error interpolated: 0.010900169318455166 + relative L2 error: 0.004828057428130919 + relative interpolated L2 error: 0.002579411124963488 + relative interpolated Linf error: 0.010900169318455166 meshes: {} results: {} vectors: {} diff --git a/tests/cache_runFractional.py--domaininterval--stwoDomainNonSym(0.25,0.75)--problemknownSolution--elementP1--solvergmres-mg--matrixFormatH2 b/tests/cache_runFractional.py--domaininterval--stwoDomainNonSym(0.25,0.75)--problemknownSolution--elementP1--solvergmres-mg--matrixFormatH2 index 077d262..7b7f776 100644 --- a/tests/cache_runFractional.py--domaininterval--stwoDomainNonSym(0.25,0.75)--problemknownSolution--elementP1--solvergmres-mg--matrixFormatH2 +++ b/tests/cache_runFractional.py--domaininterval--stwoDomainNonSym(0.25,0.75)--problemknownSolution--elementP1--solvergmres-mg--matrixFormatH2 @@ -1,11 +1,11 @@ Timers: {} errors: - L2 error: 0.002069866055876411 - L2 error interpolated: 0.001286197038918661 - Linf error interpolated: 0.003597160027701274 - relative L2 error: 0.0018857140778679666 - relative interpolated L2 error: 0.0011718453355923197 - relative interpolated Linf error: 0.003597160027701274 + L2 error: 0.0019681481495570246 + L2 error interpolated: 0.0011282723772410108 + Linf error interpolated: 0.0036210750360706756 + relative L2 error: 0.0017930458168598858 + relative interpolated L2 error: 0.0010279612551892601 + relative interpolated Linf error: 0.0036210750360706756 meshes: {} results: {} vectors: {} diff --git a/tests/cache_runFractional.py--domaininterval--stwoDomainNonSym(0.25,0.75)--problemknownSolution--elementP1--solverlu--matrixFormatH2 
b/tests/cache_runFractional.py--domaininterval--stwoDomainNonSym(0.25,0.75)--problemknownSolution--elementP1--solverlu--matrixFormatH2 index 61b74e1..955ad68 100644 --- a/tests/cache_runFractional.py--domaininterval--stwoDomainNonSym(0.25,0.75)--problemknownSolution--elementP1--solverlu--matrixFormatH2 +++ b/tests/cache_runFractional.py--domaininterval--stwoDomainNonSym(0.25,0.75)--problemknownSolution--elementP1--solverlu--matrixFormatH2 @@ -1,11 +1,11 @@ Timers: {} errors: - L2 error: 0.002069872877152111 - L2 error interpolated: 0.0012862054358249695 - Linf error interpolated: 0.00359720325470074 - relative L2 error: 0.0018857202922679657 - relative interpolated L2 error: 0.0011718529859562944 - relative interpolated Linf error: 0.00359720325470074 + L2 error: 0.001968154983051443 + L2 error interpolated: 0.0011282813503860167 + Linf error interpolated: 0.0036211177968323988 + relative L2 error: 0.0017930520423915278 + relative interpolated L2 error: 0.0010279694305603757 + relative interpolated Linf error: 0.0036211177968323988 meshes: {} results: {} vectors: {} diff --git a/tests/cache_testDistOp.py--horizoninf--domaininterval--sconst(0.25)--problemconstant--noRef6--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 b/tests/cache_testDistOp.py--horizoninf--domaininterval--sconst(0.25)--problemconstant--noRef6--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 index 4ffba0f..b3408f8 100644 --- a/tests/cache_testDistOp.py--horizoninf--domaininterval--sconst(0.25)--problemconstant--noRef6--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 +++ b/tests/cache_testDistOp.py--horizoninf--domaininterval--sconst(0.25)--problemconstant--noRef6--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 @@ -2,13 +2,14 @@ Timers: {} TimersH2: {} info: {} matvec errors: - '|(A_dense 
- A_distributed_bcast) * x|': 3.816424931677378e-07 - '|(A_dense - A_distributed_halo) * x|': 3.8164249316810784e-07 - '|(A_dense - A_h2) * x |': 3.233222486362068e-07 - '|(A_dense - A_h2_reduced) * x|': 3.8164249315495977e-07 - '|(A_h2 - A_distributed_bcast) * x|': 2.429563526090043e-07 - '|(A_h2 - A_distributed_halo) * x|': 2.4295635260837935e-07 - '|(A_h2 - A_h2_reduced) * x |': 2.429563525907425e-07 + '|(A_dense - A_distributed_bcast) * x|': 3.8165171132657736e-07 + '|(A_dense - A_distributed_halo) * x|': 3.8165171132652606e-07 + '|(A_dense - A_h2) * x |': 3.233321814687945e-07 + '|(A_dense - A_h2_reduced) * x|': 3.8165171131848205e-07 + '|(A_h2 - A_distributed_bcast) * x|': 2.4297107870765103e-07 + '|(A_h2 - A_distributed_halo) * x|': 2.4297107870749004e-07 + '|(A_h2 - A_h2_reduced) * x |': 2.429710786976915e-07 solve: - residual norm: 3.2398173671756745e-06 + CG iterations: 8 + residual norm: 3.2398173668907127e-06 stats: {} diff --git a/tests/cache_testDistOp.py--horizoninf--domaininterval--sconst(0.75)--problemconstant--noRef6--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 b/tests/cache_testDistOp.py--horizoninf--domaininterval--sconst(0.75)--problemconstant--noRef6--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 index c9351ea..0faf189 100644 --- a/tests/cache_testDistOp.py--horizoninf--domaininterval--sconst(0.75)--problemconstant--noRef6--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 +++ b/tests/cache_testDistOp.py--horizoninf--domaininterval--sconst(0.75)--problemconstant--noRef6--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 @@ -2,13 +2,14 @@ Timers: {} TimersH2: {} info: {} matvec errors: - '|(A_dense - A_distributed_bcast) * x|': 0.00011063411999060675 - '|(A_dense - A_distributed_halo) * x|': 0.00011063411999060407 - '|(A_dense - A_h2) 
* x |': 9.543571820177557e-05 - '|(A_dense - A_h2_reduced) * x|': 0.00011063411998750746 - '|(A_h2 - A_distributed_bcast) * x|': 8.642227264284719e-05 - '|(A_h2 - A_distributed_halo) * x|': 8.642227264284524e-05 - '|(A_h2 - A_h2_reduced) * x |': 8.642227263923861e-05 + '|(A_dense - A_distributed_bcast) * x|': 0.00011064427313268595 + '|(A_dense - A_distributed_halo) * x|': 0.00011064427313268325 + '|(A_dense - A_h2) * x |': 9.54464240645034e-05 + '|(A_dense - A_h2_reduced) * x|': 0.00011064427313016145 + '|(A_h2 - A_distributed_bcast) * x|': 8.643264538064418e-05 + '|(A_h2 - A_distributed_halo) * x|': 8.643264538064223e-05 + '|(A_h2 - A_h2_reduced) * x |': 8.643264537853821e-05 solve: - residual norm: 4.799642454857278e-06 + CG iterations: 40 + residual norm: 4.799642527092277e-06 stats: {} diff --git a/tests/cache_testDistOp.py--horizoninf--domaininterval--stwoDomain(0.25,0.75,0.5,0.5)--problemconstant--noRef6--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 b/tests/cache_testDistOp.py--horizoninf--domaininterval--stwoDomain(0.25,0.75,0.5,0.5)--problemconstant--noRef6--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 deleted file mode 100644 index 1675938..0000000 --- a/tests/cache_testDistOp.py--horizoninf--domaininterval--stwoDomain(0.25,0.75,0.5,0.5)--problemconstant--noRef6--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 +++ /dev/null @@ -1,14 +0,0 @@ -Timers: {} -TimersH2: {} -info: {} -matvec errors: - '|(A_dense - A_distributed_bcast) * x|': 7.576554001105956e-06 - '|(A_dense - A_distributed_halo) * x|': 7.576554001107469e-06 - '|(A_dense - A_h2) * x |': 7.574253924699861e-06 - '|(A_dense - A_h2_reduced) * x|': 7.576554001415875e-06 - '|(A_h2 - A_distributed_bcast) * x|': 2.6989568205697076e-07 - '|(A_h2 - A_distributed_halo) * x|': 2.6989568205531947e-07 - '|(A_h2 - A_h2_reduced) * x |': 
2.698956819105973e-07 -solve: - residual norm: 6.506035839523732e-06 -stats: {} diff --git a/tests/cache_testDistOp.py--horizoninf--domaininterval--stwoDomainNonSym(0.25,0.75)--problemconstant--noRef6--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 b/tests/cache_testDistOp.py--horizoninf--domaininterval--stwoDomainNonSym(0.25,0.75)--problemconstant--noRef6--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 new file mode 100644 index 0000000..759dfb2 --- /dev/null +++ b/tests/cache_testDistOp.py--horizoninf--domaininterval--stwoDomainNonSym(0.25,0.75)--problemconstant--noRef6--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 @@ -0,0 +1,15 @@ +Timers: {} +TimersH2: {} +info: {} +matvec errors: + '|(A_dense - A_distributed_bcast) * x|': 0.00018316241788625764 + '|(A_dense - A_distributed_halo) * x|': 0.00018316241788625824 + '|(A_dense - A_h2) * x |': 0.0002831704784625597 + '|(A_dense - A_h2_reduced) * x|': 0.00018316241788625824 + '|(A_h2 - A_distributed_bcast) * x|': 0.00022298192765630998 + '|(A_h2 - A_distributed_halo) * x|': 0.00022298192765631274 + '|(A_h2 - A_h2_reduced) * x |': 0.00022298192765631274 +solve: + CG iterations: 1000 + residual norm: 0.0685570841274984 +stats: {} diff --git a/tests/cache_testDistOp.py--horizoninf--domaininterval--svarconst(0.25)--problemconstant--noRef6--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 b/tests/cache_testDistOp.py--horizoninf--domaininterval--svarconst(0.25)--problemconstant--noRef6--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 index 4ffba0f..0a13348 100644 --- 
a/tests/cache_testDistOp.py--horizoninf--domaininterval--svarconst(0.25)--problemconstant--noRef6--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 +++ b/tests/cache_testDistOp.py--horizoninf--domaininterval--svarconst(0.25)--problemconstant--noRef6--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 @@ -2,13 +2,14 @@ Timers: {} TimersH2: {} info: {} matvec errors: - '|(A_dense - A_distributed_bcast) * x|': 3.816424931677378e-07 - '|(A_dense - A_distributed_halo) * x|': 3.8164249316810784e-07 - '|(A_dense - A_h2) * x |': 3.233222486362068e-07 - '|(A_dense - A_h2_reduced) * x|': 3.8164249315495977e-07 - '|(A_h2 - A_distributed_bcast) * x|': 2.429563526090043e-07 - '|(A_h2 - A_distributed_halo) * x|': 2.4295635260837935e-07 - '|(A_h2 - A_h2_reduced) * x |': 2.429563525907425e-07 + '|(A_dense - A_distributed_bcast) * x|': 3.816517113159294e-07 + '|(A_dense - A_distributed_halo) * x|': 3.816517113162995e-07 + '|(A_dense - A_h2) * x |': 3.2333218146514824e-07 + '|(A_dense - A_h2_reduced) * x|': 3.8165171129938495e-07 + '|(A_h2 - A_distributed_bcast) * x|': 2.4297107871961114e-07 + '|(A_h2 - A_distributed_halo) * x|': 2.4297107871898624e-07 + '|(A_h2 - A_h2_reduced) * x |': 2.4297107869777103e-07 solve: - residual norm: 3.2398173671756745e-06 + CG iterations: 8 + residual norm: 3.2398173668907127e-06 stats: {} diff --git a/tests/cache_testDistOp.py--horizoninf--domaininterval--svarconst(0.75)--problemconstant--noRef6--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 b/tests/cache_testDistOp.py--horizoninf--domaininterval--svarconst(0.75)--problemconstant--noRef6--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 index c9351ea..0faf189 100644 --- 
a/tests/cache_testDistOp.py--horizoninf--domaininterval--svarconst(0.75)--problemconstant--noRef6--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 +++ b/tests/cache_testDistOp.py--horizoninf--domaininterval--svarconst(0.75)--problemconstant--noRef6--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 @@ -2,13 +2,14 @@ Timers: {} TimersH2: {} info: {} matvec errors: - '|(A_dense - A_distributed_bcast) * x|': 0.00011063411999060675 - '|(A_dense - A_distributed_halo) * x|': 0.00011063411999060407 - '|(A_dense - A_h2) * x |': 9.543571820177557e-05 - '|(A_dense - A_h2_reduced) * x|': 0.00011063411998750746 - '|(A_h2 - A_distributed_bcast) * x|': 8.642227264284719e-05 - '|(A_h2 - A_distributed_halo) * x|': 8.642227264284524e-05 - '|(A_h2 - A_h2_reduced) * x |': 8.642227263923861e-05 + '|(A_dense - A_distributed_bcast) * x|': 0.00011064427313268595 + '|(A_dense - A_distributed_halo) * x|': 0.00011064427313268325 + '|(A_dense - A_h2) * x |': 9.54464240645034e-05 + '|(A_dense - A_h2_reduced) * x|': 0.00011064427313016145 + '|(A_h2 - A_distributed_bcast) * x|': 8.643264538064418e-05 + '|(A_h2 - A_distributed_halo) * x|': 8.643264538064223e-05 + '|(A_h2 - A_h2_reduced) * x |': 8.643264537853821e-05 solve: - residual norm: 4.799642454857278e-06 + CG iterations: 40 + residual norm: 4.799642527092277e-06 stats: {} diff --git a/tests/cache_testDistOp.py--horizoninf--domainsquare--sconst(0.25)--problemconstant--noRef3--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 b/tests/cache_testDistOp.py--horizoninf--domainsquare--sconst(0.25)--problemconstant--noRef3--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 index 48aa108..c417a90 100644 --- 
a/tests/cache_testDistOp.py--horizoninf--domainsquare--sconst(0.25)--problemconstant--noRef3--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 +++ b/tests/cache_testDistOp.py--horizoninf--domainsquare--sconst(0.25)--problemconstant--noRef3--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 @@ -2,13 +2,14 @@ Timers: {} TimersH2: {} info: {} matvec errors: - '|(A_dense - A_distributed_bcast) * x|': 0.00039121455877137734 - '|(A_dense - A_distributed_halo) * x|': 0.0003912145587713774 - '|(A_dense - A_h2) * x |': 0.0003912145587713713 - '|(A_dense - A_h2_reduced) * x|': 0.0003912145587713712 - '|(A_h2 - A_distributed_bcast) * x|': 8.321377044794258e-17 - '|(A_h2 - A_distributed_halo) * x|': 8.33264918075009e-17 - '|(A_h2 - A_h2_reduced) * x |': 3.9061383326340725e-18 + '|(A_dense - A_distributed_bcast) * x|': 0.0003899386883711289 + '|(A_dense - A_distributed_halo) * x|': 0.00038993868837112886 + '|(A_dense - A_h2) * x |': 0.00038993868837111894 + '|(A_dense - A_h2_reduced) * x|': 0.000389938688371119 + '|(A_h2 - A_distributed_bcast) * x|': 9.356721282675474e-17 + '|(A_h2 - A_distributed_halo) * x|': 9.354648141330332e-17 + '|(A_h2 - A_h2_reduced) * x |': 9.326659742954563e-19 solve: - residual norm: 3.9841801434345884e-06 + CG iterations: 4 + residual norm: 3.984128775788769e-06 stats: {} diff --git a/tests/cache_testDistOp.py--horizoninf--domainsquare--sconst(0.75)--problemconstant--noRef3--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 b/tests/cache_testDistOp.py--horizoninf--domainsquare--sconst(0.75)--problemconstant--noRef3--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 index 7dee829..fb830de 100644 --- 
a/tests/cache_testDistOp.py--horizoninf--domainsquare--sconst(0.75)--problemconstant--noRef3--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 +++ b/tests/cache_testDistOp.py--horizoninf--domainsquare--sconst(0.75)--problemconstant--noRef3--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 @@ -2,13 +2,14 @@ Timers: {} TimersH2: {} info: {} matvec errors: - '|(A_dense - A_distributed_bcast) * x|': 0.00029971842268656493 - '|(A_dense - A_distributed_halo) * x|': 0.00029971842268656493 - '|(A_dense - A_h2) * x |': 0.0002997184226865862 - '|(A_dense - A_h2_reduced) * x|': 0.0002997184226865862 - '|(A_h2 - A_distributed_bcast) * x|': 3.4947303616337217e-15 - '|(A_h2 - A_distributed_halo) * x|': 3.494730361213269e-15 - '|(A_h2 - A_h2_reduced) * x |': 5.421010862427522e-20 + '|(A_dense - A_distributed_bcast) * x|': 0.0003006751126370004 + '|(A_dense - A_distributed_halo) * x|': 0.00030067511263700024 + '|(A_dense - A_h2) * x |': 0.00030067511263696034 + '|(A_dense - A_h2_reduced) * x|': 0.0003006751126369605 + '|(A_h2 - A_distributed_bcast) * x|': 3.3272127432116837e-15 + '|(A_h2 - A_distributed_halo) * x|': 3.327183346892776e-15 + '|(A_h2 - A_h2_reduced) * x |': 1.738319638496483e-18 solve: - residual norm: 3.205046217544268e-06 + CG iterations: 15 + residual norm: 3.2051430665174306e-06 stats: {} diff --git a/tests/cache_testDistOp.py--horizoninf--domainsquare--stwoDomain(0.25,0.75,0.5,0.5)--problemconstant--noRef3--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 b/tests/cache_testDistOp.py--horizoninf--domainsquare--stwoDomain(0.25,0.75,0.5,0.5)--problemconstant--noRef3--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 deleted file mode 100644 index 8df8201..0000000 --- 
a/tests/cache_testDistOp.py--horizoninf--domainsquare--stwoDomain(0.25,0.75,0.5,0.5)--problemconstant--noRef3--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 +++ /dev/null @@ -1,14 +0,0 @@ -Timers: {} -TimersH2: {} -info: {} -matvec errors: - '|(A_dense - A_distributed_bcast) * x|': 0.00015866680661817494 - '|(A_dense - A_distributed_halo) * x|': 0.00015866680661817467 - '|(A_dense - A_h2) * x |': 0.00015889304845314317 - '|(A_dense - A_h2_reduced) * x|': 0.0001586668066180097 - '|(A_h2 - A_distributed_bcast) * x|': 3.794296003184545e-06 - '|(A_h2 - A_distributed_halo) * x|': 3.794296003184545e-06 - '|(A_h2 - A_h2_reduced) * x |': 3.7942960034768332e-06 -solve: - residual norm: 8.62345672497112e-06 -stats: {} diff --git a/tests/cache_testDistOp.py--horizoninf--domainsquare--stwoDomainNonSym(0.25,0.75)--problemconstant--noRef3--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 b/tests/cache_testDistOp.py--horizoninf--domainsquare--stwoDomainNonSym(0.25,0.75)--problemconstant--noRef3--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 new file mode 100644 index 0000000..e68b62f --- /dev/null +++ b/tests/cache_testDistOp.py--horizoninf--domainsquare--stwoDomainNonSym(0.25,0.75)--problemconstant--noRef3--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 @@ -0,0 +1,15 @@ +Timers: {} +TimersH2: {} +info: {} +matvec errors: + '|(A_dense - A_distributed_bcast) * x|': 0.0011268992709496262 + '|(A_dense - A_distributed_halo) * x|': 0.0011268992709496262 + '|(A_dense - A_h2) * x |': 0.0011268992709496262 + '|(A_dense - A_h2_reduced) * x|': 0.0011268992709496262 + '|(A_h2 - A_distributed_bcast) * x|': 0.0 + '|(A_h2 - A_distributed_halo) * x|': 0.0 + '|(A_h2 - A_h2_reduced) * x |': 0.0 +solve: + CG iterations: 1000 + residual norm: 0.00020456935587475318 +stats: {} diff --git 
a/tests/cache_testDistOp.py--horizoninf--domainsquare--svarconst(0.25)--problemconstant--noRef3--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 b/tests/cache_testDistOp.py--horizoninf--domainsquare--svarconst(0.25)--problemconstant--noRef3--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 index 48aa108..cde11b1 100644 --- a/tests/cache_testDistOp.py--horizoninf--domainsquare--svarconst(0.25)--problemconstant--noRef3--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 +++ b/tests/cache_testDistOp.py--horizoninf--domainsquare--svarconst(0.25)--problemconstant--noRef3--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 @@ -2,13 +2,14 @@ Timers: {} TimersH2: {} info: {} matvec errors: - '|(A_dense - A_distributed_bcast) * x|': 0.00039121455877137734 - '|(A_dense - A_distributed_halo) * x|': 0.0003912145587713774 - '|(A_dense - A_h2) * x |': 0.0003912145587713713 - '|(A_dense - A_h2_reduced) * x|': 0.0003912145587713712 - '|(A_h2 - A_distributed_bcast) * x|': 8.321377044794258e-17 - '|(A_h2 - A_distributed_halo) * x|': 8.33264918075009e-17 - '|(A_h2 - A_h2_reduced) * x |': 3.9061383326340725e-18 + '|(A_dense - A_distributed_bcast) * x|': 0.00038993868837113336 + '|(A_dense - A_distributed_halo) * x|': 0.0003899386883711334 + '|(A_dense - A_h2) * x |': 0.0003899386883711279 + '|(A_dense - A_h2_reduced) * x|': 0.0003899386883711279 + '|(A_h2 - A_distributed_bcast) * x|': 7.833771481761733e-17 + '|(A_h2 - A_distributed_halo) * x|': 7.833193749803594e-17 + '|(A_h2 - A_h2_reduced) * x |': 9.513835452293473e-19 solve: - residual norm: 3.9841801434345884e-06 + CG iterations: 4 + residual norm: 3.984128775788795e-06 stats: {} diff --git 
a/tests/cache_testDistOp.py--horizoninf--domainsquare--svarconst(0.75)--problemconstant--noRef3--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 b/tests/cache_testDistOp.py--horizoninf--domainsquare--svarconst(0.75)--problemconstant--noRef3--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 index 7dee829..bcf359e 100644 --- a/tests/cache_testDistOp.py--horizoninf--domainsquare--svarconst(0.75)--problemconstant--noRef3--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 +++ b/tests/cache_testDistOp.py--horizoninf--domainsquare--svarconst(0.75)--problemconstant--noRef3--buildDense--buildH2--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--doSolve--no-write4 @@ -2,13 +2,14 @@ Timers: {} TimersH2: {} info: {} matvec errors: - '|(A_dense - A_distributed_bcast) * x|': 0.00029971842268656493 - '|(A_dense - A_distributed_halo) * x|': 0.00029971842268656493 - '|(A_dense - A_h2) * x |': 0.0002997184226865862 - '|(A_dense - A_h2_reduced) * x|': 0.0002997184226865862 - '|(A_h2 - A_distributed_bcast) * x|': 3.4947303616337217e-15 - '|(A_h2 - A_distributed_halo) * x|': 3.494730361213269e-15 - '|(A_h2 - A_h2_reduced) * x |': 5.421010862427522e-20 + '|(A_dense - A_distributed_bcast) * x|': 0.00030067511263694505 + '|(A_dense - A_distributed_halo) * x|': 0.00030067511263694434 + '|(A_dense - A_h2) * x |': 0.00030067511263676095 + '|(A_dense - A_h2_reduced) * x|': 0.0003006751126367617 + '|(A_h2 - A_distributed_bcast) * x|': 3.3349237703568857e-15 + '|(A_h2 - A_distributed_halo) * x|': 3.3349741881822486e-15 + '|(A_h2 - A_h2_reduced) * x |': 6.993156541509241e-18 solve: - residual norm: 3.205046217544268e-06 + CG iterations: 15 + residual norm: 3.2051430665174463e-06 stats: {} diff --git a/tests/test_drivers_intFracLapl.py b/tests/test_drivers_intFracLapl.py index 2fad65c..a1e3c7c 100644 --- 
a/tests/test_drivers_intFracLapl.py +++ b/tests/test_drivers_intFracLapl.py @@ -151,10 +151,12 @@ def testVariableOrder(extra): ('interval', 'const(0.75)'), ('interval', 'varconst(0.25)'), ('interval', 'varconst(0.75)'), + ('interval', 'twoDomainNonSym(0.25,0.75)'), ('square', 'const(0.25)'), ('square', 'const(0.75)'), ('square', 'varconst(0.25)'), ('square', 'varconst(0.75)'), + ('square', 'twoDomainNonSym(0.25,0.75)'), ], ids=idfunc) def runDistOp_params(request): diff --git a/tests/test_fracLapl.py b/tests/test_fracLapl.py index 7b44302..7e7fa0d 100644 --- a/tests/test_fracLapl.py +++ b/tests/test_fracLapl.py @@ -235,5 +235,3 @@ def testH2(setupH2): else: refinements = 3 h2(dim, s, refinements, element, errBnd) - - diff --git a/tests/test_kernels.py b/tests/test_kernels.py index f490362..b08f577 100644 --- a/tests/test_kernels.py +++ b/tests/test_kernels.py @@ -141,6 +141,8 @@ def idfuncFractional(param): from PyNucleus_fem import meshFactory, dofmapFactory mesh1d = meshFactory('interval', a=-1, b=1, hTarget=1e-2) dm1d = dofmapFactory('P1', mesh1d, -1) +mesh2d = meshFactory('disc', hTarget=1e-1, n=8) +dm2d = dofmapFactory('P1', mesh2d, -1) @pytest.fixture(scope='module', params=[ @@ -344,7 +346,90 @@ def idfuncFractional(param): (2, fractionalOrderFactory('twoDomainNonSym', 0.25, 0.75), np.inf, False, None, 1), (2, fractionalOrderFactory('twoDomainNonSym', 0.25, 0.75), 0.5, True, None, 1), (2, fractionalOrderFactory('twoDomainNonSym', 0.25, 0.75), 0.5, False, None, 1), - + # discretized fractional order + (2, fractionalOrderFactory('const', 0.25, dm=dm2d), np.inf, True, None, 0), + (2, fractionalOrderFactory('const', 0.25, dm=dm2d), np.inf, False, None, 0), + (2, fractionalOrderFactory('const', 0.25, dm=dm2d), 0.5, True, None, 0), + (2, fractionalOrderFactory('const', 0.25, dm=dm2d), 0.5, False, None, 0), + (2, fractionalOrderFactory('const', 0.75, dm=dm2d), np.inf, True, None, 0), + (2, fractionalOrderFactory('const', 0.75, dm=dm2d), np.inf, False, None, 0), + 
(2, fractionalOrderFactory('const', 0.75, dm=dm2d), 0.5, True, None, 0), + (2, fractionalOrderFactory('const', 0.75, dm=dm2d), 0.5, False, None, 0), + (2, fractionalOrderFactory('twoDomainNonSym', 0.75, 0.25, dm=dm2d), np.inf, True, None, 0), + (2, fractionalOrderFactory('twoDomainNonSym', 0.75, 0.25, dm=dm2d), np.inf, False, None, 0), + (2, fractionalOrderFactory('twoDomainNonSym', 0.75, 0.25, dm=dm2d), 0.5, True, None, 0), + (2, fractionalOrderFactory('twoDomainNonSym', 0.75, 0.25, dm=dm2d), 0.5, False, None, 0), + (2, fractionalOrderFactory('twoDomainNonSym', 0.25, 0.75, dm=dm2d), np.inf, True, None, 0), + (2, fractionalOrderFactory('twoDomainNonSym', 0.25, 0.75, dm=dm2d), np.inf, False, None, 0), + (2, fractionalOrderFactory('twoDomainNonSym', 0.25, 0.75, dm=dm2d), 0.5, True, None, 0), + (2, fractionalOrderFactory('twoDomainNonSym', 0.25, 0.75, dm=dm2d), 0.5, False, None, 0), + ################################################## + # 3d kernels + (3, fractionalOrderFactory('const', 0.25), np.inf, True, None, 0), + (3, fractionalOrderFactory('const', 0.25), np.inf, False, None, 0), + (3, fractionalOrderFactory('const', 0.25), 0.5, True, None, 0), + (3, fractionalOrderFactory('const', 0.25), 0.5, False, None, 0), + (3, fractionalOrderFactory('const', 0.75), np.inf, True, None, 0), + (3, fractionalOrderFactory('const', 0.75), np.inf, False, None, 0), + (3, fractionalOrderFactory('const', 0.75), 0.5, True, None, 0), + (3, fractionalOrderFactory('const', 0.75), 0.5, False, None, 0), + (3, fractionalOrderFactory('constantSym', 0.25), np.inf, True, None, 0), + (3, fractionalOrderFactory('constantSym', 0.25), np.inf, False, None, 0), + (3, fractionalOrderFactory('constantSym', 0.25), 0.5, True, None, 0), + (3, fractionalOrderFactory('constantSym', 0.25), 0.5, False, None, 0), + (3, fractionalOrderFactory('constantSym', 0.75), np.inf, True, None, 0), + (3, fractionalOrderFactory('constantSym', 0.75), np.inf, False, None, 0), + (3, fractionalOrderFactory('constantSym', 
0.75), 0.5, True, None, 0), + (3, fractionalOrderFactory('constantSym', 0.75), 0.5, False, None, 0), + (3, fractionalOrderFactory('constantNonSym', 0.25), np.inf, True, None, 0), + (3, fractionalOrderFactory('constantNonSym', 0.25), np.inf, False, None, 0), + (3, fractionalOrderFactory('constantNonSym', 0.25), 0.5, True, None, 0), + (3, fractionalOrderFactory('constantNonSym', 0.25), 0.5, False, None, 0), + (3, fractionalOrderFactory('constantNonSym', 0.75), np.inf, True, None, 0), + (3, fractionalOrderFactory('constantNonSym', 0.75), np.inf, False, None, 0), + (3, fractionalOrderFactory('constantNonSym', 0.75), 0.5, True, None, 0), + (3, fractionalOrderFactory('constantNonSym', 0.75), 0.5, False, None, 0), + (3, fractionalOrderFactory('twoDomainNonSym', 0.25, 0.75), np.inf, True, None, 0), + (3, fractionalOrderFactory('twoDomainNonSym', 0.25, 0.75), np.inf, False, None, 0), + (3, fractionalOrderFactory('twoDomainNonSym', 0.25, 0.75), 0.5, True, None, 0), + (3, fractionalOrderFactory('twoDomainNonSym', 0.25, 0.75), 0.5, False, None, 0), + (3, fractionalOrderFactory('twoDomainNonSym', 0.75, 0.25), np.inf, True, None, 0), + (3, fractionalOrderFactory('twoDomainNonSym', 0.75, 0.25), np.inf, False, None, 0), + (3, fractionalOrderFactory('twoDomainNonSym', 0.75, 0.25), 0.5, True, None, 0), + (3, fractionalOrderFactory('twoDomainNonSym', 0.75, 0.25), 0.5, False, None, 0), + # derivative wrt s + (3, fractionalOrderFactory('const', 0.25), np.inf, True, None, 1), + (3, fractionalOrderFactory('const', 0.25), np.inf, False, None, 1), + (3, fractionalOrderFactory('const', 0.25), 0.5, True, None, 1), + (3, fractionalOrderFactory('const', 0.25), 0.5, False, None, 1), + (3, fractionalOrderFactory('const', 0.75), np.inf, True, None, 1), + (3, fractionalOrderFactory('const', 0.75), np.inf, False, None, 1), + (3, fractionalOrderFactory('const', 0.75), 0.5, True, None, 1), + (3, fractionalOrderFactory('const', 0.75), 0.5, False, None, 1), + (3, fractionalOrderFactory('constantSym', 
0.25), np.inf, True, None, 1), + (3, fractionalOrderFactory('constantSym', 0.25), np.inf, False, None, 1), + (3, fractionalOrderFactory('constantSym', 0.25), 0.5, True, None, 1), + (3, fractionalOrderFactory('constantSym', 0.25), 0.5, False, None, 1), + (3, fractionalOrderFactory('constantSym', 0.75), np.inf, True, None, 1), + (3, fractionalOrderFactory('constantSym', 0.75), np.inf, False, None, 1), + (3, fractionalOrderFactory('constantSym', 0.75), 0.5, True, None, 1), + (3, fractionalOrderFactory('constantSym', 0.75), 0.5, False, None, 1), + (3, fractionalOrderFactory('constantNonSym', 0.25), np.inf, True, None, 1), + (3, fractionalOrderFactory('constantNonSym', 0.25), np.inf, False, None, 1), + (3, fractionalOrderFactory('constantNonSym', 0.25), 0.5, True, None, 1), + (3, fractionalOrderFactory('constantNonSym', 0.25), 0.5, False, None, 1), + (3, fractionalOrderFactory('constantNonSym', 0.75), np.inf, True, None, 1), + (3, fractionalOrderFactory('constantNonSym', 0.75), np.inf, False, None, 1), + (3, fractionalOrderFactory('constantNonSym', 0.75), 0.5, True, None, 1), + (3, fractionalOrderFactory('constantNonSym', 0.75), 0.5, False, None, 1), + (3, fractionalOrderFactory('twoDomainNonSym', 0.75, 0.25), np.inf, True, None, 1), + (3, fractionalOrderFactory('twoDomainNonSym', 0.75, 0.25), np.inf, False, None, 1), + (3, fractionalOrderFactory('twoDomainNonSym', 0.75, 0.25), 0.5, True, None, 1), + (3, fractionalOrderFactory('twoDomainNonSym', 0.75, 0.25), 0.5, False, None, 1), + (3, fractionalOrderFactory('twoDomainNonSym', 0.25, 0.75), np.inf, True, None, 1), + (3, fractionalOrderFactory('twoDomainNonSym', 0.25, 0.75), np.inf, False, None, 1), + (3, fractionalOrderFactory('twoDomainNonSym', 0.25, 0.75), 0.5, True, None, 1), + (3, fractionalOrderFactory('twoDomainNonSym', 0.25, 0.75), 0.5, False, None, 1), ], ids=idfuncFractional) def fractionalKernelParams(request): return request.param @@ -360,6 +445,10 @@ def testFractionalKernel(fractionalKernelParams): xy_values 
= [(np.array([-0.1, 0.1]), np.array([0.1, 0.2])), (np.array([0.1, 0.1]), np.array([-0.1, 0.2])), (np.array([-0.1, 0.1]), np.array([0.5, 0.2]))] + elif dim == 3: + xy_values = [(np.array([-0.1, 0.1, 0.1]), np.array([0.1, 0.2, 0.2])), + (np.array([0.1, 0.1, 0.1]), np.array([-0.1, 0.2, 0.2])), + (np.array([-0.1, 0.1, 0.1]), np.array([0.5, 0.2, 0.2]))] else: raise NotImplementedError() @@ -409,6 +498,11 @@ def testFractionalKernel(fractionalKernelParams): const = (2.-2*sValue)*pow(horizonValue**2, sValue-1.) * 2./pi * 0.5 else: const = 2.0**(2.0*sValue) * sValue * gamma(sValue+1.0)/pi/gamma(1.-sValue) * 0.5 + elif dim == 3: + if horizonValue < np.inf: + const = (2.-2*sValue)*pow(horizonValue**2, sValue-1.) * 3*gamma(dim/2)/pow(pi, dim/2) * 0.5 + else: + const = 2.0**(2.0*sValue) * sValue * gamma(sValue+1.5)/pow(pi, 1.5)/gamma(1.-sValue) * 0.5 else: const = 0.5 @@ -462,13 +556,18 @@ def testFractionalKernel(fractionalKernelParams): yShifted = y.copy() yShifted[i] += eps div_fd += (boundaryKernelInf(x, yShifted) * (x-yShifted)[i]/norm(x-yShifted) - boundaryKernelInf(x, y) * (x-y)[i]/norm(x-y))/eps - assert np.isclose(div_fd, 2*infHorizonKernel(x, y)), (div_fd, 2*infHorizonKernel(x, y)) + assert np.isclose(div_fd, 2*infHorizonKernel(x, y), rtol=1e-3), (div_fd, 2*infHorizonKernel(x, y)) from PyNucleus import dofmapFactory, fractionalOrderFactory, kernelFactory, meshFactory, nonlocalBuilder, REAL, functionFactory def test_discrete_s_const(): + """ + Compare operators for kernel with + s = constantNonSym(0.75) + and its finite element interpolation. + """ mesh = meshFactory('interval', a=-1, b=1, hTarget=1e-2) dmS = dofmapFactory('P1', mesh, -1) sFun = fractionalOrderFactory('constantNonSym', 0.75) @@ -482,6 +581,11 @@ def test_discrete_s_const(): def test_discrete_leftRight(): + """ + Compare operators for kernel with + s = twoDomainNonSym(0.25, 0.75) + and its finite element interpolation. 
+ """ mesh = meshFactory('interval', a=-1, b=1, hTarget=1e-2) dmS = dofmapFactory('P1', mesh, -1) sFun = fractionalOrderFactory('twoDomainNonSym', sl=0.25, sr=0.75, r=0.3)