SasView · pkienzle · May 21, 2021 · May 21, 2021 · May 21, 2021 · Jan 21, 2024
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -45,9 +45,10 @@ jobs:
         choco install opencl-intel-cpu-runtime
         python -m pip install --only-binary=pyopencl --find-links http://www.silx.org/pub/wheelhouse/ --trusted-host www.silx.org pyopencl
 
-    - name: Test with pytest
+    - name: Test with pytest (only on Windows for now since PoCL is failing on Ubuntu)
       env:
         PYOPENCL_COMPILER_OUTPUT: 1
+        SAS_OPENCL: none
       run: |
         # other CI uses the following, but `setup.py test` is a deprecated way
         # of running tests
@@ -57,6 +58,8 @@ jobs:
 
     - name: check that the docs build (linux only)
       if: ${{ matrix.os == 'ubuntu-latest' }}
+      env:
+        SAS_OPENCL: none
       run: |
         make -j 4 -C doc SPHINXOPTS="-W --keep-going -n" html
 

diff --git a/explore/precision.py b/explore/precision.py
@@ -391,9 +391,42 @@ def add_function(name, mp_function, np_function, ocl_function,
 add_function(
     name="(1-cos(x))/x^2",
     mp_function=lambda x: (1 - mp.cos(x))/(x*x),
-    np_function=lambda x: (1 - np.cos(x))/(x*x),
+    #np_function=lambda x: (1 - np.cos(x))/(x*x),
+    np_function=lambda x, cutoff=0.5: (
+        (x>cutoff)*(1 - np.cos(x))/(x*x)
+        +(x<=cutoff)*(1.0/2.0 + x*x*(1./-24. + x*x*(1./720. + x*x*(1./-40320. + x*x*(1./3628800. + x*x*(1./-479001600. + x*x*1./87178291200.))))))
+    ),
     ocl_function=make_ocl("return (1-cos(q))/q/q;", "sas_1mcosx_x2"),
 )
+add_function(
+    # Note: this is just j1(x)/x
+    name="TY_pc_t1",
+    mp_function=lambda x: (mp.sin(x)/x - mp.cos(x))/(x*x),
+    np_function=lambda x, cutoff=0.5: (
+        (x>cutoff)*(np.sin(x)/x - np.cos(x))/(x*x)
+        +(x<=cutoff)*(1.0/3.0 + x*x*(1./-30. + x*x*(1./840. + x*x*(1./-45360. + x*x*(1./3991680. + x*x*(1./-518918400. + x*x*(1./93405312000.)))))))
+    ),
+    #ocl_function=make_ocl("return sas_3j1x_x(q)/3.;", "sas_j1x_x", ["lib/sas_3j1x_x.c"]),
+    ocl_function=make_ocl("return (sin(q)/q - cos(q))/(q*q);", "j1x_x"),
+)
+add_function(
+    name="TY_pc_t2",
+    mp_function=lambda x: (2.*x*mp.sin(x) - (x*x - 2.)*mp.cos(x) - 2.)/x**4,
+    np_function=lambda x, cutoff=0.5: (
+        (x>cutoff)*(2.*x*np.sin(x) - (x*x - 2.)*np.cos(x) - 2.)/x**4
+        +(x<=cutoff)*(1./4. + x*x*(1./-36 + x*x*(1./960. + x*x*(1./-50400. + x*x*(1./4354560. + x*x*(1./-558835200. + x*x*(1./99632332800.)))))))
+    ),
+    ocl_function=make_ocl("return (2.*q*sin(q) - (q*q - 2.)*cos(q) - 2.) / pown(q,4);","TY_pc_t2"),
+)
+add_function(
+    name="TY_pc_t3",
+    mp_function=lambda x: ( 4*x*(x*x - 6)*mp.sin(x)  - (x**4 - 12*x*x + 24)*mp.cos(x) + 24 ) / x**6,
+    np_function=lambda x, cutoff=0.9: (
+      (x>cutoff)*(4.*x*(x*x - 6.)*np.sin(x) - ((x*x - 12.)*x*x + 24.)*np.cos(x) + 24.) / x**6
+      +(x<=cutoff)*(1/6 + x*x*(1./-48. + x*x*(1./1200. + x*x*(1./-60480. + x*x*(1./5080320. + x*x*(1./-638668800. + x*x*(1./112086374400.)))))))
+    ),
+    ocl_function=make_ocl("return (4*q*(q*q - 6)*sin(q)  - ((q*q - 12)*q*q + 24)*cos(q) + 24 ) / pown(q,6);","ty_pc_t2"),
+)
 add_function(
     name="(1-sin(x)/x)/x",
     mp_function=lambda x: 1/x - mp.sin(x)/(x*x),

diff --git a/sasmodels/generate.py b/sasmodels/generate.py
@@ -149,6 +149,19 @@
 the latest time stamp amongst the source files (so you can check if
 the model needs to be rebuilt).
 
+The generated code is very ugly, making heavy use of C macros to build its
+functions. See the source for kernel_iq.c for a list along with a brief
+description of each.
+
+The C code can override some of these macros, allowing an
+arbitrary translation of the input parameters prior to the loop over Q
+when running in a DLL. In particular, it can call an expensive precalcution
+function and pass the results to the underlying I(Q). Note that there is
+no post-loop cleanup call, so memory allocated by malloc() will not be freed.
+TRANSLATION_VARS, CALL_VOLUME, CALL_RADIUS_EFFECTIVE can be directly substituted.
+CALL_{IQ,FQ}* are substituted with OVERRIDE_{IQ,FQ}*. See models/TY_YukawaSq.c
+for an example.
+
 The function :func:`make_doc` extracts the doc string and adds the
 parameter table to the top.  *make_figure* in *sasmodels/doc/genmodel*
 creates the default figure for the model.  [These two sets of code
@@ -159,6 +172,8 @@
 # TODO: determine which functions are useful outside of generate
 #__all__ = ["model_info", "make_doc", "make_source", "convert_type"]
 
+# TODO: write out the full C code rather than relying on macro expansion.
+
 import sys
 from os import environ
 from os.path import abspath, dirname, join as joinpath, exists, getmtime, sep
@@ -1054,43 +1069,81 @@ def make_source(model_info):
         call_volume = (
             "#define CALL_VOLUME(_form, _shell, _v) "
             "do { _form = _shell = 1.0; } while (0)")
-    source.append(translation_vars)
-    source.append(call_volume)
-    source.append(call_radius_effective)
+    def add_macro(name, code):
+        source.append(f"#ifndef {name}\n{code}\n#endif")
+    add_macro("TRANSLATION_VARS", translation_vars)
+    add_macro("CALL_VOLUME", call_volume)
+    add_macro("CALL_RADIUS_EFFECTIVE", call_radius_effective)
     model_refs = _call_pars(base_table.iq_parameters, subs)
 
     if model_info.have_Fq:
         pars = ",".join(["_q", "&_F1", "&_F2",] + model_refs)
-        call_iq = "#define CALL_FQ(_q, _F1, _F2, _v) Fq(%s)" % pars
+        call_iq = f"""\
+#ifdef OVERRIDE_FQ
+# define CALL_FQ(_q, _F1, _F2, _v) OVERRIDE_FQ(_q, _F1, _F2, _v)
+#else
+# define CALL_FQ(_q, _F1, _F2, _v) Fq({pars})
+#endif"""
         clear_iq = "#undef CALL_FQ"
     else:
         pars = ",".join(["_q"] + model_refs)
-        call_iq = "#define CALL_IQ(_q, _v) Iq(%s)" % pars
+        call_iq = f"""\
+#ifdef OVERRIDE_IQ
+# define CALL_IQ(_q, _v) OVERRIDE_IQ(_q, _v)
+#else
+# define CALL_IQ(_q, _v) Iq({pars})
+#endif"""
         clear_iq = "#undef CALL_IQ"
     if xy_mode == 'qabc':
         pars = ",".join(["_qa", "_qb", "_qc"] + model_refs)
-        call_iqxy = "#define CALL_IQ_ABC(_qa,_qb,_qc,_v) Iqabc(%s)" % pars
+        call_iqxy = f"""\
+#ifdef OVERRIDE_IQ_ABC
+# define CALL_IQ_ABC(_qa,_qb,_qc,_v) OVERRIDE_IQ_ABC(_qa,_qb,_qc,_v)
+#else
+# define CALL_IQ_ABC(_qa,_qb,_qc,_v) Iqabc({pars})
+#endif"""
         clear_iqxy = "#undef CALL_IQ_ABC"
     elif xy_mode == 'qac':
         pars = ",".join(["_qa", "_qc"] + model_refs)
-        call_iqxy = "#define CALL_IQ_AC(_qa,_qc,_v) Iqac(%s)" % pars
+        call_iqxy = "#ifndef CALL_IQ_AC\n#define CALL_IQ_AC(_qa,_qc,_v) Iqac(%s)\ n#endif" % pars
+        call_iqxy = f"""\
+#ifdef OVERRIDE_IQ_AC
+# define CALL_IQ_AC(_qa,_qc,_v) OVERRIDE_IQ_AC(_qa,_qc,_v)
+#else
+# define CALL_IQ_AC(_qa,_qc,_v) Iqac({pars})
+#endif"""
         clear_iqxy = "#undef CALL_IQ_AC"
     elif xy_mode == 'qa' and not model_info.have_Fq:
         pars = ",".join(["_qa"] + model_refs)
-        call_iqxy = "#define CALL_IQ_A(_qa,_v) Iq(%s)" % pars
+        call_iqxy = f"""\
+#ifdef OVERRIDE_IQ
+# define CALL_IQ_A(_qa,_v) OVERRIDE_IQ(_qa,_v)
+#else
+# define CALL_IQ_A(_qa,_v) Iq({pars})
+#endif"""
         clear_iqxy = "#undef CALL_IQ_A"
     elif xy_mode == 'qa' and model_info.have_Fq:
-        pars = ",".join(["_qa", "&_F1", "&_F2",] + model_refs)
+        pars = ",".join(["_qa", "&_F1", "&_F2"] + model_refs)
         # Note: uses rare C construction (expr1, expr2) which computes
         # expr1 then expr2 and evaluates to expr2.  This allows us to
         # leave it looking like a function even though it is returning
         # its values by reference.
-        call_iqxy = "#define CALL_FQ_A(_qa,_F1,_F2,_v) (Fq(%s),_F2)" % pars
+        call_iqxy = f"""\
+#ifdef OVERRIDE_FQ
+# define CALL_FQ_A(_qa,_F1,_F2,_v) (OVERRIDE_FQ(_qa,_F1,_F2,v),_F2)
+#else
+# define CALL_FQ_A(_qa,_F1,_F2,_v) (Fq({pars}),_F2)
+#endif"""
         clear_iqxy = "#undef CALL_FQ_A"
     elif xy_mode == 'qxy':
         qxy_refs = _call_pars(base_table.orientation_parameters, subs)
         pars = ",".join(["_qx", "_qy"] + model_refs + qxy_refs)
-        call_iqxy = "#define CALL_IQ_XY(_qx,_qy,_v) Iqxy(%s)" % pars
+        call_iqxy = f"""\
+#ifdef OVERRIDE_IQ_XY
+# define CALL_IQ_XY(_qx,_qy,_v) OVERRIDE_IQ_XY(_qx,_qy,_v)
+#else
+# define CALL_IQ_XY(_qx,_qy,_v) Iqxy({pars})
+#endif"""
         clear_iqxy = "#undef CALL_IQ_XY"
         if base_table.orientation_parameters:
             call_iqxy += "\n#define HAVE_THETA"
@@ -1127,30 +1180,30 @@ def _kernels(kernel, call_iq, clear_iq, call_iqxy, clear_iqxy, name):
     path = _clean_source_filename(kernel[1])
     iq = [
         # define the Iq kernel
-        "#define KERNEL_NAME %s_Iq" % name,
+        f"#define KERNEL_NAME {name}_Iq",
         call_iq,
-        '#line 1 "%s Iq"' % path,
+        f'#line 1 "{path} Iq"',
         code,
         clear_iq,
         "#undef KERNEL_NAME",
         ]
 
     iqxy = [
         # define the Iqxy kernel from the same source with different #defines
-        "#define KERNEL_NAME %s_Iqxy" % name,
+        f"#define KERNEL_NAME {name}_Iqxy",
         call_iqxy,
-        '#line 1 "%s Iqxy"' % path,
+        f'#line 1 "{path} Iqxy"',
         code,
         clear_iqxy,
         "#undef KERNEL_NAME",
     ]
 
     imagnetic = [
         # define the Imagnetic kernel
-        "#define KERNEL_NAME %s_Imagnetic" % name,
+        f"#define KERNEL_NAME {name}_Imagnetic",
         "#define MAGNETIC 1",
         call_iqxy,
-        '#line 1 "%s Imagnetic"' % path,
+        f'#line 1 "{path} Imagnetic"',
         code,
         clear_iqxy,
         "#undef MAGNETIC",