Use parameter translation rather than memoization so that OpenMP can …

…operate along Q
SasView · Jan 23, 2024 · 660ffc4 · 660ffc4
1 parent 4d75dbd
commit 660ffc4
Show file tree

Hide file tree

Showing 4 changed files with 129 additions and 124 deletions.
diff --git a/sasmodels/generate.py b/sasmodels/generate.py
@@ -1033,43 +1033,81 @@ def make_source(model_info):
         call_volume = (
             "#define CALL_VOLUME(_form, _shell, _v) "
             "do { _form = _shell = 1.0; } while (0)")
-    source.append(translation_vars)
-    source.append(call_volume)
-    source.append(call_radius_effective)
+    def add_macro(name, code):
+        source.append(f"#ifndef {name}\n{code}\n#endif")
+    add_macro("TRANSLATION_VARS", translation_vars)
+    add_macro("CALL_VOLUME", call_volume)
+    add_macro("CALL_RADIUS_EFFECTIVE", call_radius_effective)
     model_refs = _call_pars(base_table.iq_parameters, subs)
 
     if model_info.have_Fq:
         pars = ",".join(["_q", "&_F1", "&_F2",] + model_refs)
-        call_iq = "#define CALL_FQ(_q, _F1, _F2, _v) Fq(%s)" % pars
+        call_iq = f"""\
+#ifdef OVERRIDE_FQ
+# define CALL_FQ(_q, _F1, _F2, _v) OVERRIDE_FQ(_q, _F1, _F2, _v)
+#else
+# define CALL_FQ(_q, _F1, _F2, _v) Fq({pars})
+#endif"""
         clear_iq = "#undef CALL_FQ"
     else:
         pars = ",".join(["_q"] + model_refs)
-        call_iq = "#define CALL_IQ(_q, _v) Iq(%s)" % pars
+        call_iq = f"""\
+#ifdef OVERRIDE_IQ
+# define CALL_IQ(_q, _v) OVERRIDE_IQ(_q, _v)
+#else
+# define CALL_IQ(_q, _v) Iq({pars})
+#endif"""
         clear_iq = "#undef CALL_IQ"
     if xy_mode == 'qabc':
         pars = ",".join(["_qa", "_qb", "_qc"] + model_refs)
-        call_iqxy = "#define CALL_IQ_ABC(_qa,_qb,_qc,_v) Iqabc(%s)" % pars
+        call_iqxy = f"""\
+#ifdef OVERRIDE_IQ_ABC
+# define CALL_IQ_ABC(_qa,_qb,_qc,_v) OVERRIDE_IQ_ABC(_qa,_qb,_qc,_v)
+#else
+# define CALL_IQ_ABC(_qa,_qb,_qc,_v) Iqabc({pars})
+#endif"""
         clear_iqxy = "#undef CALL_IQ_ABC"
     elif xy_mode == 'qac':
         pars = ",".join(["_qa", "_qc"] + model_refs)
-        call_iqxy = "#define CALL_IQ_AC(_qa,_qc,_v) Iqac(%s)" % pars
+        call_iqxy = "#ifndef CALL_IQ_AC\n#define CALL_IQ_AC(_qa,_qc,_v) Iqac(%s)\ n#endif" % pars
+        call_iqxy = f"""\
+#ifdef OVERRIDE_IQ_AC
+# define CALL_IQ_AC(_qa,_qc,_v) OVERRIDE_IQ_AC(_qa,_qc,_v)
+#else
+# define CALL_IQ_AC(_qa,_qc,_v) Iqac({pars})
+#endif"""
         clear_iqxy = "#undef CALL_IQ_AC"
     elif xy_mode == 'qa' and not model_info.have_Fq:
         pars = ",".join(["_qa"] + model_refs)
-        call_iqxy = "#define CALL_IQ_A(_qa,_v) Iq(%s)" % pars
+        call_iqxy = f"""\
+#ifdef OVERRIDE_IQ
+# define CALL_IQ_A(_qa,_v) OVERRIDE_IQ(_qa,_v)
+#else
+# define CALL_IQ_A(_qa,_v) Iq({pars})
+#endif"""
         clear_iqxy = "#undef CALL_IQ_A"
     elif xy_mode == 'qa' and model_info.have_Fq:
-        pars = ",".join(["_qa", "&_F1", "&_F2",] + model_refs)
+        pars = ",".join(["_qa", "&_F1", "&_F2"] + model_refs)
         # Note: uses rare C construction (expr1, expr2) which computes
         # expr1 then expr2 and evaluates to expr2.  This allows us to
         # leave it looking like a function even though it is returning
         # its values by reference.
-        call_iqxy = "#define CALL_FQ_A(_qa,_F1,_F2,_v) (Fq(%s),_F2)" % pars
+        call_iqxy = f"""\
+#ifdef OVERRIDE_FQ
+# define CALL_FQ_A(_qa,_F1,_F2,_v) (OVERRIDE_FQ(_qa,_F1,_F2,v),_F2)
+#else
+# define CALL_FQ_A(_qa,_F1,_F2,_v) (Fq({pars}),_F2)
+#endif"""
         clear_iqxy = "#undef CALL_FQ_A"
     elif xy_mode == 'qxy':
         qxy_refs = _call_pars(base_table.orientation_parameters, subs)
         pars = ",".join(["_qx", "_qy"] + model_refs + qxy_refs)
-        call_iqxy = "#define CALL_IQ_XY(_qx,_qy,_v) Iqxy(%s)" % pars
+        call_iqxy = f"""\
+#ifdef OVERRIDE_IQ_XY
+# define CALL_IQ_XY(_qx,_qy,_v) OVERRIDE_IQ_XY(_qx,_qy,_v)
+#else
+# define CALL_IQ_XY(_qx,_qy,_v) Iqxy({pars})
+#endif"""
         clear_iqxy = "#undef CALL_IQ_XY"
         if base_table.orientation_parameters:
             call_iqxy += "\n#define HAVE_THETA"
@@ -1106,30 +1144,30 @@ def _kernels(kernel, call_iq, clear_iq, call_iqxy, clear_iqxy, name):
     path = kernel[1].replace('\\', '/')
     iq = [
         # define the Iq kernel
-        "#define KERNEL_NAME %s_Iq" % name,
+        f"#define KERNEL_NAME {name}_Iq",
         call_iq,
-        '#line 1 "%s Iq"' % path,
+        f'#line 1 "{path} Iq"',
         code,
         clear_iq,
         "#undef KERNEL_NAME",
         ]
 
     iqxy = [
         # define the Iqxy kernel from the same source with different #defines
-        "#define KERNEL_NAME %s_Iqxy" % name,
+        f"#define KERNEL_NAME {name}_Iqxy",
         call_iqxy,
-        '#line 1 "%s Iqxy"' % path,
+        f'#line 1 "{path} Iqxy"',
         code,
         clear_iqxy,
         "#undef KERNEL_NAME",
     ]
 
     imagnetic = [
         # define the Imagnetic kernel
-        "#define KERNEL_NAME %s_Imagnetic" % name,
+        f"#define KERNEL_NAME {name}_Imagnetic",
         "#define MAGNETIC 1",
         call_iqxy,
-        '#line 1 "%s Imagnetic"' % path,
+        f'#line 1 "{path} Imagnetic"',
         code,
         clear_iqxy,
         "#undef MAGNETIC",

diff --git a/sasmodels/models/TY_TwoYukawa.c b/sasmodels/models/TY_TwoYukawa.c
@@ -196,10 +196,11 @@ double TY_NonlinearEquation_2( double Z1, double Z2, double K1, double K2, doubl
 								 - c2 * TY_tau( Z2, Z1, Z2, a, b, c1, c2 ) * exp( -Z2 ) );
 }
 
-// Check the computed solutions satisfy the system of equations 
+// Check the computed solutions satisfy the system of equations (tol=1e-6)
 int TY_CheckSolution( double Z1, double Z2, double K1, double K2, double phi, 
 				      double a, double b, double c1, double c2, double d1, double d2 )
 {
+	// The chop(x) function returns zero when |x| < 1e-6.
 	double eq_1 = chop( TY_LinearEquation_1( Z1, Z2, K1, K2, phi, a, b, c1, c2, d1, d2 ) );
 	double eq_2 = chop( TY_LinearEquation_2( Z1, Z2, K1, K2, phi, a, b, c1, c2, d1, d2 ) );
 	double eq_3 = chop( TY_LinearEquation_3( Z1, Z2, K1, K2, phi, a, b, c1, c2, d1, d2 ) );

diff --git a/sasmodels/models/TY_YukawaSq.c b/sasmodels/models/TY_YukawaSq.c
@@ -1,87 +1,74 @@
 static double checksum = 0.0;
 
-double Iq(
-    double qexp, 
-    double radius,
-    double volumefraction,
-    double k1,  
-    double k2,   
-    double z1,    
-    double z2
-    )
+void translate(
+    double *z1, double *z2, double *k1, double *k2, double *volumefraction,
+    double *a, double *b, double *c1, double *c2, double *d1, double *d2)
 {
-    static double a;
-    static double b;
-    static double c1;
-    static double c2;
-    static double d1;
-    static double d2;
-    static double last_radius = -2.0; // Radius is always greater than zero
-    static double last_volumefraction = -0.0;
-    static double last_k1 =  0.0;
-    static double last_k2 = 0.0;
-    static double last_z1 = 0.0;
-    static double last_z2 = 0.0;
-    int debug;
-    int checkFlag;
-
-    int changed = (
-        (last_radius != radius)
-        || (last_volumefraction != volumefraction)
-        || (last_k1 != k1)
-        || (last_k2 != k2)
-        || (last_z1 != z1)
-        || (last_z2 != z2)
-    );
-    if (changed) {
-
-        last_radius = radius;
-        last_volumefraction = volumefraction;
-        last_k1 = k1;
-        last_k2 = k2;
-        last_z1 = z1;
-        last_z2 = z2;
-
-    a = 1.0;
-    b = 1.0;
-    c1 = 1.0;
-    d1 = 1.0;
-    c2 = 1.0;
-    d2 = 1.0;
-    debug = 0;
-    checkFlag = 0;  
-
-    if ( z1 == z2 )
-    {
- //       Y_SolveEquations(z1, k1 + k2, volumefraction, &a, &b, &c1, &d1, debug );
- //       Y_CheckSolution(z1, k1 + k2, volumefraction, &a, &b, &c1, &d1 );
- //       return SqOneYukawa(qexp, z1, k1 + k2, volumefraction, &a, &b, &c1, &d1 );
-          return 0;
-    }
-
-
+    int debug = 0;
 // Theoretically speaking, z1 and z2 (and k1 and k2) are symetric. 
 // Exchanging z1 (k1) with z2 (k2) does not altern the potential. 
 // The results should be identical. However, the orginal model proposed 
 // by Y. Liu treats z1 and z2 differently when implementing the numerical solution. // Hence, it is in general a good practice to require the z1 > z2. 
 // The following code is added here to swap z1 (k1) with z2 (k2) if z1 < z2.
 
-    double temp;
-    if ( z1 < z2 )
+    if ( *z1 < *z2 )
     {
-        temp = z1;
-        z1 = z2;
-        z2 = temp;
-        temp = k1;
-        k1 = k2;
-        k2 = temp;
+        double temp = *z1;
+        *z1 = *z2;
+        *z2 = temp;
+        temp = *k1;
+        *k1 = *k2;
+        *k2 = temp;
+    }
+
+    else if ( *z1 == *z2) {
+        // The calculator produces NaM when z1 == z2 so no need to precompute
+        // from the parameter values.
+        *a = NAN;
+        return;
     }
 
+    TY_SolveEquations(*z1, *z2, *k1, *k2, *volumefraction, a, b, c1, c2, d1, d2, debug );
 
-        TY_SolveEquations(z1, z2, k1, k2, volumefraction, &a, &b, &c1, &c2, &d1, &d2,debug );
-        checkFlag = TY_CheckSolution( z1, z2, k1, k2, volumefraction, a, b, c1, c2, d1, d2 );
+    int checkFlag = TY_CheckSolution( *z1, *z2, *k1, *k2, *volumefraction, *a, *b, *c1, *c2, *d1, *d2 );
+    if (!checkFlag) {
+        *a = NAN;
+        return;
     }
+    //printf("Solving (z1=%g, z2=%g, k1=%g, k2=%g, Vf=%g)\n", *z1, *z2, *k1, *k2, *volumefraction);
+    //printf("=> (a=%g, b=%g, c1=%g, c2=%g, d1=%g, d2=%g)\n", *a, *b, *c1, *c2, *d1, *d2);
+}
 
-    return SqTwoYukawa( qexp * 2 * radius, z1, z2, k1, k2, volumefraction, a, b, c1, c2, d1, d2 );
+// Normally constructed in generate.py, but we are doing something special here
+// so create them ourselves.
+// Using k1, k2 negative from the values given in the the model parameters
+#define TRANSLATION_VARS(_v) \
+   double z1=_v.z1, z2=_v.z2, k1=-_v.k1, k2=-_v.k2, vf=_v.volfraction; \
+   double a, b, c1, c2, d1, d2; \
+   translate(&z1, &z2, &k1, &k2, &vf, &a, &b, &c1, &c2, &d1, &d2)
+#define OVERRIDE_IQ(_q, _v) \
+   Iq(_q, _v.radius_effective, vf, k1, k2, z1, z2, a, b, c1, c2, d1, d2)
 
-}
+double Iq(
+    double qexp,
+    double radius,
+    double volumefraction,
+    double k1,
+    double k2,
+    double z1,
+    double z2,
+    // Hidden parameters set by translate before the q loop
+    double a,
+    double b,
+    double c1,
+    double c2,
+    double d1,
+    double d2
+    )
+{
+    if (isnan(a)) {
+          return a;
+    } else {
+        return SqTwoYukawa( qexp * 2 * radius, z1, z2, k1, k2, volumefraction, a, b, c1, c2, d1, d2 );
+    }
+}
diff --git a/sasmodels/models/TY_YukawaSq.py b/sasmodels/models/TY_YukawaSq.py
@@ -75,8 +75,6 @@
 * **Last Modified by:** Yun Liu **Date:** January 22, 2024
 * **Last Reviewed by:** Yun Liu **Date:** January 22, 2024
 """
-
-from sasmodels.special import *
 from numpy import inf
 
 name = "TY_YukawaSq"
@@ -85,51 +83,32 @@
 
 category = "structure-factor"
 structure_factor = True
-
-single = False
+single = False  # make sure that it has double digit precision
+opencl = False  # related with parallel computing
 
 parameters = [ 
 #   ["name", "units", default, [lower, upper], "type", "description"],
     ["radius_effective", "Ang", 50.0, [-inf, inf], '', ''],
     ['volfraction', '', 0.1, [-inf, inf], '', ''],
-    ['k1', '', 6, [-inf, inf], '', ''],
-    ['k2', '', -2.0, [-inf, inf], '', ''],
+    ['k1', '', -6, [-inf, inf], '', ''],
+    ['k2', '', 2.0, [-inf, inf], '', ''],
     ['z1', '', 10.0, [-inf, inf], '', ''],
     ['z2', '', 2.0, [-inf, inf], '', ''],
     ]
-#def Iq(x, radius, volumefraction, k1, k2, z1, z2):
-#    """Absolute scattering"""
-#    q=x
-#    
-#    
-#    return q
-
-
-## uncomment the following if Iq works for vector x
-
-source = ["TY_utility.h", "TY_PairCorrelation.h", "TY_cpoly.h", "TY_TwoYukawa.h", "TY_PairCorrelation.c", "TY_cpoly.c","TY_TwoYukawa.c", "TY_utility.c", "TY_YukawaSq.c"]
 
-#source = ["test2yv2_inc.c"]
-
-#haveFq = True   # for beta calculation
-#single = False  # make sure that it has double digit precision
-opencl = False  # related with parallel computing
-
-#Iq.vectorized = True
-
-#def Iqxy(x, y, radius, volumefraction, k1, k2, z1, z2):
-#    """Absolute scattering of oriented particles."""
-#    ...
-#    return oriented_form(x, y, args)
-## uncomment the following if Iqxy works for vector x, y
-#Iqxy.vectorized = True
+source = [
+    "TY_utility.h", "TY_utility.c",
+    "TY_cpoly.h", "TY_cpoly.c",
+    "TY_PairCorrelation.h", "TY_PairCorrelation.c",
+    "TY_TwoYukawa.h", "TY_TwoYukawa.c",
+    "TY_YukawaSq.c",
+    #"TY_YukawaSq_old.c",
+    ]
 
 # The test results were generated with MatLab Code (TYSQ22) (Jan. 22, 2024)
-# Note that this test follows the definition of the original code : k1 < 0 means repulsion and k1 > 0 means attraction.
-# Paul K. and Yun discussed to switch sign of k1 (and k2) so that negative values mean attraction and positive ones mean repulsion.
-# Once the code is changed, the input values for k1 and k2 of the unit test need to change the signs.
+# Note that this test reverses the definition of the original code : k1 > 0 means repulsion and k1 < 0 means attraction.
 tests = [
     [{'scale': 1.0, 'background' : 0.0, 'radius_effective' : 50.0,
-      'volfraction' : 0.2, 'k1' : 6, 'k2':-2.0, 'z1':10, 'z2':2.0,'radius_effective_pd' : 0},
+      'volfraction' : 0.2, 'k1' : -6, 'k2':2.0, 'z1':10, 'z2':2.0,'radius_effective_pd' : 0},
      [0.0009425, 0.029845], [0.126775, 0.631068]],
 ]