From 35013cfff62ac5ecb92cc1f2781d3c4d1cd6d1c5 Mon Sep 17 00:00:00 2001
From: Matthew Flatt <mflatt@racket-lang.org>
Date: Wed, 18 Oct 2023 09:30:20 -0600
Subject: [PATCH] update docs and tests for weak `cas!` and `lock!` operations
 (#731)

The `vector-cas!`, `box-cas!`, and `ftype-lock!` operations are "weak"
in the sense that they may spuriously fail on an architecture (like
AArch64) with a weak memory model. Also, they do not necessarily imply
any overall memory ordering, so using `memory-order-acquire` and
`memory-order-release` may be needed. The documentation did not make
this clear, and some tests did not handle spurious failure.

Closes #720
---
 csug/objects.stex | 18 ++++++++++++++++--
 csug/threads.stex | 18 ++++++++++++++----
 mats/5_6.ms       | 10 +++++-----
 mats/5_8.ms       |  8 +++-----
 mats/foreign.ms   |  4 ++--
 mats/ftype.ms     | 21 ++++++++++++++++-----
 mats/mat.ss       | 17 +++++++++++++++++
 mats/primvars.ms  |  2 +-
 8 files changed, 74 insertions(+), 24 deletions(-)

diff --git a/csug/objects.stex b/csug/objects.stex
index 2cb58f79f..961462da9 100644
--- a/csug/objects.stex
+++ b/csug/objects.stex
@@ -820,9 +820,16 @@ If the \var{n}th element of \var{vector} that would be replaced
 is not \scheme{eq?} to \var{old-obj}, then
 \var{vector} is unchanged.
 
+On an architecture with a weak memory model, \scheme{vector-cas!} can
+spuriously fail, leaving \var{vector} unchanged and returning
+\scheme{#f} even if the current value of element \var{n} is
+\var{old-obj}. On success, no memory ordering is implied, which means
+that \scheme{memory-order-acquire} and/or \scheme{memory-order-release}
+may be needed to complete an intended synchronization.
+
 \schemedisplay
 (define v (vector 'old0 'old1 'old2))
-(vector-cas! v 1 'old1 'new1) ;=> #t
+(vector-cas! v 1 'old1 'new1) ;=> #t, assuming no spurious failure
 (vector-ref v 1) ;=> 'new1
 (vector-cas! v 2 'old1 'new2) ;=> #f
 (vector-ref v 2) ;=> 'old2
@@ -1852,9 +1859,16 @@ if the replaced content is \scheme{eq?} to \var{old-obj}.
 If the content of \var{box} that would be replaced is not \scheme{eq?} to \var{old-obj}, then
 \var{box} is unchanged.
 
+On an architecture with a weak memory model, \scheme{box-cas!} can
+spuriously fail, leaving \var{box} unchanged and returning
+\scheme{#f} even if the current value in \var{box} is \var{old-obj}.
+On success, no memory ordering is implied, which means that
+\scheme{memory-order-acquire} and/or \scheme{memory-order-release} may be
+needed to complete an intended synchronization.
+
 \schemedisplay
 (define b (box 'old))
-(box-cas! b 'old 'new) ;=> #t
+(box-cas! b 'old 'new) ;=> #t, assuming no spurious failure
 (unbox b) ;=> 'new
 (box-cas! b 'other 'wrong) ;=> #f
 (unbox b) ;=> 'new
diff --git a/csug/threads.stex b/csug/threads.stex
index a1805cb06..15506be38 100644
--- a/csug/threads.stex
+++ b/csug/threads.stex
@@ -364,7 +364,10 @@ not just by the process or thread that most recently locked the lock.
 
 The lock mechanism provides little structure, and mistakes
 in allocation and use can lead to memory faults, deadlocks,
-and other problems.
+and other problems. Furthermore, no memory ordering is implied by a lock
+operation, which means that \scheme{memory-order-acquire} and
+\scheme{memory-order-release} may be needed to complete the intended
+synchronization of a lock.
 Thus, it is usually advisable to use locks only as part of a
 higher-level abstraction that ensures locks are used in a
 disciplined manner.
@@ -375,7 +378,7 @@ disciplined manner.
     (foreign-alloc (ftype-sizeof uptr))))
 
 (ftype-init-lock! uptr () lock)
-(ftype-lock! uptr () lock) ;=> #t
+(ftype-lock! uptr () lock) ;=> #t, assuming no spurious failure
 (ftype-lock! uptr () lock) ;=> #f
 (ftype-unlock! uptr () lock)
 (ftype-spin-lock! uptr () lock)
@@ -414,9 +417,13 @@ to the use of any of the other operators; if this is not done, the
 behavior of the other operators is undefined.
 
 \scheme{ftype-lock!} can be used to lock the lock.
-If it finds the lock unlocked at the time of the operation, it locks
+If it finds the lock unlocked at the time of the operation, it (normally) locks
 the lock and returns \scheme{#t}; if it finds the lock already locked,
-it returns \scheme{#f} without changing the lock.
+it returns \scheme{#f} without changing the lock. On an architecture with a weak memory model,
+\scheme{ftype-lock!} can spuriously fail, leaving a lock unchanged and returning
+\scheme{#f} even if the lock is currently unlocked. On success, no memory ordering is implied,
+which means that \scheme{memory-order-acquire} may be
+needed to complete an intended synchronization.
 
 \scheme{ftype-spin-lock!} can also be used to lock the lock.
 If it finds the lock unlocked at the time of the operation, it locks the
@@ -432,6 +439,9 @@ the lock.
 \scheme{ftype-unlock!} is used to unlock a lock.
 If it finds the lock locked, it unlocks the lock and returns.
 Otherwise, it returns without changing the lock.
+On an architecture with a weak memory model,
+no memory ordering is implied, and \scheme{memory-order-release} may be
+needed to complete an intended synchronization.
 
 \section{Locked increment and decrement\label{SECTTHREADLOCKEDINCRDECR}}
 
diff --git a/mats/5_6.ms b/mats/5_6.ms
index 2f44ebdfa..83ac6ac73 100644
--- a/mats/5_6.ms
+++ b/mats/5_6.ms
@@ -1378,22 +1378,22 @@
     (eq? 1 (vector-ref vec1 0)))
   (not (vector-cas! vec1 0 0 1))
   (eq? 1 (vector-ref vec1 0))
-  (vector-cas! vec1 0 1 4)
+  (retry-for-spurious (vector-cas! vec1 0 1 4))
   (eq? 4 (vector-ref vec1 0))
   (not (vector-cas! vec1 0 1 5))
 
   (not (vector-cas! vec1 1 0 1))
   (eq? 2 (vector-ref vec1 1))
-  (vector-cas! vec1 1 2 5)
+  (retry-for-spurious (vector-cas! vec1 1 2 5))
   (eq? 5 (vector-ref vec1 1))
 
   (not (vector-cas! vec2 0 'banana 'donut))
-  (vector-cas! vec2 0 'apple 'donut)
+  (retry-for-spurious (vector-cas! vec2 0 'apple 'donut))
   (not (vector-cas! vec2 0 'apple 'eclair))
   (eq? 'donut (vector-ref vec2 0))
 
   (not (vector-cas! vec2 1 'apple 'fig))
-  (vector-cas! vec2 1 'banana 'fig)
+  (retry-for-spurious (vector-cas! vec2 1 'banana 'fig))
   (not (vector-cas! vec2 1 'banana 'grape))
   (eq? 'fig (vector-ref vec2 1))
 
@@ -1414,7 +1414,7 @@
   (begin
     (collect 0)
     (let ([g1 (gensym)])
-      (and (vector-cas! vec2 2 'coconut g1)
+      (and (retry-for-spurious (vector-cas! vec2 2 'coconut g1))
            (begin
              (collect 0)
              (eq? g1 (vector-ref vec2 2))))))
diff --git a/mats/5_8.ms b/mats/5_8.ms
index 51c69a193..a45adfeb6 100644
--- a/mats/5_8.ms
+++ b/mats/5_8.ms
@@ -38,11 +38,11 @@
     (eq? 1 (unbox bx1)))
   (not (box-cas! bx1 0 1))
   (eq? 1 (unbox bx1))
-  (box-cas! bx1 1 2)
+  (retry-for-spurious (box-cas! bx1 1 2))
   (eq? 2 (unbox bx1))
 
   (not (box-cas! bx2 #f 'banana))
-  (box-cas! bx2 'apple 'banana)
+  (retry-for-spurious (box-cas! bx2 'apple 'banana))
   (not (box-cas! bx2 'apple 'banana))
   (eq? 'banana (unbox bx2))
 
@@ -59,9 +59,7 @@
   (begin
     (collect 0)
     (let ([g1 (gensym)])
-      (and (let loop ()
-             (or (box-cas! bx2 'banana g1)
-                 (loop)))
+      (and (retry-for-spurious (box-cas! bx2 'banana g1))
            (begin
              (collect 0)
              (eq? g1 (unbox bx2))))))
diff --git a/mats/foreign.ms b/mats/foreign.ms
index 6c2bb20b1..117f0e3c8 100644
--- a/mats/foreign.ms
+++ b/mats/foreign.ms
@@ -2234,13 +2234,13 @@
     (define ff-locked-decr (foreign-procedure "locked_decr" ((* uptr)) boolean))
     #t)
   (eq? (ff-init-lock (ftype-&ref A (x) a)) (void))
-  (ftype-lock! A (x) a)
+  (retry-for-spurious (ftype-lock! A (x) a))
   (not (ftype-lock! A (x) a))
   (eq? (ftype-unlock! A (x) a) (void))
   (eq? (ff-spinlock (ftype-&ref A (x) a)) (void))
   (not (ftype-lock! A (x) a))
   (eq? (ff-unlock (ftype-&ref A (x) a)) (void))
-  (ftype-lock! A (x) a)
+  (retry-for-spurious (ftype-lock! A (x) a))
   (eq? (ff-unlock (ftype-&ref A (x) a)) (void))
   (eq? (ff-spinlock (ftype-&ref A (x) a)) (void))
   (not (ftype-lock! A (x) a))
diff --git a/mats/ftype.ms b/mats/ftype.ms
index 7c9d25a3e..25c78137b 100644
--- a/mats/ftype.ms
+++ b/mats/ftype.ms
@@ -2797,11 +2797,11 @@
     (ftype-init-lock! A (g *) x)
     #t)
 
-  (ftype-lock! A (c) x)
-  (ftype-lock! A (f f1) x)
-  (ftype-lock! A (f f2 1 f3b) x)
-  (ftype-lock! A (f f2 $idx f3b) x)
-  (ftype-lock! A (g *) x)
+  (retry-for-spurious (ftype-lock! A (c) x))
+  (retry-for-spurious (ftype-lock! A (f f1) x))
+  (retry-for-spurious (ftype-lock! A (f f2 1 f3b) x))
+  (retry-for-spurious (ftype-lock! A (f f2 $idx f3b) x))
+  (retry-for-spurious (ftype-lock! A (g *) x))
 
   (not (ftype-lock! A (c) x))
   (not (ftype-lock! A (f f1) x))
@@ -2831,6 +2831,17 @@
     (fptr-free x)
     (fptr-free g)
     #t)
+
+  ;; This is intended mainly as a test of `retry-for-spurious`:
+  (let ([m (make-ftype-pointer iptr (foreign-alloc (ftype-sizeof iptr)))])
+    (ftype-init-lock! iptr () m)
+    (let loop ([i 10000000])
+      (or (fx= i 0)
+          (and (retry-for-spurious (ftype-lock! iptr () m))
+               (begin
+                 (ftype-unlock! iptr () m)
+                 (loop (fx- i 1)))))))
+
 )
 
 (mat ftype-compile-file
diff --git a/mats/mat.ss b/mats/mat.ss
index a54ae37b5..63e3e8671 100644
--- a/mats/mat.ss
+++ b/mats/mat.ss
@@ -615,3 +615,20 @@
                                 (set! counter n)
                                 (fx= n 0))))])
         (collect)))))
+
+(define-syntax retry-for-spurious
+  (let ([mt (symbol->string (machine-type))])
+    (if (or (memq (substring mt 0 2) '("a6" "i3"))
+            (memq (substring mt 0 3) '("ta6" "ti3")))
+        ;; no retry loop needed on x86
+        (lambda (stx)
+          (syntax-case stx ()
+            [(_ e) #'e]))
+        ;; add retry loop
+        (lambda (stx)
+          (syntax-case stx ()
+            [(_ e) #'(let loop ([n 10])
+                       ;; 10 spurious failures in a row is vanishingly unlikely?
+                       (or e
+                           (and (> n 0)
+                                (loop (- n 1)))))])))))
diff --git a/mats/primvars.ms b/mats/primvars.ms
index bbe1e09cf..bb44cf8a7 100644
--- a/mats/primvars.ms
+++ b/mats/primvars.ms
@@ -28,7 +28,7 @@
            heap-check-interval
            preexisting-profile-dump-entry?
            coverage-table record-run-coverage load-coverage-files combine-coverage-files coverage-percent
-           parameters)))
+           parameters retry-for-spurious)))
     (define (canonical-label x)
       (let ([s (symbol->string x)])
         (#%$intern3 (format "~a*top*:~a" s s) (string-length s) (+ 6 (* 2 (string-length s))))))