Skip to content

Commit

Permalink
clean up memory fence and compare-and-swap kernel functions (#826)
Browse files Browse the repository at this point in the history
CAS variants with different fence properties in "atomic.h" ware a
leftover from an early experiment in parallel garbage collection, and
that experiment did not turn out to be a good idea. The CAS variants
were left half-implemented, and they're not needed, so this commit
simplifies to reflect the needed functionality. Also, the commit adds
comments to clarify that CAS support is used only for pb mode and to
explain trade-offs for inline assembly and compiler intrinsics.

Small repairs to the build scripts enable pbarch builds on Windows,
for what it's worth. Otherwise, this is just an internal clean-up and
does not add anything new or fix any bugs.
  • Loading branch information
mflatt authored Apr 13, 2024
1 parent c7a3560 commit 08a45e5
Show file tree
Hide file tree
Showing 7 changed files with 53 additions and 79 deletions.
91 changes: 38 additions & 53 deletions c/atomic.h
Original file line number Diff line number Diff line change
@@ -1,39 +1,57 @@
/* `STORE_FENCE` is used by the storage-management system, but
`ACQUIRE_FENCE`, `RELEASE_FENCE`, and `COMPARE_AND_SWAP_PTR`
are used only by the pb interpreter.
It's always ok to map `ACQUIRE_FENCE` and `RELEASE_FENCE` to
`STORE_FENCE`. For portability, we mainly rely on a
`__sync_synchronize` intrinsic as provided by reasonably modern
versions of GCC and Clang. In some cases, more specialized fence
variants are available via inline assembly or platform-specific
intrinsics. When inline assembly is written only for `STORE_FENCE`
below, then the only advantage over using `__sync_synchronize` is
to support environments with different or very old compilers.
For `COMPARE_AND_SWAP_PTR`, we similarlly rely on a GCC/Clang
`__sync_bool_compare_and_swap` intrinsic. Some inline-assembly
versions are here --- but, again, the only advantage of those is to
support environments with different or very old compilers. */

#if !defined(PTHREADS)
# define STORE_FENCE() do { } while (0)
#elif defined(_MSC_VER) && defined(_M_ARM64)
# define STORE_FENCE() __dmb(_ARM64_BARRIER_ISHST)
# define STORE_FENCE() __dmb(_ARM64_BARRIER_ISHST)
# define ACQUIRE_FENCE() __dmb(_ARM64_BARRIER_ISH)
# define RELEASE_FENCE() ACQUIRE_FENCE()
#elif defined(__arm64__) || defined(__aarch64__)
# define STORE_FENCE() __asm__ __volatile__ ("dmb ishst" : : : "memory")
# define STORE_FENCE() __asm__ __volatile__ ("dmb ishst" : : : "memory")
# define ACQUIRE_FENCE() __asm__ __volatile__ ("dmb ish" : : : "memory")
# define RELEASE_FENCE() ACQUIRE_FENCE()
#elif defined(__arm__)
# if (arm_isa_version >= 7) || (__ARM_ARCH >= 7)
# define STORE_FENCE() __asm__ __volatile__ ("dmb ishst" : : : "memory")
# define STORE_FENCE() __asm__ __volatile__ ("dmb ishst" : : : "memory")
# define ACQUIRE_FENCE() __asm__ __volatile__ ("dmb ish" : : : "memory")
# define RELEASE_FENCE() ACQUIRE_FENCE()
# else
# define STORE_FENCE() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory")
# define STORE_FENCE() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory")
# define ACQUIRE_FENCE() STORE_FENCE()
# define RELEASE_FENCE() STORE_FENCE()
# endif
#elif defined(__powerpc64__)
# define STORE_FENCE() __asm__ __volatile__ ("lwsync" : : : "memory")
# define STORE_FENCE() __asm__ __volatile__ ("lwsync" : : : "memory")
# define ACQUIRE_FENCE() __asm__ __volatile__ ("sync" : : : "memory")
# define RELEASE_FENCE() ACQUIRE_FENCE()
#elif defined(__powerpc__) || defined(__POWERPC__)
# define STORE_FENCE() __asm__ __volatile__ ("sync" : : : "memory")
# define STORE_FENCE() __asm__ __volatile__ ("sync" : : : "memory")
# define ACQUIRE_FENCE() STORE_FENCE()
# define RELEASE_FENCE() STORE_FENCE()
#elif defined(__riscv)
# define STORE_FENCE() __asm__ __volatile__ ("fence w,rw" : : : "memory")
# define STORE_FENCE() __asm__ __volatile__ ("fence w,rw" : : : "memory")
# define ACQUIRE_FENCE() __asm__ __volatile__ ("fence r,rw" : : : "memory")
# define RELEASE_FENCE() __asm__ __volatile__ ("fence rw,r" : : : "memory")
#elif defined(__loongarch64)
# define STORE_FENCE() __asm__ __volatile__ ("dbar 0" : : : "memory")
# define ACQUIRE_FENCE() __asm__ __volatile__ ("dbar 0" : : : "memory")
# define RELEASE_FENCE() __asm__ __volatile__ ("dbar 0" : : : "memory")
# define STORE_FENCE() __asm__ __volatile__ ("dbar 0" : : : "memory")
# define ACQUIRE_FENCE() STORE_FENCE()
# define RELEASE_FENCE() STORE_FENCE()
#elif (__GNUC__ >= 5) || defined(__clang__)
# define STORE_FENCE() __sync_synchronize()
# define ACQUIRE_FENCE() STORE_FENCE()
Expand All @@ -50,40 +68,22 @@
#endif

#if !defined(PTHREADS)
# define CAS_ANY_FENCE(a, old, new) ((*(ptr *)(a) == TO_PTR(old)) ? (*(ptr)(a) = TO_PTR(new), 1) : 0)
# define COMPARE_AND_SWAP_PTR(a, old, new) ((*(ptr *)(a) == TO_PTR(old)) ? (*(ptr)(a) = TO_PTR(new), 1) : 0)
#elif defined(_MSC_VER)
# if ptr_bits == 64
# define CAS_ANY_FENCE(a, old, new) (_InterlockedCompareExchange64((__int64 *)(a), (__int64)(new), (__int64)(old)) == (__int64)(old))
# define COMPARE_AND_SWAP_PTR(a, old, new) (_InterlockedCompareExchange64((__int64 *)(a), (__int64)(new), (__int64)(old)) == (__int64)(old))
# else
# define CAS_ANY_FENCE(a, old, new) (_InterlockedCompareExchange64((long *)(a), (long)(new), (long)(old)) == (long)(old))
# define COMPARE_AND_SWAP_PTR(a, old, new) (_InterlockedCompareExchange((long *)(a), (long)(new), (long)(old)) == (long)(old))
# endif
#elif defined(__arm64__) || defined(__aarch64__)
FORCEINLINE int CAS_LOAD_ACQUIRE(volatile void *addr, void *old_val, void *new_val) {
FORCEINLINE int COMPARE_AND_SWAP_PTR(volatile void *addr, void *old_val, void *new_val) {
I64 ret;
__asm__ __volatile__ ("mov %0, #0\n\t"
"0:\n\t"
"ldaxr x12, [%1, #0]\n\t"
"cmp x12, %2\n\t"
"bne 1f\n\t"
"stxr w7, %3, [%1, #0]\n\t"
"cmp x7, #0\n\t"
"bne 1f\n\t"
"mov %0, #1\n\t"
"1:\n\t"
: "=&r" (ret)
: "r" (addr), "r" (old_val), "r" (new_val)
: "cc", "memory", "x12", "x7");
return ret;
}
/* same as above, but ldaxr -> ldxr and stxr -> stlxr */
FORCEINLINE int CAS_STORE_RELEASE(volatile void *addr, void *old_val, void *new_val) {
I64 ret;
__asm__ __volatile__ ("mov %0, #0\n\t"
"0:\n\t"
"ldxr x12, [%1, #0]\n\t"
"cmp x12, %2\n\t"
"bne 1f\n\t"
"stlxr w7, %3, [%1, #0]\n\t"
"stxr w7, %3, [%1, #0]\n\t"
"cmp x7, #0\n\t"
"bne 1f\n\t"
"mov %0, #1\n\t"
Expand All @@ -94,12 +94,8 @@ FORCEINLINE int CAS_STORE_RELEASE(volatile void *addr, void *old_val, void *new_
return ret;
}
#elif defined(__arm__) && ((arm_isa_version >= 6) || (__ARM_ARCH >= 6))
FORCEINLINE int S_cas_any_fence(int load_acquire, volatile void *addr, void *old_val, void *new_val) {
FORCEINLINE int COMPARE_AND_SWAP_PTR(volatile void *addr, void *old_val, void *new_val) {
int ret;
if (load_acquire)
ACQUIRE_FENCE();
else
RELEASE_FENCE();
__asm__ __volatile__ ("mov %0, #0\n\t"
"0:\n\t"
"ldrex r12, [%1]\n\t"
Expand All @@ -116,27 +112,24 @@ FORCEINLINE int S_cas_any_fence(int load_acquire, volatile void *addr, void *old
: "cc", "memory", "r12", "r7");
return ret;
}
# define CAS_LOAD_ACQUIRE(a, old, new) S_cas_any_fence(1, a, old, new)
# define CAS_STORE_RELEASE(a, old, new) S_cas_any_fence(0, a, old, new)
#elif (__GNUC__ >= 5) || defined(__clang__)
# define CAS_ANY_FENCE(a, old, new) __sync_bool_compare_and_swap((ptr *)(a), TO_PTR(old), TO_PTR(new))
# define COMPARE_AND_SWAP_PTR(a, old, new) __sync_bool_compare_and_swap((ptr *)(a), TO_PTR(old), TO_PTR(new))
#elif defined(__i386__) || defined(__x86_64__)
# if ptr_bits == 64
# define CAS_OP_SIZE "q"
# else
# define CAS_OP_SIZE ""
# endif
FORCEINLINE int S_cas_any_fence(volatile void *addr, void *old_val, void *new_val) {
FORCEINLINE int COMPARE_AND_SWAP_PTR(volatile void *addr, void *old_val, void *new_val) {
char result;
__asm__ __volatile__("lock; cmpxchg" CAS_OP_SIZE " %3, %0; setz %1"
: "=m"(*(void **)addr), "=q"(result)
: "m"(*(void **)addr), "r" (new_val), "a"(old_val)
: "memory");
return (int) result;
}
# define CAS_ANY_FENCE(a, old, new) S_cas_any_fence(a, old, new)
#elif defined(__powerpc64__)
FORCEINLINE int S_cas_any_fence(volatile void *addr, void *old_val, void *new_val) {
FORCEINLINE int COMPARE_AND_SWAP_PTR(volatile void *addr, void *old_val, void *new_val) {
int ret, tmp;
__asm__ __volatile__ ("li %0, 0\n\t"
"0:\n\t"
Expand All @@ -152,18 +145,10 @@ FORCEINLINE int S_cas_any_fence(volatile void *addr, void *old_val, void *new_va
: "cc", "memory");
return ret;
}
# define CAS_ANY_FENCE(a, old, new) S_cas_any_fence(a, old, new)
#elif defined(__riscv)
# error expected a compiler with a CAS intrinsic for RISC-V
#elif defined(__loongarch64)
# error expected a compiler with a CAS intrinsic for LoongArch64
#else
# define CAS_ANY_FENCE(a, old, new) ((*(ptr *)(a) == TO_PTR(old)) ? (*(ptr *)(a) = TO_PTR(new), 1) : 0)
#endif

#ifdef CAS_ANY_FENCE
# define CAS_LOAD_ACQUIRE(a, old, new) CAS_ANY_FENCE(a, old, new)
# define CAS_STORE_RELEASE(a, old, new) CAS_ANY_FENCE(a, old, new)
#else
# define CAS_ANY_FENCE(a, old, new) CAS_LOAD_ACQUIRE(a, old, new)
# define COMPARE_AND_SWAP_PTR(a, old, new) ((*(ptr *)(a) == TO_PTR(old)) ? (*(ptr *)(a) = TO_PTR(new), 1) : 0)
#endif
6 changes: 3 additions & 3 deletions c/build.zuo
Original file line number Diff line number Diff line change
Expand Up @@ -504,14 +504,14 @@
" /DSCHEME_STATIC")
" /Ox /W3 /DWIN32 /D_CRT_SECURE_NO_WARNINGS"
" /Zi /FS /Fd" (string->shell (at-dir "scheme.pdb"))
(if (glob-match? "pb*" (hash-ref config 'm))
(if (glob-match? "pb*" (m->arch (hash-ref config 'm)))
" /DFEATURE_WINDOWS"
"")))]
[config (hash-set config 'LDFLAGS (if (as-runtime-dll? config) "/MD" "/MT"))]
[config (hash-set config 'LIBS "rpcrt4.lib ole32.lib advapi32.lib User32.lib")])
config)]
[(and (eq? 'windows (system-type))
(glob-match? "pb*" (hash-ref config 'm)))
[(and (glob-match? "*nt" (hash-ref config 'defaultm ""))
(glob-match? "pb*" (m->arch (hash-ref config 'm))))
(config-merge config 'CFLAGS "-DFEATURE_WINDOWS")]
[else config]))

Expand Down
4 changes: 2 additions & 2 deletions c/pb.c
Original file line number Diff line number Diff line change
Expand Up @@ -502,7 +502,7 @@ ptr *S_get_call_arena(ptr tc) {
#if defined(PTHREADS)
void S_pb_spinlock(void *addr) {
while (1) {
if (CAS_ANY_FENCE(addr, TO_VOIDP(0), TO_VOIDP(1)))
if (COMPARE_AND_SWAP_PTR(addr, TO_VOIDP(0), TO_VOIDP(1)))
break;
}
}
Expand All @@ -511,7 +511,7 @@ int S_pb_locked_adjust(void *addr, int delta) {
while (1) {
uptr oldv = *(uptr *)addr;
uptr newv = oldv + delta;
if (CAS_ANY_FENCE(addr, TO_VOIDP(oldv), TO_VOIDP(newv)))
if (COMPARE_AND_SWAP_PTR(addr, TO_VOIDP(oldv), TO_VOIDP(newv)))
return newv == 0;
}
}
Expand Down
14 changes: 7 additions & 7 deletions c/pb.h
Original file line number Diff line number Diff line change
Expand Up @@ -956,10 +956,10 @@ enum {
*(float *)TO_VOIDP(regs[reg] + imm) = (float)fpregs[dest]

#if defined(PTHREADS)
# define CAS_ANY_FENCE_SEQOK(addr, old_r, r) \
CAS_ANY_FENCE(TO_VOIDP(addr), TO_VOIDP(old_r), TO_VOIDP(r))
# define COMPARE_AND_SWAP_PTR_SEQOK(addr, old_r, r) \
COMPARE_AND_SWAP_PTR(TO_VOIDP(addr), TO_VOIDP(old_r), TO_VOIDP(r))
#else
# define CAS_ANY_FENCE_SEQOK(addr, old_r, r) \
# define COMPARE_AND_SWAP_PTR_SEQOK(addr, old_r, r) \
(*(uptr *)TO_VOIDP(addr) = r, 1)
#endif

Expand All @@ -971,7 +971,7 @@ enum {
while (1) { \
uptr old_r = *(uptr *)TO_VOIDP(addr); \
uptr r = old_r + regs[reg]; \
if (CAS_ANY_FENCE_SEQOK(addr, old_r, r)) { \
if (COMPARE_AND_SWAP_PTR_SEQOK(addr, old_r, r)) { \
flag = (r == 0); \
break; \
} \
Expand All @@ -986,7 +986,7 @@ enum {
while (1) { \
uptr old_r = *(uptr *)TO_VOIDP(addr); \
uptr r = old_r + imm; \
if (CAS_ANY_FENCE_SEQOK(addr, old_r, r)) { \
if (COMPARE_AND_SWAP_PTR_SEQOK(addr, old_r, r)) { \
flag = (r == 0); \
break; \
} \
Expand All @@ -999,7 +999,7 @@ enum {
# define do_pb_lock(dest) \
do { \
uptr *l = TO_VOIDP(regs[dest]); \
flag = CAS_ANY_FENCE(l, TO_VOIDP(0), TO_VOIDP(1)); \
flag = COMPARE_AND_SWAP_PTR(l, TO_VOIDP(0), TO_VOIDP(1)); \
} while (0)
#else
# define doi_pb_lock(instr) \
Expand All @@ -1023,7 +1023,7 @@ enum {
uptr *l = TO_VOIDP(regs[dest]); \
uptr old = regs[reg1]; \
uptr new = regs[reg2]; \
flag = CAS_ANY_FENCE(l, TO_VOIDP(old), TO_VOIDP(new)); \
flag = COMPARE_AND_SWAP_PTR(l, TO_VOIDP(old), TO_VOIDP(new)); \
} while (0)
#else
#define doi_pb_cas(instr) \
Expand Down
2 changes: 1 addition & 1 deletion c/schsig.c
Original file line number Diff line number Diff line change
Expand Up @@ -568,7 +568,7 @@ static BOOL WINAPI handle_signal(DWORD dwCtrlType) {
case CTRL_BREAK_EVENT: {
#ifdef PTHREADS
/* get_thread_context() always returns 0, so assume main thread */
ptr tc = S_G.thread_context;
ptr tc = TO_PTR(S_G.thread_context);
#else
ptr tc = get_thread_context();
#endif
Expand Down
11 changes: 0 additions & 11 deletions c/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -449,12 +449,6 @@ typedef struct {
# define END_IMPLICIT_ATOMIC() do { } while (0)
#endif

#define S_cas_load_acquire_voidp(a, old, new) CAS_LOAD_ACQUIRE(a, old, new)
#define S_cas_store_release_voidp(a, old, new) CAS_STORE_RELEASE(a, old, new)
#define S_cas_load_acquire_ptr(a, old, new) CAS_LOAD_ACQUIRE(a, TO_VOIDP(old), TO_VOIDP(new))
#define S_cas_store_release_ptr(a, old, new) CAS_STORE_RELEASE(a, TO_VOIDP(old), TO_VOIDP(new))
#define S_store_release() RELEASE_FENCE()

#else
#define get_thread_context() TO_PTR(S_G.thread_context)
#define deactivate_thread(tc) {}
Expand All @@ -465,11 +459,6 @@ typedef struct {
#define alloc_mutex_release() do {} while (0)
#define IS_TC_MUTEX_OWNER() 1
#define IS_ALLOC_MUTEX_OWNER() 1
#define S_cas_load_acquire_voidp(a, old, new) (*(a) = new, 1)
#define S_cas_store_release_voidp(a, old, new) (*(a) = new, 1)
#define S_cas_load_acquire_ptr(a, old, new) (*(a) = new, 1)
#define S_cas_store_release_ptr(a, old, new) (*(a) = new, 1)
#define S_store_release() do { } while (0)
#define BEGIN_IMPLICIT_ATOMIC() do { } while (0)
#define END_IMPLICIT_ATOMIC() do { } while (0)
#define AS_IMPLICIT_ATOMIC(T, X) X
Expand Down
4 changes: 2 additions & 2 deletions mats/build.zuo
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@
"cc")))
(cons "FTYPE_CFLAGS"
(build-shell (or (lookup 'CPPFLAGS) "")
(if (glob-match? "*nt" m)
(if (eq? 'windows (system-type))
"-DWIN32"
"")
(or (lookup 'mdinclude) "")
Expand Down Expand Up @@ -759,7 +759,7 @@
(string->shell (at-dir "../bin" m
(~a "csv" (get-dll-version)
".lib"))))]
[(glob-match? "*nt" m)
[(eq? 'windows (system-type))
(config-merge config 'LDFLAGS
(string->shell (at-dir "../boot" m "schemeexe.lib")))]
[else config])])
Expand Down

0 comments on commit 08a45e5

Please sign in to comment.