From d078eb7a8ad538b0f004fe2dea33a736ef17d8bd Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 14 Aug 2020 16:14:34 -0700
Subject: [PATCH 01/70] tools/memory-model:  Document locking corner cases

Most Linux-kernel uses of locking are straightforward, but there are
corner-case uses that rely on less well-known aspects of the lock and
unlock primitives.  This commit therefore adds a locking.txt and litmus
tests in Documentation/litmus-tests/locking to explain these corner-case
uses.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 .../litmus-tests/locking/DCL-broken.litmus    |  55 +++
 .../litmus-tests/locking/DCL-fixed.litmus     |  56 +++
 .../litmus-tests/locking/RM-broken.litmus     |  42 +++
 .../litmus-tests/locking/RM-fixed.litmus      |  42 +++
 tools/memory-model/Documentation/locking.txt  | 320 ++++++++++++++++++
 5 files changed, 515 insertions(+)
 create mode 100644 Documentation/litmus-tests/locking/DCL-broken.litmus
 create mode 100644 Documentation/litmus-tests/locking/DCL-fixed.litmus
 create mode 100644 Documentation/litmus-tests/locking/RM-broken.litmus
 create mode 100644 Documentation/litmus-tests/locking/RM-fixed.litmus
 create mode 100644 tools/memory-model/Documentation/locking.txt

diff --git a/Documentation/litmus-tests/locking/DCL-broken.litmus b/Documentation/litmus-tests/locking/DCL-broken.litmus
new file mode 100644
index 0000000000000..cfaa25ff82b1e
--- /dev/null
+++ b/Documentation/litmus-tests/locking/DCL-broken.litmus
@@ -0,0 +1,55 @@
+C DCL-broken
+
+(*
+ * Result: Sometimes
+ *
+ * This litmus test demonstrates more than just locking is required to
+ * correctly implement double-checked locking.
+ *)
+
+{
+	int flag;
+	int data;
+	int lck;
+}
+
+P0(int *flag, int *data, int *lck)
+{
+	int r0;
+	int r1;
+	int r2;
+
+	r0 = READ_ONCE(*flag);
+	if (r0 == 0) {
+		spin_lock(lck);
+		r1 = READ_ONCE(*flag);
+		if (r1 == 0) {
+			WRITE_ONCE(*data, 1);
+			WRITE_ONCE(*flag, 1);
+		}
+		spin_unlock(lck);
+	}
+	r2 = READ_ONCE(*data);
+}
+
+P1(int *flag, int *data, int *lck)
+{
+	int r0;
+	int r1;
+	int r2;
+
+	r0 = READ_ONCE(*flag);
+	if (r0 == 0) {
+		spin_lock(lck);
+		r1 = READ_ONCE(*flag);
+		if (r1 == 0) {
+			WRITE_ONCE(*data, 1);
+			WRITE_ONCE(*flag, 1);
+		}
+		spin_unlock(lck);
+	}
+	r2 = READ_ONCE(*data);
+}
+
+locations [flag;data;lck;0:r0;0:r1;1:r0;1:r1]
+exists (0:r2=0 \/ 1:r2=0)
diff --git a/Documentation/litmus-tests/locking/DCL-fixed.litmus b/Documentation/litmus-tests/locking/DCL-fixed.litmus
new file mode 100644
index 0000000000000..579d6c246f167
--- /dev/null
+++ b/Documentation/litmus-tests/locking/DCL-fixed.litmus
@@ -0,0 +1,56 @@
+C DCL-fixed
+
+(*
+ * Result: Never
+ *
+ * This litmus test demonstrates that double-checked locking can be
+ * reliable given proper use of smp_load_acquire() and smp_store_release()
+ * in addition to the locking.
+ *)
+
+{
+	int flag;
+	int data;
+	int lck;
+}
+
+P0(int *flag, int *data, int *lck)
+{
+	int r0;
+	int r1;
+	int r2;
+
+	r0 = smp_load_acquire(flag);
+	if (r0 == 0) {
+		spin_lock(lck);
+		r1 = READ_ONCE(*flag);
+		if (r1 == 0) {
+			WRITE_ONCE(*data, 1);
+			smp_store_release(flag, 1);
+		}
+		spin_unlock(lck);
+	}
+	r2 = READ_ONCE(*data);
+}
+
+P1(int *flag, int *data, int *lck)
+{
+	int r0;
+	int r1;
+	int r2;
+
+	r0 = smp_load_acquire(flag);
+	if (r0 == 0) {
+		spin_lock(lck);
+		r1 = READ_ONCE(*flag);
+		if (r1 == 0) {
+			WRITE_ONCE(*data, 1);
+			smp_store_release(flag, 1);
+		}
+		spin_unlock(lck);
+	}
+	r2 = READ_ONCE(*data);
+}
+
+locations [flag;data;lck;0:r0;0:r1;1:r0;1:r1]
+exists (0:r2=0 \/ 1:r2=0)
diff --git a/Documentation/litmus-tests/locking/RM-broken.litmus b/Documentation/litmus-tests/locking/RM-broken.litmus
new file mode 100644
index 0000000000000..c586ae4b547de
--- /dev/null
+++ b/Documentation/litmus-tests/locking/RM-broken.litmus
@@ -0,0 +1,42 @@
+C RM-broken
+
+(*
+ * Result: DEADLOCK
+ *
+ * This litmus test demonstrates that the old "roach motel" approach
+ * to locking, where code can be freely moved into critical sections,
+ * cannot be used in the Linux kernel.
+ *)
+
+{
+	int lck;
+	int x;
+	int y;
+}
+
+P0(int *x, int *y, int *lck)
+{
+	int r2;
+
+	spin_lock(lck);
+	r2 = atomic_inc_return(y);
+	WRITE_ONCE(*x, 1);
+	spin_unlock(lck);
+}
+
+P1(int *x, int *y, int *lck)
+{
+	int r0;
+	int r1;
+	int r2;
+
+	spin_lock(lck);
+	r0 = READ_ONCE(*x);
+	r1 = READ_ONCE(*x);
+	r2 = atomic_inc_return(y);
+	spin_unlock(lck);
+}
+
+locations [x;lck;0:r2;1:r0;1:r1;1:r2]
+filter (y=2 /\ 1:r0=0 /\ 1:r1=1)
+exists (1:r2=1)
diff --git a/Documentation/litmus-tests/locking/RM-fixed.litmus b/Documentation/litmus-tests/locking/RM-fixed.litmus
new file mode 100644
index 0000000000000..672856736b42e
--- /dev/null
+++ b/Documentation/litmus-tests/locking/RM-fixed.litmus
@@ -0,0 +1,42 @@
+C RM-fixed
+
+(*
+ * Result: Never
+ *
+ * This litmus test demonstrates that the old "roach motel" approach
+ * to locking, where code can be freely moved into critical sections,
+ * cannot be used in the Linux kernel.
+ *)
+
+{
+	int lck;
+	int x;
+	int y;
+}
+
+P0(int *x, int *y, int *lck)
+{
+	int r2;
+
+	spin_lock(lck);
+	r2 = atomic_inc_return(y);
+	WRITE_ONCE(*x, 1);
+	spin_unlock(lck);
+}
+
+P1(int *x, int *y, int *lck)
+{
+	int r0;
+	int r1;
+	int r2;
+
+	r0 = READ_ONCE(*x);
+	r1 = READ_ONCE(*x);
+	spin_lock(lck);
+	r2 = atomic_inc_return(y);
+	spin_unlock(lck);
+}
+
+locations [x;lck;0:r2;1:r0;1:r1;1:r2]
+filter (y=2 /\ 1:r0=0 /\ 1:r1=1)
+exists (1:r2=1)
diff --git a/tools/memory-model/Documentation/locking.txt b/tools/memory-model/Documentation/locking.txt
new file mode 100644
index 0000000000000..4e05c6d53ab72
--- /dev/null
+++ b/tools/memory-model/Documentation/locking.txt
@@ -0,0 +1,320 @@
+Locking
+=======
+
+Locking is well-known and the common use cases are straightforward: Any
+CPU holding a given lock sees any changes previously seen or made by any
+CPU before it previously released that same lock.  This last sentence
+is the only part of this document that most developers will need to read.
+
+However, developers who would like to also access lock-protected shared
+variables outside of their corresponding locks should continue reading.
+
+
+Locking and Prior Accesses
+--------------------------
+
+The basic rule of locking is worth repeating:
+
+	Any CPU holding a given lock sees any changes previously seen
+	or made by any CPU before it previously released that same lock.
+
+Note that this statement is a bit stronger than "Any CPU holding a
+given lock sees all changes made by any CPU during the time that CPU was
+previously holding this same lock".  For example, consider the following
+pair of code fragments:
+
+	/* See MP+polocks.litmus. */
+	void CPU0(void)
+	{
+		WRITE_ONCE(x, 1);
+		spin_lock(&mylock);
+		WRITE_ONCE(y, 1);
+		spin_unlock(&mylock);
+	}
+
+	void CPU1(void)
+	{
+		spin_lock(&mylock);
+		r0 = READ_ONCE(y);
+		spin_unlock(&mylock);
+		r1 = READ_ONCE(x);
+	}
+
+The basic rule guarantees that if CPU0() acquires mylock before CPU1(),
+then both r0 and r1 must be set to the value 1.  This also has the
+consequence that if the final value of r0 is equal to 1, then the final
+value of r1 must also be equal to 1.  In contrast, the weaker rule would
+say nothing about the final value of r1.
+
+
+Locking and Subsequent Accesses
+-------------------------------
+
+The converse to the basic rule also holds:  Any CPU holding a given
+lock will not see any changes that will be made by any CPU after it
+subsequently acquires this same lock.  This converse statement is
+illustrated by the following litmus test:
+
+	/* See MP+porevlocks.litmus. */
+	void CPU0(void)
+	{
+		r0 = READ_ONCE(y);
+		spin_lock(&mylock);
+		r1 = READ_ONCE(x);
+		spin_unlock(&mylock);
+	}
+
+	void CPU1(void)
+	{
+		spin_lock(&mylock);
+		WRITE_ONCE(x, 1);
+		spin_unlock(&mylock);
+		WRITE_ONCE(y, 1);
+	}
+
+This converse to the basic rule guarantees that if CPU0() acquires
+mylock before CPU1(), then both r0 and r1 must be set to the value 0.
+This also has the consequence that if the final value of r1 is equal
+to 0, then the final value of r0 must also be equal to 0.  In contrast,
+the weaker rule would say nothing about the final value of r0.
+
+These examples show only a single pair of CPUs, but the effects of the
+locking basic rule extend across multiple acquisitions of a given lock
+across multiple CPUs.
+
+
+Double-Checked Locking
+----------------------
+
+It is well known that more than just a lock is required to make
+double-checked locking work correctly,  This litmus test illustrates
+one incorrect approach:
+
+	/* See Documentation/litmus-tests/locking/DCL-broken.litmus. */
+	P0(int *flag, int *data, int *lck)
+	{
+		int r0;
+		int r1;
+		int r2;
+
+		r0 = READ_ONCE(*flag);
+		if (r0 == 0) {
+			spin_lock(lck);
+			r1 = READ_ONCE(*flag);
+			if (r1 == 0) {
+				WRITE_ONCE(*data, 1);
+				WRITE_ONCE(*flag, 1);
+			}
+			spin_unlock(lck);
+		}
+		r2 = READ_ONCE(*data);
+	}
+	/* P1() is the exactly the same as P0(). */
+
+There are two problems.  First, there is no ordering between the first
+READ_ONCE() of "flag" and the READ_ONCE() of "data".  Second, there is
+no ordering between the two WRITE_ONCE() calls.  It should therefore be
+no surprise that "r2" can be zero, and a quick herd7 run confirms this.
+
+One way to fix this is to use smp_load_acquire() and smp_store_release()
+as shown in this corrected version:
+
+	/* See Documentation/litmus-tests/locking/DCL-fixed.litmus. */
+	P0(int *flag, int *data, int *lck)
+	{
+		int r0;
+		int r1;
+		int r2;
+
+		r0 = smp_load_acquire(flag);
+		if (r0 == 0) {
+			spin_lock(lck);
+			r1 = READ_ONCE(*flag);
+			if (r1 == 0) {
+				WRITE_ONCE(*data, 1);
+				smp_store_release(flag, 1);
+			}
+			spin_unlock(lck);
+		}
+		r2 = READ_ONCE(*data);
+	}
+	/* P1() is the exactly the same as P0(). */
+
+The smp_load_acquire() guarantees that its load from "flags" will
+be ordered before the READ_ONCE() from data, thus solving the first
+problem.  The smp_store_release() guarantees that its store will be
+ordered after the WRITE_ONCE() to "data", solving the second problem.
+The smp_store_release() pairs with the smp_load_acquire(), thus ensuring
+that the ordering provided by each actually takes effect.  Again, a
+quick herd7 run confirms this.
+
+In short, if you access a lock-protected variable without holding the
+corresponding lock, you will need to provide additional ordering, in
+this case, via the smp_load_acquire() and the smp_store_release().
+
+
+Ordering Provided by a Lock to CPUs Not Holding That Lock
+---------------------------------------------------------
+
+It is not necessarily the case that accesses ordered by locking will be
+seen as ordered by CPUs not holding that lock.  Consider this example:
+
+	/* See Z6.0+pooncelock+pooncelock+pombonce.litmus. */
+	void CPU0(void)
+	{
+		spin_lock(&mylock);
+		WRITE_ONCE(x, 1);
+		WRITE_ONCE(y, 1);
+		spin_unlock(&mylock);
+	}
+
+	void CPU1(void)
+	{
+		spin_lock(&mylock);
+		r0 = READ_ONCE(y);
+		WRITE_ONCE(z, 1);
+		spin_unlock(&mylock);
+	}
+
+	void CPU2(void)
+	{
+		WRITE_ONCE(z, 2);
+		smp_mb();
+		r1 = READ_ONCE(x);
+	}
+
+Counter-intuitive though it might be, it is quite possible to have
+the final value of r0 be 1, the final value of z be 2, and the final
+value of r1 be 0.  The reason for this surprising outcome is that CPU2()
+never acquired the lock, and thus did not fully benefit from the lock's
+ordering properties.
+
+Ordering can be extended to CPUs not holding the lock by careful use
+of smp_mb__after_spinlock():
+
+	/* See Z6.0+pooncelock+poonceLock+pombonce.litmus. */
+	void CPU0(void)
+	{
+		spin_lock(&mylock);
+		WRITE_ONCE(x, 1);
+		WRITE_ONCE(y, 1);
+		spin_unlock(&mylock);
+	}
+
+	void CPU1(void)
+	{
+		spin_lock(&mylock);
+		smp_mb__after_spinlock();
+		r0 = READ_ONCE(y);
+		WRITE_ONCE(z, 1);
+		spin_unlock(&mylock);
+	}
+
+	void CPU2(void)
+	{
+		WRITE_ONCE(z, 2);
+		smp_mb();
+		r1 = READ_ONCE(x);
+	}
+
+This addition of smp_mb__after_spinlock() strengthens the lock
+acquisition sufficiently to rule out the counter-intuitive outcome.
+In other words, the addition of the smp_mb__after_spinlock() prohibits
+the counter-intuitive result where the final value of r0 is 1, the final
+value of z is 2, and the final value of r1 is 0.
+
+
+No Roach-Motel Locking!
+-----------------------
+
+This example requires familiarity with the herd7 "filter" clause, so
+please read up on that topic in litmus-tests.txt.
+
+It is tempting to allow memory-reference instructions to be pulled
+into a critical section, but this cannot be allowed in the general case.
+For example, consider a spin loop preceding a lock-based critical section.
+Now, herd7 does not model spin loops, but we can emulate one with two
+loads, with a "filter" clause to constrain the first to return the
+initial value and the second to return the updated value, as shown below:
+
+	/* See Documentation/litmus-tests/locking/RM-fixed.litmus. */
+	P0(int *x, int *y, int *lck)
+	{
+		int r2;
+
+		spin_lock(lck);
+		r2 = atomic_inc_return(y);
+		WRITE_ONCE(*x, 1);
+		spin_unlock(lck);
+	}
+
+	P1(int *x, int *y, int *lck)
+	{
+		int r0;
+		int r1;
+		int r2;
+
+		r0 = READ_ONCE(*x);
+		r1 = READ_ONCE(*x);
+		spin_lock(lck);
+		r2 = atomic_inc_return(y);
+		spin_unlock(lck);
+	}
+
+	filter (y=2 /\ 1:r0=0 /\ 1:r1=1)
+	exists (1:r2=1)
+
+The variable "x" is the control variable for the emulated spin loop.
+P0() sets it to "1" while holding the lock, and P1() emulates the
+spin loop by reading it twice, first into "1:r0" (which should get the
+initial value "0") and then into "1:r1" (which should get the updated
+value "1").
+
+The purpose of the variable "y" is to reject deadlocked executions.
+Only those executions where the final value of "y" have avoided deadlock.
+
+The "filter" clause takes all this into account, constraining "y" to
+equal "2", "1:r0" to equal "0", and "1:r1" to equal 1.
+
+Then the "exists" clause checks to see if P1() acquired its lock first,
+which should not happen given the filter clause because P0() updates
+"x" while holding the lock.  And herd7 confirms this.
+
+But suppose that the compiler was permitted to reorder the spin loop
+into P1()'s critical section, like this:
+
+	/* See Documentation/litmus-tests/locking/RM-broken.litmus. */
+	P0(int *x, int *y, int *lck)
+	{
+		int r2;
+
+		spin_lock(lck);
+		r2 = atomic_inc_return(y);
+		WRITE_ONCE(*x, 1);
+		spin_unlock(lck);
+	}
+
+	P1(int *x, int *y, int *lck)
+	{
+		int r0;
+		int r1;
+		int r2;
+
+		spin_lock(lck);
+		r0 = READ_ONCE(*x);
+		r1 = READ_ONCE(*x);
+		r2 = atomic_inc_return(y);
+		spin_unlock(lck);
+	}
+
+	locations [x;lck;0:r2;1:r0;1:r1;1:r2]
+	filter (y=2 /\ 1:r0=0 /\ 1:r1=1)
+	exists (1:r2=1)
+
+If "1:r0" is equal to "0", "1:r1" can never equal "1" because P0()
+cannot update "x" while P1() holds the lock.  And herd7 confirms this,
+showing zero executions matching the "filter" criteria.
+
+And this is why Linux-kernel lock and unlock primitives must prevent
+code from entering critical sections.  It is not sufficient to only
+prevent code from leaving them.

From d55a79c282beb6ffe3605e7d6afded0ca0c909f5 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 18 Mar 2019 11:53:50 -0700
Subject: [PATCH 02/70] tools/memory-model: Make judgelitmus.sh note timeouts

Currently, judgelitmus.sh treats timeouts (as in the "--timeout" argument)
as "!!! Verification error".  This can be misleading because it is quite
possible that running the test longer would have produced a verification.
This commit therefore changes judgelitmus.sh to check for timeouts and
to report them with "!!! Timeout".

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/scripts/judgelitmus.sh | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tools/memory-model/scripts/judgelitmus.sh b/tools/memory-model/scripts/judgelitmus.sh
index 0cc63875e395d..d3c313b9a458a 100755
--- a/tools/memory-model/scripts/judgelitmus.sh
+++ b/tools/memory-model/scripts/judgelitmus.sh
@@ -42,6 +42,14 @@ grep '^Observation' $LKMM_DESTDIR/$litmus.out
 if grep -q '^Observation' $LKMM_DESTDIR/$litmus.out
 then
 	:
+elif grep '^Command exited with non-zero status 124' $LKMM_DESTDIR/$litmus.out
+then
+	echo ' !!! Timeout' $litmus
+	if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out
+	then
+		echo ' !!! Timeout' >> $LKMM_DESTDIR/$litmus.out 2>&1
+	fi
+	exit 124
 else
 	echo ' !!! Verification error' $litmus
 	if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out

From 3e0e0d96d110701b2436de6117967c929b24a05f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 18 Mar 2019 13:07:46 -0700
Subject: [PATCH 03/70] tools/memory-model: Make cmplitmushist.sh note timeouts

Currently, cmplitmushist.sh treats timeouts (as in the "--timeout"
argument) as "Missing Observation line".  This can be misleading because
it is quite possible that running the test longer would have produced
a verification.  This commit therefore changes cmplitmushist.sh to check
for timeouts and to report them with "Timed out".

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/scripts/cmplitmushist.sh | 22 +++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/tools/memory-model/scripts/cmplitmushist.sh b/tools/memory-model/scripts/cmplitmushist.sh
index 0f498aeeccf5e..b9c174dd80042 100755
--- a/tools/memory-model/scripts/cmplitmushist.sh
+++ b/tools/memory-model/scripts/cmplitmushist.sh
@@ -12,12 +12,30 @@ trap 'rm -rf $T' 0
 mkdir $T
 
 # comparetest oldpath newpath
+timedout=0
 perfect=0
 obsline=0
 noobsline=0
 obsresult=0
 badcompare=0
 comparetest () {
+	if grep -q '^Command exited with non-zero status 124' $1 ||
+	   grep -q '^Command exited with non-zero status 124' $2
+	then
+		if grep -q '^Command exited with non-zero status 124' $1 &&
+		   grep -q '^Command exited with non-zero status 124' $2
+		then
+			echo Both runs timed out: $2
+		elif grep -q '^Command exited with non-zero status 124' $1
+		then
+			echo Old run timed out: $2
+		elif grep -q '^Command exited with non-zero status 124' $2
+		then
+			echo New run timed out: $2
+		fi
+		timedout=`expr "$timedout" + 1`
+		return 0
+	fi
 	grep -v 'maxresident)k\|minor)pagefaults\|^Time' $1 > $T/oldout
 	grep -v 'maxresident)k\|minor)pagefaults\|^Time' $2 > $T/newout
 	if cmp -s $T/oldout $T/newout && grep -q '^Observation' $1
@@ -78,6 +96,10 @@ if test "$obsresult" -ne 0
 then
 	echo Matching Observation Always/Sometimes/Never result: $obsresult 1>&2
 fi
+if test "$timedout" -ne 0
+then
+	echo "!!!" Timed out: $timedout 1>&2
+fi
 if test "$badcompare" -ne 0
 then
 	echo "!!!" Result changed: $badcompare 1>&2

From ce26c0d452e43b28ea6056000feb658b28a7e710 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 18 Mar 2019 13:40:57 -0700
Subject: [PATCH 04/70] tools/memory-model: Make judgelitmus.sh identify bad
 macros

Currently, judgelitmus.sh treats use of unknown primitives (such as
srcu_read_lock() prior to SRCU support) as "!!! Verification error".
This can be misleading because it fails to call out typos and running
a version LKMM on a litmus test requiring a feature not provided by
that version.  This commit therefore changes judgelitmus.sh to check
for unknown primitives and to report them, for example, with:

	'!!! Current LKMM version does not know "rcu_write_lock"'.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/scripts/cmplitmushist.sh | 31 ++++++++++++++++++---
 tools/memory-model/scripts/judgelitmus.sh   | 12 ++++++++
 2 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/tools/memory-model/scripts/cmplitmushist.sh b/tools/memory-model/scripts/cmplitmushist.sh
index b9c174dd80042..ca1ac8b646144 100755
--- a/tools/memory-model/scripts/cmplitmushist.sh
+++ b/tools/memory-model/scripts/cmplitmushist.sh
@@ -12,6 +12,7 @@ trap 'rm -rf $T' 0
 mkdir $T
 
 # comparetest oldpath newpath
+badmacnam=0
 timedout=0
 perfect=0
 obsline=0
@@ -19,8 +20,26 @@ noobsline=0
 obsresult=0
 badcompare=0
 comparetest () {
-	if grep -q '^Command exited with non-zero status 124' $1 ||
-	   grep -q '^Command exited with non-zero status 124' $2
+	if grep -q ': Unknown macro ' $1 || grep -q ': Unknown macro ' $2
+	then
+		if grep -q ': Unknown macro ' $1
+		then
+			badname=`grep ': Unknown macro ' $1 |
+				sed -e 's/^.*: Unknown macro //' |
+				sed -e 's/ (User error).*$//'`
+			echo 'Current LKMM version does not know "'$badname'"' $1
+		fi
+		if grep -q ': Unknown macro ' $2
+		then
+			badname=`grep ': Unknown macro ' $2 |
+				sed -e 's/^.*: Unknown macro //' |
+				sed -e 's/ (User error).*$//'`
+			echo 'Current LKMM version does not know "'$badname'"' $2
+		fi
+		badmacnam=`expr "$badmacnam" + 1`
+		return 0
+	elif grep -q '^Command exited with non-zero status 124' $1 ||
+	     grep -q '^Command exited with non-zero status 124' $2
 	then
 		if grep -q '^Command exited with non-zero status 124' $1 &&
 		   grep -q '^Command exited with non-zero status 124' $2
@@ -56,7 +75,7 @@ comparetest () {
 			return 0
 		fi
 	else
-		echo Missing Observation line "(e.g., herd7 timeout)": $2
+		echo Missing Observation line "(e.g., syntax error)": $2
 		noobsline=`expr "$noobsline" + 1`
 		return 0
 	fi
@@ -90,7 +109,7 @@ then
 fi
 if test "$noobsline" -ne 0
 then
-	echo Missing Observation line "(e.g., herd7 timeout)": $noobsline 1>&2
+	echo Missing Observation line "(e.g., syntax error)": $noobsline 1>&2
 fi
 if test "$obsresult" -ne 0
 then
@@ -100,6 +119,10 @@ if test "$timedout" -ne 0
 then
 	echo "!!!" Timed out: $timedout 1>&2
 fi
+if test "$badmacnam" -ne 0
+then
+	echo "!!!" Unknown primitive: $badmacnam 1>&2
+fi
 if test "$badcompare" -ne 0
 then
 	echo "!!!" Result changed: $badcompare 1>&2
diff --git a/tools/memory-model/scripts/judgelitmus.sh b/tools/memory-model/scripts/judgelitmus.sh
index d3c313b9a458a..d40439c7b71e0 100755
--- a/tools/memory-model/scripts/judgelitmus.sh
+++ b/tools/memory-model/scripts/judgelitmus.sh
@@ -42,6 +42,18 @@ grep '^Observation' $LKMM_DESTDIR/$litmus.out
 if grep -q '^Observation' $LKMM_DESTDIR/$litmus.out
 then
 	:
+elif grep ': Unknown macro ' $LKMM_DESTDIR/$litmus.out
+then
+	badname=`grep ': Unknown macro ' $LKMM_DESTDIR/$litmus.out |
+		sed -e 's/^.*: Unknown macro //' |
+		sed -e 's/ (User error).*$//'`
+	badmsg=' !!! Current LKMM version does not know "'$badname'"'" $litmus"
+	echo $badmsg
+	if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out
+	then
+		echo ' !!! '$badmsg >> $LKMM_DESTDIR/$litmus.out 2>&1
+	fi
+	exit 254
 elif grep '^Command exited with non-zero status 124' $LKMM_DESTDIR/$litmus.out
 then
 	echo ' !!! Timeout' $litmus

From e450f7e9c2b61e7ffe2efa238051ebf3926ee7c9 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 19 Mar 2019 14:27:06 -0700
Subject: [PATCH 05/70] tools/memory-model: Make judgelitmus.sh detect hard
 deadlocks

If a litmus test specifies "Result: Never" and if it contains an
unconditional ("hard") deadlock, then running checklitmus.sh on it will
not flag any errors, despite the fact that there are no executions.
This commit therefore updates judgelitmus.sh to complain about tests
with no executions that are marked, but not as "Result: DEADLOCK".

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/scripts/judgelitmus.sh | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tools/memory-model/scripts/judgelitmus.sh b/tools/memory-model/scripts/judgelitmus.sh
index d40439c7b71e0..84c62eee321bf 100755
--- a/tools/memory-model/scripts/judgelitmus.sh
+++ b/tools/memory-model/scripts/judgelitmus.sh
@@ -83,6 +83,14 @@ then
 		fi
 		ret=1
 	fi
+elif grep '^Observation' $LKMM_DESTDIR/$litmus.out | grep -q 'Never 0 0$'
+then
+	echo " !!! Unexpected non-$outcome deadlock" $litmus
+	if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out
+	then
+		echo " !!! Unexpected non-$outcome deadlock" $litmus >> $LKMM_DESTDIR/$litmus.out 2>&1
+	fi
+	ret=1
 elif grep '^Observation' $LKMM_DESTDIR/$litmus.out | grep -q $outcome || test "$outcome" = Maybe
 then
 	ret=0

From c82c61e122978f555b8591ae484f1ceb62f4aff1 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 11 Apr 2019 07:33:18 -0700
Subject: [PATCH 06/70] tools/memory-model: Fix paulmck email address on
 pre-existing scripts

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/scripts/checkalllitmus.sh  | 2 +-
 tools/memory-model/scripts/checklitmus.sh     | 2 +-
 tools/memory-model/scripts/checklitmushist.sh | 2 +-
 tools/memory-model/scripts/judgelitmus.sh     | 2 +-
 tools/memory-model/scripts/newlitmushist.sh   | 2 +-
 tools/memory-model/scripts/parseargs.sh       | 2 +-
 tools/memory-model/scripts/runlitmushist.sh   | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/tools/memory-model/scripts/checkalllitmus.sh b/tools/memory-model/scripts/checkalllitmus.sh
index 3c0c7fbbd223b..10e14d94acee5 100755
--- a/tools/memory-model/scripts/checkalllitmus.sh
+++ b/tools/memory-model/scripts/checkalllitmus.sh
@@ -17,7 +17,7 @@
 #
 # Copyright IBM Corporation, 2018
 #
-# Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Author: Paul E. McKenney <paulmck@linux.ibm.com>
 
 . scripts/parseargs.sh
 
diff --git a/tools/memory-model/scripts/checklitmus.sh b/tools/memory-model/scripts/checklitmus.sh
index 11461ed40b5e4..638b8c610894b 100755
--- a/tools/memory-model/scripts/checklitmus.sh
+++ b/tools/memory-model/scripts/checklitmus.sh
@@ -15,7 +15,7 @@
 #
 # Copyright IBM Corporation, 2018
 #
-# Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Author: Paul E. McKenney <paulmck@linux.ibm.com>
 
 litmus=$1
 herdoptions=${LKMM_HERD_OPTIONS--conf linux-kernel.cfg}
diff --git a/tools/memory-model/scripts/checklitmushist.sh b/tools/memory-model/scripts/checklitmushist.sh
index 1d210ffb7c8af..406ecfc0aee4c 100755
--- a/tools/memory-model/scripts/checklitmushist.sh
+++ b/tools/memory-model/scripts/checklitmushist.sh
@@ -12,7 +12,7 @@
 #
 # Copyright IBM Corporation, 2018
 #
-# Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Author: Paul E. McKenney <paulmck@linux.ibm.com>
 
 . scripts/parseargs.sh
 
diff --git a/tools/memory-model/scripts/judgelitmus.sh b/tools/memory-model/scripts/judgelitmus.sh
index 84c62eee321bf..d82133e75580c 100755
--- a/tools/memory-model/scripts/judgelitmus.sh
+++ b/tools/memory-model/scripts/judgelitmus.sh
@@ -13,7 +13,7 @@
 #
 # Copyright IBM Corporation, 2018
 #
-# Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Author: Paul E. McKenney <paulmck@linux.ibm.com>
 
 litmus=$1
 
diff --git a/tools/memory-model/scripts/newlitmushist.sh b/tools/memory-model/scripts/newlitmushist.sh
index 991f8f8148817..3f4b06e299886 100755
--- a/tools/memory-model/scripts/newlitmushist.sh
+++ b/tools/memory-model/scripts/newlitmushist.sh
@@ -12,7 +12,7 @@
 #
 # Copyright IBM Corporation, 2018
 #
-# Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Author: Paul E. McKenney <paulmck@linux.ibm.com>
 
 . scripts/parseargs.sh
 
diff --git a/tools/memory-model/scripts/parseargs.sh b/tools/memory-model/scripts/parseargs.sh
index 40f52080fdbd6..afe7bd23de6b8 100755
--- a/tools/memory-model/scripts/parseargs.sh
+++ b/tools/memory-model/scripts/parseargs.sh
@@ -9,7 +9,7 @@
 #
 # Copyright IBM Corporation, 2018
 #
-# Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Author: Paul E. McKenney <paulmck@linux.ibm.com>
 
 T=/tmp/parseargs.sh.$$
 mkdir $T
diff --git a/tools/memory-model/scripts/runlitmushist.sh b/tools/memory-model/scripts/runlitmushist.sh
index 6ed376f495bb4..852786fef179f 100755
--- a/tools/memory-model/scripts/runlitmushist.sh
+++ b/tools/memory-model/scripts/runlitmushist.sh
@@ -13,7 +13,7 @@
 #
 # Copyright IBM Corporation, 2018
 #
-# Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Author: Paul E. McKenney <paulmck@linux.ibm.com>
 
 T=/tmp/runlitmushist.sh.$$
 trap 'rm -rf $T' 0

From 4be1396ec0650df0866f263bb0ce822465d57133 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 19 Mar 2019 15:59:26 -0700
Subject: [PATCH 07/70] tools/memory-model: Update parseargs.sh for hardware
 verification

This commit adds a --hw argument to parseargs.sh to specify the CPU
family for a hardware verification.  For example, "--hw AArch64" will
specify that a C-language litmus test is to be translated to ARMv8 and
the result verified.  This will set the LKMM_HW_MAP_FILE environment
variable accordingly.  If there is no --hw argument, this environment
variable will be set to the empty string.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/scripts/parseargs.sh | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tools/memory-model/scripts/parseargs.sh b/tools/memory-model/scripts/parseargs.sh
index afe7bd23de6b8..5f016fc3f3af5 100755
--- a/tools/memory-model/scripts/parseargs.sh
+++ b/tools/memory-model/scripts/parseargs.sh
@@ -27,6 +27,7 @@ initparam () {
 
 initparam LKMM_DESTDIR "."
 initparam LKMM_HERD_OPTIONS "-conf linux-kernel.cfg"
+initparam LKMM_HW_MAP_FILE ""
 initparam LKMM_JOBS `getconf _NPROCESSORS_ONLN`
 initparam LKMM_PROCS "3"
 initparam LKMM_TIMEOUT "1m"
@@ -37,10 +38,11 @@ usagehelp () {
 	echo "Usage $scriptname [ arguments ]"
 	echo "      --destdir path (place for .litmus.out, default by .litmus)"
 	echo "      --herdopts -conf linux-kernel.cfg ..."
+	echo "      --hw AArch64"
 	echo "      --jobs N (number of jobs, default one per CPU)"
 	echo "      --procs N (litmus tests with at most this many processes)"
 	echo "      --timeout N (herd7 timeout (e.g., 10s, 1m, 2hr, 1d, '')"
-	echo "Defaults: --destdir '$LKMM_DESTDIR_DEF' --herdopts '$LKMM_HERD_OPTIONS_DEF' --jobs '$LKMM_JOBS_DEF' --procs '$LKMM_PROCS_DEF' --timeout '$LKMM_TIMEOUT_DEF'"
+	echo "Defaults: --destdir '$LKMM_DESTDIR_DEF' --herdopts '$LKMM_HERD_OPTIONS_DEF' --hw '$LKMM_HW_MAP_FILE' --jobs '$LKMM_JOBS_DEF' --procs '$LKMM_PROCS_DEF' --timeout '$LKMM_TIMEOUT_DEF'"
 	exit 1
 }
 
@@ -95,6 +97,11 @@ do
 		LKMM_HERD_OPTIONS="$2"
 		shift
 		;;
+	--hw)
+		checkarg --hw "(.map file architecture name)" "$#" "$2" '^[A-Za-z0-9_-]\+' '^--'
+		LKMM_HW_MAP_FILE="$2"
+		shift
+		;;
 	-j[1-9]*)
 		njobs="`echo $1 | sed -e 's/^-j//'`"
 		trailchars="`echo $njobs | sed -e 's/[0-9]\+\(.*\)$/\1/'`"

From a0ef691e64972f1add643b915fbde09bdf2ce144 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 19 Mar 2019 14:39:10 -0700
Subject: [PATCH 08/70] tools/memory-model: Make judgelitmus.sh handle hardware
 verifications

This commit makes the judgelitmus.sh script check the --hw argument
(AKA the LKMM_HW_MAP_FILE environment variable) and to adjust its
judgment for a run where a C-language litmus test has been translated to
assembly and the assembly version verified.  In this case, the assembly
verification output is checked against the C-language script's "Result:"
comment.  However, because hardware can be stronger than LKMM requires,
the judgelitmus.sh script forgives verification mismatches featuring
a "Sometimes" in the C-language script and an "Always" or "Never"
assembly-language verification.

Note that deadlock is not forgiven, however, this should not normally be
an issue given that C-language tests containing locking, RCU, or SRCU
cannot be translated to assembly.  However, this issue can crop up in
litmus tests that mimic deadlock by using the "filter" clause to ignore
all executions.  It can also crop up when certain herd arguments are
used to autofilter everything that does not match the "exists" clause
in cases where the "exists" clause cannot be satisfied.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/scripts/README         |  8 +--
 tools/memory-model/scripts/judgelitmus.sh | 75 ++++++++++++++---------
 2 files changed, 51 insertions(+), 32 deletions(-)

diff --git a/tools/memory-model/scripts/README b/tools/memory-model/scripts/README
index 095c7eb36f9f9..0e29a52044c1a 100644
--- a/tools/memory-model/scripts/README
+++ b/tools/memory-model/scripts/README
@@ -43,10 +43,10 @@ initlitmushist.sh
 
 judgelitmus.sh
 
-	Given a .litmus file and its .litmus.out herd7 output, check the
-	.litmus.out file against the .litmus file's "Result:" comment to
-	judge whether the test ran correctly.  Not normally run manually,
-	provided instead for use by other scripts.
+	Given a .litmus file and its herd7 output, check the output file
+	against the .litmus file's "Result:" comment to judge whether
+	the test ran correctly.  Not normally run manually, provided
+	instead for use by other scripts.
 
 newlitmushist.sh
 
diff --git a/tools/memory-model/scripts/judgelitmus.sh b/tools/memory-model/scripts/judgelitmus.sh
index d82133e75580c..6f3c60065c8b5 100755
--- a/tools/memory-model/scripts/judgelitmus.sh
+++ b/tools/memory-model/scripts/judgelitmus.sh
@@ -1,9 +1,14 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0+
 #
-# Given a .litmus test and the corresponding .litmus.out file, check
-# the .litmus.out file against the "Result:" comment to judge whether
-# the test ran correctly.
+# Given a .litmus test and the corresponding litmus output file, check
+# the .litmus.out file against the "Result:" comment to judge whether the
+# test ran correctly.  If the --hw argument is omitted, check against the
+# LKMM output, which is assumed to be in file.litmus.out.  If this argument
+# is provided, this is assumed to be a hardware test, and the output is
+# assumed to be in file.HW.litmus.out, where "HW" is the --hw argument.
+# In addition, non-Sometimes verification results will be noted, but
+# forgiven.
 #
 # Usage:
 #	judgelitmus.sh file.litmus
@@ -24,11 +29,18 @@ else
 	echo ' --- ' error: \"$litmus\" is not a readable file
 	exit 255
 fi
-if test -f "$LKMM_DESTDIR/$litmus".out -a -r "$LKMM_DESTDIR/$litmus".out
+if test -z "$LKMM_HW_MAP_FILE"
+then
+	litmusout=$litmus.out
+else
+	litmusout="`echo $litmus |
+		sed -e 's/\.litmus$/.'${LKMM_HW_MAP_FILE}'.litmus/'`.out"
+fi
+if test -f "$LKMM_DESTDIR/$litmusout" -a -r "$LKMM_DESTDIR/$litmusout"
 then
 	:
 else
-	echo ' --- ' error: \"$LKMM_DESTDIR/$litmus\".out is not a readable file
+	echo ' --- ' error: \"$LKMM_DESTDIR/$litmusout is not a readable file
 	exit 255
 fi
 if grep -q '^ \* Result: ' $litmus
@@ -38,69 +50,76 @@ else
 	outcome=specified
 fi
 
-grep '^Observation' $LKMM_DESTDIR/$litmus.out
-if grep -q '^Observation' $LKMM_DESTDIR/$litmus.out
+grep '^Observation' $LKMM_DESTDIR/$litmusout
+if grep -q '^Observation' $LKMM_DESTDIR/$litmusout
 then
 	:
-elif grep ': Unknown macro ' $LKMM_DESTDIR/$litmus.out
+elif grep ': Unknown macro ' $LKMM_DESTDIR/$litmusout
 then
-	badname=`grep ': Unknown macro ' $LKMM_DESTDIR/$litmus.out |
+	badname=`grep ': Unknown macro ' $LKMM_DESTDIR/$litmusout |
 		sed -e 's/^.*: Unknown macro //' |
 		sed -e 's/ (User error).*$//'`
 	badmsg=' !!! Current LKMM version does not know "'$badname'"'" $litmus"
 	echo $badmsg
-	if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out
+	if ! grep -q '!!!' $LKMM_DESTDIR/$litmusout
 	then
-		echo ' !!! '$badmsg >> $LKMM_DESTDIR/$litmus.out 2>&1
+		echo ' !!! '$badmsg >> $LKMM_DESTDIR/$litmusout 2>&1
 	fi
 	exit 254
-elif grep '^Command exited with non-zero status 124' $LKMM_DESTDIR/$litmus.out
+elif grep '^Command exited with non-zero status 124' $LKMM_DESTDIR/$litmusout
 then
 	echo ' !!! Timeout' $litmus
-	if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out
+	if ! grep -q '!!!' $LKMM_DESTDIR/$litmusout
 	then
-		echo ' !!! Timeout' >> $LKMM_DESTDIR/$litmus.out 2>&1
+		echo ' !!! Timeout' >> $LKMM_DESTDIR/$litmusout 2>&1
 	fi
 	exit 124
 else
 	echo ' !!! Verification error' $litmus
-	if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out
+	if ! grep -q '!!!' $LKMM_DESTDIR/$litmusout
 	then
-		echo ' !!! Verification error' >> $LKMM_DESTDIR/$litmus.out 2>&1
+		echo ' !!! Verification error' >> $LKMM_DESTDIR/$litmusout 2>&1
 	fi
 	exit 255
 fi
 if test "$outcome" = DEADLOCK
 then
-	if grep '^Observation' $LKMM_DESTDIR/$litmus.out | grep -q 'Never 0 0$'
+	if grep '^Observation' $LKMM_DESTDIR/$litmusout | grep -q 'Never 0 0$'
 	then
 		ret=0
 	else
 		echo " !!! Unexpected non-$outcome verification" $litmus
-		if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out
+		if ! grep -q '!!!' $LKMM_DESTDIR/$litmusout
 		then
-			echo " !!! Unexpected non-$outcome verification" >> $LKMM_DESTDIR/$litmus.out 2>&1
+			echo " !!! Unexpected non-$outcome verification" >> $LKMM_DESTDIR/$litmusout 2>&1
 		fi
 		ret=1
 	fi
-elif grep '^Observation' $LKMM_DESTDIR/$litmus.out | grep -q 'Never 0 0$'
+elif grep '^Observation' $LKMM_DESTDIR/$litmusout | grep -q 'Never 0 0$'
 then
 	echo " !!! Unexpected non-$outcome deadlock" $litmus
-	if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out
+	if ! grep -q '!!!' $LKMM_DESTDIR/$litmusout
 	then
-		echo " !!! Unexpected non-$outcome deadlock" $litmus >> $LKMM_DESTDIR/$litmus.out 2>&1
+		echo " !!! Unexpected non-$outcome deadlock" $litmus >> $LKMM_DESTDIR/$litmusout 2>&1
 	fi
 	ret=1
-elif grep '^Observation' $LKMM_DESTDIR/$litmus.out | grep -q $outcome || test "$outcome" = Maybe
+elif grep '^Observation' $LKMM_DESTDIR/$litmusout | grep -q $outcome || test "$outcome" = Maybe
 then
 	ret=0
 else
-	echo " !!! Unexpected non-$outcome verification" $litmus
-	if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out
+	if test -n "$LKMM_HW_MAP_FILE" -a "$outcome" = Sometimes
 	then
-		echo " !!! Unexpected non-$outcome verification" >> $LKMM_DESTDIR/$litmus.out 2>&1
+		flag="--- Forgiven"
+		ret=0
+	else
+		flag="!!! Unexpected"
+		ret=1
+	fi
+	echo " $flag non-$outcome verification" $litmus
+	if ! grep -qe "$flag" $LKMM_DESTDIR/$litmusout
+	then
+		echo " $flag non-$outcome verification" >> $LKMM_DESTDIR/$litmusout 2>&1
 	fi
-	ret=1
 fi
-tail -2 $LKMM_DESTDIR/$litmus.out | head -1
+tail -2 $LKMM_DESTDIR/$litmusout | head -1
 exit $ret

From 8fca8d0b7578f954d3566a6511bf631e273ab927 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 19 Mar 2019 16:21:09 -0700
Subject: [PATCH 09/70] tools/memory-model: Add simpletest.sh to check locking,
 RCU, and SRCU

This commit abstracts out common function to check a given litmus test
for locking, RCU, and SRCU in order to avoid duplicating code.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/scripts/simpletest.sh | 35 ++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100755 tools/memory-model/scripts/simpletest.sh

diff --git a/tools/memory-model/scripts/simpletest.sh b/tools/memory-model/scripts/simpletest.sh
new file mode 100755
index 0000000000000..7edc5d3616657
--- /dev/null
+++ b/tools/memory-model/scripts/simpletest.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Give zero status if this is a simple test and non-zero otherwise.
+# Simple tests do not contain locking, RCU, or SRCU.
+#
+# Usage:
+#	simpletest.sh file.litmus
+#
+# Copyright IBM Corporation, 2019
+#
+# Author: Paul E. McKenney <paulmck@linux.ibm.com>
+
+
+litmus=$1
+
+if test -f "$litmus" -a -r "$litmus"
+then
+	:
+else
+	echo ' --- ' error: \"$litmus\" is not a readable file
+	exit 255
+fi
+exclude="^[[:space:]]*\("
+exclude="${exclude}spin_lock(\|spin_unlock(\|spin_trylock(\|spin_is_locked("
+exclude="${exclude}\|rcu_read_lock(\|rcu_read_unlock("
+exclude="${exclude}\|synchronize_rcu(\|synchronize_rcu_expedited("
+exclude="${exclude}\|srcu_read_lock(\|srcu_read_unlock("
+exclude="${exclude}\|synchronize_srcu(\|synchronize_srcu_expedited("
+exclude="${exclude}\)"
+if grep -q $exclude $litmus
+then
+	exit 255
+fi
+exit 0

From b483d1786f4ed9918b7f6cf3d1f3fd0d23db481e Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 19 Mar 2019 16:37:01 -0700
Subject: [PATCH 10/70] tools/memory-model: Fix checkalllitmus.sh comment

The checkalllitmus.sh runs litmus tests in the litmus-tests directory,
not those in the github archive, so this commit updates the comment to
reflect this reality.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/scripts/checkalllitmus.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/memory-model/scripts/checkalllitmus.sh b/tools/memory-model/scripts/checkalllitmus.sh
index 10e14d94acee5..54d8da8c338e1 100755
--- a/tools/memory-model/scripts/checkalllitmus.sh
+++ b/tools/memory-model/scripts/checkalllitmus.sh
@@ -30,8 +30,8 @@ else
 	exit 255
 fi
 
-# Create any new directories that have appeared in the github litmus
-# repo since the last run.
+# Create any new directories that have appeared in the litmus-tests
+# directory since the last run.
 if test "$LKMM_DESTDIR" != "."
 then
 	find $litmusdir -type d -print |

From 415343c061a2ef37f71ef96f08d4f31492ad8a59 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 20 Mar 2019 12:39:27 -0700
Subject: [PATCH 11/70] tools/memory-model: Hardware checking for
 check{,all}litmus.sh

This commit makes checklitmus.sh and checkalllitmus.sh check to see
if a hardware verification was specified (via the --hw command-line
argument, which sets the LKMM_HW_MAP_FILE environment variable).
If so, the C-language litmus test is converted to the specified type
of assembly-language litmus test and herd is run on it.  Hardware is
permitted to be stronger than LKMM requires, so "Always" and "Never"
verifications of "Sometimes" C-language litmus tests are forgiven.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/scripts/checkalllitmus.sh | 23 +++++------
 tools/memory-model/scripts/checklitmus.sh    | 42 ++++++++++++++++++--
 2 files changed, 49 insertions(+), 16 deletions(-)

diff --git a/tools/memory-model/scripts/checkalllitmus.sh b/tools/memory-model/scripts/checkalllitmus.sh
index 54d8da8c338e1..2d3ee850a8399 100755
--- a/tools/memory-model/scripts/checkalllitmus.sh
+++ b/tools/memory-model/scripts/checkalllitmus.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
 # SPDX-License-Identifier: GPL-2.0+
 #
 # Run herd7 tests on all .litmus files in the litmus-tests directory
@@ -8,6 +8,11 @@
 # "^^^".  It also outputs verification results to a file whose name is
 # that of the specified litmus test, but with ".out" appended.
 #
+# If the --hw argument is specified, this script translates the .litmus
+# C-language file to the specified type of assembly and verifies that.
+# But in this case, litmus tests using complex synchronization (such as
+# locking, RCU, and SRCU) are cheerfully ignored.
+#
 # Usage:
 #	checkalllitmus.sh
 #
@@ -38,21 +43,15 @@ then
 	( cd "$LKMM_DESTDIR"; sed -e 's/^/mkdir -p /' | sh )
 fi
 
-# Find the checklitmus script.  If it is not where we expect it, then
-# assume that the caller has the PATH environment variable set
-# appropriately.
-if test -x scripts/checklitmus.sh
-then
-	clscript=scripts/checklitmus.sh
-else
-	clscript=checklitmus.sh
-fi
-
 # Run the script on all the litmus tests in the specified directory
 ret=0
 for i in $litmusdir/*.litmus
 do
-	if ! $clscript $i
+	if test -n "$LKMM_HW_MAP_FILE" && ! scripts/simpletest.sh $i
+	then
+		continue
+	fi
+	if ! scripts/checklitmus.sh $i
 	then
 		ret=1
 	fi
diff --git a/tools/memory-model/scripts/checklitmus.sh b/tools/memory-model/scripts/checklitmus.sh
index 638b8c610894b..42ff11869cd62 100755
--- a/tools/memory-model/scripts/checklitmus.sh
+++ b/tools/memory-model/scripts/checklitmus.sh
@@ -6,6 +6,11 @@
 # results to a file whose name is that of the specified litmus test, but
 # with ".out" appended.
 #
+# If the --hw argument is specified, this script translates the .litmus
+# C-language file to the specified type of assembly and verifies that.
+# But in this case, litmus tests using complex synchronization (such as
+# locking, RCU, and SRCU) are cheerfully ignored.
+#
 # Usage:
 #	checklitmus.sh file.litmus
 #
@@ -18,8 +23,6 @@
 # Author: Paul E. McKenney <paulmck@linux.ibm.com>
 
 litmus=$1
-herdoptions=${LKMM_HERD_OPTIONS--conf linux-kernel.cfg}
-
 if test -f "$litmus" -a -r "$litmus"
 then
 	:
@@ -28,7 +31,38 @@ else
 	exit 255
 fi
 
-echo Herd options: $herdoptions > $LKMM_DESTDIR/$litmus.out
-/usr/bin/time $LKMM_TIMEOUT_CMD herd7 $herdoptions $litmus >> $LKMM_DESTDIR/$litmus.out 2>&1
+if test -z "$LKMM_HW_MAP_FILE"
+then
+	# LKMM run
+	herdoptions=${LKMM_HERD_OPTIONS--conf linux-kernel.cfg}
+	echo Herd options: $herdoptions > $LKMM_DESTDIR/$litmus.out
+	/usr/bin/time $LKMM_TIMEOUT_CMD herd7 $herdoptions $litmus >> $LKMM_DESTDIR/$litmus.out 2>&1
+else
+	# Hardware run
+
+	T=/tmp/checklitmushw.sh.$$
+	trap 'rm -rf $T' 0 2
+	mkdir $T
+
+	# Generate filenames
+	catfile="`echo $LKMM_HW_MAP_FILE | tr '[A-Z]' '[a-z]'`.cat"
+	mapfile="Linux2${LKMM_HW_MAP_FILE}.map"
+	themefile="$T/${LKMM_HW_MAP_FILE}.theme"
+	herdoptions="-model $LKMM_HW_CAT_FILE"
+	hwlitmus=`echo $litmus | sed -e 's/\.litmus$/.'${LKMM_HW_MAP_FILE}'.litmus/'`
+	hwlitmusfile=`echo $hwlitmus | sed -e 's,^.*/,,'`
+
+	# Don't run on litmus tests with complex synchronization
+	if ! scripts/simpletest.sh $litmus
+	then
+		echo ' --- ' error: \"$litmus\" contains locking, RCU, or SRCU
+		exit 254
+	fi
+
+	# Generate the assembly code and run herd7 on it.
+	gen_theme7 -n 10 -map $mapfile -call Linux.call > $themefile
+	jingle7 -theme $themefile $litmus > $T/$hwlitmusfile 2> $T/$hwlitmusfile.jingle7.out
+	/usr/bin/time $LKMM_TIMEOUT_CMD herd7 -model $catfile $T/$hwlitmusfile > $LKMM_DESTDIR/$hwlitmus.out 2>&1
+fi
 
 scripts/judgelitmus.sh $litmus

From a4da1dc1ced8ed3c6c1ab9251d8eb52b3e974508 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 20 Mar 2019 14:37:46 -0700
Subject: [PATCH 12/70] tools/memory-model: Make judgelitmus.sh ransack
 .litmus.out files

The judgelitmus.sh script currently relies solely on the "Result:"
comment in the .litmus file.  This is problematic when using the --hw
argument, because it is necessary to check the hardware model against
LKMM even in the absence of "Result:" comments.

This commit therefore modifies judgelitmus.sh to check the observation
in a .litmus.out file, in case one was generated by a previous LKMM run.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/scripts/judgelitmus.sh | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tools/memory-model/scripts/judgelitmus.sh b/tools/memory-model/scripts/judgelitmus.sh
index 6f3c60065c8b5..fe9131f8eb969 100755
--- a/tools/memory-model/scripts/judgelitmus.sh
+++ b/tools/memory-model/scripts/judgelitmus.sh
@@ -8,7 +8,9 @@
 # is provided, this is assumed to be a hardware test, and the output is
 # assumed to be in file.HW.litmus.out, where "HW" is the --hw argument.
 # In addition, non-Sometimes verification results will be noted, but
-# forgiven.
+# forgiven.  Furthermore, if there is no "Result:" comment but there is
+# an LKMM .litmus.out file, the observation in that file will be used
+# to judge the assembly-language verification.
 #
 # Usage:
 #	judgelitmus.sh file.litmus
@@ -32,9 +34,11 @@ fi
 if test -z "$LKMM_HW_MAP_FILE"
 then
 	litmusout=$litmus.out
+	lkmmout=
 else
 	litmusout="`echo $litmus |
 		sed -e 's/\.litmus$/.'${LKMM_HW_MAP_FILE}'.litmus/'`.out"
+	lkmmout=$litmus.out
 fi
 if test -f "$LKMM_DESTDIR/$litmusout" -a -r "$LKMM_DESTDIR/$litmusout"
 then
@@ -46,6 +50,9 @@ fi
 if grep -q '^ \* Result: ' $litmus
 then
 	outcome=`grep -m 1 '^ \* Result: ' $litmus | awk '{ print $3 }'`
+elif test -n "$LKMM_HW_MAP_FILE" && grep -q '^Observation' $LKMM_DESTDIR/$lkmmout > /dev/null 2>&1
+then
+	outcome=`grep -m 1 '^Observation ' $LKMM_DESTDIR/$lkmmout | awk '{ print $3 }'`
 else
 	outcome=specified
 fi

From b8e291826fad02aae65709e3bebd4cb27ff8087a Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 20 Mar 2019 14:57:56 -0700
Subject: [PATCH 13/70] tools/memory-model: Split runlitmus.sh out of
 checklitmus.sh

This commit prepares for adding --hw capability to github litmus-test
scripts by splitting runlitmus.sh (which simply runs the verification)
out of checklitmus.sh (which also judges the results).

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/scripts/checklitmus.sh | 57 ++-----------------
 tools/memory-model/scripts/runlitmus.sh   | 69 +++++++++++++++++++++++
 2 files changed, 73 insertions(+), 53 deletions(-)
 create mode 100755 tools/memory-model/scripts/runlitmus.sh

diff --git a/tools/memory-model/scripts/checklitmus.sh b/tools/memory-model/scripts/checklitmus.sh
index 42ff11869cd62..4c1d0cf0ddadc 100755
--- a/tools/memory-model/scripts/checklitmus.sh
+++ b/tools/memory-model/scripts/checklitmus.sh
@@ -1,15 +1,8 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0+
 #
-# Run a herd7 test and invokes judgelitmus.sh to check the result against
-# a "Result:" comment within the litmus test.  It also outputs verification
-# results to a file whose name is that of the specified litmus test, but
-# with ".out" appended.
-#
-# If the --hw argument is specified, this script translates the .litmus
-# C-language file to the specified type of assembly and verifies that.
-# But in this case, litmus tests using complex synchronization (such as
-# locking, RCU, and SRCU) are cheerfully ignored.
+# Invokes runlitmus.sh and judgelitmus.sh on its arguments to run the
+# specified litmus test and pass judgment on the results.
 #
 # Usage:
 #	checklitmus.sh file.litmus
@@ -22,47 +15,5 @@
 #
 # Author: Paul E. McKenney <paulmck@linux.ibm.com>
 
-litmus=$1
-if test -f "$litmus" -a -r "$litmus"
-then
-	:
-else
-	echo ' --- ' error: \"$litmus\" is not a readable file
-	exit 255
-fi
-
-if test -z "$LKMM_HW_MAP_FILE"
-then
-	# LKMM run
-	herdoptions=${LKMM_HERD_OPTIONS--conf linux-kernel.cfg}
-	echo Herd options: $herdoptions > $LKMM_DESTDIR/$litmus.out
-	/usr/bin/time $LKMM_TIMEOUT_CMD herd7 $herdoptions $litmus >> $LKMM_DESTDIR/$litmus.out 2>&1
-else
-	# Hardware run
-
-	T=/tmp/checklitmushw.sh.$$
-	trap 'rm -rf $T' 0 2
-	mkdir $T
-
-	# Generate filenames
-	catfile="`echo $LKMM_HW_MAP_FILE | tr '[A-Z]' '[a-z]'`.cat"
-	mapfile="Linux2${LKMM_HW_MAP_FILE}.map"
-	themefile="$T/${LKMM_HW_MAP_FILE}.theme"
-	herdoptions="-model $LKMM_HW_CAT_FILE"
-	hwlitmus=`echo $litmus | sed -e 's/\.litmus$/.'${LKMM_HW_MAP_FILE}'.litmus/'`
-	hwlitmusfile=`echo $hwlitmus | sed -e 's,^.*/,,'`
-
-	# Don't run on litmus tests with complex synchronization
-	if ! scripts/simpletest.sh $litmus
-	then
-		echo ' --- ' error: \"$litmus\" contains locking, RCU, or SRCU
-		exit 254
-	fi
-
-	# Generate the assembly code and run herd7 on it.
-	gen_theme7 -n 10 -map $mapfile -call Linux.call > $themefile
-	jingle7 -theme $themefile $litmus > $T/$hwlitmusfile 2> $T/$hwlitmusfile.jingle7.out
-	/usr/bin/time $LKMM_TIMEOUT_CMD herd7 -model $catfile $T/$hwlitmusfile > $LKMM_DESTDIR/$hwlitmus.out 2>&1
-fi
-
-scripts/judgelitmus.sh $litmus
+scripts/runlitmus.sh $1
+scripts/judgelitmus.sh $1
diff --git a/tools/memory-model/scripts/runlitmus.sh b/tools/memory-model/scripts/runlitmus.sh
new file mode 100755
index 0000000000000..91af859c0e90c
--- /dev/null
+++ b/tools/memory-model/scripts/runlitmus.sh
@@ -0,0 +1,69 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Without the -hw argument, runs a herd7 test and outputs verification
+# results to a file whose name is that of the specified litmus test,
+# but with ".out" appended.
+#
+# If the --hw argument is specified, this script translates the .litmus
+# C-language file to the specified type of assembly and verifies that.
+# But in this case, litmus tests using complex synchronization (such as
+# locking, RCU, and SRCU) are cheerfully ignored.
+#
+# Either way, return the status of the herd7 command.
+#
+# Usage:
+#	runlitmus.sh file.litmus
+#
+# Run this in the directory containing the memory model, specifying the
+# pathname of the litmus test to check.  The caller is expected to have
+# properly set up the LKMM environment variables.
+#
+# Copyright IBM Corporation, 2019
+#
+# Author: Paul E. McKenney <paulmck@linux.ibm.com>
+
+litmus=$1
+if test -f "$litmus" -a -r "$litmus"
+then
+	:
+else
+	echo ' --- ' error: \"$litmus\" is not a readable file
+	exit 255
+fi
+
+if test -z "$LKMM_HW_MAP_FILE"
+then
+	# LKMM run
+	herdoptions=${LKMM_HERD_OPTIONS--conf linux-kernel.cfg}
+	echo Herd options: $herdoptions > $LKMM_DESTDIR/$litmus.out
+	/usr/bin/time $LKMM_TIMEOUT_CMD herd7 $herdoptions $litmus >> $LKMM_DESTDIR/$litmus.out 2>&1
+else
+	# Hardware run
+
+	T=/tmp/checklitmushw.sh.$$
+	trap 'rm -rf $T' 0 2
+	mkdir $T
+
+	# Generate filenames
+	catfile="`echo $LKMM_HW_MAP_FILE | tr '[A-Z]' '[a-z]'`.cat"
+	mapfile="Linux2${LKMM_HW_MAP_FILE}.map"
+	themefile="$T/${LKMM_HW_MAP_FILE}.theme"
+	herdoptions="-model $LKMM_HW_CAT_FILE"
+	hwlitmus=`echo $litmus | sed -e 's/\.litmus$/.'${LKMM_HW_MAP_FILE}'.litmus/'`
+	hwlitmusfile=`echo $hwlitmus | sed -e 's,^.*/,,'`
+
+	# Don't run on litmus tests with complex synchronization
+	if ! scripts/simpletest.sh $litmus
+	then
+		echo ' --- ' error: \"$litmus\" contains locking, RCU, or SRCU
+		exit 254
+	fi
+
+	# Generate the assembly code and run herd on it.
+	gen_theme7 -n 10 -map $mapfile -call Linux.call > $themefile
+	jingle7 -theme $themefile $litmus > $T/$hwlitmusfile 2> $T/$hwlitmusfile.jingle7.out
+	/usr/bin/time $LKMM_TIMEOUT_CMD herd7 -model $catfile $T/$hwlitmusfile > $LKMM_DESTDIR/$hwlitmus.out 2>&1
+fi
+
+exit $?

From 92e93baad96061f527f20e0f190a0fad414096c2 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 20 Mar 2019 16:41:41 -0700
Subject: [PATCH 14/70] tools/memory-model: Make runlitmus.sh generate
 .litmus.out for --hw

In the absence of "Result:" comments, the runlitmus.sh script relies on
litmus.out files from prior LKMM runs.  This can be a bit user-hostile,
so this commit makes runlitmus.sh generate any needed .litmus.out files
that don't already exist.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/scripts/runlitmus.sh | 54 ++++++++++++++-----------
 1 file changed, 30 insertions(+), 24 deletions(-)

diff --git a/tools/memory-model/scripts/runlitmus.sh b/tools/memory-model/scripts/runlitmus.sh
index 91af859c0e90c..2865a9661b078 100755
--- a/tools/memory-model/scripts/runlitmus.sh
+++ b/tools/memory-model/scripts/runlitmus.sh
@@ -28,42 +28,48 @@ if test -f "$litmus" -a -r "$litmus"
 then
 	:
 else
-	echo ' --- ' error: \"$litmus\" is not a readable file
+	echo ' !!! ' error: \"$litmus\" is not a readable file
 	exit 255
 fi
 
-if test -z "$LKMM_HW_MAP_FILE"
+if test -z "$LKMM_HW_MAP_FILE" -o ! -e $LKMM_DESTDIR/$litmus.out
 then
 	# LKMM run
 	herdoptions=${LKMM_HERD_OPTIONS--conf linux-kernel.cfg}
 	echo Herd options: $herdoptions > $LKMM_DESTDIR/$litmus.out
 	/usr/bin/time $LKMM_TIMEOUT_CMD herd7 $herdoptions $litmus >> $LKMM_DESTDIR/$litmus.out 2>&1
-else
-	# Hardware run
+	ret=$?
+	if test -z "$LKMM_HW_MAP_FILE"
+	then
+		exit $ret
+	fi
+	echo " --- " Automatically generated LKMM output for '"'--hw $LKMM_HW_MAP_FILE'"' run
+fi
 
-	T=/tmp/checklitmushw.sh.$$
-	trap 'rm -rf $T' 0 2
-	mkdir $T
+# Hardware run
 
-	# Generate filenames
-	catfile="`echo $LKMM_HW_MAP_FILE | tr '[A-Z]' '[a-z]'`.cat"
-	mapfile="Linux2${LKMM_HW_MAP_FILE}.map"
-	themefile="$T/${LKMM_HW_MAP_FILE}.theme"
-	herdoptions="-model $LKMM_HW_CAT_FILE"
-	hwlitmus=`echo $litmus | sed -e 's/\.litmus$/.'${LKMM_HW_MAP_FILE}'.litmus/'`
-	hwlitmusfile=`echo $hwlitmus | sed -e 's,^.*/,,'`
+T=/tmp/checklitmushw.sh.$$
+trap 'rm -rf $T' 0 2
+mkdir $T
 
-	# Don't run on litmus tests with complex synchronization
-	if ! scripts/simpletest.sh $litmus
-	then
-		echo ' --- ' error: \"$litmus\" contains locking, RCU, or SRCU
-		exit 254
-	fi
+# Generate filenames
+catfile="`echo $LKMM_HW_MAP_FILE | tr '[A-Z]' '[a-z]'`.cat"
+mapfile="Linux2${LKMM_HW_MAP_FILE}.map"
+themefile="$T/${LKMM_HW_MAP_FILE}.theme"
+herdoptions="-model $LKMM_HW_CAT_FILE"
+hwlitmus=`echo $litmus | sed -e 's/\.litmus$/.'${LKMM_HW_MAP_FILE}'.litmus/'`
+hwlitmusfile=`echo $hwlitmus | sed -e 's,^.*/,,'`
 
-	# Generate the assembly code and run herd on it.
-	gen_theme7 -n 10 -map $mapfile -call Linux.call > $themefile
-	jingle7 -theme $themefile $litmus > $T/$hwlitmusfile 2> $T/$hwlitmusfile.jingle7.out
-	/usr/bin/time $LKMM_TIMEOUT_CMD herd7 -model $catfile $T/$hwlitmusfile > $LKMM_DESTDIR/$hwlitmus.out 2>&1
+# Don't run on litmus tests with complex synchronization
+if ! scripts/simpletest.sh $litmus
+then
+	echo ' --- ' error: \"$litmus\" contains locking, RCU, or SRCU
+	exit 254
 fi
 
+# Generate the assembly code and run herd7 on it.
+gen_theme7 -n 10 -map $mapfile -call Linux.call > $themefile
+jingle7 -theme $themefile $litmus > $T/$hwlitmusfile 2> $T/$hwlitmusfile.jingle7.out
+/usr/bin/time $LKMM_TIMEOUT_CMD herd7 -model $catfile $T/$hwlitmusfile > $LKMM_DESTDIR/$hwlitmus.out 2>&1
+
 exit $?

From 07492ae2e39c99a215ee6a79f167571fdc6aac0e Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 21 Mar 2019 14:06:27 -0700
Subject: [PATCH 15/70] tools/memory-model: Move from .AArch64.litmus.out to
 .litmus.AArch.out

When the github scripts see ".litmus.out", they assume that there must be
a corresponding C-language ".litmus" file.  Won't they be disappointed
when they instead see nothing, or, worse yet, the corresponding
assembly-language litmus test?  This commit therefore swaps the hardware
tag with the "litmus" to avoid this sort of disappointment.

This commit also adjusts the .gitignore file so as to avoid adding these
new ".out" files to git.

[ paulmck: Apply Akira Yokosawa feedback. ]
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/litmus-tests/.gitignore | 2 +-
 tools/memory-model/scripts/judgelitmus.sh  | 4 ++--
 tools/memory-model/scripts/runlitmus.sh    | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/memory-model/litmus-tests/.gitignore b/tools/memory-model/litmus-tests/.gitignore
index c492a1ddad91d..d65462d64816d 100644
--- a/tools/memory-model/litmus-tests/.gitignore
+++ b/tools/memory-model/litmus-tests/.gitignore
@@ -1,2 +1,2 @@
 # SPDX-License-Identifier: GPL-2.0-only
-*.litmus.out
+*.out
diff --git a/tools/memory-model/scripts/judgelitmus.sh b/tools/memory-model/scripts/judgelitmus.sh
index fe9131f8eb969..9abda72fe013c 100755
--- a/tools/memory-model/scripts/judgelitmus.sh
+++ b/tools/memory-model/scripts/judgelitmus.sh
@@ -6,7 +6,7 @@
 # test ran correctly.  If the --hw argument is omitted, check against the
 # LKMM output, which is assumed to be in file.litmus.out.  If this argument
 # is provided, this is assumed to be a hardware test, and the output is
-# assumed to be in file.HW.litmus.out, where "HW" is the --hw argument.
+# assumed to be in file.litmus.HW.out, where "HW" is the --hw argument.
 # In addition, non-Sometimes verification results will be noted, but
 # forgiven.  Furthermore, if there is no "Result:" comment but there is
 # an LKMM .litmus.out file, the observation in that file will be used
@@ -37,7 +37,7 @@ then
 	lkmmout=
 else
 	litmusout="`echo $litmus |
-		sed -e 's/\.litmus$/.'${LKMM_HW_MAP_FILE}'.litmus/'`.out"
+		sed -e 's/\.litmus$/.litmus.'${LKMM_HW_MAP_FILE}'/'`.out"
 	lkmmout=$litmus.out
 fi
 if test -f "$LKMM_DESTDIR/$litmusout" -a -r "$LKMM_DESTDIR/$litmusout"
diff --git a/tools/memory-model/scripts/runlitmus.sh b/tools/memory-model/scripts/runlitmus.sh
index 2865a9661b078..c84124b32bee6 100755
--- a/tools/memory-model/scripts/runlitmus.sh
+++ b/tools/memory-model/scripts/runlitmus.sh
@@ -57,7 +57,7 @@ catfile="`echo $LKMM_HW_MAP_FILE | tr '[A-Z]' '[a-z]'`.cat"
 mapfile="Linux2${LKMM_HW_MAP_FILE}.map"
 themefile="$T/${LKMM_HW_MAP_FILE}.theme"
 herdoptions="-model $LKMM_HW_CAT_FILE"
-hwlitmus=`echo $litmus | sed -e 's/\.litmus$/.'${LKMM_HW_MAP_FILE}'.litmus/'`
+hwlitmus=`echo $litmus | sed -e 's/\.litmus$/.litmus.'${LKMM_HW_MAP_FILE}'/'`
 hwlitmusfile=`echo $hwlitmus | sed -e 's,^.*/,,'`
 
 # Don't run on litmus tests with complex synchronization

From 1585dd5041f4886a090b8b2f171dec76f12b2410 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 21 Mar 2019 14:44:09 -0700
Subject: [PATCH 16/70] tools/memory-model: Keep assembly-language litmus tests

This commit retains the assembly-language litmus tests generated from
the C-language litmus tests, appending the hardware tag to the original
C-language litmus test's filename.  Thus, S+poonceonces.litmus.AArch64
contains the Armv8 assembly language corresponding to the C-language
S+poonceonces.litmus test.

This commit also updates the .gitignore to avoid committing these
automatically generated assembly-language litmus tests.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/litmus-tests/.gitignore | 2 +-
 tools/memory-model/scripts/runlitmus.sh    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/memory-model/litmus-tests/.gitignore b/tools/memory-model/litmus-tests/.gitignore
index d65462d64816d..19c379cf069d2 100644
--- a/tools/memory-model/litmus-tests/.gitignore
+++ b/tools/memory-model/litmus-tests/.gitignore
@@ -1,2 +1,2 @@
 # SPDX-License-Identifier: GPL-2.0-only
-*.out
+*.litmus.*
diff --git a/tools/memory-model/scripts/runlitmus.sh b/tools/memory-model/scripts/runlitmus.sh
index c84124b32bee6..62b47c7e1ba93 100755
--- a/tools/memory-model/scripts/runlitmus.sh
+++ b/tools/memory-model/scripts/runlitmus.sh
@@ -69,7 +69,7 @@ fi
 
 # Generate the assembly code and run herd7 on it.
 gen_theme7 -n 10 -map $mapfile -call Linux.call > $themefile
-jingle7 -theme $themefile $litmus > $T/$hwlitmusfile 2> $T/$hwlitmusfile.jingle7.out
-/usr/bin/time $LKMM_TIMEOUT_CMD herd7 -model $catfile $T/$hwlitmusfile > $LKMM_DESTDIR/$hwlitmus.out 2>&1
+jingle7 -theme $themefile $litmus > $LKMM_DESTDIR/$hwlitmus 2> $T/$hwlitmusfile.jingle7.out
+/usr/bin/time $LKMM_TIMEOUT_CMD herd7 -model $catfile $LKMM_DESTDIR/$hwlitmus > $LKMM_DESTDIR/$hwlitmus.out 2>&1
 
 exit $?

From 199d9882d3a95f662012eceb61616cd031d157d2 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 22 Mar 2019 08:57:20 -0700
Subject: [PATCH 17/70] tools/memory-model: Allow herd to deduce CPU type

Currently, the scripts specify the CPU's .cat file to herd.  But this is
pointless because herd will select a good and sufficient .cat file from
the assembly-language litmus test itself.  This commit therefore removes
the -model argument to herd, allowing herd to figure the CPU family out
itself.

Note that the user can override herd's choice using the "--herdopts"
argument to the scripts.

Suggested-by: Luc Maranget <luc.maranget@inria.fr>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/scripts/runlitmus.sh | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tools/memory-model/scripts/runlitmus.sh b/tools/memory-model/scripts/runlitmus.sh
index 62b47c7e1ba93..afb196d7ef106 100755
--- a/tools/memory-model/scripts/runlitmus.sh
+++ b/tools/memory-model/scripts/runlitmus.sh
@@ -53,7 +53,6 @@ trap 'rm -rf $T' 0 2
 mkdir $T
 
 # Generate filenames
-catfile="`echo $LKMM_HW_MAP_FILE | tr '[A-Z]' '[a-z]'`.cat"
 mapfile="Linux2${LKMM_HW_MAP_FILE}.map"
 themefile="$T/${LKMM_HW_MAP_FILE}.theme"
 herdoptions="-model $LKMM_HW_CAT_FILE"
@@ -70,6 +69,6 @@ fi
 # Generate the assembly code and run herd7 on it.
 gen_theme7 -n 10 -map $mapfile -call Linux.call > $themefile
 jingle7 -theme $themefile $litmus > $LKMM_DESTDIR/$hwlitmus 2> $T/$hwlitmusfile.jingle7.out
-/usr/bin/time $LKMM_TIMEOUT_CMD herd7 -model $catfile $LKMM_DESTDIR/$hwlitmus > $LKMM_DESTDIR/$hwlitmus.out 2>&1
+/usr/bin/time $LKMM_TIMEOUT_CMD herd7 $LKMM_DESTDIR/$hwlitmus > $LKMM_DESTDIR/$hwlitmus.out 2>&1
 
 exit $?

From f85c750c90c16222492d69bfca959c74b58f5ca0 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 25 Mar 2019 17:20:51 -0700
Subject: [PATCH 18/70] tools/memory-model: Make runlitmus.sh check for jingle
 errors

It turns out that the jingle7 tool is currently a bit picky about
the litmus tests it is willing to process.  This commit therefore
ensures that jingle7 failures are reported.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/scripts/runlitmus.sh | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tools/memory-model/scripts/runlitmus.sh b/tools/memory-model/scripts/runlitmus.sh
index afb196d7ef106..5f2d29b460ff0 100755
--- a/tools/memory-model/scripts/runlitmus.sh
+++ b/tools/memory-model/scripts/runlitmus.sh
@@ -69,6 +69,11 @@ fi
 # Generate the assembly code and run herd7 on it.
 gen_theme7 -n 10 -map $mapfile -call Linux.call > $themefile
 jingle7 -theme $themefile $litmus > $LKMM_DESTDIR/$hwlitmus 2> $T/$hwlitmusfile.jingle7.out
+if grep -q "Generated 0 tests" $T/$hwlitmusfile.jingle7.out
+then
+	echo ' !!! ' jingle7 failed, no $hwlitmus generated
+	exit 253
+fi
 /usr/bin/time $LKMM_TIMEOUT_CMD herd7 $LKMM_DESTDIR/$hwlitmus > $LKMM_DESTDIR/$hwlitmus.out 2>&1
 
 exit $?

From d0dc9d342bc5c3d6a86723004adfc52ae12e4b8b Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 5 Apr 2019 12:34:56 -0700
Subject: [PATCH 19/70] tools/memory-model: Add -v flag to jingle7 runs

Adding the -v flag to jingle7 invocations gives much useful information
on why jingle7 didn't like a given litmus test.  This commit therefore
adds this flag and saves off any such information into a .err file.

Suggested-by: Luc Maranget <luc.maranget@inria.fr>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/scripts/runlitmus.sh | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/memory-model/scripts/runlitmus.sh b/tools/memory-model/scripts/runlitmus.sh
index 5f2d29b460ff0..dfdb1f00fcc03 100755
--- a/tools/memory-model/scripts/runlitmus.sh
+++ b/tools/memory-model/scripts/runlitmus.sh
@@ -68,10 +68,11 @@ fi
 
 # Generate the assembly code and run herd7 on it.
 gen_theme7 -n 10 -map $mapfile -call Linux.call > $themefile
-jingle7 -theme $themefile $litmus > $LKMM_DESTDIR/$hwlitmus 2> $T/$hwlitmusfile.jingle7.out
+jingle7 -v -theme $themefile $litmus > $LKMM_DESTDIR/$hwlitmus 2> $T/$hwlitmusfile.jingle7.out
 if grep -q "Generated 0 tests" $T/$hwlitmusfile.jingle7.out
 then
-	echo ' !!! ' jingle7 failed, no $hwlitmus generated
+	echo ' !!! ' jingle7 failed, errors in $hwlitmus.err
+	cp $T/$hwlitmusfile.jingle7.out $LKMM_DESTDIR/$hwlitmus.err
 	exit 253
 fi
 /usr/bin/time $LKMM_TIMEOUT_CMD herd7 $LKMM_DESTDIR/$hwlitmus > $LKMM_DESTDIR/$hwlitmus.out 2>&1

From 660959dcaed212324e6171812ccf61f9bf5831ca Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 22 Mar 2019 17:18:43 -0700
Subject: [PATCH 20/70] tools/memory-model: Implement --hw support for
 checkghlitmus.sh

This commits enables the "--hw" argument for the checkghlitmus.sh script,
causing it to convert any applicable C-language litmus tests to the
specified flavor of assembly language, to verify these assembly-language
litmus tests, and checking compatibility of the outcomes.

Note that the conversion does not yet handle locking, RCU, SRCU, plain
C-language memory accesses, or casts.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/scripts/checkghlitmus.sh |  9 ++++---
 tools/memory-model/scripts/hwfnseg.sh       | 20 +++++++++++++++
 tools/memory-model/scripts/runlitmushist.sh | 27 +++++++++++++--------
 3 files changed, 42 insertions(+), 14 deletions(-)
 create mode 100755 tools/memory-model/scripts/hwfnseg.sh

diff --git a/tools/memory-model/scripts/checkghlitmus.sh b/tools/memory-model/scripts/checkghlitmus.sh
index 6589fbb6f6538..2ea220d2564b9 100755
--- a/tools/memory-model/scripts/checkghlitmus.sh
+++ b/tools/memory-model/scripts/checkghlitmus.sh
@@ -10,6 +10,7 @@
 # parseargs.sh scripts for arguments.
 
 . scripts/parseargs.sh
+. scripts/hwfnseg.sh
 
 T=/tmp/checkghlitmus.sh.$$
 trap 'rm -rf $T' 0
@@ -32,9 +33,9 @@ then
 	( cd "$LKMM_DESTDIR"; sed -e 's/^/mkdir -p /' | sh )
 fi
 
-# Create a list of the C-language litmus tests previously run.
-( cd $LKMM_DESTDIR; find litmus -name '*.litmus.out' -print ) |
-	sed -e 's/\.out$//' |
+# Create a list of the specified litmus tests previously run.
+( cd $LKMM_DESTDIR; find litmus -name "*.litmus${hwfnseg}.out" -print ) |
+	sed -e "s/${hwfnseg}"'\.out$//' |
 	xargs -r egrep -l '^ \* Result: (Never|Sometimes|Always|DEADLOCK)' |
 	xargs -r grep -L "^P${LKMM_PROCS}"> $T/list-C-already
 
@@ -44,7 +45,7 @@ find litmus -name '*.litmus' -exec grep -l -m 1 "^C " {} \; > $T/list-C
 xargs < $T/list-C -r egrep -l '^ \* Result: (Never|Sometimes|Always|DEADLOCK)' > $T/list-C-result
 xargs < $T/list-C-result -r grep -L "^P${LKMM_PROCS}" > $T/list-C-result-short
 
-# Form list of tests without corresponding .litmus.out files
+# Form list of tests without corresponding .out files
 sort $T/list-C-already $T/list-C-result-short | uniq -u > $T/list-C-needed
 
 # Run any needed tests.
diff --git a/tools/memory-model/scripts/hwfnseg.sh b/tools/memory-model/scripts/hwfnseg.sh
new file mode 100755
index 0000000000000..580c3281181c5
--- /dev/null
+++ b/tools/memory-model/scripts/hwfnseg.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Generate the hardware extension to the litmus-test filename, or the
+# empty string if this is an LKMM run.  The extension is placed in
+# the shell variable hwfnseg.
+#
+# Usage:
+#	. hwfnseg.sh
+#
+# Copyright IBM Corporation, 2019
+#
+# Author: Paul E. McKenney <paulmck@linux.ibm.com>
+
+if test -z "$LKMM_HW_MAP_FILE"
+then
+	hwfnseg=
+else
+	hwfnseg=".$LKMM_HW_MAP_FILE"
+fi
diff --git a/tools/memory-model/scripts/runlitmushist.sh b/tools/memory-model/scripts/runlitmushist.sh
index 852786fef179f..c6c2bdc67a502 100755
--- a/tools/memory-model/scripts/runlitmushist.sh
+++ b/tools/memory-model/scripts/runlitmushist.sh
@@ -15,6 +15,8 @@
 #
 # Author: Paul E. McKenney <paulmck@linux.ibm.com>
 
+. scripts/hwfnseg.sh
+
 T=/tmp/runlitmushist.sh.$$
 trap 'rm -rf $T' 0
 mkdir $T
@@ -30,15 +32,12 @@ fi
 # Prefixes for per-CPU scripts
 for ((i=0;i<$LKMM_JOBS;i++))
 do
-	echo dir="$LKMM_DESTDIR" > $T/$i.sh
 	echo T=$T >> $T/$i.sh
-	echo herdoptions=\"$LKMM_HERD_OPTIONS\" >> $T/$i.sh
 	cat << '___EOF___' >> $T/$i.sh
 	runtest () {
-		echo ' ... ' /usr/bin/time $LKMM_TIMEOUT_CMD herd7 $herdoptions $1 '>' $dir/$1.out '2>&1'
-		if /usr/bin/time $LKMM_TIMEOUT_CMD herd7 $herdoptions $1 > $dir/$1.out 2>&1
+		if scripts/runlitmus.sh $1
 		then
-			if ! grep -q '^Observation ' $dir/$1.out
+			if ! grep -q '^Observation ' $LKMM_DESTDIR/$1$2.out
 			then
 				echo ' !!! Herd failed, no Observation:' $1
 			fi
@@ -47,10 +46,16 @@ do
 			if test "$exitcode" -eq 124
 			then
 				exitmsg="timed out"
+			elif test "$exitcode" -eq 253
+			then
+				exitmsg=
 			else
 				exitmsg="failed, exit code $exitcode"
 			fi
-			echo ' !!! Herd' ${exitmsg}: $1
+			if test -n "$exitmsg"
+			then
+				echo ' !!! Herd' ${exitmsg}: $1
+			fi
 		fi
 	}
 ___EOF___
@@ -59,11 +64,13 @@ done
 awk -v q="'" -v b='\\' '
 {
 	print "echo `grep " q "^P[0-9]" b "+(" q " " $0 " | tail -1 | sed -e " q "s/^P" b "([0-9]" b "+" b ")(.*$/" b "1/" q "` " $0
-}' | bash |
-sort -k1n |
-awk -v ncpu=$LKMM_JOBS -v t=$T '
+}' | sh | sort -k1n |
+awk -v dq='"' -v hwfnseg="$hwfnseg" -v ncpu="$LKMM_JOBS" -v t="$T" '
 {
-	print "runtest " $2 >> t "/" NR % ncpu ".sh";
+	print "if test -z " dq hwfnseg dq " || scripts/simpletest.sh " dq $2 dq
+	print "then"
+	print "\techo runtest " dq $2 dq " " hwfnseg " >> " t "/" NR % ncpu ".sh";
+	print "fi"
 }
 
 END {

From 935d88fc11e1909f2207f718a98e63daaa680011 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 27 Mar 2019 11:47:14 -0700
Subject: [PATCH 21/70] tools/memory-model: Fix scripting --jobs argument

The parseargs.sh regular expression for the --jobs argument incorrectly
requires that the number of jobs be at least 10, that is, have at least
two digits.  This commit therefore adjusts this regular expression to
allow single-digit numbers of jobs to be specified.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/scripts/parseargs.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/memory-model/scripts/parseargs.sh b/tools/memory-model/scripts/parseargs.sh
index 5f016fc3f3af5..25a81ac0dfdf4 100755
--- a/tools/memory-model/scripts/parseargs.sh
+++ b/tools/memory-model/scripts/parseargs.sh
@@ -113,7 +113,7 @@ do
 		LKMM_JOBS="`echo $njobs | sed -e 's/^\([0-9]\+\).*$/\1/'`"
 		;;
 	--jobs|--job|-j)
-		checkarg --jobs "(number)" "$#" "$2" '^[1-9][0-9]\+$' '^--'
+		checkarg --jobs "(number)" "$#" "$2" '^[1-9][0-9]*$' '^--'
 		LKMM_JOBS="$2"
 		shift
 		;;

From 8061f851dfa5ddb39aa6b4c71e6fe591ba19f7a8 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 8 Apr 2019 09:27:28 -0700
Subject: [PATCH 22/70] tools/memory-model: Make checkghlitmus.sh use mselect7

The checkghlitmus.sh script currently uses grep to ignore non-C-language
litmus tests, which is a bit fragile.  This commit therefore enlists the
aid of "mselect7 -arch C", given Luc Maraget's recent modifications that
allow mselect7 to operate in filter mode.

This change requires herdtools 7.52-32-g1da3e0e50977 or later.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/scripts/checkghlitmus.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/memory-model/scripts/checkghlitmus.sh b/tools/memory-model/scripts/checkghlitmus.sh
index 2ea220d2564b9..cedd0290b73f8 100755
--- a/tools/memory-model/scripts/checkghlitmus.sh
+++ b/tools/memory-model/scripts/checkghlitmus.sh
@@ -41,7 +41,7 @@ fi
 
 # Create a list of C-language litmus tests with "Result:" commands and
 # no more than the specified number of processes.
-find litmus -name '*.litmus' -exec grep -l -m 1 "^C " {} \; > $T/list-C
+find litmus -name '*.litmus' -print | mselect7 -arch C > $T/list-C
 xargs < $T/list-C -r egrep -l '^ \* Result: (Never|Sometimes|Always|DEADLOCK)' > $T/list-C-result
 xargs < $T/list-C-result -r grep -L "^P${LKMM_PROCS}" > $T/list-C-result-short
 

From ce208b642c97141d914c4153c1014dd02a7d83cc Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 8 Apr 2019 10:02:23 -0700
Subject: [PATCH 23/70] tools/memory-model: Make history-check scripts use
 mselect7

The history-check scripts currently use grep to ignore non-C-language
litmus tests, which is a bit fragile.  This commit therefore enlists the
aid of "mselect7 -arch C", given Luc Maraget's recent modifications that
allow mselect7 to operate in filter mode.

This change requires herdtools 7.52-32-g1da3e0e50977 or later.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/scripts/initlitmushist.sh | 2 +-
 tools/memory-model/scripts/newlitmushist.sh  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/memory-model/scripts/initlitmushist.sh b/tools/memory-model/scripts/initlitmushist.sh
index 956b6957484d8..31ea782955d3f 100755
--- a/tools/memory-model/scripts/initlitmushist.sh
+++ b/tools/memory-model/scripts/initlitmushist.sh
@@ -60,7 +60,7 @@ fi
 
 # Create a list of the C-language litmus tests with no more than the
 # specified number of processes (per the --procs argument).
-find litmus -name '*.litmus' -exec grep -l -m 1 "^C " {} \; > $T/list-C
+find litmus -name '*.litmus' -print | mselect7 -arch C > $T/list-C
 xargs < $T/list-C -r grep -L "^P${LKMM_PROCS}" > $T/list-C-short
 
 scripts/runlitmushist.sh < $T/list-C-short
diff --git a/tools/memory-model/scripts/newlitmushist.sh b/tools/memory-model/scripts/newlitmushist.sh
index 3f4b06e299886..25235e2049cf0 100755
--- a/tools/memory-model/scripts/newlitmushist.sh
+++ b/tools/memory-model/scripts/newlitmushist.sh
@@ -43,7 +43,7 @@ fi
 
 # Form full list of litmus tests with no more than the specified
 # number of processes (per the --procs argument).
-find litmus -name '*.litmus' -exec grep -l -m 1 "^C " {} \; > $T/list-C-all
+find litmus -name '*.litmus' -print | mselect7 -arch C > $T/list-C-all
 xargs < $T/list-C-all -r grep -L "^P${LKMM_PROCS}" > $T/list-C-short
 
 # Form list of new tests.  Note: This does not handle litmus-test deletion!

From c5cdcace06298197c0670f2022d786ba5a72404f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 2 May 2019 09:51:57 -0700
Subject: [PATCH 24/70] tools/memory-model:  Add "--" to parseargs.sh for
 additional arguments

Currently, parseargs.sh expects to consume all the command-line arguments,
which prevents the calling script from having any of its own arguments.
This commit therefore causes parseargs.sh to stop consuming arguments
when it encounters a "--" argument, leaving any remaining arguments for
the calling script.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/scripts/parseargs.sh | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/memory-model/scripts/parseargs.sh b/tools/memory-model/scripts/parseargs.sh
index 25a81ac0dfdf4..7aa58755adfc0 100755
--- a/tools/memory-model/scripts/parseargs.sh
+++ b/tools/memory-model/scripts/parseargs.sh
@@ -83,7 +83,7 @@ do
 			echo "Cannot create directory --destdir '$LKMM_DESTDIR'"
 			usage
 		fi
-		if test -d "$LKMM_DESTDIR" -a -w "$LKMM_DESTDIR" -a -x "$LKMM_DESTDIR"
+		if test -d "$LKMM_DESTDIR" -a -x "$LKMM_DESTDIR"
 		then
 			:
 		else
@@ -127,6 +127,10 @@ do
 		LKMM_TIMEOUT="$2"
 		shift
 		;;
+	--)
+		shift
+		break
+		;;
 	*)
 		echo Unknown argument $1
 		usage

From 8fe3019794a2bc97048a1d1010c5ab9fb44b7361 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 2 May 2019 10:03:29 -0700
Subject: [PATCH 25/70] tools/memory-model: Repair parseargs.sh header comment

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/scripts/parseargs.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/memory-model/scripts/parseargs.sh b/tools/memory-model/scripts/parseargs.sh
index 7aa58755adfc0..08ded59098607 100755
--- a/tools/memory-model/scripts/parseargs.sh
+++ b/tools/memory-model/scripts/parseargs.sh
@@ -1,7 +1,7 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0+
 #
-# the corresponding .litmus.out file, and does not judge the result.
+# Parse arguments common to the various scripts.
 #
 # . scripts/parseargs.sh
 #

From b53862d336ee0f8b8c071d8850297e9b6963c260 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 2 May 2019 10:05:14 -0700
Subject: [PATCH 26/70] tools/memory-model: Add checktheselitmus.sh to run
 specified litmus tests

This commit adds a checktheselitmus.sh script that runs the litmus tests
specified on the command line.  This is useful for verifying fixes to
specific litmus tests.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/scripts/README             |  8 ++++
 .../memory-model/scripts/checktheselitmus.sh  | 43 +++++++++++++++++++
 2 files changed, 51 insertions(+)
 create mode 100755 tools/memory-model/scripts/checktheselitmus.sh

diff --git a/tools/memory-model/scripts/README b/tools/memory-model/scripts/README
index 0e29a52044c1a..cc2c4e5be9ec1 100644
--- a/tools/memory-model/scripts/README
+++ b/tools/memory-model/scripts/README
@@ -27,6 +27,14 @@ checklitmushist.sh
 checklitmus.sh
 
 	Check a single litmus test against its "Result:" expected result.
+	Not intended to for manual use.
+
+checktheselitmus.sh
+
+	Check the specified list of litmus tests against their "Result:"
+	expected results.  This takes optional parseargs.sh arguments,
+	followed by "--" followed by pathnames starting from the current
+	directory.
 
 cmplitmushist.sh
 
diff --git a/tools/memory-model/scripts/checktheselitmus.sh b/tools/memory-model/scripts/checktheselitmus.sh
new file mode 100755
index 0000000000000..10eeb5ecea6de
--- /dev/null
+++ b/tools/memory-model/scripts/checktheselitmus.sh
@@ -0,0 +1,43 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Invokes checklitmus.sh on its arguments to run the specified litmus
+# test and pass judgment on the results.
+#
+# Usage:
+#	checktheselitmus.sh -- [ file1.litmus [ file2.litmus ... ] ]
+#
+# Run this in the directory containing the memory model, specifying the
+# pathname of the litmus test to check.  The usual parseargs.sh arguments
+# can be specified prior to the "--".
+#
+# This script is intended for use with pathnames that start from the
+# tools/memory-model directory.  If some of the pathnames instead start at
+# the root directory, they all must do so and the "--destdir /" parseargs.sh
+# argument must be specified prior to the "--".  Alternatively, some other
+# "--destdir" argument can be supplied as long as the needed subdirectories
+# are populated.
+#
+# Copyright IBM Corporation, 2018
+#
+# Author: Paul E. McKenney <paulmck@linux.ibm.com>
+
+. scripts/parseargs.sh
+
+ret=0
+for i in "$@"
+do
+	if scripts/checklitmus.sh $i
+	then
+		:
+	else
+		ret=1
+	fi
+done
+if test "$ret" -ne 0
+then
+	echo " ^^^ VERIFICATION MISMATCHES" 1>&2
+else
+	echo All litmus tests verified as was expected. 1>&2
+fi
+exit $ret

From 7458b3bea9ade9992fc2516095768f30ca72f123 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 3 May 2019 07:34:20 -0700
Subject: [PATCH 27/70] tools/memory-model: Add data-race capabilities to
 judgelitmus.sh

This commit adds functionality to judgelitmus.sh to allow it to handle
both the "DATARACE" markers in the "Result:" comments in litmus tests
and the "Flag data-race" markers in LKMM output.  For C-language tests,
if either marker is present, the other must also be as well, at least for
litmus tests having a "Result:" comment.  If the LKMM output indicates
a data race, then failures of the Always/Sometimes/Never portion of the
"Result:" prediction are forgiven.

The reason for forgiving "Result:" mispredictions is that data races can
result in "interesting" compiler optimizations, so that all bets are off
in the data-race case.

[ paulmck: Apply Akira Yokosawa feedback. ]
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/scripts/judgelitmus.sh | 40 ++++++++++++++++++-----
 1 file changed, 32 insertions(+), 8 deletions(-)

diff --git a/tools/memory-model/scripts/judgelitmus.sh b/tools/memory-model/scripts/judgelitmus.sh
index 9abda72fe013c..2700481d20f01 100755
--- a/tools/memory-model/scripts/judgelitmus.sh
+++ b/tools/memory-model/scripts/judgelitmus.sh
@@ -4,13 +4,19 @@
 # Given a .litmus test and the corresponding litmus output file, check
 # the .litmus.out file against the "Result:" comment to judge whether the
 # test ran correctly.  If the --hw argument is omitted, check against the
-# LKMM output, which is assumed to be in file.litmus.out.  If this argument
-# is provided, this is assumed to be a hardware test, and the output is
-# assumed to be in file.litmus.HW.out, where "HW" is the --hw argument.
-# In addition, non-Sometimes verification results will be noted, but
-# forgiven.  Furthermore, if there is no "Result:" comment but there is
-# an LKMM .litmus.out file, the observation in that file will be used
-# to judge the assembly-language verification.
+# LKMM output, which is assumed to be in file.litmus.out. If either a
+# "DATARACE" marker in the "Result:" comment or a "Flag data-race" marker
+# in the LKMM output is present, the other must also be as well, at least
+# for litmus tests having a "Result:" comment. In this case, a failure of
+# the Always/Sometimes/Never portion of the "Result:" prediction will be
+# noted, but forgiven.
+#
+# If the --hw argument is provided, this is assumed to be a hardware
+# test, and the output is assumed to be in file.litmus.HW.out, where
+# "HW" is the --hw argument. In addition, non-Sometimes verification
+# results will be noted, but forgiven.  Furthermore, if there is no
+# "Result:" comment but there is an LKMM .litmus.out file, the observation
+# in that file will be used to judge the assembly-language verification.
 #
 # Usage:
 #	judgelitmus.sh file.litmus
@@ -47,9 +53,27 @@ else
 	echo ' --- ' error: \"$LKMM_DESTDIR/$litmusout is not a readable file
 	exit 255
 fi
+if grep -q '^Flag data-race$' "$LKMM_DESTDIR/$litmusout"
+then
+	datarace_modeled=1
+fi
 if grep -q '^ \* Result: ' $litmus
 then
 	outcome=`grep -m 1 '^ \* Result: ' $litmus | awk '{ print $3 }'`
+	if grep -m1 '^ \* Result: .* DATARACE' $litmus
+	then
+		datarace_predicted=1
+	fi
+	if test -n "$datarace_predicted" -a -z "$datarace_modeled" -a -z "$LKMM_HW_MAP_FILE"
+	then
+		echo '!!! Predicted data race not modeled' $litmus
+		exit 252
+	elif test -z "$datarace_predicted" -a -n "$datarace_modeled"
+	then
+		# Note that hardware models currently don't model data races
+		echo '!!! Unexpected data race modeled' $litmus
+		exit 253
+	fi
 elif test -n "$LKMM_HW_MAP_FILE" && grep -q '^Observation' $LKMM_DESTDIR/$lkmmout > /dev/null 2>&1
 then
 	outcome=`grep -m 1 '^Observation ' $LKMM_DESTDIR/$lkmmout | awk '{ print $3 }'`
@@ -114,7 +138,7 @@ elif grep '^Observation' $LKMM_DESTDIR/$litmusout | grep -q $outcome || test "$o
 then
 	ret=0
 else
-	if test -n "$LKMM_HW_MAP_FILE" -a "$outcome" = Sometimes
+	if test \( -n "$LKMM_HW_MAP_FILE" -a "$outcome" = Sometimes \) -o -n "$datarace_modeled"
 	then
 		flag="--- Forgiven"
 		ret=0

From 40652cf55bac4558f0422e08232656345c09b37d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 6 Jun 2019 02:13:27 -0700
Subject: [PATCH 28/70] tools/memory-model: Make judgelitmus.sh handle scripted
 Result: tag

The scripts that generate the litmus tests in the "auto" directory of
the https://github.com/paulmckrcu/litmus archive place the "Result:"
tag into a single-line ocaml comment, which judgelitmus.sh currently
does not recognize.  This commit therefore makes judgelitmus.sh
recognize both the multiline comment format that it currently does
and the automatically generated single-line format.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/scripts/judgelitmus.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/memory-model/scripts/judgelitmus.sh b/tools/memory-model/scripts/judgelitmus.sh
index 2700481d20f01..1ec5d89fcfbb2 100755
--- a/tools/memory-model/scripts/judgelitmus.sh
+++ b/tools/memory-model/scripts/judgelitmus.sh
@@ -57,10 +57,10 @@ if grep -q '^Flag data-race$' "$LKMM_DESTDIR/$litmusout"
 then
 	datarace_modeled=1
 fi
-if grep -q '^ \* Result: ' $litmus
+if grep -q '^[( ]\* Result: ' $litmus
 then
-	outcome=`grep -m 1 '^ \* Result: ' $litmus | awk '{ print $3 }'`
-	if grep -m1 '^ \* Result: .* DATARACE' $litmus
+	outcome=`grep -m 1 '^[( ]\* Result: ' $litmus | awk '{ print $3 }'`
+	if grep -m1 '^[( ]\* Result: .* DATARACE' $litmus
 	then
 		datarace_predicted=1
 	fi

From 5e12d26866e41e93b61e0f958c17f6cd34377b11 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 24 Jun 2019 22:30:32 -0700
Subject: [PATCH 29/70] tools/memory-model: Use "-unroll 0" to keep --hw runs
 finite

Litmus tests involving atomic operations produce LL/SC loops on a number
of architectures, and unrolling these loops can result in excessive
verification times or even stack overflows.  This commit therefore uses
the "-unroll 0" herd7 argument to avoid unrolling, on the grounds that
additional passes through an LL/SC loop should not change the verification.

Note however, that certain bugs in the mapping of the LL/SC loop to
machine instructions may go undetected.  On the other hand, herd7 might
not be the best vehicle for finding such bugs in any case.  (You do
stress-test your architecture-specific code, don't you?)

Suggested-by: Luc Maranget <luc.maranget@inria.fr>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/scripts/runlitmus.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/memory-model/scripts/runlitmus.sh b/tools/memory-model/scripts/runlitmus.sh
index dfdb1f00fcc03..94608d4b6502e 100755
--- a/tools/memory-model/scripts/runlitmus.sh
+++ b/tools/memory-model/scripts/runlitmus.sh
@@ -75,6 +75,6 @@ then
 	cp $T/$hwlitmusfile.jingle7.out $LKMM_DESTDIR/$hwlitmus.err
 	exit 253
 fi
-/usr/bin/time $LKMM_TIMEOUT_CMD herd7 $LKMM_DESTDIR/$hwlitmus > $LKMM_DESTDIR/$hwlitmus.out 2>&1
+/usr/bin/time $LKMM_TIMEOUT_CMD herd7 -unroll 0 $LKMM_DESTDIR/$hwlitmus > $LKMM_DESTDIR/$hwlitmus.out 2>&1
 
 exit $?

From b468343673fdc512a0a6b51287f6a0ca9c246655 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 15 Sep 2022 12:09:30 -0700
Subject: [PATCH 30/70] srcu: Convert ->srcu_lock_count and ->srcu_unlock_count
 to atomic

NMI-safe variants of srcu_read_lock() and srcu_read_unlock() are needed
by printk(), which on many architectures entails read-modify-write
atomic operations.  This commit prepares Tree SRCU for this change by
making both ->srcu_lock_count and ->srcu_unlock_count by atomic_long_t.

[ paulmck: Apply feedback from John Ogness. ]

Link: https://lore.kernel.org/all/20220910221947.171557773@linutronix.de/

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Petr Mladek <pmladek@suse.com>
---
 include/linux/srcutree.h |  4 ++--
 kernel/rcu/srcutree.c    | 24 ++++++++++++------------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index e3014319d1ade..0c4eca07d78d5 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -23,8 +23,8 @@ struct srcu_struct;
  */
 struct srcu_data {
 	/* Read-side state. */
-	unsigned long srcu_lock_count[2];	/* Locks per CPU. */
-	unsigned long srcu_unlock_count[2];	/* Unlocks per CPU. */
+	atomic_long_t srcu_lock_count[2];	/* Locks per CPU. */
+	atomic_long_t srcu_unlock_count[2];	/* Unlocks per CPU. */
 
 	/* Update-side state. */
 	spinlock_t __private lock ____cacheline_internodealigned_in_smp;
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 1c304fec89c02..25e9458da6a26 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -417,7 +417,7 @@ static unsigned long srcu_readers_lock_idx(struct srcu_struct *ssp, int idx)
 	for_each_possible_cpu(cpu) {
 		struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu);
 
-		sum += READ_ONCE(cpuc->srcu_lock_count[idx]);
+		sum += atomic_long_read(&cpuc->srcu_lock_count[idx]);
 	}
 	return sum;
 }
@@ -434,7 +434,7 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
 	for_each_possible_cpu(cpu) {
 		struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu);
 
-		sum += READ_ONCE(cpuc->srcu_unlock_count[idx]);
+		sum += atomic_long_read(&cpuc->srcu_unlock_count[idx]);
 	}
 	return sum;
 }
@@ -503,10 +503,10 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
 	for_each_possible_cpu(cpu) {
 		struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu);
 
-		sum += READ_ONCE(cpuc->srcu_lock_count[0]);
-		sum += READ_ONCE(cpuc->srcu_lock_count[1]);
-		sum -= READ_ONCE(cpuc->srcu_unlock_count[0]);
-		sum -= READ_ONCE(cpuc->srcu_unlock_count[1]);
+		sum += atomic_long_read(&cpuc->srcu_lock_count[0]);
+		sum += atomic_long_read(&cpuc->srcu_lock_count[1]);
+		sum -= atomic_long_read(&cpuc->srcu_unlock_count[0]);
+		sum -= atomic_long_read(&cpuc->srcu_unlock_count[1]);
 	}
 	return sum;
 }
@@ -636,7 +636,7 @@ int __srcu_read_lock(struct srcu_struct *ssp)
 	int idx;
 
 	idx = READ_ONCE(ssp->srcu_idx) & 0x1;
-	this_cpu_inc(ssp->sda->srcu_lock_count[idx]);
+	this_cpu_inc(ssp->sda->srcu_lock_count[idx].counter);
 	smp_mb(); /* B */  /* Avoid leaking the critical section. */
 	return idx;
 }
@@ -650,7 +650,7 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock);
 void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
 {
 	smp_mb(); /* C */  /* Avoid leaking the critical section. */
-	this_cpu_inc(ssp->sda->srcu_unlock_count[idx]);
+	this_cpu_inc(ssp->sda->srcu_unlock_count[idx].counter);
 }
 EXPORT_SYMBOL_GPL(__srcu_read_unlock);
 
@@ -1687,8 +1687,8 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf)
 			struct srcu_data *sdp;
 
 			sdp = per_cpu_ptr(ssp->sda, cpu);
-			u0 = data_race(sdp->srcu_unlock_count[!idx]);
-			u1 = data_race(sdp->srcu_unlock_count[idx]);
+			u0 = data_race(atomic_long_read(&sdp->srcu_unlock_count[!idx]));
+			u1 = data_race(atomic_long_read(&sdp->srcu_unlock_count[idx]));
 
 			/*
 			 * Make sure that a lock is always counted if the corresponding
@@ -1696,8 +1696,8 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf)
 			 */
 			smp_rmb();
 
-			l0 = data_race(sdp->srcu_lock_count[!idx]);
-			l1 = data_race(sdp->srcu_lock_count[idx]);
+			l0 = data_race(atomic_long_read(&sdp->srcu_lock_count[!idx]));
+			l1 = data_race(atomic_long_read(&sdp->srcu_lock_count[idx]));
 
 			c0 = l0 - u0;
 			c1 = l1 - u1;

From 9dfbe74183a8b190d496f3f75a61fb694f566fcc Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 15 Sep 2022 14:29:07 -0700
Subject: [PATCH 31/70] srcu: Create an srcu_read_lock_nmisafe() and
 srcu_read_unlock_nmisafe()

On strict load-store architectures, the use of this_cpu_inc() by
srcu_read_lock() and srcu_read_unlock() is not NMI-safe in TREE SRCU.
To see this suppose that an NMI arrives in the middle of srcu_read_lock(),
just after it has read ->srcu_lock_count, but before it has written
the incremented value back to memory.  If that NMI handler also does
srcu_read_lock() and srcu_read_lock() on that same srcu_struct structure,
then upon return from that NMI handler, the interrupted srcu_read_lock()
will overwrite the NMI handler's update to ->srcu_lock_count, but
leave unchanged the NMI handler's update by srcu_read_unlock() to
->srcu_unlock_count.

This can result in a too-short SRCU grace period, which can in turn
result in arbitrary memory corruption.

If the NMI handler instead interrupts the srcu_read_unlock(), this
can result in eternal SRCU grace periods, which is not much better.

This commit therefore creates a pair of new srcu_read_lock_nmisafe()
and srcu_read_unlock_nmisafe() functions, which allow SRCU readers in
both NMI handlers and in process and IRQ context.  It is bad practice
to mix the existing and the new _nmisafe() primitives on the same
srcu_struct structure.  Use one set or the other, not both.

Just to underline that "bad practice" point, using srcu_read_lock() at
process level and srcu_read_lock_nmisafe() in your NMI handler will not,
repeat NOT, work.  If you do not immediately understand why this is the
case, please review the earlier paragraphs in this commit log.

[ paulmck: Apply kernel test robot feedback. ]
[ paulmck: Apply feedback from Randy Dunlap. ]

Link: https://lore.kernel.org/all/20220910221947.171557773@linutronix.de/

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Petr Mladek <pmladek@suse.com>
---
 include/linux/srcu.h     | 39 +++++++++++++++++++++++++++++++++++++++
 include/linux/srcutiny.h | 11 +++++++++++
 include/linux/srcutree.h |  3 +++
 kernel/rcu/Kconfig       |  3 +++
 kernel/rcu/rcutorture.c  | 11 +++++++++--
 kernel/rcu/srcutree.c    | 39 +++++++++++++++++++++++++++++++++++----
 6 files changed, 100 insertions(+), 6 deletions(-)

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 01226e4d960a0..2cc8321c0c86a 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -52,6 +52,8 @@ int init_srcu_struct(struct srcu_struct *ssp);
 #else
 /* Dummy definition for things like notifiers.  Actual use gets link error. */
 struct srcu_struct { };
+int __srcu_read_lock_nmisafe(struct srcu_struct *ssp, bool chknmisafe) __acquires(ssp);
+void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx, bool chknmisafe) __releases(ssp);
 #endif
 
 void call_srcu(struct srcu_struct *ssp, struct rcu_head *head,
@@ -166,6 +168,25 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp)
 	return retval;
 }
 
+/**
+ * srcu_read_lock_nmisafe - register a new reader for an SRCU-protected structure.
+ * @ssp: srcu_struct in which to register the new reader.
+ *
+ * Enter an SRCU read-side critical section, but in an NMI-safe manner.
+ * See srcu_read_lock() for more information.
+ */
+static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp)
+{
+	int retval;
+
+	if (IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
+		retval = __srcu_read_lock_nmisafe(ssp);
+	else
+		retval = __srcu_read_lock(ssp);
+	rcu_lock_acquire(&(ssp)->dep_map);
+	return retval;
+}
+
 /* Used by tracing, cannot be traced and cannot invoke lockdep. */
 static inline notrace int
 srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp)
@@ -191,6 +212,24 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx)
 	__srcu_read_unlock(ssp, idx);
 }
 
+/**
+ * srcu_read_unlock_nmisafe - unregister a old reader from an SRCU-protected structure.
+ * @ssp: srcu_struct in which to unregister the old reader.
+ * @idx: return value from corresponding srcu_read_lock().
+ *
+ * Exit an SRCU read-side critical section, but in an NMI-safe manner.
+ */
+static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
+	__releases(ssp)
+{
+	WARN_ON_ONCE(idx & ~0x1);
+	rcu_lock_release(&(ssp)->dep_map);
+	if (IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
+		__srcu_read_unlock_nmisafe(ssp, idx);
+	else
+		__srcu_read_unlock(ssp, idx);
+}
+
 /* Used by tracing, cannot be traced and cannot call lockdep. */
 static inline notrace void
 srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp)
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index 6cfaa0a9a9b96..bccfa18b1b150 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -88,4 +88,15 @@ static inline void srcu_torture_stats_print(struct srcu_struct *ssp,
 		 data_race(READ_ONCE(ssp->srcu_lock_nesting[idx])));
 }
 
+static inline int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
+{
+	BUG();
+	return 0;
+}
+
+static inline void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
+{
+	BUG();
+}
+
 #endif
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 0c4eca07d78d5..d45dd507f4a56 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -154,4 +154,7 @@ void synchronize_srcu_expedited(struct srcu_struct *ssp);
 void srcu_barrier(struct srcu_struct *ssp);
 void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf);
 
+int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp);
+void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp);
+
 #endif
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index d471d22a5e21b..f53ad63b2bc63 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -72,6 +72,9 @@ config TREE_SRCU
 	help
 	  This option selects the full-fledged version of SRCU.
 
+config NEED_SRCU_NMI_SAFE
+	def_bool HAVE_NMI && !ARCH_HAS_NMI_SAFE_THIS_CPU_OPS && !TINY_SRCU
+
 config TASKS_RCU_GENERIC
 	def_bool TASKS_RCU || TASKS_RUDE_RCU || TASKS_TRACE_RCU
 	select SRCU
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index d8e1b270a065f..7f9703b88caed 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -574,10 +574,14 @@ static struct rcu_torture_ops rcu_busted_ops = {
 DEFINE_STATIC_SRCU(srcu_ctl);
 static struct srcu_struct srcu_ctld;
 static struct srcu_struct *srcu_ctlp = &srcu_ctl;
+static struct rcu_torture_ops srcud_ops;
 
 static int srcu_torture_read_lock(void) __acquires(srcu_ctlp)
 {
-	return srcu_read_lock(srcu_ctlp);
+	if (cur_ops == &srcud_ops)
+		return srcu_read_lock_nmisafe(srcu_ctlp);
+	else
+		return srcu_read_lock(srcu_ctlp);
 }
 
 static void
@@ -601,7 +605,10 @@ srcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp)
 
 static void srcu_torture_read_unlock(int idx) __releases(srcu_ctlp)
 {
-	srcu_read_unlock(srcu_ctlp, idx);
+	if (cur_ops == &srcud_ops)
+		srcu_read_unlock_nmisafe(srcu_ctlp, idx);
+	else
+		srcu_read_unlock(srcu_ctlp, idx);
 }
 
 static int torture_srcu_read_lock_held(void)
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 25e9458da6a26..630c7463324de 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -654,6 +654,37 @@ void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
 }
 EXPORT_SYMBOL_GPL(__srcu_read_unlock);
 
+/*
+ * Counts the new reader in the appropriate per-CPU element of the
+ * srcu_struct, but in an NMI-safe manner using RMW atomics.
+ * Returns an index that must be passed to the matching srcu_read_unlock().
+ */
+int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
+{
+	int idx;
+	struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
+
+	idx = READ_ONCE(ssp->srcu_idx) & 0x1;
+	atomic_long_inc(&sdp->srcu_lock_count[idx]);
+	smp_mb__after_atomic(); /* B */  /* Avoid leaking the critical section. */
+	return idx;
+}
+EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe);
+
+/*
+ * Removes the count for the old reader from the appropriate per-CPU
+ * element of the srcu_struct.  Note that this may well be a different
+ * CPU than that which was incremented by the corresponding srcu_read_lock().
+ */
+void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
+{
+	struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
+
+	smp_mb__before_atomic(); /* C */  /* Avoid leaking the critical section. */
+	atomic_long_inc(&sdp->srcu_unlock_count[idx]);
+}
+EXPORT_SYMBOL_GPL(__srcu_read_unlock_nmisafe);
+
 /*
  * Start an SRCU grace period.
  */
@@ -1090,7 +1121,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
 	int ss_state;
 
 	check_init_srcu_struct(ssp);
-	idx = srcu_read_lock(ssp);
+	idx = __srcu_read_lock_nmisafe(ssp);
 	ss_state = smp_load_acquire(&ssp->srcu_size_state);
 	if (ss_state < SRCU_SIZE_WAIT_CALL)
 		sdp = per_cpu_ptr(ssp->sda, 0);
@@ -1123,7 +1154,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
 		srcu_funnel_gp_start(ssp, sdp, s, do_norm);
 	else if (needexp)
 		srcu_funnel_exp_start(ssp, sdp_mynode, s);
-	srcu_read_unlock(ssp, idx);
+	__srcu_read_unlock_nmisafe(ssp, idx);
 	return s;
 }
 
@@ -1427,13 +1458,13 @@ void srcu_barrier(struct srcu_struct *ssp)
 	/* Initial count prevents reaching zero until all CBs are posted. */
 	atomic_set(&ssp->srcu_barrier_cpu_cnt, 1);
 
-	idx = srcu_read_lock(ssp);
+	idx = __srcu_read_lock_nmisafe(ssp);
 	if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
 		srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, 0));
 	else
 		for_each_possible_cpu(cpu)
 			srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, cpu));
-	srcu_read_unlock(ssp, idx);
+	__srcu_read_unlock_nmisafe(ssp, idx);
 
 	/* Remove the initial count, at which point reaching zero can happen. */
 	if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt))

From 6b8c22e1285d527a9a07e5e7c2e522137580e463 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 19 Sep 2022 14:03:07 -0700
Subject: [PATCH 32/70] srcu: Check for consistent per-CPU per-srcu_struct NMI
 safety

This commit adds runtime checks to verify that a given srcu_struct uses
consistent NMI-safe (or not) read-side primitives on a per-CPU basis.

Link: https://lore.kernel.org/all/20220910221947.171557773@linutronix.de/

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Petr Mladek <pmladek@suse.com>
---
 include/linux/srcu.h     |  4 ++--
 include/linux/srcutiny.h |  4 ++--
 include/linux/srcutree.h |  9 +++++++--
 kernel/rcu/srcutree.c    | 38 ++++++++++++++++++++++++++++++++------
 4 files changed, 43 insertions(+), 12 deletions(-)

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 2cc8321c0c86a..565f60d574847 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -180,7 +180,7 @@ static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp
 	int retval;
 
 	if (IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
-		retval = __srcu_read_lock_nmisafe(ssp);
+		retval = __srcu_read_lock_nmisafe(ssp, true);
 	else
 		retval = __srcu_read_lock(ssp);
 	rcu_lock_acquire(&(ssp)->dep_map);
@@ -225,7 +225,7 @@ static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
 	WARN_ON_ONCE(idx & ~0x1);
 	rcu_lock_release(&(ssp)->dep_map);
 	if (IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
-		__srcu_read_unlock_nmisafe(ssp, idx);
+		__srcu_read_unlock_nmisafe(ssp, idx, true);
 	else
 		__srcu_read_unlock(ssp, idx);
 }
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index bccfa18b1b150..efd808a23f8d8 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -88,13 +88,13 @@ static inline void srcu_torture_stats_print(struct srcu_struct *ssp,
 		 data_race(READ_ONCE(ssp->srcu_lock_nesting[idx])));
 }
 
-static inline int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
+static inline int __srcu_read_lock_nmisafe(struct srcu_struct *ssp, bool chknmisafe)
 {
 	BUG();
 	return 0;
 }
 
-static inline void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
+static inline void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx, bool chknmisafe)
 {
 	BUG();
 }
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index d45dd507f4a56..35ffdedf86ccb 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -25,6 +25,7 @@ struct srcu_data {
 	/* Read-side state. */
 	atomic_long_t srcu_lock_count[2];	/* Locks per CPU. */
 	atomic_long_t srcu_unlock_count[2];	/* Unlocks per CPU. */
+	int srcu_nmi_safety;			/* NMI-safe srcu_struct structure? */
 
 	/* Update-side state. */
 	spinlock_t __private lock ____cacheline_internodealigned_in_smp;
@@ -42,6 +43,10 @@ struct srcu_data {
 	struct srcu_struct *ssp;
 };
 
+#define SRCU_NMI_UNKNOWN	0x0
+#define SRCU_NMI_NMI_UNSAFE	0x1
+#define SRCU_NMI_NMI_SAFE	0x2
+
 /*
  * Node in SRCU combining tree, similar in function to rcu_data.
  */
@@ -154,7 +159,7 @@ void synchronize_srcu_expedited(struct srcu_struct *ssp);
 void srcu_barrier(struct srcu_struct *ssp);
 void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf);
 
-int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp);
-void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp);
+int __srcu_read_lock_nmisafe(struct srcu_struct *ssp, bool chknmisafe) __acquires(ssp);
+void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx, bool chknmisafe) __releases(ssp);
 
 #endif
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 630c7463324de..9952fd8cb1690 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -626,6 +626,26 @@ void cleanup_srcu_struct(struct srcu_struct *ssp)
 }
 EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
 
+/*
+ * Check for consistent NMI safety.
+ */
+static void srcu_check_nmi_safety(struct srcu_struct *ssp, bool nmi_safe)
+{
+	int nmi_safe_mask = 1 << nmi_safe;
+	int old_nmi_safe_mask;
+	struct srcu_data *sdp;
+
+	if (!IS_ENABLED(CONFIG_PROVE_RCU))
+		return;
+	sdp = raw_cpu_ptr(ssp->sda);
+	old_nmi_safe_mask = READ_ONCE(sdp->srcu_nmi_safety);
+	if (!old_nmi_safe_mask) {
+		WRITE_ONCE(sdp->srcu_nmi_safety, nmi_safe_mask);
+		return;
+	}
+	WARN_ONCE(old_nmi_safe_mask != nmi_safe_mask, "CPU %d old state %d new state %d\n", sdp->cpu, old_nmi_safe_mask, nmi_safe_mask);
+}
+
 /*
  * Counts the new reader in the appropriate per-CPU element of the
  * srcu_struct.
@@ -638,6 +658,7 @@ int __srcu_read_lock(struct srcu_struct *ssp)
 	idx = READ_ONCE(ssp->srcu_idx) & 0x1;
 	this_cpu_inc(ssp->sda->srcu_lock_count[idx].counter);
 	smp_mb(); /* B */  /* Avoid leaking the critical section. */
+	srcu_check_nmi_safety(ssp, false);
 	return idx;
 }
 EXPORT_SYMBOL_GPL(__srcu_read_lock);
@@ -651,6 +672,7 @@ void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
 {
 	smp_mb(); /* C */  /* Avoid leaking the critical section. */
 	this_cpu_inc(ssp->sda->srcu_unlock_count[idx].counter);
+	srcu_check_nmi_safety(ssp, false);
 }
 EXPORT_SYMBOL_GPL(__srcu_read_unlock);
 
@@ -659,7 +681,7 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
  * srcu_struct, but in an NMI-safe manner using RMW atomics.
  * Returns an index that must be passed to the matching srcu_read_unlock().
  */
-int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
+int __srcu_read_lock_nmisafe(struct srcu_struct *ssp, bool chknmisafe)
 {
 	int idx;
 	struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
@@ -667,6 +689,8 @@ int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
 	idx = READ_ONCE(ssp->srcu_idx) & 0x1;
 	atomic_long_inc(&sdp->srcu_lock_count[idx]);
 	smp_mb__after_atomic(); /* B */  /* Avoid leaking the critical section. */
+	if (chknmisafe)
+		srcu_check_nmi_safety(ssp, true);
 	return idx;
 }
 EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe);
@@ -676,12 +700,14 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe);
  * element of the srcu_struct.  Note that this may well be a different
  * CPU than that which was incremented by the corresponding srcu_read_lock().
  */
-void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
+void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx, bool chknmisafe)
 {
 	struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
 
 	smp_mb__before_atomic(); /* C */  /* Avoid leaking the critical section. */
 	atomic_long_inc(&sdp->srcu_unlock_count[idx]);
+	if (chknmisafe)
+		srcu_check_nmi_safety(ssp, true);
 }
 EXPORT_SYMBOL_GPL(__srcu_read_unlock_nmisafe);
 
@@ -1121,7 +1147,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
 	int ss_state;
 
 	check_init_srcu_struct(ssp);
-	idx = __srcu_read_lock_nmisafe(ssp);
+	idx = __srcu_read_lock_nmisafe(ssp, false);
 	ss_state = smp_load_acquire(&ssp->srcu_size_state);
 	if (ss_state < SRCU_SIZE_WAIT_CALL)
 		sdp = per_cpu_ptr(ssp->sda, 0);
@@ -1154,7 +1180,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
 		srcu_funnel_gp_start(ssp, sdp, s, do_norm);
 	else if (needexp)
 		srcu_funnel_exp_start(ssp, sdp_mynode, s);
-	__srcu_read_unlock_nmisafe(ssp, idx);
+	__srcu_read_unlock_nmisafe(ssp, idx, false);
 	return s;
 }
 
@@ -1458,13 +1484,13 @@ void srcu_barrier(struct srcu_struct *ssp)
 	/* Initial count prevents reaching zero until all CBs are posted. */
 	atomic_set(&ssp->srcu_barrier_cpu_cnt, 1);
 
-	idx = __srcu_read_lock_nmisafe(ssp);
+	idx = __srcu_read_lock_nmisafe(ssp, false);
 	if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
 		srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, 0));
 	else
 		for_each_possible_cpu(cpu)
 			srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, cpu));
-	__srcu_read_unlock_nmisafe(ssp, idx);
+	__srcu_read_unlock_nmisafe(ssp, idx, false);
 
 	/* Remove the initial count, at which point reaching zero can happen. */
 	if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt))

From f944bb4ab0199fc94e823029121181c0c195e6b2 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 20 Sep 2022 14:54:41 -0700
Subject: [PATCH 33/70] srcu: Check for consistent global per-srcu_struct NMI
 safety

This commit adds runtime checks to verify that a given srcu_struct uses
consistent NMI-safe (or not) read-side primitives globally, but based
on the per-CPU data.  These global checks are made by the grace-period
code that must scan the srcu_data structures anyway, and are done only
in kernels built with CONFIG_PROVE_RCU=y.

Link: https://lore.kernel.org/all/20220910221947.171557773@linutronix.de/

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Petr Mladek <pmladek@suse.com>
---
 kernel/rcu/srcutree.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 9952fd8cb1690..c54142374793b 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -429,13 +429,18 @@ static unsigned long srcu_readers_lock_idx(struct srcu_struct *ssp, int idx)
 static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
 {
 	int cpu;
+	unsigned long mask = 0;
 	unsigned long sum = 0;
 
 	for_each_possible_cpu(cpu) {
 		struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu);
 
 		sum += atomic_long_read(&cpuc->srcu_unlock_count[idx]);
+		if (IS_ENABLED(CONFIG_PROVE_RCU))
+			mask = mask | READ_ONCE(cpuc->srcu_nmi_safety);
 	}
+	WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask >> 1)),
+		  "Mixed NMI-safe readers for srcu_struct at %ps.\n", ssp);
 	return sum;
 }
 

From 73fb4e77de9bafd7fb59247db7b63fe7639e9b00 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 28 Sep 2022 11:11:18 -0700
Subject: [PATCH 34/70] arch/x86: Add ARCH_HAS_NMI_SAFE_THIS_CPU_OPS Kconfig
 option

The x86 architecture uses an add-to-memory instruction to implement
this_cpu_add(), which is NMI safe.  This means that the old and
more-efficient srcu_read_lock() may be used in NMI context, without
the need for srcu_read_lock_nmisafe().  Therefore, add the new Kconfig
option ARCH_HAS_NMI_SAFE_THIS_CPU_OPS to arch/x86/Kconfig, which will
cause NEED_SRCU_NMI_SAFE to be deselected, thus preserving the current
srcu_read_lock() behavior.

Link: https://lore.kernel.org/all/20220910221947.171557773@linutronix.de/

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Petr Mladek <pmladek@suse.com>
Cc: <x86@kernel.org>
---
 arch/x86/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f9920f1341c8d..ee5783d8ec71b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -81,6 +81,7 @@ config X86
 	select ARCH_HAS_KCOV			if X86_64
 	select ARCH_HAS_MEM_ENCRYPT
 	select ARCH_HAS_MEMBARRIER_SYNC_CORE
+	select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
 	select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
 	select ARCH_HAS_PMEM_API		if X86_64
 	select ARCH_HAS_PTE_DEVMAP		if X86_64

From 26a861bd368565eb70fe79d792fcffd035369c83 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 28 Sep 2022 11:17:05 -0700
Subject: [PATCH 35/70] arch/arm64: Add ARCH_HAS_NMI_SAFE_THIS_CPU_OPS Kconfig
 option

The arm64 architecture uses either an LL/SC loop (old systems) or an LSE
stadd instruction (new systems) to implement this_cpu_add(), both of which
are NMI safe.  This means that the old and more-efficient srcu_read_lock()
may be used in NMI context, without the need for srcu_read_lock_nmisafe().
Therefore, add the new Kconfig option ARCH_HAS_NMI_SAFE_THIS_CPU_OPS to
arch/arm64/Kconfig, which will cause NEED_SRCU_NMI_SAFE to be deselected,
thus preserving the current srcu_read_lock() behavior.

Link: https://lore.kernel.org/all/20220910221947.171557773@linutronix.de/

Suggested-by: Neeraj Upadhyay <quic_neeraju@quicinc.com>
Suggested-by: Frederic Weisbecker <frederic@kernel.org>
Suggested-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Petr Mladek <pmladek@suse.com>
Cc: <linux-arm-kernel@lists.infradead.org>
---
 arch/arm64/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 571cc234d0b3f..664725a0b5dd2 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -31,6 +31,7 @@ config ARM64
 	select ARCH_HAS_KCOV
 	select ARCH_HAS_KEEPINITRD
 	select ARCH_HAS_MEMBARRIER_SYNC_CORE
+	select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
 	select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
 	select ARCH_HAS_PTE_DEVMAP
 	select ARCH_HAS_PTE_SPECIAL

From 839fc1e464ebdca765af926d89dc36b4b5f2275d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 28 Sep 2022 11:21:54 -0700
Subject: [PATCH 36/70] arch/loongarch: Add ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
 Kconfig option

The loongarch architecture uses the atomic read-modify-write amadd
instruction to implement this_cpu_add(), which is NMI safe.  This means
that the old and more-efficient srcu_read_lock() may be used in NMI
context, without the need for srcu_read_lock_nmisafe().  Therefore, add
the new Kconfig option ARCH_HAS_NMI_SAFE_THIS_CPU_OPS to arch/x86/Kconfig,
which will cause NEED_SRCU_NMI_SAFE to be deselected, thus preserving
the current srcu_read_lock() behavior.

Link: https://lore.kernel.org/all/20220910221947.171557773@linutronix.de/

Suggested-by: Neeraj Upadhyay <quic_neeraju@quicinc.com>
Suggested-by: Frederic Weisbecker <frederic@kernel.org>
Suggested-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Petr Mladek <pmladek@suse.com>
Cc: <loongarch@lists.linux.dev>
---
 arch/loongarch/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 4abc9a28aba4e..c8864768dc4d0 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -10,6 +10,7 @@ config LOONGARCH
 	select ARCH_ENABLE_MEMORY_HOTPLUG
 	select ARCH_ENABLE_MEMORY_HOTREMOVE
 	select ARCH_HAS_ACPI_TABLE_UPGRADE	if ACPI
+	select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
 	select ARCH_HAS_PHYS_TO_DMA
 	select ARCH_HAS_PTE_SPECIAL
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST

From 83ddb5e0e56d37f892b2b6ddd39b361dc91c758c Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 28 Sep 2022 11:25:20 -0700
Subject: [PATCH 37/70] arch/s390: Add ARCH_HAS_NMI_SAFE_THIS_CPU_OPS Kconfig
 option

The s390 architecture uses either a cmpxchg loop (old systems)
or the laa add-to-memory instruction (new systems) to implement
this_cpu_add(), both of which are NMI safe.  This means that the old
and more-efficient srcu_read_lock() may be used in NMI context, without
the need for srcu_read_lock_nmisafe().  Therefore, add the new Kconfig
option ARCH_HAS_NMI_SAFE_THIS_CPU_OPS to arch/arm64/Kconfig, which will
cause NEED_SRCU_NMI_SAFE to be deselected, thus preserving the current
srcu_read_lock() behavior.

Link: https://lore.kernel.org/all/20220910221947.171557773@linutronix.de/

Suggested-by: Neeraj Upadhyay <quic_neeraju@quicinc.com>
Suggested-by: Frederic Weisbecker <frederic@kernel.org>
Suggested-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Petr Mladek <pmladek@suse.com>
Cc: <linux-s390@vger.kernel.org>
---
 arch/s390/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 318fce77601d3..0acdfda332908 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -73,6 +73,7 @@ config S390
 	select ARCH_HAS_GIGANTIC_PAGE
 	select ARCH_HAS_KCOV
 	select ARCH_HAS_MEM_ENCRYPT
+	select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
 	select ARCH_HAS_PTE_SPECIAL
 	select ARCH_HAS_SCALED_CPUTIME
 	select ARCH_HAS_SET_MEMORY

From 6a08a86a63d7fb408245ae6f30809ba84d936f97 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 26 Apr 2022 17:05:39 -0700
Subject: [PATCH 38/70] memory-model: Prohibit nested SRCU read-side critical
 sections

This commit prohibits nested SRCU read-side critical sections of the
same srcu_struct structure.  The memory model does not currently handle
these correctly because it ignores the required connection between
srcu_read_lock() and srcu_read_unlock() provided by the value returned
from the former and passed into the latter.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/linux-kernel.bell | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/tools/memory-model/linux-kernel.bell b/tools/memory-model/linux-kernel.bell
index 5be86b1025e8d..65c32ca9d5ea2 100644
--- a/tools/memory-model/linux-kernel.bell
+++ b/tools/memory-model/linux-kernel.bell
@@ -56,17 +56,11 @@ let rcu-rscs = let rec
 flag ~empty Rcu-lock \ domain(rcu-rscs) as unbalanced-rcu-locking
 flag ~empty Rcu-unlock \ range(rcu-rscs) as unbalanced-rcu-locking
 
-(* Compute matching pairs of nested Srcu-lock and Srcu-unlock *)
-let srcu-rscs = let rec
-	    unmatched-locks = Srcu-lock \ domain(matched)
-	and unmatched-unlocks = Srcu-unlock \ range(matched)
-	and unmatched = unmatched-locks | unmatched-unlocks
-	and unmatched-po = ([unmatched] ; po ; [unmatched]) & loc
-	and unmatched-locks-to-unlocks =
-		([unmatched-locks] ; po ; [unmatched-unlocks]) & loc
-	and matched = matched | (unmatched-locks-to-unlocks \
-		(unmatched-po ; unmatched-po))
-	in matched
+(* Compute matching pairs of Srcu-lock and Srcu-unlock, but prohibit nesting *)
+let srcu-unmatched = Srcu-lock | Srcu-unlock
+let srcu-unmatched-po = ([srcu-unmatched] ; po ; [srcu-unmatched]) & loc
+let srcu-unmatched-locks-to-unlock = ([Srcu-lock] ; po ; [Srcu-unlock]) & loc
+let srcu-rscs = srcu-unmatched-locks-to-unlock \ (srcu-unmatched-po ; srcu-unmatched-po)
 
 (* Validate nesting *)
 flag ~empty Srcu-lock \ domain(srcu-rscs) as unbalanced-srcu-locking

From ef1e00c2c4622b1010a93d92cd62eb0625a69d36 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 25 Aug 2022 13:37:59 -0700
Subject: [PATCH 39/70] rcutorture: Verify NUM_ACTIVE_RCU_POLL_FULL_OLDSTATE

This commit adds code to the RTWS_POLL_GET_FULL case
of rcu_torture_writer() to verify that the value of
NUM_ACTIVE_RCU_POLL_FULL_OLDSTATE is sufficiently large.

[ paulmck: Fix whitespace issue located by checkpatch.pl. ]

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/rcutorture.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index b4c74ce102256..02ac1fb77193d 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -357,6 +357,8 @@ struct rcu_torture_ops {
 	bool (*poll_gp_state_exp)(unsigned long oldstate);
 	void (*cond_sync_exp)(unsigned long oldstate);
 	void (*cond_sync_exp_full)(struct rcu_gp_oldstate *rgosp);
+	void (*get_comp_state_full)(struct rcu_gp_oldstate *rgosp);
+	bool (*same_gp_state_full)(struct rcu_gp_oldstate *rgosp1, struct rcu_gp_oldstate *rgosp2);
 	unsigned long (*get_gp_state)(void);
 	void (*get_gp_state_full)(struct rcu_gp_oldstate *rgosp);
 	unsigned long (*get_gp_completed)(void);
@@ -535,6 +537,8 @@ static struct rcu_torture_ops rcu_ops = {
 	.deferred_free		= rcu_torture_deferred_free,
 	.sync			= synchronize_rcu,
 	.exp_sync		= synchronize_rcu_expedited,
+	.same_gp_state_full	= same_state_synchronize_rcu_full,
+	.get_comp_state_full	= get_completed_synchronize_rcu_full,
 	.get_gp_state		= get_state_synchronize_rcu,
 	.get_gp_state_full	= get_state_synchronize_rcu_full,
 	.get_gp_completed	= get_completed_synchronize_rcu,
@@ -1271,7 +1275,8 @@ static void rcu_torture_write_types(void)
 	} else if (gp_poll && (!cur_ops->start_gp_poll || !cur_ops->poll_gp_state)) {
 		pr_alert("%s: gp_poll without primitives.\n", __func__);
 	}
-	if (gp_poll_full1 && cur_ops->start_gp_poll_full && cur_ops->poll_gp_state_full) {
+	if (gp_poll_full1 && cur_ops->get_comp_state_full && cur_ops->same_gp_state_full
+	    && cur_ops->start_gp_poll_full && cur_ops->poll_gp_state_full) {
 		synctype[nsynctypes++] = RTWS_POLL_GET_FULL;
 		pr_info("%s: Testing polling full-state GPs.\n", __func__);
 	} else if (gp_poll_full && (!cur_ops->start_gp_poll_full || !cur_ops->poll_gp_state_full)) {
@@ -1347,9 +1352,11 @@ rcu_torture_writer(void *arg)
 	int expediting = 0;
 	unsigned long gp_snap;
 	struct rcu_gp_oldstate gp_snap_full;
+	struct rcu_gp_oldstate gp_snap1_full;
 	int i;
 	int idx;
 	int oldnice = task_nice(current);
+	struct rcu_gp_oldstate rgo[NUM_ACTIVE_RCU_POLL_FULL_OLDSTATE];
 	struct rcu_torture *rp;
 	struct rcu_torture *old_rp;
 	static DEFINE_TORTURE_RANDOM(rand);
@@ -1479,11 +1486,23 @@ rcu_torture_writer(void *arg)
 				break;
 			case RTWS_POLL_GET_FULL:
 				rcu_torture_writer_state = RTWS_POLL_GET_FULL;
+				for (i = 0; i < ARRAY_SIZE(rgo); i++)
+					cur_ops->get_comp_state_full(&rgo[i]);
 				cur_ops->start_gp_poll_full(&gp_snap_full);
 				rcu_torture_writer_state = RTWS_POLL_WAIT_FULL;
-				while (!cur_ops->poll_gp_state_full(&gp_snap_full))
+				while (!cur_ops->poll_gp_state_full(&gp_snap_full)) {
+					cur_ops->get_gp_state_full(&gp_snap1_full);
+					for (i = 0; i < ARRAY_SIZE(rgo); i++)
+						if (cur_ops->poll_gp_state_full(&rgo[i]) ||
+						    cur_ops->same_gp_state_full(&rgo[i],
+										&gp_snap1_full)) {
+							rgo[i] = gp_snap1_full;
+							break;
+						}
+					WARN_ON_ONCE(i >= ARRAY_SIZE(rgo));
 					torture_hrtimeout_jiffies(torture_random(&rand) % 16,
 								  &rand);
+				}
 				rcu_torture_pipe_update(old_rp);
 				break;
 			case RTWS_POLL_GET_EXP:

From fa95bca4833a87ae2c729e261bab65dc2761fabd Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 25 Aug 2022 15:47:09 -0700
Subject: [PATCH 40/70] rcutorture: Verify NUM_ACTIVE_RCU_POLL_OLDSTATE

This commit adds code to the RTWS_POLL_GET case of rcu_torture_writer()
to verify that the value of NUM_ACTIVE_RCU_POLL_OLDSTATE is sufficiently
large

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/rcutorture.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 02ac1fb77193d..684e24f12a798 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -357,7 +357,9 @@ struct rcu_torture_ops {
 	bool (*poll_gp_state_exp)(unsigned long oldstate);
 	void (*cond_sync_exp)(unsigned long oldstate);
 	void (*cond_sync_exp_full)(struct rcu_gp_oldstate *rgosp);
+	unsigned long (*get_comp_state)(void);
 	void (*get_comp_state_full)(struct rcu_gp_oldstate *rgosp);
+	bool (*same_gp_state)(unsigned long oldstate1, unsigned long oldstate2);
 	bool (*same_gp_state_full)(struct rcu_gp_oldstate *rgosp1, struct rcu_gp_oldstate *rgosp2);
 	unsigned long (*get_gp_state)(void);
 	void (*get_gp_state_full)(struct rcu_gp_oldstate *rgosp);
@@ -537,7 +539,9 @@ static struct rcu_torture_ops rcu_ops = {
 	.deferred_free		= rcu_torture_deferred_free,
 	.sync			= synchronize_rcu,
 	.exp_sync		= synchronize_rcu_expedited,
+	.same_gp_state		= same_state_synchronize_rcu,
 	.same_gp_state_full	= same_state_synchronize_rcu_full,
+	.get_comp_state		= get_completed_synchronize_rcu,
 	.get_comp_state_full	= get_completed_synchronize_rcu_full,
 	.get_gp_state		= get_state_synchronize_rcu,
 	.get_gp_state_full	= get_state_synchronize_rcu_full,
@@ -1269,7 +1273,8 @@ static void rcu_torture_write_types(void)
 	} else if (gp_normal && !cur_ops->deferred_free) {
 		pr_alert("%s: gp_normal without primitives.\n", __func__);
 	}
-	if (gp_poll1 && cur_ops->start_gp_poll && cur_ops->poll_gp_state) {
+	if (gp_poll1 && cur_ops->get_comp_state && cur_ops->same_gp_state &&
+	    cur_ops->start_gp_poll && cur_ops->poll_gp_state) {
 		synctype[nsynctypes++] = RTWS_POLL_GET;
 		pr_info("%s: Testing polling GPs.\n", __func__);
 	} else if (gp_poll && (!cur_ops->start_gp_poll || !cur_ops->poll_gp_state)) {
@@ -1351,6 +1356,7 @@ rcu_torture_writer(void *arg)
 	struct rcu_gp_oldstate cookie_full;
 	int expediting = 0;
 	unsigned long gp_snap;
+	unsigned long gp_snap1;
 	struct rcu_gp_oldstate gp_snap_full;
 	struct rcu_gp_oldstate gp_snap1_full;
 	int i;
@@ -1361,6 +1367,7 @@ rcu_torture_writer(void *arg)
 	struct rcu_torture *old_rp;
 	static DEFINE_TORTURE_RANDOM(rand);
 	bool stutter_waited;
+	unsigned long ulo[NUM_ACTIVE_RCU_POLL_OLDSTATE];
 
 	VERBOSE_TOROUT_STRING("rcu_torture_writer task started");
 	if (!can_expedite)
@@ -1477,11 +1484,22 @@ rcu_torture_writer(void *arg)
 				break;
 			case RTWS_POLL_GET:
 				rcu_torture_writer_state = RTWS_POLL_GET;
+				for (i = 0; i < ARRAY_SIZE(ulo); i++)
+					ulo[i] = cur_ops->get_comp_state();
 				gp_snap = cur_ops->start_gp_poll();
 				rcu_torture_writer_state = RTWS_POLL_WAIT;
-				while (!cur_ops->poll_gp_state(gp_snap))
+				while (!cur_ops->poll_gp_state(gp_snap)) {
+					gp_snap1 = cur_ops->get_gp_state();
+					for (i = 0; i < ARRAY_SIZE(ulo); i++)
+						if (cur_ops->poll_gp_state(ulo[i]) ||
+						    cur_ops->same_gp_state(ulo[i], gp_snap1)) {
+							ulo[i] = gp_snap1;
+							break;
+						}
+					WARN_ON_ONCE(i >= ARRAY_SIZE(ulo));
 					torture_hrtimeout_jiffies(torture_random(&rand) % 16,
 								  &rand);
+				}
 				rcu_torture_pipe_update(old_rp);
 				break;
 			case RTWS_POLL_GET_FULL:

From 66d61057266260e8e28e1c5e5797fce15f32c48a Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 26 Aug 2022 14:47:48 -0700
Subject: [PATCH 41/70] rcutorture: Add --bootargs parameter to kvm-again.sh

The kvm-again.sh script can be used to repeat short boot-time tests,
but the kernel boot arguments cannot be changed.  This means that every
change in kernel boot arguments currently necessitates a kernel build,
which greatly increases the duration of kernel-boot testing.

This commit therefore adds a --bootargs parameter to kvm-again.sh,
which allows a given kernel to be repeatedly booted, but overriding
old and adding new kernel boot parameters.  This allows an old kernel
to be booted with new kernel boot parameters, avoiding the overhead of
rebuilding the kernel under test.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 .../selftests/rcutorture/bin/kvm-again.sh     |  9 ++-
 .../selftests/rcutorture/bin/kvm-transform.sh | 68 ++++++++++++++++---
 2 files changed, 65 insertions(+), 12 deletions(-)

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-again.sh b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
index 0941f1ddab658..85cfd139f9737 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-again.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
@@ -51,6 +51,7 @@ RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
 PATH=${RCUTORTURE}/bin:$PATH; export PATH
 . functions.sh
 
+bootargs=
 dryrun=
 dur=
 default_link="cp -R"
@@ -61,6 +62,7 @@ starttime="`get_starttime`"
 
 usage () {
 	echo "Usage: $scriptname $oldrun [ arguments ]:"
+	echo "       --bootargs kernel-boot-arguments"
 	echo "       --dryrun"
 	echo "       --duration minutes | <seconds>s | <hours>h | <days>d"
 	echo "       --link hard|soft|copy"
@@ -72,6 +74,11 @@ usage () {
 while test $# -gt 0
 do
 	case "$1" in
+	--bootargs|--bootarg)
+		checkarg --bootargs "(list of kernel boot arguments)" "$#" "$2" '.*' '^--'
+		bootargs="$bootargs $2"
+		shift
+		;;
 	--dryrun)
 		dryrun=1
 		;;
@@ -156,7 +163,7 @@ do
 	qemu_cmd_dir="`dirname "$i"`"
 	kernel_dir="`echo $qemu_cmd_dir | sed -e 's/\.[0-9]\+$//'`"
 	jitter_dir="`dirname "$kernel_dir"`"
-	kvm-transform.sh "$kernel_dir/bzImage" "$qemu_cmd_dir/console.log" "$jitter_dir" $dur < $T/qemu-cmd > $i
+	kvm-transform.sh "$kernel_dir/bzImage" "$qemu_cmd_dir/console.log" "$jitter_dir" $dur "$bootargs" < $T/qemu-cmd > $i
 	if test -n "$arg_remote"
 	then
 		echo "# TORTURE_KCONFIG_GDB_ARG=''" >> $i
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-transform.sh b/tools/testing/selftests/rcutorture/bin/kvm-transform.sh
index d40b4e60a50cb..75a2610a27f37 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-transform.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-transform.sh
@@ -3,10 +3,14 @@
 #
 # Transform a qemu-cmd file to allow reuse.
 #
-# Usage: kvm-transform.sh bzImage console.log jitter_dir [ seconds ] < qemu-cmd-in > qemu-cmd-out
+# Usage: kvm-transform.sh bzImage console.log jitter_dir seconds [ bootargs ] < qemu-cmd-in > qemu-cmd-out
 #
 #	bzImage: Kernel and initrd from the same prior kvm.sh run.
 #	console.log: File into which to place console output.
+#	jitter_dir: Jitter directory for TORTURE_JITTER_START and
+#		TORTURE_JITTER_STOP environment variables.
+#	seconds: Run duaration for *.shutdown_secs module parameter.
+#	bootargs: New kernel boot parameters.  Beware of Robert Tables.
 #
 # The original qemu-cmd file is provided on standard input.
 # The transformed qemu-cmd file is on standard output.
@@ -17,6 +21,9 @@
 #
 # Authors: Paul E. McKenney <paulmck@kernel.org>
 
+T=`mktemp -d /tmp/kvm-transform.sh.XXXXXXXXXX`
+trap 'rm -rf $T' 0 2
+
 image="$1"
 if test -z "$image"
 then
@@ -41,9 +48,17 @@ then
 	echo "Invalid duration, should be numeric in seconds: '$seconds'"
 	exit 1
 fi
+bootargs="$5"
+
+# Build awk program.
+echo "BEGIN {" > $T/bootarg.awk
+echo $bootargs | tr -s ' ' '\012' |
+	awk -v dq='"' '/./ { print "\tbootarg[" NR "] = " dq $1 dq ";" }' >> $T/bootarg.awk
+echo $bootargs | tr -s ' ' '\012' | sed -e 's/=.*$//' |
+	awk -v dq='"' '/./ { print "\tbootpar[" NR "] = " dq $1 dq ";" }' >> $T/bootarg.awk
+cat >> $T/bootarg.awk << '___EOF___'
+}
 
-awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \
-    -v seconds="$seconds" '
 /^# seconds=/ {
 	if (seconds == "")
 		print $0;
@@ -70,13 +85,7 @@ awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \
 {
 	line = "";
 	for (i = 1; i <= NF; i++) {
-		if ("" seconds != "" && $i ~ /\.shutdown_secs=[0-9]*$/) {
-			sub(/[0-9]*$/, seconds, $i);
-			if (line == "")
-				line = $i;
-			else
-				line = line " " $i;
-		} else if (line == "") {
+		if (line == "") {
 			line = $i;
 		} else {
 			line = line " " $i;
@@ -87,7 +96,44 @@ awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \
 		} else if ($i == "-kernel") {
 			i++;
 			line = line " " image;
+		} else if ($i == "-append") {
+			for (i++; i <= NF; i++) {
+				arg = $i;
+				lq = "";
+				rq = "";
+				if ("" seconds != "" && $i ~ /\.shutdown_secs=[0-9]*$/)
+					sub(/[0-9]*$/, seconds, arg);
+				if (arg ~ /^"/) {
+					lq = substr(arg, 1, 1);
+					arg  = substr(arg, 2);
+				}
+				if (arg ~ /"$/) {
+					rq = substr(arg, length($i), 1);
+					arg = substr(arg, 1, length($i) - 1);
+				}
+				par = arg;
+				gsub(/=.*$/, "", par);
+				j = 1;
+				while (bootpar[j] != "") {
+					if (bootpar[j] == par) {
+						arg = "";
+						break;
+					}
+					j++;
+				}
+				if (line == "")
+					line = lq arg;
+				else
+					line = line " " lq arg;
+			}
+			for (j in bootarg)
+				line = line " " bootarg[j];
+			line = line rq;
 		}
 	}
 	print line;
-}'
+}
+___EOF___
+
+awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \
+    -v seconds="$seconds" -f $T/bootarg.awk

From 501064759ab4a65b7253468515dc94df9d42cb35 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Sat, 27 Aug 2022 07:16:27 -0700
Subject: [PATCH 42/70] torture: Use mktemp instead of guessing at unique names

This commit drags the rcutorture scripting kicking and screaming into the
twenty-first century by making use of the BSD-derived mktemp command to
create temporary files and directories.  In happy contrast to many of its
ill-behaved predecessors, mktemp seems to actually work reasonably reliably!

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 .../testing/selftests/rcutorture/bin/config2csv.sh  |  3 +--
 .../selftests/rcutorture/bin/config_override.sh     |  3 +--
 .../testing/selftests/rcutorture/bin/configcheck.sh |  3 +--
 .../testing/selftests/rcutorture/bin/configinit.sh  |  3 +--
 tools/testing/selftests/rcutorture/bin/kvm-again.sh |  3 +--
 .../selftests/rcutorture/bin/kvm-assign-cpus.sh     |  3 +--
 tools/testing/selftests/rcutorture/bin/kvm-build.sh |  3 +--
 .../selftests/rcutorture/bin/kvm-end-run-stats.sh   |  3 +--
 .../testing/selftests/rcutorture/bin/kvm-remote.sh  | 13 ++++++-------
 .../rcutorture/bin/kvm-test-1-run-batch.sh          |  3 +--
 .../selftests/rcutorture/bin/kvm-test-1-run-qemu.sh |  3 +--
 .../selftests/rcutorture/bin/kvm-test-1-run.sh      |  3 +--
 tools/testing/selftests/rcutorture/bin/kvm.sh       |  3 +--
 .../testing/selftests/rcutorture/bin/parse-build.sh |  3 +--
 tools/testing/selftests/rcutorture/bin/torture.sh   |  3 +--
 15 files changed, 20 insertions(+), 35 deletions(-)

diff --git a/tools/testing/selftests/rcutorture/bin/config2csv.sh b/tools/testing/selftests/rcutorture/bin/config2csv.sh
index d5a16631b16ee..0cf55f1bf6548 100755
--- a/tools/testing/selftests/rcutorture/bin/config2csv.sh
+++ b/tools/testing/selftests/rcutorture/bin/config2csv.sh
@@ -30,9 +30,8 @@ else
 fi
 scenarios="`echo $scenariosarg | sed -e "s/\<CFLIST\>/$defaultconfigs/g"`"
 
-T=/tmp/config2latex.sh.$$
+T=`mktemp -d /tmp/config2latex.sh.XXXXXX`
 trap 'rm -rf $T' 0
-mkdir $T
 
 cat << '---EOF---' >> $T/p.awk
 END	{
diff --git a/tools/testing/selftests/rcutorture/bin/config_override.sh b/tools/testing/selftests/rcutorture/bin/config_override.sh
index 90016c359e839..b3d2e7efa40cd 100755
--- a/tools/testing/selftests/rcutorture/bin/config_override.sh
+++ b/tools/testing/selftests/rcutorture/bin/config_override.sh
@@ -29,9 +29,8 @@ else
 	exit 1
 fi
 
-T=${TMPDIR-/tmp}/config_override.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/config_override.sh.XXXXXX`"
 trap 'rm -rf $T' 0
-mkdir $T
 
 sed < $override -e 's/^/grep -v "/' -e 's/=.*$/="/' |
 	awk '
diff --git a/tools/testing/selftests/rcutorture/bin/configcheck.sh b/tools/testing/selftests/rcutorture/bin/configcheck.sh
index 31584cee84d71..83fac1852ab23 100755
--- a/tools/testing/selftests/rcutorture/bin/configcheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/configcheck.sh
@@ -7,9 +7,8 @@
 #
 # Authors: Paul E. McKenney <paulmck@linux.ibm.com>
 
-T=${TMPDIR-/tmp}/abat-chk-config.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/configcheck.sh.XXXXXX`"
 trap 'rm -rf $T' 0
-mkdir $T
 
 cat $1 > $T/.config
 
diff --git a/tools/testing/selftests/rcutorture/bin/configinit.sh b/tools/testing/selftests/rcutorture/bin/configinit.sh
index d6e5ce084b1cf..28bdb3ac7ba6f 100755
--- a/tools/testing/selftests/rcutorture/bin/configinit.sh
+++ b/tools/testing/selftests/rcutorture/bin/configinit.sh
@@ -15,9 +15,8 @@
 #
 # Authors: Paul E. McKenney <paulmck@linux.ibm.com>
 
-T=${TMPDIR-/tmp}/configinit.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/configinit.sh.XXXXXX`"
 trap 'rm -rf $T' 0
-mkdir $T
 
 # Capture config spec file.
 
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-again.sh b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
index 85cfd139f9737..20941c1051087 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-again.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
@@ -12,9 +12,8 @@
 scriptname=$0
 args="$*"
 
-T=${TMPDIR-/tmp}/kvm-again.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/kvm-again.sh.XXXXXX`"
 trap 'rm -rf $T' 0
-mkdir $T
 
 if ! test -d tools/testing/selftests/rcutorture/bin
 then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh b/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh
index f99b2c146f835..46b08cd16ba5c 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh
@@ -7,9 +7,8 @@
 #
 # Usage: kvm-assign-cpus.sh /path/to/sysfs
 
-T=/tmp/kvm-assign-cpus.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/kvm-assign-cpus.sh.XXXXXX`"
 trap 'rm -rf $T' 0 2
-mkdir $T
 
 sysfsdir=${1-/sys/devices/system/node}
 if ! cd "$sysfsdir" > $T/msg 2>&1
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
index 5ad973dca8207..e28a82851f7c4 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-build.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
@@ -23,9 +23,8 @@ then
 fi
 resdir=${2}
 
-T=${TMPDIR-/tmp}/test-linux.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/kvm-build.sh.XXXXXX`"
 trap 'rm -rf $T' 0
-mkdir $T
 
 cp ${config_template} $T/config
 cat << ___EOF___ >> $T/config
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh b/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh
index ee886b40a5d2c..2b56baceb05d7 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh
@@ -18,9 +18,8 @@ then
 	exit 1
 fi
 
-T=${TMPDIR-/tmp}/kvm-end-run-stats.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/kvm-end-run-stats.sh.XXXXXX`"
 trap 'rm -rf $T' 0
-mkdir $T
 
 RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
 PATH=${RCUTORTURE}/bin:$PATH; export PATH
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
index 9f0a5d5ff2ddc..a2328163eba1d 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
@@ -34,19 +34,18 @@ fi
 shift
 
 # Pathnames:
-# T:	  /tmp/kvm-remote.sh.$$
-# resdir: /tmp/kvm-remote.sh.$$/res
-# rundir: /tmp/kvm-remote.sh.$$/res/$ds ("-remote" suffix)
+# T:	  /tmp/kvm-remote.sh.NNNNNN where "NNNNNN" is set by mktemp
+# resdir: /tmp/kvm-remote.sh.NNNNNN/res
+# rundir: /tmp/kvm-remote.sh.NNNNNN/res/$ds ("-remote" suffix)
 # oldrun: `pwd`/tools/testing/.../res/$otherds
 #
 # Pathname segments:
-# TD:	  kvm-remote.sh.$$
+# TD:	  kvm-remote.sh.NNNNNN
 # ds:	  yyyy.mm.dd-hh.mm.ss-remote
 
-TD=kvm-remote.sh.$$
-T=${TMPDIR-/tmp}/$TD
+T="`mktemp -d ${TMPDIR-/tmp}/kvm-remote.sh.XXXXXX`"
 trap 'rm -rf $T' 0
-mkdir $T
+TD="`basename "$T"`"
 
 resdir="$T/res"
 ds=`date +%Y.%m.%d-%H.%M.%S`-remote
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh
index 1e29d656501bc..c3808c490d92d 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh
@@ -13,9 +13,8 @@
 #
 # Authors: Paul E. McKenney <paulmck@kernel.org>
 
-T=${TMPDIR-/tmp}/kvm-test-1-run-batch.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/kvm-test-1-run-batch.sh.XXXXXX`"
 trap 'rm -rf $T' 0
-mkdir $T
 
 echo ---- Running batch $*
 # Check arguments
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh
index 44280582c594e..9da86e6cd1812 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh
@@ -17,9 +17,8 @@
 #
 # Authors: Paul E. McKenney <paulmck@kernel.org>
 
-T=${TMPDIR-/tmp}/kvm-test-1-run-qemu.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/kvm-test-1-run-qemu.sh.XXXXXX`"
 trap 'rm -rf $T' 0
-mkdir $T
 
 resdir="$1"
 if ! test -d "$resdir"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index f4c8055dbf7ad..d2a3710a5f2ad 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -25,9 +25,8 @@
 #
 # Authors: Paul E. McKenney <paulmck@linux.ibm.com>
 
-T=${TMPDIR-/tmp}/kvm-test-1-run.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/kvm-test-1-run.sh.XXXXXX`"
 trap 'rm -rf $T' 0
-mkdir $T
 
 . functions.sh
 . $CONFIGFRAG/ver_functions.sh
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 6c734818a8757..7710b1e1cddab 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -14,9 +14,8 @@
 scriptname=$0
 args="$*"
 
-T=${TMPDIR-/tmp}/kvm.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/kvm.sh.XXXXXX`"
 trap 'rm -rf $T' 0
-mkdir $T
 
 cd `dirname $scriptname`/../../../../../
 
diff --git a/tools/testing/selftests/rcutorture/bin/parse-build.sh b/tools/testing/selftests/rcutorture/bin/parse-build.sh
index 2dbfca3589b17..5a0b7ffcf047a 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-build.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-build.sh
@@ -15,9 +15,8 @@
 
 F=$1
 title=$2
-T=${TMPDIR-/tmp}/parse-build.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/parse-build.sh.XXXXXX`"
 trap 'rm -rf $T' 0
-mkdir $T
 
 . functions.sh
 
diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index d477618e7261d..4d285db2fbbdd 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -206,9 +206,8 @@ ds="`date +%Y.%m.%d-%H.%M.%S`-torture"
 startdate="`date`"
 starttime="`get_starttime`"
 
-T=/tmp/torture.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/torture.sh.XXXXXX`"
 trap 'rm -rf $T' 0 2
-mkdir $T
 
 echo " --- " $scriptname $args | tee -a $T/log
 echo " --- Results directory: " $ds | tee -a $T/log

From de8d9a3a3f9a884d37c09ce03509e1c6f22f39e6 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 29 Aug 2022 16:21:19 -0700
Subject: [PATCH 43/70] rcutorture: Make kvm-test-1-run-qemu.sh check for
 alternative output

The kvm-again.sh script, when running locally, can place the QEMU output
into kvm-test-1-run-qemu.sh.out instead of kvm-test-1-run.sh.out.  This
commit therefore makes kvm-test-1-run-qemu.sh check both locations.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh
index 9da86e6cd1812..76f24cd5825be 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh
@@ -108,7 +108,7 @@ do
 		if test $kruntime -lt $seconds
 		then
 			echo Completed in $kruntime vs. $seconds >> $resdir/Warnings 2>&1
-			grep "^(qemu) qemu:" $resdir/kvm-test-1-run.sh.out >> $resdir/Warnings 2>&1
+			grep "^(qemu) qemu:" $resdir/kvm-test-1-run*.sh.out >> $resdir/Warnings 2>&1
 			killpid="`sed -n "s/^(qemu) qemu: terminating on signal [0-9]* from pid \([0-9]*\).*$/\1/p" $resdir/Warnings`"
 			if test -n "$killpid"
 			then

From 9cfd8c651771126b41699c5fff6bdf16004db0e1 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 29 Aug 2022 16:23:11 -0700
Subject: [PATCH 44/70] rcutorture: Make kvm-recheck.sh export TORTURE_SUITE

As it should, the kvm-recheck.sh script sets the TORTURE_SUITE bash
variable based on the type of rcutorture test being run.  However,
it does not export it.  Which is OK, at least until you try running
kvm-again.sh on either a rcuscale or a refscale test, at which point you
get false-positive "no success message, N successful version messages"
errors.  This commit therefore causes the kvm-recheck.sh script to export
TORTURE_SUITE, suppressing these false positives.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm-recheck.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index 0789c5606d2ab..1df7e695edf75 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -30,7 +30,7 @@ do
 			resdir=`echo $i | sed -e 's,/$,,' -e 's,/[^/]*$,,'`
 			head -1 $resdir/log
 		fi
-		TORTURE_SUITE="`cat $i/../torture_suite`"
+		TORTURE_SUITE="`cat $i/../torture_suite`" ; export TORTURE_SUITE
 		configfile=`echo $i | sed -e 's,^.*/,,'`
 		rm -f $i/console.log.*.diags
 		case "${TORTURE_SUITE}" in

From e04ff9b09af749eb37967fdf4cb13c4891616af7 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 29 Aug 2022 16:26:26 -0700
Subject: [PATCH 45/70] rcutorture: Add --datestamp parameter to kvm-again.sh

This commit adds a --datestamp parameter to kvm-again.sh, which, in
contrast to the existing --rundir argument, specifies only the last
segments of the pathname.  This addition enables torture.sh to use
kvm-again.sh in order to avoid redundant kernel builds.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 .../selftests/rcutorture/bin/kvm-again.sh     | 37 +++++++++++++++++--
 1 file changed, 34 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-again.sh b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
index 20941c1051087..8a968fbda02c9 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-again.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
@@ -54,7 +54,10 @@ bootargs=
 dryrun=
 dur=
 default_link="cp -R"
-rundir="`pwd`/tools/testing/selftests/rcutorture/res/`date +%Y.%m.%d-%H.%M.%S-again`"
+resdir="`pwd`/tools/testing/selftests/rcutorture/res"
+rundir="$resdir/`date +%Y.%m.%d-%H.%M.%S-again`"
+got_datestamp=
+got_rundir=
 
 startdate="`date`"
 starttime="`get_starttime`"
@@ -62,11 +65,13 @@ starttime="`get_starttime`"
 usage () {
 	echo "Usage: $scriptname $oldrun [ arguments ]:"
 	echo "       --bootargs kernel-boot-arguments"
+	echo "       --datestamp string"
 	echo "       --dryrun"
 	echo "       --duration minutes | <seconds>s | <hours>h | <days>d"
 	echo "       --link hard|soft|copy"
 	echo "       --remote"
 	echo "       --rundir /new/res/path"
+	echo "Command line: $scriptname $args"
 	exit 1
 }
 
@@ -78,6 +83,23 @@ do
 		bootargs="$bootargs $2"
 		shift
 		;;
+	--datestamp)
+		checkarg --datestamp "(relative pathname)" "$#" "$2" '^[a-zA-Z0-9._/-]*$' '^--'
+		if test -n "$got_rundir" || test -n "$got_datestamp"
+		then
+			echo Only one of --datestamp or --rundir may be specified
+			usage
+		fi
+		got_datestamp=y
+		ds=$2
+		rundir="$resdir/$ds"
+		if test -e "$rundir"
+		then
+			echo "--datestamp $2: Already exists."
+			usage
+		fi
+		shift
+		;;
 	--dryrun)
 		dryrun=1
 		;;
@@ -119,6 +141,12 @@ do
 		;;
 	--rundir)
 		checkarg --rundir "(absolute pathname)" "$#" "$2" '^/' '^error'
+		if test -n "$got_rundir" || test -n "$got_datestamp"
+		then
+			echo Only one of --datestamp or --rundir may be specified
+			usage
+		fi
+		got_rundir=y
 		rundir=$2
 		if test -e "$rundir"
 		then
@@ -128,8 +156,11 @@ do
 		shift
 		;;
 	*)
-		echo Unknown argument $1
-		usage
+		if test -n "$1"
+		then
+			echo Unknown argument $1
+			usage
+		fi
 		;;
 	esac
 	shift

From 7d0b63c254eb5f3bc14c7d85977e483e8230e38b Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 29 Aug 2022 16:30:14 -0700
Subject: [PATCH 46/70] rcutorture: Avoid redundant builds for rcuscale and
 refscale in torture.sh

This commit causes torture.sh to use the new --bootargs and --datestamp
parameters to kvm-again.sh in order to avoid redundant kernel builds
during rcuscale and refscale testing.  This trims the better part of an
hour off of torture.sh runs that use --do-kasan.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 .../selftests/rcutorture/bin/torture.sh       | 87 ++++++++++++++++++-
 1 file changed, 83 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index 4d285db2fbbdd..b376688fe280a 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -277,6 +277,8 @@ function torture_one {
 	then
 		cat $T/$curflavor.out | tee -a $T/log
 		echo retcode=$retcode | tee -a $T/log
+	else
+		echo $resdir > $T/last-resdir
 	fi
 	if test "$retcode" == 0
 	then
@@ -302,10 +304,12 @@ function torture_set {
 	shift
 	curflavor=$flavor
 	torture_one "$@"
+	mv $T/last-resdir $T/last-resdir-nodebug || :
 	if test "$do_kasan" = "yes"
 	then
 		curflavor=${flavor}-kasan
 		torture_one "$@" --kasan
+		mv $T/last-resdir $T/last-resdir-kasan || :
 	fi
 	if test "$do_kcsan" = "yes"
 	then
@@ -316,6 +320,7 @@ function torture_set {
 			cur_kcsan_kmake_args="$kcsan_kmake_args"
 		fi
 		torture_one "$@" --kconfig "CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y" $kcsan_kmake_tag $cur_kcsan_kmake_args --kcsan
+		mv $T/last-resdir $T/last-resdir-kcsan || :
 	fi
 }
 
@@ -378,11 +383,48 @@ then
 else
 	primlist=
 fi
+firsttime=1
+do_kasan_save="$do_kasan"
+do_kcsan_save="$do_kcsan"
 for prim in $primlist
 do
-	torture_bootargs="refscale.scale_type="$prim" refscale.nreaders=$HALF_ALLOTED_CPUS refscale.loops=10000 refscale.holdoff=20 torture.disable_onoff_at_boot"
-	torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make
+	if test -n "$firsttime"
+	then
+		torture_bootargs="refscale.scale_type="$prim" refscale.nreaders=$HALF_ALLOTED_CPUS refscale.loops=10000 refscale.holdoff=20 torture.disable_onoff_at_boot"
+		torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make
+		mv $T/last-resdir-nodebug $T/first-resdir-nodebug || :
+		if test -f "$T/last-resdir-kasan"
+		then
+			mv $T/last-resdir-kasan $T/first-resdir-kasan || :
+		fi
+		if test -f "$T/last-resdir-kcsan"
+		then
+			mv $T/last-resdir-kcsan $T/first-resdir-kcsan || :
+		fi
+		firsttime=
+		do_kasan=
+		do_kcsan=
+	else
+		torture_bootargs=
+		for i in $T/first-resdir-*
+		do
+			case "$i" in
+			*-nodebug)
+				torture_suffix=
+				;;
+			*-kasan)
+				torture_suffix="-kasan"
+				;;
+			*-kcsan)
+				torture_suffix="-kcsan"
+				;;
+			esac
+			torture_set "refscale-$prim$torture_suffix" tools/testing/selftests/rcutorture/bin/kvm-again.sh "`cat "$i"`" --duration 5 --bootargs "refscale.scale_type=$prim"
+		done
+	fi
 done
+do_kasan="$do_kasan_save"
+do_kcsan="$do_kcsan_save"
 
 if test "$do_rcuscale" = yes
 then
@@ -390,11 +432,48 @@ then
 else
 	primlist=
 fi
+firsttime=1
+do_kasan_save="$do_kasan"
+do_kcsan_save="$do_kcsan"
 for prim in $primlist
 do
-	torture_bootargs="rcuscale.scale_type="$prim" rcuscale.nwriters=$HALF_ALLOTED_CPUS rcuscale.holdoff=20 torture.disable_onoff_at_boot"
-	torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make
+	if test -n "$firsttime"
+	then
+		torture_bootargs="rcuscale.scale_type="$prim" rcuscale.nwriters=$HALF_ALLOTED_CPUS rcuscale.holdoff=20 torture.disable_onoff_at_boot"
+		torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make
+		mv $T/last-resdir-nodebug $T/first-resdir-nodebug || :
+		if test -f "$T/last-resdir-kasan"
+		then
+			mv $T/last-resdir-kasan $T/first-resdir-kasan || :
+		fi
+		if test -f "$T/last-resdir-kcsan"
+		then
+			mv $T/last-resdir-kcsan $T/first-resdir-kcsan || :
+		fi
+		firsttime=
+		do_kasan=
+		do_kcsan=
+	else
+		torture_bootargs=
+		for i in $T/first-resdir-*
+		do
+			case "$i" in
+			*-nodebug)
+				torture_suffix=
+				;;
+			*-kasan)
+				torture_suffix="-kasan"
+				;;
+			*-kcsan)
+				torture_suffix="-kcsan"
+				;;
+			esac
+			torture_set "rcuscale-$prim$torture_suffix" tools/testing/selftests/rcutorture/bin/kvm-again.sh "`cat "$i"`" --duration 5 --bootargs "rcuscale.scale_type=$prim"
+		done
+	fi
 done
+do_kasan="$do_kasan_save"
+do_kcsan="$do_kcsan_save"
 
 if test "$do_kvfree" = "yes"
 then

From 89ab4857676e15a31f435a6579c1faf087282caf Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 30 Aug 2022 11:52:13 -0700
Subject: [PATCH 47/70] rcutorture: Avoid torture.sh compressing identical
 files

Currently, torture.sh will compress the vmlinux files for KASAN and
KCSAN runs.  But it will compress all of the files, including those
copied verbatim by the kvm-again.sh script.  Compression takes around ten
minutes, so this is not a good thing.  This commit therefore compresses
only one of a given set of identical vmlinux files, and then hard-links
it to the directories produced by kvm-again.sh.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 .../selftests/rcutorture/bin/torture.sh       | 21 ++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index b376688fe280a..fc63539377147 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -536,7 +536,10 @@ if test -n "$tdir" && test $compress_concurrency -gt 0
 then
 	# KASAN vmlinux files can approach 1GB in size, so compress them.
 	echo Looking for K[AC]SAN files to compress: `date` > "$tdir/log-xz" 2>&1
-	find "$tdir" -type d -name '*-k[ac]san' -print > $T/xz-todo
+	find "$tdir" -type d -name '*-k[ac]san' -print > $T/xz-todo-all
+	find "$tdir" -type f -name 're-run' -print | sed -e 's,/re-run,,' |
+		grep -e '-k[ac]san$' > $T/xz-todo-copy
+	sort $T/xz-todo-all $T/xz-todo-copy | uniq -u > $T/xz-todo
 	ncompresses=0
 	batchno=1
 	if test -s $T/xz-todo
@@ -568,6 +571,22 @@ then
 			echo Waiting for final batch $batchno of $ncompresses compressions `date` | tee -a "$tdir/log-xz" | tee -a $T/log
 		fi
 		wait
+		if test -s $T/xz-todo-copy
+		then
+			echo Linking vmlinux.xz files to re-use scenarios `date` | tee -a "$tdir/log-xz" | tee -a $T/log
+			dirstash="`pwd`"
+			for i in `cat $T/xz-todo-copy`
+			do
+				cd $i
+				find "$i" -name vmlinux -print > $T/xz-todo-copy-vmlinux
+				for v in `cat $T/xz-todo-copy-vmlinux`
+				do
+					rm -f "$v"
+					cp -l "$i/$v".xz "`dirname "$v"`"
+				done
+				cd "$dirstash"
+			done
+		fi
 		echo Size after compressing $n2compress files: `du -sh $tdir | awk '{ print $1 }'` `date` 2>&1 | tee -a "$tdir/log-xz" | tee -a $T/log
 		echo Total duration `get_starttime_duration $starttime`. | tee -a $T/log
 	else

From ef9801bc1a589829fbb0fbc96d2e5675e4b6297a Mon Sep 17 00:00:00 2001
From: Zqiang <qiang1.zhang@intel.com>
Date: Tue, 30 Aug 2022 16:31:51 +0800
Subject: [PATCH 48/70] rcu: Remove duplicate RCU exp QS report from
 rcu_report_dead()

The rcu_report_dead() function invokes rcu_report_exp_rdp() in order
to force an immediate expedited quiescent state on the outgoing
CPU, and then it invokes rcu_preempt_deferred_qs() to provide any
required deferred quiescent state of either sort.  Because the call to
rcu_preempt_deferred_qs() provides the expedited RCU quiescent state if
requested, the call to rcu_report_exp_rdp() is potentially redundant.

One possible issue is a concurrent start of a new expedited RCU
grace period, but this situation is already handled correctly
by __sync_rcu_exp_select_node_cpus().  This function will detect
that the CPU is going offline via the error return from its call
to smp_call_function_single().  In that case, it will retry, and
eventually stop retrying due to rcu_report_exp_rdp() clearing the
->qsmaskinitnext bit corresponding to the target CPU.  As a result,
__sync_rcu_exp_select_node_cpus() will report the necessary quiescent
state after dealing with any remaining CPU.

This change assumes that control does not enter rcu_report_dead() within
an RCU read-side critical section, but then again, the surviving call
to rcu_preempt_deferred_qs() has always made this assumption.

This commit therefore removes the call to rcu_report_exp_rdp(), thus
relying on rcu_preempt_deferred_qs() to handle both normal and expedited
quiescent states.

Signed-off-by: Zqiang <qiang1.zhang@intel.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 6bb8e72bc8151..0ca21ac0f0648 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -4276,8 +4276,6 @@ void rcu_report_dead(unsigned int cpu)
 	// Do any dangling deferred wakeups.
 	do_nocb_deferred_wakeup(rdp);
 
-	/* QS for any half-done expedited grace period. */
-	rcu_report_exp_rdp(rdp);
 	rcu_preempt_deferred_qs(current);
 
 	/* Remove outgoing CPU from mask in the leaf rcu_node structure. */

From 802f2ea8e861ade913a64b0a8dabfda8bd01b8dc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Paul=20Heidekr=C3=BCger?= <paul.heidekrueger@in.tum.de>
Date: Sat, 3 Sep 2022 16:57:17 +0000
Subject: [PATCH 49/70] tools/memory-model: Weaken ctrl dependency definition
 in explanation.txt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The current informal control dependency definition in explanation.txt is
too broad and, as discussed, needs to be updated.

Consider the following example:

> if(READ_ONCE(x))
>   return 42;
>
> WRITE_ONCE(y, 42);
>
> return 21;

The read event determines whether the write event will be executed "at all"
- as per the current definition - but the formal LKMM does not recognize
this as a control dependency.

Introduce a new definition which includes the requirement for the second
memory access event to syntactically lie within the arm of a non-loop
conditional.

Link: https://lore.kernel.org/all/20220615114330.2573952-1-paul.heidekrueger@in.tum.de/
Cc: Marco Elver <elver@google.com>
Cc: Charalampos Mainas <charalampos.mainas@gmail.com>
Cc: Pramod Bhatotia <pramod.bhatotia@in.tum.de>
Cc: Soham Chakraborty <s.s.chakraborty@tudelft.nl>
Cc: Martin Fink <martin.fink@in.tum.de>
Co-developed-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Paul Heidekrüger <paul.heidekrueger@in.tum.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/Documentation/explanation.txt | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt
index ee819a402b698..11a1d2d4f681c 100644
--- a/tools/memory-model/Documentation/explanation.txt
+++ b/tools/memory-model/Documentation/explanation.txt
@@ -464,9 +464,10 @@ to address dependencies, since the address of a location accessed
 through a pointer will depend on the value read earlier from that
 pointer.
 
-Finally, a read event and another memory access event are linked by a
-control dependency if the value obtained by the read affects whether
-the second event is executed at all.  Simple example:
+Finally, a read event X and a write event Y are linked by a control
+dependency if Y syntactically lies within an arm of an if statement and
+X affects the evaluation of the if condition via a data or address
+dependency (or similarly for a switch statement).  Simple example:
 
 	int x, y;
 

From 7785e3ec245200a15d9ea66042ff7aaea21e53ee Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Sun, 4 Sep 2022 10:49:56 -0700
Subject: [PATCH 50/70] torture: Make torture.sh create a properly formated log
 file

Currently, if the torture.sh allmodconfig step fails, this is counted as
an error (as it should be), but there is also an extraneous complaint
about a missing log file.  This commit therefore adds that log file,
which is hoped to reduce confused reactions to the error report.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 .../selftests/rcutorture/bin/torture.sh       | 32 +++++++++++++------
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index fc63539377147..ec3d8a2d513f1 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -330,20 +330,34 @@ then
 	echo " --- allmodconfig:" Start `date` | tee -a $T/log
 	amcdir="tools/testing/selftests/rcutorture/res/$ds/allmodconfig"
 	mkdir -p "$amcdir"
-	echo " --- make clean" > "$amcdir/Make.out" 2>&1
+	echo " --- make clean" | tee $amcdir/log > "$amcdir/Make.out" 2>&1
 	make -j$MAKE_ALLOTED_CPUS clean >> "$amcdir/Make.out" 2>&1
-	echo " --- make allmodconfig" >> "$amcdir/Make.out" 2>&1
-	cp .config $amcdir
-	make -j$MAKE_ALLOTED_CPUS allmodconfig >> "$amcdir/Make.out" 2>&1
-	echo " --- make " >> "$amcdir/Make.out" 2>&1
-	make -j$MAKE_ALLOTED_CPUS >> "$amcdir/Make.out" 2>&1
-	retcode="$?"
-	echo $retcode > "$amcdir/Make.exitcode"
-	if test "$retcode" == 0
+	retcode=$?
+	buildphase='"make clean"'
+	if test "$retcode" -eq 0
+	then
+		echo " --- make allmodconfig" | tee -a $amcdir/log >> "$amcdir/Make.out" 2>&1
+		cp .config $amcdir
+		make -j$MAKE_ALLOTED_CPUS allmodconfig >> "$amcdir/Make.out" 2>&1
+		retcode=$?
+		buildphase='"make allmodconfig"'
+	fi
+	if test "$retcode" -eq 0
+	then
+		echo " --- make " | tee -a $amcdir/log >> "$amcdir/Make.out" 2>&1
+		make -j$MAKE_ALLOTED_CPUS >> "$amcdir/Make.out" 2>&1
+		retcode="$?"
+		echo $retcode > "$amcdir/Make.exitcode"
+		buildphase='"make"'
+	fi
+	if test "$retcode" -eq 0
 	then
 		echo "allmodconfig($retcode)" $amcdir >> $T/successes
+		echo Success >> $amcdir/log
 	else
 		echo "allmodconfig($retcode)" $amcdir >> $T/failures
+		echo " --- allmodconfig Test summary:" >> $amcdir/log
+		echo " --- Summary: Exit code $retcode from $buildphase, see Make.out" >> $amcdir/log
 	fi
 fi
 

From af6b74679889c5c169fab43d16437ee290bff658 Mon Sep 17 00:00:00 2001
From: Pingfan Liu <kernelfans@gmail.com>
Date: Tue, 6 Sep 2022 11:36:42 -0700
Subject: [PATCH 51/70] rcu: Synchronize ->qsmaskinitnext in
 rcu_boost_kthread_setaffinity()

Once either rcutree_online_cpu() or rcutree_dead_cpu() is invoked
concurrently, the following rcu_boost_kthread_setaffinity() race can
occur:

        CPU 1                               CPU2
mask = rcu_rnp_online_cpus(rnp);
...

                                   mask = rcu_rnp_online_cpus(rnp);
                                   ...
                                   set_cpus_allowed_ptr(t, cm);

set_cpus_allowed_ptr(t, cm);

This results in CPU2's update being overwritten by that of CPU1, and
thus the possibility of ->boost_kthread_task continuing to run on a
to-be-offlined CPU.

This commit therefore eliminates this race by relying on the pre-existing
acquisition of ->boost_kthread_mutex to serialize the full process of
changing the affinity of ->boost_kthread_task.

Signed-off-by: Pingfan Liu <kernelfans@gmail.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Frederic Weisbecker <frederic@kernel.org>
Cc: Neeraj Upadhyay <quic_neeraju@quicinc.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: "Jason A. Donenfeld" <Jason@zx2c4.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree_plugin.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index e3142ee35fc6a..7b0fe741a0886 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1221,11 +1221,13 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
  * We don't include outgoingcpu in the affinity set, use -1 if there is
  * no outgoing CPU.  If there are no CPUs left in the affinity set,
  * this function allows the kthread to execute on any CPU.
+ *
+ * Any future concurrent calls are serialized via ->boost_kthread_mutex.
  */
 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
 {
 	struct task_struct *t = rnp->boost_kthread_task;
-	unsigned long mask = rcu_rnp_online_cpus(rnp);
+	unsigned long mask;
 	cpumask_var_t cm;
 	int cpu;
 
@@ -1234,6 +1236,7 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
 	if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
 		return;
 	mutex_lock(&rnp->boost_kthread_mutex);
+	mask = rcu_rnp_online_cpus(rnp);
 	for_each_leaf_node_possible_cpu(rnp, cpu)
 		if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
 		    cpu != outgoingcpu)

From a9f11975532606434c92f3aa98bd946cdd47800f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 8 Sep 2022 05:48:01 -0700
Subject: [PATCH 52/70] doc: Remove arrayRCU.rst

Although RCU can in theory be used to protect array indexes in a manner
similar to the way it protects pointers, doing so is extremely risky
because of the huge number of optimizations that modern compilers can
apply to integral types.

For but one example, if your code can be configured such that your array
contains only a single element, then indexing that array with any integer
other than zero invokes undefined behavior, which in turn means that
the compiler is within its rights to assume (without checking!) that any
integer used as an index to that array has the value zero.  Therefore,
the compiler can index the array with the constant zero, which breaks
any dependencies that might have otherwise existed between the time the
actual value was loaded and the time that the array was indexed.

This commit therefore removes the arrayRCU.rst file that describes how
to go about carrying dependencies through array indexes.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 Documentation/RCU/arrayRCU.rst | 165 ---------------------------------
 Documentation/RCU/index.rst    |   1 -
 2 files changed, 166 deletions(-)
 delete mode 100644 Documentation/RCU/arrayRCU.rst

diff --git a/Documentation/RCU/arrayRCU.rst b/Documentation/RCU/arrayRCU.rst
deleted file mode 100644
index a5f2ff8fc54c2..0000000000000
--- a/Documentation/RCU/arrayRCU.rst
+++ /dev/null
@@ -1,165 +0,0 @@
-.. _array_rcu_doc:
-
-Using RCU to Protect Read-Mostly Arrays
-=======================================
-
-Although RCU is more commonly used to protect linked lists, it can
-also be used to protect arrays.  Three situations are as follows:
-
-1.  :ref:`Hash Tables <hash_tables>`
-
-2.  :ref:`Static Arrays <static_arrays>`
-
-3.  :ref:`Resizable Arrays <resizable_arrays>`
-
-Each of these three situations involves an RCU-protected pointer to an
-array that is separately indexed.  It might be tempting to consider use
-of RCU to instead protect the index into an array, however, this use
-case is **not** supported.  The problem with RCU-protected indexes into
-arrays is that compilers can play way too many optimization games with
-integers, which means that the rules governing handling of these indexes
-are far more trouble than they are worth.  If RCU-protected indexes into
-arrays prove to be particularly valuable (which they have not thus far),
-explicit cooperation from the compiler will be required to permit them
-to be safely used.
-
-That aside, each of the three RCU-protected pointer situations are
-described in the following sections.
-
-.. _hash_tables:
-
-Situation 1: Hash Tables
-------------------------
-
-Hash tables are often implemented as an array, where each array entry
-has a linked-list hash chain.  Each hash chain can be protected by RCU
-as described in listRCU.rst.  This approach also applies to other
-array-of-list situations, such as radix trees.
-
-.. _static_arrays:
-
-Situation 2: Static Arrays
---------------------------
-
-Static arrays, where the data (rather than a pointer to the data) is
-located in each array element, and where the array is never resized,
-have not been used with RCU.  Rik van Riel recommends using seqlock in
-this situation, which would also have minimal read-side overhead as long
-as updates are rare.
-
-Quick Quiz:
-		Why is it so important that updates be rare when using seqlock?
-
-:ref:`Answer to Quick Quiz <answer_quick_quiz_seqlock>`
-
-.. _resizable_arrays:
-
-Situation 3: Resizable Arrays
-------------------------------
-
-Use of RCU for resizable arrays is demonstrated by the grow_ary()
-function formerly used by the System V IPC code.  The array is used
-to map from semaphore, message-queue, and shared-memory IDs to the data
-structure that represents the corresponding IPC construct.  The grow_ary()
-function does not acquire any locks; instead its caller must hold the
-ids->sem semaphore.
-
-The grow_ary() function, shown below, does some limit checks, allocates a
-new ipc_id_ary, copies the old to the new portion of the new, initializes
-the remainder of the new, updates the ids->entries pointer to point to
-the new array, and invokes ipc_rcu_putref() to free up the old array.
-Note that rcu_assign_pointer() is used to update the ids->entries pointer,
-which includes any memory barriers required on whatever architecture
-you are running on::
-
-	static int grow_ary(struct ipc_ids* ids, int newsize)
-	{
-		struct ipc_id_ary* new;
-		struct ipc_id_ary* old;
-		int i;
-		int size = ids->entries->size;
-
-		if(newsize > IPCMNI)
-			newsize = IPCMNI;
-		if(newsize <= size)
-			return newsize;
-
-		new = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*newsize +
-				    sizeof(struct ipc_id_ary));
-		if(new == NULL)
-			return size;
-		new->size = newsize;
-		memcpy(new->p, ids->entries->p,
-		       sizeof(struct kern_ipc_perm *)*size +
-		       sizeof(struct ipc_id_ary));
-		for(i=size;i<newsize;i++) {
-			new->p[i] = NULL;
-		}
-		old = ids->entries;
-
-		/*
-		 * Use rcu_assign_pointer() to make sure the memcpyed
-		 * contents of the new array are visible before the new
-		 * array becomes visible.
-		 */
-		rcu_assign_pointer(ids->entries, new);
-
-		ipc_rcu_putref(old);
-		return newsize;
-	}
-
-The ipc_rcu_putref() function decrements the array's reference count
-and then, if the reference count has dropped to zero, uses call_rcu()
-to free the array after a grace period has elapsed.
-
-The array is traversed by the ipc_lock() function.  This function
-indexes into the array under the protection of rcu_read_lock(),
-using rcu_dereference() to pick up the pointer to the array so
-that it may later safely be dereferenced -- memory barriers are
-required on the Alpha CPU.  Since the size of the array is stored
-with the array itself, there can be no array-size mismatches, so
-a simple check suffices.  The pointer to the structure corresponding
-to the desired IPC object is placed in "out", with NULL indicating
-a non-existent entry.  After acquiring "out->lock", the "out->deleted"
-flag indicates whether the IPC object is in the process of being
-deleted, and, if not, the pointer is returned::
-
-	struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id)
-	{
-		struct kern_ipc_perm* out;
-		int lid = id % SEQ_MULTIPLIER;
-		struct ipc_id_ary* entries;
-
-		rcu_read_lock();
-		entries = rcu_dereference(ids->entries);
-		if(lid >= entries->size) {
-			rcu_read_unlock();
-			return NULL;
-		}
-		out = entries->p[lid];
-		if(out == NULL) {
-			rcu_read_unlock();
-			return NULL;
-		}
-		spin_lock(&out->lock);
-
-		/* ipc_rmid() may have already freed the ID while ipc_lock
-		 * was spinning: here verify that the structure is still valid
-		 */
-		if (out->deleted) {
-			spin_unlock(&out->lock);
-			rcu_read_unlock();
-			return NULL;
-		}
-		return out;
-	}
-
-.. _answer_quick_quiz_seqlock:
-
-Answer to Quick Quiz:
-	Why is it so important that updates be rare when using seqlock?
-
-	The reason that it is important that updates be rare when
-	using seqlock is that frequent updates can livelock readers.
-	One way to avoid this problem is to assign a seqlock for
-	each array entry rather than to the entire array.
diff --git a/Documentation/RCU/index.rst b/Documentation/RCU/index.rst
index e703d3dbe60ce..84a79903f6a88 100644
--- a/Documentation/RCU/index.rst
+++ b/Documentation/RCU/index.rst
@@ -9,7 +9,6 @@ RCU concepts
 .. toctree::
    :maxdepth: 3
 
-   arrayRCU
    checklist
    lockdep
    lockdep-splat

From d7d4582ec2236362ff893d2a41729e61fe2e0d62 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 9 Sep 2022 04:46:26 -0700
Subject: [PATCH 53/70] doc: Update checklist.txt

This commit updates checklist.txt to reflect RCU additions and changes
over the past few years.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 Documentation/RCU/checklist.rst | 241 +++++++++++++++++++-------------
 1 file changed, 140 insertions(+), 101 deletions(-)

diff --git a/Documentation/RCU/checklist.rst b/Documentation/RCU/checklist.rst
index 178ca7547b987..3f1f1a5736b70 100644
--- a/Documentation/RCU/checklist.rst
+++ b/Documentation/RCU/checklist.rst
@@ -32,8 +32,8 @@ over a rather long period of time, but improvements are always welcome!
 	for lockless updates.  This does result in the mildly
 	counter-intuitive situation where rcu_read_lock() and
 	rcu_read_unlock() are used to protect updates, however, this
-	approach provides the same potential simplifications that garbage
-	collectors do.
+	approach can provide the same simplifications to certain types
+	of lockless algorithms that garbage collectors do.
 
 1.	Does the update code have proper mutual exclusion?
 
@@ -49,12 +49,12 @@ over a rather long period of time, but improvements are always welcome!
 	them -- even x86 allows later loads to be reordered to precede
 	earlier stores), and be prepared to explain why this added
 	complexity is worthwhile.  If you choose #c, be prepared to
-	explain how this single task does not become a major bottleneck on
-	big multiprocessor machines (for example, if the task is updating
-	information relating to itself that other tasks can read, there
-	by definition can be no bottleneck).  Note that the definition
-	of "large" has changed significantly:  Eight CPUs was "large"
-	in the year 2000, but a hundred CPUs was unremarkable in 2017.
+	explain how this single task does not become a major bottleneck
+	on large systems (for example, if the task is updating information
+	relating to itself that other tasks can read, there by definition
+	can be no bottleneck).	Note that the definition of "large" has
+	changed significantly:	Eight CPUs was "large" in the year 2000,
+	but a hundred CPUs was unremarkable in 2017.
 
 2.	Do the RCU read-side critical sections make proper use of
 	rcu_read_lock() and friends?  These primitives are needed
@@ -97,33 +97,38 @@ over a rather long period of time, but improvements are always welcome!
 
 	b.	Proceed as in (a) above, but also maintain per-element
 		locks (that are acquired by both readers and writers)
-		that guard per-element state.  Of course, fields that
-		the readers refrain from accessing can be guarded by
-		some other lock acquired only by updaters, if desired.
+		that guard per-element state.  Fields that the readers
+		refrain from accessing can be guarded by some other lock
+		acquired only by updaters, if desired.
 
-		This works quite well, also.
+		This also works quite well.
 
 	c.	Make updates appear atomic to readers.	For example,
 		pointer updates to properly aligned fields will
 		appear atomic, as will individual atomic primitives.
 		Sequences of operations performed under a lock will *not*
 		appear to be atomic to RCU readers, nor will sequences
-		of multiple atomic primitives.
+		of multiple atomic primitives.	One alternative is to
+		move multiple individual fields to a separate structure,
+		thus solving the multiple-field problem by imposing an
+		additional level of indirection.
 
 		This can work, but is starting to get a bit tricky.
 
-	d.	Carefully order the updates and the reads so that
-		readers see valid data at all phases of the update.
-		This is often more difficult than it sounds, especially
-		given modern CPUs' tendency to reorder memory references.
-		One must usually liberally sprinkle memory barriers
-		(smp_wmb(), smp_rmb(), smp_mb()) through the code,
-		making it difficult to understand and to test.
-
-		It is usually better to group the changing data into
-		a separate structure, so that the change may be made
-		to appear atomic by updating a pointer to reference
-		a new structure containing updated values.
+	d.	Carefully order the updates and the reads so that readers
+		see valid data at all phases of the update.  This is often
+		more difficult than it sounds, especially given modern
+		CPUs' tendency to reorder memory references.  One must
+		usually liberally sprinkle memory-ordering operations
+		through the code, making it difficult to understand and
+		to test.  Where it works, it is better to use things
+		like smp_store_release() and smp_load_acquire(), but in
+		some cases the smp_mb() full memory barrier is required.
+
+		As noted earlier, it is usually better to group the
+		changing data into a separate structure, so that the
+		change may be made to appear atomic by updating a pointer
+		to reference a new structure containing updated values.
 
 4.	Weakly ordered CPUs pose special challenges.  Almost all CPUs
 	are weakly ordered -- even x86 CPUs allow later loads to be
@@ -188,26 +193,29 @@ over a rather long period of time, but improvements are always welcome!
 		when publicizing a pointer to a structure that can
 		be traversed by an RCU read-side critical section.
 
-5.	If call_rcu() or call_srcu() is used, the callback function will
-	be called from softirq context.  In particular, it cannot block.
-	If you need the callback to block, run that code in a workqueue
-	handler scheduled from the callback.  The queue_rcu_work()
-	function does this for you in the case of call_rcu().
+5.	If any of call_rcu(), call_srcu(), call_rcu_tasks(),
+	call_rcu_tasks_rude(), or call_rcu_tasks_trace() is used,
+	the callback function may be invoked from softirq context,
+	and in any case with bottom halves disabled.  In particular,
+	this callback function cannot block.  If you need the callback
+	to block, run that code in a workqueue handler scheduled from
+	the callback.  The queue_rcu_work() function does this for you
+	in the case of call_rcu().
 
 6.	Since synchronize_rcu() can block, it cannot be called
 	from any sort of irq context.  The same rule applies
-	for synchronize_srcu(), synchronize_rcu_expedited(), and
-	synchronize_srcu_expedited().
+	for synchronize_srcu(), synchronize_rcu_expedited(),
+	synchronize_srcu_expedited(), synchronize_rcu_tasks(),
+	synchronize_rcu_tasks_rude(), and synchronize_rcu_tasks_trace().
 
 	The expedited forms of these primitives have the same semantics
-	as the non-expedited forms, but expediting is both expensive and
-	(with the exception of synchronize_srcu_expedited()) unfriendly
-	to real-time workloads.  Use of the expedited primitives should
-	be restricted to rare configuration-change operations that would
-	not normally be undertaken while a real-time workload is running.
-	However, real-time workloads can use rcupdate.rcu_normal kernel
-	boot parameter to completely disable expedited grace periods,
-	though this might have performance implications.
+	as the non-expedited forms, but expediting is more CPU intensive.
+	Use of the expedited primitives should be restricted to rare
+	configuration-change operations that would not normally be
+	undertaken while a real-time workload is running.  Note that
+	IPI-sensitive real-time workloads can use the rcupdate.rcu_normal
+	kernel boot parameter to completely disable expedited grace
+	periods, though this might have performance implications.
 
 	In particular, if you find yourself invoking one of the expedited
 	primitives repeatedly in a loop, please do everyone a favor:
@@ -215,8 +223,9 @@ over a rather long period of time, but improvements are always welcome!
 	a single non-expedited primitive to cover the entire batch.
 	This will very likely be faster than the loop containing the
 	expedited primitive, and will be much much easier on the rest
-	of the system, especially to real-time workloads running on
-	the rest of the system.
+	of the system, especially to real-time workloads running on the
+	rest of the system.  Alternatively, instead use asynchronous
+	primitives such as call_rcu().
 
 7.	As of v4.20, a given kernel implements only one RCU flavor, which
 	is RCU-sched for PREEMPTION=n and RCU-preempt for PREEMPTION=y.
@@ -239,7 +248,8 @@ over a rather long period of time, but improvements are always welcome!
 	the corresponding readers must use rcu_read_lock_trace() and
 	rcu_read_unlock_trace().  If an updater uses call_rcu_tasks_rude()
 	or synchronize_rcu_tasks_rude(), then the corresponding readers
-	must use anything that disables interrupts.
+	must use anything that disables preemption, for example,
+	preempt_disable() and preempt_enable().
 
 	Mixing things up will result in confusion and broken kernels, and
 	has even resulted in an exploitable security issue.  Therefore,
@@ -253,15 +263,16 @@ over a rather long period of time, but improvements are always welcome!
 	that this usage is safe is that readers can use anything that
 	disables BH when updaters use call_rcu() or synchronize_rcu().
 
-8.	Although synchronize_rcu() is slower than is call_rcu(), it
-	usually results in simpler code.  So, unless update performance is
-	critically important, the updaters cannot block, or the latency of
-	synchronize_rcu() is visible from userspace, synchronize_rcu()
-	should be used in preference to call_rcu().  Furthermore,
-	kfree_rcu() usually results in even simpler code than does
-	synchronize_rcu() without synchronize_rcu()'s multi-millisecond
-	latency.  So please take advantage of kfree_rcu()'s "fire and
-	forget" memory-freeing capabilities where it applies.
+8.	Although synchronize_rcu() is slower than is call_rcu(),
+	it usually results in simpler code.  So, unless update
+	performance is critically important, the updaters cannot block,
+	or the latency of synchronize_rcu() is visible from userspace,
+	synchronize_rcu() should be used in preference to call_rcu().
+	Furthermore, kfree_rcu() and kvfree_rcu() usually result
+	in even simpler code than does synchronize_rcu() without
+	synchronize_rcu()'s multi-millisecond latency.	So please take
+	advantage of kfree_rcu()'s and kvfree_rcu()'s "fire and forget"
+	memory-freeing capabilities where it applies.
 
 	An especially important property of the synchronize_rcu()
 	primitive is that it automatically self-limits: if grace periods
@@ -271,8 +282,8 @@ over a rather long period of time, but improvements are always welcome!
 	cases where grace periods are delayed, as failing to do so can
 	result in excessive realtime latencies or even OOM conditions.
 
-	Ways of gaining this self-limiting property when using call_rcu()
-	include:
+	Ways of gaining this self-limiting property when using call_rcu(),
+	kfree_rcu(), or kvfree_rcu() include:
 
 	a.	Keeping a count of the number of data-structure elements
 		used by the RCU-protected data structure, including
@@ -304,18 +315,21 @@ over a rather long period of time, but improvements are always welcome!
 		here is that superuser already has lots of ways to crash
 		the machine.
 
-	d.	Periodically invoke synchronize_rcu(), permitting a limited
-		number of updates per grace period.  Better yet, periodically
-		invoke rcu_barrier() to wait for all outstanding callbacks.
+	d.	Periodically invoke rcu_barrier(), permitting a limited
+		number of updates per grace period.
 
-	The same cautions apply to call_srcu() and kfree_rcu().
+	The same cautions apply to call_srcu(), call_rcu_tasks(),
+	call_rcu_tasks_rude(), and call_rcu_tasks_trace().  This is
+	why there is an srcu_barrier(), rcu_barrier_tasks(),
+	rcu_barrier_tasks_rude(), and rcu_barrier_tasks_rude(),
+	respectively.
 
-	Note that although these primitives do take action to avoid memory
-	exhaustion when any given CPU has too many callbacks, a determined
-	user could still exhaust memory.  This is especially the case
-	if a system with a large number of CPUs has been configured to
-	offload all of its RCU callbacks onto a single CPU, or if the
-	system has relatively little free memory.
+	Note that although these primitives do take action to avoid
+	memory exhaustion when any given CPU has too many callbacks,
+	a determined user or administrator can still exhaust memory.
+	This is especially the case if a system with a large number of
+	CPUs has been configured to offload all of its RCU callbacks onto
+	a single CPU, or if the system has relatively little free memory.
 
 9.	All RCU list-traversal primitives, which include
 	rcu_dereference(), list_for_each_entry_rcu(), and
@@ -344,14 +358,14 @@ over a rather long period of time, but improvements are always welcome!
 	and you don't hold the appropriate update-side lock, you *must*
 	use the "_rcu()" variants of the list macros.  Failing to do so
 	will break Alpha, cause aggressive compilers to generate bad code,
-	and confuse people trying to read your code.
+	and confuse people trying to understand your code.
 
 11.	Any lock acquired by an RCU callback must be acquired elsewhere
-	with softirq disabled, e.g., via spin_lock_irqsave(),
-	spin_lock_bh(), etc.  Failing to disable softirq on a given
-	acquisition of that lock will result in deadlock as soon as
-	the RCU softirq handler happens to run your RCU callback while
-	interrupting that acquisition's critical section.
+	with softirq disabled, e.g., via spin_lock_bh().  Failing to
+	disable softirq on a given acquisition of that lock will result
+	in deadlock as soon as the RCU softirq handler happens to run
+	your RCU callback while interrupting that acquisition's critical
+	section.
 
 12.	RCU callbacks can be and are executed in parallel.  In many cases,
 	the callback code simply wrappers around kfree(), so that this
@@ -372,7 +386,17 @@ over a rather long period of time, but improvements are always welcome!
 	for some  real-time workloads, this is the whole point of using
 	the rcu_nocbs= kernel boot parameter.
 
-13.	Unlike other forms of RCU, it *is* permissible to block in an
+	In addition, do not assume that callbacks queued in a given order
+	will be invoked in that order, even if they all are queued on the
+	same CPU.  Furthermore, do not assume that same-CPU callbacks will
+	be invoked serially.  For example, in recent kernels, CPUs can be
+	switched between offloaded and de-offloaded callback invocation,
+	and while a given CPU is undergoing such a switch, its callbacks
+	might be concurrently invoked by that CPU's softirq handler and
+	that CPU's rcuo kthread.  At such times, that CPU's callbacks
+	might be executed both concurrently and out of order.
+
+13.	Unlike most flavors of RCU, it *is* permissible to block in an
 	SRCU read-side critical section (demarked by srcu_read_lock()
 	and srcu_read_unlock()), hence the "SRCU": "sleepable RCU".
 	Please note that if you don't need to sleep in read-side critical
@@ -412,6 +436,12 @@ over a rather long period of time, but improvements are always welcome!
 	never sends IPIs to other CPUs, so it is easier on
 	real-time workloads than is synchronize_rcu_expedited().
 
+	It is also permissible to sleep in RCU Tasks Trace read-side
+	critical, which are delimited by rcu_read_lock_trace() and
+	rcu_read_unlock_trace().  However, this is a specialized flavor
+	of RCU, and you should not use it without first checking with
+	its current users.  In most cases, you should instead use SRCU.
+
 	Note that rcu_assign_pointer() relates to SRCU just as it does to
 	other forms of RCU, but instead of rcu_dereference() you should
 	use srcu_dereference() in order to avoid lockdep splats.
@@ -442,50 +472,59 @@ over a rather long period of time, but improvements are always welcome!
 	find problems as follows:
 
 	CONFIG_PROVE_LOCKING:
-		check that accesses to RCU-protected data
-		structures are carried out under the proper RCU
-		read-side critical section, while holding the right
-		combination of locks, or whatever other conditions
-		are appropriate.
+		check that accesses to RCU-protected data structures
+		are carried out under the proper RCU read-side critical
+		section, while holding the right combination of locks,
+		or whatever other conditions are appropriate.
 
 	CONFIG_DEBUG_OBJECTS_RCU_HEAD:
-		check that you don't pass the
-		same object to call_rcu() (or friends) before an RCU
-		grace period has elapsed since the last time that you
-		passed that same object to call_rcu() (or friends).
+		check that you don't pass the same object to call_rcu()
+		(or friends) before an RCU grace period has elapsed
+		since the last time that you passed that same object to
+		call_rcu() (or friends).
 
 	__rcu sparse checks:
-		tag the pointer to the RCU-protected data
-		structure with __rcu, and sparse will warn you if you
-		access that pointer without the services of one of the
-		variants of rcu_dereference().
+		tag the pointer to the RCU-protected data structure
+		with __rcu, and sparse will warn you if you access that
+		pointer without the services of one of the variants
+		of rcu_dereference().
 
 	These debugging aids can help you find problems that are
 	otherwise extremely difficult to spot.
 
-17.	If you register a callback using call_rcu() or call_srcu(), and
-	pass in a function defined within a loadable module, then it in
-	necessary to wait for all pending callbacks to be invoked after
-	the last invocation and before unloading that module.  Note that
-	it is absolutely *not* sufficient to wait for a grace period!
-	The current (say) synchronize_rcu() implementation is *not*
-	guaranteed to wait for callbacks registered on other CPUs.
-	Or even on the current CPU if that CPU recently went offline
-	and came back online.
+17.	If you pass a callback function defined within a module to one of
+	call_rcu(), call_srcu(), call_rcu_tasks(), call_rcu_tasks_rude(),
+	or call_rcu_tasks_trace(), then it is necessary to wait for all
+	pending callbacks to be invoked before unloading that module.
+	Note that it is absolutely *not* sufficient to wait for a grace
+	period!  For example, synchronize_rcu() implementation is *not*
+	guaranteed to wait for callbacks registered on other CPUs via
+	call_rcu().  Or even on the current CPU if that CPU recently
+	went offline and came back online.
 
 	You instead need to use one of the barrier functions:
 
 	-	call_rcu() -> rcu_barrier()
 	-	call_srcu() -> srcu_barrier()
+	-	call_rcu_tasks() -> rcu_barrier_tasks()
+	-	call_rcu_tasks_rude() -> rcu_barrier_tasks_rude()
+	-	call_rcu_tasks_trace() -> rcu_barrier_tasks_trace()
 
 	However, these barrier functions are absolutely *not* guaranteed
-	to wait for a grace period.  In fact, if there are no call_rcu()
-	callbacks waiting anywhere in the system, rcu_barrier() is within
-	its rights to return immediately.
-
-	So if you need to wait for both an RCU grace period and for
-	all pre-existing call_rcu() callbacks, you will need to execute
-	both rcu_barrier() and synchronize_rcu(), if necessary, using
-	something like workqueues to to execute them concurrently.
+	to wait for a grace period.  For example, if there are no
+	call_rcu() callbacks queued anywhere in the system, rcu_barrier()
+	can and will return immediately.
+
+	So if you need to wait for both a grace period and for all
+	pre-existing callbacks, you will need to invoke both functions,
+	with the pair depending on the flavor of RCU:
+
+	-	Either synchronize_rcu() or synchronize_rcu_expedited(),
+		together with rcu_barrier()
+	-	Either synchronize_srcu() or synchronize_srcu_expedited(),
+		together with and srcu_barrier()
+	-	synchronize_rcu_tasks() and rcu_barrier_tasks()
+	-	synchronize_tasks_rude() and rcu_barrier_tasks_rude()
+	-	synchronize_tasks_trace() and rcu_barrier_tasks_trace()
 
 	See rcubarrier.rst for more information.

From b7788778b674244dbd3f43ddb0f1e172de3961ee Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Sun, 11 Sep 2022 02:57:47 -0700
Subject: [PATCH 54/70] doc: Update listRCU.rst

This commit updates listRCU.txt to reflect RCU additions and changes
over the past few years.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 Documentation/RCU/listRCU.rst | 174 ++++++++++++++++++++--------------
 1 file changed, 103 insertions(+), 71 deletions(-)

diff --git a/Documentation/RCU/listRCU.rst b/Documentation/RCU/listRCU.rst
index 2a643e293fb41..fa5493c1e28f2 100644
--- a/Documentation/RCU/listRCU.rst
+++ b/Documentation/RCU/listRCU.rst
@@ -3,11 +3,10 @@
 Using RCU to Protect Read-Mostly Linked Lists
 =============================================
 
-One of the best applications of RCU is to protect read-mostly linked lists
-(``struct list_head`` in list.h).  One big advantage of this approach
-is that all of the required memory barriers are included for you in
-the list macros.  This document describes several applications of RCU,
-with the best fits first.
+One of the most common uses of RCU is protecting read-mostly linked lists
+(``struct list_head`` in list.h).  One big advantage of this approach is
+that all of the required memory ordering is provided by the list macros.
+This document describes several list-based RCU use cases.
 
 
 Example 1: Read-mostly list: Deferred Destruction
@@ -35,7 +34,8 @@ The code traversing the list of all processes typically looks like::
 	}
 	rcu_read_unlock();
 
-The simplified code for removing a process from a task list is::
+The simplified and heavily inlined code for removing a process from a
+task list is::
 
 	void release_task(struct task_struct *p)
 	{
@@ -45,39 +45,48 @@ The simplified code for removing a process from a task list is::
 		call_rcu(&p->rcu, delayed_put_task_struct);
 	}
 
-When a process exits, ``release_task()`` calls ``list_del_rcu(&p->tasks)`` under
-``tasklist_lock`` writer lock protection, to remove the task from the list of
-all tasks. The ``tasklist_lock`` prevents concurrent list additions/removals
-from corrupting the list. Readers using ``for_each_process()`` are not protected
-with the ``tasklist_lock``. To prevent readers from noticing changes in the list
-pointers, the ``task_struct`` object is freed only after one or more grace
-periods elapse (with the help of call_rcu()). This deferring of destruction
-ensures that any readers traversing the list will see valid ``p->tasks.next``
-pointers and deletion/freeing can happen in parallel with traversal of the list.
-This pattern is also called an **existence lock**, since RCU pins the object in
-memory until all existing readers finish.
+When a process exits, ``release_task()`` calls ``list_del_rcu(&p->tasks)``
+via __exit_signal() and __unhash_process() under ``tasklist_lock``
+writer lock protection.  The list_del_rcu() invocation removes
+the task from the list of all tasks. The ``tasklist_lock``
+prevents concurrent list additions/removals from corrupting the
+list. Readers using ``for_each_process()`` are not protected with the
+``tasklist_lock``. To prevent readers from noticing changes in the list
+pointers, the ``task_struct`` object is freed only after one or more
+grace periods elapse, with the help of call_rcu(), which is invoked via
+put_task_struct_rcu_user(). This deferring of destruction ensures that
+any readers traversing the list will see valid ``p->tasks.next`` pointers
+and deletion/freeing can happen in parallel with traversal of the list.
+This pattern is also called an **existence lock**, since RCU refrains
+from invoking the delayed_put_task_struct() callback function until until
+all existing readers finish, which guarantees that the ``task_struct``
+object in question will remain in existence until after the completion
+of all RCU readers that might possibly have a reference to that object.
 
 
 Example 2: Read-Side Action Taken Outside of Lock: No In-Place Updates
 ----------------------------------------------------------------------
 
-The best applications are cases where, if reader-writer locking were
-used, the read-side lock would be dropped before taking any action
-based on the results of the search.  The most celebrated example is
-the routing table.  Because the routing table is tracking the state of
-equipment outside of the computer, it will at times contain stale data.
-Therefore, once the route has been computed, there is no need to hold
-the routing table static during transmission of the packet.  After all,
-you can hold the routing table static all you want, but that won't keep
-the external Internet from changing, and it is the state of the external
-Internet that really matters.  In addition, routing entries are typically
-added or deleted, rather than being modified in place.
-
-A straightforward example of this use of RCU may be found in the
-system-call auditing support.  For example, a reader-writer locked
+Some reader-writer locking use cases compute a value while holding
+the read-side lock, but continue to use that value after that lock is
+released.  These use cases are often good candidates for conversion
+to RCU.  One prominent example involves network packet routing.
+Because the packet-routing data tracks the state of equipment outside
+of the computer, it will at times contain stale data.  Therefore, once
+the route has been computed, there is no need to hold the routing table
+static during transmission of the packet.  After all, you can hold the
+routing table static all you want, but that won't keep the external
+Internet from changing, and it is the state of the external Internet
+that really matters.  In addition, routing entries are typically added
+or deleted, rather than being modified in place.  This is a rare example
+of the finite speed of light and the non-zero size of atoms actually
+helping make synchronization be lighter weight.
+
+A straightforward example of this type of RCU use case may be found in
+the system-call auditing support.  For example, a reader-writer locked
 implementation of ``audit_filter_task()`` might be as follows::
 
-	static enum audit_state audit_filter_task(struct task_struct *tsk)
+	static enum audit_state audit_filter_task(struct task_struct *tsk, char **key)
 	{
 		struct audit_entry *e;
 		enum audit_state   state;
@@ -86,6 +95,8 @@ implementation of ``audit_filter_task()`` might be as follows::
 		/* Note: audit_filter_mutex held by caller. */
 		list_for_each_entry(e, &audit_tsklist, list) {
 			if (audit_filter_rules(tsk, &e->rule, NULL, &state)) {
+				if (state == AUDIT_STATE_RECORD)
+					*key = kstrdup(e->rule.filterkey, GFP_ATOMIC);
 				read_unlock(&auditsc_lock);
 				return state;
 			}
@@ -101,7 +112,7 @@ you are turning auditing off, it is OK to audit a few extra system calls.
 
 This means that RCU can be easily applied to the read side, as follows::
 
-	static enum audit_state audit_filter_task(struct task_struct *tsk)
+	static enum audit_state audit_filter_task(struct task_struct *tsk, char **key)
 	{
 		struct audit_entry *e;
 		enum audit_state   state;
@@ -110,6 +121,8 @@ This means that RCU can be easily applied to the read side, as follows::
 		/* Note: audit_filter_mutex held by caller. */
 		list_for_each_entry_rcu(e, &audit_tsklist, list) {
 			if (audit_filter_rules(tsk, &e->rule, NULL, &state)) {
+				if (state == AUDIT_STATE_RECORD)
+					*key = kstrdup(e->rule.filterkey, GFP_ATOMIC);
 				rcu_read_unlock();
 				return state;
 			}
@@ -118,13 +131,15 @@ This means that RCU can be easily applied to the read side, as follows::
 		return AUDIT_BUILD_CONTEXT;
 	}
 
-The ``read_lock()`` and ``read_unlock()`` calls have become rcu_read_lock()
-and rcu_read_unlock(), respectively, and the list_for_each_entry() has
-become list_for_each_entry_rcu().  The **_rcu()** list-traversal primitives
-insert the read-side memory barriers that are required on DEC Alpha CPUs.
+The read_lock() and read_unlock() calls have become rcu_read_lock()
+and rcu_read_unlock(), respectively, and the list_for_each_entry()
+has become list_for_each_entry_rcu().  The **_rcu()** list-traversal
+primitives add READ_ONCE() and diagnostic checks for incorrect use
+outside of an RCU read-side critical section.
 
 The changes to the update side are also straightforward. A reader-writer lock
-might be used as follows for deletion and insertion::
+might be used as follows for deletion and insertion in these simplified
+versions of audit_del_rule() and audit_add_rule()::
 
 	static inline int audit_del_rule(struct audit_rule *rule,
 					 struct list_head *list)
@@ -188,16 +203,16 @@ Following are the RCU equivalents for these two functions::
 		return 0;
 	}
 
-Normally, the ``write_lock()`` and ``write_unlock()`` would be replaced by a
+Normally, the write_lock() and write_unlock() would be replaced by a
 spin_lock() and a spin_unlock(). But in this case, all callers hold
 ``audit_filter_mutex``, so no additional locking is required. The
-``auditsc_lock`` can therefore be eliminated, since use of RCU eliminates the
+auditsc_lock can therefore be eliminated, since use of RCU eliminates the
 need for writers to exclude readers.
 
 The list_del(), list_add(), and list_add_tail() primitives have been
 replaced by list_del_rcu(), list_add_rcu(), and list_add_tail_rcu().
-The **_rcu()** list-manipulation primitives add memory barriers that are needed on
-weakly ordered CPUs (most of them!).  The list_del_rcu() primitive omits the
+The **_rcu()** list-manipulation primitives add memory barriers that are
+needed on weakly ordered CPUs.  The list_del_rcu() primitive omits the
 pointer poisoning debug-assist code that would otherwise cause concurrent
 readers to fail spectacularly.
 
@@ -238,7 +253,9 @@ need to be filled in)::
 The RCU version creates a copy, updates the copy, then replaces the old
 entry with the newly updated entry.  This sequence of actions, allowing
 concurrent reads while making a copy to perform an update, is what gives
-RCU (*read-copy update*) its name.  The RCU code is as follows::
+RCU (*read-copy update*) its name.
+
+The RCU version of audit_upd_rule() is as follows::
 
 	static inline int audit_upd_rule(struct audit_rule *rule,
 					 struct list_head *list,
@@ -267,6 +284,9 @@ RCU (*read-copy update*) its name.  The RCU code is as follows::
 Again, this assumes that the caller holds ``audit_filter_mutex``.  Normally, the
 writer lock would become a spinlock in this sort of code.
 
+The update_lsm_rule() does something very similar, for those who would
+prefer to look at real Linux-kernel code.
+
 Another use of this pattern can be found in the openswitch driver's *connection
 tracking table* code in ``ct_limit_set()``.  The table holds connection tracking
 entries and has a limit on the maximum entries.  There is one such table
@@ -281,9 +301,10 @@ Example 4: Eliminating Stale Data
 ---------------------------------
 
 The auditing example above tolerates stale data, as do most algorithms
-that are tracking external state.  Because there is a delay from the
-time the external state changes before Linux becomes aware of the change,
-additional RCU-induced staleness is generally not a problem.
+that are tracking external state.  After all, given there is a delay
+from the time the external state changes before Linux becomes aware
+of the change, and so as noted earlier, a small quantity of additional
+RCU-induced staleness is generally not a problem.
 
 However, there are many examples where stale data cannot be tolerated.
 One example in the Linux kernel is the System V IPC (see the shm_lock()
@@ -302,7 +323,7 @@ Quick Quiz:
 
 If the system-call audit module were to ever need to reject stale data, one way
 to accomplish this would be to add a ``deleted`` flag and a ``lock`` spinlock to the
-audit_entry structure, and modify ``audit_filter_task()`` as follows::
+``audit_entry`` structure, and modify audit_filter_task() as follows::
 
 	static enum audit_state audit_filter_task(struct task_struct *tsk)
 	{
@@ -319,6 +340,8 @@ audit_entry structure, and modify ``audit_filter_task()`` as follows::
 					return AUDIT_BUILD_CONTEXT;
 				}
 				rcu_read_unlock();
+				if (state == AUDIT_STATE_RECORD)
+					*key = kstrdup(e->rule.filterkey, GFP_ATOMIC);
 				return state;
 			}
 		}
@@ -326,12 +349,6 @@ audit_entry structure, and modify ``audit_filter_task()`` as follows::
 		return AUDIT_BUILD_CONTEXT;
 	}
 
-Note that this example assumes that entries are only added and deleted.
-Additional mechanism is required to deal correctly with the update-in-place
-performed by ``audit_upd_rule()``.  For one thing, ``audit_upd_rule()`` would
-need additional memory barriers to ensure that the list_add_rcu() was really
-executed before the list_del_rcu().
-
 The ``audit_del_rule()`` function would need to set the ``deleted`` flag under the
 spinlock as follows::
 
@@ -357,24 +374,32 @@ spinlock as follows::
 
 This too assumes that the caller holds ``audit_filter_mutex``.
 
+Note that this example assumes that entries are only added and deleted.
+Additional mechanism is required to deal correctly with the update-in-place
+performed by audit_upd_rule().  For one thing, audit_upd_rule() would
+need to hold the locks of both the old ``audit_entry`` and its replacement
+while executing the list_replace_rcu().
+
 
 Example 5: Skipping Stale Objects
 ---------------------------------
 
-For some usecases, reader performance can be improved by skipping stale objects
-during read-side list traversal if the object in concern is pending destruction
-after one or more grace periods. One such example can be found in the timerfd
-subsystem. When a ``CLOCK_REALTIME`` clock is reprogrammed - for example due to
-setting of the system time, then all programmed timerfds that depend on this
-clock get triggered and processes waiting on them to expire are woken up in
-advance of their scheduled expiry. To facilitate this, all such timers are added
-to an RCU-managed ``cancel_list`` when they are setup in
+For some use cases, reader performance can be improved by skipping
+stale objects during read-side list traversal, where stale objects
+are those that will be removed and destroyed after one or more grace
+periods. One such example can be found in the timerfd subsystem. When a
+``CLOCK_REALTIME`` clock is reprogrammed (for example due to setting
+of the system time) then all programmed ``timerfds`` that depend on
+this clock get triggered and processes waiting on them are awakened in
+advance of their scheduled expiry. To facilitate this, all such timers
+are added to an RCU-managed ``cancel_list`` when they are setup in
 ``timerfd_setup_cancel()``::
 
 	static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags)
 	{
 		spin_lock(&ctx->cancel_lock);
-		if ((ctx->clockid == CLOCK_REALTIME &&
+		if ((ctx->clockid == CLOCK_REALTIME ||
+		     ctx->clockid == CLOCK_REALTIME_ALARM) &&
 		    (flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) {
 			if (!ctx->might_cancel) {
 				ctx->might_cancel = true;
@@ -382,13 +407,16 @@ to an RCU-managed ``cancel_list`` when they are setup in
 				list_add_rcu(&ctx->clist, &cancel_list);
 				spin_unlock(&cancel_lock);
 			}
+		} else {
+			__timerfd_remove_cancel(ctx);
 		}
 		spin_unlock(&ctx->cancel_lock);
 	}
 
-When a timerfd is freed (fd is closed), then the ``might_cancel`` flag of the
-timerfd object is cleared, the object removed from the ``cancel_list`` and
-destroyed::
+When a timerfd is freed (fd is closed), then the ``might_cancel``
+flag of the timerfd object is cleared, the object removed from the
+``cancel_list`` and destroyed, as shown in this simplified and inlined
+version of timerfd_release()::
 
 	int timerfd_release(struct inode *inode, struct file *file)
 	{
@@ -403,7 +431,10 @@ destroyed::
 		}
 		spin_unlock(&ctx->cancel_lock);
 
-		hrtimer_cancel(&ctx->t.tmr);
+		if (isalarm(ctx))
+			alarm_cancel(&ctx->t.alarm);
+		else
+			hrtimer_cancel(&ctx->t.tmr);
 		kfree_rcu(ctx, rcu);
 		return 0;
 	}
@@ -416,6 +447,7 @@ objects::
 
 	void timerfd_clock_was_set(void)
 	{
+		ktime_t moffs = ktime_mono_to_real(0);
 		struct timerfd_ctx *ctx;
 		unsigned long flags;
 
@@ -424,7 +456,7 @@ objects::
 			if (!ctx->might_cancel)
 				continue;
 			spin_lock_irqsave(&ctx->wqh.lock, flags);
-			if (ctx->moffs != ktime_mono_to_real(0)) {
+			if (ctx->moffs != moffs) {
 				ctx->moffs = KTIME_MAX;
 				ctx->ticks++;
 				wake_up_locked_poll(&ctx->wqh, EPOLLIN);
@@ -434,10 +466,10 @@ objects::
 		rcu_read_unlock();
 	}
 
-The key point here is, because RCU-traversal of the ``cancel_list`` happens
-while objects are being added and removed to the list, sometimes the traversal
-can step on an object that has been removed from the list. In this example, it
-is seen that it is better to skip such objects using a flag.
+The key point is that because RCU-protected traversal of the
+``cancel_list`` happens concurrently with object addition and removal,
+sometimes the traversal can access an object that has been removed from
+the list. In this example, a flag is used to skip such objects.
 
 
 Summary

From 86d5d4d89fc80a7b050a438fb660b7158c3d326b Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Mon, 12 Sep 2022 11:45:40 +0200
Subject: [PATCH 55/70] kcsan: Instrument memcpy/memset/memmove with newer
 Clang

With Clang version 16+, -fsanitize=thread will turn
memcpy/memset/memmove calls in instrumented functions into
__tsan_memcpy/__tsan_memset/__tsan_memmove calls respectively.

Add these functions to the core KCSAN runtime, so that we (a) catch data
races with mem* functions, and (b) won't run into linker errors with
such newer compilers.

Cc: stable@vger.kernel.org # v5.10+
Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/kcsan/core.c | 50 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c
index fe12dfe254ecf..54d077e1a2dc7 100644
--- a/kernel/kcsan/core.c
+++ b/kernel/kcsan/core.c
@@ -14,10 +14,12 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
+#include <linux/minmax.h>
 #include <linux/moduleparam.h>
 #include <linux/percpu.h>
 #include <linux/preempt.h>
 #include <linux/sched.h>
+#include <linux/string.h>
 #include <linux/uaccess.h>
 
 #include "encoding.h"
@@ -1308,3 +1310,51 @@ noinline void __tsan_atomic_signal_fence(int memorder)
 	}
 }
 EXPORT_SYMBOL(__tsan_atomic_signal_fence);
+
+#ifdef __HAVE_ARCH_MEMSET
+void *__tsan_memset(void *s, int c, size_t count);
+noinline void *__tsan_memset(void *s, int c, size_t count)
+{
+	/*
+	 * Instead of not setting up watchpoints where accessed size is greater
+	 * than MAX_ENCODABLE_SIZE, truncate checked size to MAX_ENCODABLE_SIZE.
+	 */
+	size_t check_len = min_t(size_t, count, MAX_ENCODABLE_SIZE);
+
+	check_access(s, check_len, KCSAN_ACCESS_WRITE, _RET_IP_);
+	return memset(s, c, count);
+}
+#else
+void *__tsan_memset(void *s, int c, size_t count) __alias(memset);
+#endif
+EXPORT_SYMBOL(__tsan_memset);
+
+#ifdef __HAVE_ARCH_MEMMOVE
+void *__tsan_memmove(void *dst, const void *src, size_t len);
+noinline void *__tsan_memmove(void *dst, const void *src, size_t len)
+{
+	size_t check_len = min_t(size_t, len, MAX_ENCODABLE_SIZE);
+
+	check_access(dst, check_len, KCSAN_ACCESS_WRITE, _RET_IP_);
+	check_access(src, check_len, 0, _RET_IP_);
+	return memmove(dst, src, len);
+}
+#else
+void *__tsan_memmove(void *dst, const void *src, size_t len) __alias(memmove);
+#endif
+EXPORT_SYMBOL(__tsan_memmove);
+
+#ifdef __HAVE_ARCH_MEMCPY
+void *__tsan_memcpy(void *dst, const void *src, size_t len);
+noinline void *__tsan_memcpy(void *dst, const void *src, size_t len)
+{
+	size_t check_len = min_t(size_t, len, MAX_ENCODABLE_SIZE);
+
+	check_access(dst, check_len, KCSAN_ACCESS_WRITE, _RET_IP_);
+	check_access(src, check_len, 0, _RET_IP_);
+	return memcpy(dst, src, len);
+}
+#else
+void *__tsan_memcpy(void *dst, const void *src, size_t len) __alias(memcpy);
+#endif
+EXPORT_SYMBOL(__tsan_memcpy);

From 06615505775a925cea8570785a0f1b268cd0c340 Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Mon, 12 Sep 2022 11:45:41 +0200
Subject: [PATCH 56/70] objtool, kcsan: Add volatile read/write instrumentation
 to whitelist

Adds KCSAN's volatile instrumentation to objtool's uaccess whitelist.

Recent kernel change have shown that this was missing from the uaccess
whitelist (since the first upstreamed version of KCSAN):

  mm/gup.o: warning: objtool: fault_in_readable+0x101: call to __tsan_volatile_write1() with UACCESS enabled

Fixes: 75d75b7a4d54 ("kcsan: Support distinguishing volatile accesses")
Signed-off-by: Marco Elver <elver@google.com>
Reviewed-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/objtool/check.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 0cec74da7ffea..4252482e92801 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -997,6 +997,16 @@ static const char *uaccess_safe_builtin[] = {
 	"__tsan_read_write4",
 	"__tsan_read_write8",
 	"__tsan_read_write16",
+	"__tsan_volatile_read1",
+	"__tsan_volatile_read2",
+	"__tsan_volatile_read4",
+	"__tsan_volatile_read8",
+	"__tsan_volatile_read16",
+	"__tsan_volatile_write1",
+	"__tsan_volatile_write2",
+	"__tsan_volatile_write4",
+	"__tsan_volatile_write8",
+	"__tsan_volatile_write16",
 	"__tsan_atomic8_load",
 	"__tsan_atomic16_load",
 	"__tsan_atomic32_load",

From d8c746c8056abeeb4dea3f044fb4f865abf8e9d1 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Tue, 13 Sep 2022 11:00:36 +0800
Subject: [PATCH 57/70] rcu: Simplify rcu_init_nohz() cpumask handling

In kernels built with either CONFIG_RCU_NOCB_CPU_DEFAULT_ALL=y or
CONFIG_NO_HZ_FULL=y, additional CPUs must be added to rcu_nocb_mask.
Except that kernels booted without the rcu_nocbs= will not have
allocated rcu_nocb_mask.  And the current rcu_init_nohz() function uses
its need_rcu_nocb_mask and offload_all local variables to track the
rcu_nocb and nohz_full state.

But there is a much simpler approach, namely creating a cpumask pointer
to track the default and then using cpumask_available() to check the
rcu_nocb_mask state.  This commit takes this approach, thereby simplifying
and shortening the rcu_init_nohz() function.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Acked-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree_nocb.h | 34 +++++++++++-----------------------
 1 file changed, 11 insertions(+), 23 deletions(-)

diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index 0a5f0ef414845..ce526cc2791ca 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -1210,45 +1210,33 @@ EXPORT_SYMBOL_GPL(rcu_nocb_cpu_offload);
 void __init rcu_init_nohz(void)
 {
 	int cpu;
-	bool need_rcu_nocb_mask = false;
-	bool offload_all = false;
 	struct rcu_data *rdp;
-
-#if defined(CONFIG_RCU_NOCB_CPU_DEFAULT_ALL)
-	if (!rcu_state.nocb_is_setup) {
-		need_rcu_nocb_mask = true;
-		offload_all = true;
-	}
-#endif /* #if defined(CONFIG_RCU_NOCB_CPU_DEFAULT_ALL) */
+	const struct cpumask *cpumask = NULL;
 
 #if defined(CONFIG_NO_HZ_FULL)
-	if (tick_nohz_full_running && !cpumask_empty(tick_nohz_full_mask)) {
-		need_rcu_nocb_mask = true;
-		offload_all = false; /* NO_HZ_FULL has its own mask. */
-	}
-#endif /* #if defined(CONFIG_NO_HZ_FULL) */
+	if (tick_nohz_full_running && !cpumask_empty(tick_nohz_full_mask))
+		cpumask = tick_nohz_full_mask;
+#endif
+
+	if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_DEFAULT_ALL) &&
+	    !rcu_state.nocb_is_setup && !cpumask)
+		cpumask = cpu_possible_mask;
 
-	if (need_rcu_nocb_mask) {
+	if (cpumask) {
 		if (!cpumask_available(rcu_nocb_mask)) {
 			if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) {
 				pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n");
 				return;
 			}
 		}
+
+		cpumask_or(rcu_nocb_mask, rcu_nocb_mask, cpumask);
 		rcu_state.nocb_is_setup = true;
 	}
 
 	if (!rcu_state.nocb_is_setup)
 		return;
 
-#if defined(CONFIG_NO_HZ_FULL)
-	if (tick_nohz_full_running)
-		cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask);
-#endif /* #if defined(CONFIG_NO_HZ_FULL) */
-
-	if (offload_all)
-		cpumask_setall(rcu_nocb_mask);
-
 	if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
 		pr_info("\tNote: kernel parameter 'rcu_nocbs=', 'nohz_full', or 'isolcpus=' contains nonexistent CPUs.\n");
 		cpumask_and(rcu_nocb_mask, cpu_possible_mask,

From f3706cb9a7eb29dabe2631b9e8b420851d41a765 Mon Sep 17 00:00:00 2001
From: Zeng Heng <zengheng4@huawei.com>
Date: Thu, 15 Sep 2022 16:38:24 +0800
Subject: [PATCH 58/70] rcu: Remove unused 'cpu' in
 rcu_virt_note_context_switch()

This commit removes the unused function argument 'cpu'.  This does not
change functionality, but might save a cycle or two.

Signed-off-by: Zeng Heng <zengheng4@huawei.com>
Acked-by: Mukesh Ojha <quic_mojha@quicinc.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/kvm_host.h | 2 +-
 include/linux/rcutiny.h  | 2 +-
 include/linux/rcutree.h  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1c480b1821e18..deaa777ec70f6 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -417,7 +417,7 @@ static __always_inline void guest_context_enter_irqoff(void)
 	 */
 	if (!context_tracking_guest_enter()) {
 		instrumentation_begin();
-		rcu_virt_note_context_switch(smp_processor_id());
+		rcu_virt_note_context_switch();
 		instrumentation_end();
 	}
 }
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 768196a5f39d6..9bc025aa79a30 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -142,7 +142,7 @@ static inline int rcu_needs_cpu(void)
  * Take advantage of the fact that there is only one CPU, which
  * allows us to ignore virtualization-based context switches.
  */
-static inline void rcu_virt_note_context_switch(int cpu) { }
+static inline void rcu_virt_note_context_switch(void) { }
 static inline void rcu_cpu_stall_reset(void) { }
 static inline int rcu_jiffies_till_stall_check(void) { return 21 * HZ; }
 static inline void rcu_irq_exit_check_preempt(void) { }
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 5efb51486e8af..70795386b9ffa 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -27,7 +27,7 @@ void rcu_cpu_stall_reset(void);
  * wrapper around rcu_note_context_switch(), which allows TINY_RCU
  * to save a few bytes. The caller must have disabled interrupts.
  */
-static inline void rcu_virt_note_context_switch(int cpu)
+static inline void rcu_virt_note_context_switch(void)
 {
 	rcu_note_context_switch(false);
 }

From b1dfe79d1f5c12b4cadb71291033bad73b56396d Mon Sep 17 00:00:00 2001
From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Date: Sat, 17 Sep 2022 16:41:58 +0000
Subject: [PATCH 59/70] rcu: Use READ_ONCE() for lockless read of rnp->qsmask

The rnp->qsmask is locklessly accessed from rcutree_dying_cpu(). This
may help avoid load tearing due to concurrent access, KCSAN
issues, and preserve sanity of people reading the mask in tracing.

Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 0ca21ac0f0648..5ec97e3f7468f 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2106,7 +2106,7 @@ int rcutree_dying_cpu(unsigned int cpu)
 	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
 		return 0;
 
-	blkd = !!(rnp->qsmask & rdp->grpmask);
+	blkd = !!(READ_ONCE(rnp->qsmask) & rdp->grpmask);
 	trace_rcu_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq),
 			       blkd ? TPS("cpuofl-bgp") : TPS("cpuofl"));
 	return 0;

From 083088b373c5882848a6f71d41c819f979334ab9 Mon Sep 17 00:00:00 2001
From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Date: Sat, 17 Sep 2022 16:41:59 +0000
Subject: [PATCH 60/70] rcu: Fix late wakeup when flush of bypass cblist
 happens (v6)

When the bypass cblist gets too big or its timeout has occurred, it is
flushed into the main cblist. However, the bypass timer is still running
and the behavior is that it would eventually expire and wake the GP
thread.

Since we are going to use the bypass cblist for lazy CBs, do the wakeup
soon as the flush for "too big or too long" bypass list happens.
Otherwise, long delays can happen for callbacks which get promoted from
lazy to non-lazy.

This is a good thing to do anyway (regardless of future lazy patches),
since it makes the behavior consistent with behavior of other code paths
where flushing into the ->cblist makes the GP kthread into a
non-sleeping state quickly.

[ Frederic Weisbec: changes to not do wake up GP thread unless needed,
		    comment changes ].

Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree_nocb.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index ce526cc2791ca..f77a6d7e13564 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -433,8 +433,9 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
 	if ((ncbs && j != READ_ONCE(rdp->nocb_bypass_first)) ||
 	    ncbs >= qhimark) {
 		rcu_nocb_lock(rdp);
+		*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
+
 		if (!rcu_nocb_flush_bypass(rdp, rhp, j)) {
-			*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
 			if (*was_alldone)
 				trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
 						    TPS("FirstQ"));
@@ -447,7 +448,12 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
 			rcu_advance_cbs_nowake(rdp->mynode, rdp);
 			rdp->nocb_gp_adv_time = j;
 		}
-		rcu_nocb_unlock_irqrestore(rdp, flags);
+
+		// The flush succeeded and we moved CBs into the regular list.
+		// Don't wait for the wake up timer as it may be too far ahead.
+		// Wake up the GP thread now instead, if the cblist was empty.
+		__call_rcu_nocb_wake(rdp, *was_alldone, flags);
+
 		return true; // Callback already enqueued.
 	}
 

From 52d7c5b0e490871739f49331f100fb9f10a6cb8f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 21 Sep 2022 13:30:24 -0700
Subject: [PATCH 61/70] rcu: Let non-offloaded idle CPUs with callbacks defer
 tick

When a CPU goes idle, rcu_needs_cpu() is invoked to determine whether or
not RCU needs the scheduler-clock tick to keep interrupting.  Right now,
RCU keeps the tick on for a given idle CPU if there are any non-offloaded
callbacks queued on that CPU.

But if all of these callbacks are waiting for a grace period to finish,
there is no point in scheduling a tick before that grace period has any
reasonable chance of completing.  This commit therefore delays the tick
in the case where all the callbacks are waiting for a specific grace
period to elapse.  In theory, this should result in a 50-70% reduction in
RCU-induced scheduling-clock ticks on mostly-idle CPUs.  In practice, TBD.
/bin/bash: fm: command not found

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
---
 include/linux/rcutiny.h  |  2 +-
 include/linux/rcutree.h  |  2 +-
 kernel/rcu/tree.c        | 44 ++++++++++++++++++++++++++++++++--------
 kernel/time/tick-sched.c | 10 +++++----
 4 files changed, 43 insertions(+), 15 deletions(-)

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 9bc025aa79a30..84e930c110656 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -133,7 +133,7 @@ static inline void rcu_softirq_qs(void)
 		rcu_tasks_qs(current, (preempt)); \
 	} while (0)
 
-static inline int rcu_needs_cpu(void)
+static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
 {
 	return 0;
 }
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 70795386b9ffa..3066e0975022c 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -19,7 +19,7 @@
 
 void rcu_softirq_qs(void);
 void rcu_note_context_switch(bool preempt);
-int rcu_needs_cpu(void);
+int rcu_needs_cpu(u64 basemono, u64 *nextevt);
 void rcu_cpu_stall_reset(void);
 
 /*
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 5ec97e3f7468f..1930cee1ccdbc 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -676,12 +676,40 @@ void __rcu_irq_enter_check_tick(void)
  * scheduler-clock interrupt.
  *
  * Just check whether or not this CPU has non-offloaded RCU callbacks
- * queued.
+ * queued that need immediate attention.
  */
-int rcu_needs_cpu(void)
+int rcu_needs_cpu(u64 basemono, u64 *nextevt)
 {
-	return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) &&
-		!rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data));
+	unsigned long j;
+	unsigned long jlast;
+	unsigned long jwait;
+	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
+	struct rcu_segcblist *rsclp = &rdp->cblist;
+
+	// Disabled, empty, or offloaded means nothing to do.
+	if (!rcu_segcblist_is_enabled(rsclp) ||
+	    rcu_segcblist_empty(rsclp) || rcu_rdp_is_offloaded(rdp)) {
+		*nextevt = KTIME_MAX;
+		return 0;
+	}
+
+	// Callbacks ready to invoke or that have not already been
+	// assigned a grace period need immediate attention.
+	if (!rcu_segcblist_segempty(rsclp, RCU_DONE_TAIL) ||
+	    !rcu_segcblist_segempty(rsclp, RCU_NEXT_TAIL))
+		return 1;
+
+	// There are callbacks waiting for some later grace period.
+	// Wait for about a grace period or two since the last tick, at which
+	// point there is high probability that this CPU will need to do some
+	// work for RCU.
+	j = jiffies;
+	jlast = __this_cpu_read(rcu_data.last_sched_clock);
+	jwait = READ_ONCE(jiffies_till_first_fqs) + READ_ONCE(jiffies_till_next_fqs) + 1;
+	if (time_after(j, jlast + jwait))
+		return 1;
+	*nextevt = basemono + TICK_NSEC * (jlast + jwait - j);
+	return 0;
 }
 
 /*
@@ -2324,11 +2352,9 @@ void rcu_sched_clock_irq(int user)
 {
 	unsigned long j;
 
-	if (IS_ENABLED(CONFIG_PROVE_RCU)) {
-		j = jiffies;
-		WARN_ON_ONCE(time_before(j, __this_cpu_read(rcu_data.last_sched_clock)));
-		__this_cpu_write(rcu_data.last_sched_clock, j);
-	}
+	j = jiffies;
+	WARN_ON_ONCE(time_before(j, __this_cpu_read(rcu_data.last_sched_clock)));
+	__this_cpu_write(rcu_data.last_sched_clock, j);
 	trace_rcu_utilization(TPS("Start scheduler-tick"));
 	lockdep_assert_irqs_disabled();
 	raw_cpu_inc(rcu_data.ticks_this_gp);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index b0e3c9205946f..303ea15cdb960 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -784,7 +784,7 @@ static inline bool local_timer_softirq_pending(void)
 
 static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 {
-	u64 basemono, next_tick, delta, expires;
+	u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
 	unsigned long basejiff;
 	unsigned int seq;
 
@@ -807,7 +807,7 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 	 * minimal delta which brings us back to this place
 	 * immediately. Lather, rinse and repeat...
 	 */
-	if (rcu_needs_cpu() || arch_needs_cpu() ||
+	if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() ||
 	    irq_work_needs_cpu() || local_timer_softirq_pending()) {
 		next_tick = basemono + TICK_NSEC;
 	} else {
@@ -818,8 +818,10 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 		 * disabled this also looks at the next expiring
 		 * hrtimer.
 		 */
-		next_tick = get_next_timer_interrupt(basejiff, basemono);
-		ts->next_timer = next_tick;
+		next_tmr = get_next_timer_interrupt(basejiff, basemono);
+		ts->next_timer = next_tmr;
+		/* Take the next rcu event into account */
+		next_tick = next_rcu < next_tmr ? next_rcu : next_tmr;
 	}
 
 	/*

From 33f3f2deb43fa4b10a1a18bd2daa4587a734a0a2 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 26 Sep 2022 08:57:56 -0700
Subject: [PATCH 62/70] slab: Explain why SLAB_DESTROY_BY_RCU reference before
 locking

It is not obvious to the casual user why it is absolutely necessary to
acquire a reference to a SLAB_DESTROY_BY_RCU structure before acquiring
a lock in that structure.  Therefore, add a comment explaining this point.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Cc: <linux-mm@kvack.org>
---
 include/linux/slab.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 0fefdf528e0d2..78c9b975a55c3 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -74,6 +74,12 @@
  * rcu_read_lock before reading the address, then rcu_read_unlock after
  * taking the spinlock within the structure expected at that address.
  *
+ * Note that it is not possible to acquire a lock within a structure
+ * allocated with SLAB_DESTROY_BY_RCU without first acquiring a reference
+ * as described above.  The reason is that SLAB_DESTROY_BY_RCU pages are
+ * not zeroed before being given to the slab, which means that any locks
+ * must be initialized after each and every kmem_struct_alloc().
+ *
  * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU.
  */
 /* Defer freeing slabs to RCU */

From 470211947e9192e20abec27f6fe436ea0b9e79f2 Mon Sep 17 00:00:00 2001
From: Yipeng Zou <zouyipeng@huawei.com>
Date: Mon, 26 Sep 2022 09:58:27 +0800
Subject: [PATCH 63/70] rcu: Remove rcu_is_idle_cpu()

The commit 3fcd6a230fa7 ("x86/cpu: Avoid cpuinfo-induced IPIing of
idle CPUs") introduced rcu_is_idle_cpu() in order to identify the
current CPU idle state.  But commit f3eca381bd49 ("x86/aperfmperf:
Replace arch_freq_get_on_cpu()") switched to using MAX_SAMPLE_AGE,
so rcu_is_idle_cpu() is no longer used.  This commit therefore removes it.

Fixes: f3eca381bd49 ("x86/aperfmperf: Replace arch_freq_get_on_cpu()")
Signed-off-by: Yipeng Zou <zouyipeng@huawei.com>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/rcutiny.h | 2 --
 include/linux/rcutree.h | 2 --
 kernel/rcu/tree.c       | 6 ------
 3 files changed, 10 deletions(-)

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 84e930c110656..93485b8127123 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -146,8 +146,6 @@ static inline void rcu_virt_note_context_switch(void) { }
 static inline void rcu_cpu_stall_reset(void) { }
 static inline int rcu_jiffies_till_stall_check(void) { return 21 * HZ; }
 static inline void rcu_irq_exit_check_preempt(void) { }
-#define rcu_is_idle_cpu(cpu) \
-	(is_idle_task(current) && !in_nmi() && !in_hardirq() && !in_serving_softirq())
 static inline void exit_rcu(void) { }
 static inline bool rcu_preempt_need_deferred_qs(struct task_struct *t)
 {
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 3066e0975022c..f41c4ab2a8848 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -87,8 +87,6 @@ bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp);
 void cond_synchronize_rcu(unsigned long oldstate);
 void cond_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp);
 
-bool rcu_is_idle_cpu(int cpu);
-
 #ifdef CONFIG_PROVE_RCU
 void rcu_irq_exit_check_preempt(void);
 #else
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1930cee1ccdbc..96d678c9cfb6b 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -301,12 +301,6 @@ static bool rcu_dynticks_in_eqs(int snap)
 	return !(snap & RCU_DYNTICKS_IDX);
 }
 
-/* Return true if the specified CPU is currently idle from an RCU viewpoint.  */
-bool rcu_is_idle_cpu(int cpu)
-{
-	return rcu_dynticks_in_eqs(rcu_dynticks_snap(cpu));
-}
-
 /*
  * Return true if the CPU corresponding to the specified rcu_data
  * structure has spent some time in an extended quiescent state since

From c69f94b2481f6035a24b6ddc19dfcdc6399028f5 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 29 Sep 2022 11:58:10 -0700
Subject: [PATCH 64/70] rcu-tasks: Make grace-period-age message human-readable

This commit adds a few words to the informative message that appears
every ten seconds in RCU Tasks and RCU Tasks Trace grace periods.
This message currently reads as follows:

rcu_tasks_wait_gp: rcu_tasks grace period 1046 is 10088 jiffies old.

After this change, it provides additional context, instead reading
as follows:

rcu_tasks_wait_gp: rcu_tasks grace period number 1046 (since boot) is 10088 jiffies old.

Reported-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tasks.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index f5bf6fb430dab..b0b885e071fa8 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -728,7 +728,7 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
 		if (rtsi > 0 && !reported && time_after(j, lastinfo + rtsi)) {
 			lastinfo = j;
 			rtsi = rtsi * rcu_task_stall_info_mult;
-			pr_info("%s: %s grace period %lu is %lu jiffies old.\n",
+			pr_info("%s: %s grace period number %lu (since boot) is %lu jiffies old.\n",
 				__func__, rtp->kname, rtp->tasks_gp_seq, j - rtp->gp_start);
 		}
 	}

From ec68deb1adb1c0800e3aa593119e1bddd2ea7b86 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 5 Oct 2022 09:50:02 -0700
Subject: [PATCH 65/70] clocksource: Reject bogus watchdog clocksource
 measurements

One remaining clocksource-skew issue involves extreme CPU overcommit,
which can cause the clocksource watchdog measurements to be delayed by
tens of seconds.  This in turn means that a clock-skew criterion that
is appropriate for a 500-millisecond interval will instead give lots of
false positives.

Therefore, check for the watchdog clocksource reporting much larger or
much less than the time specified by WATCHDOG_INTERVAL.  In these cases,
print a pr_warn() warning and refrain from marking the clocksource under
test as being unstable.

Reported-by: Chris Mason <clm@meta.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: John Stultz <jstultz@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Stephen Boyd <sboyd@kernel.org>
Cc: Feng Tang <feng.tang@intel.com>
Cc: Waiman Long <longman@redhat.com>
---
 kernel/time/clocksource.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index cee5da1e54c41..d36ea0ba498d3 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -386,7 +386,7 @@ EXPORT_SYMBOL_GPL(clocksource_verify_percpu);
 
 static void clocksource_watchdog(struct timer_list *unused)
 {
-	u64 csnow, wdnow, cslast, wdlast, delta;
+	u64 csnow, wdnow, cslast, wdlast, delta, wdi;
 	int next_cpu, reset_pending;
 	int64_t wd_nsec, cs_nsec;
 	struct clocksource *cs;
@@ -440,6 +440,17 @@ static void clocksource_watchdog(struct timer_list *unused)
 		if (atomic_read(&watchdog_reset_pending))
 			continue;
 
+		/* Check for bogus measurements. */
+		wdi = jiffies_to_nsecs(WATCHDOG_INTERVAL);
+		if (wd_nsec < (wdi >> 2)) {
+			pr_warn("timekeeping watchdog on CPU%d: Watchdog clocksource '%s' advanced only %lld ns during %d-jiffy time interval, skipping watchdog check.\n", smp_processor_id(), watchdog->name, wd_nsec, WATCHDOG_INTERVAL);
+			continue;
+		}
+		if (wd_nsec > (wdi << 2)) {
+			pr_warn("timekeeping watchdog on CPU%d: Watchdog clocksource '%s' advanced an excessive %lld ns during %d-jiffy time interval, probable CPU overutilization, skipping watchdog check.\n", smp_processor_id(), watchdog->name, wd_nsec, WATCHDOG_INTERVAL);
+			continue;
+		}
+
 		/* Check the deviation from the watchdog clocksource. */
 		md = cs->uncertainty_margin + watchdog->uncertainty_margin;
 		if (abs(cs_nsec - wd_nsec) > md) {

From 0d7d3cdcba3752edda77ab81060c18eca6b12b6c Mon Sep 17 00:00:00 2001
From: Manu Bretelle <chantr4@gmail.com>
Date: Wed, 5 Oct 2022 13:55:23 -0700
Subject: [PATCH 66/70] adding/updating ci files

---
 .github/workflows/test.yml       | 170 +++++++++++++++++++++++++++++++
 README                           |  18 ----
 ci/vmtest/configs/DENYLIST       |   6 ++
 ci/vmtest/configs/DENYLIST.s390x |   4 +
 ci/vmtest/helpers.sh             |  36 +++++++
 ci/vmtest/run_selftests.sh       |  93 +++++++++++++++++
 6 files changed, 309 insertions(+), 18 deletions(-)
 create mode 100644 .github/workflows/test.yml
 create mode 100644 ci/vmtest/configs/DENYLIST
 create mode 100644 ci/vmtest/configs/DENYLIST.s390x
 create mode 100755 ci/vmtest/helpers.sh
 create mode 100755 ci/vmtest/run_selftests.sh

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 0000000000000..c52b4f539803d
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,170 @@
+name: bpf-ci
+
+on:
+  pull_request:
+
+concurrency: 
+  group: ci-test-${{ github.head_ref }}
+  cancel-in-progress: true
+
+jobs:
+  llvm-toolchain:
+    runs-on: ubuntu-latest
+    outputs:
+      llvm: ${{ steps.llvm-toolchain-impl.outputs.version }}
+    steps:
+      - id: llvm-version
+        uses: libbpf/ci/get-llvm-version@master
+      - id: llvm-toolchain-impl
+        shell: bash
+        run: echo "::set-output name=version::llvm-${{ steps.llvm-version.outputs.version }}"
+  set-matrix:
+    needs: llvm-toolchain
+    runs-on: ubuntu-latest
+    outputs:
+      build-matrix: ${{ steps.set-matrix-impl.outputs.build_matrix }}
+      test-matrix: ${{ steps.set-matrix-impl.outputs.test_matrix }}
+    steps:
+      - id: set-matrix-impl
+        shell: python3 -I {0}
+        run: |
+          from json import dumps
+
+          matrix = [
+            {"kernel": "LATEST", "runs_on": ["ubuntu-latest", "self-hosted"], "arch": "x86_64", "toolchain": "gcc"},
+            {"kernel": "LATEST", "runs_on": ["ubuntu-latest", "self-hosted"], "arch": "x86_64", "toolchain": "${{ needs.llvm-toolchain.outputs.llvm }}"},
+            {"kernel": "LATEST", "runs_on": ["z15", "self-hosted"], "arch": "s390x", "toolchain": "gcc"},
+          ]
+          self_hosted_repos = [
+            "kernel-patches/bpf",
+            "kernel-patches/vmtest",
+          ]
+
+          # Only a few repository within "kernel-patches" use self-hosted runners.
+          if "${{ github.repository_owner }}" != "kernel-patches" or "${{ github.repository }}" not in self_hosted_repos:
+            # Outside of those repositories, remove the self-hosted label and skip
+            # any testing on s390x, as no suitable runners will be available.
+            for idx in range(len(matrix) - 1, -1, -1):
+              if "z15" in matrix[idx]["runs_on"]:
+                del matrix[idx]
+              else:
+                matrix[idx]["runs_on"].remove("self-hosted")
+
+          build_matrix = {"include": matrix}
+          print(f"::set-output name=build_matrix::{dumps(build_matrix)}")
+
+          tests = ["test_progs", "test_progs_no_alu32", "test_maps", "test_verifier"]
+          test_matrix = {"include": [{**config, **{"test": test}}
+                                      for config in matrix
+                                        for test in tests]}
+          print(f"::set-output name=test_matrix::{dumps(test_matrix)}")
+  build:
+    name: build for ${{ matrix.arch }} with ${{ matrix.toolchain }}
+    needs: set-matrix
+    runs-on: ${{ matrix.runs_on }}
+    timeout-minutes: 100
+    strategy:
+      fail-fast: false
+      matrix: ${{ fromJSON(needs.set-matrix.outputs.build-matrix) }}
+    env:
+      KERNEL: ${{ matrix.kernel }}
+      REPO_ROOT: ${{ github.workspace }}
+      REPO_PATH: ""
+    steps:
+      - uses: actions/checkout@v2
+      - if: ${{ github.repository == 'kernel-patches/vmtest' }}
+        name: Download bpf-next tree
+        uses: libbpf/ci/get-linux-source@master
+        with:
+          dest: '.kernel'
+      - if: ${{ github.repository == 'kernel-patches/vmtest' }}
+        name: Move linux source in place
+        shell: bash
+        run: |
+          rm -rf .kernel/.git
+          cp -rf .kernel/. .
+          rm -rf .kernel
+      - uses: libbpf/ci/patch-kernel@master
+        with:
+          patches-root: '${{ github.workspace }}/ci/diffs'
+          repo-root: '${{ github.workspace }}'
+      - name: Setup build environment
+        uses: libbpf/ci/setup-build-env@master
+      - name: Build kernel image
+        uses: libbpf/ci/build-linux@master
+        with:
+          arch: ${{ matrix.arch }}
+          toolchain: ${{ matrix.toolchain }}
+      - name: Build selftests
+        uses: libbpf/ci/build-selftests@master
+        with:
+          vmlinux_btf: ${{ github.workspace }}/vmlinux
+          toolchain: ${{ matrix.toolchain }}
+      - name: Build samples
+        uses: libbpf/ci/build-samples@master
+        with:
+          vmlinux_btf: ${{ github.workspace }}/vmlinux
+          toolchain: ${{ matrix.toolchain }}
+      - name: Tar artifacts
+        run: |
+          file_list=""
+          if [ "${{ github.repository }}" == "kernel-patches/vmtest" ]; then
+            # Package up a bunch of additional infrastructure to support running
+            # 'make kernelrelease' and bpf tool checks later on.
+            file_list="$(find . -iname Makefile | xargs) \
+              scripts/ \
+              tools/testing/selftests/bpf/ \
+              tools/include/ \
+              tools/bpf/bpftool/";
+          fi
+
+          tar -czf vmlinux-${{ matrix.arch }}-${{ matrix.toolchain }}.tar.gz \
+            .config \
+            arch/*/boot/bzImage \
+            include/config/auto.conf \
+            include/generated/autoconf.h \
+            ${file_list} \
+            --exclude '*.h' \
+            selftests/bpf/ \
+            vmlinux
+      - uses: actions/upload-artifact@v3
+        with:
+          name: vmlinux-${{ matrix.arch }}-${{ matrix.toolchain }}
+          if-no-files-found: error
+          path: vmlinux-${{ matrix.arch }}-${{ matrix.toolchain }}.tar.gz
+  test:
+    name: ${{ matrix.test }} on ${{ matrix.arch }} with ${{ matrix.toolchain }}
+    needs: [set-matrix, build]
+    strategy:
+      fail-fast: false
+      matrix: ${{ fromJSON(needs.set-matrix.outputs.test-matrix) }}
+    runs-on: ${{ matrix.runs_on }}
+    timeout-minutes: 100
+    env:
+      KERNEL: ${{ matrix.kernel }}
+      REPO_ROOT: ${{ github.workspace }}
+      REPO_PATH: ""
+    steps:
+      - uses: actions/checkout@main
+      - uses: actions/download-artifact@v3
+        with:
+          name: vmlinux-${{ matrix.arch }}-${{ matrix.toolchain }}
+          path: .
+      - name: Untar artifacts
+        run: tar -xzf vmlinux-${{ matrix.arch }}-${{ matrix.toolchain }}.tar.gz
+      - name: Prepare rootfs
+        uses: libbpf/ci/prepare-rootfs@master
+        with:
+          project-name: 'libbpf'
+          arch: ${{ matrix.arch }}
+          kernel: ${{ matrix.kernel }}
+          kernel-root: '.'
+          image-output: '/tmp/root.img'
+          test: ${{ matrix.test }}
+      - name: Run selftests
+        uses: libbpf/ci/run-qemu@master
+        with:
+          arch: ${{ matrix.arch}}
+          img: '/tmp/root.img'
+          vmlinuz: '${{ github.workspace }}/vmlinuz'
+          kernel-root: '.'
diff --git a/README b/README
index 669ac7c322927..e69de29bb2d1d 100644
--- a/README
+++ b/README
@@ -1,18 +0,0 @@
-Linux kernel
-============
-
-There are several guides for kernel developers and users. These guides can
-be rendered in a number of formats, like HTML and PDF. Please read
-Documentation/admin-guide/README.rst first.
-
-In order to build the documentation, use ``make htmldocs`` or
-``make pdfdocs``.  The formatted documentation can also be read online at:
-
-    https://www.kernel.org/doc/html/latest/
-
-There are various text files in the Documentation/ subdirectory,
-several of them using the Restructured Text markup notation.
-
-Please read the Documentation/process/changes.rst file, as it contains the
-requirements for building and running the kernel, and information about
-the problems which may result by upgrading your kernel.
diff --git a/ci/vmtest/configs/DENYLIST b/ci/vmtest/configs/DENYLIST
new file mode 100644
index 0000000000000..d12cf9fae5eee
--- /dev/null
+++ b/ci/vmtest/configs/DENYLIST
@@ -0,0 +1,6 @@
+# TEMPORARY
+btf_dump/btf_dump: syntax
+kprobe_multi_test/bench_attach
+core_reloc/enum64val
+core_reloc/size___diff_sz
+core_reloc/type_based___diff_sz
diff --git a/ci/vmtest/configs/DENYLIST.s390x b/ci/vmtest/configs/DENYLIST.s390x
new file mode 100644
index 0000000000000..3a5abdfc93672
--- /dev/null
+++ b/ci/vmtest/configs/DENYLIST.s390x
@@ -0,0 +1,4 @@
+tc_redirect/tc_redirect_dtime            # very flaky
+lru_bug                                  # not yet in bpf-next denylist
+usdt/basic                               # failing verifier due to bounds check after LLVM update
+usdt/multispec                           # same as above
diff --git a/ci/vmtest/helpers.sh b/ci/vmtest/helpers.sh
new file mode 100755
index 0000000000000..3b2cda0153b71
--- /dev/null
+++ b/ci/vmtest/helpers.sh
@@ -0,0 +1,36 @@
+# $1 - start or end
+# $2 - fold identifier, no spaces
+# $3 - fold section description
+foldable() {
+  local YELLOW='\033[1;33m'
+  local NOCOLOR='\033[0m'
+  if [ $1 = "start" ]; then
+    line="::group::$2"
+    if [ ! -z "${3:-}" ]; then
+      line="$line - ${YELLOW}$3${NOCOLOR}"
+    fi
+  else
+    line="::endgroup::"
+  fi
+  echo -e "$line"
+}
+
+__print() {
+  local TITLE=""
+  if [[ -n $2 ]]; then
+      TITLE=" title=$2"
+  fi
+  echo "::$1${TITLE}::$3"
+}
+
+# $1 - title
+# $2 - message
+print_error() {
+  __print error $1 $2
+}
+
+# $1 - title
+# $2 - message
+print_notice() {
+  __print notice $1 $2
+}
diff --git a/ci/vmtest/run_selftests.sh b/ci/vmtest/run_selftests.sh
new file mode 100755
index 0000000000000..416960dd029d1
--- /dev/null
+++ b/ci/vmtest/run_selftests.sh
@@ -0,0 +1,93 @@
+#!/bin/bash
+
+set -euo pipefail
+
+source $(cd $(dirname $0) && pwd)/helpers.sh
+
+ARCH=$(uname -m)
+
+STATUS_FILE=/exitstatus
+
+read_lists() {
+	(for path in "$@"; do
+		if [[ -s "$path" ]]; then
+			cat "$path"
+		fi;
+	done) | cut -d'#' -f1 | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' | tr -s '\n' ','
+}
+
+TEST_PROGS_ARGS=""
+# Disabled due to issue
+# if [[ "$(nproc)" -gt 2 ]]; then
+#   TEST_PROGS_ARGS="-j"
+# fi
+
+test_progs() {
+  foldable start test_progs "Testing test_progs"
+  # "&& true" does not change the return code (it is not executed
+  # if the Python script fails), but it prevents exiting on a
+  # failure due to the "set -e".
+  ./test_progs ${DENYLIST:+-d"$DENYLIST"} ${ALLOWLIST:+-a"$ALLOWLIST"} ${TEST_PROGS_ARGS} && true
+  echo "test_progs:$?" >>"${STATUS_FILE}"
+  foldable end test_progs
+}
+
+test_progs_no_alu32() {
+  foldable start test_progs-no_alu32 "Testing test_progs-no_alu32"
+  ./test_progs-no_alu32 ${DENYLIST:+-d"$DENYLIST"} ${ALLOWLIST:+-a"$ALLOWLIST"} ${TEST_PROGS_ARGS} && true
+  echo "test_progs-no_alu32:$?" >>"${STATUS_FILE}"
+  foldable end test_progs-no_alu32
+}
+
+test_maps() {
+  foldable start test_maps "Testing test_maps"
+  taskset 0xF ./test_maps && true
+  echo "test_maps:$?" >>"${STATUS_FILE}"
+  foldable end test_maps
+}
+
+test_verifier() {
+  foldable start test_verifier "Testing test_verifier"
+  ./test_verifier && true
+  echo "test_verifier:$?" >>"${STATUS_FILE}"
+  foldable end test_verifier
+}
+
+foldable end vm_init
+
+foldable start kernel_config "Kconfig"
+
+zcat /proc/config.gz
+
+foldable end kernel_config
+
+configs_path=${PROJECT_NAME}/selftests/bpf
+local_configs_path=${PROJECT_NAME}/vmtest/configs
+DENYLIST=$(read_lists \
+	"$configs_path/DENYLIST" \
+	"$configs_path/DENYLIST.${ARCH}" \
+	"$local_configs_path/DENYLIST" \
+	"$local_configs_path/DENYLIST.${ARCH}" \
+)
+ALLOWLIST=$(read_lists \
+	"$configs_path/ALLOWLIST" \
+	"$configs_path/ALLOWLIST.${ARCH}" \
+	"$local_configs_path/ALLOWLIST" \
+	"$local_configs_path/ALLOWLIST.${ARCH}" \
+)
+
+echo "DENYLIST: ${DENYLIST}"
+echo "ALLOWLIST: ${ALLOWLIST}"
+
+cd ${PROJECT_NAME}/selftests/bpf
+
+if [ $# -eq 0 ]; then
+	test_progs
+	test_progs_no_alu32
+	test_maps
+	test_verifier
+else
+	for test_name in "$@"; do
+		"${test_name}"
+	done
+fi

From 253c16fdb326d561eb516068ef963e991f8a06f4 Mon Sep 17 00:00:00 2001
From: Manu Bretelle <chantr4@gmail.com>
Date: Wed, 5 Oct 2022 13:57:59 -0700
Subject: [PATCH 67/70] Dummy commit


From 29622f19e240715f9a987f1ba495c849d2cf1521 Mon Sep 17 00:00:00 2001
From: chantra <chantr4@gmail.com>
Date: Tue, 4 Oct 2022 15:02:09 -0700
Subject: [PATCH 68/70] test running on arm64

---
 .github/workflows/test.yml                 |  19 +--
 tools/testing/selftests/bpf/config.aarch64 | 185 +++++++++++++++++++++
 2 files changed, 187 insertions(+), 17 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/config.aarch64

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index c52b4f539803d..6a9589c07ac8d 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -31,24 +31,9 @@ jobs:
           from json import dumps
 
           matrix = [
-            {"kernel": "LATEST", "runs_on": ["ubuntu-latest", "self-hosted"], "arch": "x86_64", "toolchain": "gcc"},
-            {"kernel": "LATEST", "runs_on": ["ubuntu-latest", "self-hosted"], "arch": "x86_64", "toolchain": "${{ needs.llvm-toolchain.outputs.llvm }}"},
-            {"kernel": "LATEST", "runs_on": ["z15", "self-hosted"], "arch": "s390x", "toolchain": "gcc"},
-          ]
-          self_hosted_repos = [
-            "kernel-patches/bpf",
-            "kernel-patches/vmtest",
+            {"kernel": "LATEST", "runs_on": ["ARM64", "self-hosted"], "arch": "aarch64", "toolchain": "gcc"},
           ]
 
-          # Only a few repository within "kernel-patches" use self-hosted runners.
-          if "${{ github.repository_owner }}" != "kernel-patches" or "${{ github.repository }}" not in self_hosted_repos:
-            # Outside of those repositories, remove the self-hosted label and skip
-            # any testing on s390x, as no suitable runners will be available.
-            for idx in range(len(matrix) - 1, -1, -1):
-              if "z15" in matrix[idx]["runs_on"]:
-                del matrix[idx]
-              else:
-                matrix[idx]["runs_on"].remove("self-hosted")
 
           build_matrix = {"include": matrix}
           print(f"::set-output name=build_matrix::{dumps(build_matrix)}")
@@ -120,7 +105,7 @@ jobs:
 
           tar -czf vmlinux-${{ matrix.arch }}-${{ matrix.toolchain }}.tar.gz \
             .config \
-            arch/*/boot/bzImage \
+            arch/*/boot/*Image* \
             include/config/auto.conf \
             include/generated/autoconf.h \
             ${file_list} \
diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64
new file mode 100644
index 0000000000000..3016f617c9168
--- /dev/null
+++ b/tools/testing/selftests/bpf/config.aarch64
@@ -0,0 +1,185 @@
+CONFIG_9P_FS=y
+CONFIG_ARCH_VEXPRESS=y
+CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y
+CONFIG_ARM_SMMU_V3=y
+CONFIG_ATA=y
+CONFIG_AUDIT=y
+CONFIG_BINFMT_MISC=y
+CONFIG_BLK_CGROUP=y
+CONFIG_BLK_DEV_BSGLIB=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_BONDING=y
+CONFIG_BPFILTER=y
+CONFIG_BPF_JIT_ALWAYS_ON=y
+CONFIG_BPF_JIT_DEFAULT_ON=y
+CONFIG_BPF_PRELOAD_UMD=y
+CONFIG_BPF_PRELOAD=y
+CONFIG_BRIDGE=m
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CGROUP_NET_CLASSID=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CGROUPS=y
+CONFIG_CHECKPOINT_RESTORE=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_COMPAT=y
+CONFIG_CPUSETS=y
+CONFIG_CRASH_DUMP=y
+CONFIG_CRYPTO_USER_API_RNG=y
+CONFIG_CRYPTO_USER_API_SKCIPHER=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DEBUG_INFO_BTF=y
+CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_DEBUG_LIST=y
+CONFIG_DEBUG_LOCKDEP=y
+CONFIG_DEBUG_NOTIFIERS=y
+CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_DEBUG_SG=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_DEVTMPFS=y
+CONFIG_DRM_VIRTIO_GPU=y
+CONFIG_DRM=y
+CONFIG_DUMMY=y
+CONFIG_EXPERT=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_FANOTIFY=y
+CONFIG_FB=y
+CONFIG_FUNCTION_PROFILER=y
+CONFIG_FUSE_FS=y
+CONFIG_FW_CFG_SYSFS_CMDLINE=y
+CONFIG_FW_CFG_SYSFS=y
+CONFIG_GDB_SCRIPTS=y
+CONFIG_HAVE_EBPF_JIT=y
+CONFIG_HAVE_KPROBES_ON_FTRACE=y
+CONFIG_HAVE_KPROBES=y
+CONFIG_HAVE_KRETPROBES=y
+CONFIG_HEADERS_INSTALL=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_HUGETLBFS=y
+CONFIG_HW_RANDOM_VIRTIO=y
+CONFIG_HW_RANDOM=y
+CONFIG_HZ_100=y
+CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IKHEADERS=y
+CONFIG_INET6_ESP=y
+CONFIG_INET_ESP=y
+CONFIG_INET=y
+CONFIG_INPUT_EVDEV=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IPV6_SEG6_LWTUNNEL=y
+CONFIG_IPVLAN=y
+CONFIG_JUMP_LABEL=y
+CONFIG_KERNEL_UNCOMPRESSED=y
+CONFIG_KPROBES_ON_FTRACE=y
+CONFIG_KPROBES=y
+CONFIG_KRETPROBES=y
+CONFIG_KSM=y
+CONFIG_LATENCYTOP=y
+CONFIG_LIVEPATCH=y
+CONFIG_LOCK_STAT=y
+CONFIG_MACVLAN=y
+CONFIG_MACVTAP=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_MAILBOX=y
+CONFIG_MEMCG=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_MODULE_SIG=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_NAMESPACES=y
+CONFIG_NET_9P_VIRTIO=y
+CONFIG_NET_9P=y
+CONFIG_NET_ACT_BPF=y
+CONFIG_NET_ACT_GACT=y
+CONFIG_NETDEVICES=y
+CONFIG_NETFILTER_XT_MATCH_BPF=y
+CONFIG_NETFILTER_XT_TARGET_MARK=y
+CONFIG_NET_KEY=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_FQ=y
+CONFIG_NET_VRF=y
+CONFIG_NET=y
+CONFIG_NF_TABLES=y
+CONFIG_NLMON=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NR_CPUS=256
+CONFIG_NUMA=y
+CONFIG_OVERLAY_FS=y
+CONFIG_PACKET_DIAG=y
+CONFIG_PACKET=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_PCI_HOST_GENERIC=y
+CONFIG_PCI=y
+CONFIG_PL320_MBOX=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROFILING=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_PTDUMP_DEBUGFS=y
+CONFIG_RC_DEVICES=y
+CONFIG_RC_LOOPBACK=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_PL031=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_SAMPLE_SECCOMP=y
+CONFIG_SAMPLES=y
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_SCHED_TRACER=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SCAN_ASYNC=y
+CONFIG_SCSI_VIRTIO=y
+CONFIG_SCSI=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_STACK_TRACER=y
+CONFIG_STATIC_KEYS_SELFTEST=y
+CONFIG_SYSVIPC=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_XACCT=y
+CONFIG_TCG_TIS=y
+CONFIG_TCG_TPM=y
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_DCTCP=y
+CONFIG_TLS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_TMPFS=y
+CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_TUN=y
+CONFIG_UNIX=y
+CONFIG_UPROBES=y
+CONFIG_USELIB=y
+CONFIG_USER_NS=y
+CONFIG_VETH=y
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_VIRTIO_BLK=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_VIRTIO_FS=y
+CONFIG_VIRTIO_INPUT=y
+CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_NET=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_VLAN_8021Q=y
+CONFIG_VSOCKETS=y
+CONFIG_XFRM_USER=y

From 26140a2452a30044375d263f756c61cd30e77498 Mon Sep 17 00:00:00 2001
From: chantra <chantr4@gmail.com>
Date: Wed, 5 Oct 2022 13:55:56 -0700
Subject: [PATCH 69/70] use chantra/ci repo @aarch64_img

---
 .github/workflows/test.yml | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 6a9589c07ac8d..e1b0c07796dbb 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -14,7 +14,7 @@ jobs:
       llvm: ${{ steps.llvm-toolchain-impl.outputs.version }}
     steps:
       - id: llvm-version
-        uses: libbpf/ci/get-llvm-version@master
+        uses: chantra/ci/get-llvm-version@aarch64_img
       - id: llvm-toolchain-impl
         shell: bash
         run: echo "::set-output name=version::llvm-${{ steps.llvm-version.outputs.version }}"
@@ -59,7 +59,7 @@ jobs:
       - uses: actions/checkout@v2
       - if: ${{ github.repository == 'kernel-patches/vmtest' }}
         name: Download bpf-next tree
-        uses: libbpf/ci/get-linux-source@master
+        uses: chantra/ci/get-linux-source@aarch64_img
         with:
           dest: '.kernel'
       - if: ${{ github.repository == 'kernel-patches/vmtest' }}
@@ -69,24 +69,24 @@ jobs:
           rm -rf .kernel/.git
           cp -rf .kernel/. .
           rm -rf .kernel
-      - uses: libbpf/ci/patch-kernel@master
+      - uses: chantra/ci/patch-kernel@aarch64_img
         with:
           patches-root: '${{ github.workspace }}/ci/diffs'
           repo-root: '${{ github.workspace }}'
       - name: Setup build environment
-        uses: libbpf/ci/setup-build-env@master
+        uses: chantra/ci/setup-build-env@aarch64_img
       - name: Build kernel image
-        uses: libbpf/ci/build-linux@master
+        uses: chantra/ci/build-linux@aarch64_img
         with:
           arch: ${{ matrix.arch }}
           toolchain: ${{ matrix.toolchain }}
       - name: Build selftests
-        uses: libbpf/ci/build-selftests@master
+        uses: chantra/ci/build-selftests@aarch64_img
         with:
           vmlinux_btf: ${{ github.workspace }}/vmlinux
           toolchain: ${{ matrix.toolchain }}
       - name: Build samples
-        uses: libbpf/ci/build-samples@master
+        uses: chantra/ci/build-samples@aarch64_img
         with:
           vmlinux_btf: ${{ github.workspace }}/vmlinux
           toolchain: ${{ matrix.toolchain }}
@@ -138,7 +138,7 @@ jobs:
       - name: Untar artifacts
         run: tar -xzf vmlinux-${{ matrix.arch }}-${{ matrix.toolchain }}.tar.gz
       - name: Prepare rootfs
-        uses: libbpf/ci/prepare-rootfs@master
+        uses: chantra/ci/prepare-rootfs@aarch64_img
         with:
           project-name: 'libbpf'
           arch: ${{ matrix.arch }}
@@ -147,7 +147,7 @@ jobs:
           image-output: '/tmp/root.img'
           test: ${{ matrix.test }}
       - name: Run selftests
-        uses: libbpf/ci/run-qemu@master
+        uses: chantra/ci/run-qemu@aarch64_img
         with:
           arch: ${{ matrix.arch}}
           img: '/tmp/root.img'

From fc2c58941aff5f2729f911dc85438dff43698ff5 Mon Sep 17 00:00:00 2001
From: chantra <chantr4@gmail.com>
Date: Wed, 5 Oct 2022 14:06:28 -0700
Subject: [PATCH 70/70] run on first steps on arm64 to speed up process, e/g no
 need to wait for GH runners

---
 .github/workflows/test.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index e1b0c07796dbb..f754c0b30318d 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -9,7 +9,7 @@ concurrency:
 
 jobs:
   llvm-toolchain:
-    runs-on: ubuntu-latest
+    runs-on: ARM64
     outputs:
       llvm: ${{ steps.llvm-toolchain-impl.outputs.version }}
     steps:
@@ -20,7 +20,7 @@ jobs:
         run: echo "::set-output name=version::llvm-${{ steps.llvm-version.outputs.version }}"
   set-matrix:
     needs: llvm-toolchain
-    runs-on: ubuntu-latest
+    runs-on: ARM64
     outputs:
       build-matrix: ${{ steps.set-matrix-impl.outputs.build_matrix }}
       test-matrix: ${{ steps.set-matrix-impl.outputs.test_matrix }}