diff --git a/test/array.jl b/test/array.jl
index e2aaaacc..b55a7d71 100644
--- a/test/array.jl
+++ b/test/array.jl
@@ -1,5 +1,9 @@
 STORAGEMODES = [Metal.PrivateStorage, Metal.SharedStorage, Metal.ManagedStorage]
 
+const FILL_TYPES = [Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64,
+                    Float16, Float32]
+Metal.metal_support() >= v"3.1" && push!(FILL_TYPES, BFloat16)
+
 @testset "array" begin
 
 let arr = MtlVector{Int}(undef, 1)
@@ -27,8 +31,7 @@ end
     @test mtl(1:3) === 1:3
 
 
-    # Page 22 of https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf
-    # Only bfloat missing
+    # Section 2.1 of https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf
     supported_number_types = [Float16  => Float16,
                               Float32  => Float32,
                               Float64  => Float32,
@@ -41,6 +44,8 @@ end
                               UInt32   => UInt32,
                               UInt64   => UInt64,
                               UInt8    => UInt8]
+    Metal.metal_support() >= v"3.1" && push!(supported_number_types, BFloat16 => BFloat16)
+
     # Test supported types and ensure only Float64 get converted to Float32
     for (SrcType, TargType) in supported_number_types
         @test mtl(SrcType[1]) isa MtlArray{TargType}
@@ -203,8 +208,7 @@ end
 
 end
 
-@testset "fill($T)" for T in [Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64,
-                              Float16, Float32]
+@testset "fill($T)" for T in FILL_TYPES
 
     b = rand(T)
 
@@ -241,8 +245,7 @@ end
     end
 end
 
-@testset "fill!($T)" for T in [Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64,
-                               Float16, Float32]
+@testset "fill!($T)" for T in FILL_TYPES
 
     b = rand(T)
 
diff --git a/test/device/intrinsics.jl b/test/device/intrinsics.jl
index a849bfda..3b5155b7 100644
--- a/test/device/intrinsics.jl
+++ b/test/device/intrinsics.jl
@@ -276,6 +276,7 @@ end
 
 @testset "parametrically typed" begin
     typs = [Int32, Int64, Float32]
+    Metal.metal_support() >= v"3.1" && push!(typs, BFloat16)  # gated on Metal 3.1+
     @testset for typ in typs
         function kernel(d::MtlDeviceArray{T}, n) where {T}
             t = thread_position_in_threadgroup_1d()
diff --git a/test/mps/linalg.jl b/test/mps/linalg.jl
index 106d7669..f8e9a9dc 100644
--- a/test/mps/linalg.jl
+++ b/test/mps/linalg.jl
@@ -147,33 +147,36 @@ function cpu_topk(x::Matrix{T}, k; rev=true, dims=1) where {T}
 end
 
 @testset "topk & topk!" begin
-    for ftype in (Float16, Float32)
+    ftypes = [Float16, Float32]
+
+    @testset "$ftype" for ftype in ftypes
         # Normal operation
-        @testset "$ftype" begin
-            for (shp,k) in [((3,1), 2), ((20,30), 5)]
-                cpu_a = rand(ftype, shp...)
+        @testset "$shp, k=$k" for (shp,k) in [((3,1), 2), ((20,30), 5)]
+            cpu_a = rand(ftype, shp...)
 
-                #topk
-                cpu_i, cpu_v = cpu_topk(cpu_a, k)
+            #topk
+            cpu_i, cpu_v = cpu_topk(cpu_a, k)
 
-                a = MtlMatrix(cpu_a)
-                i, v = MPS.topk(a, k)
+            a = MtlMatrix(cpu_a)
+            i, v = MPS.topk(a, k)
 
-                @test Array(i) == cpu_i
-                @test Array(v) == cpu_v
+            @test Array(i) == cpu_i
+            @test Array(v) == cpu_v
 
-                #topk!
-                i = MtlMatrix{UInt32}(undef, (k, shp[2]))
-                v = MtlMatrix{ftype}(undef, (k, shp[2]))
+            #topk!
+            i = MtlMatrix{UInt32}(undef, (k, shp[2]))
+            v = MtlMatrix{ftype}(undef, (k, shp[2]))
 
-                i, v = MPS.topk!(a, i, v, k)
+            i, v = MPS.topk!(a, i, v, k)
 
-                @test Array(i) == cpu_i
-                @test Array(v) == cpu_v
-            end
-            shp = (20,30)
-            k = 17
+            @test Array(i) == cpu_i
+            @test Array(v) == cpu_v
+        end
 
+        # `k` larger than the supported maximum of 16 must throw
+        shp = (20,30)
+        k = 17
+        @testset "$shp, k=$k" begin
             cpu_a = rand(ftype, shp...)
             cpu_i, cpu_v = cpu_topk(cpu_a, k)
 
@@ -185,7 +188,6 @@ end
             v = MtlMatrix{ftype}(undef, (k, shp[2]))
 
             @test_throws "MPSMatrixFindTopK does not support values of k > 16" i, v = MPS.topk!(a, i, v, k)
-
         end
     end
 end
diff --git a/test/runtests.jl b/test/runtests.jl
index 187b5b9b..0554e868 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -73,14 +73,27 @@ for (rootpath, dirs, files) in walkdir(@__DIR__)
     test_runners[file] = ()->include("$(@__DIR__)/$file.jl")
   end
 end
+
 ## GPUArrays testsuite
+const gpuarr_eltypes = [Int16, Int32, Int64,
+                        Complex{Int16}, Complex{Int32}, Complex{Int64},
+                        Float16, Float32,
+                        ComplexF16, ComplexF32]
+const gpuarr_eltypes_nobf16 = copy(gpuarr_eltypes)
+
+# Add BFloat16 when supported (Metal >= 3.1); "random" uses the list without it
+Metal.metal_support() >= v"3.1" && push!(gpuarr_eltypes, BFloat16)
+
 for name in keys(TestSuite.tests)
     if Metal.DefaultStorageMode != Metal.PrivateStorage && name == "indexing scalar"
         # GPUArrays' scalar indexing tests assume that indexing is not supported
         continue
     end
+
+    tmp_eltypes = name in ["random"] ? gpuarr_eltypes_nobf16 : gpuarr_eltypes
+
     push!(tests, "gpuarrays$(Base.Filesystem.path_separator)$name")
-    test_runners["gpuarrays$(Base.Filesystem.path_separator)$name"] = ()->TestSuite.tests[name](MtlArray)
+    test_runners["gpuarrays$(Base.Filesystem.path_separator)$name"] = ()->TestSuite.tests[name](MtlArray;eltypes=tmp_eltypes)
 end
 unique!(tests)
 
diff --git a/test/setup.jl b/test/setup.jl
index 8593266a..dfbec53f 100644
--- a/test/setup.jl
+++ b/test/setup.jl
@@ -1,4 +1,4 @@
-using Distributed, Test, Metal, Adapt, ObjectiveC, ObjectiveC.Foundation
+using Distributed, Test, Metal, BFloat16s, Adapt, ObjectiveC, ObjectiveC.Foundation
 
 Metal.functional() || error("Metal.jl is not functional on this system")
 
@@ -10,12 +10,6 @@ gpuarrays_root = dirname(dirname(gpuarrays))
 include(joinpath(gpuarrays_root, "test", "testsuite.jl"))
 testf(f, xs...; kwargs...) = TestSuite.compare(f, MtlArray, xs...; kwargs...)
 
-const eltypes = [Int16, Int32, Int64,
-                 Complex{Int16}, Complex{Int32}, Complex{Int64},
-                 Float16, Float32,
-                 ComplexF16, ComplexF32]
-TestSuite.supported_eltypes(::Type{<:MtlArray}) = eltypes
-
 const runtime_validation = get(ENV, "MTL_DEBUG_LAYER", "0") != "0"
 
 using Random
@@ -31,7 +25,7 @@ function runtests(f, name)
         # generate a temporary module to execute the tests in
         mod_name = Symbol("Test", rand(1:100), "Main_", replace(name, '/' => '_'))
         mod = @eval(Main, module $mod_name end)
-        @eval(mod, using Test, Random, Metal)
+        @eval(mod, using Test, Random, Metal, BFloat16s)
 
         let id = myid()
             wait(@spawnat 1 print_testworker_started(name, id))