diff --git a/.gitlab-ci.d/buildtest-template.yml b/.gitlab-ci.d/buildtest-template.yml
index 4cc1923931949..39da7698b095d 100644
--- a/.gitlab-ci.d/buildtest-template.yml
+++ b/.gitlab-ci.d/buildtest-template.yml
@@ -63,7 +63,6 @@
   stage: test
   image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:$QEMU_CI_CONTAINER_TAG
   script:
-    - export RUST_BACKTRACE=1
     - source scripts/ci/gitlab-ci-section
     - section_start buildenv "Setting up to run tests"
     - scripts/git-submodule.sh update roms/SLOF
diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml
index 4265a57783430..00f4bfcd9f355 100644
--- a/.gitlab-ci.d/buildtest.yml
+++ b/.gitlab-ci.d/buildtest.yml
@@ -131,6 +131,12 @@ build-system-fedora-rust-nightly:
     CONFIGURE_ARGS: --disable-docs --enable-rust --enable-strict-rust-lints
     TARGETS: aarch64-softmmu
     MAKE_CHECK_ARGS: check-build
+  after_script:
+    - source scripts/ci/gitlab-ci-section
+    - section_start test "Running Rust doctests"
+    - cd build
+    - pyvenv/bin/meson devenv -w ../rust ${CARGO-cargo} test --doc -p qemu_api
+
   allow_failure: true
 
 check-system-fedora:
diff --git a/.gitlab-ci.d/check-dco.py b/.gitlab-ci.d/check-dco.py
index 70dec7d6ee9b4..2fd56683dc6e4 100755
--- a/.gitlab-ci.d/check-dco.py
+++ b/.gitlab-ci.d/check-dco.py
@@ -21,7 +21,7 @@
 
 print(f"adding upstream git repo @ {repourl}")
 subprocess.check_call(["git", "remote", "add", "check-dco", repourl])
-subprocess.check_call(["git", "fetch", "check-dco", "master"])
+subprocess.check_call(["git", "fetch", "--refetch", "check-dco", "master"])
 
 ancestor = subprocess.check_output(["git", "merge-base",
                                     "check-dco/master", "HEAD"],
diff --git a/.gitlab-ci.d/check-patch.py b/.gitlab-ci.d/check-patch.py
index 68c549a146a3f..be13e6f77d725 100755
--- a/.gitlab-ci.d/check-patch.py
+++ b/.gitlab-ci.d/check-patch.py
@@ -24,7 +24,7 @@
 # base for the user's branch. We thus need to figure out a common
 # ancestor between the user's branch and current git master.
 subprocess.check_call(["git", "remote", "add", "check-patch", repourl])
-subprocess.check_call(["git", "fetch", "check-patch", "master"])
+subprocess.check_call(["git", "fetch", "--refetch", "check-patch", "master"])
 
 ancestor = subprocess.check_output(["git", "merge-base",
                                     "check-patch/master", "HEAD"],
diff --git a/.gitlab-ci.d/check-units.py b/.gitlab-ci.d/check-units.py
new file mode 100755
index 0000000000000..268a4118d5999
--- /dev/null
+++ b/.gitlab-ci.d/check-units.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+#
+# check-units.py: check the number of compilation units and identify
+#                 those that are rebuilt multiple times
+#
+# Copyright (C) 2025 Linaro Ltd.
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+from os import access, R_OK, path
+from sys import argv, exit
+import json
+from collections import Counter
+
+
+def extract_build_units(cc_path):
+    """
+    Extract the build units and their counds from compile_commands.json file.
+
+    Returns:
+        Hash table of ["unit"] = count
+    """
+
+    j = json.load(open(cc_path, 'r'))
+    files = [f['file'] for f in j]
+    build_units = Counter(files)
+
+    return build_units
+
+
+def analyse_units(build_units):
+    """
+    Analyse the build units and report stats and the top 10 rebuilds
+    """
+
+    print(f"Total source files: {len(build_units.keys())}")
+    print(f"Total build units: {sum(units.values())}")
+
+    # Create a sorted list by number of rebuilds
+    sorted_build_units = sorted(build_units.items(),
+                                key=lambda item: item[1],
+                                reverse=True)
+
+    print("Most rebuilt units:")
+    for unit, count in sorted_build_units[:20]:
+        print(f"  {unit} built {count} times")
+
+    print("Least rebuilt units:")
+    for unit, count in sorted_build_units[-10:]:
+        print(f"  {unit} built {count} times")
+
+
+if __name__ == "__main__":
+    if len(argv) != 2:
+        script_name = path.basename(argv[0])
+        print(f"Usage: {script_name} <path_to_compile_commands.json>")
+        exit(1)
+
+    cc_path = argv[1]
+    if path.isfile(cc_path) and access(cc_path, R_OK):
+        units = extract_build_units(cc_path)
+        analyse_units(units)
+        exit(0)
+    else:
+        print(f"{cc_path} doesn't exist or isn't readable")
+        exit(1)
diff --git a/.gitlab-ci.d/cirrus/freebsd-14.vars b/.gitlab-ci.d/cirrus/freebsd-14.vars
index 0997c47af5861..19ca0d36638ce 100644
--- a/.gitlab-ci.d/cirrus/freebsd-14.vars
+++ b/.gitlab-ci.d/cirrus/freebsd-14.vars
@@ -11,6 +11,6 @@ MAKE='/usr/local/bin/gmake'
 NINJA='/usr/local/bin/ninja'
 PACKAGING_COMMAND='pkg'
 PIP3='/usr/local/bin/pip'
-PKGS='alsa-lib bash bison bzip2 ca_root_nss capstone4 ccache4 cmocka ctags curl cyrus-sasl dbus diffutils dtc flex fusefs-libs3 gettext git glib gmake gnutls gsed gtk-vnc gtk3 json-c libepoxy libffi libgcrypt libjpeg-turbo libnfs libslirp libspice-server libssh libtasn1 llvm lzo2 meson mtools ncurses nettle ninja opencv pixman pkgconf png py311-numpy py311-pillow py311-pip py311-pyyaml py311-sphinx py311-sphinx_rtd_theme py311-tomli python3 rpm2cpio rust rust-bindgen-cli sdl2 sdl2_image snappy sndio socat spice-protocol tesseract usbredir virglrenderer vte3 xorriso zstd'
+PKGS='alsa-lib bash bison bzip2 ca_root_nss capstone4 ccache4 cmocka ctags curl cyrus-sasl dbus diffutils dtc flex fusefs-libs3 gettext git glib gmake gnutls gsed gtk-vnc gtk3 json-c libepoxy libffi libgcrypt libjpeg-turbo libnfs libslirp libspice-server libssh libtasn1 llvm lzo2 meson mtools ncurses nettle ninja opencv pixman pkgconf png py311-numpy py311-pillow py311-pip py311-pyyaml py311-sphinx py311-sphinx_rtd_theme py311-tomli python3 rpm2cpio rust rust-bindgen-cli sdl2 sdl2_image snappy sndio socat spice-protocol tesseract usbredir virglrenderer vte3 vulkan-tools xorriso zstd'
 PYPI_PKGS=''
 PYTHON='/usr/local/bin/python3'
diff --git a/.gitlab-ci.d/cirrus/macos-14.vars b/.gitlab-ci.d/cirrus/macos-14.vars
index 25dff322e6ac1..b039465f56f25 100644
--- a/.gitlab-ci.d/cirrus/macos-14.vars
+++ b/.gitlab-ci.d/cirrus/macos-14.vars
@@ -11,6 +11,6 @@ MAKE='/opt/homebrew/bin/gmake'
 NINJA='/opt/homebrew/bin/ninja'
 PACKAGING_COMMAND='brew'
 PIP3='/opt/homebrew/bin/pip3'
-PKGS='bash bc bindgen bison bzip2 capstone ccache cmocka ctags curl dbus diffutils dtc flex gcovr gettext git glib gnu-sed gnutls gtk+3 gtk-vnc jemalloc jpeg-turbo json-c libcbor libepoxy libffi libgcrypt libiscsi libnfs libpng libslirp libssh libtasn1 libusb llvm lzo make meson mtools ncurses nettle ninja pixman pkg-config python3 rpm2cpio rust sdl2 sdl2_image snappy socat sparse spice-protocol swtpm tesseract usbredir vde vte3 xorriso zlib zstd'
+PKGS='bash bc bindgen bison bzip2 capstone ccache cmocka ctags curl dbus diffutils dtc flex gcovr gettext git glib gnu-sed gnutls gtk+3 gtk-vnc jemalloc jpeg-turbo json-c libcbor libepoxy libffi libgcrypt libiscsi libnfs libpng libslirp libssh libtasn1 libusb llvm lzo make meson mtools ncurses nettle ninja pixman pkg-config python3 rpm2cpio rust sdl2 sdl2_image snappy socat sparse spice-protocol swtpm tesseract usbredir vde vte3 vulkan-tools xorriso zlib zstd'
 PYPI_PKGS='PyYAML numpy pillow sphinx sphinx-rtd-theme tomli'
 PYTHON='/opt/homebrew/bin/python3'
diff --git a/.gitlab-ci.d/static_checks.yml b/.gitlab-ci.d/static_checks.yml
index c0ba4533826ed..c3ed6de453d07 100644
--- a/.gitlab-ci.d/static_checks.yml
+++ b/.gitlab-ci.d/static_checks.yml
@@ -70,3 +70,25 @@ check-rust-tools-nightly:
     expire_in: 2 days
     paths:
       - rust/target/doc
+
+check-build-units:
+  extends: .base_job_template
+  stage: build
+  image: $CI_REGISTRY_IMAGE/qemu/debian:$QEMU_CI_CONTAINER_TAG
+  needs:
+    job: amd64-debian-container
+  before_script:
+    - source scripts/ci/gitlab-ci-section
+    - section_start setup "Install Tools"
+    - apt install --assume-yes --no-install-recommends jq
+    - section_end setup
+  script:
+    - mkdir build
+    - cd build
+    - section_start configure "Running configure"
+    - ../configure
+    - cd ..
+    - section_end configure
+    - section_start analyse "Analyse"
+    - .gitlab-ci.d/check-units.py build/compile_commands.json
+    - section_end analyse
diff --git a/Kconfig.host b/Kconfig.host
index 842cbe0d6c594..933425c74b473 100644
--- a/Kconfig.host
+++ b/Kconfig.host
@@ -61,3 +61,6 @@ config HV_BALLOON_POSSIBLE
 
 config HAVE_RUST
     bool
+
+config MAC_PVG
+    bool
diff --git a/MAINTAINERS b/MAINTAINERS
index 3848d37a38d20..f185fa31f1a73 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -152,10 +152,7 @@ Overall TCG CPUs
 M: Richard Henderson <richard.henderson@linaro.org>
 R: Paolo Bonzini <pbonzini@redhat.com>
 S: Maintained
-F: system/cpus.c
 F: system/watchpoint.c
-F: cpu-common.c
-F: cpu-target.c
 F: page-vary-target.c
 F: page-vary-common.c
 F: accel/tcg/
@@ -165,17 +162,13 @@ F: util/cacheflush.c
 F: scripts/decodetree.py
 F: docs/devel/decodetree.rst
 F: docs/devel/tcg*
-F: include/exec/cpu*.h
-F: include/exec/exec-all.h
 F: include/exec/tb-flush.h
-F: include/exec/target_long.h
 F: include/exec/helper*.h
 F: include/exec/helper*.h.inc
 F: include/exec/helper-info.c.inc
 F: include/exec/page-protection.h
-F: include/system/cpus.h
 F: include/system/tcg.h
-F: include/hw/core/tcg-cpu-ops.h
+F: include/accel/tcg/cpu-ops.h
 F: host/include/*/host/cpuinfo.h
 F: util/cpuinfo-*.c
 F: include/tcg/
@@ -232,14 +225,25 @@ Hexagon TCG CPUs
 M: Brian Cain <brian.cain@oss.qualcomm.com>
 S: Supported
 F: target/hexagon/
+F: hw/intc/l2vic.[ch]
+F: hw/hexagon/
+F: hw/timer/qct-qtimer.c
+F: include/hw/hexagon/
+F: include/hw/timer/qct-qtimer.h
 X: target/hexagon/idef-parser/
 X: target/hexagon/gen_idef_parser_funcs.py
 F: linux-user/hexagon/
 F: tests/tcg/hexagon/
 F: disas/hexagon.c
 F: configs/targets/hexagon-linux-user/default.mak
+F: configs/devices/hexagon-softmmu/default.mak
 F: docker/dockerfiles/debian-hexagon-cross.docker
 F: gdb-xml/hexagon*.xml
+F: docs/system/target-hexagon.rst
+F: docs/devel/hexagon-sys.rst
+F: docs/devel/hexagon-l2vic.rst
+F: tests/functional/test_hexagon_minivm.py
+F: docs/devel/hexagon-vm.rst
 T: git https://github.com/quic/qemu.git hex-next
 
 Hexagon idef-parser
@@ -319,7 +323,6 @@ F: tests/functional/test_ppc_74xx.py
 RISC-V TCG CPUs
 M: Palmer Dabbelt <palmer@dabbelt.com>
 M: Alistair Francis <alistair.francis@wdc.com>
-M: Bin Meng <bmeng.cn@gmail.com>
 R: Weiwei Li <liwei1518@gmail.com>
 R: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
 R: Liu Zhiwei <zhiwei_liu@linux.alibaba.com>
@@ -498,12 +501,19 @@ Overall
 M: Richard Henderson <richard.henderson@linaro.org>
 R: Paolo Bonzini <pbonzini@redhat.com>
 S: Maintained
+F: include/exec/cpu*.h
+F: include/exec/exec-all.h
+F: include/exec/target_long.h
 F: include/qemu/accel.h
 F: include/system/accel-*.h
-F: include/hw/core/accel-cpu.h
+F: include/system/cpus.h
+F: include/accel/accel-cpu-target.h
 F: accel/accel-*.c
 F: accel/Makefile.objs
 F: accel/stubs/Makefile.objs
+F: cpu-common.c
+F: cpu-target.c
+F: system/cpus.c
 
 Apple Silicon HVF CPUs
 M: Alexander Graf <agraf@csgraf.de>
@@ -787,7 +797,7 @@ F: docs/system/arm/kzm.rst
 Integrator CP
 M: Peter Maydell <peter.maydell@linaro.org>
 L: qemu-arm@nongnu.org
-S: Maintained
+S: Odd Fixes
 F: hw/arm/integratorcp.c
 F: hw/misc/arm_integrator_debug.c
 F: include/hw/misc/arm_integrator_debug.h
@@ -820,6 +830,21 @@ F: hw/pci-host/designware.c
 F: include/hw/pci-host/designware.h
 F: docs/system/arm/mcimx7d-sabre.rst
 
+MCIMX8MP-EVK / i.MX8MP
+M: Bernhard Beschow <shentey@gmail.com>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/arm/imx8mp-evk.c
+F: hw/arm/fsl-imx8mp.c
+F: hw/misc/imx8mp_*.c
+F: hw/pci-host/fsl_imx8m_phy.c
+F: hw/rtc/rs5c372.c
+F: include/hw/arm/fsl-imx8mp.h
+F: include/hw/misc/imx8mp_*.h
+F: include/hw/pci-host/fsl_imx8m_phy.h
+F: docs/system/arm/imx8mp-evk.rst
+F: tests/qtest/rs5c372-test.c
+
 MPS2 / MPS3
 M: Peter Maydell <peter.maydell@linaro.org>
 L: qemu-arm@nongnu.org
@@ -853,7 +878,7 @@ F: docs/system/arm/mps2.rst
 Musca
 M: Peter Maydell <peter.maydell@linaro.org>
 L: qemu-arm@nongnu.org
-S: Maintained
+S: Odd Fixes
 F: hw/arm/musca.c
 F: docs/system/arm/musca.rst
 
@@ -878,6 +903,7 @@ F: include/hw/*/npcm*
 F: tests/qtest/npcm*
 F: tests/qtest/adm1266-test.c
 F: pc-bios/npcm7xx_bootrom.bin
+F: pc-bios/npcm8xx_bootrom.bin
 F: roms/vbootrom
 F: docs/system/arm/nuvoton.rst
 F: tests/functional/test_arm_quanta_gsj.py
@@ -900,7 +926,7 @@ F: tests/functional/test_aarch64_raspi4.py
 Real View
 M: Peter Maydell <peter.maydell@linaro.org>
 L: qemu-arm@nongnu.org
-S: Maintained
+S: Odd Fixes
 F: hw/arm/realview*
 F: hw/cpu/realview_mpcore.c
 F: hw/intc/realview_gic.c
@@ -950,7 +976,7 @@ F: tests/functional/test_arm_collie.py
 Stellaris
 M: Peter Maydell <peter.maydell@linaro.org>
 L: qemu-arm@nongnu.org
-S: Maintained
+S: Odd Fixes
 F: hw/*/stellaris*
 F: hw/display/ssd03*
 F: include/hw/input/gamepad.h
@@ -980,7 +1006,7 @@ F: docs/system/arm/stm32.rst
 Versatile Express
 M: Peter Maydell <peter.maydell@linaro.org>
 L: qemu-arm@nongnu.org
-S: Maintained
+S: Odd Fixes
 F: hw/arm/vexpress.c
 F: hw/display/sii9022.c
 F: docs/system/arm/vexpress.rst
@@ -989,7 +1015,7 @@ F: tests/functional/test_arm_vexpress.py
 Versatile PB
 M: Peter Maydell <peter.maydell@linaro.org>
 L: qemu-arm@nongnu.org
-S: Maintained
+S: Odd Fixes
 F: hw/*/versatile*
 F: hw/i2c/arm_sbcon_i2c.c
 F: include/hw/i2c/arm_sbcon_i2c.h
@@ -1394,12 +1420,6 @@ F: hw/openrisc/openrisc_sim.c
 
 PowerPC Machines
 ----------------
-405 (ref405ep)
-L: qemu-ppc@nongnu.org
-S: Orphan
-F: hw/ppc/ppc405*
-F: tests/functional/test_ppc_405.py
-
 Bamboo
 L: qemu-ppc@nongnu.org
 S: Orphan
@@ -1530,6 +1550,7 @@ F: include/hw/ppc/pnv*
 F: include/hw/pci-host/pnv*
 F: include/hw/ssi/pnv_spi*
 F: pc-bios/skiboot.lid
+F: pc-bios/pnv-pnor.bin
 F: tests/qtest/pnv*
 F: tests/functional/test_ppc64_powernv.py
 
@@ -1604,7 +1625,6 @@ F: include/hw/riscv/opentitan.h
 F: include/hw/*/ibex_*.h
 
 Microchip PolarFire SoC Icicle Kit
-M: Bin Meng <bmeng.cn@gmail.com>
 L: qemu-riscv@nongnu.org
 S: Supported
 F: docs/system/riscv/microchip-icicle-kit.rst
@@ -1631,7 +1651,6 @@ F: include/hw/char/shakti_uart.h
 
 SiFive Machines
 M: Alistair Francis <Alistair.Francis@wdc.com>
-M: Bin Meng <bmeng.cn@gmail.com>
 M: Palmer Dabbelt <palmer@dabbelt.com>
 L: qemu-riscv@nongnu.org
 S: Supported
@@ -1995,10 +2014,11 @@ F: include/hw/hyperv/vmbus*.h
 OMAP
 M: Peter Maydell <peter.maydell@linaro.org>
 L: qemu-arm@nongnu.org
-S: Maintained
+S: Odd Fixes
 F: hw/*/omap*
 F: include/hw/arm/omap.h
 F: docs/system/arm/sx1.rst
+F: tests/functional/test_arm_sx1.py
 
 IPack
 M: Alberto Garcia <berto@igalia.com>
@@ -2175,10 +2195,17 @@ M: Cédric Le Goater <clg@redhat.com>
 S: Supported
 F: hw/vfio/*
 F: include/hw/vfio/
-F: docs/igd-assign.txt
 F: docs/devel/migration/vfio.rst
 F: qapi/vfio.json
 
+vfio-igd
+M: Alex Williamson <alex.williamson@redhat.com>
+M: Cédric Le Goater <clg@redhat.com>
+M: Tomita Moeko <tomitamoeko@gmail.com>
+S: Supported
+F: hw/vfio/igd.c
+F: docs/igd-assign.txt
+
 vfio-ccw
 M: Eric Farman <farman@linux.ibm.com>
 M: Matthew Rosato <mjrosato@linux.ibm.com>
@@ -2220,12 +2247,16 @@ M: Michael S. Tsirkin <mst@redhat.com>
 R: Stefano Garzarella <sgarzare@redhat.com>
 S: Supported
 F: hw/*/*vhost*
-F: docs/interop/vhost-user.json
-F: docs/interop/vhost-user.rst
+F: docs/interop/vhost-user*
+F: docs/system/devices/vhost-user*
 F: contrib/vhost-user-*/
-F: backends/vhost-user.c
+F: backends/*vhost*
 F: include/system/vhost-user-backend.h
+F: include/hw/virtio/vhost*
+F: include/*/vhost*
 F: subprojects/libvhost-user/
+F: block/export/vhost-user*
+F: util/vhost-user-server.c
 
 vhost-shadow-virtqueue
 R: Eugenio Pérez <eperezma@redhat.com>
@@ -2254,6 +2285,7 @@ F: include/hw/virtio/virtio-balloon.h
 F: system/balloon.c
 F: include/system/balloon.h
 F: tests/qtest/virtio-balloon-test.c
+F: tests/functional/test_virtio_balloon.py
 
 virtio-9p
 M: Christian Schoenebeck <qemu_oss@crudebyte.com>
@@ -2523,8 +2555,7 @@ F: hw/i2c/i2c_mux_pca954x.c
 F: include/hw/i2c/i2c_mux_pca954x.h
 
 pcf8574
-M: Dmitrii Sharikhin <d.sharikhin@yadro.com>
-S: Maintained
+S: Orphaned
 F: hw/gpio/pcf8574.c
 F: include/gpio/pcf8574.h
 
@@ -2607,6 +2638,7 @@ F: hw/display/virtio-gpu*
 F: hw/display/virtio-vga.*
 F: include/hw/virtio/virtio-gpu.h
 F: docs/system/devices/virtio-gpu.rst
+F: tests/functional/test_aarch64_virt_gpu.py
 
 vhost-user-blk
 M: Raphael Norwitz <raphael@enfabrica.net>
@@ -2801,6 +2833,20 @@ F: hw/misc/ivshmem-flat.c
 F: include/hw/misc/ivshmem-flat.h
 F: docs/system/devices/ivshmem-flat.rst
 
+UEFI variable service
+M: Gerd Hoffmann <kraxel@redhat.com>
+S: Maintained
+F: hw/uefi/
+F: include/hw/uefi/
+
+VMapple
+M: Alexander Graf <agraf@csgraf.de>
+M: Phil Dennis-Jordan <phil@philjordan.eu>
+S: Maintained
+F: hw/vmapple/*
+F: include/hw/vmapple/*
+F: docs/system/arm/vmapple.rst
+
 Subsystems
 ----------
 Overall Audio backends
@@ -3621,10 +3667,10 @@ F: net/filter-mirror.c
 F: tests/qtest/test-filter*
 
 Record/replay
-M: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
 R: Paolo Bonzini <pbonzini@redhat.com>
+R: Alex Bennée <alex.bennee@linaro.org>
 W: https://wiki.qemu.org/Features/record-replay
-S: Supported
+S: Odd Fixes
 F: replay/*
 F: block/blkreplay.c
 F: net/filter-replay.c
@@ -3728,7 +3774,7 @@ S: Orphan
 F: hw/i386/amd_iommu.?
 
 OpenSBI Firmware
-M: Bin Meng <bmeng.cn@gmail.com>
+L: qemu-riscv@nongnu.org
 S: Supported
 F: pc-bios/opensbi-*
 F: .gitlab-ci.d/opensbi.yml
@@ -3761,6 +3807,7 @@ Overall usermode emulation
 M: Riku Voipio <riku.voipio@iki.fi>
 S: Maintained
 F: accel/tcg/user-exec*.c
+F: hw/core/cpu-user.c
 F: include/user/
 F: common-user/
 
@@ -4198,6 +4245,7 @@ Functional testing framework
 M: Thomas Huth <thuth@redhat.com>
 R: Philippe Mathieu-Daudé <philmd@linaro.org>
 R: Daniel P. Berrange <berrange@redhat.com>
+F: docs/devel/testing/functional.rst
 F: tests/functional/qemu_test/
 
 Windows Hosted Continuous Integration
@@ -4285,6 +4333,7 @@ S: Orphan
 F: po/*.po
 
 Sphinx documentation configuration and build machinery
+M: John Snow <jsnow@redhat.com>
 M: Peter Maydell <peter.maydell@linaro.org>
 S: Maintained
 F: docs/conf.py
diff --git a/VERSION b/VERSION
index a318b6e8435d3..b77037335074b 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-9.2.50
+9.2.90
diff --git a/accel/Kconfig b/accel/Kconfig
index 794e0d18d21d2..4263cab72272e 100644
--- a/accel/Kconfig
+++ b/accel/Kconfig
@@ -16,4 +16,5 @@ config KVM
 config XEN
     bool
     select FSDEV_9P if VIRTFS
+    select PCI_EXPRESS_GENERIC_BRIDGE
     select XEN_BUS
diff --git a/accel/accel-system.c b/accel/accel-system.c
index a7596aef59d1a..5df49fbe831d2 100644
--- a/accel/accel-system.c
+++ b/accel/accel-system.c
@@ -26,6 +26,7 @@
 #include "qemu/osdep.h"
 #include "qemu/accel.h"
 #include "hw/boards.h"
+#include "system/accel-ops.h"
 #include "system/cpus.h"
 #include "qemu/error-report.h"
 #include "accel-system.h"
diff --git a/accel/accel-target.c b/accel/accel-target.c
index 08626c00c2d4c..33a539b4cbbf2 100644
--- a/accel/accel-target.c
+++ b/accel/accel-target.c
@@ -27,7 +27,7 @@
 #include "qemu/accel.h"
 
 #include "cpu.h"
-#include "hw/core/accel-cpu.h"
+#include "accel/accel-cpu-target.h"
 
 #ifndef CONFIG_USER_ONLY
 #include "accel-system.h"
@@ -38,6 +38,7 @@ static const TypeInfo accel_type = {
     .parent = TYPE_OBJECT,
     .class_size = sizeof(AccelClass),
     .instance_size = sizeof(AccelState),
+    .abstract = true,
 };
 
 /* Lookup AccelClass from opt_name. Returns NULL if not found */
@@ -112,22 +113,20 @@ void accel_init_interfaces(AccelClass *ac)
 
 void accel_cpu_instance_init(CPUState *cpu)
 {
-    CPUClass *cc = CPU_GET_CLASS(cpu);
-
-    if (cc->accel_cpu && cc->accel_cpu->cpu_instance_init) {
-        cc->accel_cpu->cpu_instance_init(cpu);
+    if (cpu->cc->accel_cpu && cpu->cc->accel_cpu->cpu_instance_init) {
+        cpu->cc->accel_cpu->cpu_instance_init(cpu);
     }
 }
 
 bool accel_cpu_common_realize(CPUState *cpu, Error **errp)
 {
-    CPUClass *cc = CPU_GET_CLASS(cpu);
     AccelState *accel = current_accel();
     AccelClass *acc = ACCEL_GET_CLASS(accel);
 
     /* target specific realization */
-    if (cc->accel_cpu && cc->accel_cpu->cpu_target_realize
-        && !cc->accel_cpu->cpu_target_realize(cpu, errp)) {
+    if (cpu->cc->accel_cpu
+        && cpu->cc->accel_cpu->cpu_target_realize
+        && !cpu->cc->accel_cpu->cpu_target_realize(cpu, errp)) {
         return false;
     }
 
diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
index 945ba720513ae..12fc30c276115 100644
--- a/accel/hvf/hvf-accel-ops.c
+++ b/accel/hvf/hvf-accel-ops.c
@@ -54,6 +54,7 @@
 #include "exec/exec-all.h"
 #include "gdbstub/enums.h"
 #include "hw/boards.h"
+#include "system/accel-ops.h"
 #include "system/cpus.h"
 #include "system/hvf.h"
 #include "system/hvf_int.h"
diff --git a/accel/kvm/kvm-accel-ops.c b/accel/kvm/kvm-accel-ops.c
index a81e8f3b03bb3..54ea60909e2f8 100644
--- a/accel/kvm/kvm-accel-ops.c
+++ b/accel/kvm/kvm-accel-ops.c
@@ -16,6 +16,7 @@
 #include "qemu/osdep.h"
 #include "qemu/error-report.h"
 #include "qemu/main-loop.h"
+#include "system/accel-ops.h"
 #include "system/kvm.h"
 #include "system/kvm_int.h"
 #include "system/runstate.h"
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index c65b790433cbd..f89568bfa3978 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -1288,7 +1288,7 @@ static void kvm_unpoison_all(void *param)
 
     QLIST_FOREACH_SAFE(page, &hwpoison_page_list, list, next_page) {
         QLIST_REMOVE(page, list);
-        qemu_ram_remap(page->ram_addr, TARGET_PAGE_SIZE);
+        qemu_ram_remap(page->ram_addr);
         g_free(page);
     }
 }
diff --git a/accel/kvm/kvm-cpus.h b/accel/kvm/kvm-cpus.h
index b5435286e4201..688511151c8d1 100644
--- a/accel/kvm/kvm-cpus.h
+++ b/accel/kvm/kvm-cpus.h
@@ -10,8 +10,6 @@
 #ifndef KVM_CPUS_H
 #define KVM_CPUS_H
 
-#include "system/cpus.h"
-
 int kvm_init_vcpu(CPUState *cpu, Error **errp);
 int kvm_cpu_exec(CPUState *cpu);
 void kvm_destroy_vcpu(CPUState *cpu);
diff --git a/accel/qtest/qtest.c b/accel/qtest/qtest.c
index ad7e3441a5a8a..7fae80f6a1bed 100644
--- a/accel/qtest/qtest.c
+++ b/accel/qtest/qtest.c
@@ -18,6 +18,7 @@
 #include "qemu/option.h"
 #include "qemu/config-file.h"
 #include "qemu/accel.h"
+#include "system/accel-ops.h"
 #include "system/qtest.h"
 #include "system/cpus.h"
 #include "qemu/guest-random.h"
diff --git a/accel/stubs/tcg-stub.c b/accel/stubs/tcg-stub.c
index 7f4208fddf25a..b2b9881bdfbd5 100644
--- a/accel/stubs/tcg-stub.c
+++ b/accel/stubs/tcg-stub.c
@@ -14,10 +14,6 @@
 #include "exec/tb-flush.h"
 #include "exec/exec-all.h"
 
-void tb_flush(CPUState *cpu)
-{
-}
-
 G_NORETURN void cpu_loop_exit(CPUState *cpu)
 {
     g_assert_not_reached();
diff --git a/accel/tcg/cpu-exec-common.c b/accel/tcg/cpu-exec-common.c
index 6ecfc4e7c21e3..c5c513f1e4a06 100644
--- a/accel/tcg/cpu-exec-common.c
+++ b/accel/tcg/cpu-exec-common.c
@@ -18,13 +18,45 @@
  */
 
 #include "qemu/osdep.h"
-#include "system/cpus.h"
+#include "exec/log.h"
 #include "system/tcg.h"
 #include "qemu/plugin.h"
 #include "internal-common.h"
 
 bool tcg_allowed;
 
+bool tcg_cflags_has(CPUState *cpu, uint32_t flags)
+{
+    return cpu->tcg_cflags & flags;
+}
+
+void tcg_cflags_set(CPUState *cpu, uint32_t flags)
+{
+    cpu->tcg_cflags |= flags;
+}
+
+uint32_t curr_cflags(CPUState *cpu)
+{
+    uint32_t cflags = cpu->tcg_cflags;
+
+    /*
+     * Record gdb single-step.  We should be exiting the TB by raising
+     * EXCP_DEBUG, but to simplify other tests, disable chaining too.
+     *
+     * For singlestep and -d nochain, suppress goto_tb so that
+     * we can log -d cpu,exec after every TB.
+     */
+    if (unlikely(cpu->singlestep_enabled)) {
+        cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | CF_SINGLE_STEP | 1;
+    } else if (qatomic_read(&one_insn_per_tb)) {
+        cflags |= CF_NO_GOTO_TB | 1;
+    } else if (qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
+        cflags |= CF_NO_GOTO_TB;
+    }
+
+    return cflags;
+}
+
 /* exit the current TB, but without causing any exception to be raised */
 void cpu_loop_exit_noexc(CPUState *cpu)
 {
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 8b773d88478b1..ef3d967e3af08 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -22,7 +22,7 @@
 #include "qapi/error.h"
 #include "qapi/type-helpers.h"
 #include "hw/core/cpu.h"
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 #include "trace.h"
 #include "disas/disas.h"
 #include "exec/cpu-common.h"
@@ -33,7 +33,6 @@
 #include "qemu/rcu.h"
 #include "exec/log.h"
 #include "qemu/main-loop.h"
-#include "system/cpus.h"
 #include "exec/cpu-all.h"
 #include "system/cpu-timers.h"
 #include "exec/replay-core.h"
@@ -148,38 +147,6 @@ static void init_delay_params(SyncClocks *sc, const CPUState *cpu)
 }
 #endif /* CONFIG USER ONLY */
 
-bool tcg_cflags_has(CPUState *cpu, uint32_t flags)
-{
-    return cpu->tcg_cflags & flags;
-}
-
-void tcg_cflags_set(CPUState *cpu, uint32_t flags)
-{
-    cpu->tcg_cflags |= flags;
-}
-
-uint32_t curr_cflags(CPUState *cpu)
-{
-    uint32_t cflags = cpu->tcg_cflags;
-
-    /*
-     * Record gdb single-step.  We should be exiting the TB by raising
-     * EXCP_DEBUG, but to simplify other tests, disable chaining too.
-     *
-     * For singlestep and -d nochain, suppress goto_tb so that
-     * we can log -d cpu,exec after every TB.
-     */
-    if (unlikely(cpu->singlestep_enabled)) {
-        cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | CF_SINGLE_STEP | 1;
-    } else if (qatomic_read(&one_insn_per_tb)) {
-        cflags |= CF_NO_GOTO_TB | 1;
-    } else if (qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
-        cflags |= CF_NO_GOTO_TB;
-    }
-
-    return cflags;
-}
-
 struct tb_desc {
     vaddr pc;
     uint64_t cs_base;
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index b4ccf0cdcb72c..fb22048876e3e 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -19,7 +19,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu/main-loop.h"
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 #include "exec/exec-all.h"
 #include "exec/page-protection.h"
 #include "exec/memory.h"
@@ -47,7 +47,6 @@
 #include "qemu/plugin-memory.h"
 #endif
 #include "tcg/tcg-ldst.h"
-#include "tcg/oversized-guest.h"
 
 /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
 /* #define DEBUG_TLB */
@@ -105,26 +104,15 @@ static inline uint64_t tlb_read_idx(const CPUTLBEntry *entry,
 {
     /* Do not rearrange the CPUTLBEntry structure members. */
     QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_read) !=
-                      MMU_DATA_LOAD * sizeof(uint64_t));
+                      MMU_DATA_LOAD * sizeof(uintptr_t));
     QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_write) !=
-                      MMU_DATA_STORE * sizeof(uint64_t));
+                      MMU_DATA_STORE * sizeof(uintptr_t));
     QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_code) !=
-                      MMU_INST_FETCH * sizeof(uint64_t));
+                      MMU_INST_FETCH * sizeof(uintptr_t));
 
-#if TARGET_LONG_BITS == 32
-    /* Use qatomic_read, in case of addr_write; only care about low bits. */
-    const uint32_t *ptr = (uint32_t *)&entry->addr_idx[access_type];
-    ptr += HOST_BIG_ENDIAN;
-    return qatomic_read(ptr);
-#else
-    const uint64_t *ptr = &entry->addr_idx[access_type];
-# if TCG_OVERSIZED_GUEST
-    return *ptr;
-# else
+    const uintptr_t *ptr = &entry->addr_idx[access_type];
     /* ofs might correspond to .addr_write, so use qatomic_read */
     return qatomic_read(ptr);
-# endif
-#endif
 }
 
 static inline uint64_t tlb_addr_write(const CPUTLBEntry *entry)
@@ -904,16 +892,8 @@ static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
         addr &= TARGET_PAGE_MASK;
         addr += tlb_entry->addend;
         if ((addr - start) < length) {
-#if TARGET_LONG_BITS == 32
-            uint32_t *ptr_write = (uint32_t *)&tlb_entry->addr_write;
-            ptr_write += HOST_BIG_ENDIAN;
-            qatomic_set(ptr_write, *ptr_write | TLB_NOTDIRTY);
-#elif TCG_OVERSIZED_GUEST
-            tlb_entry->addr_write |= TLB_NOTDIRTY;
-#else
             qatomic_set(&tlb_entry->addr_write,
                         tlb_entry->addr_write | TLB_NOTDIRTY);
-#endif
         }
     }
 }
@@ -1200,7 +1180,7 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
 
 void tlb_set_page_with_attrs(CPUState *cpu, vaddr addr,
                              hwaddr paddr, MemTxAttrs attrs, int prot,
-                             int mmu_idx, uint64_t size)
+                             int mmu_idx, vaddr size)
 {
     CPUTLBEntryFull full = {
         .phys_addr = paddr,
@@ -1215,12 +1195,35 @@ void tlb_set_page_with_attrs(CPUState *cpu, vaddr addr,
 
 void tlb_set_page(CPUState *cpu, vaddr addr,
                   hwaddr paddr, int prot,
-                  int mmu_idx, uint64_t size)
+                  int mmu_idx, vaddr size)
 {
     tlb_set_page_with_attrs(cpu, addr, paddr, MEMTXATTRS_UNSPECIFIED,
                             prot, mmu_idx, size);
 }
 
+/**
+ * tlb_hit_page: return true if page aligned @addr is a hit against the
+ * TLB entry @tlb_addr
+ *
+ * @addr: virtual address to test (must be page aligned)
+ * @tlb_addr: TLB entry address (a CPUTLBEntry addr_read/write/code value)
+ */
+static inline bool tlb_hit_page(uint64_t tlb_addr, vaddr addr)
+{
+    return addr == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK));
+}
+
+/**
+ * tlb_hit: return true if @addr is a hit against the TLB entry @tlb_addr
+ *
+ * @addr: virtual address to test (need not be page aligned)
+ * @tlb_addr: TLB entry address (a CPUTLBEntry addr_read/write/code value)
+ */
+static inline bool tlb_hit(uint64_t tlb_addr, vaddr addr)
+{
+    return tlb_hit_page(tlb_addr, addr & TARGET_PAGE_MASK);
+}
+
 /*
  * Note: tlb_fill_align() can trigger a resize of the TLB.
  * This means that all of the caller's prior references to the TLB table
diff --git a/accel/tcg/icount-common.c b/accel/tcg/icount-common.c
index b178dccec4502..402d3e3f4e886 100644
--- a/accel/tcg/icount-common.c
+++ b/accel/tcg/icount-common.c
@@ -48,6 +48,8 @@ static bool icount_sleep = true;
 /* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
 #define MAX_ICOUNT_SHIFT 10
 
+bool icount_align_option;
+
 /* Do not count executed instructions */
 ICountMode use_icount = ICOUNT_DISABLED;
 
diff --git a/accel/tcg/internal-common.h b/accel/tcg/internal-common.h
index c8d714256cb94..9b6ab3a8cc0bd 100644
--- a/accel/tcg/internal-common.h
+++ b/accel/tcg/internal-common.h
@@ -17,6 +17,8 @@ extern int64_t max_advance;
 
 extern bool one_insn_per_tb;
 
+extern bool icount_align_option;
+
 /*
  * Return true if CS is not running in parallel with other cpus, either
  * because there are no other cpus or we are within an exclusive context.
@@ -53,10 +55,23 @@ TranslationBlock *tb_link_page(TranslationBlock *tb);
 void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
                                uintptr_t host_pc);
 
+/**
+ * tlb_init - initialize a CPU's TLB
+ * @cpu: CPU whose TLB should be initialized
+ */
+void tlb_init(CPUState *cpu);
+/**
+ * tlb_destroy - destroy a CPU's TLB
+ * @cpu: CPU whose TLB should be destroyed
+ */
+void tlb_destroy(CPUState *cpu);
+
 bool tcg_exec_realizefn(CPUState *cpu, Error **errp);
 void tcg_exec_unrealizefn(CPUState *cpu);
 
 /* current cflags for hashing/comparison */
 uint32_t curr_cflags(CPUState *cpu);
 
+void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr);
+
 #endif
diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build
index 69f4808ac4915..38ff227eb039e 100644
--- a/accel/tcg/meson.build
+++ b/accel/tcg/meson.build
@@ -1,13 +1,13 @@
 common_ss.add(when: 'CONFIG_TCG', if_true: files(
   'cpu-exec-common.c',
+  'tcg-runtime.c',
+  'tcg-runtime-gvec.c',
 ))
 tcg_specific_ss = ss.source_set()
 tcg_specific_ss.add(files(
   'tcg-all.c',
   'cpu-exec.c',
   'tb-maint.c',
-  'tcg-runtime-gvec.c',
-  'tcg-runtime.c',
   'translate-all.c',
   'translator.c',
 ))
@@ -20,14 +20,14 @@ specific_ss.add_all(when: 'CONFIG_TCG', if_true: tcg_specific_ss)
 
 specific_ss.add(when: ['CONFIG_SYSTEM_ONLY', 'CONFIG_TCG'], if_true: files(
   'cputlb.c',
-  'watchpoint.c',
-  'tcg-accel-ops.c',
-  'tcg-accel-ops-mttcg.c',
-  'tcg-accel-ops-icount.c',
-  'tcg-accel-ops-rr.c',
 ))
 
 system_ss.add(when: ['CONFIG_TCG'], if_true: files(
   'icount-common.c',
   'monitor.c',
+  'tcg-accel-ops.c',
+  'tcg-accel-ops-icount.c',
+  'tcg-accel-ops-mttcg.c',
+  'tcg-accel-ops-rr.c',
+  'watchpoint.c',
 ))
diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index ae1dbeb79f842..eeb38a4d9ce68 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -13,7 +13,6 @@
 #include "qapi/type-helpers.h"
 #include "qapi/qapi-commands-machine.h"
 #include "monitor/monitor.h"
-#include "system/cpus.h"
 #include "system/cpu-timers.h"
 #include "system/tcg.h"
 #include "tcg/tcg.h"
diff --git a/accel/tcg/tb-internal.h b/accel/tcg/tb-internal.h
index 90be61f296a13..68aa8d17f413a 100644
--- a/accel/tcg/tb-internal.h
+++ b/accel/tcg/tb-internal.h
@@ -13,6 +13,44 @@
 #include "exec/exec-all.h"
 #include "exec/translation-block.h"
 
+/*
+ * The true return address will often point to a host insn that is part of
+ * the next translated guest insn.  Adjust the address backward to point to
+ * the middle of the call insn.  Subtracting one would do the job except for
+ * several compressed mode architectures (arm, mips) which set the low bit
+ * to indicate the compressed mode; subtracting two works around that.  It
+ * is also the case that there are no host isas that contain a call insn
+ * smaller than 4 bytes, so we don't worry about special-casing this.
+ */
+#define GETPC_ADJ   2
+
+#ifdef CONFIG_SOFTMMU
+
+#define CPU_TLB_DYN_MIN_BITS 6
+#define CPU_TLB_DYN_DEFAULT_BITS 8
+
+# if HOST_LONG_BITS == 32
+/* Make sure we do not require a double-word shift for the TLB load */
+#  define CPU_TLB_DYN_MAX_BITS (32 - TARGET_PAGE_BITS)
+# else /* HOST_LONG_BITS == 64 */
+/*
+ * Assuming TARGET_PAGE_BITS==12, with 2**22 entries we can cover 2**(22+12) ==
+ * 2**34 == 16G of address space. This is roughly what one would expect a
+ * TLB to cover in a modern (as of 2018) x86_64 CPU. For instance, Intel
+ * Skylake's Level-2 STLB has 16 1G entries.
+ * Also, make sure we do not size the TLB past the guest's address space.
+ */
+#  ifdef TARGET_PAGE_BITS_VARY
+#   define CPU_TLB_DYN_MAX_BITS                                  \
+    MIN(22, TARGET_VIRT_ADDR_SPACE_BITS - TARGET_PAGE_BITS)
+#  else
+#   define CPU_TLB_DYN_MAX_BITS                                  \
+    MIN_CONST(22, TARGET_VIRT_ADDR_SPACE_BITS - TARGET_PAGE_BITS)
+#  endif
+# endif
+
+#endif /* CONFIG_SOFTMMU */
+
 #ifdef CONFIG_USER_ONLY
 #include "user/page-protection.h"
 /*
@@ -48,6 +86,4 @@ void tb_invalidate_phys_range_fast(ram_addr_t ram_addr,
 
 bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc);
 
-void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr);
-
 #endif
diff --git a/accel/tcg/tcg-accel-ops-icount.c b/accel/tcg/tcg-accel-ops-icount.c
index d6b472a0b0e94..27cf1044c7f1c 100644
--- a/accel/tcg/tcg-accel-ops-icount.c
+++ b/accel/tcg/tcg-accel-ops-icount.c
@@ -28,7 +28,7 @@
 #include "system/cpu-timers.h"
 #include "qemu/main-loop.h"
 #include "qemu/guest-random.h"
-#include "exec/exec-all.h"
+#include "hw/core/cpu.h"
 
 #include "tcg-accel-ops.h"
 #include "tcg-accel-ops-icount.h"
diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c
index ba7cf6819d66c..bdcc385ae971d 100644
--- a/accel/tcg/tcg-accel-ops-mttcg.c
+++ b/accel/tcg/tcg-accel-ops-mttcg.c
@@ -30,7 +30,6 @@
 #include "qemu/main-loop.h"
 #include "qemu/notify.h"
 #include "qemu/guest-random.h"
-#include "exec/exec-all.h"
 #include "hw/boards.h"
 #include "tcg/startup.h"
 #include "tcg-accel-ops.h"
diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
index 028b385af9aab..f62cf24e1d4dd 100644
--- a/accel/tcg/tcg-accel-ops-rr.c
+++ b/accel/tcg/tcg-accel-ops-rr.c
@@ -31,7 +31,7 @@
 #include "qemu/main-loop.h"
 #include "qemu/notify.h"
 #include "qemu/guest-random.h"
-#include "exec/exec-all.h"
+#include "exec/cpu-common.h"
 #include "tcg/startup.h"
 #include "tcg-accel-ops.h"
 #include "tcg-accel-ops-rr.h"
diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c
index 6e3f1fa92b24e..d9b662efe3ba1 100644
--- a/accel/tcg/tcg-accel-ops.c
+++ b/accel/tcg/tcg-accel-ops.c
@@ -26,13 +26,14 @@
  */
 
 #include "qemu/osdep.h"
+#include "system/accel-ops.h"
 #include "system/tcg.h"
 #include "system/replay.h"
 #include "system/cpu-timers.h"
 #include "qemu/main-loop.h"
 #include "qemu/guest-random.h"
 #include "qemu/timer.h"
-#include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "exec/hwaddr.h"
 #include "exec/tb-flush.h"
 #include "exec/translation-block.h"
@@ -120,10 +121,9 @@ static inline int xlat_gdb_type(CPUState *cpu, int gdbtype)
         [GDB_WATCHPOINT_ACCESS] = BP_GDB | BP_MEM_ACCESS,
     };
 
-    CPUClass *cc = CPU_GET_CLASS(cpu);
     int cputype = xlat[gdbtype];
 
-    if (cc->gdb_stop_before_watchpoint) {
+    if (cpu->cc->gdb_stop_before_watchpoint) {
         cputype |= BP_STOP_BEFORE_ACCESS;
     }
     return cputype;
diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c
index 95adaacee82ad..c1a30b01219e8 100644
--- a/accel/tcg/tcg-all.c
+++ b/accel/tcg/tcg-all.c
@@ -28,7 +28,6 @@
 #include "exec/replay-core.h"
 #include "system/cpu-timers.h"
 #include "tcg/startup.h"
-#include "tcg/oversized-guest.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "qemu/accel.h"
@@ -41,6 +40,8 @@
 #include "hw/boards.h"
 #endif
 #include "internal-common.h"
+#include "cpu-param.h"
+
 
 struct TCGState {
     AccelState parent_obj;
@@ -72,7 +73,7 @@ DECLARE_INSTANCE_CHECKER(TCGState, TCG_STATE,
 
 static bool default_mttcg_enabled(void)
 {
-    if (icount_enabled() || TCG_OVERSIZED_GUEST) {
+    if (icount_enabled()) {
         return false;
     }
 #ifdef TARGET_SUPPORTS_MTTCG
@@ -145,9 +146,7 @@ static void tcg_set_thread(Object *obj, const char *value, Error **errp)
     TCGState *s = TCG_STATE(obj);
 
     if (strcmp(value, "multi") == 0) {
-        if (TCG_OVERSIZED_GUEST) {
-            error_setg(errp, "No MTTCG when guest word size > hosts");
-        } else if (icount_enabled()) {
+        if (icount_enabled()) {
             error_setg(errp, "No MTTCG when icount is enabled");
         } else {
 #ifndef TARGET_SUPPORTS_MTTCG
diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
index afca89baa1cbc..ff927c5dd8dbf 100644
--- a/accel/tcg/tcg-runtime-gvec.c
+++ b/accel/tcg/tcg-runtime-gvec.c
@@ -19,7 +19,6 @@
 
 #include "qemu/osdep.h"
 #include "qemu/host-utils.h"
-#include "cpu.h"
 #include "exec/helper-proto-common.h"
 #include "tcg/tcg-gvec-desc.h"
 
diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c
index 9fa539ad3d779..fa7ed9739c7b7 100644
--- a/accel/tcg/tcg-runtime.c
+++ b/accel/tcg/tcg-runtime.c
@@ -23,13 +23,9 @@
  */
 #include "qemu/osdep.h"
 #include "qemu/host-utils.h"
-#include "cpu.h"
+#include "exec/cpu-common.h"
 #include "exec/helper-proto-common.h"
-#include "exec/cpu_ldst.h"
-#include "exec/exec-all.h"
-#include "disas/disas.h"
-#include "exec/log.h"
-#include "tcg/tcg.h"
+#include "accel/tcg/getpc.h"
 
 #define HELPER_H  "accel/tcg/tcg-runtime.h"
 #include "exec/helper-info.c.inc"
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index d4189c73860da..82bc16bd53535 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -54,11 +54,10 @@
 #include "qemu/cacheinfo.h"
 #include "qemu/timer.h"
 #include "exec/log.h"
-#include "system/cpus.h"
 #include "system/cpu-timers.h"
 #include "system/tcg.h"
 #include "qapi/error.h"
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 #include "tb-jmp-cache.h"
 #include "tb-hash.h"
 #include "tb-context.h"
@@ -631,7 +630,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
      * to account for the re-execution of the branch.
      */
     n = 1;
-    cc = CPU_GET_CLASS(cpu);
+    cc = cpu->cc;
     if (cc->tcg_ops->io_recompile_replay_branch &&
         cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
         cpu->neg.icount_decr.u16.low++;
diff --git a/accel/tcg/user-exec-stub.c b/accel/tcg/user-exec-stub.c
index 4fbe2dbdc883e..1d52f48226abf 100644
--- a/accel/tcg/user-exec-stub.c
+++ b/accel/tcg/user-exec-stub.c
@@ -1,6 +1,7 @@
 #include "qemu/osdep.h"
 #include "hw/core/cpu.h"
 #include "exec/replay-core.h"
+#include "internal-common.h"
 
 void cpu_resume(CPUState *cpu)
 {
@@ -18,6 +19,16 @@ void cpu_exec_reset_hold(CPUState *cpu)
 {
 }
 
+/* User mode emulation does not support softmmu yet.  */
+
+void tlb_init(CPUState *cpu)
+{
+}
+
+void tlb_destroy(CPUState *cpu)
+{
+}
+
 /* User mode emulation does not support record/replay yet.  */
 
 bool replay_exception(void)
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index 0561c4f6dc758..2322181b1516b 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -17,8 +17,9 @@
  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
 #include "qemu/osdep.h"
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 #include "disas/disas.h"
+#include "exec/vaddr.h"
 #include "exec/exec-all.h"
 #include "tcg/tcg.h"
 #include "qemu/bitops.h"
@@ -30,6 +31,8 @@
 #include "exec/page-protection.h"
 #include "exec/helper-proto.h"
 #include "qemu/atomic128.h"
+#include "qemu/bswap.h"
+#include "qemu/int128.h"
 #include "trace.h"
 #include "tcg/tcg-ldst.h"
 #include "internal-common.h"
@@ -969,6 +972,85 @@ static void *cpu_mmu_lookup(CPUState *cpu, vaddr addr,
     return ret;
 }
 
+/* physical memory access (slow version, mainly for debug) */
+int cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
+                        void *ptr, size_t len, bool is_write)
+{
+    int flags;
+    vaddr l, page;
+    uint8_t *buf = ptr;
+    ssize_t written;
+    int ret = -1;
+    int fd = -1;
+
+    mmap_lock();
+
+    while (len > 0) {
+        page = addr & TARGET_PAGE_MASK;
+        l = (page + TARGET_PAGE_SIZE) - addr;
+        if (l > len) {
+            l = len;
+        }
+        flags = page_get_flags(page);
+        if (!(flags & PAGE_VALID)) {
+            goto out_close;
+        }
+        if (is_write) {
+            if (flags & PAGE_WRITE) {
+                memcpy(g2h(cpu, addr), buf, l);
+            } else {
+                /* Bypass the host page protection using ptrace. */
+                if (fd == -1) {
+                    fd = open("/proc/self/mem", O_WRONLY);
+                    if (fd == -1) {
+                        goto out;
+                    }
+                }
+                /*
+                 * If there is a TranslationBlock and we weren't bypassing the
+                 * host page protection, the memcpy() above would SEGV,
+                 * ultimately leading to page_unprotect(). So invalidate the
+                 * translations manually. Both invalidation and pwrite() must
+                 * be under mmap_lock() in order to prevent the creation of
+                 * another TranslationBlock in between.
+                 */
+                tb_invalidate_phys_range(addr, addr + l - 1);
+                written = pwrite(fd, buf, l,
+                                 (off_t)(uintptr_t)g2h_untagged(addr));
+                if (written != l) {
+                    goto out_close;
+                }
+            }
+        } else if (flags & PAGE_READ) {
+            memcpy(buf, g2h(cpu, addr), l);
+        } else {
+            /* Bypass the host page protection using ptrace. */
+            if (fd == -1) {
+                fd = open("/proc/self/mem", O_RDONLY);
+                if (fd == -1) {
+                    goto out;
+                }
+            }
+            if (pread(fd, buf, l,
+                      (off_t)(uintptr_t)g2h_untagged(addr)) != l) {
+                goto out_close;
+            }
+        }
+        len -= l;
+        buf += l;
+        addr += l;
+    }
+    ret = 0;
+out_close:
+    if (fd != -1) {
+        close(fd);
+    }
+out:
+    mmap_unlock();
+
+    return ret;
+}
+
 #include "ldst_atomicity.c.inc"
 
 static uint8_t do_ld1_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
diff --git a/accel/tcg/watchpoint.c b/accel/tcg/watchpoint.c
index af57d182d5bf7..65b21884cec9b 100644
--- a/accel/tcg/watchpoint.c
+++ b/accel/tcg/watchpoint.c
@@ -19,14 +19,13 @@
 
 #include "qemu/osdep.h"
 #include "qemu/main-loop.h"
-#include "qemu/error-report.h"
-#include "exec/exec-all.h"
+#include "exec/breakpoint.h"
+#include "exec/cpu-interrupt.h"
 #include "exec/page-protection.h"
 #include "exec/translation-block.h"
-#include "tb-internal.h"
 #include "system/tcg.h"
 #include "system/replay.h"
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 #include "hw/core/cpu.h"
 #include "internal-common.h"
 
@@ -69,7 +68,6 @@ int cpu_watchpoint_address_matches(CPUState *cpu, vaddr addr, vaddr len)
 void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
                           MemTxAttrs attrs, int flags, uintptr_t ra)
 {
-    CPUClass *cc = CPU_GET_CLASS(cpu);
     CPUWatchpoint *wp;
 
     assert(tcg_enabled());
@@ -85,9 +83,9 @@ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
         return;
     }
 
-    if (cc->tcg_ops->adjust_watchpoint_address) {
+    if (cpu->cc->tcg_ops->adjust_watchpoint_address) {
         /* this is currently used only by ARM BE32 */
-        addr = cc->tcg_ops->adjust_watchpoint_address(cpu, addr, len);
+        addr = cpu->cc->tcg_ops->adjust_watchpoint_address(cpu, addr, len);
     }
 
     assert((flags & ~BP_MEM_ACCESS) == 0);
@@ -119,8 +117,8 @@ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
             wp->hitattrs = attrs;
 
             if (wp->flags & BP_CPU
-                && cc->tcg_ops->debug_check_watchpoint
-                && !cc->tcg_ops->debug_check_watchpoint(cpu, wp)) {
+                && cpu->cc->tcg_ops->debug_check_watchpoint
+                && !cpu->cc->tcg_ops->debug_check_watchpoint(cpu, wp)) {
                 wp->flags &= ~BP_WATCHPOINT_HIT;
                 continue;
             }
diff --git a/accel/xen/xen-all.c b/accel/xen/xen-all.c
index 852e9fbe5febc..7aa28b9ab9393 100644
--- a/accel/xen/xen-all.c
+++ b/accel/xen/xen-all.c
@@ -18,6 +18,7 @@
 #include "hw/xen/xen_igd.h"
 #include "chardev/char.h"
 #include "qemu/accel.h"
+#include "system/accel-ops.h"
 #include "system/cpus.h"
 #include "system/xen.h"
 #include "system/runstate.h"
diff --git a/backends/cryptodev-vhost.c b/backends/cryptodev-vhost.c
index 8718c97326258..943680a23afa2 100644
--- a/backends/cryptodev-vhost.c
+++ b/backends/cryptodev-vhost.c
@@ -53,7 +53,7 @@ cryptodev_vhost_init(
     CryptoDevBackendVhost *crypto;
     Error *local_err = NULL;
 
-    crypto = g_new(CryptoDevBackendVhost, 1);
+    crypto = g_new0(CryptoDevBackendVhost, 1);
     crypto->dev.max_queues = 1;
     crypto->dev.nvqs = 1;
     crypto->dev.vqs = crypto->vqs;
diff --git a/backends/tpm/tpm_util.c b/backends/tpm/tpm_util.c
index 3294625106002..f07a2656ce2b7 100644
--- a/backends/tpm/tpm_util.c
+++ b/backends/tpm/tpm_util.c
@@ -21,6 +21,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu/error-report.h"
+#include "qemu/cutils.h"
 #include "qapi/error.h"
 #include "qapi/visitor.h"
 #include "tpm_int.h"
@@ -75,7 +76,7 @@ static void release_tpm(Object *obj, const char *name, void *opaque)
 }
 
 const PropertyInfo qdev_prop_tpm = {
-    .name  = "str",
+    .type  = "str",
     .description = "ID of a tpm to use as a backend",
     .get   = get_tpm,
     .set   = set_tpm,
@@ -336,8 +337,8 @@ void tpm_sized_buffer_reset(TPMSizedBuffer *tsb)
 void tpm_util_show_buffer(const unsigned char *buffer,
                           size_t buffer_size, const char *string)
 {
-    size_t len, i;
-    char *line_buffer, *p;
+    g_autoptr(GString) str = NULL;
+    size_t len, i, l;
 
     if (!trace_event_get_state_backends(TRACE_TPM_UTIL_SHOW_BUFFER_CONTENT)) {
         return;
@@ -345,19 +346,14 @@ void tpm_util_show_buffer(const unsigned char *buffer,
     len = MIN(tpm_cmd_get_size(buffer), buffer_size);
     trace_tpm_util_show_buffer_header(string, len);
 
-    /*
-     * allocate enough room for 3 chars per buffer entry plus a
-     * newline after every 16 chars and a final null terminator.
-     */
-    line_buffer = g_malloc(len * 3 + (len / 16) + 1);
-
-    for (i = 0, p = line_buffer; i < len; i++) {
-        if (i && !(i % 16)) {
-            p += sprintf(p, "\n");
+    for (i = 0; i < len; i += l) {
+        if (str) {
+            g_string_append_c(str, '\n');
         }
-        p += sprintf(p, "%.2X ", buffer[i]);
+        l = MIN(len, 16);
+        str = qemu_hexdump_line(str, buffer, l, 1, 0);
     }
-    trace_tpm_util_show_buffer_content(line_buffer);
 
-    g_free(line_buffer);
+    g_string_ascii_up(str);
+    trace_tpm_util_show_buffer_content(str->str);
 }
diff --git a/block/block-backend.c b/block/block-backend.c
index 9288f7e1c69e5..a402db13f2918 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -2357,18 +2357,6 @@ void *blk_blockalign(BlockBackend *blk, size_t size)
     return qemu_blockalign(blk ? blk_bs(blk) : NULL, size);
 }
 
-bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
-{
-    BlockDriverState *bs = blk_bs(blk);
-    GLOBAL_STATE_CODE();
-    GRAPH_RDLOCK_GUARD_MAINLOOP();
-
-    if (!bs) {
-        return false;
-    }
-
-    return bdrv_op_is_blocked(bs, op, errp);
-}
 
 /**
  * Return BB's current AioContext.  Note that this context may change
diff --git a/block/create.c b/block/create.c
index 72abafb4c1248..6b23a2167535e 100644
--- a/block/create.c
+++ b/block/create.c
@@ -24,7 +24,6 @@
 
 #include "qemu/osdep.h"
 #include "block/block_int.h"
-#include "qemu/clang-tsa.h"
 #include "qemu/job.h"
 #include "qemu/main-loop.h"
 #include "qapi/qapi-commands-block-core.h"
diff --git a/block/file-posix.c b/block/file-posix.c
index 44e16dda876b2..56d1972d156e1 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -194,6 +194,7 @@ static int fd_open(BlockDriverState *bs)
 }
 
 static int64_t raw_getlength(BlockDriverState *bs);
+static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs);
 
 typedef struct RawPosixAIOData {
     BlockDriverState *bs;
@@ -804,6 +805,13 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
 #endif
     s->needs_alignment = raw_needs_alignment(bs);
 
+    bs->supported_write_flags = BDRV_REQ_FUA;
+    if (s->use_linux_aio && !laio_has_fua()) {
+        bs->supported_write_flags &= ~BDRV_REQ_FUA;
+    } else if (s->use_linux_io_uring && !luring_has_fua()) {
+        bs->supported_write_flags &= ~BDRV_REQ_FUA;
+    }
+
     bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK;
     if (S_ISREG(st.st_mode)) {
         /* When extending regular files, we get zeros from the OS */
@@ -2477,7 +2485,8 @@ static inline bool raw_check_linux_aio(BDRVRawState *s)
 #endif
 
 static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
-                                   uint64_t bytes, QEMUIOVector *qiov, int type)
+                                   uint64_t bytes, QEMUIOVector *qiov, int type,
+                                   int flags)
 {
     BDRVRawState *s = bs->opaque;
     RawPosixAIOData acb;
@@ -2508,13 +2517,13 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
 #ifdef CONFIG_LINUX_IO_URING
     } else if (raw_check_linux_io_uring(s)) {
         assert(qiov->size == bytes);
-        ret = luring_co_submit(bs, s->fd, offset, qiov, type);
+        ret = luring_co_submit(bs, s->fd, offset, qiov, type, flags);
         goto out;
 #endif
 #ifdef CONFIG_LINUX_AIO
     } else if (raw_check_linux_aio(s)) {
         assert(qiov->size == bytes);
-        ret = laio_co_submit(s->fd, offset, qiov, type,
+        ret = laio_co_submit(s->fd, offset, qiov, type, flags,
                               s->aio_max_batch);
         goto out;
 #endif
@@ -2534,6 +2543,10 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
 
     assert(qiov->size == bytes);
     ret = raw_thread_pool_submit(handle_aiocb_rw, &acb);
+    if (ret == 0 && (flags & BDRV_REQ_FUA)) {
+        /* TODO Use pwritev2() instead if it's available */
+        ret = raw_co_flush_to_disk(bs);
+    }
     goto out; /* Avoid the compiler err of unused label */
 
 out:
@@ -2571,14 +2584,14 @@ static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset,
                                       int64_t bytes, QEMUIOVector *qiov,
                                       BdrvRequestFlags flags)
 {
-    return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_READ);
+    return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_READ, flags);
 }
 
 static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset,
                                        int64_t bytes, QEMUIOVector *qiov,
                                        BdrvRequestFlags flags)
 {
-    return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_WRITE);
+    return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_WRITE, flags);
 }
 
 static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
@@ -2600,12 +2613,12 @@ static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
 
 #ifdef CONFIG_LINUX_IO_URING
     if (raw_check_linux_io_uring(s)) {
-        return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH);
+        return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH, 0);
     }
 #endif
 #ifdef CONFIG_LINUX_AIO
     if (s->has_laio_fdsync && raw_check_linux_aio(s)) {
-        return laio_co_submit(s->fd, 0, NULL, QEMU_AIO_FLUSH, 0);
+        return laio_co_submit(s->fd, 0, NULL, QEMU_AIO_FLUSH, 0, 0);
     }
 #endif
     return raw_thread_pool_submit(handle_aiocb_flush, &acb);
@@ -3540,7 +3553,7 @@ static int coroutine_fn raw_co_zone_append(BlockDriverState *bs,
     }
 
     trace_zbd_zone_append(bs, *offset >> BDRV_SECTOR_BITS);
-    return raw_co_prw(bs, offset, len, qiov, QEMU_AIO_ZONE_APPEND);
+    return raw_co_prw(bs, offset, len, qiov, QEMU_AIO_ZONE_APPEND, 0);
 }
 #endif
 
diff --git a/block/io.c b/block/io.c
index d369b994dff72..1ba8d1aeea140 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1058,6 +1058,10 @@ bdrv_driver_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
         return -ENOMEDIUM;
     }
 
+    if (bs->open_flags & BDRV_O_NO_FLUSH) {
+        flags &= ~BDRV_REQ_FUA;
+    }
+
     if ((flags & BDRV_REQ_FUA) &&
         (~bs->supported_write_flags & BDRV_REQ_FUA)) {
         flags &= ~BDRV_REQ_FUA;
diff --git a/block/io_uring.c b/block/io_uring.c
index f52b66b3407f8..dd4f304910b62 100644
--- a/block/io_uring.c
+++ b/block/io_uring.c
@@ -335,15 +335,24 @@ static void luring_deferred_fn(void *opaque)
  *
  */
 static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s,
-                            uint64_t offset, int type)
+                            uint64_t offset, int type, BdrvRequestFlags flags)
 {
     int ret;
     struct io_uring_sqe *sqes = &luringcb->sqeq;
 
     switch (type) {
     case QEMU_AIO_WRITE:
+#ifdef HAVE_IO_URING_PREP_WRITEV2
+    {
+        int luring_flags = (flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0;
+        io_uring_prep_writev2(sqes, fd, luringcb->qiov->iov,
+                              luringcb->qiov->niov, offset, luring_flags);
+    }
+#else
+        assert(flags == 0);
         io_uring_prep_writev(sqes, fd, luringcb->qiov->iov,
                              luringcb->qiov->niov, offset);
+#endif
         break;
     case QEMU_AIO_ZONE_APPEND:
         io_uring_prep_writev(sqes, fd, luringcb->qiov->iov,
@@ -380,7 +389,8 @@ static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s,
 }
 
 int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
-                                  QEMUIOVector *qiov, int type)
+                                  QEMUIOVector *qiov, int type,
+                                  BdrvRequestFlags flags)
 {
     int ret;
     AioContext *ctx = qemu_get_current_aio_context();
@@ -393,7 +403,7 @@ int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
     };
     trace_luring_co_submit(bs, s, &luringcb, fd, offset, qiov ? qiov->size : 0,
                            type);
-    ret = luring_do_submit(fd, &luringcb, s, offset, type);
+    ret = luring_do_submit(fd, &luringcb, s, offset, type, flags);
 
     if (ret < 0) {
         return ret;
@@ -448,3 +458,12 @@ void luring_cleanup(LuringState *s)
     trace_luring_cleanup_state(s);
     g_free(s);
 }
+
+bool luring_has_fua(void)
+{
+#ifdef HAVE_IO_URING_PREP_WRITEV2
+    return true;
+#else
+    return false;
+#endif
+}
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 194c8f434f153..407369f5c95cc 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -368,7 +368,8 @@ static void laio_deferred_fn(void *opaque)
 }
 
 static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
-                          int type, uint64_t dev_max_batch)
+                          int type, BdrvRequestFlags flags,
+                          uint64_t dev_max_batch)
 {
     LinuxAioState *s = laiocb->ctx;
     struct iocb *iocbs = &laiocb->iocb;
@@ -376,7 +377,15 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
 
     switch (type) {
     case QEMU_AIO_WRITE:
+#ifdef HAVE_IO_PREP_PWRITEV2
+    {
+        int laio_flags = (flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0;
+        io_prep_pwritev2(iocbs, fd, qiov->iov, qiov->niov, offset, laio_flags);
+    }
+#else
+        assert(flags == 0);
         io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset);
+#endif
         break;
     case QEMU_AIO_ZONE_APPEND:
         io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset);
@@ -409,7 +418,8 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
 }
 
 int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
-                                int type, uint64_t dev_max_batch)
+                                int type, BdrvRequestFlags flags,
+                                uint64_t dev_max_batch)
 {
     int ret;
     AioContext *ctx = qemu_get_current_aio_context();
@@ -422,7 +432,7 @@ int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
         .qiov       = qiov,
     };
 
-    ret = laio_do_submit(fd, &laiocb, offset, type, dev_max_batch);
+    ret = laio_do_submit(fd, &laiocb, offset, type, flags, dev_max_batch);
     if (ret < 0) {
         return ret;
     }
@@ -505,3 +515,12 @@ bool laio_has_fdsync(int fd)
     io_destroy(ctx);
     return (ret == -EINVAL) ? false : true;
 }
+
+bool laio_has_fua(void)
+{
+#ifdef HAVE_IO_PREP_PWRITEV2
+    return true;
+#else
+    return false;
+#endif
+}
diff --git a/block/qed.c b/block/qed.c
index 382c9e533579b..ac24449ffb3f4 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -353,6 +353,7 @@ static void bdrv_qed_detach_aio_context(BlockDriverState *bs)
 
     qed_cancel_need_check_timer(s);
     timer_free(s->need_check_timer);
+    s->need_check_timer = NULL;
 }
 
 static void bdrv_qed_attach_aio_context(BlockDriverState *bs,
diff --git a/block/snapshot.c b/block/snapshot.c
index 9c44780e96dbc..22567f1fb983f 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -296,6 +296,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
         bdrv_graph_wrunlock();
 
         ret = bdrv_snapshot_goto(fallback_bs, snapshot_id, errp);
+        memset(bs->opaque, 0, drv->instance_size);
         open_ret = drv->bdrv_open(bs, options, bs->open_flags, &local_err);
         qobject_unref(options);
         if (open_ret < 0) {
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
index 3f6f4ef92b441..1e3e634b87d1b 100644
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -219,12 +219,12 @@ void nbd_server_start_options(NbdServerOptions *arg, Error **errp)
                      arg->tls_authz, arg->max_connections, errp);
 }
 
-void qmp_nbd_server_start(SocketAddressLegacy *addr,
-                          bool has_handshake_max_secs,
+void qmp_nbd_server_start(bool has_handshake_max_secs,
                           uint32_t handshake_max_secs,
                           const char *tls_creds,
                           const char *tls_authz,
                           bool has_max_connections, uint32_t max_connections,
+                          SocketAddressLegacy *addr,
                           Error **errp)
 {
     SocketAddress *addr_flat = socket_address_flatten(addr);
diff --git a/bsd-user/bsd-mem.h b/bsd-user/bsd-mem.h
index f5ec0de24ca6e..90ca0e33775a6 100644
--- a/bsd-user/bsd-mem.h
+++ b/bsd-user/bsd-mem.h
@@ -370,9 +370,11 @@ static inline abi_long do_bsd_shmat(int shmid, abi_ulong shmaddr, int shmflg)
         if (shmaddr) {
             host_raddr = shmat(shmid, (void *)g2h_untagged(shmaddr), shmflg);
         } else {
+            abi_ulong alignment;
             abi_ulong mmap_start;
 
-            mmap_start = mmap_find_vma(0, shm_info.shm_segsz);
+            alignment = 0; /* alignment above page size not required */
+            mmap_start = mmap_find_vma(0, shm_info.shm_segsz, alignment);
 
             if (mmap_start == -1) {
                 return -TARGET_ENOMEM;
diff --git a/bsd-user/meson.build b/bsd-user/meson.build
index 39bad0ae33e7a..37b7cd6de87cb 100644
--- a/bsd-user/meson.build
+++ b/bsd-user/meson.build
@@ -13,6 +13,7 @@ bsd_user_ss.add(files(
   'elfload.c',
   'main.c',
   'mmap.c',
+  'plugin-api.c',
   'signal.c',
   'strace.c',
   'uaccess.c',
diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c
index 346f2cefd32e3..3f0df79c375a4 100644
--- a/bsd-user/mmap.c
+++ b/bsd-user/mmap.c
@@ -275,8 +275,7 @@ static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
  * It must be called with mmap_lock() held.
  * Return -1 if error.
  */
-static abi_ulong mmap_find_vma_aligned(abi_ulong start, abi_ulong size,
-                                       abi_ulong alignment)
+abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong alignment)
 {
     void *ptr, *prev;
     abi_ulong addr;
@@ -395,11 +394,6 @@ static abi_ulong mmap_find_vma_aligned(abi_ulong start, abi_ulong size,
     }
 }
 
-abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size)
-{
-    return mmap_find_vma_aligned(start, size, 0);
-}
-
 /* NOTE: all the constants are the HOST ones */
 abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
                      int flags, int fd, off_t offset)
@@ -489,13 +483,12 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
      * before we truncate the length for mapping files below.
      */
     if (!(flags & MAP_FIXED)) {
+        abi_ulong alignment;
+
         host_len = len + offset - host_offset;
         host_len = HOST_PAGE_ALIGN(host_len);
-        if ((flags & MAP_ALIGNMENT_MASK) != 0)
-            start = mmap_find_vma_aligned(real_start, host_len,
-                (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT);
-        else
-            start = mmap_find_vma(real_start, host_len);
+        alignment = (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT;
+        start = mmap_find_vma(real_start, host_len, alignment);
         if (start == (abi_ulong)-1) {
             errno = ENOMEM;
             goto fail;
diff --git a/bsd-user/plugin-api.c b/bsd-user/plugin-api.c
new file mode 100644
index 0000000000000..6ccef7eaa04d8
--- /dev/null
+++ b/bsd-user/plugin-api.c
@@ -0,0 +1,15 @@
+/*
+ * QEMU Plugin API - bsd-user-mode only implementations
+ *
+ * Common user-mode only APIs are in plugins/api-user. These helpers
+ * are only specific to bsd-user.
+ *
+ * Copyright (C) 2017, Emilio G. Cota <cota@braap.org>
+ * Copyright (C) 2019-2025, Linaro
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu.h"
+#include "common-user/plugin-api.c.inc"
diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h
index 3eaa14f3f56d5..c1c508281a824 100644
--- a/bsd-user/qemu.h
+++ b/bsd-user/qemu.h
@@ -32,6 +32,7 @@
 extern char **environ;
 
 #include "user/thunk.h"
+#include "user/mmap.h"
 #include "target_arch.h"
 #include "syscall_defs.h"
 #include "target_syscall.h"
@@ -40,7 +41,6 @@ extern char **environ;
 #include "target.h"
 #include "exec/gdbstub.h"
 #include "exec/page-protection.h"
-#include "qemu/clang-tsa.h"
 #include "accel/tcg/vcpu-state.h"
 
 #include "qemu-os.h"
@@ -234,19 +234,8 @@ void print_taken_signal(int target_signum, const target_siginfo_t *tinfo);
 extern int do_strace;
 
 /* mmap.c */
-int target_mprotect(abi_ulong start, abi_ulong len, int prot);
-abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
-                     int flags, int fd, off_t offset);
-int target_munmap(abi_ulong start, abi_ulong len);
-abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
-                       abi_ulong new_size, unsigned long flags,
-                       abi_ulong new_addr);
 int target_msync(abi_ulong start, abi_ulong len, int flags);
-extern abi_ulong mmap_next_start;
-abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size);
 void mmap_reserve(abi_ulong start, abi_ulong size);
-void TSA_NO_TSA mmap_fork_start(void);
-void TSA_NO_TSA mmap_fork_end(int child);
 
 /* main.c */
 extern char qemu_proc_pathname[];
diff --git a/bsd-user/signal.c b/bsd-user/signal.c
index ff2ccbbf605af..a8cfcca130ecb 100644
--- a/bsd-user/signal.c
+++ b/bsd-user/signal.c
@@ -29,7 +29,7 @@
 #include "gdbstub/user.h"
 #include "signal-common.h"
 #include "trace.h"
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 #include "host-signal.h"
 
 /* target_siginfo_t must fit in gdbstub's siginfo save area. */
@@ -1034,7 +1034,7 @@ void process_pending_signals(CPUArchState *env)
 void cpu_loop_exit_sigsegv(CPUState *cpu, target_ulong addr,
                            MMUAccessType access_type, bool maperr, uintptr_t ra)
 {
-    const TCGCPUOps *tcg_ops = CPU_GET_CLASS(cpu)->tcg_ops;
+    const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
 
     if (tcg_ops->record_sigsegv) {
         tcg_ops->record_sigsegv(cpu, addr, access_type, maperr, ra);
@@ -1050,7 +1050,7 @@ void cpu_loop_exit_sigsegv(CPUState *cpu, target_ulong addr,
 void cpu_loop_exit_sigbus(CPUState *cpu, target_ulong addr,
                           MMUAccessType access_type, uintptr_t ra)
 {
-    const TCGCPUOps *tcg_ops = CPU_GET_CLASS(cpu)->tcg_ops;
+    const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
 
     if (tcg_ops->record_sigbus) {
         tcg_ops->record_sigbus(cpu, addr, access_type, ra);
diff --git a/chardev/char-socket.c b/chardev/char-socket.c
index 91496ceda90ce..2f842f9f88b91 100644
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -571,9 +571,13 @@ static char *qemu_chr_compute_filename(SocketChardev *s)
 
     switch (ss->ss_family) {
     case AF_UNIX:
-        return g_strdup_printf("unix:%s%s",
-                               ((struct sockaddr_un *)(ss))->sun_path,
-                               s->is_listen ? ",server=on" : "");
+        if (s->is_listen) {
+            return g_strdup_printf("unix:%s,server=on",
+                                   ((struct sockaddr_un *)(ss))->sun_path);
+        } else {
+            return g_strdup_printf("unix:%s",
+                                   ((struct sockaddr_un *)(ps))->sun_path);
+        }
     case AF_INET6:
         left  = "[";
         right = "]";
diff --git a/common-user/plugin-api.c.inc b/common-user/plugin-api.c.inc
new file mode 100644
index 0000000000000..5b8a1396b6068
--- /dev/null
+++ b/common-user/plugin-api.c.inc
@@ -0,0 +1,43 @@
+/*
+ * QEMU Plugin API - *-user-mode only implementations
+ *
+ * Common user-mode only APIs are in plugins/api-user. These helpers
+ * are only specific to the *-user frontends.
+ *
+ * Copyright (C) 2017, Emilio G. Cota <cota@braap.org>
+ * Copyright (C) 2019-2025, Linaro
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "qemu/plugin.h"
+#include "qemu.h"
+
+/*
+ * Binary path, start and end locations. Host specific due to TaskState.
+ */
+const char *qemu_plugin_path_to_binary(void)
+{
+    TaskState *ts = get_task_state(current_cpu);
+    return g_strdup(ts->bprm->filename);
+}
+
+uint64_t qemu_plugin_start_code(void)
+{
+    TaskState *ts = get_task_state(current_cpu);
+    return ts->info->start_code;
+}
+
+uint64_t qemu_plugin_end_code(void)
+{
+    TaskState *ts = get_task_state(current_cpu);
+    return ts->info->end_code;
+}
+
+uint64_t qemu_plugin_entry_code(void)
+{
+    TaskState *ts = get_task_state(current_cpu);
+    return ts->info->entry;
+}
diff --git a/configs/devices/aarch64-softmmu/default.mak b/configs/devices/aarch64-softmmu/default.mak
index f82a04c27d1ba..93f4022ad62e2 100644
--- a/configs/devices/aarch64-softmmu/default.mak
+++ b/configs/devices/aarch64-softmmu/default.mak
@@ -8,3 +8,4 @@ include ../arm-softmmu/default.mak
 # CONFIG_XLNX_ZYNQMP_ARM=n
 # CONFIG_XLNX_VERSAL=n
 # CONFIG_SBSA_REF=n
+# CONFIG_NPCM8XX=n
diff --git a/configs/devices/hexagon-softmmu/default.mak b/configs/devices/hexagon-softmmu/default.mak
new file mode 100644
index 0000000000000..37b4f9f3237aa
--- /dev/null
+++ b/configs/devices/hexagon-softmmu/default.mak
@@ -0,0 +1,8 @@
+# Default configuration for hexagon-softmmu
+
+# Uncomment the following lines to disable these optional devices:
+
+# Boards are selected by default, uncomment to keep out of the build.
+# CONFIG_HEX_VIRT=y
+# CONFIG_HEX_DSP=y
+# CONFIG_L2VIC=y
diff --git a/configs/devices/i386-softmmu/default.mak b/configs/devices/i386-softmmu/default.mak
index 4faf2f0315e25..9ef343cace06c 100644
--- a/configs/devices/i386-softmmu/default.mak
+++ b/configs/devices/i386-softmmu/default.mak
@@ -6,6 +6,7 @@
 #CONFIG_APPLESMC=n
 #CONFIG_FDC=n
 #CONFIG_HPET=n
+#CONFIG_X_HPET_RUST=n
 #CONFIG_HYPERV=n
 #CONFIG_ISA_DEBUG=n
 #CONFIG_ISA_IPMI_BT=n
diff --git a/configs/targets/hexagon-softmmu.mak b/configs/targets/hexagon-softmmu.mak
new file mode 100644
index 0000000000000..03cf1306a3484
--- /dev/null
+++ b/configs/targets/hexagon-softmmu.mak
@@ -0,0 +1,10 @@
+# Default configuration for hexagon-softmmu
+
+TARGET_ARCH=hexagon
+TARGET_SUPPORTS_MTTCG=y
+TARGET_XML_FILES=gdb-xml/hexagon-core.xml gdb-xml/hexagon-hvx.xml gdb-xml/hexagon-sys.xml
+TARGET_LONG_BITS=32
+TARGET_NEED_FDT=y
+CONFIG_SEMIHOSTING=y
+CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y
+CONFIG_SEMIHOSTING_USE_STDIO=y
diff --git a/contrib/plugins/meson.build b/contrib/plugins/meson.build
index 484b9a808c83d..fa8a426c8b594 100644
--- a/contrib/plugins/meson.build
+++ b/contrib/plugins/meson.build
@@ -26,3 +26,5 @@ if t.length() > 0
 else
   run_target('contrib-plugins', command: find_program('true'))
 endif
+
+plugin_modules += t
diff --git a/contrib/vmapple/uuid.sh b/contrib/vmapple/uuid.sh
new file mode 100755
index 0000000000000..f5637221d236a
--- /dev/null
+++ b/contrib/vmapple/uuid.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+#
+# Used for converting a guest provisioned using Virtualization.framework
+# for use with the QEMU 'vmapple' aarch64 machine type.
+#
+# Extracts the Machine UUID from Virtualization.framework VM JSON file.
+# (as produced by 'macosvm', passed as command line argument)
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+plutil -extract machineId raw "$1" | base64 -d | plutil -extract ECID raw -
+
diff --git a/cpu-common.c b/cpu-common.c
index 4248b2d727ed8..ef5757d23bf68 100644
--- a/cpu-common.c
+++ b/cpu-common.c
@@ -21,7 +21,6 @@
 #include "qemu/main-loop.h"
 #include "exec/cpu-common.h"
 #include "hw/core/cpu.h"
-#include "system/cpus.h"
 #include "qemu/lockable.h"
 #include "trace/trace-root.h"
 
@@ -389,11 +388,10 @@ void process_queued_cpu_work(CPUState *cpu)
 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
                           CPUBreakpoint **breakpoint)
 {
-    CPUClass *cc = CPU_GET_CLASS(cpu);
     CPUBreakpoint *bp;
 
-    if (cc->gdb_adjust_breakpoint) {
-        pc = cc->gdb_adjust_breakpoint(cpu, pc);
+    if (cpu->cc->gdb_adjust_breakpoint) {
+        pc = cpu->cc->gdb_adjust_breakpoint(cpu, pc);
     }
 
     bp = g_malloc(sizeof(*bp));
@@ -419,11 +417,10 @@ int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
 /* Remove a specific breakpoint.  */
 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
 {
-    CPUClass *cc = CPU_GET_CLASS(cpu);
     CPUBreakpoint *bp;
 
-    if (cc->gdb_adjust_breakpoint) {
-        pc = cc->gdb_adjust_breakpoint(cpu, pc);
+    if (cpu->cc->gdb_adjust_breakpoint) {
+        pc = cpu->cc->gdb_adjust_breakpoint(cpu, pc);
     }
 
     QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
diff --git a/cpu-target.c b/cpu-target.c
index 667688332c929..cae77374b3849 100644
--- a/cpu-target.c
+++ b/cpu-target.c
@@ -19,232 +19,15 @@
 
 #include "qemu/osdep.h"
 #include "qapi/error.h"
-
-#include "exec/target_page.h"
-#include "exec/page-protection.h"
-#include "hw/qdev-core.h"
-#include "hw/qdev-properties.h"
 #include "qemu/error-report.h"
 #include "qemu/qemu-print.h"
-#include "migration/vmstate.h"
-#ifdef CONFIG_USER_ONLY
-#include "qemu.h"
-#include "user/page-protection.h"
-#else
-#include "hw/core/sysemu-cpu-ops.h"
-#include "exec/address-spaces.h"
-#include "exec/memory.h"
-#endif
+#include "system/accel-ops.h"
 #include "system/cpus.h"
-#include "system/tcg.h"
 #include "exec/tswap.h"
 #include "exec/replay-core.h"
-#include "exec/cpu-common.h"
-#include "exec/exec-all.h"
-#include "exec/tb-flush.h"
-#include "exec/translation-block.h"
 #include "exec/log.h"
-#include "hw/core/accel-cpu.h"
+#include "accel/accel-cpu-target.h"
 #include "trace/trace-root.h"
-#include "qemu/accel.h"
-
-#ifndef CONFIG_USER_ONLY
-static int cpu_common_post_load(void *opaque, int version_id)
-{
-    CPUState *cpu = opaque;
-
-    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
-       version_id is increased. */
-    cpu->interrupt_request &= ~0x01;
-    tlb_flush(cpu);
-
-    /* loadvm has just updated the content of RAM, bypassing the
-     * usual mechanisms that ensure we flush TBs for writes to
-     * memory we've translated code from. So we must flush all TBs,
-     * which will now be stale.
-     */
-    tb_flush(cpu);
-
-    return 0;
-}
-
-static int cpu_common_pre_load(void *opaque)
-{
-    CPUState *cpu = opaque;
-
-    cpu->exception_index = -1;
-
-    return 0;
-}
-
-static bool cpu_common_exception_index_needed(void *opaque)
-{
-    CPUState *cpu = opaque;
-
-    return tcg_enabled() && cpu->exception_index != -1;
-}
-
-static const VMStateDescription vmstate_cpu_common_exception_index = {
-    .name = "cpu_common/exception_index",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .needed = cpu_common_exception_index_needed,
-    .fields = (const VMStateField[]) {
-        VMSTATE_INT32(exception_index, CPUState),
-        VMSTATE_END_OF_LIST()
-    }
-};
-
-static bool cpu_common_crash_occurred_needed(void *opaque)
-{
-    CPUState *cpu = opaque;
-
-    return cpu->crash_occurred;
-}
-
-static const VMStateDescription vmstate_cpu_common_crash_occurred = {
-    .name = "cpu_common/crash_occurred",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .needed = cpu_common_crash_occurred_needed,
-    .fields = (const VMStateField[]) {
-        VMSTATE_BOOL(crash_occurred, CPUState),
-        VMSTATE_END_OF_LIST()
-    }
-};
-
-const VMStateDescription vmstate_cpu_common = {
-    .name = "cpu_common",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .pre_load = cpu_common_pre_load,
-    .post_load = cpu_common_post_load,
-    .fields = (const VMStateField[]) {
-        VMSTATE_UINT32(halted, CPUState),
-        VMSTATE_UINT32(interrupt_request, CPUState),
-        VMSTATE_END_OF_LIST()
-    },
-    .subsections = (const VMStateDescription * const []) {
-        &vmstate_cpu_common_exception_index,
-        &vmstate_cpu_common_crash_occurred,
-        NULL
-    }
-};
-#endif
-
-bool cpu_exec_realizefn(CPUState *cpu, Error **errp)
-{
-    /* cache the cpu class for the hotpath */
-    cpu->cc = CPU_GET_CLASS(cpu);
-
-    if (!accel_cpu_common_realize(cpu, errp)) {
-        return false;
-    }
-
-    /* Wait until cpu initialization complete before exposing cpu. */
-    cpu_list_add(cpu);
-
-#ifdef CONFIG_USER_ONLY
-    assert(qdev_get_vmsd(DEVICE(cpu)) == NULL ||
-           qdev_get_vmsd(DEVICE(cpu))->unmigratable);
-#else
-    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
-        vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
-    }
-    if (cpu->cc->sysemu_ops->legacy_vmsd != NULL) {
-        vmstate_register(NULL, cpu->cpu_index, cpu->cc->sysemu_ops->legacy_vmsd, cpu);
-    }
-#endif /* CONFIG_USER_ONLY */
-
-    return true;
-}
-
-void cpu_exec_unrealizefn(CPUState *cpu)
-{
-#ifndef CONFIG_USER_ONLY
-    CPUClass *cc = CPU_GET_CLASS(cpu);
-
-    if (cc->sysemu_ops->legacy_vmsd != NULL) {
-        vmstate_unregister(NULL, cc->sysemu_ops->legacy_vmsd, cpu);
-    }
-    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
-        vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
-    }
-#endif
-
-    cpu_list_remove(cpu);
-    /*
-     * Now that the vCPU has been removed from the RCU list, we can call
-     * accel_cpu_common_unrealize, which may free fields using call_rcu.
-     */
-    accel_cpu_common_unrealize(cpu);
-}
-
-/*
- * This can't go in hw/core/cpu.c because that file is compiled only
- * once for both user-mode and system builds.
- */
-static const Property cpu_common_props[] = {
-#ifdef CONFIG_USER_ONLY
-    /*
-     * Create a property for the user-only object, so users can
-     * adjust prctl(PR_SET_UNALIGN) from the command-line.
-     * Has no effect if the target does not support the feature.
-     */
-    DEFINE_PROP_BOOL("prctl-unalign-sigbus", CPUState,
-                     prctl_unalign_sigbus, false),
-#else
-    /*
-     * Create a memory property for system CPU object, so users can
-     * wire up its memory.  The default if no link is set up is to use
-     * the system address space.
-     */
-    DEFINE_PROP_LINK("memory", CPUState, memory, TYPE_MEMORY_REGION,
-                     MemoryRegion *),
-#endif
-};
-
-#ifndef CONFIG_USER_ONLY
-static bool cpu_get_start_powered_off(Object *obj, Error **errp)
-{
-    CPUState *cpu = CPU(obj);
-    return cpu->start_powered_off;
-}
-
-static void cpu_set_start_powered_off(Object *obj, bool value, Error **errp)
-{
-    CPUState *cpu = CPU(obj);
-    cpu->start_powered_off = value;
-}
-#endif
-
-void cpu_class_init_props(DeviceClass *dc)
-{
-#ifndef CONFIG_USER_ONLY
-    ObjectClass *oc = OBJECT_CLASS(dc);
-
-    /*
-     * We can't use DEFINE_PROP_BOOL in the Property array for this
-     * property, because we want this to be settable after realize.
-     */
-    object_class_property_add_bool(oc, "start-powered-off",
-                                   cpu_get_start_powered_off,
-                                   cpu_set_start_powered_off);
-#endif
-
-    device_class_set_props(dc, cpu_common_props);
-}
-
-void cpu_exec_initfn(CPUState *cpu)
-{
-    cpu->as = NULL;
-    cpu->num_ases = 0;
-
-#ifndef CONFIG_USER_ONLY
-    cpu->memory = get_system_memory();
-    object_ref(OBJECT(cpu->memory));
-#endif
-}
 
 char *cpu_model_from_type(const char *typename)
 {
@@ -372,97 +155,6 @@ void cpu_abort(CPUState *cpu, const char *fmt, ...)
     abort();
 }
 
-/* physical memory access (slow version, mainly for debug) */
-#if defined(CONFIG_USER_ONLY)
-int cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
-                        void *ptr, size_t len, bool is_write)
-{
-    int flags;
-    vaddr l, page;
-    void * p;
-    uint8_t *buf = ptr;
-    ssize_t written;
-    int ret = -1;
-    int fd = -1;
-
-    while (len > 0) {
-        page = addr & TARGET_PAGE_MASK;
-        l = (page + TARGET_PAGE_SIZE) - addr;
-        if (l > len)
-            l = len;
-        flags = page_get_flags(page);
-        if (!(flags & PAGE_VALID)) {
-            goto out_close;
-        }
-        if (is_write) {
-            if (flags & PAGE_WRITE) {
-                /* XXX: this code should not depend on lock_user */
-                p = lock_user(VERIFY_WRITE, addr, l, 0);
-                if (!p) {
-                    goto out_close;
-                }
-                memcpy(p, buf, l);
-                unlock_user(p, addr, l);
-            } else {
-                /* Bypass the host page protection using ptrace. */
-                if (fd == -1) {
-                    fd = open("/proc/self/mem", O_WRONLY);
-                    if (fd == -1) {
-                        goto out;
-                    }
-                }
-                /*
-                 * If there is a TranslationBlock and we weren't bypassing the
-                 * host page protection, the memcpy() above would SEGV,
-                 * ultimately leading to page_unprotect(). So invalidate the
-                 * translations manually. Both invalidation and pwrite() must
-                 * be under mmap_lock() in order to prevent the creation of
-                 * another TranslationBlock in between.
-                 */
-                mmap_lock();
-                tb_invalidate_phys_range(addr, addr + l - 1);
-                written = pwrite(fd, buf, l,
-                                 (off_t)(uintptr_t)g2h_untagged(addr));
-                mmap_unlock();
-                if (written != l) {
-                    goto out_close;
-                }
-            }
-        } else if (flags & PAGE_READ) {
-            /* XXX: this code should not depend on lock_user */
-            p = lock_user(VERIFY_READ, addr, l, 1);
-            if (!p) {
-                goto out_close;
-            }
-            memcpy(buf, p, l);
-            unlock_user(p, addr, 0);
-        } else {
-            /* Bypass the host page protection using ptrace. */
-            if (fd == -1) {
-                fd = open("/proc/self/mem", O_RDONLY);
-                if (fd == -1) {
-                    goto out;
-                }
-            }
-            if (pread(fd, buf, l,
-                      (off_t)(uintptr_t)g2h_untagged(addr)) != l) {
-                goto out_close;
-            }
-        }
-        len -= l;
-        buf += l;
-        addr += l;
-    }
-    ret = 0;
-out_close:
-    if (fd != -1) {
-        close(fd);
-    }
-out:
-    return ret;
-}
-#endif
-
 bool target_words_bigendian(void)
 {
     return TARGET_BIG_ENDIAN;
diff --git a/crypto/hash-afalg.c b/crypto/hash-afalg.c
index 8c0ce5b52000d..bd3fe3b427236 100644
--- a/crypto/hash-afalg.c
+++ b/crypto/hash-afalg.c
@@ -59,7 +59,7 @@ qcrypto_afalg_hash_format_name(QCryptoHashAlgo alg,
     if (is_hmac) {
         name = g_strdup_printf("hmac(%s)", alg_name);
     } else {
-        name = g_strdup_printf("%s", alg_name);
+        name = g_strdup(alg_name);
     }
 
     return name;
diff --git a/crypto/tlssession.c b/crypto/tlssession.c
index 77286e23f4698..6d8f8df62323b 100644
--- a/crypto/tlssession.c
+++ b/crypto/tlssession.c
@@ -546,45 +546,69 @@ qcrypto_tls_session_handshake(QCryptoTLSSession *session,
                               Error **errp)
 {
     int ret = gnutls_handshake(session->handle);
-    if (ret == 0) {
+    if (!ret) {
         session->handshakeComplete = true;
+        return QCRYPTO_TLS_HANDSHAKE_COMPLETE;
+    }
+
+    if (ret == GNUTLS_E_INTERRUPTED || ret == GNUTLS_E_AGAIN) {
+        int direction = gnutls_record_get_direction(session->handle);
+        return direction ? QCRYPTO_TLS_HANDSHAKE_SENDING :
+            QCRYPTO_TLS_HANDSHAKE_RECVING;
+    }
+
+    if (session->rerr || session->werr) {
+        error_setg(errp, "TLS handshake failed: %s: %s",
+                   gnutls_strerror(ret),
+                   error_get_pretty(session->rerr ?
+                                    session->rerr : session->werr));
     } else {
-        if (ret == GNUTLS_E_INTERRUPTED ||
-            ret == GNUTLS_E_AGAIN) {
-            ret = 1;
-        } else {
-            if (session->rerr || session->werr) {
-                error_setg(errp, "TLS handshake failed: %s: %s",
-                           gnutls_strerror(ret),
-                           error_get_pretty(session->rerr ?
-                                            session->rerr : session->werr));
-            } else {
-                error_setg(errp, "TLS handshake failed: %s",
-                           gnutls_strerror(ret));
-            }
-            ret = -1;
-        }
+        error_setg(errp, "TLS handshake failed: %s",
+                   gnutls_strerror(ret));
     }
+
     error_free(session->rerr);
     error_free(session->werr);
     session->rerr = session->werr = NULL;
 
-    return ret;
+    return -1;
 }
 
 
-QCryptoTLSSessionHandshakeStatus
-qcrypto_tls_session_get_handshake_status(QCryptoTLSSession *session)
+int
+qcrypto_tls_session_bye(QCryptoTLSSession *session, Error **errp)
 {
-    if (session->handshakeComplete) {
-        return QCRYPTO_TLS_HANDSHAKE_COMPLETE;
-    } else if (gnutls_record_get_direction(session->handle) == 0) {
-        return QCRYPTO_TLS_HANDSHAKE_RECVING;
+    int ret;
+
+    if (!session->handshakeComplete) {
+        return 0;
+    }
+
+    ret = gnutls_bye(session->handle, GNUTLS_SHUT_WR);
+
+    if (!ret) {
+        return QCRYPTO_TLS_BYE_COMPLETE;
+    }
+
+    if (ret == GNUTLS_E_INTERRUPTED || ret == GNUTLS_E_AGAIN) {
+        int direction = gnutls_record_get_direction(session->handle);
+        return direction ? QCRYPTO_TLS_BYE_SENDING : QCRYPTO_TLS_BYE_RECVING;
+    }
+
+    if (session->rerr || session->werr) {
+        error_setg(errp, "TLS termination failed: %s: %s", gnutls_strerror(ret),
+                   error_get_pretty(session->rerr ?
+                                    session->rerr : session->werr));
     } else {
-        return QCRYPTO_TLS_HANDSHAKE_SENDING;
+        error_setg(errp, "TLS termination failed: %s", gnutls_strerror(ret));
     }
-}
 
+    error_free(session->rerr);
+    error_free(session->werr);
+    session->rerr = session->werr = NULL;
+
+    return -1;
+}
 
 int
 qcrypto_tls_session_get_key_size(QCryptoTLSSession *session,
@@ -692,10 +716,10 @@ qcrypto_tls_session_handshake(QCryptoTLSSession *sess,
 }
 
 
-QCryptoTLSSessionHandshakeStatus
-qcrypto_tls_session_get_handshake_status(QCryptoTLSSession *sess)
+int
+qcrypto_tls_session_bye(QCryptoTLSSession *session, Error **errp)
 {
-    return QCRYPTO_TLS_HANDSHAKE_COMPLETE;
+    return QCRYPTO_TLS_BYE_COMPLETE;
 }
 
 
diff --git a/disas/disas-common.c b/disas/disas-common.c
index de61f6d8a1233..21c2f03430b80 100644
--- a/disas/disas-common.c
+++ b/disas/disas-common.c
@@ -7,7 +7,6 @@
 #include "disas/disas.h"
 #include "disas/capstone.h"
 #include "hw/core/cpu.h"
-#include "exec/tswap.h"
 #include "disas-internal.h"
 
 
@@ -61,15 +60,11 @@ void disas_initialize_debug_target(CPUDebug *s, CPUState *cpu)
 
     s->cpu = cpu;
     s->info.print_address_func = print_address;
-    if (target_words_bigendian()) {
-        s->info.endian = BFD_ENDIAN_BIG;
-    } else {
-        s->info.endian =  BFD_ENDIAN_LITTLE;
-    }
+    s->info.endian = BFD_ENDIAN_UNKNOWN;
 
-    CPUClass *cc = CPU_GET_CLASS(cpu);
-    if (cc->disas_set_info) {
-        cc->disas_set_info(cpu, &s->info);
+    if (cpu->cc->disas_set_info) {
+        cpu->cc->disas_set_info(cpu, &s->info);
+        g_assert(s->info.endian != BFD_ENDIAN_UNKNOWN);
     }
 }
 
diff --git a/disas/riscv.c b/disas/riscv.c
index 4075ed6bfef08..85cd2a9c2aefe 100644
--- a/disas/riscv.c
+++ b/disas/riscv.c
@@ -1662,7 +1662,7 @@ const rv_opcode_data rvi_opcode_data[] = {
     { "aes32esi", rv_codec_k_bs, rv_fmt_rs1_rs2_bs, NULL, 0, 0, 0 },
     { "aes32dsmi", rv_codec_k_bs, rv_fmt_rs1_rs2_bs, NULL, 0, 0, 0 },
     { "aes32dsi", rv_codec_k_bs, rv_fmt_rs1_rs2_bs, NULL, 0, 0, 0 },
-    { "aes64ks1i", rv_codec_k_rnum,  rv_fmt_rd_rs1_rnum, NULL, 0, 0, 0 },
+    { "aes64ks1i", rv_codec_k_rnum, rv_fmt_rd_rs1_rnum, NULL, 0, 0, 0 },
     { "aes64ks2", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 },
     { "aes64im", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0 },
     { "aes64esm", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 },
@@ -2214,11 +2214,11 @@ const rv_opcode_data rvi_opcode_data[] = {
     { "mop.rr.5", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 },
     { "mop.rr.6", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 },
     { "mop.rr.7", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 },
-    { "c.mop.1",  rv_codec_ci_none, rv_fmt_none, NULL, 0, 0, 0 },
-    { "c.mop.3",  rv_codec_ci_none, rv_fmt_none, NULL, 0, 0, 0 },
-    { "c.mop.5",  rv_codec_ci_none, rv_fmt_none, NULL, 0, 0, 0 },
-    { "c.mop.7",  rv_codec_ci_none, rv_fmt_none, NULL, 0, 0, 0 },
-    { "c.mop.9",  rv_codec_ci_none, rv_fmt_none, NULL, 0, 0, 0 },
+    { "c.mop.1", rv_codec_ci_none, rv_fmt_none, NULL, 0, 0, 0 },
+    { "c.mop.3", rv_codec_ci_none, rv_fmt_none, NULL, 0, 0, 0 },
+    { "c.mop.5", rv_codec_ci_none, rv_fmt_none, NULL, 0, 0, 0 },
+    { "c.mop.7", rv_codec_ci_none, rv_fmt_none, NULL, 0, 0, 0 },
+    { "c.mop.9", rv_codec_ci_none, rv_fmt_none, NULL, 0, 0, 0 },
     { "c.mop.11", rv_codec_ci_none, rv_fmt_none, NULL, 0, 0, 0 },
     { "c.mop.13", rv_codec_ci_none, rv_fmt_none, NULL, 0, 0, 0 },
     { "c.mop.15", rv_codec_ci_none, rv_fmt_none, NULL, 0, 0, 0 },
@@ -2438,9 +2438,11 @@ static const char *csr_name(int csrno)
     case 0x07a1: return "tdata1";
     case 0x07a2: return "tdata2";
     case 0x07a3: return "tdata3";
+    case 0x07a4: return "tinfo";
     case 0x07b0: return "dcsr";
     case 0x07b1: return "dpc";
-    case 0x07b2: return "dscratch";
+    case 0x07b2: return "dscratch0";
+    case 0x07b3: return "dscratch1";
     case 0x0b00: return "mcycle";
     case 0x0b01: return "mtime";
     case 0x0b02: return "minstret";
diff --git a/docs/about/build-platforms.rst b/docs/about/build-platforms.rst
index 482b09819c155..1552b1a70441b 100644
--- a/docs/about/build-platforms.rst
+++ b/docs/about/build-platforms.rst
@@ -101,7 +101,7 @@ Python runtime
   option of the ``configure`` script to point QEMU to a supported
   version of the Python runtime.
 
-  As of QEMU |version|, the minimum supported version of Python is 3.7.
+  As of QEMU |version|, the minimum supported version of Python is 3.8.
 
 Python build dependencies
   Some of QEMU's build dependencies are written in Python.  Usually these
diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst
index 4c86620f6d990..e2b4f077d453a 100644
--- a/docs/about/deprecated.rst
+++ b/docs/about/deprecated.rst
@@ -266,6 +266,15 @@ in the QEMU object model anymore. ``Sun-UltraSparc-IIIi+`` and
 but for consistency these will get removed in a future release, too.
 Use ``Sun-UltraSparc-IIIi-plus`` and ``Sun-UltraSparc-IV-plus`` instead.
 
+PPC 405 CPUs (since 10.0)
+'''''''''''''''''''''''''
+
+The PPC 405 CPU has no known users and the ``ref405ep`` machine was
+removed in QEMU 10.0. Since the IBM POWER [8-11] processors uses an
+embedded 405 for power management (OCC) and other internal tasks, it
+is theoretically possible to use QEMU to model them. Let's keep the
+CPU implementation for a while before removing all support.
+
 System emulator machines
 ------------------------
 
@@ -277,21 +286,6 @@ deprecated; use the new name ``dtb-randomness`` instead. The new name
 better reflects the way this property affects all random data within
 the device tree blob, not just the ``kaslr-seed`` node.
 
-``pc-i440fx-2.4`` up to ``pc-i440fx-2.12`` (since 9.1)
-''''''''''''''''''''''''''''''''''''''''''''''''''''''
-
-These old machine types are quite neglected nowadays and thus might have
-various pitfalls with regards to live migration. Use a newer machine type
-instead.
-
-PPC 405 ``ref405ep`` machine (since 9.1)
-''''''''''''''''''''''''''''''''''''''''
-
-The ``ref405ep`` machine and PPC 405 CPU have no known users, firmware
-images are not available, OpenWRT dropped support in 2019, U-Boot in
-2017, Linux also is dropping support in 2024. It is time to let go of
-this ancient hardware and focus on newer CPUs and platforms.
-
 Big-Endian variants of MicroBlaze ``petalogix-ml605`` and ``xlnx-zynqmp-pmu`` machines (since 9.2)
 ''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
 
@@ -441,6 +435,31 @@ Stream ``reconnect`` (since 9.2)
 The ``reconnect`` option only allows specifiying second granularity timeouts,
 which is not enough for all types of use cases, use ``reconnect-ms`` instead.
 
+VFIO device options
+'''''''''''''''''''
+
+``-device vfio-calxeda-xgmac`` (since 10.0)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+The vfio-calxeda-xgmac device allows to assign a host Calxeda Highbank
+10Gb XGMAC Ethernet controller device ("calxeda,hb-xgmac" compatibility
+string) to a guest. Calxeda HW has been ewasted now and there is no point
+keeping that device.
+
+``-device vfio-amd-xgbe`` (since 10.0)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+The vfio-amd-xgbe device allows to assign a host AMD 10GbE controller
+to a guest ("amd,xgbe-seattle-v1a" compatibility string). AMD "Seattle"
+is not supported anymore and there is no point keeping that device.
+
+``-device vfio-platform`` (since 10.0)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+The vfio-platform device allows to assign a host platform device
+to a guest in a generic manner. Integrating a new device into
+the vfio-platform infrastructure requires some adaptation at
+both kernel and qemu level. No such attempt has been done for years
+and the conclusion is that vfio-platform has not got any traction.
+PCIe passthrough shall be the mainline solution.
+
 CPU device properties
 '''''''''''''''''''''
 
diff --git a/docs/about/removed-features.rst b/docs/about/removed-features.rst
index c6616ce05e593..2527a91795ad2 100644
--- a/docs/about/removed-features.rst
+++ b/docs/about/removed-features.rst
@@ -972,6 +972,11 @@ from Linux in 2021, and is not supported anymore by QEMU either.
 System emulator machines
 ------------------------
 
+Note: Versioned machine types that have been introduced in a QEMU version
+that has initially been released more than 6 years before are considered
+obsolete and will be removed without further notice in this document.
+Please use newer machine types instead.
+
 ``s390-virtio`` (removed in 2.6)
 ''''''''''''''''''''''''''''''''
 
@@ -1006,12 +1011,6 @@ mips ``fulong2e`` machine alias (removed in 6.0)
 
 This machine has been renamed ``fuloong2e``.
 
-``pc-0.10`` up to ``pc-i440fx-2.3`` (removed in 4.0 up to 9.0)
-''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
-
-These machine types were very old and likely could not be used for live
-migration from old QEMU versions anymore. Use a newer machine type instead.
-
 Raspberry Pi ``raspi2`` and ``raspi3`` machines (removed in 6.2)
 ''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
 
@@ -1065,6 +1064,13 @@ for all machine types using the PXA2xx and OMAP2 SoCs. We are also
 dropping the ``cheetah`` OMAP1 board, because we don't have any
 test images for it and don't know of anybody who does.
 
+ppc ``ref405ep`` machine (removed in 10.0)
+''''''''''''''''''''''''''''''''''''''''''
+
+This machine was removed because PPC 405 CPU have no known users,
+firmware images are not available, OpenWRT dropped support in 2019,
+U-Boot in 2017, and Linux in 2024.
+
 linux-user mode CPUs
 --------------------
 
diff --git a/docs/conf.py b/docs/conf.py
index 31bb9a37893f4..7b5712e122fc8 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -60,7 +60,14 @@
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
-extensions = ['kerneldoc', 'qmp_lexer', 'hxtool', 'depfile', 'qapidoc']
+extensions = [
+    'depfile',
+    'hxtool',
+    'kerneldoc',
+    'qapi_domain',
+    'qapidoc',
+    'qmp_lexer',
+]
 
 if sphinx.version_info[:3] > (4, 0, 0):
     tags.add('sphinx4')
@@ -146,6 +153,22 @@
 with open(os.path.join(qemu_docdir, 'defs.rst.inc')) as f:
     rst_epilog += f.read()
 
+
+# Normally, the QAPI domain is picky about what field lists you use to
+# describe a QAPI entity. If you'd like to use arbitrary additional
+# fields in source documentation, add them here.
+qapi_allowed_fields = {
+    "see also",
+}
+
+# Due to a limitation in Sphinx, we need to know which indices to
+# generate in advance. Adding a namespace here allows that generation.
+qapi_namespaces = {
+    "QGA",
+    "QMP",
+    "QSD",
+}
+
 # -- Options for HTML output ----------------------------------------------
 
 # The theme to use for HTML and HTML Help pages.  See the documentation for
diff --git a/docs/devel/build-system.rst b/docs/devel/build-system.rst
index d42045a23250c..a759982f45c4c 100644
--- a/docs/devel/build-system.rst
+++ b/docs/devel/build-system.rst
@@ -260,7 +260,7 @@ Target-dependent emulator sourcesets:
   Each emulator also includes sources for files in the ``hw/`` and ``target/``
   subdirectories.  The subdirectory used for each emulator comes
   from the target's definition of ``TARGET_BASE_ARCH`` or (if missing)
-  ``TARGET_ARCH``, as found in ``default-configs/targets/*.mak``.
+  ``TARGET_ARCH``, as found in ``configs/targets/*.mak``.
 
   Each subdirectory in ``hw/`` adds one sourceset to the ``hw_arch`` dictionary,
   for example::
@@ -317,8 +317,8 @@ Utility sourcesets:
 The following files concur in the definition of which files are linked
 into each emulator:
 
-``default-configs/devices/*.mak``
-  The files under ``default-configs/devices/`` control the boards and devices
+``configs/devices/*.mak``
+  The files under ``configs/devices/`` control the boards and devices
   that are built into each QEMU system emulation targets. They merely contain
   a list of config variable definitions such as::
 
@@ -327,11 +327,11 @@ into each emulator:
     CONFIG_XLNX_VERSAL=y
 
 ``*/Kconfig``
-  These files are processed together with ``default-configs/devices/*.mak`` and
+  These files are processed together with ``configs/devices/*.mak`` and
   describe the dependencies between various features, subsystems and
   device models.  They are described in :ref:`kconfig`
 
-``default-configs/targets/*.mak``
+``configs/targets/*.mak``
   These files mostly define symbols that appear in the ``*-config-target.h``
   file for each emulator\ [#cfgtarget]_.  However, the ``TARGET_ARCH``
   and ``TARGET_BASE_ARCH`` will also be used to select the ``hw/`` and
diff --git a/docs/devel/codebase.rst b/docs/devel/codebase.rst
index 4039875ee042b..1b09953197b21 100644
--- a/docs/devel/codebase.rst
+++ b/docs/devel/codebase.rst
@@ -23,7 +23,7 @@ Some of the main QEMU subsystems are:
 - `Devices<device-emulation>` & Board models
 - `Documentation <documentation-root>`
 - `GDB support<GDB usage>`
-- `Migration<migration>`
+- :ref:`Migration<migration>`
 - `Monitor<QEMU monitor>`
 - :ref:`QOM (QEMU Object Model)<qom>`
 - `System mode<System emulation>`
@@ -112,7 +112,7 @@ yet, so sometimes the source code is all you have.
 * `libdecnumber <https://gitlab.com/qemu-project/qemu/-/tree/master/libdecnumber>`_:
   Import of gcc library, used to implement decimal number arithmetic.
 * `migration <https://gitlab.com/qemu-project/qemu/-/tree/master/migration>`__:
-  `Migration framework <migration>`.
+  :ref:`Migration framework <migration>`.
 * `monitor <https://gitlab.com/qemu-project/qemu/-/tree/master/monitor>`_:
   `Monitor <QEMU monitor>` implementation (HMP & QMP).
 * `nbd <https://gitlab.com/qemu-project/qemu/-/tree/master/nbd>`_:
@@ -193,7 +193,7 @@ yet, so sometimes the source code is all you have.
   - `lcitool <https://gitlab.com/qemu-project/qemu/-/tree/master/tests/lcitool>`_:
     Generate dockerfiles for CI containers.
   - `migration <https://gitlab.com/qemu-project/qemu/-/tree/master/tests/migration>`_:
-    Test scripts and data for `Migration framework <migration>`.
+    Test scripts and data for :ref:`Migration framework <migration>`.
   - `multiboot <https://gitlab.com/qemu-project/qemu/-/tree/master/tests/multiboot>`_:
     Test multiboot functionality for x86_64/i386.
   - `qapi-schema <https://gitlab.com/qemu-project/qemu/-/tree/master/tests/qapi-schema>`_:
diff --git a/docs/devel/hexagon-l2vic.rst b/docs/devel/hexagon-l2vic.rst
new file mode 100644
index 0000000000000..088563627445d
--- /dev/null
+++ b/docs/devel/hexagon-l2vic.rst
@@ -0,0 +1,59 @@
+Hexagon L2 Vectored Interrupt Controller
+========================================
+
+
+.. code-block:: none
+
+              +-------+
+              |       |             +----------------+
+              | l2vic |             |  hexagon core  |
+              |       |             |                |
+              | +-----|             |                |
+        ------> |VID0 >------------->irq2 -\         |
+        ------> |     |             |      |         |
+         ...  > |     |             |      |         |
+        ------> |     |             | <int steering> |
+              | +-----|             |   / |  | \     |
+              |  ...  |             |  |  |  |  |    |
+              | +-----|             | t0 t1 t2 t3 ...|
+        ------> |VIDN |             |                |
+        ------> |     |             |                |
+        ------> |     |             |                |
+        ------> |     |             |                |
+              | +-----|             |                |
+              |       |             |Global SREG File|
+              | State |             |                |
+              | [    ]|<============|=>[VID ]        |
+              | [    ]|<============|=>[VID1]        |
+              | [    ]|             |                |
+              | [    ]|             |                |
+              |       |             |                |
+              +-------+             +----------------+
+
+L2VIC/Core Integration
+----------------------
+
+* hexagon core supports 8 external interrupt sources
+* l2vic supports 1024 input interrupts mapped among 4 output interrupts
+* l2vic has four output signals: { VID0, VID1, VID2, VID3 }
+* l2vic device has a bank of registers per-VID that can be used to query
+  the status or assert new interrupts.
+* Interrupts are 'steered' to threads based on { thread priority, 'EX' state,
+  thread interrupt mask, thread interrupt enable, global interrupt enable,
+  etc. }.
+* Any hardware thread could conceivably handle any input interrupt, dependent
+  on state.
+* The system register transfer instruction can read the VID0-VID3 values from
+  the l2vic when reading from hexagon core system registers "VID" and "VID1".
+* When l2vic VID0 has multiple active interrupts, it pulses the VID0 output
+  IRQ and stores the IRQ number for the VID0 register field.  Only after this
+  interrupt is cleared can the l2vic pulse the VID0 output IRQ again and provide
+  the next interrupt number on the VID0 register.
+* The ``ciad`` instruction clears the l2vic input interrupt and un-disables the
+  core interrupt.  If some/an l2vic VID0 interrupt is pending when this occurs,
+  the next interrupt should fire and any subseqeunt reads of the VID register
+  should reflect the newly raised interrupt.
+* In QEMU, on an external interrupt or an unmasked-pending interrupt,
+  all vCPUs are triggered (has_work==true) and each will grab the IO lock
+  while considering the steering logic to determine whether they're the thread
+  that must handle the interrupt.
diff --git a/docs/devel/hexagon-sys.rst b/docs/devel/hexagon-sys.rst
new file mode 100644
index 0000000000000..3972261a2bbed
--- /dev/null
+++ b/docs/devel/hexagon-sys.rst
@@ -0,0 +1,106 @@
+.. _Hexagon-System-arch:
+
+Hexagon System Architecture
+===========================
+
+The hexagon architecture has some unique elements which are described here.
+
+Interrupts
+----------
+When interrupts arrive at a Hexagon DSP core, they are priority-steered to
+be handled by an eligible hardware thread with the lowest priority.
+
+Memory
+------
+Each hardware thread has an ``SSR.ASID`` field that contains its Address
+Space Identifier.  This value is catenated with a 32-bit virtual address -
+the MMU can then resolve this extended virtual address to a physical address.
+
+TLBs
+----
+The format of a TLB entry is shown below.
+
+.. note::
+    The Small Core DSPs have a different TLB format which is not yet
+    supported.
+
+.. admonition:: Diagram
+
+ .. code:: text
+
+             6                   5                   4               3
+       3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |v|g|x|A|A|             |                                       |
+      |a|l|P|1|0|     ASID    |             Virtual Page              |
+      |l|b| | | |             |                                       |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+         3                   2                   1                   0
+       1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      | | | | |       |                                             | |
+      |x|w|r|u|Cacheab|               Physical Page                 |S|
+      | | | | |       |                                             | |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+* ASID: the address-space identifier
+* A1, A0: the behavior of these cache line attributes are not modeled by QEMU.
+* xP: the extra-physical bit is the most significant physical address bit.
+* S: the S bit and the LSBs of the physical page indicate the page size
+* val: this is the 'valid' bit, when set it indicates that page matching
+  should consider this entry.
+
+.. list-table:: Page sizes
+   :widths: 25 25 50
+   :header-rows: 1
+
+   * - S-bit
+     - Phys page LSBs
+     - Page size
+   * - 1
+     - N/A
+     - 4kb
+   * - 0
+     - 0b1
+     - 16kb
+   * - 0
+     - 0b10
+     - 64kb
+   * - 0
+     - 0b100
+     - 256kb
+   * - 0
+     - 0b1000
+     - 1MB
+   * - 0
+     - 0b10000
+     - 4MB
+   * - 0
+     - 0b100000
+     - 16MB
+
+* glb: if the global bit is set, the ASID is not considered when matching
+  TLBs.
+* Cacheab: the cacheability attributes of TLBs are not modeled, these bits
+  are ignored.
+* RWX: read-, write-, execute-, enable bits.  Indicates if user programs
+  are permitted to read/write/execute the given page.
+* U: indicates if user programs can access this page.
+
+Scheduler
+---------
+The Hexagon system architecture has a feature to assist the guest OS
+task scheduler.  The guest OS can enable this feature by setting
+``SCHEDCFG.EN``.  The ``BESTWAIT`` register is programmed by the guest OS
+to indicate the priority of the highest priority task waiting to run on a
+hardware thread.  The reschedule interrupt is triggered when any hardware
+thread's priority in ``STID.PRIO`` is worse than the ``BESTWAIT``.  When
+it is triggered, the ``BESTWAIT.PRIO`` value is reset to 0x1ff.
+
+HVX Coprocessor
+---------------
+The Supervisor Status Register field ``SSR.XA`` binds a DSP hardware thread
+to one of the eight possible HVX contexts.  The guest OS is responsible for
+managing this resource.
diff --git a/docs/devel/hexagon-vm.rst b/docs/devel/hexagon-vm.rst
new file mode 100644
index 0000000000000..fb16d56d59def
--- /dev/null
+++ b/docs/devel/hexagon-vm.rst
@@ -0,0 +1,190 @@
+Hexagon Virtual Machine
+=======================
+
+The hexagon virtual machine is a hypervisor that can partition a single
+Hexagon DSP among multiple guest operating systems, and abstracts the
+specific details of a DSP architectural revision for the sake of consistency
+among generations.
+
+Events
+------
+
+The guest operating system should register the Guest Event Vector Base
+via the ``vmsetvec`` virtual instruction at system startup.  The vector table
+and handlers are determined by the guest OS.
+
+Guests return from event handlers with ``vmrte``.  This instruction will restore
+the mode (user versus guest), interrupt enable state, PC, SP.
+
+.. list-table:: Event types
+   :header-rows: 1
+
+   * - Number
+     - Name
+     - Description
+     - Maskable
+     - Detail
+   * - 0
+     - Reserved
+     -
+     -
+     -
+   * - 1
+     - Machine check event
+     - unrecoverable VM state
+     - No
+     - execution terminates if unhandled
+   * - 2
+     - General exception
+     - internal hardware or software exception
+     - No
+     -
+   * - 3-4
+     - Reserved
+     -
+     -
+     -
+   * - 5
+     - ``trap0``
+     - ``trap0`` instruction
+     - No
+     -
+   * - 6
+     - Reserved
+     -
+     -
+     -
+   * - 7
+     - Interrupt
+     - external interrupts
+     - Yes
+     - increasing interrupt numbers have descending priority
+
+Startup
+-------
+In order to transition to user-mode, the guest OS must set the ``UM`` bit in
+the guest status register and specify the address to start executing in
+user mode in the guest event link register.
+
+Virtual Instructions
+--------------------
+
+.. list-table:: Virtual Instructions
+   :header-rows: 1
+
+   * - Instruction
+     - Behavior
+     - Operand
+     - Input
+     - Output
+   * - vmversion
+     - returns the VM version
+     - 0x0
+     - requested VM version
+     - provided VM version
+   * - vmrte
+     - return from event
+     - 0x1
+     - Event info in g3:0
+     - N/A
+   * - vmsetvec
+     - set event vector
+     - 0x2
+     - r0 is set to vector table addr
+     - r0 is 0 on success, 1 otherwise
+   * - vmsetie
+     - set interrupt enabled
+     - 0x3
+     - r0 is set to 1 to enable, 0 to disable
+     - previous IE bit is stored as LSB of r0
+   * - vmgetie
+     - get interrupt enabled
+     - 0x4
+     - N/A
+     - current IE bit is stored as LSB of r0
+   * - vmintop
+     - interrupt operation
+     - 0x5
+     - r0 = Interrupt Op, r1-r4: Depends on Op
+     - r0 - value depends on operation
+   * - vmclrmap
+     - clear virtual memory map
+     - 0xa
+     - r0 = Interrupt Op, r1-r4: Depends on Op
+     - r0 - value depends on operation
+   * - vmnewmap
+     - set new virtual memory map
+     - 0xb
+     - + r0 contains logical address of new segment table
+       + r1 = type of translations: 0 indicates a logical address of a zero-terminated linear list, 1 indicates a set of page tables.
+     - r0 contains 0 on success, otherwise negative error code
+   * - vmcache
+     - VM cache control: not modeled
+     - 0xd
+     - + r0 contains the operation to be performed
+       + r1 = Starting virtual address
+       + r2 contains the length in bytes
+     - r0 contains 0 on success, otherwise -1.  Cache behavior is not modeled so this operation always succeeds.
+   * - vmgettime
+     - Get virtual machine time
+     - 0xe
+     - N/A
+     - r0 contains the least significant 32 bits of timestamp, r1 contains the  most significant 32 bits of timestamp
+   * - vmsettime
+     - Set virtual machine time
+     - 0xf
+     - r0 contains the least significant 32 bits of timestamp, r1 contains the  most significant 32 bits of timestamp
+     - N/A
+   * - vmwait
+     - wait for interrupt
+     - 0x10
+     - N/A
+     - r0 contains the interrupt number of the interrupt waking the guest
+   * - vmyield
+     - voluntarily yield VM task
+     - 0x11
+     - N/A
+     - N/A
+   * - vmstart
+     - Create new virtual processor instance
+     - 0x12
+     - r0 contains the starting execution address, r1 contains the starting stack pointer
+     - r0 contains the Virtual processor number of new virtual processor on success, otherwise -1
+   * - vmstop
+     - terminate current virtual processor instance
+     - 0x13
+     - N/A
+     - N/A
+   * - vmvpid
+     - get the virtual processor ID
+     - 0x14
+     - N/A
+     - r0 contains the virtual processor number of virtual processor executing the instruction
+   * - vmsetregs
+     - Set guest registers
+     - 0x15
+     - r0-3 hold g0-3 values
+     - N/A
+   * - vmgetregs
+     - Get guest registers
+     - 0x16
+     - N/A
+     - r0-3 hold g0-3 values
+   * - vmtimerop
+     - perform an operation on a system timer
+     - 0x18
+     - + getfreq = 0
+       + getres = 1
+       + gettime = 2
+       + gettimeout = 3
+       + settimeout = 4
+       + deltatimeout = 5
+     - r0 contains result of the timer operation call
+   * - vmgetinfo
+     - Get system info
+     - 0x1a
+     - Index of the system info parameter:
+
+       + build_id = 0
+       + info_boot_flags = 1
+     - value of the indicated system info parameter
diff --git a/docs/devel/index-build.rst b/docs/devel/index-build.rst
index 0745c81a264b4..3f3cb21b9b420 100644
--- a/docs/devel/index-build.rst
+++ b/docs/devel/index-build.rst
@@ -12,4 +12,5 @@ some of the basics if you are adding new files and targets to the build.
    kconfig
    docs
    qapi-code-gen
+   qapi-domain
    control-flow-integrity
diff --git a/docs/devel/index-internals.rst b/docs/devel/index-internals.rst
index bca597c65895d..82f788682bb4a 100644
--- a/docs/devel/index-internals.rst
+++ b/docs/devel/index-internals.rst
@@ -14,12 +14,16 @@ Details about QEMU's various subsystems including how to add features to them.
    block-coroutine-wrapper
    clocks
    ebpf_rss
+   hexagon-sys
+   hexagon-l2vic
+   hexagon-vm
    migration/index
    multi-process
    reset
    s390-cpu-topology
    s390-dasd-ipl
    tracing
+   uefi-vars
    vfio-iommufd
    writing-monitor-commands
    virtio-backends
diff --git a/docs/devel/kconfig.rst b/docs/devel/kconfig.rst
index 52d4b905f6752..493b76c4fbf70 100644
--- a/docs/devel/kconfig.rst
+++ b/docs/devel/kconfig.rst
@@ -38,7 +38,7 @@ originated in the Linux kernel, though it was heavily simplified and
 the handling of dependencies is stricter in QEMU.
 
 Unlike Linux, there is no user interface to edit the configuration, which
-is instead specified in per-target files under the ``default-configs/``
+is instead specified in per-target files under the ``configs/``
 directory of the QEMU source tree.  This is because, unlike Linux,
 configuration and dependencies can be treated as a black box when building
 QEMU; the default configuration that QEMU ships with should be okay in
@@ -103,7 +103,7 @@ directives can be included:
 **default value**: ``default <value> [if <expr>]``
 
   Default values are assigned to the config symbol if no other value was
-  set by the user via ``default-configs/*.mak`` files, and only if
+  set by the user via ``configs/*.mak`` files, and only if
   ``select`` or ``depends on`` directives do not force the value to true
   or false respectively.  ``<value>`` can be ``y`` or ``n``; it cannot
   be an arbitrary Boolean expression.  However, a condition for applying
@@ -119,7 +119,7 @@ directives can be included:
   This is similar to ``select`` as it applies a lower limit of ``y``
   to another symbol.  However, the lower limit is only a default
   and the "implied" symbol's value may still be set to ``n`` from a
-  ``default-configs/*.mak`` files.  The following two examples are
+  ``configs/*.mak`` files.  The following two examples are
   equivalent::
 
     config FOO
@@ -146,7 +146,7 @@ declares its dependencies in different ways:
       bool
 
   Subsystems always default to false (they have no ``default`` directive)
-  and are never visible in ``default-configs/*.mak`` files.  It's
+  and are never visible in ``configs/*.mak`` files.  It's
   up to other symbols to ``select`` whatever subsystems they require.
 
   They sometimes have ``select`` directives to bring in other required
@@ -238,7 +238,7 @@ declares its dependencies in different ways:
   include libraries (such as ``FDT``) or ``TARGET_BIG_ENDIAN``
   (possibly negated).
 
-  Boards are listed for convenience in the ``default-configs/*.mak``
+  Boards are listed for convenience in the ``configs/*.mak``
   for the target they apply to.
 
 **internal elements**
@@ -251,18 +251,18 @@ declares its dependencies in different ways:
 
   Internal elements group code that is useful in several boards or
   devices.  They are usually enabled with ``select`` and in turn select
-  other elements; they are never visible in ``default-configs/*.mak``
+  other elements; they are never visible in ``configs/*.mak``
   files, and often not even in the Makefile.
 
 Writing and modifying default configurations
 --------------------------------------------
 
 In addition to the Kconfig files under hw/, each target also includes
-a file called ``default-configs/TARGETNAME-softmmu.mak``.  These files
+a file called ``configs/TARGETNAME-softmmu.mak``.  These files
 initialize some Kconfig variables to non-default values and provide the
 starting point to turn on devices and subsystems.
 
-A file in ``default-configs/`` looks like the following example::
+A file in ``configs/`` looks like the following example::
 
     # Default configuration for alpha-softmmu
 
diff --git a/docs/devel/migration/vfio.rst b/docs/devel/migration/vfio.rst
index c49482eab66d8..673e354754c8a 100644
--- a/docs/devel/migration/vfio.rst
+++ b/docs/devel/migration/vfio.rst
@@ -67,15 +67,35 @@ VFIO implements the device hooks for the iterative approach as follows:
 * A ``switchover_ack_needed`` function that checks if the VFIO device uses
   "switchover-ack" migration capability when this capability is enabled.
 
-* A ``save_state`` function to save the device config space if it is present.
+* A ``switchover_start`` function that in the multifd mode starts a thread that
+  reassembles the multifd received data and loads it in-order into the device.
+  In the non-multifd mode this function is a NOP.
+
+* A ``save_state`` function to save the device config space if it is present
+  in the non-multifd mode.
+  In the multifd mode it just emits either a dummy EOS marker.
 
 * A ``save_live_complete_precopy`` function that sets the VFIO device in
   _STOP_COPY state and iteratively copies the data for the VFIO device until
   the vendor driver indicates that no data remains.
+  In the multifd mode it just emits a dummy EOS marker.
+
+* A ``save_live_complete_precopy_thread`` function that in the multifd mode
+  provides thread handler performing multifd device state transfer.
+  It sets the VFIO device to _STOP_COPY state, iteratively reads the data
+  from the VFIO device and queues it for multifd transmission until the vendor
+  driver indicates that no data remains.
+  After that, it saves the device config space and queues it for multifd
+  transfer too.
+  In the non-multifd mode this thread is a NOP.
 
 * A ``load_state`` function that loads the config section and the data
   sections that are generated by the save functions above.
 
+* A ``load_state_buffer`` function that loads the device state and the device
+  config that arrived via multifd channels.
+  It's used only in the multifd mode.
+
 * ``cleanup`` functions for both save and load that perform any migration
   related cleanup.
 
@@ -176,8 +196,11 @@ Live migration save path
                 Then the VFIO device is put in _STOP_COPY state
                      (FINISH_MIGRATE, _ACTIVE, _STOP_COPY)
          .save_live_complete_precopy() is called for each active device
-      For the VFIO device, iterate in .save_live_complete_precopy() until
+              For the VFIO device: in the non-multifd mode iterate in
+                        .save_live_complete_precopy() until
                                pending data is 0
+	          In the multifd mode this iteration is done in
+	          .save_live_complete_precopy_thread() instead.
                                       |
                      (POSTMIGRATE, _COMPLETED, _STOP_COPY)
             Migraton thread schedules cleanup bottom half and exits
@@ -194,6 +217,9 @@ Live migration resume path
                           (RESTORE_VM, _ACTIVE, _STOP)
                                       |
      For each device, .load_state() is called for that device section data
+                 transmitted via the main migration channel.
+     For data transmitted via multifd channels .load_state_buffer() is called
+                                   instead.
                         (RESTORE_VM, _ACTIVE, _RESUMING)
                                       |
   At the end, .load_cleanup() is called for each device and vCPUs are started
@@ -206,3 +232,18 @@ Postcopy
 ========
 
 Postcopy migration is currently not supported for VFIO devices.
+
+Multifd
+=======
+
+Starting from QEMU version 10.0 there's a possibility to transfer VFIO device
+_STOP_COPY state via multifd channels. This helps reduce downtime - especially
+with multiple VFIO devices or with devices having a large migration state.
+As an additional benefit, setting the VFIO device to _STOP_COPY state and
+saving its config space is also parallelized (run in a separate thread) in
+such migration mode.
+
+The multifd VFIO device state transfer is controlled by
+"x-migration-multifd-transfer" VFIO device property. This property defaults to
+AUTO, which means that VFIO device state transfer via multifd channels is
+attempted in configurations that otherwise support it.
diff --git a/docs/devel/multi-thread-tcg.rst b/docs/devel/multi-thread-tcg.rst
index 7fd0a07633dc3..b0f473961dd16 100644
--- a/docs/devel/multi-thread-tcg.rst
+++ b/docs/devel/multi-thread-tcg.rst
@@ -37,7 +37,6 @@ if:
 
 * forced by --accel tcg,thread=single
 * enabling --icount mode
-* 64 bit guests on 32 bit hosts (TCG_OVERSIZED_GUEST)
 
 In the general case of running translated code there should be no
 inter-vCPU dependencies and all vCPUs should be able to run at full
diff --git a/docs/devel/qapi-code-gen.rst b/docs/devel/qapi-code-gen.rst
index 9fa94251b072f..f9cfe8721f7c5 100644
--- a/docs/devel/qapi-code-gen.rst
+++ b/docs/devel/qapi-code-gen.rst
@@ -229,7 +229,8 @@ These are of the form PREFIX_NAME, where PREFIX is derived from the
 enumeration type's name, and NAME from the value's name.  For the
 example above, the generator maps 'MyEnum' to MY_ENUM and 'value1' to
 VALUE1, resulting in the enumeration constant MY_ENUM_VALUE1.  The
-optional 'prefix' member overrides PREFIX.
+optional 'prefix' member overrides PREFIX.  This is rarely necessary,
+and should be used with restraint.
 
 The generated C enumeration constants have values 0, 1, ..., N-1 (in
 QAPI schema order), where N is the number of values.  There is an
diff --git a/docs/devel/qapi-domain.rst b/docs/devel/qapi-domain.rst
new file mode 100644
index 0000000000000..a748529f5156f
--- /dev/null
+++ b/docs/devel/qapi-domain.rst
@@ -0,0 +1,716 @@
+======================
+The Sphinx QAPI Domain
+======================
+
+An extension to the `rST syntax
+<https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html>`_
+in Sphinx is provided by the QAPI Domain, located in
+``docs/sphinx/qapi_domain.py``. This extension is analogous to the
+`Python Domain
+<https://www.sphinx-doc.org/en/master/usage/domains/python.html>`_
+included with Sphinx, but provides special directives and roles
+speciically for annotating and documenting QAPI definitions
+specifically.
+
+A `Domain
+<https://www.sphinx-doc.org/en/master/usage/domains/index.html>`_
+provides a set of special rST directives and cross-referencing roles to
+Sphinx for understanding rST markup written to document a specific
+language. By itself, this QAPI extension is only sufficient to parse rST
+markup written by hand; the `autodoc
+<https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html>`_
+functionality is provided elsewhere, in ``docs/sphinx/qapidoc.py``, by
+the "Transmogrifier".
+
+It is not expected that any developer nor documentation writer would
+never need to write *nor* read these special rST forms. However, in the
+event that something needs to be debugged, knowing the syntax of the
+domain is quite handy. This reference may also be useful as a guide for
+understanding the QAPI Domain extension code itself. Although most of
+these forms will not be needed for documentation writing purposes,
+understanding the cross-referencing syntax *will* be helpful when
+writing rST documentation elsewhere, or for enriching the body of
+QAPIDoc blocks themselves.
+
+
+Concepts
+========
+
+The QAPI Domain itself provides no mechanisms for reading the QAPI
+Schema or generating documentation from code that exists. It is merely
+the rST syntax used to describe things. For instance, the Sphinx Python
+domain adds syntax like ``:py:func:`` for describing Python functions in
+documentation, but it's the autodoc module that is responsible for
+reading python code and generating such syntax. QAPI is analagous here:
+qapidoc.py is responsible for reading the QAPI Schema and generating rST
+syntax, and qapi_domain.py is responsible for translating that special
+syntax and providing APIs for Sphinx internals.
+
+In other words:
+
+qapi_domain.py adds syntax like ``.. qapi:command::`` to Sphinx, and
+qapidoc.py transforms the documentation in ``qapi/*.json`` into rST
+using directives defined by the domain.
+
+Or even shorter:
+
+``:py:`` is to ``:qapi:`` as *autodoc* is to *qapidoc*.
+
+
+Info Field Lists
+================
+
+`Field lists
+<https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html#field-lists>`_
+are a standard syntax in reStructuredText. Sphinx `extends that syntax
+<https://www.sphinx-doc.org/en/master/usage/domains/python.html#info-field-lists>`_
+to give certain field list entries special meaning and parsing to, for
+example, add cross-references. The QAPI Domain takes advantage of this
+field list extension to document things like Arguments, Members, Values,
+and so on.
+
+The special parsing and handling of info field lists in Sphinx is provided by
+three main classes; Field, GroupedField, and TypedField. The behavior
+and formatting for each configured field list entry in the domain
+changes depending on which class is used.
+
+Field:
+  * Creates an ungrouped field: i.e., each entry will create its own
+    section and they will not be combined.
+  * May *optionally* support an argument.
+  * May apply cross-reference roles to *either* the argument *or* the
+    content body, both, or neither.
+
+This is used primarily for entries which are not expected to be
+repeated, i.e., items that may only show up at most once. The QAPI
+domain uses this class for "Errors" section.
+
+GroupedField:
+  * Creates a grouped field: i.e. multiple adjacent entries will be
+    merged into one section, and the content will form a bulleted list.
+  * *Must* take an argument.
+  * May optionally apply a cross-reference role to the argument, but not
+    the body.
+  * Can be configured to remove the bulleted list if there is only a
+    single entry.
+  * All items will be generated with the form: "argument -- body"
+
+This is used for entries which are expected to be repeated, but aren't
+expected to have two arguments, i.e. types without names, or names
+without types. The QAPI domain uses this class for features, returns,
+and enum values.
+
+TypedField:
+  * Creates a grouped, typed field. Multiple adjacent entres will be
+    merged into one section, and the content will form a bulleted list.
+  * *Must* take at least one argument, but supports up to two -
+    nominally, a name and a type.
+  * May optionally apply a cross-reference role to the type or the name
+    argument, but not the body.
+  * Can be configured to remove the bulleted list if there is only a
+    single entry.
+  * All items will be generated with the form "name (type) -- body"
+
+This is used for entries that are expected to be repeated and will have
+a name, a type, and a description. The QAPI domain uses this class for
+arguments, alternatives, and members. Wherever type names are referenced
+below, They must be a valid, documented type that will be
+cross-referenced in the HTML output; or one of the built-in JSON types
+(string, number, int, boolean, null, value, q_empty).
+
+
+``:feat:``
+----------
+
+Document a feature attached to a QAPI definition.
+
+:availability: This field list is available in the body of Command,
+               Event, Enum, Object and Alternate directives.
+:syntax: ``:feat name: Lorem ipsum, dolor sit amet...``
+:type: `sphinx.util.docfields.GroupedField
+       <https://pydoc.dev/sphinx/latest/sphinx.util.docfields.GroupedField.html?private=1>`_
+
+Example::
+
+   .. qapi:object:: BlockdevOptionsVirtioBlkVhostVdpa
+      :since: 7.2
+      :ifcond: CONFIG_BLKIO
+
+      Driver specific block device options for the virtio-blk-vhost-vdpa
+      backend.
+
+   :memb string path: path to the vhost-vdpa character device.
+   :feat fdset: Member ``path`` supports the special "/dev/fdset/N" path
+       (since 8.1)
+
+
+``:arg:``
+---------
+
+Document an argument to a QAPI command.
+
+:availability: This field list is only available in the body of the
+               Command directive.
+:syntax: ``:arg type name: description``
+:type: `sphinx.util.docfields.TypedField
+       <https://pydoc.dev/sphinx/latest/sphinx.util.docfields.TypedField.html?private=1>`_
+
+
+Example::
+
+   .. qapi:command:: job-pause
+      :since: 3.0
+
+      Pause an active job.
+
+      This command returns immediately after marking the active job for
+      pausing.  Pausing an already paused job is an error.
+
+      The job will pause as soon as possible, which means transitioning
+      into the PAUSED state if it was RUNNING, or into STANDBY if it was
+      READY.  The corresponding JOB_STATUS_CHANGE event will be emitted.
+
+      Cancelling a paused job automatically resumes it.
+
+      :arg string id: The job identifier.
+
+
+``:error:``
+-----------
+
+Document the error condition(s) of a QAPI command.
+
+:availability: This field list is only available in the body of the
+               Command directive.
+:syntax: ``:error: Lorem ipsum dolor sit amet ...``
+:type: `sphinx.util.docfields.Field
+       <https://pydoc.dev/sphinx/latest/sphinx.util.docfields.Field.html?private=1>`_
+
+The format of the :errors: field list description is free-form rST. The
+alternative spelling ":errors:" is also permitted, but strictly
+analogous.
+
+Example::
+
+   .. qapi:command:: block-job-set-speed
+      :since: 1.1
+
+      Set maximum speed for a background block operation.
+
+      This command can only be issued when there is an active block job.
+
+      Throttling can be disabled by setting the speed to 0.
+
+      :arg string device: The job identifier.  This used to be a device
+          name (hence the name of the parameter), but since QEMU 2.7 it
+          can have other values.
+      :arg int speed: the maximum speed, in bytes per second, or 0 for
+          unlimited.  Defaults to 0.
+      :error:
+          - If no background operation is active on this device,
+            DeviceNotActive
+
+
+``:return:``
+-------------
+
+Document the return type(s) and value(s) of a QAPI command.
+
+:availability: This field list is only available in the body of the
+               Command directive.
+:syntax: ``:return type: Lorem ipsum dolor sit amet ...``
+:type: `sphinx.util.docfields.GroupedField
+       <https://pydoc.dev/sphinx/latest/sphinx.util.docfields.GroupedField.html?private=1>`_
+
+
+Example::
+
+   .. qapi:command:: query-replay
+      :since: 5.2
+
+      Retrieve the record/replay information.  It includes current
+      instruction count which may be used for ``replay-break`` and
+      ``replay-seek`` commands.
+
+      :return ReplayInfo: record/replay information.
+
+      .. qmp-example::
+
+          -> { "execute": "query-replay" }
+          <- { "return": {
+                 "mode": "play", "filename": "log.rr", "icount": 220414 }
+             }
+
+
+``:value:``
+-----------
+
+Document a possible value for a QAPI enum.
+
+:availability: This field list is only available in the body of the Enum
+               directive.
+:syntax: ``:value name: Lorem ipsum, dolor sit amet ...``
+:type: `sphinx.util.docfields.GroupedField
+       <https://pydoc.dev/sphinx/latest/sphinx.util.docfields.GroupedField.html?private=1>`_
+
+Example::
+
+   .. qapi:enum:: QapiErrorClass
+      :since: 1.2
+
+      QEMU error classes
+
+      :value GenericError: this is used for errors that don't require a specific
+          error class.  This should be the default case for most errors
+      :value CommandNotFound: the requested command has not been found
+      :value DeviceNotActive: a device has failed to be become active
+      :value DeviceNotFound: the requested device has not been found
+      :value KVMMissingCap: the requested operation can't be fulfilled because a
+          required KVM capability is missing
+
+
+``:alt:``
+------------
+
+Document a possible branch for a QAPI alternate.
+
+:availability: This field list is only available in the body of the
+               Alternate directive.
+:syntax: ``:alt type name: Lorem ipsum, dolor sit amet ...``
+:type: `sphinx.util.docfields.TypedField
+       <https://pydoc.dev/sphinx/latest/sphinx.util.docfields.TypedField.html?private=1>`_
+
+As a limitation of Sphinx, we must document the "name" of the branch in
+addition to the type, even though this information is not visible on the
+wire in the QMP protocol format. This limitation *may* be lifted at a
+future date.
+
+Example::
+
+   .. qapi:alternate:: StrOrNull
+      :since: 2.10
+
+      This is a string value or the explicit lack of a string (null
+      pointer in C).  Intended for cases when 'optional absent' already
+      has a different meaning.
+
+       :alt string s: the string value
+       :alt null n: no string value
+
+
+``:memb:``
+----------
+
+Document a member of an Event or Object.
+
+:availability: This field list is available in the body of Event or
+               Object directives.
+:syntax: ``:memb type name: Lorem ipsum, dolor sit amet ...``
+:type: `sphinx.util.docfields.TypedField
+       <https://pydoc.dev/sphinx/latest/sphinx.util.docfields.TypedField.html?private=1>`_
+
+This is fundamentally the same as ``:arg:`` and ``:alt:``, but uses the
+"Members" phrasing for Events and Objects (Structs and Unions).
+
+Example::
+
+   .. qapi:event:: JOB_STATUS_CHANGE
+      :since: 3.0
+
+      Emitted when a job transitions to a different status.
+
+      :memb string id: The job identifier
+      :memb JobStatus status: The new job status
+
+
+Arbitrary field lists
+---------------------
+
+Other field list names, while valid rST syntax, are prohibited inside of
+QAPI directives to help prevent accidental misspellings of info field
+list names. If you want to add a new arbitrary "non-value-added" field
+list to QAPI documentation, you must add the field name to the allow
+list in ``docs/conf.py``
+
+For example::
+
+   qapi_allowed_fields = {
+       "see also",
+   }
+
+Will allow you to add arbitrary field lists in QAPI directives::
+
+   .. qapi:command:: x-fake-command
+
+      :see also: Lorem ipsum, dolor sit amet ...
+
+
+Cross-references
+================
+
+Cross-reference `roles
+<https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html>`_
+in the QAPI domain are modeled closely after the `Python
+cross-referencing syntax
+<https://www.sphinx-doc.org/en/master/usage/domains/python.html#cross-referencing-python-objects>`_.
+
+QAPI definitions can be referenced using the standard `any
+<https://www.sphinx-doc.org/en/master/usage/referencing.html#role-any>`_
+role cross-reference syntax, such as with ```query-blockstats```.  In
+the event that disambiguation is needed, cross-references can also be
+written using a number of explicit cross-reference roles:
+
+* ``:qapi:mod:`block-core``` -- Reference a QAPI module. The link will
+  take you to the beginning of that section in the documentation.
+* ``:qapi:cmd:`query-block``` -- Reference a QAPI command.
+* ``:qapi:event:`JOB_STATUS_CHANGE``` -- Reference a QAPI event.
+* ``:qapi:enum:`QapiErrorClass``` -- Reference a QAPI enum.
+* ``:qapi:obj:`BlockdevOptionsVirtioBlkVhostVdpa`` -- Reference a QAPI
+  object (struct or union)
+* ``:qapi:alt:`StrOrNull``` -- Reference a QAPI alternate.
+* ``:qapi:type:`BlockDirtyInfo``` -- Reference *any* QAPI type; this
+  excludes modules, commands, and events.
+* ``:qapi:any:`block-job-set-speed``` -- Reference absolutely any QAPI entity.
+
+Type arguments in info field lists are converted into references as if
+you had used the ``:qapi:type:`` role. All of the special syntax below
+applies to both info field lists and standalone explicit
+cross-references.
+
+
+Type decorations
+----------------
+
+Type names in references can be surrounded by brackets, like
+``[typename]``, to indicate an array of that type.  The cross-reference
+will apply only to the type name between the brackets. For example;
+``:qapi:type:`[Qcow2BitmapInfoFlags]``` renders to:
+:qapi:type:`[QMP:Qcow2BitmapInfoFlags]`
+
+To indicate an optional argument/member in a field list, the type name
+can be suffixed with ``?``. The cross-reference will be transformed to
+"type, Optional" with the link applying only to the type name. For
+example; ``:qapi:type:`BitmapSyncMode?``` renders to:
+:qapi:type:`QMP:BitmapSyncMode?`
+
+
+Namespaces
+----------
+
+Mimicking the `Python domain target specification syntax
+<https://www.sphinx-doc.org/en/master/usage/domains/python.html#target-specification>`_,
+QAPI allows you to specify the fully qualified path for a data
+type.
+
+* A namespace can be explicitly provided;
+  e.g. ``:qapi:type:`QMP:BitmapSyncMode``
+* A module can be explicitly provided;
+  ``:qapi:type:`QMP:block-core.BitmapSyncMode``` will render to:
+  :qapi:type:`QMP:block-core.BitmapSyncMode`
+* If you don't want to display the "fully qualified" name, it can be
+  prefixed with a tilde; ``:qapi:type:`~QMP:block-core.BitmapSyncMode```
+  will render to: :qapi:type:`~QMP:block-core.BitmapSyncMode`
+
+
+Target resolution
+-----------------
+
+Any cross-reference to a QAPI type, whether using the ```any``` style of
+reference or the more explicit ```:qapi:any:`target``` syntax, allows
+for the presence or absence of either the namespace or module
+information.
+
+When absent, their value will be inferred from context by the presence
+of any ``qapi:namespace`` or ``qapi:module`` directives preceding the
+cross-reference.
+
+If no results are found when using the inferred values, other
+namespaces/modules will be searched as a last resort; but any explicitly
+provided values must always match in order to succeed.
+
+This allows for efficient cross-referencing with a minimum of syntax in
+the large majority of cases, but additional context or namespace markup
+may be required outside of the QAPI reference documents when linking to
+items that share a name across multiple documented QAPI schema.
+
+
+Custom link text
+----------------
+
+The name of a cross-reference link can be explicitly overridden like
+`most stock Sphinx references
+<https://www.sphinx-doc.org/en/master/usage/referencing.html#syntax>`_
+using the ``custom text <target>`` syntax.
+
+For example, ``:qapi:cmd:`Merge dirty bitmaps
+<block-dirty-bitmap-merge>``` will render as: :qapi:cmd:`Merge dirty
+bitmaps <QMP:block-dirty-bitmap-merge>`
+
+
+Directives
+==========
+
+The QAPI domain adds a number of custom directives for documenting
+various QAPI/QMP entities. The syntax is plain rST, and follows this
+general format::
+
+  .. qapi:directive:: argument
+     :option:
+     :another-option: with an argument
+
+     Content body, arbitrary rST is allowed here.
+
+
+Sphinx standard options
+-----------------------
+
+All QAPI directives inherit a number of `standard options
+<https://www.sphinx-doc.org/en/master/usage/domains/index.html#basic-markup>`_
+from Sphinx's ObjectDescription class.
+
+The dashed spellings of the below options were added in Sphinx 7.2, the
+undashed spellings are currently retained as aliases, but will be
+removed in a future version.
+
+* ``:no-index:`` and ``:noindex:`` -- Do not add this item into the
+  Index, and do not make it available for cross-referencing.
+* ``no-index-entry:`` and ``:noindexentry:`` -- Do not add this item
+  into the Index, but allow it to be cross-referenced.
+* ``no-contents-entry`` and ``:nocontentsentry:`` -- Exclude this item
+  from the Table of Contents.
+* ``no-typesetting`` -- Create TOC, Index and cross-referencing
+  entities, but don't actually display the content.
+
+
+QAPI standard options
+---------------------
+
+All QAPI directives -- *except* for namespace and module -- support
+these common options.
+
+* ``:namespace: name`` -- This option allows you to override the
+  namespace association of a given definition.
+* ``:module: modname`` -- Borrowed from the Python domain, this option allows
+  you to override the module association of a given definition.
+* ``:since: x.y`` -- Allows the documenting of "Since" information, which is
+  displayed in the signature bar.
+* ``:ifcond: CONDITION`` -- Allows the documenting of conditional availability
+  information, which is displayed in an eyecatch just below the
+  signature bar.
+* ``:deprecated:`` -- Adds an eyecatch just below the signature bar that
+  advertises that this definition is deprecated and should be avoided.
+* ``:unstable:`` -- Adds an eyecatch just below the signature bar that
+  advertises that this definition is unstable and should not be used in
+  production code.
+
+
+qapi:namespace
+--------------
+
+The ``qapi:namespace`` directive marks the start of a QAPI namespace. It
+does not take a content body, nor any options. All subsequent QAPI
+directives are associated with the most recent namespace. This affects
+the definition's "fully qualified name", allowing two different
+namespaces to create an otherwise identically named definition.
+
+This directive also influences how reference resolution works for any
+references that do not explicity specify a namespace, so this directive
+can be used to nudge references into preferring targets from within that
+namespace.
+
+Example::
+
+   .. qapi:namespace:: QMP
+
+
+This directive has no visible effect.
+
+
+qapi:module
+-----------
+
+The ``qapi:module`` directive marks the start of a QAPI module. It may have
+a content body, but it can be omitted. All subsequent QAPI directives
+are associated with the most recent module; this effects their "fully
+qualified" name, but has no other effect.
+
+Example::
+
+   .. qapi:module:: block-core
+
+      Welcome to the block-core module!
+
+Will be rendered as:
+
+.. qapi:module:: block-core
+   :noindex:
+
+   Welcome to the block-core module!
+
+
+qapi:command
+------------
+
+This directive documents a QMP command. It may use any of the standard
+Sphinx or QAPI options, and the documentation body may contain
+``:arg:``, ``:feat:``, ``:error:``, or ``:return:`` info field list
+entries.
+
+Example::
+
+  .. qapi:command:: x-fake-command
+     :since: 42.0
+     :unstable:
+
+     This command is fake, so it can't hurt you!
+
+     :arg int foo: Your favorite number.
+     :arg string? bar: Your favorite season.
+     :return [string]: A lovely computer-written poem for you.
+
+
+Will be rendered as:
+
+  .. qapi:command:: x-fake-command
+     :noindex:
+     :since: 42.0
+     :unstable:
+
+     This command is fake, so it can't hurt you!
+
+     :arg int foo: Your favorite number.
+     :arg string? bar: Your favorite season.
+     :return [string]: A lovely computer-written poem for you.
+
+
+qapi:event
+----------
+
+This directive documents a QMP event. It may use any of the standard
+Sphinx or QAPI options, and the documentation body may contain
+``:memb:`` or ``:feat:`` info field list entries.
+
+Example::
+
+  .. qapi:event:: COMPUTER_IS_RUINED
+     :since: 0.1
+     :deprecated:
+
+     This event is emitted when your computer is *extremely* ruined.
+
+     :memb string reason: Diagnostics as to what caused your computer to
+        be ruined.
+     :feat sadness: When present, the diagnostic message will also
+        explain how sad the computer is as a result of your wrongdoings.
+
+Will be rendered as:
+
+.. qapi:event:: COMPUTER_IS_RUINED
+   :noindex:
+   :since: 0.1
+   :deprecated:
+
+   This event is emitted when your computer is *extremely* ruined.
+
+   :memb string reason: Diagnostics as to what caused your computer to
+      be ruined.
+   :feat sadness: When present, the diagnostic message will also explain
+      how sad the computer is as a result of your wrongdoings.
+
+
+qapi:enum
+---------
+
+This directive documents a QAPI enum. It may use any of the standard
+Sphinx or QAPI options, and the documentation body may contain
+``:value:`` or ``:feat:`` info field list entries.
+
+Example::
+
+  .. qapi:enum:: Mood
+     :ifcond: LIB_PERSONALITY
+
+     This enum represents your virtual machine's current mood!
+
+     :value Happy: Your VM is content and well-fed.
+     :value Hungry: Your VM needs food.
+     :value Melancholic: Your VM is experiencing existential angst.
+     :value Petulant: Your VM is throwing a temper tantrum.
+
+Will be rendered as:
+
+.. qapi:enum:: Mood
+   :noindex:
+   :ifcond: LIB_PERSONALITY
+
+   This enum represents your virtual machine's current mood!
+
+   :value Happy: Your VM is content and well-fed.
+   :value Hungry: Your VM needs food.
+   :value Melancholic: Your VM is experiencing existential angst.
+   :value Petulant: Your VM is throwing a temper tantrum.
+
+
+qapi:object
+-----------
+
+This directive documents a QAPI structure or union and represents a QMP
+object. It may use any of the standard Sphinx or QAPI options, and the
+documentation body may contain ``:memb:`` or ``:feat:`` info field list
+entries.
+
+Example::
+
+  .. qapi:object:: BigBlobOfStuff
+
+     This object has a bunch of disparate and unrelated things in it.
+
+     :memb int Birthday: Your birthday, represented in seconds since the
+                         UNIX epoch.
+     :memb [string] Fav-Foods: A list of your favorite foods.
+     :memb boolean? Bizarre-Docs: True if the documentation reference
+        should be strange.
+
+Will be rendered as:
+
+.. qapi:object:: BigBlobOfStuff
+   :noindex:
+
+   This object has a bunch of disparate and unrelated things in it.
+
+   :memb int Birthday: Your birthday, represented in seconds since the
+                       UNIX epoch.
+   :memb [string] Fav-Foods: A list of your favorite foods.
+   :memb boolean? Bizarre-Docs: True if the documentation reference
+      should be strange.
+
+
+qapi:alternate
+--------------
+
+This directive documents a QAPI alternate. It may use any of the
+standard Sphinx or QAPI options, and the documentation body may contain
+``:alt:`` or ``:feat:`` info field list entries.
+
+Example::
+
+  .. qapi:alternate:: ErrorCode
+
+     This alternate represents an Error Code from the VM.
+
+     :alt int ec: An error code, like the type you're used to.
+     :alt string em: An expletive-laced error message, if your
+        computer is feeling particularly cranky and tired of your
+        antics.
+
+Will be rendered as:
+
+.. qapi:alternate:: ErrorCode
+   :noindex:
+
+   This alternate represents an Error Code from the VM.
+
+   :alt int ec: An error code, like the type you're used to.
+   :alt string em: An expletive-laced error message, if your
+      computer is feeling particularly cranky and tired of your
+      antics.
diff --git a/docs/devel/reset.rst b/docs/devel/reset.rst
index adefd59ef97c9..0b8b2fa5f40df 100644
--- a/docs/devel/reset.rst
+++ b/docs/devel/reset.rst
@@ -143,6 +143,11 @@ The *exit* phase is executed only when the last reset operation ends. Therefore
 the object does not need to care how many of reset controllers it has and how
 many of them have started a reset.
 
+DMA capable devices are expected to cancel all outstanding DMA operations
+during either 'enter' or 'hold' phases. IOMMUs are expected to reset during
+the 'exit' phase and this sequencing makes sure no outstanding DMA request
+will fault.
+
 
 Handling reset in a resettable object
 -------------------------------------
diff --git a/docs/devel/rust.rst b/docs/devel/rust.rst
index 390aae438669c..88bdec1eb28f4 100644
--- a/docs/devel/rust.rst
+++ b/docs/devel/rust.rst
@@ -139,16 +139,22 @@ anymore.
 Writing Rust code in QEMU
 -------------------------
 
-Right now QEMU includes three crates:
+QEMU includes four crates:
 
 * ``qemu_api`` for bindings to C code and useful functionality
 
 * ``qemu_api_macros`` defines several procedural macros that are useful when
   writing C code
 
-* ``pl011`` (under ``rust/hw/char/pl011``) is the sample device that is being
-  used to further develop ``qemu_api`` and ``qemu_api_macros``.  It is a functional
-  replacement for the ``hw/char/pl011.c`` file.
+* ``pl011`` (under ``rust/hw/char/pl011``) and ``hpet`` (under ``rust/hw/timer/hpet``)
+  are sample devices that demonstrate ``qemu_api`` and ``qemu_api_macros``, and are
+  used to further develop them.  These two crates are functional\ [#issues]_ replacements
+  for the ``hw/char/pl011.c`` and ``hw/timer/hpet.c`` files.
+
+.. [#issues] The ``pl011`` crate is synchronized with ``hw/char/pl011.c``
+   as of commit 02b1f7f61928.  The ``hpet`` crate is synchronized as of
+   commit f32352ff9e.  Both are lacking tracing functionality; ``hpet``
+   is also lacking support for migration.
 
 This section explains how to work with them.
 
@@ -179,12 +185,15 @@ module           status
 ``callbacks``    complete
 ``cell``         stable
 ``c_str``        complete
+``errno``        complete
 ``irq``          complete
+``memory``       stable
 ``module``       complete
 ``offset_of``    stable
 ``qdev``         stable
 ``qom``          stable
 ``sysbus``       stable
+``timer``        stable
 ``vmstate``      proof of concept
 ``zeroable``     stable
 ================ ======================
@@ -194,6 +203,50 @@ module           status
   interface either.  Also, ``unsafe`` interfaces may be replaced by safe interfaces
   later.
 
+Naming convention
+'''''''''''''''''
+
+C function names usually are prefixed according to the data type that they
+apply to, for example ``timer_mod`` or ``sysbus_connect_irq``.  Furthermore,
+both function and structs sometimes have a ``qemu_`` or ``QEMU`` prefix.
+Generally speaking, these are all removed in the corresponding Rust functions:
+``QEMUTimer`` becomes ``timer::Timer``, ``timer_mod`` becomes ``Timer::modify``,
+``sysbus_connect_irq`` becomes ``SysBusDeviceMethods::connect_irq``.
+
+Sometimes however a name appears multiple times in the QOM class hierarchy,
+and the only difference is in the prefix.  An example is ``qdev_realize`` and
+``sysbus_realize``.  In such cases, whenever a name is not unique in
+the hierarchy, always add the prefix to the classes that are lower in
+the hierarchy; for the top class, decide on a case by case basis.
+
+For example:
+
+========================== =========================================
+``device_cold_reset()``    ``DeviceMethods::cold_reset()``
+``pci_device_reset()``     ``PciDeviceMethods::pci_device_reset()``
+``pci_bridge_reset()``     ``PciBridgeMethods::pci_bridge_reset()``
+========================== =========================================
+
+Here, the name is not exactly the same, but nevertheless ``PciDeviceMethods``
+adds the prefix to avoid confusion, because the functionality of
+``device_cold_reset()`` and ``pci_device_reset()`` is subtly different.
+
+In this case, however, no prefix is needed:
+
+========================== =========================================
+``device_realize()``       ``DeviceMethods::realize()``
+``sysbus_realize()``       ``SysbusDeviceMethods::sysbus_realize()``
+``pci_realize()``          ``PciDeviceMethods::pci_realize()``
+========================== =========================================
+
+Here, the lower classes do not add any functionality, and mostly
+provide extra compile-time checking; the basic *realize* functionality
+is the same for all devices.  Therefore, ``DeviceMethods`` does not
+add the prefix.
+
+Whenever a name is unique in the hierarchy, instead, you should
+always remove the class name prefix.
+
 Common pitfalls
 '''''''''''''''
 
@@ -243,15 +296,35 @@ of ``&mut self``; access to internal fields must use *interior mutability*
 to go from a shared reference to a ``&mut``.
 
 Whenever C code provides you with an opaque ``void *``, avoid converting it
-to a Rust mutable reference, and use a shared reference instead.  Rust code
-will then have to use QEMU's ``BqlRefCell`` and ``BqlCell`` type, which
-enforce that locking rules for the "Big QEMU Lock" are respected.  These cell
-types are also known to the ``vmstate`` crate, which is able to "look inside"
-them when building an in-memory representation of a ``struct``s layout.
-Note that the same is not true of a ``RefCell`` or ``Mutex``.
-
-In the future, similar cell types might also be provided for ``AioContext``-based
-locking as well.
+to a Rust mutable reference, and use a shared reference instead.  The
+``qemu_api::cell`` module provides wrappers that can be used to tell the
+Rust compiler about interior mutability, and optionally to enforce locking
+rules for the "Big QEMU Lock".  In the future, similar cell types might
+also be provided for ``AioContext``-based locking as well.
+
+In particular, device code will usually rely on the ``BqlRefCell`` and
+``BqlCell`` type to ensure that data is accessed correctly under the
+"Big QEMU Lock".  These cell types are also known to the ``vmstate``
+crate, which is able to "look inside" them when building an in-memory
+representation of a ``struct``'s layout.  Note that the same is not true
+of a ``RefCell`` or ``Mutex``.
+
+Bindings code instead will usually use the ``Opaque`` type, which hides
+the contents of the underlying struct and can be easily converted to
+a raw pointer, for use in calls to C functions.  It can be used for
+example as follows::
+
+    #[repr(transparent)]
+    #[derive(Debug, qemu_api_macros::Wrapper)]
+    pub struct Object(Opaque<bindings::Object>);
+
+where the special ``derive`` macro provides useful methods such as
+``from_raw``, ``as_ptr`, ``as_mut_ptr`` and ``raw_get``.  The bindings will
+then manually check for the big QEMU lock with assertions, which allows
+the wrapper to be declared thread-safe::
+
+    unsafe impl Send for Object {}
+    unsafe impl Sync for Object {}
 
 Writing bindings to C code
 ''''''''''''''''''''''''''
@@ -303,7 +376,7 @@ Writing procedural macros
 '''''''''''''''''''''''''
 
 By conventions, procedural macros are split in two functions, one
-returning ``Result<proc_macro2::TokenStream, MacroError>` with the body of
+returning ``Result<proc_macro2::TokenStream, MacroError>`` with the body of
 the procedural macro, and the second returning ``proc_macro::TokenStream``
 which is the actual procedural macro.  The former's name is the same as
 the latter with the ``_or_error`` suffix.  The code for the latter is more
diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst
index 6608a293763b8..688984fd39129 100644
--- a/docs/devel/tcg-ops.rst
+++ b/docs/devel/tcg-ops.rst
@@ -927,7 +927,9 @@ operation uses a constant input constraint which does not allow all
 constants, it must also accept registers in order to have a fallback.
 The constraint '``i``' is defined generically to accept any constant.
 The constraint '``r``' is not defined generically, but is consistently
-used by each backend to indicate all registers.
+used by each backend to indicate all registers.  If ``TCG_REG_ZERO``
+is defined by the backend, the constraint '``z``' is defined generically
+to map constant 0 to the hardware zero register.
 
 The movi_i32 and movi_i64 operations must accept any constants.
 
diff --git a/docs/devel/testing/functional.rst b/docs/devel/testing/functional.rst
index ecc738922b7ce..a9fa45eac1f69 100644
--- a/docs/devel/testing/functional.rst
+++ b/docs/devel/testing/functional.rst
@@ -173,7 +173,7 @@ QEMU binary selection
 ^^^^^^^^^^^^^^^^^^^^^
 
 The QEMU binary used for the ``self.vm`` QEMUMachine instance will
-primarily depend on the value of the ``qemu_bin`` class attribute.
+primarily depend on the value of the ``qemu_bin`` instance attribute.
 If it is not explicitly set by the test code, its default value will
 be the result the QEMU_TEST_QEMU_BINARY environment variable.
 
@@ -251,7 +251,7 @@ Many functional tests download assets (e.g. Linux kernels, initrds,
 firmware images, etc.) from the internet to be able to run tests with
 them. This imposes additional challenges to the test framework.
 
-First there is the the problem that some people might not have an
+First there is the problem that some people might not have an
 unconstrained internet connection, so such tests should not be run by
 default when running ``make check``. To accomplish this situation,
 the tests that download files should only be added to the "thorough"
@@ -274,7 +274,9 @@ the tests are run. This pre-caching is done with the qemu_test.Asset
 class. To use it in your test, declare an asset in your test class with
 its URL and SHA256 checksum like this::
 
-    ASSET_somename = (
+    from qemu_test import Asset
+
+    ASSET_somename = Asset(
         ('https://www.qemu.org/assets/images/qemu_head_200.png'),
         '34b74cad46ea28a2966c1d04e102510daf1fd73e6582b6b74523940d5da029dd')
 
diff --git a/docs/devel/uefi-vars.rst b/docs/devel/uefi-vars.rst
new file mode 100644
index 0000000000000..0151a26a0a6f8
--- /dev/null
+++ b/docs/devel/uefi-vars.rst
@@ -0,0 +1,68 @@
+==============
+UEFI variables
+==============
+
+Guest UEFI variable management
+==============================
+
+The traditional approach for UEFI Variable storage in qemu guests is
+to work as close as possible to physical hardware.  That means
+providing pflash as storage and leaving the management of variables
+and flash to the guest.
+
+Secure boot support comes with the requirement that the UEFI variable
+storage must be protected against direct access by the OS.  All update
+requests must pass the sanity checks.  (Parts of) the firmware must
+run with a higher privilege level than the OS so this can be enforced
+by the firmware.  On x86 this has been implemented using System
+Management Mode (SMM) in qemu and kvm, which again is the same
+approach taken by physical hardware.  Only privileged code running in
+SMM mode is allowed to access flash storage.
+
+Communication with the firmware code running in SMM mode works by
+serializing the requests to a shared buffer, then trapping into SMM
+mode via SMI.  The SMM code processes the request, stores the reply in
+the same buffer and returns.
+
+Host UEFI variable service
+==========================
+
+Instead of running the privileged code inside the guest we can run it
+on the host.  The serialization protocol can be reused.  The
+communication with the host uses a virtual device, which essentially
+configures the shared buffer location and size, and traps to the host
+to process the requests.
+
+The ``uefi-vars`` device implements the UEFI virtual device.  It comes
+in ``uefi-vars-x86`` and ``uefi-vars-sysbus`` flavours.  The device
+reimplements the handlers needed, specifically
+``EfiSmmVariableProtocol`` and ``VarCheckPolicyLibMmiHandler``.  It
+also consumes events (``EfiEndOfDxeEventGroup``,
+``EfiEventReadyToBoot`` and ``EfiEventExitBootServices``).
+
+The advantage of the approach is that we do not need a special
+privilege level for the firmware to protect itself, i.e. it does not
+depend on SMM emulation on x64, which allows the removal of a bunch of
+complex code for SMM emulation from the linux kernel
+(CONFIG_KVM_SMM=n).  It also allows support for secure boot on arm
+without implementing secure world (el3) emulation in kvm.
+
+Of course there are also downsides.  The added device increases the
+attack surface of the host, and we are adding some code duplication
+because we have to reimplement some edk2 functionality in qemu.
+
+usage on x86_64
+---------------
+
+.. code::
+
+   qemu-system-x86_64 \
+      -device uefi-vars-x86,jsonfile=/path/to/vars.json
+
+usage on aarch64
+----------------
+
+.. code::
+
+   qemu-system-aarch64 -M virt \
+      -device uefi-vars-sysbus,jsonfile=/path/to/vars.json
diff --git a/docs/glossary.rst b/docs/glossary.rst
index 693d9855dd13b..4fa044bfb6ee7 100644
--- a/docs/glossary.rst
+++ b/docs/glossary.rst
@@ -120,7 +120,7 @@ Migration
 ---------
 
 QEMU can save and restore the execution of a virtual machine between different
-host systems. This is provided by the `Migration framework<migration>`.
+host systems. This is provided by the :ref:`Migration framework<migration>`.
 
 NBD
 ---
@@ -212,14 +212,14 @@ machine emulator and virtualizer.
 QOM
 ---
 
-`QEMU Object Model <qom>` is an object oriented API used to define various
-devices and hardware in the QEMU codebase.
+:ref:`QEMU Object Model <qom>` is an object oriented API used to define
+various devices and hardware in the QEMU codebase.
 
 Record/replay
 -------------
 
-`Record/replay <replay>` is a feature of QEMU allowing to have a deterministic
-and reproducible execution of a virtual machine.
+:ref:`Record/replay <replay>` is a feature of QEMU allowing to have a
+deterministic and reproducible execution of a virtual machine.
 
 Rust
 ----
diff --git a/docs/interop/qemu-ga-ref.rst b/docs/interop/qemu-ga-ref.rst
index 032d49245525d..19b5c7a549852 100644
--- a/docs/interop/qemu-ga-ref.rst
+++ b/docs/interop/qemu-ga-ref.rst
@@ -5,3 +5,5 @@ QEMU Guest Agent Protocol Reference
    :depth: 3
 
 .. qapi-doc:: qga/qapi-schema.json
+   :transmogrify:
+   :namespace: QGA
diff --git a/docs/interop/qemu-qmp-ref.rst b/docs/interop/qemu-qmp-ref.rst
index f94614a0b2f7d..ef8792b53f55f 100644
--- a/docs/interop/qemu-qmp-ref.rst
+++ b/docs/interop/qemu-qmp-ref.rst
@@ -7,3 +7,5 @@ QEMU QMP Reference Manual
    :depth: 3
 
 .. qapi-doc:: qapi/qapi-schema.json
+   :transmogrify:
+   :namespace: QMP
diff --git a/docs/interop/qemu-storage-daemon-qmp-ref.rst b/docs/interop/qemu-storage-daemon-qmp-ref.rst
index 9fed68152f5a4..d0228d63b8d9b 100644
--- a/docs/interop/qemu-storage-daemon-qmp-ref.rst
+++ b/docs/interop/qemu-storage-daemon-qmp-ref.rst
@@ -5,3 +5,5 @@ QEMU Storage Daemon QMP Reference Manual
    :depth: 3
 
 .. qapi-doc:: storage-daemon/qapi/qapi-schema.json
+   :transmogrify:
+   :namespace: QSD
diff --git a/docs/pcie_sriov.txt b/docs/pcie_sriov.txt
index a47aad0bfab00..ab2142807f796 100644
--- a/docs/pcie_sriov.txt
+++ b/docs/pcie_sriov.txt
@@ -52,9 +52,11 @@ setting up a BAR for a VF.
       ...
 
       /* Add and initialize the SR/IOV capability */
-      pcie_sriov_pf_init(d, 0x200, "your_virtual_dev",
-                       vf_devid, initial_vfs, total_vfs,
-                       fun_offset, stride);
+      if (!pcie_sriov_pf_init(d, 0x200, "your_virtual_dev",
+                              vf_devid, initial_vfs, total_vfs,
+                              fun_offset, stride, errp)) {
+         return;
+      }
 
       /* Set up individual VF BARs (parameters as for normal BARs) */
       pcie_sriov_pf_init_vf_bar( ... )
diff --git a/docs/specs/aspeed-intc.rst b/docs/specs/aspeed-intc.rst
new file mode 100644
index 0000000000000..9cefd7f37f855
--- /dev/null
+++ b/docs/specs/aspeed-intc.rst
@@ -0,0 +1,136 @@
+===========================
+ASPEED Interrupt Controller
+===========================
+
+AST2700
+-------
+There are a total of 480 interrupt sources in AST2700. Due to the limitation of
+interrupt numbers of processors, the interrupts are merged every 32 sources for
+interrupt numbers greater than 127.
+
+There are two levels of interrupt controllers, INTC (CPU Die) and INTCIO
+(I/O Die).
+
+Interrupt Mapping
+-----------------
+- INTC: Handles interrupt sources 0 - 127 and integrates signals from INTCIO.
+- INTCIO: Handles interrupt sources 128 - 319 independently.
+
+QEMU Support
+------------
+Currently, only GIC 192 to 201 are supported, and their source interrupts are
+from INTCIO and connected to INTC at input pin 0 and output pins 0 to 9 for
+GIC 192-201.
+
+Design for GICINT 196
+---------------------
+The orgate has interrupt sources ranging from 0 to 31, with its output pin
+connected to INTCIO "T0 GICINT_196". The output pin is then connected to INTC
+"GIC_192_201" at bit 4, and its bit 4 output pin is connected to GIC 196.
+
+INTC GIC_192_201 Output Pin Mapping
+-----------------------------------
+The design of INTC GIC_192_201 have 10 output pins, mapped as following:
+
+====  ====
+Bit   GIC
+====  ====
+0     192
+1     193
+2     194
+3     195
+4     196
+5     197
+6     198
+7     199
+8     200
+9     201
+====  ====
+
+AST2700 A0
+----------
+It has only one INTC controller, and currently, only GIC 128-136 is supported.
+To support both AST2700 A1 and AST2700 A0, there are 10 OR gates in the INTC,
+with gates 1 to 9 supporting GIC 128-136.
+
+Design for GICINT 132
+---------------------
+The orgate has interrupt sources ranging from 0 to 31, with its output pin
+connected to INTC. The output pin is then connected to GIC 132.
+
+Block Diagram of GICINT 196 for AST2700 A1 and GICINT 132 for AST2700 A0
+------------------------------------------------------------------------
+
+.. code-block::
+
+   |-------------------------------------------------------------------------------------------------------|
+   |                                                   AST2700 A1 Design                                   |
+   |           To GICINT196                                                                                |
+   |                                                                                                       |
+   |   ETH1    |-----------|                    |--------------------------|        |--------------|       |
+   |  -------->|0          |                    |         INTCIO           |        |  orgates[0]  |       |
+   |   ETH2    |          4|   orgates[0]------>|inpin[0]-------->outpin[0]|------->| 0            |       |
+   |  -------->|1         5|   orgates[1]------>|inpin[1]-------->outpin[1]|------->| 1            |       |
+   |   ETH3    |          6|   orgates[2]------>|inpin[2]-------->outpin[2]|------->| 2            |       |
+   |  -------->|2        19|   orgates[3]------>|inpin[3]-------->outpin[3]|------->| 3  OR[0:9]   |-----| |
+   |   UART0   |         20|-->orgates[4]------>|inpin[4]-------->outpin[4]|------->| 4            |     | |
+   |  -------->|7        21|   orgates[5]------>|inpin[5]-------->outpin[5]|------->| 5            |     | |
+   |   UART1   |         22|   orgates[6]------>|inpin[6]-------->outpin[6]|------->| 6            |     | |
+   |  -------->|8        23|   orgates[7]------>|inpin[7]-------->outpin[7]|------->| 7            |     | |
+   |   UART2   |         24|   orgates[8]------>|inpin[8]-------->outpin[8]|------->| 8            |     | |
+   |  -------->|9        25|   orgates[9]------>|inpin[9]-------->outpin[9]|------->| 9            |     | |
+   |   UART3   |         26|                    |--------------------------|        |--------------|     | |
+   |  ---------|10       27|                                                                             | |
+   |   UART5   |         28|                                                                             | |
+   |  -------->|11       29|                                                                             | |
+   |   UART6   |           |                                                                             | |
+   |  -------->|12       30|     |-----------------------------------------------------------------------| |
+   |   UART7   |         31|     |                                                                         |
+   |  -------->|13         |     |                                                                         |
+   |   UART8   |  OR[0:31] |     |                |------------------------------|           |----------|  |
+   |  -------->|14         |     |                |            INTC              |           |     GIC  |  |
+   |   UART9   |           |     |                |inpin[0:0]--------->outpin[0] |---------->|192       |  |
+   |  -------->|15         |     |                |inpin[0:1]--------->outpin[1] |---------->|193       |  |
+   |   UART10  |           |     |                |inpin[0:2]--------->outpin[2] |---------->|194       |  |
+   |  -------->|16         |     |                |inpin[0:3]--------->outpin[3] |---------->|195       |  |
+   |   UART11  |           |     |--------------> |inpin[0:4]--------->outpin[4] |---------->|196       |  |
+   |  -------->|17         |                      |inpin[0:5]--------->outpin[5] |---------->|197       |  |
+   |   UART12  |           |                      |inpin[0:6]--------->outpin[6] |---------->|198       |  |
+   |  -------->|18         |                      |inpin[0:7]--------->outpin[7] |---------->|199       |  |
+   |           |-----------|                      |inpin[0:8]--------->outpin[8] |---------->|200       |  |
+   |                                              |inpin[0:9]--------->outpin[9] |---------->|201       |  |
+   |-------------------------------------------------------------------------------------------------------|
+   |-------------------------------------------------------------------------------------------------------|
+   |  ETH1    |-----------|     orgates[1]------->|inpin[1]----------->outpin[10]|---------->|128       |  |
+   | -------->|0          |     orgates[2]------->|inpin[2]----------->outpin[11]|---------->|129       |  |
+   |  ETH2    |          4|     orgates[3]------->|inpin[3]----------->outpin[12]|---------->|130       |  |
+   | -------->|1         5|     orgates[4]------->|inpin[4]----------->outpin[13]|---------->|131       |  |
+   |  ETH3    |          6|---->orgates[5]------->|inpin[5]----------->outpin[14]|---------->|132       |  |
+   | -------->|2        19|     orgates[6]------->|inpin[6]----------->outpin[15]|---------->|133       |  |
+   |  UART0   |         20|     orgates[7]------->|inpin[7]----------->outpin[16]|---------->|134       |  |
+   | -------->|7        21|     orgates[8]------->|inpin[8]----------->outpin[17]|---------->|135       |  |
+   |  UART1   |         22|     orgates[9]------->|inpin[9]----------->outpin[18]|---------->|136       |  |
+   | -------->|8        23|                       |------------------------------|           |----------|  |
+   |  UART2   |         24|                                                                                |
+   | -------->|9        25|                       AST2700 A0 Design                                        |
+   |  UART3   |         26|                                                                                |
+   | -------->|10       27|                                                                                |
+   |  UART5   |         28|                                                                                |
+   | -------->|11       29| GICINT132                                                                      |
+   |  UART6   |           |                                                                                |
+   | -------->|12       30|                                                                                |
+   |  UART7   |         31|                                                                                |
+   | -------->|13         |                                                                                |
+   |  UART8   |  OR[0:31] |                                                                                |
+   | -------->|14         |                                                                                |
+   |  UART9   |           |                                                                                |
+   | -------->|15         |                                                                                |
+   |  UART10  |           |                                                                                |
+   | -------->|16         |                                                                                |
+   |  UART11  |           |                                                                                |
+   | -------->|17         |                                                                                |
+   |  UART12  |           |                                                                                |
+   | -------->|18         |                                                                                |
+   |          |-----------|                                                                                |
+   |                                                                                                       |
+   |-------------------------------------------------------------------------------------------------------|
diff --git a/docs/specs/index.rst b/docs/specs/index.rst
index d7675cebc2d94..f19d73c9f6e13 100644
--- a/docs/specs/index.rst
+++ b/docs/specs/index.rst
@@ -38,3 +38,4 @@ guest hardware that is specific to QEMU.
    rocker
    riscv-iommu
    riscv-aia
+   aspeed-intc
diff --git a/docs/specs/riscv-iommu.rst b/docs/specs/riscv-iommu.rst
index b1538c9ead5f8..000c7e1f57f72 100644
--- a/docs/specs/riscv-iommu.rst
+++ b/docs/specs/riscv-iommu.rst
@@ -82,6 +82,8 @@ Several options are available to control the capabilities of the device, namely:
 - "off" (Out-of-reset translation mode: 'on' for DMA disabled, 'off' for 'BARE' (passthrough))
 - "s-stage": enable s-stage support
 - "g-stage": enable g-stage support
+- "hpm-counters": number of hardware performance counters available. Maximum value is 31.
+  Default value is 31. Use 0 (zero) to disable HPM support
 
 riscv-iommu-sys device
 ----------------------
diff --git a/docs/sphinx-static/theme_overrides.css b/docs/sphinx-static/theme_overrides.css
index 965ecac54fdd0..b225bf706f505 100644
--- a/docs/sphinx-static/theme_overrides.css
+++ b/docs/sphinx-static/theme_overrides.css
@@ -18,8 +18,8 @@ h1, h2, .rst-content .toctree-wrapper p.caption, h3, h4, h5, h6, legend {
 
 .rst-content dl:not(.docutils) dt {
     border-top: none;
-    border-left: solid 3px #ccc;
-    background-color: #f0f0f0;
+    border-left: solid 5px #bcc6d2;
+    background-color: #eaedf1;
     color: black;
 }
 
@@ -208,3 +208,97 @@ div[class^="highlight"] pre {
         color: inherit;
     }
 }
+
+/* QAPI domain theming */
+
+/* most content in a QAPI object definition should not eclipse about
+   80ch, but nested field lists are explicitly exempt due to their
+   two-column nature */
+.qapi dd *:not(dl) {
+    max-width: 80ch;
+}
+
+/* but the content column itself should still be less than ~80ch. */
+.qapi .field-list dd {
+    max-width: 80ch;
+}
+
+.qapi-infopips {
+    margin-bottom: 1em;
+}
+
+.qapi-infopip {
+    display: inline-block;
+    padding: 0em 0.5em 0em 0.5em;
+    margin: 0.25em;
+}
+
+.qapi-deprecated,.qapi-unstable {
+    background-color: #fffef5;
+    border: solid #fff176 6px;
+    font-weight: bold;
+    padding: 8px;
+    border-radius: 15px;
+    margin: 5px;
+}
+
+.qapi-unstable::before {
+    content: '🚧 ';
+}
+
+.qapi-deprecated::before {
+    content: '⚠️ ';
+}
+
+.qapi-ifcond::before {
+    /* gaze ye into the crystal ball to determine feature availability */
+    content: '🔮 ';
+}
+
+.qapi-ifcond {
+    background-color: #f9f5ff;
+    border: solid #dac2ff 6px;
+    padding: 8px;
+    border-radius: 15px;
+    margin: 5px;
+}
+
+/* code blocks */
+.qapi div[class^="highlight"] {
+    width: fit-content;
+    background-color: #fffafd;
+    border: 2px solid #ffe1f3;
+}
+
+/* note, warning, etc. */
+.qapi .admonition {
+    width: fit-content;
+}
+
+/* pad the top of the field-list so the text doesn't start directly at
+   the top border; primarily for the field list labels, but adjust the
+   field bodies as well for parity. */
+dl.field-list > dt:first-of-type, dl.field-list > dd:first-of-type {
+    padding-top: 0.3em;
+}
+
+dl.field-list > dt:last-of-type, dl.field-list > dd:last-of-type {
+    padding-bottom: 0.3em;
+}
+
+/* pad the field list labels so they don't crash into the border */
+dl.field-list > dt {
+    padding-left: 0.5em;
+    padding-right: 0.5em;
+}
+
+/* Add a little padding between field list sections */
+dl.field-list > dd:not(:last-child) {
+    padding-bottom: 1em;
+}
+
+/* Sphinx 3.x: unresolved xrefs */
+.rst-content *:not(a) > code.xref {
+    font-weight: 400;
+    color: #333333;
+}
diff --git a/docs/sphinx/compat.py b/docs/sphinx/compat.py
new file mode 100644
index 0000000000000..9cf7fe006e447
--- /dev/null
+++ b/docs/sphinx/compat.py
@@ -0,0 +1,230 @@
+"""
+Sphinx cross-version compatibility goop
+"""
+
+import re
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Optional,
+    Type,
+)
+
+from docutils import nodes
+from docutils.nodes import Element, Node, Text
+from docutils.statemachine import StringList
+
+import sphinx
+from sphinx import addnodes, util
+from sphinx.directives import ObjectDescription
+from sphinx.environment import BuildEnvironment
+from sphinx.roles import XRefRole
+from sphinx.util import docfields
+from sphinx.util.docutils import (
+    ReferenceRole,
+    SphinxDirective,
+    switch_source_input,
+)
+from sphinx.util.typing import TextlikeNode
+
+
+MAKE_XREF_WORKAROUND = sphinx.version_info[:3] < (4, 1, 0)
+
+
+SpaceNode: Callable[[str], Node]
+KeywordNode: Callable[[str, str], Node]
+
+if sphinx.version_info[:3] >= (4, 0, 0):
+    SpaceNode = addnodes.desc_sig_space
+    KeywordNode = addnodes.desc_sig_keyword
+else:
+    SpaceNode = Text
+    KeywordNode = addnodes.desc_annotation
+
+
+def nested_parse_with_titles(
+    directive: SphinxDirective, content_node: Element
+) -> None:
+    """
+    This helper preserves error parsing context across sphinx versions.
+    """
+
+    # necessary so that the child nodes get the right source/line set
+    content_node.document = directive.state.document
+
+    try:
+        # Modern sphinx (6.2.0+) supports proper offsetting for
+        # nested parse error context management
+        util.nodes.nested_parse_with_titles(
+            directive.state,
+            directive.content,
+            content_node,
+            content_offset=directive.content_offset,
+        )
+    except TypeError:
+        # No content_offset argument. Fall back to SSI method.
+        with switch_source_input(directive.state, directive.content):
+            util.nodes.nested_parse_with_titles(
+                directive.state, directive.content, content_node
+            )
+
+
+# ###########################################
+# xref compatibility hacks for Sphinx < 4.1 #
+# ###########################################
+
+# When we require >= Sphinx 4.1, the following function and the
+# subsequent 3 compatibility classes can be removed. Anywhere in
+# qapi_domain that uses one of these Compat* types can be switched to
+# using the garden-variety lib-provided classes with no trickery.
+
+
+def _compat_make_xref(  # pylint: disable=unused-argument
+    self: sphinx.util.docfields.Field,
+    rolename: str,
+    domain: str,
+    target: str,
+    innernode: Type[TextlikeNode] = addnodes.literal_emphasis,
+    contnode: Optional[Node] = None,
+    env: Optional[BuildEnvironment] = None,
+    inliner: Any = None,
+    location: Any = None,
+) -> Node:
+    """
+    Compatibility workaround for Sphinx versions prior to 4.1.0.
+
+    Older sphinx versions do not use the domain's XRefRole for parsing
+    and formatting cross-references, so we need to perform this magick
+    ourselves to avoid needing to write the parser/formatter in two
+    separate places.
+
+    This workaround isn't brick-for-brick compatible with modern Sphinx
+    versions, because we do not have access to the parent directive's
+    state during this parsing like we do in more modern versions.
+
+    It's no worse than what pre-Sphinx 4.1.0 does, so... oh well!
+    """
+
+    # Yes, this function is gross. Pre-4.1 support is a miracle.
+    # pylint: disable=too-many-locals
+
+    assert env
+    # Note: Sphinx's own code ignores the type warning here, too.
+    if not rolename:
+        return contnode or innernode(target, target)  # type: ignore[call-arg]
+
+    # Get the role instance, but don't *execute it* - we lack the
+    # correct state to do so. Instead, we'll just use its public
+    # methods to do our reference formatting, and emulate the rest.
+    role = env.get_domain(domain).roles[rolename]
+    assert isinstance(role, XRefRole)
+
+    # XRefRole features not supported by this compatibility shim;
+    # these were not supported in Sphinx 3.x either, so nothing of
+    # value is really lost.
+    assert not target.startswith("!")
+    assert not re.match(ReferenceRole.explicit_title_re, target)
+    assert not role.lowercase
+    assert not role.fix_parens
+
+    # Code below based mostly on sphinx.roles.XRefRole; run() and
+    # create_xref_node()
+    options = {
+        "refdoc": env.docname,
+        "refdomain": domain,
+        "reftype": rolename,
+        "refexplicit": False,
+        "refwarn": role.warn_dangling,
+    }
+    refnode = role.nodeclass(target, **options)
+    title, target = role.process_link(env, refnode, False, target, target)
+    refnode["reftarget"] = target
+    classes = ["xref", domain, f"{domain}-{rolename}"]
+    refnode += role.innernodeclass(target, title, classes=classes)
+
+    # This is the very gross part of the hack. Normally,
+    # result_nodes takes a document object to which we would pass
+    # self.inliner.document. Prior to Sphinx 4.1, we don't *have* an
+    # inliner to pass, so we have nothing to pass here. However, the
+    # actual implementation of role.result_nodes in this case
+    # doesn't actually use that argument, so this winds up being
+    # ... fine. Rest easy at night knowing this code only runs under
+    # old versions of Sphinx, so at least it won't change in the
+    # future on us and lead to surprising new failures.
+    # Gross, I know.
+    result_nodes, _messages = role.result_nodes(
+        None,  # type: ignore
+        env,
+        refnode,
+        is_ref=True,
+    )
+    return nodes.inline(target, "", *result_nodes)
+
+
+class CompatField(docfields.Field):
+    if MAKE_XREF_WORKAROUND:
+        make_xref = _compat_make_xref
+
+
+class CompatGroupedField(docfields.GroupedField):
+    if MAKE_XREF_WORKAROUND:
+        make_xref = _compat_make_xref
+
+
+class CompatTypedField(docfields.TypedField):
+    if MAKE_XREF_WORKAROUND:
+        make_xref = _compat_make_xref
+
+
+# ################################################################
+# Nested parsing error location fix for Sphinx 5.3.0 < x < 6.2.0 #
+# ################################################################
+
+# When we require Sphinx 4.x, the TYPE_CHECKING hack where we avoid
+# subscripting ObjectDescription at runtime can be removed in favor of
+# just always subscripting the class.
+
+# When we require Sphinx > 6.2.0, the rest of this compatibility hack
+# can be dropped and QAPIObject can just inherit directly from
+# ObjectDescription[Signature].
+
+SOURCE_LOCATION_FIX = (5, 3, 0) <= sphinx.version_info[:3] < (6, 2, 0)
+
+Signature = str
+
+
+if TYPE_CHECKING:
+    _BaseClass = ObjectDescription[Signature]
+else:
+    _BaseClass = ObjectDescription
+
+
+class ParserFix(_BaseClass):
+
+    _temp_content: StringList
+    _temp_offset: int
+    _temp_node: Optional[addnodes.desc_content]
+
+    def before_content(self) -> None:
+        # Work around a sphinx bug and parse the content ourselves.
+        self._temp_content = self.content
+        self._temp_offset = self.content_offset
+        self._temp_node = None
+
+        if SOURCE_LOCATION_FIX:
+            self._temp_node = addnodes.desc_content()
+            self.state.nested_parse(
+                self.content, self.content_offset, self._temp_node
+            )
+            # Sphinx will try to parse the content block itself,
+            # Give it nothingness to parse instead.
+            self.content = StringList()
+            self.content_offset = 0
+
+    def transform_content(self, content_node: addnodes.desc_content) -> None:
+        # Sphinx workaround: Inject our parsed content and restore state.
+        if self._temp_node:
+            content_node += self._temp_node.children
+            self.content = self._temp_content
+            self.content_offset = self._temp_offset
diff --git a/docs/sphinx/qapi_domain.py b/docs/sphinx/qapi_domain.py
new file mode 100644
index 0000000000000..c94af5719ca07
--- /dev/null
+++ b/docs/sphinx/qapi_domain.py
@@ -0,0 +1,1052 @@
+"""
+QAPI domain extension.
+"""
+
+# The best laid plans of mice and men, ...
+# pylint: disable=too-many-lines
+
+from __future__ import annotations
+
+import re
+import types
+from typing import (
+    TYPE_CHECKING,
+    List,
+    NamedTuple,
+    Tuple,
+    Type,
+    cast,
+)
+
+from docutils import nodes
+from docutils.parsers.rst import directives
+
+from compat import (
+    CompatField,
+    CompatGroupedField,
+    CompatTypedField,
+    KeywordNode,
+    ParserFix,
+    Signature,
+    SpaceNode,
+)
+from sphinx import addnodes
+from sphinx.directives import ObjectDescription
+from sphinx.domains import (
+    Domain,
+    Index,
+    IndexEntry,
+    ObjType,
+)
+from sphinx.locale import _, __
+from sphinx.roles import XRefRole
+from sphinx.util import logging
+from sphinx.util.docutils import SphinxDirective
+from sphinx.util.nodes import make_id, make_refnode
+
+
+if TYPE_CHECKING:
+    from typing import (
+        AbstractSet,
+        Any,
+        Dict,
+        Iterable,
+        Optional,
+        Union,
+    )
+
+    from docutils.nodes import Element, Node
+
+    from sphinx.addnodes import desc_signature, pending_xref
+    from sphinx.application import Sphinx
+    from sphinx.builders import Builder
+    from sphinx.environment import BuildEnvironment
+    from sphinx.util.typing import OptionSpec
+
+
+logger = logging.getLogger(__name__)
+
+
+def _unpack_field(
+    field: nodes.Node,
+) -> Tuple[nodes.field_name, nodes.field_body]:
+    """
+    docutils helper: unpack a field node in a type-safe manner.
+    """
+    assert isinstance(field, nodes.field)
+    assert len(field.children) == 2
+    assert isinstance(field.children[0], nodes.field_name)
+    assert isinstance(field.children[1], nodes.field_body)
+    return (field.children[0], field.children[1])
+
+
+class ObjectEntry(NamedTuple):
+    docname: str
+    node_id: str
+    objtype: str
+    aliased: bool
+
+
+class QAPIXRefRole(XRefRole):
+
+    def process_link(
+        self,
+        env: BuildEnvironment,
+        refnode: Element,
+        has_explicit_title: bool,
+        title: str,
+        target: str,
+    ) -> tuple[str, str]:
+        refnode["qapi:namespace"] = env.ref_context.get("qapi:namespace")
+        refnode["qapi:module"] = env.ref_context.get("qapi:module")
+
+        # Cross-references that begin with a tilde adjust the title to
+        # only show the reference without a leading module, even if one
+        # was provided. This is a Sphinx-standard syntax; give it
+        # priority over QAPI-specific type markup below.
+        hide_module = False
+        if target.startswith("~"):
+            hide_module = True
+            target = target[1:]
+
+        # Type names that end with "?" are considered optional
+        # arguments and should be documented as such, but it's not
+        # part of the xref itself.
+        if target.endswith("?"):
+            refnode["qapi:optional"] = True
+            target = target[:-1]
+
+        # Type names wrapped in brackets denote lists. strip the
+        # brackets and remember to add them back later.
+        if target.startswith("[") and target.endswith("]"):
+            refnode["qapi:array"] = True
+            target = target[1:-1]
+
+        if has_explicit_title:
+            # Don't mess with the title at all if it was explicitly set.
+            # Explicit title syntax for references is e.g.
+            # :qapi:type:`target <explicit title>`
+            # and this explicit title overrides everything else here.
+            return title, target
+
+        title = target
+        if hide_module:
+            title = target.split(".")[-1]
+
+        return title, target
+
+    def result_nodes(
+        self,
+        document: nodes.document,
+        env: BuildEnvironment,
+        node: Element,
+        is_ref: bool,
+    ) -> Tuple[List[nodes.Node], List[nodes.system_message]]:
+
+        # node here is the pending_xref node (or whatever nodeclass was
+        # configured at XRefRole class instantiation time).
+        results: List[nodes.Node] = [node]
+
+        if node.get("qapi:array"):
+            results.insert(0, nodes.literal("[", "["))
+            results.append(nodes.literal("]", "]"))
+
+        if node.get("qapi:optional"):
+            results.append(nodes.Text(", "))
+            results.append(nodes.emphasis("?", "optional"))
+
+        return results, []
+
+
+class QAPIDescription(ParserFix):
+    """
+    Generic QAPI description.
+
+    This is meant to be an abstract class, not instantiated
+    directly. This class handles the abstract details of indexing, the
+    TOC, and reference targets for QAPI descriptions.
+    """
+
+    def handle_signature(self, sig: str, signode: desc_signature) -> Signature:
+        # Do nothing. The return value here is the "name" of the entity
+        # being documented; for QAPI, this is the same as the
+        # "signature", which is just a name.
+
+        # Normally this method must also populate signode with nodes to
+        # render the signature; here we do nothing instead - the
+        # subclasses will handle this.
+        return sig
+
+    def get_index_text(self, name: Signature) -> Tuple[str, str]:
+        """Return the text for the index entry of the object."""
+
+        # NB: this is used for the global index, not the QAPI index.
+        return ("single", f"{name} (QMP {self.objtype})")
+
+    def _get_context(self) -> Tuple[str, str]:
+        namespace = self.options.get(
+            "namespace", self.env.ref_context.get("qapi:namespace", "")
+        )
+        modname = self.options.get(
+            "module", self.env.ref_context.get("qapi:module", "")
+        )
+
+        return namespace, modname
+
+    def _get_fqn(self, name: Signature) -> str:
+        namespace, modname = self._get_context()
+
+        # If we're documenting a module, don't include the module as
+        # part of the FQN; we ARE the module!
+        if self.objtype == "module":
+            modname = ""
+
+        if modname:
+            name = f"{modname}.{name}"
+        if namespace:
+            name = f"{namespace}:{name}"
+        return name
+
+    def add_target_and_index(
+        self, name: Signature, sig: str, signode: desc_signature
+    ) -> None:
+        # name is the return value of handle_signature.
+        # sig is the original, raw text argument to handle_signature.
+        # For QAPI, these are identical, currently.
+
+        assert self.objtype
+
+        if not (fullname := signode.get("fullname", "")):
+            fullname = self._get_fqn(name)
+
+        node_id = make_id(
+            self.env, self.state.document, self.objtype, fullname
+        )
+        signode["ids"].append(node_id)
+
+        self.state.document.note_explicit_target(signode)
+        domain = cast(QAPIDomain, self.env.get_domain("qapi"))
+        domain.note_object(fullname, self.objtype, node_id, location=signode)
+
+        if "no-index-entry" not in self.options:
+            arity, indextext = self.get_index_text(name)
+            assert self.indexnode is not None
+            if indextext:
+                self.indexnode["entries"].append(
+                    (arity, indextext, node_id, "", None)
+                )
+
+    @staticmethod
+    def split_fqn(name: str) -> Tuple[str, str, str]:
+        if ":" in name:
+            ns, name = name.split(":")
+        else:
+            ns = ""
+
+        if "." in name:
+            module, name = name.split(".")
+        else:
+            module = ""
+
+        return (ns, module, name)
+
+    def _object_hierarchy_parts(
+        self, sig_node: desc_signature
+    ) -> Tuple[str, ...]:
+        if "fullname" not in sig_node:
+            return ()
+        return self.split_fqn(sig_node["fullname"])
+
+    def _toc_entry_name(self, sig_node: desc_signature) -> str:
+        # This controls the name in the TOC and on the sidebar.
+
+        # This is the return type of _object_hierarchy_parts().
+        toc_parts = cast(Tuple[str, ...], sig_node.get("_toc_parts", ()))
+        if not toc_parts:
+            return ""
+
+        config = self.env.app.config
+        namespace, modname, name = toc_parts
+
+        if config.toc_object_entries_show_parents == "domain":
+            ret = name
+            if modname and modname != self.env.ref_context.get(
+                "qapi:module", ""
+            ):
+                ret = f"{modname}.{name}"
+            if namespace and namespace != self.env.ref_context.get(
+                "qapi:namespace", ""
+            ):
+                ret = f"{namespace}:{ret}"
+            return ret
+        if config.toc_object_entries_show_parents == "hide":
+            return name
+        if config.toc_object_entries_show_parents == "all":
+            return sig_node.get("fullname", name)
+        return ""
+
+
+class QAPIObject(QAPIDescription):
+    """
+    Description of a generic QAPI object.
+
+    It's not used directly, but is instead subclassed by specific directives.
+    """
+
+    # Inherit some standard options from Sphinx's ObjectDescription
+    option_spec: OptionSpec = (  # type:ignore[misc]
+        ObjectDescription.option_spec.copy()
+    )
+    option_spec.update(
+        {
+            # Context overrides:
+            "namespace": directives.unchanged,
+            "module": directives.unchanged,
+            # These are QAPI originals:
+            "since": directives.unchanged,
+            "ifcond": directives.unchanged,
+            "deprecated": directives.flag,
+            "unstable": directives.flag,
+        }
+    )
+
+    doc_field_types = [
+        # :feat name: descr
+        CompatGroupedField(
+            "feature",
+            label=_("Features"),
+            names=("feat",),
+            can_collapse=False,
+        ),
+    ]
+
+    def get_signature_prefix(self) -> List[nodes.Node]:
+        """Return a prefix to put before the object name in the signature."""
+        assert self.objtype
+        return [
+            KeywordNode("", self.objtype.title()),
+            SpaceNode(" "),
+        ]
+
+    def get_signature_suffix(self) -> List[nodes.Node]:
+        """Return a suffix to put after the object name in the signature."""
+        ret: List[nodes.Node] = []
+
+        if "since" in self.options:
+            ret += [
+                SpaceNode(" "),
+                addnodes.desc_sig_element(
+                    "", f"(Since: {self.options['since']})"
+                ),
+            ]
+
+        return ret
+
+    def handle_signature(self, sig: str, signode: desc_signature) -> Signature:
+        """
+        Transform a QAPI definition name into RST nodes.
+
+        This method was originally intended for handling function
+        signatures. In the QAPI domain, however, we only pass the
+        definition name as the directive argument and handle everything
+        else in the content body with field lists.
+
+        As such, the only argument here is "sig", which is just the QAPI
+        definition name.
+        """
+        # No module or domain info allowed in the signature!
+        assert ":" not in sig
+        assert "." not in sig
+
+        namespace, modname = self._get_context()
+        signode["fullname"] = self._get_fqn(sig)
+        signode["namespace"] = namespace
+        signode["module"] = modname
+
+        sig_prefix = self.get_signature_prefix()
+        if sig_prefix:
+            signode += addnodes.desc_annotation(
+                str(sig_prefix), "", *sig_prefix
+            )
+        signode += addnodes.desc_name(sig, sig)
+        signode += self.get_signature_suffix()
+
+        return sig
+
+    def _add_infopips(self, contentnode: addnodes.desc_content) -> None:
+        # Add various eye-catches and things that go below the signature
+        # bar, but precede the user-defined content.
+        infopips = nodes.container()
+        infopips.attributes["classes"].append("qapi-infopips")
+
+        def _add_pip(
+            source: str, content: Union[str, List[nodes.Node]], classname: str
+        ) -> None:
+            node = nodes.container(source)
+            if isinstance(content, str):
+                node.append(nodes.Text(content))
+            else:
+                node.extend(content)
+            node.attributes["classes"].extend(["qapi-infopip", classname])
+            infopips.append(node)
+
+        if "deprecated" in self.options:
+            _add_pip(
+                ":deprecated:",
+                f"This {self.objtype} is deprecated.",
+                "qapi-deprecated",
+            )
+
+        if "unstable" in self.options:
+            _add_pip(
+                ":unstable:",
+                f"This {self.objtype} is unstable/experimental.",
+                "qapi-unstable",
+            )
+
+        if self.options.get("ifcond", ""):
+            ifcond = self.options["ifcond"]
+            _add_pip(
+                f":ifcond: {ifcond}",
+                [
+                    nodes.emphasis("", "Availability"),
+                    nodes.Text(": "),
+                    nodes.literal(ifcond, ifcond),
+                ],
+                "qapi-ifcond",
+            )
+
+        if infopips.children:
+            contentnode.insert(0, infopips)
+
+    def _validate_field(self, field: nodes.field) -> None:
+        """Validate field lists in this QAPI Object Description."""
+        name, _ = _unpack_field(field)
+        allowed_fields = set(self.env.app.config.qapi_allowed_fields)
+
+        field_label = name.astext()
+        if field_label in allowed_fields:
+            # Explicitly allowed field list name, OK.
+            return
+
+        try:
+            # split into field type and argument (if provided)
+            # e.g. `:arg type name: descr` is
+            # field_type = "arg", field_arg = "type name".
+            field_type, field_arg = field_label.split(None, 1)
+        except ValueError:
+            # No arguments provided
+            field_type = field_label
+            field_arg = ""
+
+        typemap = self.get_field_type_map()
+        if field_type in typemap:
+            # This is a special docfield, yet-to-be-processed. Catch
+            # correct names, but incorrect arguments. This mismatch WILL
+            # cause Sphinx to render this field incorrectly (without a
+            # warning), which is never what we want.
+            typedesc = typemap[field_type][0]
+            if typedesc.has_arg != bool(field_arg):
+                msg = f"docfield field list type {field_type!r} "
+                if typedesc.has_arg:
+                    msg += "requires an argument."
+                else:
+                    msg += "takes no arguments."
+                logger.warning(msg, location=field)
+        else:
+            # This is unrecognized entirely. It's valid rST to use
+            # arbitrary fields, but let's ensure the documentation
+            # writer has done this intentionally.
+            valid = ", ".join(sorted(set(typemap) | allowed_fields))
+            msg = (
+                f"Unrecognized field list name {field_label!r}.\n"
+                f"Valid fields for qapi:{self.objtype} are: {valid}\n"
+                "\n"
+                "If this usage is intentional, please add it to "
+                "'qapi_allowed_fields' in docs/conf.py."
+            )
+            logger.warning(msg, location=field)
+
+    def transform_content(self, content_node: addnodes.desc_content) -> None:
+        # This hook runs after before_content and the nested parse, but
+        # before the DocFieldTransformer is executed.
+        super().transform_content(content_node)
+
+        self._add_infopips(content_node)
+
+        # Validate field lists.
+        for child in content_node:
+            if isinstance(child, nodes.field_list):
+                for field in child.children:
+                    assert isinstance(field, nodes.field)
+                    self._validate_field(field)
+
+
+class SpecialTypedField(CompatTypedField):
+    def make_field(self, *args: Any, **kwargs: Any) -> nodes.field:
+        ret = super().make_field(*args, **kwargs)
+
+        # Look for the characteristic " -- " text node that Sphinx
+        # inserts for each TypedField entry ...
+        for node in ret.traverse(lambda n: str(n) == " -- "):
+            par = node.parent
+            if par.children[0].astext() != "q_dummy":
+                continue
+
+            # If the first node's text is q_dummy, this is a dummy
+            # field we want to strip down to just its contents.
+            del par.children[:-1]
+
+        return ret
+
+
+class QAPICommand(QAPIObject):
+    """Description of a QAPI Command."""
+
+    doc_field_types = QAPIObject.doc_field_types.copy()
+    doc_field_types.extend(
+        [
+            # :arg TypeName ArgName: descr
+            SpecialTypedField(
+                "argument",
+                label=_("Arguments"),
+                names=("arg",),
+                typerolename="type",
+                can_collapse=False,
+            ),
+            # :error: descr
+            CompatField(
+                "error",
+                label=_("Errors"),
+                names=("error", "errors"),
+                has_arg=False,
+            ),
+            # :return TypeName: descr
+            CompatGroupedField(
+                "returnvalue",
+                label=_("Return"),
+                rolename="type",
+                names=("return",),
+                can_collapse=True,
+            ),
+        ]
+    )
+
+
+class QAPIEnum(QAPIObject):
+    """Description of a QAPI Enum."""
+
+    doc_field_types = QAPIObject.doc_field_types.copy()
+    doc_field_types.extend(
+        [
+            # :value name: descr
+            CompatGroupedField(
+                "value",
+                label=_("Values"),
+                names=("value",),
+                can_collapse=False,
+            )
+        ]
+    )
+
+
+class QAPIAlternate(QAPIObject):
+    """Description of a QAPI Alternate."""
+
+    doc_field_types = QAPIObject.doc_field_types.copy()
+    doc_field_types.extend(
+        [
+            # :alt type name: descr
+            CompatTypedField(
+                "alternative",
+                label=_("Alternatives"),
+                names=("alt",),
+                typerolename="type",
+                can_collapse=False,
+            ),
+        ]
+    )
+
+
+class QAPIObjectWithMembers(QAPIObject):
+    """Base class for Events/Structs/Unions"""
+
+    doc_field_types = QAPIObject.doc_field_types.copy()
+    doc_field_types.extend(
+        [
+            # :member type name: descr
+            SpecialTypedField(
+                "member",
+                label=_("Members"),
+                names=("memb",),
+                typerolename="type",
+                can_collapse=False,
+            ),
+        ]
+    )
+
+
+class QAPIEvent(QAPIObjectWithMembers):
+    # pylint: disable=too-many-ancestors
+    """Description of a QAPI Event."""
+
+
+class QAPIJSONObject(QAPIObjectWithMembers):
+    # pylint: disable=too-many-ancestors
+    """Description of a QAPI Object: structs and unions."""
+
+
+class QAPIModule(QAPIDescription):
+    """
+    Directive to mark description of a new module.
+
+    This directive doesn't generate any special formatting, and is just
+    a pass-through for the content body. Named section titles are
+    allowed in the content body.
+
+    Use this directive to create entries for the QAPI module in the
+    global index and the QAPI index; as well as to associate subsequent
+    definitions with the module they are defined in for purposes of
+    search and QAPI index organization.
+
+    :arg: The name of the module.
+    :opt no-index: Don't add cross-reference targets or index entries.
+    :opt no-typesetting: Don't render the content body (but preserve any
+       cross-reference target IDs in the squelched output.)
+
+    Example::
+
+       .. qapi:module:: block-core
+          :no-index:
+          :no-typesetting:
+
+          Lorem ipsum, dolor sit amet ...
+    """
+
+    def run(self) -> List[Node]:
+        modname = self.arguments[0].strip()
+        self.env.ref_context["qapi:module"] = modname
+        ret = super().run()
+
+        # ObjectDescription always creates a visible signature bar. We
+        # want module items to be "invisible", however.
+
+        # Extract the content body of the directive:
+        assert isinstance(ret[-1], addnodes.desc)
+        desc_node = ret.pop(-1)
+        assert isinstance(desc_node.children[1], addnodes.desc_content)
+        ret.extend(desc_node.children[1].children)
+
+        # Re-home node_ids so anchor refs still work:
+        node_ids: List[str]
+        if node_ids := [
+            node_id
+            for el in desc_node.children[0].traverse(nodes.Element)
+            for node_id in cast(List[str], el.get("ids", ()))
+        ]:
+            target_node = nodes.target(ids=node_ids)
+            ret.insert(1, target_node)
+
+        return ret
+
+
+class QAPINamespace(SphinxDirective):
+    has_content = False
+    required_arguments = 1
+
+    def run(self) -> List[Node]:
+        namespace = self.arguments[0].strip()
+        self.env.ref_context["qapi:namespace"] = namespace
+
+        return []
+
+
+class QAPIIndex(Index):
+    """
+    Index subclass to provide the QAPI definition index.
+    """
+
+    # pylint: disable=too-few-public-methods
+
+    name = "index"
+    localname = _("QAPI Index")
+    shortname = _("QAPI Index")
+    namespace = ""
+
+    def generate(
+        self,
+        docnames: Optional[Iterable[str]] = None,
+    ) -> Tuple[List[Tuple[str, List[IndexEntry]]], bool]:
+        assert isinstance(self.domain, QAPIDomain)
+        content: Dict[str, List[IndexEntry]] = {}
+        collapse = False
+
+        for objname, obj in self.domain.objects.items():
+            if docnames and obj.docname not in docnames:
+                continue
+
+            ns, _mod, name = QAPIDescription.split_fqn(objname)
+
+            if self.namespace != ns:
+                continue
+
+            # Add an alphabetical entry:
+            entries = content.setdefault(name[0].upper(), [])
+            entries.append(
+                IndexEntry(
+                    name, 0, obj.docname, obj.node_id, obj.objtype, "", ""
+                )
+            )
+
+            # Add a categorical entry:
+            category = obj.objtype.title() + "s"
+            entries = content.setdefault(category, [])
+            entries.append(
+                IndexEntry(name, 0, obj.docname, obj.node_id, "", "", "")
+            )
+
+        # Sort entries within each category alphabetically
+        for category in content:
+            content[category] = sorted(content[category])
+
+        # Sort the categories themselves; type names first, ABC entries last.
+        sorted_content = sorted(
+            content.items(),
+            key=lambda x: (len(x[0]) == 1, x[0]),
+        )
+        return sorted_content, collapse
+
+
+class QAPIDomain(Domain):
+    """QAPI language domain."""
+
+    name = "qapi"
+    label = "QAPI"
+
+    # This table associates cross-reference object types (key) with an
+    # ObjType instance, which defines the valid cross-reference roles
+    # for each object type.
+    #
+    # e.g., the :qapi:type: cross-reference role can refer to enum,
+    # struct, union, or alternate objects; but :qapi:obj: can refer to
+    # anything. Each object also gets its own targeted cross-reference role.
+    object_types: Dict[str, ObjType] = {
+        "module": ObjType(_("module"), "mod", "any"),
+        "command": ObjType(_("command"), "cmd", "any"),
+        "event": ObjType(_("event"), "event", "any"),
+        "enum": ObjType(_("enum"), "enum", "type", "any"),
+        "object": ObjType(_("object"), "obj", "type", "any"),
+        "alternate": ObjType(_("alternate"), "alt", "type", "any"),
+    }
+
+    # Each of these provides a rST directive,
+    # e.g. .. qapi:module:: block-core
+    directives = {
+        "namespace": QAPINamespace,
+        "module": QAPIModule,
+        "command": QAPICommand,
+        "event": QAPIEvent,
+        "enum": QAPIEnum,
+        "object": QAPIJSONObject,
+        "alternate": QAPIAlternate,
+    }
+
+    # These are all cross-reference roles; e.g.
+    # :qapi:cmd:`query-block`. The keys correlate to the names used in
+    # the object_types table values above.
+    roles = {
+        "mod": QAPIXRefRole(),
+        "cmd": QAPIXRefRole(),
+        "event": QAPIXRefRole(),
+        "enum": QAPIXRefRole(),
+        "obj": QAPIXRefRole(),  # specifically structs and unions.
+        "alt": QAPIXRefRole(),
+        # reference any data type (excludes modules, commands, events)
+        "type": QAPIXRefRole(),
+        "any": QAPIXRefRole(),  # reference *any* type of QAPI object.
+    }
+
+    # Moved into the data property at runtime;
+    # this is the internal index of reference-able objects.
+    initial_data: Dict[str, Dict[str, Tuple[Any]]] = {
+        "objects": {},  # fullname -> ObjectEntry
+    }
+
+    # Index pages to generate; each entry is an Index class.
+    indices = [
+        QAPIIndex,
+    ]
+
+    @property
+    def objects(self) -> Dict[str, ObjectEntry]:
+        ret = self.data.setdefault("objects", {})
+        return ret  # type: ignore[no-any-return]
+
+    def setup(self) -> None:
+        namespaces = set(self.env.app.config.qapi_namespaces)
+        for namespace in namespaces:
+            new_index: Type[QAPIIndex] = types.new_class(
+                f"{namespace}Index", bases=(QAPIIndex,)
+            )
+            new_index.name = f"{namespace.lower()}-index"
+            new_index.localname = _(f"{namespace} Index")
+            new_index.shortname = _(f"{namespace} Index")
+            new_index.namespace = namespace
+
+            self.indices.append(new_index)
+
+        super().setup()
+
+    def note_object(
+        self,
+        name: str,
+        objtype: str,
+        node_id: str,
+        aliased: bool = False,
+        location: Any = None,
+    ) -> None:
+        """Note a QAPI object for cross reference."""
+        if name in self.objects:
+            other = self.objects[name]
+            if other.aliased and aliased is False:
+                # The original definition found. Override it!
+                pass
+            elif other.aliased is False and aliased:
+                # The original definition is already registered.
+                return
+            else:
+                # duplicated
+                logger.warning(
+                    __(
+                        "duplicate object description of %s, "
+                        "other instance in %s, use :no-index: for one of them"
+                    ),
+                    name,
+                    other.docname,
+                    location=location,
+                )
+        self.objects[name] = ObjectEntry(
+            self.env.docname, node_id, objtype, aliased
+        )
+
+    def clear_doc(self, docname: str) -> None:
+        for fullname, obj in list(self.objects.items()):
+            if obj.docname == docname:
+                del self.objects[fullname]
+
+    def merge_domaindata(
+        self, docnames: AbstractSet[str], otherdata: Dict[str, Any]
+    ) -> None:
+        for fullname, obj in otherdata["objects"].items():
+            if obj.docname in docnames:
+                # Sphinx's own python domain doesn't appear to bother to
+                # check for collisions. Assert they don't happen and
+                # we'll fix it if/when the case arises.
+                assert fullname not in self.objects, (
+                    "bug - collision on merge?"
+                    f" {fullname=} {obj=} {self.objects[fullname]=}"
+                )
+                self.objects[fullname] = obj
+
+    def find_obj(
+        self, namespace: str, modname: str, name: str, typ: Optional[str]
+    ) -> List[Tuple[str, ObjectEntry]]:
+        """
+        Find a QAPI object for "name", maybe using contextual information.
+
+        Returns a list of (name, object entry) tuples.
+
+        :param namespace: The current namespace context (if any!) under
+            which we are searching.
+        :param modname: The current module context (if any!) under
+            which we are searching.
+        :param name: The name of the x-ref to resolve; may or may not
+            include leading context.
+        :param type: The role name of the x-ref we're resolving, if
+            provided. This is absent for "any" role lookups.
+        """
+        if not name:
+            return []
+
+        # ##
+        # what to search for
+        # ##
+
+        parts = list(QAPIDescription.split_fqn(name))
+        explicit = tuple(bool(x) for x in parts)
+
+        # Fill in the blanks where possible:
+        if namespace and not parts[0]:
+            parts[0] = namespace
+        if modname and not parts[1]:
+            parts[1] = modname
+
+        implicit_fqn = ""
+        if all(parts):
+            implicit_fqn = f"{parts[0]}:{parts[1]}.{parts[2]}"
+
+        if typ is None:
+            # :any: lookup, search everything:
+            objtypes: List[str] = list(self.object_types)
+        else:
+            # type is specified and will be a role (e.g. obj, mod, cmd)
+            # convert this to eligible object types (e.g. command, module)
+            # using the QAPIDomain.object_types table.
+            objtypes = self.objtypes_for_role(typ, [])
+
+        # ##
+        # search!
+        # ##
+
+        def _search(needle: str) -> List[str]:
+            if (
+                needle
+                and needle in self.objects
+                and self.objects[needle].objtype in objtypes
+            ):
+                return [needle]
+            return []
+
+        if found := _search(name):
+            # Exact match!
+            pass
+        elif found := _search(implicit_fqn):
+            # Exact match using contextual information to fill in the gaps.
+            pass
+        else:
+            # No exact hits, perform applicable fuzzy searches.
+            searches = []
+
+            esc = tuple(re.escape(s) for s in parts)
+
+            # Try searching for ns:*.name or ns:name
+            if explicit[0] and not explicit[1]:
+                searches.append(f"^{esc[0]}:([^\\.]+\\.)?{esc[2]}$")
+            # Try searching for *:module.name or module.name
+            if explicit[1] and not explicit[0]:
+                searches.append(f"(^|:){esc[1]}\\.{esc[2]}$")
+            # Try searching for context-ns:*.name or context-ns:name
+            if parts[0] and not (explicit[0] or explicit[1]):
+                searches.append(f"^{esc[0]}:([^\\.]+\\.)?{esc[2]}$")
+            # Try searching for *:context-mod.name or context-mod.name
+            if parts[1] and not (explicit[0] or explicit[1]):
+                searches.append(f"(^|:){esc[1]}\\.{esc[2]}$")
+            # Try searching for *:name, *.name, or name
+            if not (explicit[0] or explicit[1]):
+                searches.append(f"(^|:|\\.){esc[2]}$")
+
+            for search in searches:
+                if found := [
+                    oname
+                    for oname in self.objects
+                    if re.search(search, oname)
+                    and self.objects[oname].objtype in objtypes
+                ]:
+                    break
+
+        matches = [(oname, self.objects[oname]) for oname in found]
+        if len(matches) > 1:
+            matches = [m for m in matches if not m[1].aliased]
+        return matches
+
+    def resolve_xref(
+        self,
+        env: BuildEnvironment,
+        fromdocname: str,
+        builder: Builder,
+        typ: str,
+        target: str,
+        node: pending_xref,
+        contnode: Element,
+    ) -> nodes.reference | None:
+        namespace = node.get("qapi:namespace")
+        modname = node.get("qapi:module")
+        matches = self.find_obj(namespace, modname, target, typ)
+
+        if not matches:
+            # Normally, we could pass warn_dangling=True to QAPIXRefRole(),
+            # but that will trigger on references to these built-in types,
+            # which we'd like to ignore instead.
+
+            # Take care of that warning here instead, so long as the
+            # reference isn't to one of our built-in core types.
+            if target not in (
+                "string",
+                "number",
+                "int",
+                "boolean",
+                "null",
+                "value",
+                "q_empty",
+            ):
+                logger.warning(
+                    __("qapi:%s reference target not found: %r"),
+                    typ,
+                    target,
+                    type="ref",
+                    subtype="qapi",
+                    location=node,
+                )
+            return None
+
+        if len(matches) > 1:
+            logger.warning(
+                __("more than one target found for cross-reference %r: %s"),
+                target,
+                ", ".join(match[0] for match in matches),
+                type="ref",
+                subtype="qapi",
+                location=node,
+            )
+
+        name, obj = matches[0]
+        return make_refnode(
+            builder, fromdocname, obj.docname, obj.node_id, contnode, name
+        )
+
+    def resolve_any_xref(
+        self,
+        env: BuildEnvironment,
+        fromdocname: str,
+        builder: Builder,
+        target: str,
+        node: pending_xref,
+        contnode: Element,
+    ) -> List[Tuple[str, nodes.reference]]:
+        results: List[Tuple[str, nodes.reference]] = []
+        matches = self.find_obj(
+            node.get("qapi:namespace"), node.get("qapi:module"), target, None
+        )
+        for name, obj in matches:
+            rolename = self.role_for_objtype(obj.objtype)
+            assert rolename is not None
+            role = f"qapi:{rolename}"
+            refnode = make_refnode(
+                builder, fromdocname, obj.docname, obj.node_id, contnode, name
+            )
+            results.append((role, refnode))
+        return results
+
+
+def setup(app: Sphinx) -> Dict[str, Any]:
+    app.setup_extension("sphinx.directives")
+    app.add_config_value(
+        "qapi_allowed_fields",
+        set(),
+        "env",  # Setting impacts parsing phase
+        types=set,
+    )
+    app.add_config_value(
+        "qapi_namespaces",
+        set(),
+        "env",
+        types=set,
+    )
+    app.add_domain(QAPIDomain)
+
+    return {
+        "version": "1.0",
+        "env_version": 1,
+        "parallel_read_safe": True,
+        "parallel_write_safe": True,
+    }
diff --git a/docs/sphinx/qapidoc.py b/docs/sphinx/qapidoc.py
index 5f96b46270bbd..661b2c4ed0e1f 100644
--- a/docs/sphinx/qapidoc.py
+++ b/docs/sphinx/qapidoc.py
@@ -2,6 +2,7 @@
 #
 # QEMU qapidoc QAPI file parsing extension
 #
+# Copyright (c) 2024-2025 Red Hat
 # Copyright (c) 2020 Linaro
 #
 # This work is licensed under the terms of the GNU GPLv2 or later.
@@ -24,445 +25,437 @@
 https://www.sphinx-doc.org/en/master/development/index.html
 """
 
+from __future__ import annotations
+
+__version__ = "2.0"
+
+from contextlib import contextmanager
 import os
+from pathlib import Path
 import re
 import sys
-import textwrap
-from typing import List
+from typing import TYPE_CHECKING
 
 from docutils import nodes
-from docutils.parsers.rst import Directive, directives
-from docutils.statemachine import ViewList
-from qapi.error import QAPIError, QAPISemError
-from qapi.gen import QAPISchemaVisitor
-from qapi.schema import QAPISchema
-
+from docutils.parsers.rst import directives
+from docutils.statemachine import StringList
+from qapi.error import QAPIError
+from qapi.parser import QAPIDoc
+from qapi.schema import (
+    QAPISchema,
+    QAPISchemaArrayType,
+    QAPISchemaCommand,
+    QAPISchemaDefinition,
+    QAPISchemaEnumMember,
+    QAPISchemaEvent,
+    QAPISchemaFeature,
+    QAPISchemaMember,
+    QAPISchemaObjectType,
+    QAPISchemaObjectTypeMember,
+    QAPISchemaType,
+    QAPISchemaVisitor,
+)
+from qapi.source import QAPISourceInfo
+
+from qapidoc_legacy import QAPISchemaGenRSTVisitor  # type: ignore
 from sphinx import addnodes
 from sphinx.directives.code import CodeBlock
 from sphinx.errors import ExtensionError
-from sphinx.util.docutils import switch_source_input
+from sphinx.util import logging
+from sphinx.util.docutils import SphinxDirective, switch_source_input
 from sphinx.util.nodes import nested_parse_with_titles
 
 
-__version__ = "1.0"
+if TYPE_CHECKING:
+    from typing import (
+        Any,
+        Generator,
+        List,
+        Optional,
+        Sequence,
+        Union,
+    )
 
+    from sphinx.application import Sphinx
+    from sphinx.util.typing import ExtensionMetadata
 
-def dedent(text: str) -> str:
-    # Adjust indentation to make description text parse as paragraph.
 
-    lines = text.splitlines(True)
-    if re.match(r"\s+", lines[0]):
-        # First line is indented; description started on the line after
-        # the name. dedent the whole block.
-        return textwrap.dedent(text)
+logger = logging.getLogger(__name__)
 
-    # Descr started on same line. Dedent line 2+.
-    return lines[0] + textwrap.dedent("".join(lines[1:]))
 
+class Transmogrifier:
+    # pylint: disable=too-many-public-methods
 
-# Disable black auto-formatter until re-enabled:
-# fmt: off
+    # Field names used for different entity types:
+    field_types = {
+        "enum": "value",
+        "struct": "memb",
+        "union": "memb",
+        "event": "memb",
+        "command": "arg",
+        "alternate": "alt",
+    }
 
+    def __init__(self) -> None:
+        self._curr_ent: Optional[QAPISchemaDefinition] = None
+        self._result = StringList()
+        self.indent = 0
 
-class QAPISchemaGenRSTVisitor(QAPISchemaVisitor):
-    """A QAPI schema visitor which generates docutils/Sphinx nodes
+    @property
+    def result(self) -> StringList:
+        return self._result
 
-    This class builds up a tree of docutils/Sphinx nodes corresponding
-    to documentation for the various QAPI objects. To use it, first
-    create a QAPISchemaGenRSTVisitor object, and call its
-    visit_begin() method.  Then you can call one of the two methods
-    'freeform' (to add documentation for a freeform documentation
-    chunk) or 'symbol' (to add documentation for a QAPI symbol). These
-    will cause the visitor to build up the tree of document
-    nodes. Once you've added all the documentation via 'freeform' and
-    'symbol' method calls, you can call 'get_document_nodes' to get
-    the final list of document nodes (in a form suitable for returning
-    from a Sphinx directive's 'run' method).
-    """
-    def __init__(self, sphinx_directive):
-        self._cur_doc = None
-        self._sphinx_directive = sphinx_directive
-        self._top_node = nodes.section()
-        self._active_headings = [self._top_node]
-
-    def _make_dlitem(self, term, defn):
-        """Return a dlitem node with the specified term and definition.
-
-        term should be a list of Text and literal nodes.
-        defn should be one of:
-        - a string, which will be handed to _parse_text_into_node
-        - a list of Text and literal nodes, which will be put into
-          a paragraph node
-        """
-        dlitem = nodes.definition_list_item()
-        dlterm = nodes.term('', '', *term)
-        dlitem += dlterm
-        if defn:
-            dldef = nodes.definition()
-            if isinstance(defn, list):
-                dldef += nodes.paragraph('', '', *defn)
-            else:
-                self._parse_text_into_node(defn, dldef)
-            dlitem += dldef
-        return dlitem
-
-    def _make_section(self, title):
-        """Return a section node with optional title"""
-        section = nodes.section(ids=[self._sphinx_directive.new_serialno()])
-        if title:
-            section += nodes.title(title, title)
-        return section
-
-    def _nodes_for_ifcond(self, ifcond, with_if=True):
-        """Return list of Text, literal nodes for the ifcond
-
-        Return a list which gives text like ' (If: condition)'.
-        If with_if is False, we don't return the "(If: " and ")".
-        """
+    @property
+    def entity(self) -> QAPISchemaDefinition:
+        assert self._curr_ent is not None
+        return self._curr_ent
 
-        doc = ifcond.docgen()
-        if not doc:
-            return []
-        doc = nodes.literal('', doc)
-        if not with_if:
-            return [doc]
+    @property
+    def member_field_type(self) -> str:
+        return self.field_types[self.entity.meta]
 
-        nodelist = [nodes.Text(' ('), nodes.strong('', 'If: ')]
-        nodelist.append(doc)
-        nodelist.append(nodes.Text(')'))
-        return nodelist
+    # General-purpose rST generation functions
 
-    def _nodes_for_one_member(self, member):
-        """Return list of Text, literal nodes for this member
+    def get_indent(self) -> str:
+        return "   " * self.indent
 
-        Return a list of doctree nodes which give text like
-        'name: type (optional) (If: ...)' suitable for use as the
-        'term' part of a definition list item.
-        """
-        term = [nodes.literal('', member.name)]
-        if member.type.doc_type():
-            term.append(nodes.Text(': '))
-            term.append(nodes.literal('', member.type.doc_type()))
-        if member.optional:
-            term.append(nodes.Text(' (optional)'))
-        if member.ifcond.is_present():
-            term.extend(self._nodes_for_ifcond(member.ifcond))
-        return term
-
-    def _nodes_for_variant_when(self, branches, variant):
-        """Return list of Text, literal nodes for variant 'when' clause
-
-        Return a list of doctree nodes which give text like
-        'when tagname is variant (If: ...)' suitable for use in
-        the 'branches' part of a definition list.
-        """
-        term = [nodes.Text(' when '),
-                nodes.literal('', branches.tag_member.name),
-                nodes.Text(' is '),
-                nodes.literal('', '"%s"' % variant.name)]
-        if variant.ifcond.is_present():
-            term.extend(self._nodes_for_ifcond(variant.ifcond))
-        return term
-
-    def _nodes_for_members(self, doc, what, base=None, branches=None):
-        """Return list of doctree nodes for the table of members"""
-        dlnode = nodes.definition_list()
-        for section in doc.args.values():
-            term = self._nodes_for_one_member(section.member)
-            # TODO drop fallbacks when undocumented members are outlawed
-            if section.text:
-                defn = dedent(section.text)
-            else:
-                defn = [nodes.Text('Not documented')]
+    @contextmanager
+    def indented(self) -> Generator[None]:
+        self.indent += 1
+        try:
+            yield
+        finally:
+            self.indent -= 1
 
-            dlnode += self._make_dlitem(term, defn)
+    def add_line_raw(self, line: str, source: str, *lineno: int) -> None:
+        """Append one line of generated reST to the output."""
 
-        if base:
-            dlnode += self._make_dlitem([nodes.Text('The members of '),
-                                         nodes.literal('', base.doc_type())],
-                                        None)
+        # NB: Sphinx uses zero-indexed lines; subtract one.
+        lineno = tuple((n - 1 for n in lineno))
 
-        if branches:
-            for v in branches.variants:
-                if v.type.name == 'q_empty':
-                    continue
-                assert not v.type.is_implicit()
-                term = [nodes.Text('The members of '),
-                        nodes.literal('', v.type.doc_type())]
-                term.extend(self._nodes_for_variant_when(branches, v))
-                dlnode += self._make_dlitem(term, None)
-
-        if not dlnode.children:
-            return []
-
-        section = self._make_section(what)
-        section += dlnode
-        return [section]
-
-    def _nodes_for_enum_values(self, doc):
-        """Return list of doctree nodes for the table of enum values"""
-        seen_item = False
-        dlnode = nodes.definition_list()
-        for section in doc.args.values():
-            termtext = [nodes.literal('', section.member.name)]
-            if section.member.ifcond.is_present():
-                termtext.extend(self._nodes_for_ifcond(section.member.ifcond))
-            # TODO drop fallbacks when undocumented members are outlawed
-            if section.text:
-                defn = dedent(section.text)
-            else:
-                defn = [nodes.Text('Not documented')]
-
-            dlnode += self._make_dlitem(termtext, defn)
-            seen_item = True
-
-        if not seen_item:
-            return []
-
-        section = self._make_section('Values')
-        section += dlnode
-        return [section]
-
-    def _nodes_for_arguments(self, doc, arg_type):
-        """Return list of doctree nodes for the arguments section"""
-        if arg_type and not arg_type.is_implicit():
-            assert not doc.args
-            section = self._make_section('Arguments')
-            dlnode = nodes.definition_list()
-            dlnode += self._make_dlitem(
-                [nodes.Text('The members of '),
-                 nodes.literal('', arg_type.name)],
-                None)
-            section += dlnode
-            return [section]
-
-        return self._nodes_for_members(doc, 'Arguments')
-
-    def _nodes_for_features(self, doc):
-        """Return list of doctree nodes for the table of features"""
-        seen_item = False
-        dlnode = nodes.definition_list()
-        for section in doc.features.values():
-            dlnode += self._make_dlitem(
-                [nodes.literal('', section.member.name)], dedent(section.text))
-            seen_item = True
-
-        if not seen_item:
-            return []
-
-        section = self._make_section('Features')
-        section += dlnode
-        return [section]
-
-    def _nodes_for_example(self, exampletext):
-        """Return list of doctree nodes for a code example snippet"""
-        return [nodes.literal_block(exampletext, exampletext)]
-
-    def _nodes_for_sections(self, doc):
-        """Return list of doctree nodes for additional sections"""
-        nodelist = []
-        for section in doc.sections:
-            if section.tag and section.tag == 'TODO':
-                # Hide TODO: sections
-                continue
-
-            if not section.tag:
-                # Sphinx cannot handle sectionless titles;
-                # Instead, just append the results to the prior section.
-                container = nodes.container()
-                self._parse_text_into_node(section.text, container)
-                nodelist += container.children
-                continue
-
-            snode = self._make_section(section.tag)
-            if section.tag.startswith('Example'):
-                snode += self._nodes_for_example(dedent(section.text))
-            else:
-                self._parse_text_into_node(dedent(section.text), snode)
-            nodelist.append(snode)
-        return nodelist
-
-    def _nodes_for_if_section(self, ifcond):
-        """Return list of doctree nodes for the "If" section"""
-        nodelist = []
-        if ifcond.is_present():
-            snode = self._make_section('If')
-            snode += nodes.paragraph(
-                '', '', *self._nodes_for_ifcond(ifcond, with_if=False)
+        if line.strip():
+            # not a blank line
+            self._result.append(
+                self.get_indent() + line.rstrip("\n"), source, *lineno
             )
-            nodelist.append(snode)
-        return nodelist
-
-    def _add_doc(self, typ, sections):
-        """Add documentation for a command/object/enum...
-
-        We assume we're documenting the thing defined in self._cur_doc.
-        typ is the type of thing being added ("Command", "Object", etc)
-
-        sections is a list of nodes for sections to add to the definition.
-        """
-
-        doc = self._cur_doc
-        snode = nodes.section(ids=[self._sphinx_directive.new_serialno()])
-        snode += nodes.title('', '', *[nodes.literal(doc.symbol, doc.symbol),
-                                       nodes.Text(' (' + typ + ')')])
-        self._parse_text_into_node(doc.body.text, snode)
-        for s in sections:
-            if s is not None:
-                snode += s
-        self._add_node_to_current_heading(snode)
-
-    def visit_enum_type(self, name, info, ifcond, features, members, prefix):
-        doc = self._cur_doc
-        self._add_doc('Enum',
-                      self._nodes_for_enum_values(doc)
-                      + self._nodes_for_features(doc)
-                      + self._nodes_for_sections(doc)
-                      + self._nodes_for_if_section(ifcond))
-
-    def visit_object_type(self, name, info, ifcond, features,
-                          base, members, branches):
-        doc = self._cur_doc
-        if base and base.is_implicit():
-            base = None
-        self._add_doc('Object',
-                      self._nodes_for_members(doc, 'Members', base, branches)
-                      + self._nodes_for_features(doc)
-                      + self._nodes_for_sections(doc)
-                      + self._nodes_for_if_section(ifcond))
-
-    def visit_alternate_type(self, name, info, ifcond, features,
-                             alternatives):
-        doc = self._cur_doc
-        self._add_doc('Alternate',
-                      self._nodes_for_members(doc, 'Members')
-                      + self._nodes_for_features(doc)
-                      + self._nodes_for_sections(doc)
-                      + self._nodes_for_if_section(ifcond))
-
-    def visit_command(self, name, info, ifcond, features, arg_type,
-                      ret_type, gen, success_response, boxed, allow_oob,
-                      allow_preconfig, coroutine):
-        doc = self._cur_doc
-        self._add_doc('Command',
-                      self._nodes_for_arguments(doc, arg_type)
-                      + self._nodes_for_features(doc)
-                      + self._nodes_for_sections(doc)
-                      + self._nodes_for_if_section(ifcond))
-
-    def visit_event(self, name, info, ifcond, features, arg_type, boxed):
-        doc = self._cur_doc
-        self._add_doc('Event',
-                      self._nodes_for_arguments(doc, arg_type)
-                      + self._nodes_for_features(doc)
-                      + self._nodes_for_sections(doc)
-                      + self._nodes_for_if_section(ifcond))
-
-    def symbol(self, doc, entity):
-        """Add documentation for one symbol to the document tree
-
-        This is the main entry point which causes us to add documentation
-        nodes for a symbol (which could be a 'command', 'object', 'event',
-        etc). We do this by calling 'visit' on the schema entity, which
-        will then call back into one of our visit_* methods, depending
-        on what kind of thing this symbol is.
-        """
-        self._cur_doc = doc
-        entity.visit(self)
-        self._cur_doc = None
-
-    def _start_new_heading(self, heading, level):
-        """Start a new heading at the specified heading level
+        else:
+            self._result.append("", source, *lineno)
+
+    def add_line(self, content: str, info: QAPISourceInfo) -> None:
+        # NB: We *require* an info object; this works out OK because we
+        # don't document built-in objects that don't have
+        # one. Everything else should.
+        self.add_line_raw(content, info.fname, info.line)
+
+    def add_lines(
+        self,
+        content: str,
+        info: QAPISourceInfo,
+    ) -> None:
+        lines = content.splitlines(True)
+        for i, line in enumerate(lines):
+            self.add_line_raw(line, info.fname, info.line + i)
+
+    def ensure_blank_line(self) -> None:
+        # Empty document -- no blank line required.
+        if not self._result:
+            return
+
+        # Last line isn't blank, add one.
+        if self._result[-1].strip():  # pylint: disable=no-member
+            fname, line = self._result.info(-1)
+            assert isinstance(line, int)
+            # New blank line is credited to one-after the current last line.
+            # +2: correct for zero/one index, then increment by one.
+            self.add_line_raw("", fname, line + 2)
+
+    def add_field(
+        self,
+        kind: str,
+        name: str,
+        body: str,
+        info: QAPISourceInfo,
+        typ: Optional[str] = None,
+    ) -> None:
+        if typ:
+            text = f":{kind} {typ} {name}: {body}"
+        else:
+            text = f":{kind} {name}: {body}"
+        self.add_lines(text, info)
+
+    def format_type(
+        self, ent: Union[QAPISchemaDefinition | QAPISchemaMember]
+    ) -> Optional[str]:
+        if isinstance(ent, (QAPISchemaEnumMember, QAPISchemaFeature)):
+            return None
+
+        qapi_type = ent
+        optional = False
+        if isinstance(ent, QAPISchemaObjectTypeMember):
+            qapi_type = ent.type
+            optional = ent.optional
+
+        if isinstance(qapi_type, QAPISchemaArrayType):
+            ret = f"[{qapi_type.element_type.doc_type()}]"
+        else:
+            assert isinstance(qapi_type, QAPISchemaType)
+            tmp = qapi_type.doc_type()
+            assert tmp
+            ret = tmp
+        if optional:
+            ret += "?"
+
+        return ret
+
+    def generate_field(
+        self,
+        kind: str,
+        member: QAPISchemaMember,
+        body: str,
+        info: QAPISourceInfo,
+    ) -> None:
+        typ = self.format_type(member)
+        self.add_field(kind, member.name, body, info, typ)
+
+    # Transmogrification helpers
+
+    def visit_paragraph(self, section: QAPIDoc.Section) -> None:
+        # Squelch empty paragraphs.
+        if not section.text:
+            return
+
+        self.ensure_blank_line()
+        self.add_lines(section.text, section.info)
+        self.ensure_blank_line()
+
+    def visit_member(self, section: QAPIDoc.ArgSection) -> None:
+        # FIXME: ifcond for members
+        # TODO: features for members (documented at entity-level,
+        # but sometimes defined per-member. Should we add such
+        # information to member descriptions when we can?)
+        assert section.member
+        self.generate_field(
+            self.member_field_type,
+            section.member,
+            # TODO drop fallbacks when undocumented members are outlawed
+            section.text if section.text else "Not documented",
+            section.info,
+        )
 
-        Create a new section whose title is 'heading' and which is placed
-        in the docutils node tree as a child of the most recent level-1
-        heading. Subsequent document sections (commands, freeform doc chunks,
-        etc) will be placed as children of this new heading section.
+    def visit_feature(self, section: QAPIDoc.ArgSection) -> None:
+        # FIXME - ifcond for features is not handled at all yet!
+        # Proposal: decorate the right-hand column with some graphical
+        # element to indicate conditional availability?
+        assert section.text  # Guaranteed by parser.py
+        assert section.member
+
+        self.generate_field("feat", section.member, section.text, section.info)
+
+    def visit_returns(self, section: QAPIDoc.Section) -> None:
+        assert isinstance(self.entity, QAPISchemaCommand)
+        rtype = self.entity.ret_type
+        # q_empty can produce None, but we won't be documenting anything
+        # without an explicit return statement in the doc block, and we
+        # should not have any such explicit statements when there is no
+        # return value.
+        assert rtype
+
+        typ = self.format_type(rtype)
+        assert typ
+        assert section.text
+        self.add_field("return", typ, section.text, section.info)
+
+    def visit_errors(self, section: QAPIDoc.Section) -> None:
+        # FIXME: the formatting for errors may be inconsistent and may
+        # or may not require different newline placement to ensure
+        # proper rendering as a nested list.
+        self.add_lines(f":error:\n{section.text}", section.info)
+
+    def preamble(self, ent: QAPISchemaDefinition) -> None:
         """
-        if len(self._active_headings) < level:
-            raise QAPISemError(self._cur_doc.info,
-                               'Level %d subheading found outside a '
-                               'level %d heading'
-                               % (level, level - 1))
-        snode = self._make_section(heading)
-        self._active_headings[level - 1] += snode
-        self._active_headings = self._active_headings[:level]
-        self._active_headings.append(snode)
-        return snode
-
-    def _add_node_to_current_heading(self, node):
-        """Add the node to whatever the current active heading is"""
-        self._active_headings[-1] += node
-
-    def freeform(self, doc):
-        """Add a piece of 'freeform' documentation to the document tree
-
-        A 'freeform' document chunk doesn't relate to any particular
-        symbol (for instance, it could be an introduction).
-
-        If the freeform document starts with a line of the form
-        '= Heading text', this is a section or subsection heading, with
-        the heading level indicated by the number of '=' signs.
+        Generate option lines for QAPI entity directives.
         """
+        if ent.doc and ent.doc.since:
+            assert ent.doc.since.kind == QAPIDoc.Kind.SINCE
+            # Generated from the entity's docblock; info location is exact.
+            self.add_line(f":since: {ent.doc.since.text}", ent.doc.since.info)
+
+        if ent.ifcond.is_present():
+            doc = ent.ifcond.docgen()
+            assert ent.info
+            # Generated from entity definition; info location is approximate.
+            self.add_line(f":ifcond: {doc}", ent.info)
+
+        # Hoist special features such as :deprecated: and :unstable:
+        # into the options block for the entity. If, in the future, new
+        # special features are added, qapi-domain will chirp about
+        # unrecognized options and fail until they are handled in
+        # qapi-domain.
+        for feat in ent.features:
+            if feat.is_special():
+                # FIXME: handle ifcond if present. How to display that
+                # information is TBD.
+                # Generated from entity def; info location is approximate.
+                assert feat.info
+                self.add_line(f":{feat.name}:", feat.info)
+
+        self.ensure_blank_line()
+
+    def _insert_member_pointer(self, ent: QAPISchemaDefinition) -> None:
+
+        def _get_target(
+            ent: QAPISchemaDefinition,
+        ) -> Optional[QAPISchemaDefinition]:
+            if isinstance(ent, (QAPISchemaCommand, QAPISchemaEvent)):
+                return ent.arg_type
+            if isinstance(ent, QAPISchemaObjectType):
+                return ent.base
+            return None
+
+        target = _get_target(ent)
+        if target is not None and not target.is_implicit():
+            assert ent.info
+            self.add_field(
+                self.member_field_type,
+                "q_dummy",
+                f"The members of :qapi:type:`{target.name}`.",
+                ent.info,
+                "q_dummy",
+            )
 
-        # QAPIDoc documentation says free-form documentation blocks
-        # must have only a body section, nothing else.
-        assert not doc.sections
-        assert not doc.args
-        assert not doc.features
-        self._cur_doc = doc
-
-        text = doc.body.text
-        if re.match(r'=+ ', text):
-            # Section/subsection heading (if present, will always be
-            # the first line of the block)
-            (heading, _, text) = text.partition('\n')
-            (leader, _, heading) = heading.partition(' ')
-            node = self._start_new_heading(heading, len(leader))
-            if text == '':
-                return
-
-        self._parse_text_into_node(text, node)
-        self._cur_doc = None
-
-    def _parse_text_into_node(self, doctext, node):
-        """Parse a chunk of QAPI-doc-format text into the node
-
-        The doc comment can contain most inline rST markup, including
-        bulleted and enumerated lists.
-        As an extra permitted piece of markup, @var will be turned
-        into ``var``.
-        """
+        if isinstance(ent, QAPISchemaObjectType) and ent.branches is not None:
+            for variant in ent.branches.variants:
+                if variant.type.name == "q_empty":
+                    continue
+                assert ent.info
+                self.add_field(
+                    self.member_field_type,
+                    "q_dummy",
+                    f" When ``{ent.branches.tag_member.name}`` is "
+                    f"``{variant.name}``: "
+                    f"The members of :qapi:type:`{variant.type.name}`.",
+                    ent.info,
+                    "q_dummy",
+                )
+
+    def visit_sections(self, ent: QAPISchemaDefinition) -> None:
+        sections = ent.doc.all_sections if ent.doc else []
+
+        # Determine the index location at which we should generate
+        # documentation for "The members of ..." pointers. This should
+        # go at the end of the members section(s) if any. Note that
+        # index 0 is assumed to be a plain intro section, even if it is
+        # empty; and that a members section if present will always
+        # immediately follow the opening PLAIN section.
+        gen_index = 1
+        if len(sections) > 1:
+            while sections[gen_index].kind == QAPIDoc.Kind.MEMBER:
+                gen_index += 1
+                if gen_index >= len(sections):
+                    break
+
+        # Add sections in source order:
+        for i, section in enumerate(sections):
+            # @var is translated to ``var``:
+            section.text = re.sub(r"@([\w-]+)", r"``\1``", section.text)
+
+            if section.kind == QAPIDoc.Kind.PLAIN:
+                self.visit_paragraph(section)
+            elif section.kind == QAPIDoc.Kind.MEMBER:
+                assert isinstance(section, QAPIDoc.ArgSection)
+                self.visit_member(section)
+            elif section.kind == QAPIDoc.Kind.FEATURE:
+                assert isinstance(section, QAPIDoc.ArgSection)
+                self.visit_feature(section)
+            elif section.kind in (QAPIDoc.Kind.SINCE, QAPIDoc.Kind.TODO):
+                # Since is handled in preamble, TODO is skipped intentionally.
+                pass
+            elif section.kind == QAPIDoc.Kind.RETURNS:
+                self.visit_returns(section)
+            elif section.kind == QAPIDoc.Kind.ERRORS:
+                self.visit_errors(section)
+            else:
+                assert False
+
+            # Generate "The members of ..." entries if necessary:
+            if i == gen_index - 1:
+                self._insert_member_pointer(ent)
+
+        self.ensure_blank_line()
+
+    # Transmogrification core methods
+
+    def visit_module(self, path: str) -> None:
+        name = Path(path).stem
+        # module directives are credited to the first line of a module file.
+        self.add_line_raw(f".. qapi:module:: {name}", path, 1)
+        self.ensure_blank_line()
+
+    def visit_freeform(self, doc: QAPIDoc) -> None:
+        # TODO: Once the old qapidoc transformer is deprecated, freeform
+        # sections can be updated to pure rST, and this transformed removed.
+        #
+        # For now, translate our micro-format into rST. Code adapted
+        # from Peter Maydell's freeform().
+
+        assert len(doc.all_sections) == 1, doc.all_sections
+        body = doc.all_sections[0]
+        text = body.text
+        info = doc.info
+
+        if re.match(r"=+ ", text):
+            # Section/subsection heading (if present, will always be the
+            # first line of the block)
+            (heading, _, text) = text.partition("\n")
+            (leader, _, heading) = heading.partition(" ")
+            # Implicit +1 for heading in the containing .rst doc
+            level = len(leader) + 1
+
+            # https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html#sections
+            markers = ' #*=_^"'
+            overline = level <= 2
+            marker = markers[level]
+
+            self.ensure_blank_line()
+            # This credits all 2 or 3 lines to the single source line.
+            if overline:
+                self.add_line(marker * len(heading), info)
+            self.add_line(heading, info)
+            self.add_line(marker * len(heading), info)
+            self.ensure_blank_line()
+
+            # Eat blank line(s) and advance info
+            trimmed = text.lstrip("\n")
+            text = trimmed
+            info = info.next_line(len(text) - len(trimmed) + 1)
+
+        self.add_lines(text, info)
+        self.ensure_blank_line()
+
+    def visit_entity(self, ent: QAPISchemaDefinition) -> None:
+        assert ent.info
 
-        # Handle the "@var means ``var`` case
-        doctext = re.sub(r'@([\w-]+)', r'``\1``', doctext)
-
-        rstlist = ViewList()
-        for line in doctext.splitlines():
-            # The reported line number will always be that of the start line
-            # of the doc comment, rather than the actual location of the error.
-            # Being more precise would require overhaul of the QAPIDoc class
-            # to track lines more exactly within all the sub-parts of the doc
-            # comment, as well as counting lines here.
-            rstlist.append(line, self._cur_doc.info.fname,
-                           self._cur_doc.info.line)
-        # Append a blank line -- in some cases rST syntax errors get
-        # attributed to the line after one with actual text, and if there
-        # isn't anything in the ViewList corresponding to that then Sphinx
-        # 1.6's AutodocReporter will then misidentify the source/line location
-        # in the error message (usually attributing it to the top-level
-        # .rst file rather than the offending .json file). The extra blank
-        # line won't affect the rendered output.
-        rstlist.append("", self._cur_doc.info.fname, self._cur_doc.info.line)
-        self._sphinx_directive.do_parse(rstlist, node)
-
-    def get_document_nodes(self):
-        """Return the list of docutils nodes which make up the document"""
-        return self._top_node.children
-
-
-# Turn the black formatter on for the rest of the file.
-# fmt: on
+        try:
+            self._curr_ent = ent
+
+            # Squish structs and unions together into an "object" directive.
+            meta = ent.meta
+            if meta in ("struct", "union"):
+                meta = "object"
+
+            # This line gets credited to the start of the /definition/.
+            self.add_line(f".. qapi:{meta}:: {ent.name}", ent.info)
+            with self.indented():
+                self.preamble(ent)
+                self.visit_sections(ent)
+        finally:
+            self._curr_ent = None
+
+    def set_namespace(self, namespace: str, source: str, lineno: int) -> None:
+        self.add_line_raw(
+            f".. qapi:namespace:: {namespace}", source, lineno + 1
+        )
+        self.ensure_blank_line()
 
 
 class QAPISchemaGenDepVisitor(QAPISchemaVisitor):
@@ -473,22 +466,22 @@ class QAPISchemaGenDepVisitor(QAPISchemaVisitor):
     schema file associated with each module in the QAPI input.
     """
 
-    def __init__(self, env, qapidir):
+    def __init__(self, env: Any, qapidir: str) -> None:
         self._env = env
         self._qapidir = qapidir
 
-    def visit_module(self, name):
+    def visit_module(self, name: str) -> None:
         if name != "./builtin":
             qapifile = self._qapidir + "/" + name
             self._env.note_dependency(os.path.abspath(qapifile))
         super().visit_module(name)
 
 
-class NestedDirective(Directive):
-    def run(self):
+class NestedDirective(SphinxDirective):
+    def run(self) -> Sequence[nodes.Node]:
         raise NotImplementedError
 
-    def do_parse(self, rstlist, node):
+    def do_parse(self, rstlist: StringList, node: nodes.Node) -> None:
         """
         Parse rST source lines and add them to the specified node
 
@@ -507,18 +500,110 @@ class QAPIDocDirective(NestedDirective):
 
     required_argument = 1
     optional_arguments = 1
-    option_spec = {"qapifile": directives.unchanged_required}
+    option_spec = {
+        "qapifile": directives.unchanged_required,
+        "namespace": directives.unchanged,
+        "transmogrify": directives.flag,
+    }
     has_content = False
 
-    def new_serialno(self):
+    def new_serialno(self) -> str:
         """Return a unique new ID string suitable for use as a node's ID"""
         env = self.state.document.settings.env
         return "qapidoc-%d" % env.new_serialno("qapidoc")
 
-    def run(self):
+    def transmogrify(self, schema: QAPISchema) -> nodes.Element:
+        logger.info("Transmogrifying QAPI to rST ...")
+        vis = Transmogrifier()
+        modules = set()
+
+        if "namespace" in self.options:
+            vis.set_namespace(
+                self.options["namespace"], *self.get_source_info()
+            )
+
+        for doc in schema.docs:
+            module_source = doc.info.fname
+            if module_source not in modules:
+                vis.visit_module(module_source)
+                modules.add(module_source)
+
+            if doc.symbol:
+                ent = schema.lookup_entity(doc.symbol)
+                assert isinstance(ent, QAPISchemaDefinition)
+                vis.visit_entity(ent)
+            else:
+                vis.visit_freeform(doc)
+
+        logger.info("Transmogrification complete.")
+
+        contentnode = nodes.section()
+        content = vis.result
+        titles_allowed = True
+
+        logger.info("Transmogrifier running nested parse ...")
+        with switch_source_input(self.state, content):
+            if titles_allowed:
+                node: nodes.Element = nodes.section()
+                node.document = self.state.document
+                nested_parse_with_titles(self.state, content, contentnode)
+            else:
+                node = nodes.paragraph()
+                node.document = self.state.document
+                self.state.nested_parse(content, 0, contentnode)
+        logger.info("Transmogrifier's nested parse completed.")
+
+        if self.env.app.verbosity >= 2 or os.environ.get("DEBUG"):
+            argname = "_".join(Path(self.arguments[0]).parts)
+            name = Path(argname).stem + ".ir"
+            self.write_intermediate(content, name)
+
+        sys.stdout.flush()
+        return contentnode
+
+    def write_intermediate(self, content: StringList, filename: str) -> None:
+        logger.info(
+            "writing intermediate rST for '%s' to '%s'",
+            self.arguments[0],
+            filename,
+        )
+
+        srctree = Path(self.env.app.config.qapidoc_srctree).resolve()
+        outlines = []
+        lcol_width = 0
+
+        for i, line in enumerate(content):
+            src, lineno = content.info(i)
+            srcpath = Path(src).resolve()
+            srcpath = srcpath.relative_to(srctree)
+
+            lcol = f"{srcpath}:{lineno:04d}"
+            lcol_width = max(lcol_width, len(lcol))
+            outlines.append((lcol, line))
+
+        with open(filename, "w", encoding="UTF-8") as outfile:
+            for lcol, rcol in outlines:
+                outfile.write(lcol.rjust(lcol_width))
+                outfile.write(" |")
+                if rcol:
+                    outfile.write(f" {rcol}")
+                outfile.write("\n")
+
+    def legacy(self, schema: QAPISchema) -> nodes.Element:
+        vis = QAPISchemaGenRSTVisitor(self)
+        vis.visit_begin(schema)
+        for doc in schema.docs:
+            if doc.symbol:
+                vis.symbol(doc, schema.lookup_entity(doc.symbol))
+            else:
+                vis.freeform(doc)
+        return vis.get_document_node()  # type: ignore
+
+    def run(self) -> Sequence[nodes.Node]:
         env = self.state.document.settings.env
         qapifile = env.config.qapidoc_srctree + "/" + self.arguments[0]
         qapidir = os.path.dirname(qapifile)
+        transmogrify = "transmogrify" in self.options
 
         try:
             schema = QAPISchema(qapifile)
@@ -526,20 +611,18 @@ def run(self):
             # First tell Sphinx about all the schema files that the
             # output documentation depends on (including 'qapifile' itself)
             schema.visit(QAPISchemaGenDepVisitor(env, qapidir))
-
-            vis = QAPISchemaGenRSTVisitor(self)
-            vis.visit_begin(schema)
-            for doc in schema.docs:
-                if doc.symbol:
-                    vis.symbol(doc, schema.lookup_entity(doc.symbol))
-                else:
-                    vis.freeform(doc)
-            return vis.get_document_nodes()
         except QAPIError as err:
             # Launder QAPI parse errors into Sphinx extension errors
             # so they are displayed nicely to the user
             raise ExtensionError(str(err)) from err
 
+        if transmogrify:
+            contentnode = self.transmogrify(schema)
+        else:
+            contentnode = self.legacy(schema)
+
+        return contentnode.children
+
 
 class QMPExample(CodeBlock, NestedDirective):
     """
@@ -590,7 +673,7 @@ def _highlightlang(self) -> addnodes.highlightlang:
         )
         return node
 
-    def admonition_wrap(self, *content) -> List[nodes.Node]:
+    def admonition_wrap(self, *content: nodes.Node) -> List[nodes.Node]:
         title = "Example:"
         if "title" in self.options:
             title = f"{title} {self.options['title']}"
@@ -636,8 +719,9 @@ def run(self) -> List[nodes.Node]:
         return self.admonition_wrap(*content_nodes)
 
 
-def setup(app):
+def setup(app: Sphinx) -> ExtensionMetadata:
     """Register qapi-doc directive with Sphinx"""
+    app.setup_extension("qapi_domain")
     app.add_config_value("qapidoc_srctree", None, "env")
     app.add_directive("qapi-doc", QAPIDocDirective)
     app.add_directive("qmp-example", QMPExample)
diff --git a/docs/sphinx/qapidoc_legacy.py b/docs/sphinx/qapidoc_legacy.py
new file mode 100644
index 0000000000000..13520f4c26b3a
--- /dev/null
+++ b/docs/sphinx/qapidoc_legacy.py
@@ -0,0 +1,440 @@
+# coding=utf-8
+# type: ignore
+#
+# QEMU qapidoc QAPI file parsing extension
+#
+# Copyright (c) 2020 Linaro
+#
+# This work is licensed under the terms of the GNU GPLv2 or later.
+# See the COPYING file in the top-level directory.
+
+"""
+qapidoc is a Sphinx extension that implements the qapi-doc directive
+
+The purpose of this extension is to read the documentation comments
+in QAPI schema files, and insert them all into the current document.
+
+It implements one new rST directive, "qapi-doc::".
+Each qapi-doc:: directive takes one argument, which is the
+pathname of the schema file to process, relative to the source tree.
+
+The docs/conf.py file must set the qapidoc_srctree config value to
+the root of the QEMU source tree.
+
+The Sphinx documentation on writing extensions is at:
+https://www.sphinx-doc.org/en/master/development/index.html
+"""
+
+import re
+import textwrap
+
+from docutils import nodes
+from docutils.statemachine import ViewList
+from qapi.error import QAPISemError
+from qapi.gen import QAPISchemaVisitor
+from qapi.parser import QAPIDoc
+
+
+def dedent(text: str) -> str:
+    # Adjust indentation to make description text parse as paragraph.
+
+    lines = text.splitlines(True)
+    if re.match(r"\s+", lines[0]):
+        # First line is indented; description started on the line after
+        # the name. dedent the whole block.
+        return textwrap.dedent(text)
+
+    # Descr started on same line. Dedent line 2+.
+    return lines[0] + textwrap.dedent("".join(lines[1:]))
+
+
+class QAPISchemaGenRSTVisitor(QAPISchemaVisitor):
+    """A QAPI schema visitor which generates docutils/Sphinx nodes
+
+    This class builds up a tree of docutils/Sphinx nodes corresponding
+    to documentation for the various QAPI objects. To use it, first
+    create a QAPISchemaGenRSTVisitor object, and call its
+    visit_begin() method.  Then you can call one of the two methods
+    'freeform' (to add documentation for a freeform documentation
+    chunk) or 'symbol' (to add documentation for a QAPI symbol). These
+    will cause the visitor to build up the tree of document
+    nodes. Once you've added all the documentation via 'freeform' and
+    'symbol' method calls, you can call 'get_document_nodes' to get
+    the final list of document nodes (in a form suitable for returning
+    from a Sphinx directive's 'run' method).
+    """
+    def __init__(self, sphinx_directive):
+        self._cur_doc = None
+        self._sphinx_directive = sphinx_directive
+        self._top_node = nodes.section()
+        self._active_headings = [self._top_node]
+
+    def _make_dlitem(self, term, defn):
+        """Return a dlitem node with the specified term and definition.
+
+        term should be a list of Text and literal nodes.
+        defn should be one of:
+        - a string, which will be handed to _parse_text_into_node
+        - a list of Text and literal nodes, which will be put into
+          a paragraph node
+        """
+        dlitem = nodes.definition_list_item()
+        dlterm = nodes.term('', '', *term)
+        dlitem += dlterm
+        if defn:
+            dldef = nodes.definition()
+            if isinstance(defn, list):
+                dldef += nodes.paragraph('', '', *defn)
+            else:
+                self._parse_text_into_node(defn, dldef)
+            dlitem += dldef
+        return dlitem
+
+    def _make_section(self, title):
+        """Return a section node with optional title"""
+        section = nodes.section(ids=[self._sphinx_directive.new_serialno()])
+        if title:
+            section += nodes.title(title, title)
+        return section
+
+    def _nodes_for_ifcond(self, ifcond, with_if=True):
+        """Return list of Text, literal nodes for the ifcond
+
+        Return a list which gives text like ' (If: condition)'.
+        If with_if is False, we don't return the "(If: " and ")".
+        """
+
+        doc = ifcond.docgen()
+        if not doc:
+            return []
+        doc = nodes.literal('', doc)
+        if not with_if:
+            return [doc]
+
+        nodelist = [nodes.Text(' ('), nodes.strong('', 'If: ')]
+        nodelist.append(doc)
+        nodelist.append(nodes.Text(')'))
+        return nodelist
+
+    def _nodes_for_one_member(self, member):
+        """Return list of Text, literal nodes for this member
+
+        Return a list of doctree nodes which give text like
+        'name: type (optional) (If: ...)' suitable for use as the
+        'term' part of a definition list item.
+        """
+        term = [nodes.literal('', member.name)]
+        if member.type.doc_type():
+            term.append(nodes.Text(': '))
+            term.append(nodes.literal('', member.type.doc_type()))
+        if member.optional:
+            term.append(nodes.Text(' (optional)'))
+        if member.ifcond.is_present():
+            term.extend(self._nodes_for_ifcond(member.ifcond))
+        return term
+
+    def _nodes_for_variant_when(self, branches, variant):
+        """Return list of Text, literal nodes for variant 'when' clause
+
+        Return a list of doctree nodes which give text like
+        'when tagname is variant (If: ...)' suitable for use in
+        the 'branches' part of a definition list.
+        """
+        term = [nodes.Text(' when '),
+                nodes.literal('', branches.tag_member.name),
+                nodes.Text(' is '),
+                nodes.literal('', '"%s"' % variant.name)]
+        if variant.ifcond.is_present():
+            term.extend(self._nodes_for_ifcond(variant.ifcond))
+        return term
+
+    def _nodes_for_members(self, doc, what, base=None, branches=None):
+        """Return list of doctree nodes for the table of members"""
+        dlnode = nodes.definition_list()
+        for section in doc.args.values():
+            term = self._nodes_for_one_member(section.member)
+            # TODO drop fallbacks when undocumented members are outlawed
+            if section.text:
+                defn = dedent(section.text)
+            else:
+                defn = [nodes.Text('Not documented')]
+
+            dlnode += self._make_dlitem(term, defn)
+
+        if base:
+            dlnode += self._make_dlitem([nodes.Text('The members of '),
+                                         nodes.literal('', base.doc_type())],
+                                        None)
+
+        if branches:
+            for v in branches.variants:
+                if v.type.name == 'q_empty':
+                    continue
+                assert not v.type.is_implicit()
+                term = [nodes.Text('The members of '),
+                        nodes.literal('', v.type.doc_type())]
+                term.extend(self._nodes_for_variant_when(branches, v))
+                dlnode += self._make_dlitem(term, None)
+
+        if not dlnode.children:
+            return []
+
+        section = self._make_section(what)
+        section += dlnode
+        return [section]
+
+    def _nodes_for_enum_values(self, doc):
+        """Return list of doctree nodes for the table of enum values"""
+        seen_item = False
+        dlnode = nodes.definition_list()
+        for section in doc.args.values():
+            termtext = [nodes.literal('', section.member.name)]
+            if section.member.ifcond.is_present():
+                termtext.extend(self._nodes_for_ifcond(section.member.ifcond))
+            # TODO drop fallbacks when undocumented members are outlawed
+            if section.text:
+                defn = dedent(section.text)
+            else:
+                defn = [nodes.Text('Not documented')]
+
+            dlnode += self._make_dlitem(termtext, defn)
+            seen_item = True
+
+        if not seen_item:
+            return []
+
+        section = self._make_section('Values')
+        section += dlnode
+        return [section]
+
+    def _nodes_for_arguments(self, doc, arg_type):
+        """Return list of doctree nodes for the arguments section"""
+        if arg_type and not arg_type.is_implicit():
+            assert not doc.args
+            section = self._make_section('Arguments')
+            dlnode = nodes.definition_list()
+            dlnode += self._make_dlitem(
+                [nodes.Text('The members of '),
+                 nodes.literal('', arg_type.name)],
+                None)
+            section += dlnode
+            return [section]
+
+        return self._nodes_for_members(doc, 'Arguments')
+
+    def _nodes_for_features(self, doc):
+        """Return list of doctree nodes for the table of features"""
+        seen_item = False
+        dlnode = nodes.definition_list()
+        for section in doc.features.values():
+            dlnode += self._make_dlitem(
+                [nodes.literal('', section.member.name)], dedent(section.text))
+            seen_item = True
+
+        if not seen_item:
+            return []
+
+        section = self._make_section('Features')
+        section += dlnode
+        return [section]
+
+    def _nodes_for_sections(self, doc):
+        """Return list of doctree nodes for additional sections"""
+        nodelist = []
+        for section in doc.sections:
+            if section.kind == QAPIDoc.Kind.TODO:
+                # Hide TODO: sections
+                continue
+
+            if section.kind == QAPIDoc.Kind.PLAIN:
+                # Sphinx cannot handle sectionless titles;
+                # Instead, just append the results to the prior section.
+                container = nodes.container()
+                self._parse_text_into_node(section.text, container)
+                nodelist += container.children
+                continue
+
+            snode = self._make_section(section.kind.name.title())
+            self._parse_text_into_node(dedent(section.text), snode)
+            nodelist.append(snode)
+        return nodelist
+
+    def _nodes_for_if_section(self, ifcond):
+        """Return list of doctree nodes for the "If" section"""
+        nodelist = []
+        if ifcond.is_present():
+            snode = self._make_section('If')
+            snode += nodes.paragraph(
+                '', '', *self._nodes_for_ifcond(ifcond, with_if=False)
+            )
+            nodelist.append(snode)
+        return nodelist
+
+    def _add_doc(self, typ, sections):
+        """Add documentation for a command/object/enum...
+
+        We assume we're documenting the thing defined in self._cur_doc.
+        typ is the type of thing being added ("Command", "Object", etc)
+
+        sections is a list of nodes for sections to add to the definition.
+        """
+
+        doc = self._cur_doc
+        snode = nodes.section(ids=[self._sphinx_directive.new_serialno()])
+        snode += nodes.title('', '', *[nodes.literal(doc.symbol, doc.symbol),
+                                       nodes.Text(' (' + typ + ')')])
+        self._parse_text_into_node(doc.body.text, snode)
+        for s in sections:
+            if s is not None:
+                snode += s
+        self._add_node_to_current_heading(snode)
+
+    def visit_enum_type(self, name, info, ifcond, features, members, prefix):
+        doc = self._cur_doc
+        self._add_doc('Enum',
+                      self._nodes_for_enum_values(doc)
+                      + self._nodes_for_features(doc)
+                      + self._nodes_for_sections(doc)
+                      + self._nodes_for_if_section(ifcond))
+
+    def visit_object_type(self, name, info, ifcond, features,
+                          base, members, branches):
+        doc = self._cur_doc
+        if base and base.is_implicit():
+            base = None
+        self._add_doc('Object',
+                      self._nodes_for_members(doc, 'Members', base, branches)
+                      + self._nodes_for_features(doc)
+                      + self._nodes_for_sections(doc)
+                      + self._nodes_for_if_section(ifcond))
+
+    def visit_alternate_type(self, name, info, ifcond, features,
+                             alternatives):
+        doc = self._cur_doc
+        self._add_doc('Alternate',
+                      self._nodes_for_members(doc, 'Members')
+                      + self._nodes_for_features(doc)
+                      + self._nodes_for_sections(doc)
+                      + self._nodes_for_if_section(ifcond))
+
+    def visit_command(self, name, info, ifcond, features, arg_type,
+                      ret_type, gen, success_response, boxed, allow_oob,
+                      allow_preconfig, coroutine):
+        doc = self._cur_doc
+        self._add_doc('Command',
+                      self._nodes_for_arguments(doc, arg_type)
+                      + self._nodes_for_features(doc)
+                      + self._nodes_for_sections(doc)
+                      + self._nodes_for_if_section(ifcond))
+
+    def visit_event(self, name, info, ifcond, features, arg_type, boxed):
+        doc = self._cur_doc
+        self._add_doc('Event',
+                      self._nodes_for_arguments(doc, arg_type)
+                      + self._nodes_for_features(doc)
+                      + self._nodes_for_sections(doc)
+                      + self._nodes_for_if_section(ifcond))
+
+    def symbol(self, doc, entity):
+        """Add documentation for one symbol to the document tree
+
+        This is the main entry point which causes us to add documentation
+        nodes for a symbol (which could be a 'command', 'object', 'event',
+        etc). We do this by calling 'visit' on the schema entity, which
+        will then call back into one of our visit_* methods, depending
+        on what kind of thing this symbol is.
+        """
+        self._cur_doc = doc
+        entity.visit(self)
+        self._cur_doc = None
+
+    def _start_new_heading(self, heading, level):
+        """Start a new heading at the specified heading level
+
+        Create a new section whose title is 'heading' and which is placed
+        in the docutils node tree as a child of the most recent level-1
+        heading. Subsequent document sections (commands, freeform doc chunks,
+        etc) will be placed as children of this new heading section.
+        """
+        if len(self._active_headings) < level:
+            raise QAPISemError(self._cur_doc.info,
+                               'Level %d subheading found outside a '
+                               'level %d heading'
+                               % (level, level - 1))
+        snode = self._make_section(heading)
+        self._active_headings[level - 1] += snode
+        self._active_headings = self._active_headings[:level]
+        self._active_headings.append(snode)
+        return snode
+
+    def _add_node_to_current_heading(self, node):
+        """Add the node to whatever the current active heading is"""
+        self._active_headings[-1] += node
+
+    def freeform(self, doc):
+        """Add a piece of 'freeform' documentation to the document tree
+
+        A 'freeform' document chunk doesn't relate to any particular
+        symbol (for instance, it could be an introduction).
+
+        If the freeform document starts with a line of the form
+        '= Heading text', this is a section or subsection heading, with
+        the heading level indicated by the number of '=' signs.
+        """
+
+        # QAPIDoc documentation says free-form documentation blocks
+        # must have only a body section, nothing else.
+        assert not doc.sections
+        assert not doc.args
+        assert not doc.features
+        self._cur_doc = doc
+
+        text = doc.body.text
+        if re.match(r'=+ ', text):
+            # Section/subsection heading (if present, will always be
+            # the first line of the block)
+            (heading, _, text) = text.partition('\n')
+            (leader, _, heading) = heading.partition(' ')
+            node = self._start_new_heading(heading, len(leader))
+            if text == '':
+                return
+        else:
+            node = nodes.container()
+
+        self._parse_text_into_node(text, node)
+        self._cur_doc = None
+
+    def _parse_text_into_node(self, doctext, node):
+        """Parse a chunk of QAPI-doc-format text into the node
+
+        The doc comment can contain most inline rST markup, including
+        bulleted and enumerated lists.
+        As an extra permitted piece of markup, @var will be turned
+        into ``var``.
+        """
+
+        # Handle the "@var means ``var`` case
+        doctext = re.sub(r'@([\w-]+)', r'``\1``', doctext)
+
+        rstlist = ViewList()
+        for line in doctext.splitlines():
+            # The reported line number will always be that of the start line
+            # of the doc comment, rather than the actual location of the error.
+            # Being more precise would require overhaul of the QAPIDoc class
+            # to track lines more exactly within all the sub-parts of the doc
+            # comment, as well as counting lines here.
+            rstlist.append(line, self._cur_doc.info.fname,
+                           self._cur_doc.info.line)
+        # Append a blank line -- in some cases rST syntax errors get
+        # attributed to the line after one with actual text, and if there
+        # isn't anything in the ViewList corresponding to that then Sphinx
+        # 1.6's AutodocReporter will then misidentify the source/line location
+        # in the error message (usually attributing it to the top-level
+        # .rst file rather than the offending .json file). The extra blank
+        # line won't affect the rendered output.
+        rstlist.append("", self._cur_doc.info.fname, self._cur_doc.info.line)
+        self._sphinx_directive.do_parse(rstlist, node)
+
+    def get_document_node(self):
+        """Return the root docutils node which makes up the document"""
+        return self._top_node
diff --git a/docs/system/arm/bananapi_m2u.rst b/docs/system/arm/bananapi_m2u.rst
index 587b488655347..d30db8d04c362 100644
--- a/docs/system/arm/bananapi_m2u.rst
+++ b/docs/system/arm/bananapi_m2u.rst
@@ -135,6 +135,5 @@ provide the following command:
 .. code-block:: bash
 
   $ cd qemu-build-dir
-  $ AVOCADO_ALLOW_LARGE_STORAGE=yes tests/venv/bin/avocado \
-    --verbose --show=app,console run -t machine:bpim2u \
-    ../tests/avocado/boot_linux_console.py
+  $ QEMU_TEST_ALLOW_LARGE_STORAGE=1 \
+    meson test --suite thorough func-arm-arm_bpim2u
diff --git a/docs/system/arm/imx8mp-evk.rst b/docs/system/arm/imx8mp-evk.rst
new file mode 100644
index 0000000000000..00527b0cbedbd
--- /dev/null
+++ b/docs/system/arm/imx8mp-evk.rst
@@ -0,0 +1,70 @@
+NXP i.MX 8M Plus Evaluation Kit (``imx8mp-evk``)
+================================================
+
+The ``imx8mp-evk`` machine models the i.MX 8M Plus Evaluation Kit, based on an
+i.MX 8M Plus SoC.
+
+Supported devices
+-----------------
+
+The ``imx8mp-evk`` machine implements the following devices:
+
+ * Up to 4 Cortex-A53 cores
+ * Generic Interrupt Controller (GICv3)
+ * 4 UARTs
+ * 3 USDHC Storage Controllers
+ * 1 Designware PCI Express Controller
+ * 1 Ethernet Controller
+ * 2 Designware USB 3 Controllers
+ * 5 GPIO Controllers
+ * 6 I2C Controllers
+ * 3 SPI Controllers
+ * 3 Watchdogs
+ * 6 General Purpose Timers
+ * Secure Non-Volatile Storage (SNVS) including an RTC
+ * Clock Tree
+
+Boot options
+------------
+
+The ``imx8mp-evk`` machine can start a Linux kernel directly using the standard
+``-kernel`` functionality.
+
+Direct Linux Kernel Boot
+''''''''''''''''''''''''
+
+Probably the easiest way to get started with a whole Linux system on the machine
+is to generate an image with Buildroot. Version 2024.11.1 is tested at the time
+of writing and involves three steps. First run the following commands in the
+toplevel directory of the Buildroot source tree:
+
+.. code-block:: bash
+
+  $ make freescale_imx8mpevk_defconfig
+  $ make
+
+Once finished successfully there is an ``output/image`` subfolder. Navigate into
+it and resize the SD card image to a power of two:
+
+.. code-block:: bash
+
+  $ qemu-img resize sdcard.img 256M
+
+Finally, the device tree needs to be patched with the following commands which
+will remove the ``cpu-idle-states`` properties from CPU nodes:
+
+.. code-block:: bash
+
+  $ dtc imx8mp-evk.dtb | sed '/cpu-idle-states/d' > imx8mp-evk-patched.dts
+  $ dtc imx8mp-evk-patched.dts -o imx8mp-evk-patched.dtb
+
+Now that everything is prepared the machine can be started as follows:
+
+.. code-block:: bash
+
+  $ qemu-system-aarch64 -M imx8mp-evk -smp 4 -m 3G \
+      -display none -serial null -serial stdio \
+      -kernel Image \
+      -dtb imx8mp-evk-patched.dtb \
+      -append "root=/dev/mmcblk2p2" \
+      -drive file=sdcard.img,if=sd,bus=2,format=raw,id=mmcblk2
diff --git a/docs/system/arm/nuvoton.rst b/docs/system/arm/nuvoton.rst
index 05059378e5597..e4827fb43a10e 100644
--- a/docs/system/arm/nuvoton.rst
+++ b/docs/system/arm/nuvoton.rst
@@ -1,12 +1,13 @@
-Nuvoton iBMC boards (``kudo-bmc``, ``mori-bmc``, ``npcm750-evb``, ``quanta-gbs-bmc``, ``quanta-gsj``)
-=====================================================================================================
+Nuvoton iBMC boards (``kudo-bmc``, ``mori-bmc``, ``npcm750-evb``, ``quanta-gbs-bmc``, ``quanta-gsj``, ``npcm845-evb``)
+======================================================================================================================
 
-The `Nuvoton iBMC`_ chips (NPCM7xx) are a family of ARM-based SoCs that are
+The `Nuvoton iBMC`_ chips are a family of Arm-based SoCs that are
 designed to be used as Baseboard Management Controllers (BMCs) in various
-servers. They all feature one or two ARM Cortex-A9 CPU cores, as well as an
-assortment of peripherals targeted for either Enterprise or Data Center /
-Hyperscale applications. The former is a superset of the latter, so NPCM750 has
-all the peripherals of NPCM730 and more.
+servers. Currently there are two families: NPCM7XX series and
+NPCM8XX series. NPCM7XX series feature one or two Arm Cortex-A9 CPU cores,
+while NPCM8XX feature 4 Arm Cortex-A35 CPU cores. Both series contain a
+different assortment of peripherals targeted for either Enterprise or Data
+Center / Hyperscale applications.
 
 .. _Nuvoton iBMC: https://www.nuvoton.com/products/cloud-computing/ibmc/
 
@@ -27,6 +28,11 @@ There are also two more SoCs, NPCM710 and NPCM705, which are single-core
 variants of NPCM750 and NPCM730, respectively. These are currently not
 supported by QEMU.
 
+The NPCM8xx SoC is the successor of the NPCM7xx SoC. It has 4 Cortex-A35 cores.
+The following machines are based on this chip :
+
+- ``npcm845-evb``       Nuvoton NPCM845 Evaluation board
+
 Supported devices
 -----------------
 
@@ -62,6 +68,8 @@ Missing devices
    * System Wake-up Control (SWC)
    * Shared memory (SHM)
    * eSPI slave interface
+   * Block-transfer interface (8XX only)
+   * Virtual UART (8XX only)
 
  * Ethernet controller (GMAC)
  * USB device (USBD)
@@ -76,6 +84,11 @@ Missing devices
  * Video capture
  * Encoding compression engine
  * Security features
+ * I3C buses (8XX only)
+ * Temperature sensor interface (8XX only)
+ * Virtual UART (8XX only)
+ * Flash monitor (8XX only)
+ * JTAG master (8XX only)
 
 Boot options
 ------------
diff --git a/docs/system/arm/orangepi.rst b/docs/system/arm/orangepi.rst
index db87e81fec40b..8b9448ca7b0a9 100644
--- a/docs/system/arm/orangepi.rst
+++ b/docs/system/arm/orangepi.rst
@@ -257,9 +257,9 @@ Orange Pi PC integration tests
 
 The Orange Pi PC machine has several integration tests included.
 To run the whole set of tests, build QEMU from source and simply
-provide the following command:
+provide the following command from the build directory:
 
 .. code-block:: bash
 
-  $ AVOCADO_ALLOW_LARGE_STORAGE=yes avocado --show=app,console run \
-     -t machine:orangepi-pc tests/avocado/boot_linux_console.py
+  $ QEMU_TEST_ALLOW_LARGE_STORAGE=1 \
+    meson test --suite thorough func-arm-arm_orangepi
diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst
index 0c9c2ce0351c4..adf446c0a295e 100644
--- a/docs/system/arm/virt.rst
+++ b/docs/system/arm/virt.rst
@@ -144,6 +144,10 @@ highmem-mmio
   Set ``on``/``off`` to enable/disable the high memory region for PCI MMIO.
   The default is ``on``.
 
+highmem-mmio-size
+  Set the high memory region size for PCI MMIO. Must be a power of 2 and
+  greater than or equal to the default size (512G).
+
 gic-version
   Specify the version of the Generic Interrupt Controller (GIC) to provide.
   Valid values are:
diff --git a/docs/system/arm/vmapple.rst b/docs/system/arm/vmapple.rst
new file mode 100644
index 0000000000000..35c329ea5a812
--- /dev/null
+++ b/docs/system/arm/vmapple.rst
@@ -0,0 +1,65 @@
+.. SPDX-License-Identifier: GPL-2.0-or-later
+
+VMApple machine emulation
+========================================================================================
+
+VMApple is the device model that the macOS built-in hypervisor called "Virtualization.framework"
+exposes to Apple Silicon macOS guests. The "vmapple" machine model in QEMU implements the same
+device model, but does not use any code from Virtualization.Framework.
+
+Prerequisites
+-------------
+
+To run the vmapple machine model, you need to
+
+ * Run on Apple Silicon
+ * Run on macOS 12.0 or above
+ * Have an already installed copy of a Virtualization.Framework macOS 12 virtual
+   machine. Note that newer versions than 12.x are currently NOT supported on
+   the guest side. I will assume that you installed it using the
+   `macosvm <https://github.com/s-u/macosvm>`__ CLI.
+
+First, we need to extract the UUID from the virtual machine that you installed. You can do this
+by running the shell script in contrib/vmapple/uuid.sh on the macosvm.json file.
+
+.. code-block:: bash
+  :caption: uuid.sh script to extract the UUID from a macosvm.json file
+
+  $ contrib/vmapple/uuid.sh "path/to/macosvm.json"
+
+Now we also need to trim the aux partition. It contains metadata that we can just discard:
+
+.. code-block:: bash
+  :caption: Command to trim the aux file
+
+  $ dd if="aux.img" of="aux.img.trimmed" bs=$(( 0x4000 )) skip=1
+
+How to run
+----------
+
+Then, we can launch QEMU with the Virtualization.Framework pre-boot environment and the readily
+installed target disk images. I recommend to port forward the VM's ssh and vnc ports to the host
+to get better interactive access into the target system:
+
+.. code-block:: bash
+  :caption: Example execution command line
+
+  $ UUID="$(contrib/vmapple/uuid.sh 'macosvm.json')"
+  $ AVPBOOTER="/System/Library/Frameworks/Virtualization.framework/Resources/AVPBooter.vmapple2.bin"
+  $ AUX="aux.img.trimmed"
+  $ DISK="disk.img"
+  $ qemu-system-aarch64 \
+       -serial mon:stdio \
+       -m 4G \
+       -accel hvf \
+       -M vmapple,uuid="$UUID" \
+       -bios "$AVPBOOTER" \
+       -drive file="$AUX",if=pflash,format=raw \
+       -drive file="$DISK",if=pflash,format=raw \
+       -drive file="$AUX",if=none,id=aux,format=raw \
+       -drive file="$DISK",if=none,id=root,format=raw \
+       -device vmapple-virtio-blk-pci,variant=aux,drive=aux \
+       -device vmapple-virtio-blk-pci,variant=root,drive=root \
+       -netdev user,id=net0,ipv6=off,hostfwd=tcp::2222-:22,hostfwd=tcp::5901-:5900 \
+       -device virtio-net-pci,netdev=net0
+
diff --git a/docs/system/devices/igb.rst b/docs/system/devices/igb.rst
index 04e79dfe54979..9145af5c757c4 100644
--- a/docs/system/devices/igb.rst
+++ b/docs/system/devices/igb.rst
@@ -57,11 +57,11 @@ directory:
   meson test qtest-x86_64/qos-test
 
 ethtool can test register accesses, interrupts, etc. It is automated as an
-Avocado test and can be ran with the following command:
+functional test and can be ran with the following command:
 
 .. code:: shell
 
-  make check-avocado AVOCADO_TESTS=tests/avocado/netdev-ethtool.py
+  meson test --suite thorough func-x86_64-netdev_ethtool
 
 References
 ==========
diff --git a/docs/system/devices/nvme.rst b/docs/system/devices/nvme.rst
index d2b1ca96455fd..6509b35fcb4e4 100644
--- a/docs/system/devices/nvme.rst
+++ b/docs/system/devices/nvme.rst
@@ -53,6 +53,13 @@ parameters.
   Vendor ID. Set this to ``on`` to revert to the unallocated Intel ID
   previously used.
 
+``ocp`` (default: ``off``)
+  The Open Compute Project defines the Datacenter NVMe SSD Specification that
+  sits on top of NVMe. It describes additional commands and NVMe behaviors
+  specific for the Datacenter. When this option is ``on`` OCP features such as
+  the SMART / Health information extended log become available in the
+  controller. We emulate version 5 of this log page.
+
 Additional Namespaces
 ---------------------
 
diff --git a/docs/system/hexagon/cdsp.rst b/docs/system/hexagon/cdsp.rst
new file mode 100644
index 0000000000000..f755fbe0a5bab
--- /dev/null
+++ b/docs/system/hexagon/cdsp.rst
@@ -0,0 +1,10 @@
+Compute DSP
+===========
+
+A Hexagon CDSP is designed as a computation offload device for an SoC.  The
+``V66G_1024`` machine contains:
+
+* L2VIC interrupt controller
+* QTimer timer device
+
+This machine will support any Hexagon CPU, but will default to ``v66``.
diff --git a/docs/system/hexagon/emulation.rst b/docs/system/hexagon/emulation.rst
new file mode 100644
index 0000000000000..03a6092a12816
--- /dev/null
+++ b/docs/system/hexagon/emulation.rst
@@ -0,0 +1,16 @@
+.. _Hexagon Emulation:
+
+Hexagon CPU architecture support
+================================
+
+QEMU's TCG emulation includes support for v65, v66, v67, v68, v69, v71, v73.
+It also has support for the following architecture extensions:
+
+- HVX (Hexagon Vector eXtensions)
+
+For information on the specifics of the HVX extension, please refer
+to the `Qualcomm Hexagon V69 HVX Programmer's Reference Manual
+<https://docs.qualcomm.com/bundle/publicresource/80-N2040-49_REV_AA_Qualcomm_Hexagon_V69_HVX_ProgrammerS_Reference_Manual.pdf>`_.
+
+.. code-block:: bash
+
diff --git a/docs/system/ppc/amigang.rst b/docs/system/ppc/amigang.rst
index e2c9cb74b7f0c..21bb14ed09910 100644
--- a/docs/system/ppc/amigang.rst
+++ b/docs/system/ppc/amigang.rst
@@ -21,6 +21,7 @@ Emulated devices
  * VIA VT82C686B south bridge
  * PCI VGA compatible card (guests may need other card instead)
  * PS/2 keyboard and mouse
+ * 4 KiB NVRAM (use ``-drive if=mtd,format=raw,file=nvram.bin`` to keep contents persistent)
 
 Firmware
 --------
@@ -54,14 +55,14 @@ To boot the system run:
                     -cdrom "A1 Linux Net Installer.iso" \
                     -device ati-vga,model=rv100,romfile=VGABIOS-lgpl-latest.bin
 
-From the firmware menu that appears select ``Boot sequence`` →
-``Amiga Multiboot Options`` and set ``Boot device 1`` to
-``Onboard VIA IDE CDROM``. Then hit escape until the main screen appears again,
-hit escape once more and from the exit menu that appears select either
-``Save settings and exit`` or ``Use settings for this session only``. It may
-take a long time loading the kernel into memory but eventually it boots and the
-installer becomes visible. The ``ati-vga`` RV100 emulation is not
-complete yet so only frame buffer works, DRM and 3D is not available.
+If a firmware menu appears, select ``Boot sequence`` → ``Amiga Multiboot Options``
+and set ``Boot device 1`` to ``Onboard VIA IDE CDROM``. Then hit escape until
+the main screen appears again, hit escape once more and from the exit menu that
+appears select either ``Save settings and exit`` or ``Use settings for this
+session only``. It may take a long time loading the kernel into memory but
+eventually it boots and the installer becomes visible. The ``ati-vga`` RV100
+emulation is not complete yet so only frame buffer works, DRM and 3D is not
+available.
 
 Genesi/bPlan Pegasos II (``pegasos2``)
 ======================================
diff --git a/docs/system/ppc/embedded.rst b/docs/system/ppc/embedded.rst
index af3b3d9fa4603..5cb7d98b450d4 100644
--- a/docs/system/ppc/embedded.rst
+++ b/docs/system/ppc/embedded.rst
@@ -4,6 +4,5 @@ Embedded family boards
 - ``bamboo``               bamboo
 - ``mpc8544ds``            mpc8544ds
 - ``ppce500``              generic paravirt e500 platform
-- ``ref405ep``             ref405ep
 - ``sam460ex``             aCube Sam460ex
 - ``virtex-ml507``         Xilinx Virtex ML507 reference design
diff --git a/docs/system/ppc/powernv.rst b/docs/system/ppc/powernv.rst
index de7a807ac7627..f3ec2cc69c0da 100644
--- a/docs/system/ppc/powernv.rst
+++ b/docs/system/ppc/powernv.rst
@@ -195,6 +195,13 @@ Use a MTD drive to add a PNOR to the machine, and get a NVRAM :
 
   -drive file=./witherspoon.pnor,format=raw,if=mtd
 
+If no mtd drive is provided, the powernv platform will create a default
+PNOR device using a tiny formatted PNOR in pc-bios/pnv-pnor.bin opened
+read-only (PNOR changes will be persistent across reboots but not across
+invocations of QEMU). If no defaults are used, an erased 128MB PNOR is
+provided (which skiboot will probably not recognize since it is not
+formatted).
+
 Maintainer contact information
 ------------------------------
 
diff --git a/docs/system/target-arm.rst b/docs/system/target-arm.rst
index 9aaa9c414c9e8..b96a05a920647 100644
--- a/docs/system/target-arm.rst
+++ b/docs/system/target-arm.rst
@@ -95,6 +95,7 @@ Board-specific documentation
    arm/imx25-pdk
    arm/mcimx6ul-evk
    arm/mcimx7d-sabre
+   arm/imx8mp-evk
    arm/orangepi
    arm/raspi
    arm/collie
@@ -102,6 +103,7 @@ Board-specific documentation
    arm/stellaris
    arm/stm32
    arm/virt
+   arm/vmapple
    arm/xenpvh
    arm/xlnx-versal-virt
    arm/xlnx-zynq
diff --git a/docs/system/target-hexagon.rst b/docs/system/target-hexagon.rst
new file mode 100644
index 0000000000000..894337a533cde
--- /dev/null
+++ b/docs/system/target-hexagon.rst
@@ -0,0 +1,112 @@
+.. _Hexagon-System-emulator:
+
+Hexagon System emulator
+-----------------------
+
+Use the ``qemu-system-hexagon`` executable to simulate a 32-bit Hexagon
+machine.
+
+Hexagon Machines
+================
+
+Hexagon DSPs are suited to various functions and generally appear in a
+"DSP subsystem" of a larger system-on-chip (SoC).
+
+Hexagon DSPs are often included in a subsystem that looks like the diagram
+below.  Instructions are loaded into DDR before the DSP is brought out of
+reset and the first instructions are fetched from DDR via the EVB/reset vector.
+
+In a real system, a TBU/SMMU would normally arbitrate AXI accesses but
+we don't have a need to model that for QEMU.
+
+Hexagon DSP cores use simultaneous multithreading (SMT) with as many as 8
+hardware threads.
+
+.. admonition:: Diagram
+
+ .. code:: text
+
+              AHB (local) bus                     AXI (global) bus
+                    │                                 │
+                    │                                 │
+       ┌─────────┐  │       ┌─────────────────┐       │
+       │ L2VIC   ├──┤       │                 │       │
+       │         ├──┼───────►                 ├───────┤
+       └─────▲───┘  │       │   Hexagon DSP   │       │
+             │      │       │                 │       │        ┌─────┐
+             │      │       │    N threads    │       │        │ DDR │
+             │      ├───────┤                 │       │        │     │
+        ┌────┴──┐   │       │                 │       ├────────┤     │
+        │QTimer ├───┤       │                 │       │        │     │
+        │       │   │       │                 │       │        │     │
+        └───────┘   │       │   ┌─────────┐   │       │        │     │
+                    │       │  ┌─────────┐│   │       │        │     │
+        ┌───────┐   │       │  │  HVX xM ││   │       │        │     │
+        │QDSP6SS├───┤       │  │         │┘   │       │        │     │
+        └───────┘   │       │  └─────────┘    │       │        └─────┘
+                    │       │                 │       │
+        ┌───────┐   │       └─────────────────┘       │
+        │  CSR  ├───┤
+        └───────┘   │   ┌──────┐   ┌───────────┐
+                    │   │ TCM  │   │   VTCM    │
+                        │      │   │           │
+                        └──────┘   │           │
+                                   │           │
+                                   │           │
+                                   │           │
+                                   └───────────┘
+
+Components
+----------
+Other than l2vic and HVX, the components below are not implemented in QEMU.
+
+* L2VIC: the L2 vectored interrupt controller.  Supports 1024 input
+  interrupts, edge- or level-triggered.  The core ISA has system registers
+  ``VID``, ``VID1`` which read through to the L2VIC device.
+* QTimer: ARMSSE-based programmable timer device. Its interrupts are
+  wired to the L2VIC.  System registers ``TIMER``, ``UTIMER`` read
+  through to the QTimer device.
+* QDSP6SS: DSP subsystem features, accessible to the entire SoC, including
+  DSP NMI, watchdog, reset, etc.
+* CSR: Configuration/Status Registers.
+* TCM: DSP-exclusive tightly-coupled memory.  This memory can be used for
+  DSPs when isolated from DDR and in some bootstrapping modes.
+* VTCM: DSP-exclusive vector tightly-coupled memory.  This memory is accessed
+  by some HVX instructions.
+* HVX: the vector coprocessor supports 64 and 128-byte vector registers.
+  64-byte mode is not implemented in QEMU.
+
+
+Bootstrapping
+-------------
+Hexagon systems do not generally have access to a block device.  So, for
+QEMU the typical use case involves loading a binary or ELF file into memory
+and executing from the indicated start address::
+
+    $ qemu-system-hexagon -kernel ./prog -append 'arg1 arg2'
+
+Semihosting
+-----------
+Hexagon supports a semihosting interface similar to other architectures'.
+The ``trap0`` instruction can activate these semihosting calls so that the
+guest software can access the host console and filesystem.  Semihosting
+is not yet implemented in QEMU hexagon.
+
+Hexagon Virtual Machine
+-----------------------
+
+The hexagon virtual machine is a hypervisor that can partition a single
+Hexagon DSP among multiple guest operating systems, and abstracts the
+specific details of a DSP architectural revision for the sake of consistency
+among generations.
+
+[minivm](https://github.com/quic/hexagonMVM) is a reference implementation
+of this VM interface.
+
+
+Hexagon Features
+================
+.. toctree::
+   hexagon/emulation
+   hexagon/cdsp
+
diff --git a/docs/system/targets.rst b/docs/system/targets.rst
index 224fadae71c45..e6dcdb9d41610 100644
--- a/docs/system/targets.rst
+++ b/docs/system/targets.rst
@@ -29,3 +29,4 @@ Contents:
    target-sparc64
    target-i386
    target-xtensa
+   target-hexagon
diff --git a/fpu/meson.build b/fpu/meson.build
index 1a9992ded56b6..646c76f0c69e3 100644
--- a/fpu/meson.build
+++ b/fpu/meson.build
@@ -1 +1 @@
-specific_ss.add(when: 'CONFIG_TCG', if_true: files('softfloat.c'))
+common_ss.add(when: 'CONFIG_TCG', if_true: files('softfloat.c'))
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
index 1d09f066c5d83..171bfd06e3a40 100644
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -195,6 +195,25 @@ static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
 static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
                                  const FloatFmt *fmt)
 {
+    /*
+     * It's target-dependent how to handle the case of exponent 0
+     * and Integer bit set. Intel calls these "pseudodenormals",
+     * and treats them as if the integer bit was 0, and never
+     * produces them on output. This is the default behaviour for QEMU.
+     * For m68k, the integer bit is considered validly part of the
+     * input value when the exponent is 0, and may be 0 or 1,
+     * giving extra range. They may also be generated as outputs.
+     * (The m68k manual actually calls these values part of the
+     * normalized number range, not the denormalized number range,
+     * but that distinction is not important for us, because
+     * m68k doesn't care about the input_denormal_used status flag.)
+     * floatx80_pseudo_denormal_valid selects the m68k behaviour,
+     * which changes both how we canonicalize such a value and
+     * how we uncanonicalize results.
+     */
+    bool has_pseudo_denormals = fmt->has_explicit_bit &&
+        (status->floatx80_behaviour & floatx80_pseudo_denormal_valid);
+
     if (unlikely(p->exp == 0)) {
         if (likely(frac_eqz(p))) {
             p->cls = float_class_zero;
@@ -206,7 +225,7 @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
             int shift = frac_normalize(p);
             p->cls = float_class_denormal;
             p->exp = fmt->frac_shift - fmt->exp_bias
-                   - shift + !fmt->m68k_denormal;
+                   - shift + !has_pseudo_denormals;
         }
     } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
         p->cls = float_class_normal;
@@ -342,13 +361,15 @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
         frac_clear(p);
     } else {
         bool is_tiny = s->tininess_before_rounding || exp < 0;
+        bool has_pseudo_denormals = fmt->has_explicit_bit &&
+            (s->floatx80_behaviour & floatx80_pseudo_denormal_valid);
 
         if (!is_tiny) {
             FloatPartsN discard;
             is_tiny = !frac_addi(&discard, p, inc);
         }
 
-        frac_shrjam(p, !fmt->m68k_denormal - exp);
+        frac_shrjam(p, !has_pseudo_denormals - exp);
 
         if (p->frac_lo & round_mask) {
             /* Need to recompute round-to-even/round-to-odd. */
@@ -379,7 +400,7 @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
             p->frac_lo &= ~round_mask;
         }
 
-        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !fmt->m68k_denormal;
+        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !has_pseudo_denormals;
         frac_shr(p, frac_shift);
 
         if (is_tiny) {
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
index cbbbab52ba39e..ba4fa08b7beb7 100644
--- a/fpu/softfloat-specialize.c.inc
+++ b/fpu/softfloat-specialize.c.inc
@@ -85,11 +85,7 @@ this code that are retained.
  */
 static inline bool no_signaling_nans(float_status *status)
 {
-#if defined(TARGET_XTENSA)
     return status->no_signaling_nans;
-#else
-    return false;
-#endif
 }
 
 /* Define how the architecture discriminates signaling NaNs.
@@ -97,17 +93,10 @@ static inline bool no_signaling_nans(float_status *status)
  * In IEEE 754-1985 this was implementation defined, but in IEEE 754-2008
  * the msb must be zero.  MIPS is (so far) unique in supporting both the
  * 2008 revision and backward compatibility with their original choice.
- * Thus for MIPS we must make the choice at runtime.
  */
 static inline bool snan_bit_is_one(float_status *status)
 {
-#if defined(TARGET_MIPS)
     return status->snan_bit_is_one;
-#elif defined(TARGET_HPPA) || defined(TARGET_SH4)
-    return 1;
-#else
-    return 0;
-#endif
 }
 
 /*----------------------------------------------------------------------------
@@ -227,15 +216,15 @@ floatx80 floatx80_default_nan(float_status *status)
 | The pattern for a default generated extended double-precision inf.
 *----------------------------------------------------------------------------*/
 
-#define floatx80_infinity_high 0x7FFF
-#if defined(TARGET_M68K)
-#define floatx80_infinity_low  UINT64_C(0x0000000000000000)
-#else
-#define floatx80_infinity_low  UINT64_C(0x8000000000000000)
-#endif
-
-const floatx80 floatx80_infinity
-    = make_floatx80_init(floatx80_infinity_high, floatx80_infinity_low);
+floatx80 floatx80_default_inf(bool zSign, float_status *status)
+{
+    /*
+     * Whether the Integer bit is set in the default Infinity is
+     * target dependent.
+     */
+    bool z = status->floatx80_behaviour & floatx80_default_inf_int_bit_is_zero;
+    return packFloatx80(zSign, 0x7fff, z ? 0 : (1ULL << 63));
+}
 
 /*----------------------------------------------------------------------------
 | Returns 1 if the half-precision floating-point value `a' is a quiet
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index f4fed9bfda929..34c962d6bd983 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -79,9 +79,6 @@ this code that are retained.
  * version 2 or later. See the COPYING file in the top-level directory.
  */
 
-/* softfloat (and in particular the code in softfloat-specialize.h) is
- * target-dependent and needs the TARGET_* macros.
- */
 #include "qemu/osdep.h"
 #include <math.h>
 #include "qemu/bitops.h"
@@ -220,11 +217,9 @@ GEN_INPUT_FLUSH3(float64_input_flush3, float64)
  * the use of hardfloat, since hardfloat relies on the inexact flag being
  * already set.
  */
-#if defined(TARGET_PPC) || defined(__FAST_MATH__)
 # if defined(__FAST_MATH__)
 #  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
     IEEE implementation
-# endif
 # define QEMU_NO_HARDFLOAT 1
 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
 #else
@@ -537,7 +532,8 @@ typedef struct {
  *   round_mask: bits below lsb which must be rounded
  * The following optional modifiers are available:
  *   arm_althp: handle ARM Alternative Half Precision
- *   m68k_denormal: explicit integer bit for extended precision may be 1
+ *   has_explicit_bit: has an explicit integer bit; this affects whether
+ *   the float_status floatx80_behaviour handling applies
  */
 typedef struct {
     int exp_size;
@@ -547,7 +543,7 @@ typedef struct {
     int frac_size;
     int frac_shift;
     bool arm_althp;
-    bool m68k_denormal;
+    bool has_explicit_bit;
     uint64_t round_mask;
 } FloatFmt;
 
@@ -600,9 +596,7 @@ static const FloatFmt floatx80_params[3] = {
     [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
     [floatx80_precision_x] = {
         FLOATX80_PARAMS(64),
-#ifdef TARGET_M68K
-        .m68k_denormal = true,
-#endif
+        .has_explicit_bit = true,
     },
 };
 
@@ -1810,7 +1804,7 @@ static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
         g_assert_not_reached();
     }
 
-    if (unlikely(floatx80_invalid_encoding(f))) {
+    if (unlikely(floatx80_invalid_encoding(f, s))) {
         float_raise(float_flag_invalid, s);
         return false;
     }
@@ -1860,7 +1854,8 @@ static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
 
     case float_class_inf:
         /* x86 and m68k differ in the setting of the integer bit. */
-        frac = floatx80_infinity_low;
+        frac = s->floatx80_behaviour & floatx80_default_inf_int_bit_is_zero ?
+            0 : (1ULL << 63);
         exp = fmt->exp_max;
         break;
 
@@ -5144,9 +5139,7 @@ floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
                ) {
                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
             }
-            return packFloatx80(zSign,
-                                floatx80_infinity_high,
-                                floatx80_infinity_low);
+            return floatx80_default_inf(zSign, status);
         }
         if ( zExp <= 0 ) {
             isTiny = status->tininess_before_rounding
diff --git a/gdb-xml/hexagon-sys.xml b/gdb-xml/hexagon-sys.xml
new file mode 100644
index 0000000000000..1d9c21172253f
--- /dev/null
+++ b/gdb-xml/hexagon-sys.xml
@@ -0,0 +1,116 @@
+<?xml version="1.0"?>
+<!--
+  Copyright(c) 2023-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+
+  This work is licensed under the terms of the GNU GPL, version 2 or
+  (at your option) any later version. See the COPYING file in the
+  top-level directory.
+
+  Note: this file is intended to be use with LLDB, so it contains fields
+  that may be unknown to GDB. For more information on such fields, please
+  see:
+  https://github.com/llvm/llvm-project/blob/287aa6c4536408413b860e61fca0318a27214cf3/lldb/docs/lldb-gdb-remote.txt#L738-L860
+  https://github.com/llvm/llvm-project/blob/287aa6c4536408413b860e61fca0318a27214cf3/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp#L4275-L4335
+-->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.hexagon.sys">
+
+  <reg name="sgp0"       bitsize="32" offset="4416" encoding="uint" format="hex" group="System Registers" dwarf_regnum="135" />
+  <reg name="sgp1"       bitsize="32" offset="4420" encoding="uint" format="hex" group="System Registers" dwarf_regnum="136" />
+  <reg name="stid"       bitsize="32" offset="4424" encoding="uint" format="hex" group="System Registers" dwarf_regnum="137" />
+  <reg name="elr"        bitsize="32" offset="4428" encoding="uint" format="hex" group="System Registers" dwarf_regnum="138" />
+  <reg name="badva0"     bitsize="32" offset="4432" encoding="uint" format="hex" group="System Registers" dwarf_regnum="139" />
+  <reg name="badva1"     bitsize="32" offset="4436" encoding="uint" format="hex" group="System Registers" dwarf_regnum="140" />
+  <reg name="ssr"        bitsize="32" offset="4440" encoding="uint" format="hex" group="System Registers" dwarf_regnum="141" />
+  <reg name="ccr"        bitsize="32" offset="4444" encoding="uint" format="hex" group="System Registers" dwarf_regnum="142" />
+  <reg name="htid"       bitsize="32" offset="4448" encoding="uint" format="hex" group="System Registers" dwarf_regnum="143" />
+  <reg name="badva"      bitsize="32" offset="4452" encoding="uint" format="hex" group="System Registers" dwarf_regnum="144" />
+  <reg name="imask"      bitsize="32" offset="4456" encoding="uint" format="hex" group="System Registers" dwarf_regnum="145" />
+  <reg name="gevb"       bitsize="32" offset="4460" encoding="uint" format="hex" group="System Registers" dwarf_regnum="146" />
+  <reg name="rsv12"      bitsize="32" offset="4464" encoding="uint" format="hex" group="System Registers" dwarf_regnum="147" />
+  <reg name="rsv13"      bitsize="32" offset="4468" encoding="uint" format="hex" group="System Registers" dwarf_regnum="148" />
+  <reg name="rsv14"      bitsize="32" offset="4472" encoding="uint" format="hex" group="System Registers" dwarf_regnum="149" />
+  <reg name="rsv15"      bitsize="32" offset="4476" encoding="uint" format="hex" group="System Registers" dwarf_regnum="150" />
+  <reg name="evb"        bitsize="32" offset="4480" encoding="uint" format="hex" group="System Registers" dwarf_regnum="151" />
+  <reg name="modectl"    bitsize="32" offset="4484" encoding="uint" format="hex" group="System Registers" dwarf_regnum="152" />
+  <reg name="syscfg"     bitsize="32" offset="4488" encoding="uint" format="hex" group="System Registers" dwarf_regnum="153" />
+  <reg name="free19"     bitsize="32" offset="4492" encoding="uint" format="hex" group="System Registers" dwarf_regnum="154" />
+  <reg name="ipendad"    bitsize="32" offset="4496" encoding="uint" format="hex" group="System Registers" dwarf_regnum="155" />
+  <reg name="vid"        bitsize="32" offset="4500" encoding="uint" format="hex" group="System Registers" dwarf_regnum="156" />
+  <reg name="vid1"       bitsize="32" offset="4504" encoding="uint" format="hex" group="System Registers" dwarf_regnum="157" />
+  <reg name="bestwait"   bitsize="32" offset="4508" encoding="uint" format="hex" group="System Registers" dwarf_regnum="158" />
+  <reg name="free24"     bitsize="32" offset="4512" encoding="uint" format="hex" group="System Registers" dwarf_regnum="159" />
+  <reg name="schedcfg"   bitsize="32" offset="4516" encoding="uint" format="hex" group="System Registers" dwarf_regnum="160" />
+  <reg name="free26"     bitsize="32" offset="4520" encoding="uint" format="hex" group="System Registers" dwarf_regnum="161" />
+  <reg name="cfgbase"    bitsize="32" offset="4524" encoding="uint" format="hex" group="System Registers" dwarf_regnum="162" />
+  <reg name="diag"       bitsize="32" offset="4528" encoding="uint" format="hex" group="System Registers" dwarf_regnum="163" />
+  <reg name="rev"        bitsize="32" offset="4532" encoding="uint" format="hex" group="System Registers" dwarf_regnum="164" />
+  <reg name="pcyclelo"   bitsize="32" offset="4536" encoding="uint" format="hex" group="System Registers" dwarf_regnum="165" />
+  <reg name="pcyclehi"   bitsize="32" offset="4540" encoding="uint" format="hex" group="System Registers" dwarf_regnum="166" />
+  <reg name="isdbst"     bitsize="32" offset="4544" encoding="uint" format="hex" group="System Registers" dwarf_regnum="167" />
+  <reg name="isdbcfg0"   bitsize="32" offset="4548" encoding="uint" format="hex" group="System Registers" dwarf_regnum="168" />
+  <reg name="isdbcfg1"   bitsize="32" offset="4552" encoding="uint" format="hex" group="System Registers" dwarf_regnum="169" />
+  <reg name="livelock"   bitsize="32" offset="4556" encoding="uint" format="hex" group="System Registers" dwarf_regnum="170" />
+  <reg name="brkptpc0"   bitsize="32" offset="4560" encoding="uint" format="hex" group="System Registers" dwarf_regnum="171" />
+  <reg name="brkptccfg0" bitsize="32" offset="4564" encoding="uint" format="hex" group="System Registers" dwarf_regnum="172" />
+  <reg name="brkptpc1"   bitsize="32" offset="4568" encoding="uint" format="hex" group="System Registers" dwarf_regnum="173" />
+  <reg name="brkptcfg1"  bitsize="32" offset="4572" encoding="uint" format="hex" group="System Registers" dwarf_regnum="174" />
+  <reg name="isdbmbxin"  bitsize="32" offset="4576" encoding="uint" format="hex" group="System Registers" dwarf_regnum="175" />
+  <reg name="isdbmbxout" bitsize="32" offset="4580" encoding="uint" format="hex" group="System Registers" dwarf_regnum="176" />
+  <reg name="isdben"     bitsize="32" offset="4584" encoding="uint" format="hex" group="System Registers" dwarf_regnum="177" />
+  <reg name="isdbgpr"    bitsize="32" offset="4588" encoding="uint" format="hex" group="System Registers" dwarf_regnum="178" />
+  <reg name="pmucnt4"    bitsize="32" offset="4592" encoding="uint" format="hex" group="System Registers" dwarf_regnum="179" />
+  <reg name="pmucnt5"    bitsize="32" offset="4596" encoding="uint" format="hex" group="System Registers" dwarf_regnum="180" />
+  <reg name="pmucnt6"    bitsize="32" offset="4600" encoding="uint" format="hex" group="System Registers" dwarf_regnum="181" />
+  <reg name="pmucnt7"    bitsize="32" offset="4604" encoding="uint" format="hex" group="System Registers" dwarf_regnum="182" />
+  <reg name="pmucnt0"    bitsize="32" offset="4608" encoding="uint" format="hex" group="System Registers" dwarf_regnum="183" />
+  <reg name="pmucnt1"    bitsize="32" offset="4612" encoding="uint" format="hex" group="System Registers" dwarf_regnum="184" />
+  <reg name="pmucnt2"    bitsize="32" offset="4616" encoding="uint" format="hex" group="System Registers" dwarf_regnum="185" />
+  <reg name="pmucnt3"    bitsize="32" offset="4620" encoding="uint" format="hex" group="System Registers" dwarf_regnum="186" />
+  <reg name="pmuevtcfg"  bitsize="32" offset="4624" encoding="uint" format="hex" group="System Registers" dwarf_regnum="187" />
+  <reg name="pmustid0"   bitsize="32" offset="4628" encoding="uint" format="hex" group="System Registers" dwarf_regnum="188" />
+  <reg name="pmuevtcfg1" bitsize="32" offset="4632" encoding="uint" format="hex" group="System Registers" dwarf_regnum="189" />
+  <reg name="pmustid1"   bitsize="32" offset="4636" encoding="uint" format="hex" group="System Registers" dwarf_regnum="190" />
+  <reg name="timerlo"    bitsize="32" offset="4640" encoding="uint" format="hex" group="System Registers" dwarf_regnum="191" />
+  <reg name="timerhi"    bitsize="32" offset="4644" encoding="uint" format="hex" group="System Registers" dwarf_regnum="192" />
+  <reg name="pmucfg"     bitsize="32" offset="4648" encoding="uint" format="hex" group="System Registers" dwarf_regnum="193" />
+  <reg name="rsv59"      bitsize="32" offset="4652" encoding="uint" format="hex" group="System Registers" dwarf_regnum="194" />
+  <reg name="rsv60"      bitsize="32" offset="4656" encoding="uint" format="hex" group="System Registers" dwarf_regnum="195" />
+  <reg name="rsv61"      bitsize="32" offset="4660" encoding="uint" format="hex" group="System Registers" dwarf_regnum="196" />
+  <reg name="rsv62"      bitsize="32" offset="4664" encoding="uint" format="hex" group="System Registers" dwarf_regnum="197" />
+  <reg name="rsv63"      bitsize="32" offset="4668" encoding="uint" format="hex" group="System Registers" dwarf_regnum="198" />
+  <reg name="g0"         bitsize="32" offset="4672" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="179" />
+  <reg name="g1"         bitsize="32" offset="4676" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="180" />
+  <reg name="g2"         bitsize="32" offset="4680" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="181" />
+  <reg name="g3"         bitsize="32" offset="4684" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="182" />
+  <reg name="rsv4"       bitsize="32" offset="4688" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="183" />
+  <reg name="rsv5"       bitsize="32" offset="4692" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="184" />
+  <reg name="rsv6"       bitsize="32" offset="4696" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="185" />
+  <reg name="rsv7"       bitsize="32" offset="4700" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="186" />
+  <reg name="rsv8"       bitsize="32" offset="4704" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="187" />
+  <reg name="rsv9"       bitsize="32" offset="4708" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="188" />
+  <reg name="rsv10"      bitsize="32" offset="4712" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="189" />
+  <reg name="rsv11"      bitsize="32" offset="4716" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="190" />
+  <reg name="rsv12"      bitsize="32" offset="4720" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="191" />
+  <reg name="rsv13"      bitsize="32" offset="4724" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="192" />
+  <reg name="rsv14"      bitsize="32" offset="4728" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="193" />
+  <reg name="rsv15"      bitsize="32" offset="4732" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="194" />
+  <reg name="gpmucnt4"   bitsize="32" offset="4736" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="195" />
+  <reg name="gpmucnt5"   bitsize="32" offset="4740" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="196" />
+  <reg name="gpmucnt6"   bitsize="32" offset="4744" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="197" />
+  <reg name="gpmucnt7"   bitsize="32" offset="4748" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="198" />
+  <reg name="rsv20"      bitsize="32" offset="4752" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="199" />
+  <reg name="rsv21"      bitsize="32" offset="4756" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="200" />
+  <reg name="rsv22"      bitsize="32" offset="4760" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="201" />
+  <reg name="rsv23"      bitsize="32" offset="4764" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="202" />
+  <reg name="gpcyclelo"  bitsize="32" offset="4768" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="203" />
+  <reg name="gpcyclehi"  bitsize="32" offset="4772" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="204" />
+  <reg name="gpmucnt0"   bitsize="32" offset="4776" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="205" />
+  <reg name="gpmucnt1"   bitsize="32" offset="4780" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="206" />
+  <reg name="gpmucnt2"   bitsize="32" offset="4784" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="207" />
+  <reg name="gpmucnt3"   bitsize="32" offset="4788" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="208" />
+  <reg name="rsv30"      bitsize="32" offset="4792" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="209" />
+  <reg name="rsv31"      bitsize="32" offset="4796" encoding="uint" format="hex" group="Guest Registers"  dwarf_regnum="210" />
+
+</feature>
diff --git a/gdbstub/gdbstub.c b/gdbstub/gdbstub.c
index e366df12d4aef..282e13e163f50 100644
--- a/gdbstub/gdbstub.c
+++ b/gdbstub/gdbstub.c
@@ -354,7 +354,6 @@ static const char *get_feature_xml(const char *p, const char **newp,
                                    GDBProcess *process)
 {
     CPUState *cpu = gdb_get_first_cpu_in_process(process);
-    CPUClass *cc = CPU_GET_CLASS(cpu);
     GDBRegisterState *r;
     size_t len;
 
@@ -377,11 +376,11 @@ static const char *get_feature_xml(const char *p, const char **newp,
                          "<!DOCTYPE target SYSTEM \"gdb-target.dtd\">"
                          "<target>"));
 
-            if (cc->gdb_arch_name) {
+            if (cpu->cc->gdb_arch_name) {
                 g_ptr_array_add(
                     xml,
                     g_markup_printf_escaped("<architecture>%s</architecture>",
-                                            cc->gdb_arch_name(cpu)));
+                                            cpu->cc->gdb_arch_name(cpu)));
             }
             for (guint i = 0; i < cpu->gdb_regs->len; i++) {
                 r = &g_array_index(cpu->gdb_regs, GDBRegisterState, i);
@@ -520,11 +519,10 @@ GArray *gdb_get_register_list(CPUState *cpu)
 
 int gdb_read_register(CPUState *cpu, GByteArray *buf, int reg)
 {
-    CPUClass *cc = CPU_GET_CLASS(cpu);
     GDBRegisterState *r;
 
-    if (reg < cc->gdb_num_core_regs) {
-        return cc->gdb_read_register(cpu, buf, reg);
+    if (reg < cpu->cc->gdb_num_core_regs) {
+        return cpu->cc->gdb_read_register(cpu, buf, reg);
     }
 
     for (guint i = 0; i < cpu->gdb_regs->len; i++) {
@@ -538,11 +536,10 @@ int gdb_read_register(CPUState *cpu, GByteArray *buf, int reg)
 
 static int gdb_write_register(CPUState *cpu, uint8_t *mem_buf, int reg)
 {
-    CPUClass *cc = CPU_GET_CLASS(cpu);
     GDBRegisterState *r;
 
-    if (reg < cc->gdb_num_core_regs) {
-        return cc->gdb_write_register(cpu, mem_buf, reg);
+    if (reg < cpu->cc->gdb_num_core_regs) {
+        return cpu->cc->gdb_write_register(cpu, mem_buf, reg);
     }
 
     for (guint i = 0; i < cpu->gdb_regs->len; i++) {
@@ -570,7 +567,7 @@ static void gdb_register_feature(CPUState *cpu, int base_reg,
 
 void gdb_init_cpu(CPUState *cpu)
 {
-    CPUClass *cc = CPU_GET_CLASS(cpu);
+    CPUClass *cc = cpu->cc;
     const GDBFeature *feature;
 
     cpu->gdb_regs = g_array_new(false, false, sizeof(GDBRegisterState));
@@ -1646,11 +1643,8 @@ void gdb_extend_qsupported_features(char *qflags)
 
 static void handle_query_supported(GArray *params, void *user_ctx)
 {
-    CPUClass *cc;
-
     g_string_printf(gdbserver_state.str_buf, "PacketSize=%x", MAX_PACKET_LENGTH);
-    cc = CPU_GET_CLASS(first_cpu);
-    if (cc->gdb_core_xml_file) {
+    if (first_cpu->cc->gdb_core_xml_file) {
         g_string_append(gdbserver_state.str_buf, ";qXfer:features:read+");
     }
 
@@ -1697,7 +1691,6 @@ static void handle_query_supported(GArray *params, void *user_ctx)
 static void handle_query_xfer_features(GArray *params, void *user_ctx)
 {
     GDBProcess *process;
-    CPUClass *cc;
     unsigned long len, total_len, addr;
     const char *xml;
     const char *p;
@@ -1708,8 +1701,7 @@ static void handle_query_xfer_features(GArray *params, void *user_ctx)
     }
 
     process = gdb_get_cpu_process(gdbserver_state.g_cpu);
-    cc = CPU_GET_CLASS(gdbserver_state.g_cpu);
-    if (!cc->gdb_core_xml_file) {
+    if (!gdbserver_state.g_cpu->cc->gdb_core_xml_file) {
         gdb_put_packet("");
         return;
     }
diff --git a/gdbstub/system.c b/gdbstub/system.c
index 8ce79fa88cf4f..dd22ff0fb3a0c 100644
--- a/gdbstub/system.c
+++ b/gdbstub/system.c
@@ -19,9 +19,11 @@
 #include "gdbstub/commands.h"
 #include "exec/hwaddr.h"
 #include "exec/tb-flush.h"
+#include "system/accel-ops.h"
 #include "system/cpus.h"
 #include "system/runstate.h"
 #include "system/replay.h"
+#include "system/tcg.h"
 #include "hw/core/cpu.h"
 #include "hw/cpu/cluster.h"
 #include "hw/boards.h"
@@ -171,7 +173,9 @@ static void gdb_vm_state_change(void *opaque, bool running, RunState state)
         } else {
             trace_gdbstub_hit_break();
         }
-        tb_flush(cpu);
+        if (tcg_enabled()) {
+            tb_flush(cpu);
+        }
         ret = GDB_SIGNAL_TRAP;
         break;
     case RUN_STATE_PAUSED:
@@ -452,8 +456,6 @@ static int phy_memory_mode;
 int gdb_target_memory_rw_debug(CPUState *cpu, hwaddr addr,
                                uint8_t *buf, int len, bool is_write)
 {
-    CPUClass *cc;
-
     if (phy_memory_mode) {
         if (is_write) {
             cpu_physical_memory_write(addr, buf, len);
@@ -463,9 +465,8 @@ int gdb_target_memory_rw_debug(CPUState *cpu, hwaddr addr,
         return 0;
     }
 
-    cc = CPU_GET_CLASS(cpu);
-    if (cc->memory_rw_debug) {
-        return cc->memory_rw_debug(cpu, addr, buf, len, is_write);
+    if (cpu->cc->memory_rw_debug) {
+        return cpu->cc->memory_rw_debug(cpu, addr, buf, len, is_write);
     }
 
     return cpu_memory_rw_debug(cpu, addr, buf, len, is_write);
diff --git a/gdbstub/user-target.c b/gdbstub/user-target.c
index 4bfcf78aaab08..43231e695e847 100644
--- a/gdbstub/user-target.c
+++ b/gdbstub/user-target.c
@@ -233,10 +233,8 @@ void gdb_handle_query_offsets(GArray *params, void *user_ctx)
 static inline int target_memory_rw_debug(CPUState *cpu, target_ulong addr,
                                          uint8_t *buf, int len, bool is_write)
 {
-    CPUClass *cc;
-    cc = CPU_GET_CLASS(cpu);
-    if (cc->memory_rw_debug) {
-        return cc->memory_rw_debug(cpu, addr, buf, len, is_write);
+    if (cpu->cc->memory_rw_debug) {
+        return cpu->cc->memory_rw_debug(cpu, addr, buf, len, is_write);
     }
     return cpu_memory_rw_debug(cpu, addr, buf, len, is_write);
 }
diff --git a/gdbstub/user.c b/gdbstub/user.c
index 3730f32c415f8..67403e5a252a6 100644
--- a/gdbstub/user.c
+++ b/gdbstub/user.c
@@ -743,11 +743,8 @@ int gdb_continue_partial(char *newstates)
 int gdb_target_memory_rw_debug(CPUState *cpu, hwaddr addr,
                                uint8_t *buf, int len, bool is_write)
 {
-    CPUClass *cc;
-
-    cc = CPU_GET_CLASS(cpu);
-    if (cc->memory_rw_debug) {
-        return cc->memory_rw_debug(cpu, addr, buf, len, is_write);
+    if (cpu->cc->memory_rw_debug) {
+        return cpu->cc->memory_rw_debug(cpu, addr, buf, len, is_write);
     }
     return cpu_memory_rw_debug(cpu, addr, buf, len, is_write);
 }
diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
index c59cd6637b97e..ecdbdf623d11c 100644
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -196,7 +196,8 @@ SRST
 ERST
 
 #if defined(TARGET_I386) || defined(TARGET_SH4) || defined(TARGET_SPARC) || \
-    defined(TARGET_PPC) || defined(TARGET_XTENSA) || defined(TARGET_M68K)
+    defined(TARGET_PPC) || defined(TARGET_XTENSA) || defined(TARGET_M68K) || \
+    defined(TARGET_HEXAGON)
     {
         .name       = "tlb",
         .args_type  = "",
diff --git a/host/include/aarch64/host/atomic128-cas.h b/host/include/aarch64/host/atomic128-cas.h
index 58630107bcce1..991da4ef54333 100644
--- a/host/include/aarch64/host/atomic128-cas.h
+++ b/host/include/aarch64/host/atomic128-cas.h
@@ -13,7 +13,7 @@
 
 /* Through gcc 10, aarch64 has no support for 128-bit atomics.  */
 #if defined(CONFIG_ATOMIC128) || defined(CONFIG_CMPXCHG128)
-#include "host/include/generic/host/atomic128-cas.h"
+#include "host/include/generic/host/atomic128-cas.h.inc"
 #else
 static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
 {
diff --git a/host/include/aarch64/host/atomic128-ldst.h b/host/include/aarch64/host/atomic128-ldst.h.inc
similarity index 100%
rename from host/include/aarch64/host/atomic128-ldst.h
rename to host/include/aarch64/host/atomic128-ldst.h.inc
diff --git a/host/include/generic/host/atomic128-cas.h b/host/include/generic/host/atomic128-cas.h.inc
similarity index 100%
rename from host/include/generic/host/atomic128-cas.h
rename to host/include/generic/host/atomic128-cas.h.inc
diff --git a/host/include/generic/host/atomic128-ldst.h b/host/include/generic/host/atomic128-ldst.h.inc
similarity index 100%
rename from host/include/generic/host/atomic128-ldst.h
rename to host/include/generic/host/atomic128-ldst.h.inc
diff --git a/host/include/loongarch64/host/atomic128-ldst.h b/host/include/loongarch64/host/atomic128-ldst.h.inc
similarity index 100%
rename from host/include/loongarch64/host/atomic128-ldst.h
rename to host/include/loongarch64/host/atomic128-ldst.h.inc
diff --git a/host/include/x86_64/host/atomic128-ldst.h b/host/include/x86_64/host/atomic128-ldst.h.inc
similarity index 96%
rename from host/include/x86_64/host/atomic128-ldst.h
rename to host/include/x86_64/host/atomic128-ldst.h.inc
index 8d6f909d3c9c0..4c698e3246f4c 100644
--- a/host/include/x86_64/host/atomic128-ldst.h
+++ b/host/include/x86_64/host/atomic128-ldst.h.inc
@@ -69,7 +69,7 @@ static inline void atomic16_set(Int128 *ptr, Int128 val)
 }
 #else
 /* Provide QEMU_ERROR stubs. */
-#include "host/include/generic/host/atomic128-ldst.h"
+#include "host/include/generic/host/atomic128-ldst.h.inc"
 #endif
 
 #endif /* X86_64_ATOMIC128_LDST_H */
diff --git a/host/include/x86_64/host/load-extract-al16-al8.h.inc b/host/include/x86_64/host/load-extract-al16-al8.h.inc
index baa506b7b5b14..b837c37868432 100644
--- a/host/include/x86_64/host/load-extract-al16-al8.h.inc
+++ b/host/include/x86_64/host/load-extract-al16-al8.h.inc
@@ -9,7 +9,7 @@
 #define X86_64_LOAD_EXTRACT_AL16_AL8_H
 
 #ifdef CONFIG_INT128_TYPE
-#include "host/atomic128-ldst.h"
+#include "host/atomic128-ldst.h.inc"
 
 /**
  * load_atom_extract_al16_or_al8:
diff --git a/hw/Kconfig b/hw/Kconfig
index 1b4e9bb07f7d5..4dc7914c13f52 100644
--- a/hw/Kconfig
+++ b/hw/Kconfig
@@ -37,10 +37,12 @@ source smbios/Kconfig
 source ssi/Kconfig
 source timer/Kconfig
 source tpm/Kconfig
+source uefi/Kconfig
 source ufs/Kconfig
 source usb/Kconfig
 source virtio/Kconfig
 source vfio/Kconfig
+source vmapple/Kconfig
 source xen/Kconfig
 source watchdog/Kconfig
 
@@ -65,6 +67,7 @@ source sparc/Kconfig
 source sparc64/Kconfig
 source tricore/Kconfig
 source xtensa/Kconfig
+source hexagon/Kconfig
 
 # Symbols used by multiple targets
 config TEST_DEVICES
diff --git a/hw/acpi/acpi-stub.c b/hw/acpi/acpi-stub.c
index e268ce9b1a9c7..fd0b62fad9e42 100644
--- a/hw/acpi/acpi-stub.c
+++ b/hw/acpi/acpi-stub.c
@@ -21,7 +21,15 @@
 #include "qemu/osdep.h"
 #include "hw/acpi/acpi.h"
 
+char unsigned *acpi_tables;
+size_t acpi_tables_len;
+
 void acpi_table_add(const QemuOpts *opts, Error **errp)
 {
     g_assert_not_reached();
 }
+
+bool acpi_builtin(void)
+{
+    return false;
+}
diff --git a/hw/acpi/core.c b/hw/acpi/core.c
index 870391ed7c8a7..58f8964e13090 100644
--- a/hw/acpi/core.c
+++ b/hw/acpi/core.c
@@ -78,6 +78,11 @@ static void acpi_register_config(void)
 
 opts_init(acpi_register_config);
 
+bool acpi_builtin(void)
+{
+    return true;
+}
+
 static int acpi_checksum(const uint8_t *data, int len)
 {
     int sum, i;
diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c
index f70a2c045e1b7..6f1ae79edbf3f 100644
--- a/hw/acpi/cpu.c
+++ b/hw/acpi/cpu.c
@@ -235,8 +235,8 @@ void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner,
 
 static AcpiCpuStatus *get_cpu_status(CPUHotplugState *cpu_st, DeviceState *dev)
 {
-    CPUClass *k = CPU_GET_CLASS(dev);
-    uint64_t cpu_arch_id = k->get_arch_id(CPU(dev));
+    CPUState *cpu = CPU(dev);
+    uint64_t cpu_arch_id = cpu->cc->get_arch_id(cpu);
     int i;
 
     for (i = 0; i < cpu_st->dev_count; i++) {
diff --git a/hw/acpi/cpu_hotplug.c b/hw/acpi/cpu_hotplug.c
index 83b8bc5deb87e..aa0e1e3efa544 100644
--- a/hw/acpi/cpu_hotplug.c
+++ b/hw/acpi/cpu_hotplug.c
@@ -62,10 +62,9 @@ static const MemoryRegionOps AcpiCpuHotplug_ops = {
 static void acpi_set_cpu_present_bit(AcpiCpuHotplug *g, CPUState *cpu,
                                      bool *swtchd_to_modern)
 {
-    CPUClass *k = CPU_GET_CLASS(cpu);
     int64_t cpu_id;
 
-    cpu_id = k->get_arch_id(cpu);
+    cpu_id = cpu->cc->get_arch_id(cpu);
     if ((cpu_id / 8) >= ACPI_GPE_PROC_LEN) {
         object_property_set_bool(g->device, "cpu-hotplug-legacy", false,
                                  &error_abort);
diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
index b709c177cdea9..b85bb48195a0c 100644
--- a/hw/acpi/ghes.c
+++ b/hw/acpi/ghes.c
@@ -390,8 +390,8 @@ static void get_hw_error_offsets(uint64_t ghes_addr,
     *read_ack_register_addr = ghes_addr + sizeof(uint64_t);
 }
 
-void ghes_record_cper_errors(const void *cper, size_t len,
-                             uint16_t source_id, Error **errp)
+static void ghes_record_cper_errors(const void *cper, size_t len,
+                                    uint16_t source_id, Error **errp)
 {
     uint64_t cper_addr = 0, read_ack_register_addr = 0, read_ack_register;
     AcpiGedState *acpi_ged_state;
@@ -440,8 +440,6 @@ void ghes_record_cper_errors(const void *cper, size_t len,
 
     /* Write the generic error data entry into guest memory */
     cpu_physical_memory_write(cper_addr, cper, len);
-
-    return;
 }
 
 int acpi_ghes_memory_errors(uint16_t source_id, uint64_t physical_address)
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index 256013ca8081b..15200a2d7e722 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -2,6 +2,7 @@ config ARM_VIRT
     bool
     default y
     depends on ARM
+    depends on TCG || KVM || HVF
     imply PCI_DEVICES
     imply TEST_DEVICES
     imply VFIO_AMD_XGBE
@@ -303,7 +304,7 @@ config ZYNQ
     select PL330
     select SDHCI
     select SSI_M25P80
-    select USB_EHCI_SYSBUS
+    select USB_CHIPIDEA
     select XILINX # UART
     select XILINX_AXI
     select XILINX_SPI
@@ -481,6 +482,19 @@ config NPCM7XX
     select PCA954X
     select USB_OHCI_SYSBUS
 
+config NPCM8XX
+    bool
+    default y
+    depends on TCG && AARCH64
+    select ARM_GIC
+    select SMBUS
+    select PL310  # cache controller
+    select NPCM7XX
+    select SERIAL
+    select SSI
+    select UNIMP
+
+
 config FSL_IMX25
     bool
     default y
@@ -489,6 +503,7 @@ config FSL_IMX25
     select IMX
     select IMX_FEC
     select IMX_I2C
+    select USB_CHIPIDEA
     select WDT_IMX2
     select SDHCI
 
@@ -516,6 +531,7 @@ config FSL_IMX6
     select PL310  # cache controller
     select PCI_EXPRESS_DESIGNWARE
     select SDHCI
+    select USB_CHIPIDEA
     select OR_IRQ
 
 config ASPEED_SOC
@@ -576,6 +592,31 @@ config FSL_IMX7
     select SDHCI
     select OR_IRQ
     select UNIMP
+    select USB_CHIPIDEA
+
+config FSL_IMX8MP
+    bool
+    imply I2C_DEVICES
+    imply PCI_DEVICES
+    select ARM_GIC
+    select FSL_IMX8MP_ANALOG
+    select FSL_IMX8MP_CCM
+    select IMX
+    select IMX_FEC
+    select IMX_I2C
+    select OR_IRQ
+    select PCI_EXPRESS_DESIGNWARE
+    select PCI_EXPRESS_FSL_IMX8M_PHY
+    select SDHCI
+    select UNIMP
+    select USB_DWC3
+    select WDT_IMX2
+
+config FSL_IMX8MP_EVK
+    bool
+    default y
+    depends on TCG && AARCH64
+    select FSL_IMX8MP
 
 config ARM_SMMUV3
     bool
@@ -591,6 +632,7 @@ config FSL_IMX6UL
     select IMX_I2C
     select WDT_IMX2
     select SDHCI
+    select USB_CHIPIDEA
     select UNIMP
 
 config MICROBIT
diff --git a/hw/arm/allwinner-a10.c b/hw/arm/allwinner-a10.c
index a829913f1b5b7..f1b399759a17e 100644
--- a/hw/arm/allwinner-a10.c
+++ b/hw/arm/allwinner-a10.c
@@ -158,7 +158,7 @@ static void aw_a10_realize(DeviceState *dev, Error **errp)
     /* FIXME use a qdev chardev prop instead of serial_hd() */
     serial_mm_init(get_system_memory(), AW_A10_UART0_REG_BASE, 2,
                    qdev_get_gpio_in(dev, 1),
-                   115200, serial_hd(0), DEVICE_NATIVE_ENDIAN);
+                   115200, serial_hd(0), DEVICE_LITTLE_ENDIAN);
 
     for (size_t i = 0; i < AW_A10_NUM_USB; i++) {
         g_autofree char *bus = g_strdup_printf("usb-bus.%zu", i);
diff --git a/hw/arm/allwinner-h3.c b/hw/arm/allwinner-h3.c
index 2efced3f66a2b..1b1afa4fb6f2f 100644
--- a/hw/arm/allwinner-h3.c
+++ b/hw/arm/allwinner-h3.c
@@ -408,19 +408,19 @@ static void allwinner_h3_realize(DeviceState *dev, Error **errp)
     /* UART0. For future clocktree API: All UARTS are connected to APB2_CLK. */
     serial_mm_init(get_system_memory(), s->memmap[AW_H3_DEV_UART0], 2,
                    qdev_get_gpio_in(DEVICE(&s->gic), AW_H3_GIC_SPI_UART0),
-                   115200, serial_hd(0), DEVICE_NATIVE_ENDIAN);
+                   115200, serial_hd(0), DEVICE_LITTLE_ENDIAN);
     /* UART1 */
     serial_mm_init(get_system_memory(), s->memmap[AW_H3_DEV_UART1], 2,
                    qdev_get_gpio_in(DEVICE(&s->gic), AW_H3_GIC_SPI_UART1),
-                   115200, serial_hd(1), DEVICE_NATIVE_ENDIAN);
+                   115200, serial_hd(1), DEVICE_LITTLE_ENDIAN);
     /* UART2 */
     serial_mm_init(get_system_memory(), s->memmap[AW_H3_DEV_UART2], 2,
                    qdev_get_gpio_in(DEVICE(&s->gic), AW_H3_GIC_SPI_UART2),
-                   115200, serial_hd(2), DEVICE_NATIVE_ENDIAN);
+                   115200, serial_hd(2), DEVICE_LITTLE_ENDIAN);
     /* UART3 */
     serial_mm_init(get_system_memory(), s->memmap[AW_H3_DEV_UART3], 2,
                    qdev_get_gpio_in(DEVICE(&s->gic), AW_H3_GIC_SPI_UART3),
-                   115200, serial_hd(3), DEVICE_NATIVE_ENDIAN);
+                   115200, serial_hd(3), DEVICE_LITTLE_ENDIAN);
 
     /* DRAMC */
     sysbus_realize(SYS_BUS_DEVICE(&s->dramc), &error_fatal);
diff --git a/hw/arm/allwinner-r40.c b/hw/arm/allwinner-r40.c
index 47b3180f0ec0c..cef6e4d18c262 100644
--- a/hw/arm/allwinner-r40.c
+++ b/hw/arm/allwinner-r40.c
@@ -492,7 +492,7 @@ static void allwinner_r40_realize(DeviceState *dev, Error **errp)
 
         serial_mm_init(get_system_memory(), addr, 2,
                        qdev_get_gpio_in(DEVICE(&s->gic), uart_irqs[i]),
-                       115200, serial_hd(i), DEVICE_NATIVE_ENDIAN);
+                       115200, serial_hd(i), DEVICE_LITTLE_ENDIAN);
     }
 
     /* I2C */
diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index d9418e2b9f2c6..82f42582fa3c8 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -181,8 +181,10 @@ struct AspeedMachineState {
 
 #ifdef TARGET_AARCH64
 /* AST2700 evb hardware value */
-#define AST2700_EVB_HW_STRAP1 0x000000C0
-#define AST2700_EVB_HW_STRAP2 0x00000003
+/* SCU HW Strap1 */
+#define AST2700_EVB_HW_STRAP1 0x00000800
+/* SCUIO HW Strap1 */
+#define AST2700_EVB_HW_STRAP2 0x00000700
 #endif
 
 /* Rainier hardware value: (QEMU prototype) */
@@ -1253,6 +1255,7 @@ static void aspeed_machine_palmetto_class_init(ObjectClass *oc, void *data)
     amc->spi_model = "mx25l25635f";
     amc->num_cs    = 1;
     amc->i2c_init  = palmetto_bmc_i2c_init;
+    mc->auto_create_sdcard = true;
     mc->default_ram_size       = 256 * MiB;
     aspeed_machine_class_init_cpus_defaults(mc);
 };
@@ -1269,6 +1272,7 @@ static void aspeed_machine_quanta_q71l_class_init(ObjectClass *oc, void *data)
     amc->spi_model = "mx25l25635e";
     amc->num_cs    = 1;
     amc->i2c_init  = quanta_q71l_bmc_i2c_init;
+    mc->auto_create_sdcard = true;
     mc->default_ram_size       = 128 * MiB;
     aspeed_machine_class_init_cpus_defaults(mc);
 }
@@ -1287,6 +1291,7 @@ static void aspeed_machine_supermicrox11_bmc_class_init(ObjectClass *oc,
     amc->num_cs    = 1;
     amc->macs_mask = ASPEED_MAC0_ON | ASPEED_MAC1_ON;
     amc->i2c_init  = palmetto_bmc_i2c_init;
+    mc->auto_create_sdcard = true;
     mc->default_ram_size = 256 * MiB;
     aspeed_machine_class_init_cpus_defaults(mc);
 }
@@ -1305,6 +1310,7 @@ static void aspeed_machine_supermicro_x11spi_bmc_class_init(ObjectClass *oc,
     amc->num_cs    = 1;
     amc->macs_mask = ASPEED_MAC0_ON | ASPEED_MAC1_ON;
     amc->i2c_init  = palmetto_bmc_i2c_init;
+    mc->auto_create_sdcard = true;
     mc->default_ram_size = 512 * MiB;
     aspeed_machine_class_init_cpus_defaults(mc);
 }
@@ -1321,6 +1327,7 @@ static void aspeed_machine_ast2500_evb_class_init(ObjectClass *oc, void *data)
     amc->spi_model = "mx25l25635f";
     amc->num_cs    = 1;
     amc->i2c_init  = ast2500_evb_i2c_init;
+    mc->auto_create_sdcard = true;
     mc->default_ram_size       = 512 * MiB;
     aspeed_machine_class_init_cpus_defaults(mc);
 };
@@ -1338,6 +1345,7 @@ static void aspeed_machine_yosemitev2_class_init(ObjectClass *oc, void *data)
     amc->spi_model = "mx25l25635e";
     amc->num_cs    = 2;
     amc->i2c_init  = yosemitev2_bmc_i2c_init;
+    mc->auto_create_sdcard = true;
     mc->default_ram_size       = 512 * MiB;
     aspeed_machine_class_init_cpus_defaults(mc);
 };
@@ -1354,6 +1362,7 @@ static void aspeed_machine_romulus_class_init(ObjectClass *oc, void *data)
     amc->spi_model = "mx66l1g45g";
     amc->num_cs    = 2;
     amc->i2c_init  = romulus_bmc_i2c_init;
+    mc->auto_create_sdcard = true;
     mc->default_ram_size       = 512 * MiB;
     aspeed_machine_class_init_cpus_defaults(mc);
 };
@@ -1371,6 +1380,7 @@ static void aspeed_machine_tiogapass_class_init(ObjectClass *oc, void *data)
     amc->spi_model = "mx25l25635e";
     amc->num_cs    = 2;
     amc->i2c_init  = tiogapass_bmc_i2c_init;
+    mc->auto_create_sdcard = true;
     mc->default_ram_size       = 1 * GiB;
     aspeed_machine_class_init_cpus_defaults(mc);
 };
@@ -1387,6 +1397,7 @@ static void aspeed_machine_sonorapass_class_init(ObjectClass *oc, void *data)
     amc->spi_model = "mx66l1g45g";
     amc->num_cs    = 2;
     amc->i2c_init  = sonorapass_bmc_i2c_init;
+    mc->auto_create_sdcard = true;
     mc->default_ram_size       = 512 * MiB;
     aspeed_machine_class_init_cpus_defaults(mc);
 };
@@ -1403,6 +1414,7 @@ static void aspeed_machine_witherspoon_class_init(ObjectClass *oc, void *data)
     amc->spi_model = "mx66l1g45g";
     amc->num_cs    = 2;
     amc->i2c_init  = witherspoon_bmc_i2c_init;
+    mc->auto_create_sdcard = true;
     mc->default_ram_size = 512 * MiB;
     aspeed_machine_class_init_cpus_defaults(mc);
 };
@@ -1423,6 +1435,7 @@ static void aspeed_machine_ast2600_evb_class_init(ObjectClass *oc, void *data)
                      ASPEED_MAC3_ON;
     amc->sdhci_wp_inverted = true;
     amc->i2c_init  = ast2600_evb_i2c_init;
+    mc->auto_create_sdcard = true;
     mc->default_ram_size = 1 * GiB;
     aspeed_machine_class_init_cpus_defaults(mc);
     aspeed_machine_ast2600_class_emmc_init(oc);
@@ -1441,6 +1454,7 @@ static void aspeed_machine_g220a_class_init(ObjectClass *oc, void *data)
     amc->num_cs    = 2;
     amc->macs_mask  = ASPEED_MAC0_ON | ASPEED_MAC1_ON;
     amc->i2c_init  = g220a_bmc_i2c_init;
+    mc->auto_create_sdcard = true;
     mc->default_ram_size = 1024 * MiB;
     aspeed_machine_class_init_cpus_defaults(mc);
 };
@@ -1458,6 +1472,7 @@ static void aspeed_machine_fp5280g2_class_init(ObjectClass *oc, void *data)
     amc->num_cs    = 2;
     amc->macs_mask  = ASPEED_MAC0_ON | ASPEED_MAC1_ON;
     amc->i2c_init  = fp5280g2_bmc_i2c_init;
+    mc->auto_create_sdcard = true;
     mc->default_ram_size = 512 * MiB;
     aspeed_machine_class_init_cpus_defaults(mc);
 };
@@ -1476,6 +1491,7 @@ static void aspeed_machine_rainier_class_init(ObjectClass *oc, void *data)
     amc->num_cs    = 2;
     amc->macs_mask  = ASPEED_MAC2_ON | ASPEED_MAC3_ON;
     amc->i2c_init  = rainier_bmc_i2c_init;
+    mc->auto_create_sdcard = true;
     mc->default_ram_size = 1 * GiB;
     aspeed_machine_class_init_cpus_defaults(mc);
     aspeed_machine_ast2600_class_emmc_init(oc);
@@ -1498,6 +1514,7 @@ static void aspeed_machine_fuji_class_init(ObjectClass *oc, void *data)
     amc->macs_mask = ASPEED_MAC3_ON;
     amc->i2c_init = fuji_bmc_i2c_init;
     amc->uart_default = ASPEED_DEV_UART1;
+    mc->auto_create_sdcard = true;
     mc->default_ram_size = FUJI_BMC_RAM_SIZE;
     aspeed_machine_class_init_cpus_defaults(mc);
 };
@@ -1518,6 +1535,7 @@ static void aspeed_machine_bletchley_class_init(ObjectClass *oc, void *data)
     amc->num_cs    = 2;
     amc->macs_mask = ASPEED_MAC2_ON;
     amc->i2c_init  = bletchley_bmc_i2c_init;
+    mc->auto_create_sdcard = true;
     mc->default_ram_size = BLETCHLEY_BMC_RAM_SIZE;
     aspeed_machine_class_init_cpus_defaults(mc);
 }
@@ -1559,6 +1577,7 @@ static void aspeed_machine_fby35_class_init(ObjectClass *oc, void *data)
     amc->num_cs    = 2;
     amc->macs_mask = ASPEED_MAC3_ON;
     amc->i2c_init  = fby35_i2c_init;
+    mc->auto_create_sdcard = true;
     /* FIXME: Replace this macro with something more general */
     mc->default_ram_size = FUJI_BMC_RAM_SIZE;
     aspeed_machine_class_init_cpus_defaults(mc);
@@ -1654,12 +1673,13 @@ static void ast2700_evb_i2c_init(AspeedMachineState *bmc)
                             TYPE_TMP105, 0x4d);
 }
 
-static void aspeed_machine_ast2700_evb_class_init(ObjectClass *oc, void *data)
+static void aspeed_machine_ast2700a0_evb_class_init(ObjectClass *oc, void *data)
 {
     MachineClass *mc = MACHINE_CLASS(oc);
     AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc);
 
-    mc->desc = "Aspeed AST2700 EVB (Cortex-A35)";
+    mc->alias = "ast2700-evb";
+    mc->desc = "Aspeed AST2700 A0 EVB (Cortex-A35)";
     amc->soc_name  = "ast2700-a0";
     amc->hw_strap1 = AST2700_EVB_HW_STRAP1;
     amc->hw_strap2 = AST2700_EVB_HW_STRAP2;
@@ -1669,6 +1689,27 @@ static void aspeed_machine_ast2700_evb_class_init(ObjectClass *oc, void *data)
     amc->macs_mask = ASPEED_MAC0_ON | ASPEED_MAC1_ON | ASPEED_MAC2_ON;
     amc->uart_default = ASPEED_DEV_UART12;
     amc->i2c_init  = ast2700_evb_i2c_init;
+    mc->auto_create_sdcard = true;
+    mc->default_ram_size = 1 * GiB;
+    aspeed_machine_class_init_cpus_defaults(mc);
+}
+
+static void aspeed_machine_ast2700a1_evb_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+    AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc);
+
+    mc->desc = "Aspeed AST2700 A1 EVB (Cortex-A35)";
+    amc->soc_name  = "ast2700-a1";
+    amc->hw_strap1 = AST2700_EVB_HW_STRAP1;
+    amc->hw_strap2 = AST2700_EVB_HW_STRAP2;
+    amc->fmc_model = "w25q01jvq";
+    amc->spi_model = "w25q512jv";
+    amc->num_cs    = 2;
+    amc->macs_mask = ASPEED_MAC0_ON | ASPEED_MAC1_ON | ASPEED_MAC2_ON;
+    amc->uart_default = ASPEED_DEV_UART12;
+    amc->i2c_init  = ast2700_evb_i2c_init;
+    mc->auto_create_sdcard = true;
     mc->default_ram_size = 1 * GiB;
     aspeed_machine_class_init_cpus_defaults(mc);
 }
@@ -1689,6 +1730,7 @@ static void aspeed_machine_qcom_dc_scm_v1_class_init(ObjectClass *oc,
     amc->num_cs    = 2;
     amc->macs_mask = ASPEED_MAC2_ON | ASPEED_MAC3_ON;
     amc->i2c_init  = qcom_dc_scm_bmc_i2c_init;
+    mc->auto_create_sdcard = true;
     mc->default_ram_size = 1 * GiB;
     aspeed_machine_class_init_cpus_defaults(mc);
 };
@@ -1708,6 +1750,7 @@ static void aspeed_machine_qcom_firework_class_init(ObjectClass *oc,
     amc->num_cs    = 2;
     amc->macs_mask = ASPEED_MAC2_ON | ASPEED_MAC3_ON;
     amc->i2c_init  = qcom_dc_scm_firework_i2c_init;
+    mc->auto_create_sdcard = true;
     mc->default_ram_size = 1 * GiB;
     aspeed_machine_class_init_cpus_defaults(mc);
 };
@@ -1795,9 +1838,13 @@ static const TypeInfo aspeed_machine_types[] = {
         .class_init     = aspeed_minibmc_machine_ast1030_evb_class_init,
 #ifdef TARGET_AARCH64
     }, {
-        .name          = MACHINE_TYPE_NAME("ast2700-evb"),
+        .name          = MACHINE_TYPE_NAME("ast2700a0-evb"),
+        .parent        = TYPE_ASPEED_MACHINE,
+        .class_init    = aspeed_machine_ast2700a0_evb_class_init,
+        }, {
+        .name          = MACHINE_TYPE_NAME("ast2700a1-evb"),
         .parent        = TYPE_ASPEED_MACHINE,
-        .class_init    = aspeed_machine_ast2700_evb_class_init,
+        .class_init    = aspeed_machine_ast2700a1_evb_class_init,
 #endif
     }, {
         .name          = TYPE_ASPEED_MACHINE,
diff --git a/hw/arm/aspeed_ast10x0.c b/hw/arm/aspeed_ast10x0.c
index e76c7100a1d23..ec329f4991c90 100644
--- a/hw/arm/aspeed_ast10x0.c
+++ b/hw/arm/aspeed_ast10x0.c
@@ -116,7 +116,7 @@ static void aspeed_soc_ast1030_init(Object *obj)
     char typename[64];
     int i;
 
-    if (sscanf(sc->name, "%7s", socname) != 1) {
+    if (sscanf(object_get_typename(obj), "%7s", socname) != 1) {
         g_assert_not_reached();
     }
 
@@ -428,7 +428,6 @@ static void aspeed_soc_ast1030_class_init(ObjectClass *klass, void *data)
     dc->user_creatable = false;
     dc->realize = aspeed_soc_ast1030_realize;
 
-    sc->name = "ast1030-a1";
     sc->valid_cpu_types = valid_cpu_types;
     sc->silicon_rev = AST1030_A1_SILICON_REV;
     sc->sram_size = 0xc0000;
diff --git a/hw/arm/aspeed_ast2400.c b/hw/arm/aspeed_ast2400.c
index 8784b6e793050..0158f6e9c24e9 100644
--- a/hw/arm/aspeed_ast2400.c
+++ b/hw/arm/aspeed_ast2400.c
@@ -151,7 +151,7 @@ static void aspeed_ast2400_soc_init(Object *obj)
     char socname[8];
     char typename[64];
 
-    if (sscanf(sc->name, "%7s", socname) != 1) {
+    if (sscanf(object_get_typename(obj), "%7s", socname) != 1) {
         g_assert_not_reached();
     }
 
@@ -515,7 +515,6 @@ static void aspeed_soc_ast2400_class_init(ObjectClass *oc, void *data)
     /* Reason: Uses serial_hds and nd_table in realize() directly */
     dc->user_creatable = false;
 
-    sc->name         = "ast2400-a1";
     sc->valid_cpu_types = valid_cpu_types;
     sc->silicon_rev  = AST2400_A1_SILICON_REV;
     sc->sram_size    = 0x8000;
@@ -544,7 +543,6 @@ static void aspeed_soc_ast2500_class_init(ObjectClass *oc, void *data)
     /* Reason: Uses serial_hds and nd_table in realize() directly */
     dc->user_creatable = false;
 
-    sc->name         = "ast2500-a1";
     sc->valid_cpu_types = valid_cpu_types;
     sc->silicon_rev  = AST2500_A1_SILICON_REV;
     sc->sram_size    = 0x9000;
diff --git a/hw/arm/aspeed_ast2600.c b/hw/arm/aspeed_ast2600.c
index 07210483bb29a..1f994ba26c65d 100644
--- a/hw/arm/aspeed_ast2600.c
+++ b/hw/arm/aspeed_ast2600.c
@@ -157,7 +157,7 @@ static void aspeed_soc_ast2600_init(Object *obj)
     char socname[8];
     char typename[64];
 
-    if (sscanf(sc->name, "%7s", socname) != 1) {
+    if (sscanf(object_get_typename(obj), "%7s", socname) != 1) {
         g_assert_not_reached();
     }
 
@@ -666,7 +666,6 @@ static void aspeed_soc_ast2600_class_init(ObjectClass *oc, void *data)
     /* Reason: The Aspeed SoC can only be instantiated from a board */
     dc->user_creatable = false;
 
-    sc->name         = "ast2600-a3";
     sc->valid_cpu_types = valid_cpu_types;
     sc->silicon_rev  = AST2600_A3_SILICON_REV;
     sc->sram_size    = 0x16400;
diff --git a/hw/arm/aspeed_ast27x0.c b/hw/arm/aspeed_ast27x0.c
index 2d0c99f1591ce..dce7255a2c0de 100644
--- a/hw/arm/aspeed_ast27x0.c
+++ b/hw/arm/aspeed_ast27x0.c
@@ -24,16 +24,40 @@
 #include "qemu/log.h"
 
 static const hwaddr aspeed_soc_ast2700_memmap[] = {
-    [ASPEED_DEV_SPI_BOOT]  =  0x400000000,
     [ASPEED_DEV_SRAM]      =  0x10000000,
+    [ASPEED_DEV_HACE]      =  0x12070000,
+    [ASPEED_DEV_EMMC]      =  0x12090000,
+    [ASPEED_DEV_INTC]      =  0x12100000,
+    [ASPEED_GIC_DIST]      =  0x12200000,
+    [ASPEED_GIC_REDIST]    =  0x12280000,
     [ASPEED_DEV_SDMC]      =  0x12C00000,
     [ASPEED_DEV_SCU]       =  0x12C02000,
+    [ASPEED_DEV_RTC]       =  0x12C0F000,
+    [ASPEED_DEV_TIMER1]    =  0x12C10000,
+    [ASPEED_DEV_SLI]       =  0x12C17000,
+    [ASPEED_DEV_UART4]     =  0X12C1A000,
+    [ASPEED_DEV_FMC]       =  0x14000000,
+    [ASPEED_DEV_SPI0]      =  0x14010000,
+    [ASPEED_DEV_SPI1]      =  0x14020000,
+    [ASPEED_DEV_SPI2]      =  0x14030000,
+    [ASPEED_DEV_MII1]      =  0x14040000,
+    [ASPEED_DEV_MII2]      =  0x14040008,
+    [ASPEED_DEV_MII3]      =  0x14040010,
+    [ASPEED_DEV_ETH1]      =  0x14050000,
+    [ASPEED_DEV_ETH2]      =  0x14060000,
+    [ASPEED_DEV_ETH3]      =  0x14070000,
+    [ASPEED_DEV_SDHCI]     =  0x14080000,
+    [ASPEED_DEV_ADC]       =  0x14C00000,
     [ASPEED_DEV_SCUIO]     =  0x14C02000,
+    [ASPEED_DEV_GPIO]      =  0x14C0B000,
+    [ASPEED_DEV_I2C]       =  0x14C0F000,
+    [ASPEED_DEV_INTCIO]    =  0x14C18000,
+    [ASPEED_DEV_SLIIO]     =  0x14C1E000,
+    [ASPEED_DEV_VUART]     =  0X14C30000,
     [ASPEED_DEV_UART0]     =  0X14C33000,
     [ASPEED_DEV_UART1]     =  0X14C33100,
     [ASPEED_DEV_UART2]     =  0X14C33200,
     [ASPEED_DEV_UART3]     =  0X14C33300,
-    [ASPEED_DEV_UART4]     =  0X12C1A000,
     [ASPEED_DEV_UART5]     =  0X14C33400,
     [ASPEED_DEV_UART6]     =  0X14C33500,
     [ASPEED_DEV_UART7]     =  0X14C33600,
@@ -43,41 +67,44 @@ static const hwaddr aspeed_soc_ast2700_memmap[] = {
     [ASPEED_DEV_UART11]    =  0X14C33A00,
     [ASPEED_DEV_UART12]    =  0X14C33B00,
     [ASPEED_DEV_WDT]       =  0x14C37000,
-    [ASPEED_DEV_VUART]     =  0X14C30000,
-    [ASPEED_DEV_FMC]       =  0x14000000,
-    [ASPEED_DEV_SPI0]      =  0x14010000,
-    [ASPEED_DEV_SPI1]      =  0x14020000,
-    [ASPEED_DEV_SPI2]      =  0x14030000,
+    [ASPEED_DEV_SPI_BOOT]  =  0x100000000,
     [ASPEED_DEV_SDRAM]     =  0x400000000,
-    [ASPEED_DEV_MII1]      =  0x14040000,
-    [ASPEED_DEV_MII2]      =  0x14040008,
-    [ASPEED_DEV_MII3]      =  0x14040010,
-    [ASPEED_DEV_ETH1]      =  0x14050000,
-    [ASPEED_DEV_ETH2]      =  0x14060000,
-    [ASPEED_DEV_ETH3]      =  0x14070000,
-    [ASPEED_DEV_EMMC]      =  0x12090000,
-    [ASPEED_DEV_INTC]      =  0x12100000,
-    [ASPEED_DEV_SLI]       =  0x12C17000,
-    [ASPEED_DEV_SLIIO]     =  0x14C1E000,
-    [ASPEED_GIC_DIST]      =  0x12200000,
-    [ASPEED_GIC_REDIST]    =  0x12280000,
-    [ASPEED_DEV_ADC]       =  0x14C00000,
-    [ASPEED_DEV_I2C]       =  0x14C0F000,
-    [ASPEED_DEV_GPIO]      =  0x14C0B000,
-    [ASPEED_DEV_RTC]       =  0x12C0F000,
-    [ASPEED_DEV_SDHCI]     =  0x14080000,
-    [ASPEED_DEV_TIMER1]    =  0x12C10000,
 };
 
 #define AST2700_MAX_IRQ 256
 
 /* Shared Peripheral Interrupt values below are offset by -32 from datasheet */
-static const int aspeed_soc_ast2700_irqmap[] = {
+static const int aspeed_soc_ast2700a0_irqmap[] = {
+    [ASPEED_DEV_SDMC]      = 0,
+    [ASPEED_DEV_HACE]      = 4,
+    [ASPEED_DEV_XDMA]      = 5,
+    [ASPEED_DEV_UART4]     = 8,
+    [ASPEED_DEV_SCU]       = 12,
+    [ASPEED_DEV_RTC]       = 13,
+    [ASPEED_DEV_EMMC]      = 15,
+    [ASPEED_DEV_TIMER1]    = 16,
+    [ASPEED_DEV_TIMER2]    = 17,
+    [ASPEED_DEV_TIMER3]    = 18,
+    [ASPEED_DEV_TIMER4]    = 19,
+    [ASPEED_DEV_TIMER5]    = 20,
+    [ASPEED_DEV_TIMER6]    = 21,
+    [ASPEED_DEV_TIMER7]    = 22,
+    [ASPEED_DEV_TIMER8]    = 23,
+    [ASPEED_DEV_DP]        = 28,
+    [ASPEED_DEV_LPC]       = 128,
+    [ASPEED_DEV_IBT]       = 128,
+    [ASPEED_DEV_KCS]       = 128,
+    [ASPEED_DEV_ADC]       = 130,
+    [ASPEED_DEV_GPIO]      = 130,
+    [ASPEED_DEV_I2C]       = 130,
+    [ASPEED_DEV_FMC]       = 131,
+    [ASPEED_DEV_WDT]       = 131,
+    [ASPEED_DEV_PWM]       = 131,
+    [ASPEED_DEV_I3C]       = 131,
     [ASPEED_DEV_UART0]     = 132,
     [ASPEED_DEV_UART1]     = 132,
     [ASPEED_DEV_UART2]     = 132,
     [ASPEED_DEV_UART3]     = 132,
-    [ASPEED_DEV_UART4]     = 8,
     [ASPEED_DEV_UART5]     = 132,
     [ASPEED_DEV_UART6]     = 132,
     [ASPEED_DEV_UART7]     = 132,
@@ -86,14 +113,21 @@ static const int aspeed_soc_ast2700_irqmap[] = {
     [ASPEED_DEV_UART10]    = 132,
     [ASPEED_DEV_UART11]    = 132,
     [ASPEED_DEV_UART12]    = 132,
-    [ASPEED_DEV_FMC]       = 131,
+    [ASPEED_DEV_ETH1]      = 132,
+    [ASPEED_DEV_ETH2]      = 132,
+    [ASPEED_DEV_ETH3]      = 132,
+    [ASPEED_DEV_PECI]      = 133,
+    [ASPEED_DEV_SDHCI]     = 133,
+};
+
+static const int aspeed_soc_ast2700a1_irqmap[] = {
     [ASPEED_DEV_SDMC]      = 0,
-    [ASPEED_DEV_SCU]       = 12,
-    [ASPEED_DEV_ADC]       = 130,
+    [ASPEED_DEV_HACE]      = 4,
     [ASPEED_DEV_XDMA]      = 5,
-    [ASPEED_DEV_EMMC]      = 15,
-    [ASPEED_DEV_GPIO]      = 130,
+    [ASPEED_DEV_UART4]     = 8,
+    [ASPEED_DEV_SCU]       = 12,
     [ASPEED_DEV_RTC]       = 13,
+    [ASPEED_DEV_EMMC]      = 15,
     [ASPEED_DEV_TIMER1]    = 16,
     [ASPEED_DEV_TIMER2]    = 17,
     [ASPEED_DEV_TIMER3]    = 18,
@@ -102,38 +136,58 @@ static const int aspeed_soc_ast2700_irqmap[] = {
     [ASPEED_DEV_TIMER6]    = 21,
     [ASPEED_DEV_TIMER7]    = 22,
     [ASPEED_DEV_TIMER8]    = 23,
-    [ASPEED_DEV_WDT]       = 131,
-    [ASPEED_DEV_PWM]       = 131,
-    [ASPEED_DEV_LPC]       = 128,
-    [ASPEED_DEV_IBT]       = 128,
-    [ASPEED_DEV_I2C]       = 130,
-    [ASPEED_DEV_PECI]      = 133,
-    [ASPEED_DEV_ETH1]      = 132,
-    [ASPEED_DEV_ETH2]      = 132,
-    [ASPEED_DEV_ETH3]      = 132,
-    [ASPEED_DEV_HACE]      = 4,
-    [ASPEED_DEV_KCS]       = 128,
     [ASPEED_DEV_DP]        = 28,
-    [ASPEED_DEV_I3C]       = 131,
-    [ASPEED_DEV_SDHCI]     = 133,
+    [ASPEED_DEV_LPC]       = 192,
+    [ASPEED_DEV_IBT]       = 192,
+    [ASPEED_DEV_KCS]       = 192,
+    [ASPEED_DEV_I2C]       = 194,
+    [ASPEED_DEV_ADC]       = 194,
+    [ASPEED_DEV_GPIO]      = 194,
+    [ASPEED_DEV_FMC]       = 195,
+    [ASPEED_DEV_WDT]       = 195,
+    [ASPEED_DEV_PWM]       = 195,
+    [ASPEED_DEV_I3C]       = 195,
+    [ASPEED_DEV_UART0]     = 196,
+    [ASPEED_DEV_UART1]     = 196,
+    [ASPEED_DEV_UART2]     = 196,
+    [ASPEED_DEV_UART3]     = 196,
+    [ASPEED_DEV_UART5]     = 196,
+    [ASPEED_DEV_UART6]     = 196,
+    [ASPEED_DEV_UART7]     = 196,
+    [ASPEED_DEV_UART8]     = 196,
+    [ASPEED_DEV_UART9]     = 196,
+    [ASPEED_DEV_UART10]    = 196,
+    [ASPEED_DEV_UART11]    = 196,
+    [ASPEED_DEV_UART12]    = 196,
+    [ASPEED_DEV_ETH1]      = 196,
+    [ASPEED_DEV_ETH2]      = 196,
+    [ASPEED_DEV_ETH3]      = 196,
+    [ASPEED_DEV_PECI]      = 197,
+    [ASPEED_DEV_SDHCI]     = 197,
 };
 
 /* GICINT 128 */
-static const int aspeed_soc_ast2700_gic128_intcmap[] = {
+/* GICINT 192 */
+static const int ast2700_gic128_gic192_intcmap[] = {
     [ASPEED_DEV_LPC]       = 0,
     [ASPEED_DEV_IBT]       = 2,
     [ASPEED_DEV_KCS]       = 4,
 };
 
+/* GICINT 129 */
+/* GICINT 193 */
+
 /* GICINT 130 */
-static const int aspeed_soc_ast2700_gic130_intcmap[] = {
+/* GICINT 194 */
+static const int ast2700_gic130_gic194_intcmap[] = {
     [ASPEED_DEV_I2C]        = 0,
     [ASPEED_DEV_ADC]        = 16,
     [ASPEED_DEV_GPIO]       = 18,
 };
 
 /* GICINT 131 */
-static const int aspeed_soc_ast2700_gic131_intcmap[] = {
+/* GICINT 195 */
+static const int ast2700_gic131_gic195_intcmap[] = {
     [ASPEED_DEV_I3C]       = 0,
     [ASPEED_DEV_WDT]       = 16,
     [ASPEED_DEV_FMC]       = 25,
@@ -141,7 +195,8 @@ static const int aspeed_soc_ast2700_gic131_intcmap[] = {
 };
 
 /* GICINT 132 */
-static const int aspeed_soc_ast2700_gic132_intcmap[] = {
+/* GICINT 196 */
+static const int ast2700_gic132_gic196_intcmap[] = {
     [ASPEED_DEV_ETH1]      = 0,
     [ASPEED_DEV_ETH2]      = 1,
     [ASPEED_DEV_ETH3]      = 2,
@@ -160,40 +215,58 @@ static const int aspeed_soc_ast2700_gic132_intcmap[] = {
 };
 
 /* GICINT 133 */
-static const int aspeed_soc_ast2700_gic133_intcmap[] = {
+/* GICINT 197 */
+static const int ast2700_gic133_gic197_intcmap[] = {
     [ASPEED_DEV_SDHCI]     = 1,
     [ASPEED_DEV_PECI]      = 4,
 };
 
 /* GICINT 128 ~ 136 */
+/* GICINT 192 ~ 201 */
 struct gic_intc_irq_info {
     int irq;
+    int intc_idx;
+    int orgate_idx;
     const int *ptr;
 };
 
-static const struct gic_intc_irq_info aspeed_soc_ast2700_gic_intcmap[] = {
-    {128,  aspeed_soc_ast2700_gic128_intcmap},
-    {129,  NULL},
-    {130,  aspeed_soc_ast2700_gic130_intcmap},
-    {131,  aspeed_soc_ast2700_gic131_intcmap},
-    {132,  aspeed_soc_ast2700_gic132_intcmap},
-    {133,  aspeed_soc_ast2700_gic133_intcmap},
-    {134,  NULL},
-    {135,  NULL},
-    {136,  NULL},
+static const struct gic_intc_irq_info ast2700_gic_intcmap[] = {
+    {192, 1, 0, ast2700_gic128_gic192_intcmap},
+    {193, 1, 1, NULL},
+    {194, 1, 2, ast2700_gic130_gic194_intcmap},
+    {195, 1, 3, ast2700_gic131_gic195_intcmap},
+    {196, 1, 4, ast2700_gic132_gic196_intcmap},
+    {197, 1, 5, ast2700_gic133_gic197_intcmap},
+    {198, 1, 6, NULL},
+    {199, 1, 7, NULL},
+    {200, 1, 8, NULL},
+    {201, 1, 9, NULL},
+    {128, 0, 1, ast2700_gic128_gic192_intcmap},
+    {129, 0, 2, NULL},
+    {130, 0, 3, ast2700_gic130_gic194_intcmap},
+    {131, 0, 4, ast2700_gic131_gic195_intcmap},
+    {132, 0, 5, ast2700_gic132_gic196_intcmap},
+    {133, 0, 6, ast2700_gic133_gic197_intcmap},
+    {134, 0, 7, NULL},
+    {135, 0, 8, NULL},
+    {136, 0, 9, NULL},
 };
 
 static qemu_irq aspeed_soc_ast2700_get_irq(AspeedSoCState *s, int dev)
 {
     Aspeed27x0SoCState *a = ASPEED27X0_SOC(s);
     AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(s);
+    int or_idx;
+    int idx;
     int i;
 
-    for (i = 0; i < ARRAY_SIZE(aspeed_soc_ast2700_gic_intcmap); i++) {
-        if (sc->irqmap[dev] == aspeed_soc_ast2700_gic_intcmap[i].irq) {
-            assert(aspeed_soc_ast2700_gic_intcmap[i].ptr);
-            return qdev_get_gpio_in(DEVICE(&a->intc.orgates[i]),
-                aspeed_soc_ast2700_gic_intcmap[i].ptr[dev]);
+    for (i = 0; i < ARRAY_SIZE(ast2700_gic_intcmap); i++) {
+        if (sc->irqmap[dev] == ast2700_gic_intcmap[i].irq) {
+            assert(ast2700_gic_intcmap[i].ptr);
+            or_idx = ast2700_gic_intcmap[i].orgate_idx;
+            idx = ast2700_gic_intcmap[i].intc_idx;
+            return qdev_get_gpio_in(DEVICE(&a->intc[idx].orgates[or_idx]),
+                                    ast2700_gic_intcmap[i].ptr[dev]);
         }
     }
 
@@ -205,18 +278,23 @@ static qemu_irq aspeed_soc_ast2700_get_irq_index(AspeedSoCState *s, int dev,
 {
     Aspeed27x0SoCState *a = ASPEED27X0_SOC(s);
     AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(s);
+    int or_idx;
+    int idx;
     int i;
 
-    for (i = 0; i < ARRAY_SIZE(aspeed_soc_ast2700_gic_intcmap); i++) {
-        if (sc->irqmap[dev] == aspeed_soc_ast2700_gic_intcmap[i].irq) {
-            assert(aspeed_soc_ast2700_gic_intcmap[i].ptr);
-            return qdev_get_gpio_in(DEVICE(&a->intc.orgates[i]),
-                aspeed_soc_ast2700_gic_intcmap[i].ptr[dev] + index);
+    for (i = 0; i < ARRAY_SIZE(ast2700_gic_intcmap); i++) {
+        if (sc->irqmap[dev] == ast2700_gic_intcmap[i].irq) {
+            assert(ast2700_gic_intcmap[i].ptr);
+            or_idx = ast2700_gic_intcmap[i].orgate_idx;
+            idx = ast2700_gic_intcmap[i].intc_idx;
+            return qdev_get_gpio_in(DEVICE(&a->intc[idx].orgates[or_idx]),
+                                    ast2700_gic_intcmap[i].ptr[dev] + index);
         }
     }
 
     /*
-     * Invalid orgate index, device irq should be 128 to 136.
+     * Invalid OR gate index, device IRQ should be between 128 to 136
+     * and 192 to 201.
      */
     g_assert_not_reached();
 }
@@ -316,7 +394,7 @@ static void aspeed_soc_ast2700_init(Object *obj)
     char socname[8];
     char typename[64];
 
-    if (sscanf(sc->name, "%7s", socname) != 1) {
+    if (sscanf(object_get_typename(obj), "%7s", socname) != 1) {
         g_assert_not_reached();
     }
 
@@ -332,14 +410,21 @@ static void aspeed_soc_ast2700_init(Object *obj)
                          sc->silicon_rev);
     object_property_add_alias(obj, "hw-strap1", OBJECT(&s->scu),
                               "hw-strap1");
-    object_property_add_alias(obj, "hw-strap2", OBJECT(&s->scu),
-                              "hw-strap2");
     object_property_add_alias(obj, "hw-prot-key", OBJECT(&s->scu),
                               "hw-prot-key");
 
     object_initialize_child(obj, "scuio", &s->scuio, TYPE_ASPEED_2700_SCUIO);
     qdev_prop_set_uint32(DEVICE(&s->scuio), "silicon-rev",
                          sc->silicon_rev);
+    /*
+     * There is one hw-strap1 register in the SCU (CPU DIE) and another
+     * hw-strap1 register in the SCUIO (IO DIE). To reuse the current design
+     * of hw-strap, hw-strap1 is assigned to the SCU and sets the value in the
+     * SCU hw-strap1 register, while hw-strap2 is assigned to the SCUIO and
+     * sets the value in the SCUIO hw-strap1 register.
+     */
+    object_property_add_alias(obj, "hw-strap2", OBJECT(&s->scuio),
+                                  "hw-strap1");
 
     snprintf(typename, sizeof(typename), "aspeed.fmc-%s", socname);
     object_initialize_child(obj, "fmc", &s->fmc, typename);
@@ -372,7 +457,9 @@ static void aspeed_soc_ast2700_init(Object *obj)
 
     object_initialize_child(obj, "sli", &s->sli, TYPE_ASPEED_2700_SLI);
     object_initialize_child(obj, "sliio", &s->sliio, TYPE_ASPEED_2700_SLIIO);
-    object_initialize_child(obj, "intc", &a->intc, TYPE_ASPEED_2700_INTC);
+    object_initialize_child(obj, "intc", &a->intc[0], TYPE_ASPEED_2700_INTC);
+    object_initialize_child(obj, "intcio", &a->intc[1],
+                            TYPE_ASPEED_2700_INTCIO);
 
     snprintf(typename, sizeof(typename), "aspeed.adc-%s", socname);
     object_initialize_child(obj, "adc", &s->adc, typename);
@@ -401,6 +488,9 @@ static void aspeed_soc_ast2700_init(Object *obj)
 
     snprintf(typename, sizeof(typename), "aspeed.timer-%s", socname);
     object_initialize_child(obj, "timerctrl", &s->timerctrl, typename);
+
+    snprintf(typename, sizeof(typename), "aspeed.hace-%s", socname);
+    object_initialize_child(obj, "hace", &s->hace, typename);
 }
 
 /*
@@ -470,6 +560,10 @@ static bool aspeed_soc_ast2700_gic_realize(DeviceState *dev, Error **errp)
                            qdev_get_gpio_in(cpudev, ARM_CPU_VIRQ));
         sysbus_connect_irq(gicbusdev, i + 3 * sc->num_cpus,
                            qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ));
+        sysbus_connect_irq(gicbusdev, i + 4 * sc->num_cpus,
+                           qdev_get_gpio_in(cpudev, ARM_CPU_NMI));
+        sysbus_connect_irq(gicbusdev, i + 5 * sc->num_cpus,
+                           qdev_get_gpio_in(cpudev, ARM_CPU_VINMI));
     }
 
     return true;
@@ -481,7 +575,8 @@ static void aspeed_soc_ast2700_realize(DeviceState *dev, Error **errp)
     Aspeed27x0SoCState *a = ASPEED27X0_SOC(dev);
     AspeedSoCState *s = ASPEED_SOC(dev);
     AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(s);
-    AspeedINTCClass *ic = ASPEED_INTC_GET_CLASS(&a->intc);
+    AspeedINTCClass *ic = ASPEED_INTC_GET_CLASS(&a->intc[0]);
+    AspeedINTCClass *icio = ASPEED_INTC_GET_CLASS(&a->intc[1]);
     g_autofree char *sram_name = NULL;
     qemu_irq irq;
 
@@ -512,20 +607,45 @@ static void aspeed_soc_ast2700_realize(DeviceState *dev, Error **errp)
     }
 
     /* INTC */
-    if (!sysbus_realize(SYS_BUS_DEVICE(&a->intc), errp)) {
+    if (!sysbus_realize(SYS_BUS_DEVICE(&a->intc[0]), errp)) {
         return;
     }
 
-    aspeed_mmio_map(s, SYS_BUS_DEVICE(&a->intc), 0,
+    aspeed_mmio_map(s, SYS_BUS_DEVICE(&a->intc[0]), 0,
                     sc->memmap[ASPEED_DEV_INTC]);
 
-    /* GICINT orgates -> INTC -> GIC */
-    for (i = 0; i < ic->num_ints; i++) {
-        qdev_connect_gpio_out(DEVICE(&a->intc.orgates[i]), 0,
-                                qdev_get_gpio_in(DEVICE(&a->intc), i));
-        sysbus_connect_irq(SYS_BUS_DEVICE(&a->intc), i,
+    /* INTCIO */
+    if (!sysbus_realize(SYS_BUS_DEVICE(&a->intc[1]), errp)) {
+        return;
+    }
+
+    aspeed_mmio_map(s, SYS_BUS_DEVICE(&a->intc[1]), 0,
+                    sc->memmap[ASPEED_DEV_INTCIO]);
+
+    /* irq sources -> orgates -> INTC */
+    for (i = 0; i < ic->num_inpins; i++) {
+        qdev_connect_gpio_out(DEVICE(&a->intc[0].orgates[i]), 0,
+                              qdev_get_gpio_in(DEVICE(&a->intc[0]), i));
+    }
+
+    /* INTC -> GIC192 - GIC201 */
+    /* INTC -> GIC128 - GIC136 */
+    for (i = 0; i < ic->num_outpins; i++) {
+        sysbus_connect_irq(SYS_BUS_DEVICE(&a->intc[0]), i,
                            qdev_get_gpio_in(DEVICE(&a->gic),
-                                aspeed_soc_ast2700_gic_intcmap[i].irq));
+                                            ast2700_gic_intcmap[i].irq));
+    }
+
+    /* irq source -> orgates -> INTCIO */
+    for (i = 0; i < icio->num_inpins; i++) {
+        qdev_connect_gpio_out(DEVICE(&a->intc[1].orgates[i]), 0,
+                              qdev_get_gpio_in(DEVICE(&a->intc[1]), i));
+    }
+
+    /* INTCIO -> INTC */
+    for (i = 0; i < icio->num_outpins; i++) {
+        sysbus_connect_irq(SYS_BUS_DEVICE(&a->intc[1]), i,
+                           qdev_get_gpio_in(DEVICE(&a->intc[0].orgates[0]), i));
     }
 
     /* SRAM */
@@ -676,10 +796,22 @@ static void aspeed_soc_ast2700_realize(DeviceState *dev, Error **errp)
     for (i = 0; i < ASPEED_I2C_GET_CLASS(&s->i2c)->num_busses; i++) {
         /*
          * The AST2700 I2C controller has one source INTC per bus.
-         * I2C buses interrupt are connected to GICINT130_INTC
-         * from bit 0 to bit 15.
-         * I2C bus 0 is connected to GICINT130_INTC at bit 0.
-         * I2C bus 15 is connected to GICINT130_INTC at bit 15.
+         *
+         * For AST2700 A0:
+         * I2C bus interrupts are connected to the OR gate from bit 0 to bit
+         * 15, and the OR gate output pin is connected to the input pin of
+         * GICINT130 of INTC (CPU Die). Then, the output pin is connected to
+         * the GIC.
+         *
+         * For AST2700 A1:
+         * I2C bus interrupts are connected to the OR gate from bit 0 to bit
+         * 15, and the OR gate output pin is connected to the input pin of
+         * GICINT194 of INTCIO (IO Die). Then, the output pin is connected
+         * to the INTC (CPU Die) input pin, and its output pin is connected
+         * to the GIC.
+         *
+         * I2C bus 0 is connected to the OR gate at bit 0.
+         * I2C bus 15 is connected to the OR gate at bit 15.
          */
         irq = aspeed_soc_ast2700_get_irq_index(s, ASPEED_DEV_I2C, i);
         sysbus_connect_irq(SYS_BUS_DEVICE(&s->i2c.busses[i]), 0, irq);
@@ -733,6 +865,17 @@ static void aspeed_soc_ast2700_realize(DeviceState *dev, Error **errp)
         sysbus_connect_irq(SYS_BUS_DEVICE(&s->timerctrl), i, irq);
     }
 
+    /* HACE */
+    object_property_set_link(OBJECT(&s->hace), "dram", OBJECT(s->dram_mr),
+                             &error_abort);
+    if (!sysbus_realize(SYS_BUS_DEVICE(&s->hace), errp)) {
+        return;
+    }
+    aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->hace), 0,
+                    sc->memmap[ASPEED_DEV_HACE]);
+    sysbus_connect_irq(SYS_BUS_DEVICE(&s->hace), 0,
+                       aspeed_soc_get_irq(s, ASPEED_DEV_HACE));
+
     create_unimplemented_device("ast2700.dpmcu", 0x11000000, 0x40000);
     create_unimplemented_device("ast2700.iomem0", 0x12000000, 0x01000000);
     create_unimplemented_device("ast2700.iomem1", 0x14000000, 0x01000000);
@@ -740,7 +883,7 @@ static void aspeed_soc_ast2700_realize(DeviceState *dev, Error **errp)
     create_unimplemented_device("ast2700.io", 0x0, 0x4000000);
 }
 
-static void aspeed_soc_ast2700_class_init(ObjectClass *oc, void *data)
+static void aspeed_soc_ast2700a0_class_init(ObjectClass *oc, void *data)
 {
     static const char * const valid_cpu_types[] = {
         ARM_CPU_TYPE_NAME("cortex-a35"),
@@ -753,7 +896,6 @@ static void aspeed_soc_ast2700_class_init(ObjectClass *oc, void *data)
     dc->user_creatable = false;
     dc->realize      = aspeed_soc_ast2700_realize;
 
-    sc->name         = "ast2700-a0";
     sc->valid_cpu_types = valid_cpu_types;
     sc->silicon_rev  = AST2700_A0_SILICON_REV;
     sc->sram_size    = 0x20000;
@@ -763,7 +905,34 @@ static void aspeed_soc_ast2700_class_init(ObjectClass *oc, void *data)
     sc->uarts_num    = 13;
     sc->num_cpus     = 4;
     sc->uarts_base   = ASPEED_DEV_UART0;
-    sc->irqmap       = aspeed_soc_ast2700_irqmap;
+    sc->irqmap       = aspeed_soc_ast2700a0_irqmap;
+    sc->memmap       = aspeed_soc_ast2700_memmap;
+    sc->get_irq      = aspeed_soc_ast2700_get_irq;
+}
+
+static void aspeed_soc_ast2700a1_class_init(ObjectClass *oc, void *data)
+{
+    static const char * const valid_cpu_types[] = {
+        ARM_CPU_TYPE_NAME("cortex-a35"),
+        NULL
+    };
+    DeviceClass *dc = DEVICE_CLASS(oc);
+    AspeedSoCClass *sc = ASPEED_SOC_CLASS(oc);
+
+    /* Reason: The Aspeed SoC can only be instantiated from a board */
+    dc->user_creatable = false;
+    dc->realize      = aspeed_soc_ast2700_realize;
+
+    sc->valid_cpu_types = valid_cpu_types;
+    sc->silicon_rev  = AST2700_A1_SILICON_REV;
+    sc->sram_size    = 0x20000;
+    sc->spis_num     = 3;
+    sc->wdts_num     = 8;
+    sc->macs_num     = 3;
+    sc->uarts_num    = 13;
+    sc->num_cpus     = 4;
+    sc->uarts_base   = ASPEED_DEV_UART0;
+    sc->irqmap       = aspeed_soc_ast2700a1_irqmap;
     sc->memmap       = aspeed_soc_ast2700_memmap;
     sc->get_irq      = aspeed_soc_ast2700_get_irq;
 }
@@ -778,7 +947,13 @@ static const TypeInfo aspeed_soc_ast27x0_types[] = {
         .name           = "ast2700-a0",
         .parent         = TYPE_ASPEED27X0_SOC,
         .instance_init  = aspeed_soc_ast2700_init,
-        .class_init     = aspeed_soc_ast2700_class_init,
+        .class_init     = aspeed_soc_ast2700a0_class_init,
+    },
+    {
+        .name           = "ast2700-a1",
+        .parent         = TYPE_ASPEED27X0_SOC,
+        .instance_init  = aspeed_soc_ast2700_init,
+        .class_init     = aspeed_soc_ast2700a1_class_init,
     },
 };
 
diff --git a/hw/arm/bananapi_m2u.c b/hw/arm/bananapi_m2u.c
index 0a4b6f29b1cdb..4d84d10d24c16 100644
--- a/hw/arm/bananapi_m2u.c
+++ b/hw/arm/bananapi_m2u.c
@@ -141,6 +141,7 @@ static void bpim2u_machine_init(MachineClass *mc)
     mc->valid_cpu_types = valid_cpu_types;
     mc->default_ram_size = 1 * GiB;
     mc->default_ram_id = "bpim2u.ram";
+    mc->auto_create_sdcard = true;
 }
 
 DEFINE_MACHINE("bpim2u", bpim2u_machine_init)
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index 42c18355e852d..e296b62fa12c0 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -661,8 +661,6 @@ int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo,
         binfo->modify_dtb(binfo, fdt);
     }
 
-    qemu_fdt_dumpdtb(fdt, size);
-
     /* Put the DTB into the memory map as a ROM image: this will ensure
      * the DTB is copied again upon reset, even if addr points into RAM.
      */
diff --git a/hw/arm/cubieboard.c b/hw/arm/cubieboard.c
index b976727eefdad..d665d4edd9763 100644
--- a/hw/arm/cubieboard.c
+++ b/hw/arm/cubieboard.c
@@ -122,6 +122,7 @@ static void cubieboard_machine_init(MachineClass *mc)
     mc->units_per_default_bus = 1;
     mc->ignore_memory_transaction_failures = true;
     mc->default_ram_id = "cubieboard.ram";
+    mc->auto_create_sdcard = true;
 }
 
 DEFINE_MACHINE("cubieboard", cubieboard_machine_init)
diff --git a/hw/arm/exynos4210.c b/hw/arm/exynos4210.c
index dd0edc81d5c84..b452470598b65 100644
--- a/hw/arm/exynos4210.c
+++ b/hw/arm/exynos4210.c
@@ -103,6 +103,8 @@
 #define EXYNOS4210_PL330_BASE1_ADDR         0x12690000
 #define EXYNOS4210_PL330_BASE2_ADDR         0x12850000
 
+#define GIC_EXT_IRQS 64 /* FIXME: verify for this SoC */
+
 enum ExtGicId {
     EXT_GIC_ID_MDMA_LCD0 = 66,
     EXT_GIC_ID_PDMA0,
@@ -394,7 +396,8 @@ static void exynos4210_init_board_irqs(Exynos4210State *s)
         }
         if (irq_id) {
             qdev_connect_gpio_out(splitter, splitin,
-                                  qdev_get_gpio_in(extgicdev, irq_id - 32));
+                                  qdev_get_gpio_in(extgicdev,
+                                                   irq_id - GIC_INTERNAL));
         }
     }
     for (; n < EXYNOS4210_MAX_INT_COMBINER_IN_IRQ; n++) {
@@ -421,7 +424,8 @@ static void exynos4210_init_board_irqs(Exynos4210State *s)
             s->irq_table[n] = qdev_get_gpio_in(splitter, 0);
             qdev_connect_gpio_out(splitter, 0, qdev_get_gpio_in(intcdev, n));
             qdev_connect_gpio_out(splitter, 1,
-                                  qdev_get_gpio_in(extgicdev, irq_id - 32));
+                                  qdev_get_gpio_in(extgicdev,
+                                                   irq_id - GIC_INTERNAL));
         } else {
             s->irq_table[n] = qdev_get_gpio_in(intcdev, n);
         }
@@ -586,6 +590,8 @@ static void exynos4210_realize(DeviceState *socdev, Error **errp)
 
     /* Private memory region and Internal GIC */
     qdev_prop_set_uint32(DEVICE(&s->a9mpcore), "num-cpu", EXYNOS4210_NCPUS);
+    qdev_prop_set_uint32(DEVICE(&s->a9mpcore), "num-irq",
+                         GIC_EXT_IRQS + GIC_INTERNAL);
     busdev = SYS_BUS_DEVICE(&s->a9mpcore);
     sysbus_realize(busdev, &error_fatal);
     sysbus_mmio_map(busdev, 0, EXYNOS4210_SMP_PRIVATE_BASE_ADDR);
diff --git a/hw/arm/exynos4_boards.c b/hw/arm/exynos4_boards.c
index 2410e2a28e817..43dc89d902e93 100644
--- a/hw/arm/exynos4_boards.c
+++ b/hw/arm/exynos4_boards.c
@@ -165,6 +165,7 @@ static void nuri_class_init(ObjectClass *oc, void *data)
     mc->min_cpus = EXYNOS4210_NCPUS;
     mc->default_cpus = EXYNOS4210_NCPUS;
     mc->ignore_memory_transaction_failures = true;
+    mc->auto_create_sdcard = true;
 }
 
 static const TypeInfo nuri_type = {
@@ -184,6 +185,7 @@ static void smdkc210_class_init(ObjectClass *oc, void *data)
     mc->min_cpus = EXYNOS4210_NCPUS;
     mc->default_cpus = EXYNOS4210_NCPUS;
     mc->ignore_memory_transaction_failures = true;
+    mc->auto_create_sdcard = true;
 }
 
 static const TypeInfo smdkc210_type = {
diff --git a/hw/arm/fby35.c b/hw/arm/fby35.c
index 83d08e578b72f..6d3663f14a1e4 100644
--- a/hw/arm/fby35.c
+++ b/hw/arm/fby35.c
@@ -170,6 +170,7 @@ static void fby35_class_init(ObjectClass *oc, void *data)
     mc->init = fby35_init;
     mc->no_floppy = 1;
     mc->no_cdrom = 1;
+    mc->auto_create_sdcard = true;
     mc->min_cpus = mc->max_cpus = mc->default_cpus = 3;
 
     object_class_property_add_bool(oc, "execute-in-place",
diff --git a/hw/arm/fsl-imx25.c b/hw/arm/fsl-imx25.c
index 5359a6d8d3b35..02214ca1a1cc4 100644
--- a/hw/arm/fsl-imx25.c
+++ b/hw/arm/fsl-imx25.c
@@ -243,8 +243,6 @@ static void fsl_imx25_realize(DeviceState *dev, Error **errp)
                                  &error_abort);
         object_property_set_uint(OBJECT(&s->esdhc[i]), "capareg",
                                  IMX25_ESDHC_CAPABILITIES, &error_abort);
-        object_property_set_uint(OBJECT(&s->esdhc[i]), "vendor",
-                                 SDHCI_VENDOR_IMX, &error_abort);
         if (!sysbus_realize(SYS_BUS_DEVICE(&s->esdhc[i]), errp)) {
             return;
         }
diff --git a/hw/arm/fsl-imx6.c b/hw/arm/fsl-imx6.c
index dc86338b3a5ef..a114dc0d63d42 100644
--- a/hw/arm/fsl-imx6.c
+++ b/hw/arm/fsl-imx6.c
@@ -327,8 +327,6 @@ static void fsl_imx6_realize(DeviceState *dev, Error **errp)
                                  &error_abort);
         object_property_set_uint(OBJECT(&s->esdhc[i]), "capareg",
                                  IMX6_ESDHC_CAPABILITIES, &error_abort);
-        object_property_set_uint(OBJECT(&s->esdhc[i]), "vendor",
-                                 SDHCI_VENDOR_IMX, &error_abort);
         if (!sysbus_realize(SYS_BUS_DEVICE(&s->esdhc[i]), errp)) {
             return;
         }
diff --git a/hw/arm/fsl-imx6ul.c b/hw/arm/fsl-imx6ul.c
index 34c4aa15cd06d..ce8d3ef535f1d 100644
--- a/hw/arm/fsl-imx6ul.c
+++ b/hw/arm/fsl-imx6ul.c
@@ -531,8 +531,6 @@ static void fsl_imx6ul_realize(DeviceState *dev, Error **errp)
             FSL_IMX6UL_USDHC2_IRQ,
         };
 
-        object_property_set_uint(OBJECT(&s->usdhc[i]), "vendor",
-                                 SDHCI_VENDOR_IMX, &error_abort);
         sysbus_realize(SYS_BUS_DEVICE(&s->usdhc[i]), &error_abort);
 
         sysbus_mmio_map(SYS_BUS_DEVICE(&s->usdhc[i]), 0,
diff --git a/hw/arm/fsl-imx7.c b/hw/arm/fsl-imx7.c
index 3374018cde07e..ed1f10bca26ba 100644
--- a/hw/arm/fsl-imx7.c
+++ b/hw/arm/fsl-imx7.c
@@ -471,8 +471,6 @@ static void fsl_imx7_realize(DeviceState *dev, Error **errp)
             FSL_IMX7_USDHC3_IRQ,
         };
 
-        object_property_set_uint(OBJECT(&s->usdhc[i]), "vendor",
-                                 SDHCI_VENDOR_IMX, &error_abort);
         sysbus_realize(SYS_BUS_DEVICE(&s->usdhc[i]), &error_abort);
 
         sysbus_mmio_map(SYS_BUS_DEVICE(&s->usdhc[i]), 0,
diff --git a/hw/arm/fsl-imx8mp.c b/hw/arm/fsl-imx8mp.c
new file mode 100644
index 0000000000000..c3f6da63220fe
--- /dev/null
+++ b/hw/arm/fsl-imx8mp.c
@@ -0,0 +1,712 @@
+/*
+ * i.MX 8M Plus SoC Implementation
+ *
+ * Based on hw/arm/fsl-imx6.c
+ *
+ * Copyright (c) 2024, Bernhard Beschow <shentey@gmail.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "exec/address-spaces.h"
+#include "hw/arm/bsa.h"
+#include "hw/arm/fsl-imx8mp.h"
+#include "hw/intc/arm_gicv3.h"
+#include "hw/misc/unimp.h"
+#include "hw/boards.h"
+#include "system/system.h"
+#include "target/arm/cpu-qom.h"
+#include "qapi/error.h"
+#include "qobject/qlist.h"
+
+static const struct {
+    hwaddr addr;
+    size_t size;
+    const char *name;
+} fsl_imx8mp_memmap[] = {
+    [FSL_IMX8MP_RAM] = { FSL_IMX8MP_RAM_START, FSL_IMX8MP_RAM_SIZE_MAX, "ram" },
+    [FSL_IMX8MP_DDR_PHY_BROADCAST] = { 0x3dc00000, 4 * MiB, "ddr_phy_broadcast" },
+    [FSL_IMX8MP_DDR_PERF_MON] = { 0x3d800000, 4 * MiB, "ddr_perf_mon" },
+    [FSL_IMX8MP_DDR_CTL] = { 0x3d400000, 4 * MiB, "ddr_ctl" },
+    [FSL_IMX8MP_DDR_BLK_CTRL] = { 0x3d000000, 1 * MiB, "ddr_blk_ctrl" },
+    [FSL_IMX8MP_DDR_PHY] = { 0x3c000000, 16 * MiB, "ddr_phy" },
+    [FSL_IMX8MP_AUDIO_DSP] = { 0x3b000000, 16 * MiB, "audio_dsp" },
+    [FSL_IMX8MP_GIC_DIST] = { 0x38800000, 512 * KiB, "gic_dist" },
+    [FSL_IMX8MP_GIC_REDIST] = { 0x38880000, 512 * KiB, "gic_redist" },
+    [FSL_IMX8MP_NPU] = { 0x38500000, 2 * MiB, "npu" },
+    [FSL_IMX8MP_VPU] = { 0x38340000, 2 * MiB, "vpu" },
+    [FSL_IMX8MP_VPU_BLK_CTRL] = { 0x38330000, 2 * MiB, "vpu_blk_ctrl" },
+    [FSL_IMX8MP_VPU_VC8000E_ENCODER] = { 0x38320000, 2 * MiB, "vpu_vc8000e_encoder" },
+    [FSL_IMX8MP_VPU_G2_DECODER] = { 0x38310000, 2 * MiB, "vpu_g2_decoder" },
+    [FSL_IMX8MP_VPU_G1_DECODER] = { 0x38300000, 2 * MiB, "vpu_g1_decoder" },
+    [FSL_IMX8MP_USB2_GLUE] = { 0x382f0000, 0x100, "usb2_glue" },
+    [FSL_IMX8MP_USB2_OTG] = { 0x3820cc00, 0x100, "usb2_otg" },
+    [FSL_IMX8MP_USB2_DEV] = { 0x3820c700, 0x500, "usb2_dev" },
+    [FSL_IMX8MP_USB2] = { 0x38200000, 0xc700, "usb2" },
+    [FSL_IMX8MP_USB1_GLUE] = { 0x381f0000, 0x100, "usb1_glue" },
+    [FSL_IMX8MP_USB1_OTG] = { 0x3810cc00, 0x100, "usb1_otg" },
+    [FSL_IMX8MP_USB1_DEV] = { 0x3810c700, 0x500, "usb1_dev" },
+    [FSL_IMX8MP_USB1] = { 0x38100000, 0xc700, "usb1" },
+    [FSL_IMX8MP_GPU2D] = { 0x38008000, 32 * KiB, "gpu2d" },
+    [FSL_IMX8MP_GPU3D] = { 0x38000000, 32 * KiB, "gpu3d" },
+    [FSL_IMX8MP_QSPI1_RX_BUFFER] = { 0x34000000, 32 * MiB, "qspi1_rx_buffer" },
+    [FSL_IMX8MP_PCIE1] = { 0x33800000, 4 * MiB, "pcie1" },
+    [FSL_IMX8MP_QSPI1_TX_BUFFER] = { 0x33008000, 32 * KiB, "qspi1_tx_buffer" },
+    [FSL_IMX8MP_APBH_DMA] = { 0x33000000, 32 * KiB, "apbh_dma" },
+
+    /* AIPS-5 Begin */
+    [FSL_IMX8MP_MU_3_B] = { 0x30e90000, 64 * KiB, "mu_3_b" },
+    [FSL_IMX8MP_MU_3_A] = { 0x30e80000, 64 * KiB, "mu_3_a" },
+    [FSL_IMX8MP_MU_2_B] = { 0x30e70000, 64 * KiB, "mu_2_b" },
+    [FSL_IMX8MP_MU_2_A] = { 0x30e60000, 64 * KiB, "mu_2_a" },
+    [FSL_IMX8MP_EDMA_CHANNELS] = { 0x30e40000, 128 * KiB, "edma_channels" },
+    [FSL_IMX8MP_EDMA_MANAGEMENT_PAGE] = { 0x30e30000, 64 * KiB, "edma_management_page" },
+    [FSL_IMX8MP_AUDIO_BLK_CTRL] = { 0x30e20000, 64 * KiB, "audio_blk_ctrl" },
+    [FSL_IMX8MP_SDMA2] = { 0x30e10000, 64 * KiB, "sdma2" },
+    [FSL_IMX8MP_SDMA3] = { 0x30e00000, 64 * KiB, "sdma3" },
+    [FSL_IMX8MP_AIPS5_CONFIGURATION] = { 0x30df0000, 64 * KiB, "aips5_configuration" },
+    [FSL_IMX8MP_SPBA2] = { 0x30cf0000, 64 * KiB, "spba2" },
+    [FSL_IMX8MP_AUDIO_XCVR_RX] = { 0x30cc0000, 64 * KiB, "audio_xcvr_rx" },
+    [FSL_IMX8MP_HDMI_TX_AUDLNK_MSTR] = { 0x30cb0000, 64 * KiB, "hdmi_tx_audlnk_mstr" },
+    [FSL_IMX8MP_PDM] = { 0x30ca0000, 64 * KiB, "pdm" },
+    [FSL_IMX8MP_ASRC] = { 0x30c90000, 64 * KiB, "asrc" },
+    [FSL_IMX8MP_SAI7] = { 0x30c80000, 64 * KiB, "sai7" },
+    [FSL_IMX8MP_SAI6] = { 0x30c60000, 64 * KiB, "sai6" },
+    [FSL_IMX8MP_SAI5] = { 0x30c50000, 64 * KiB, "sai5" },
+    [FSL_IMX8MP_SAI3] = { 0x30c30000, 64 * KiB, "sai3" },
+    [FSL_IMX8MP_SAI2] = { 0x30c20000, 64 * KiB, "sai2" },
+    [FSL_IMX8MP_SAI1] = { 0x30c10000, 64 * KiB, "sai1" },
+    /* AIPS-5 End */
+
+    /* AIPS-4 Begin */
+    [FSL_IMX8MP_HDMI_TX] = { 0x32fc0000, 128 * KiB, "hdmi_tx" },
+    [FSL_IMX8MP_TZASC] = { 0x32f80000, 64 * KiB, "tzasc" },
+    [FSL_IMX8MP_HSIO_BLK_CTL] = { 0x32f10000, 64 * KiB, "hsio_blk_ctl" },
+    [FSL_IMX8MP_PCIE_PHY1] = { 0x32f00000, 64 * KiB, "pcie_phy1" },
+    [FSL_IMX8MP_MEDIA_BLK_CTL] = { 0x32ec0000, 64 * KiB, "media_blk_ctl" },
+    [FSL_IMX8MP_LCDIF2] = { 0x32e90000, 64 * KiB, "lcdif2" },
+    [FSL_IMX8MP_LCDIF1] = { 0x32e80000, 64 * KiB, "lcdif1" },
+    [FSL_IMX8MP_MIPI_DSI1] = { 0x32e60000, 64 * KiB, "mipi_dsi1" },
+    [FSL_IMX8MP_MIPI_CSI2] = { 0x32e50000, 64 * KiB, "mipi_csi2" },
+    [FSL_IMX8MP_MIPI_CSI1] = { 0x32e40000, 64 * KiB, "mipi_csi1" },
+    [FSL_IMX8MP_IPS_DEWARP] = { 0x32e30000, 64 * KiB, "ips_dewarp" },
+    [FSL_IMX8MP_ISP2] = { 0x32e20000, 64 * KiB, "isp2" },
+    [FSL_IMX8MP_ISP1] = { 0x32e10000, 64 * KiB, "isp1" },
+    [FSL_IMX8MP_ISI] = { 0x32e00000, 64 * KiB, "isi" },
+    [FSL_IMX8MP_AIPS4_CONFIGURATION] = { 0x32df0000, 64 * KiB, "aips4_configuration" },
+    /* AIPS-4 End */
+
+    [FSL_IMX8MP_INTERCONNECT] = { 0x32700000, 1 * MiB, "interconnect" },
+
+    /* AIPS-3 Begin */
+    [FSL_IMX8MP_ENET2_TSN] = { 0x30bf0000, 64 * KiB, "enet2_tsn" },
+    [FSL_IMX8MP_ENET1] = { 0x30be0000, 64 * KiB, "enet1" },
+    [FSL_IMX8MP_SDMA1] = { 0x30bd0000, 64 * KiB, "sdma1" },
+    [FSL_IMX8MP_QSPI] = { 0x30bb0000, 64 * KiB, "qspi" },
+    [FSL_IMX8MP_USDHC3] = { 0x30b60000, 64 * KiB, "usdhc3" },
+    [FSL_IMX8MP_USDHC2] = { 0x30b50000, 64 * KiB, "usdhc2" },
+    [FSL_IMX8MP_USDHC1] = { 0x30b40000, 64 * KiB, "usdhc1" },
+    [FSL_IMX8MP_I2C6] = { 0x30ae0000, 64 * KiB, "i2c6" },
+    [FSL_IMX8MP_I2C5] = { 0x30ad0000, 64 * KiB, "i2c5" },
+    [FSL_IMX8MP_SEMAPHORE_HS] = { 0x30ac0000, 64 * KiB, "semaphore_hs" },
+    [FSL_IMX8MP_MU_1_B] = { 0x30ab0000, 64 * KiB, "mu_1_b" },
+    [FSL_IMX8MP_MU_1_A] = { 0x30aa0000, 64 * KiB, "mu_1_a" },
+    [FSL_IMX8MP_AUD_IRQ_STEER] = { 0x30a80000, 64 * KiB, "aud_irq_steer" },
+    [FSL_IMX8MP_UART4] = { 0x30a60000, 64 * KiB, "uart4" },
+    [FSL_IMX8MP_I2C4] = { 0x30a50000, 64 * KiB, "i2c4" },
+    [FSL_IMX8MP_I2C3] = { 0x30a40000, 64 * KiB, "i2c3" },
+    [FSL_IMX8MP_I2C2] = { 0x30a30000, 64 * KiB, "i2c2" },
+    [FSL_IMX8MP_I2C1] = { 0x30a20000, 64 * KiB, "i2c1" },
+    [FSL_IMX8MP_AIPS3_CONFIGURATION] = { 0x309f0000, 64 * KiB, "aips3_configuration" },
+    [FSL_IMX8MP_CAAM] = { 0x30900000, 256 * KiB, "caam" },
+    [FSL_IMX8MP_SPBA1] = { 0x308f0000, 64 * KiB, "spba1" },
+    [FSL_IMX8MP_FLEXCAN2] = { 0x308d0000, 64 * KiB, "flexcan2" },
+    [FSL_IMX8MP_FLEXCAN1] = { 0x308c0000, 64 * KiB, "flexcan1" },
+    [FSL_IMX8MP_UART2] = { 0x30890000, 64 * KiB, "uart2" },
+    [FSL_IMX8MP_UART3] = { 0x30880000, 64 * KiB, "uart3" },
+    [FSL_IMX8MP_UART1] = { 0x30860000, 64 * KiB, "uart1" },
+    [FSL_IMX8MP_ECSPI3] = { 0x30840000, 64 * KiB, "ecspi3" },
+    [FSL_IMX8MP_ECSPI2] = { 0x30830000, 64 * KiB, "ecspi2" },
+    [FSL_IMX8MP_ECSPI1] = { 0x30820000, 64 * KiB, "ecspi1" },
+    /* AIPS-3 End */
+
+    /* AIPS-2 Begin */
+    [FSL_IMX8MP_QOSC] = { 0x307f0000, 64 * KiB, "qosc" },
+    [FSL_IMX8MP_PERFMON2] = { 0x307d0000, 64 * KiB, "perfmon2" },
+    [FSL_IMX8MP_PERFMON1] = { 0x307c0000, 64 * KiB, "perfmon1" },
+    [FSL_IMX8MP_GPT4] = { 0x30700000, 64 * KiB, "gpt4" },
+    [FSL_IMX8MP_GPT5] = { 0x306f0000, 64 * KiB, "gpt5" },
+    [FSL_IMX8MP_GPT6] = { 0x306e0000, 64 * KiB, "gpt6" },
+    [FSL_IMX8MP_SYSCNT_CTRL] = { 0x306c0000, 64 * KiB, "syscnt_ctrl" },
+    [FSL_IMX8MP_SYSCNT_CMP] = { 0x306b0000, 64 * KiB, "syscnt_cmp" },
+    [FSL_IMX8MP_SYSCNT_RD] = { 0x306a0000, 64 * KiB, "syscnt_rd" },
+    [FSL_IMX8MP_PWM4] = { 0x30690000, 64 * KiB, "pwm4" },
+    [FSL_IMX8MP_PWM3] = { 0x30680000, 64 * KiB, "pwm3" },
+    [FSL_IMX8MP_PWM2] = { 0x30670000, 64 * KiB, "pwm2" },
+    [FSL_IMX8MP_PWM1] = { 0x30660000, 64 * KiB, "pwm1" },
+    [FSL_IMX8MP_AIPS2_CONFIGURATION] = { 0x305f0000, 64 * KiB, "aips2_configuration" },
+    /* AIPS-2 End */
+
+    /* AIPS-1 Begin */
+    [FSL_IMX8MP_CSU] = { 0x303e0000, 64 * KiB, "csu" },
+    [FSL_IMX8MP_RDC] = { 0x303d0000, 64 * KiB, "rdc" },
+    [FSL_IMX8MP_SEMAPHORE2] = { 0x303c0000, 64 * KiB, "semaphore2" },
+    [FSL_IMX8MP_SEMAPHORE1] = { 0x303b0000, 64 * KiB, "semaphore1" },
+    [FSL_IMX8MP_GPC] = { 0x303a0000, 64 * KiB, "gpc" },
+    [FSL_IMX8MP_SRC] = { 0x30390000, 64 * KiB, "src" },
+    [FSL_IMX8MP_CCM] = { 0x30380000, 64 * KiB, "ccm" },
+    [FSL_IMX8MP_SNVS_HP] = { 0x30370000, 64 * KiB, "snvs_hp" },
+    [FSL_IMX8MP_ANA_PLL] = { 0x30360000, 64 * KiB, "ana_pll" },
+    [FSL_IMX8MP_OCOTP_CTRL] = { 0x30350000, 64 * KiB, "ocotp_ctrl" },
+    [FSL_IMX8MP_IOMUXC_GPR] = { 0x30340000, 64 * KiB, "iomuxc_gpr" },
+    [FSL_IMX8MP_IOMUXC] = { 0x30330000, 64 * KiB, "iomuxc" },
+    [FSL_IMX8MP_GPT3] = { 0x302f0000, 64 * KiB, "gpt3" },
+    [FSL_IMX8MP_GPT2] = { 0x302e0000, 64 * KiB, "gpt2" },
+    [FSL_IMX8MP_GPT1] = { 0x302d0000, 64 * KiB, "gpt1" },
+    [FSL_IMX8MP_WDOG3] = { 0x302a0000, 64 * KiB, "wdog3" },
+    [FSL_IMX8MP_WDOG2] = { 0x30290000, 64 * KiB, "wdog2" },
+    [FSL_IMX8MP_WDOG1] = { 0x30280000, 64 * KiB, "wdog1" },
+    [FSL_IMX8MP_ANA_OSC] = { 0x30270000, 64 * KiB, "ana_osc" },
+    [FSL_IMX8MP_ANA_TSENSOR] = { 0x30260000, 64 * KiB, "ana_tsensor" },
+    [FSL_IMX8MP_GPIO5] = { 0x30240000, 64 * KiB, "gpio5" },
+    [FSL_IMX8MP_GPIO4] = { 0x30230000, 64 * KiB, "gpio4" },
+    [FSL_IMX8MP_GPIO3] = { 0x30220000, 64 * KiB, "gpio3" },
+    [FSL_IMX8MP_GPIO2] = { 0x30210000, 64 * KiB, "gpio2" },
+    [FSL_IMX8MP_GPIO1] = { 0x30200000, 64 * KiB, "gpio1" },
+    [FSL_IMX8MP_AIPS1_CONFIGURATION] = { 0x301f0000, 64 * KiB, "aips1_configuration" },
+    /* AIPS-1 End */
+
+    [FSL_IMX8MP_A53_DAP] = { 0x28000000, 16 * MiB, "a53_dap" },
+    [FSL_IMX8MP_PCIE1_MEM] = { 0x18000000, 128 * MiB, "pcie1_mem" },
+    [FSL_IMX8MP_QSPI_MEM] = { 0x08000000, 256 * MiB, "qspi_mem" },
+    [FSL_IMX8MP_OCRAM] = { 0x00900000, 576 * KiB, "ocram" },
+    [FSL_IMX8MP_TCM_DTCM] = { 0x00800000, 128 * KiB, "tcm_dtcm" },
+    [FSL_IMX8MP_TCM_ITCM] = { 0x007e0000, 128 * KiB, "tcm_itcm" },
+    [FSL_IMX8MP_OCRAM_S] = { 0x00180000, 36 * KiB, "ocram_s" },
+    [FSL_IMX8MP_CAAM_MEM] = { 0x00100000, 32 * KiB, "caam_mem" },
+    [FSL_IMX8MP_BOOT_ROM_PROTECTED] = { 0x0003f000, 4 * KiB, "boot_rom_protected" },
+    [FSL_IMX8MP_BOOT_ROM] = { 0x00000000, 252 * KiB, "boot_rom" },
+};
+
+static void fsl_imx8mp_init(Object *obj)
+{
+    MachineState *ms = MACHINE(qdev_get_machine());
+    FslImx8mpState *s = FSL_IMX8MP(obj);
+    int i;
+
+    for (i = 0; i < MIN(ms->smp.cpus, FSL_IMX8MP_NUM_CPUS); i++) {
+        g_autofree char *name = g_strdup_printf("cpu%d", i);
+        object_initialize_child(obj, name, &s->cpu[i],
+                                ARM_CPU_TYPE_NAME("cortex-a53"));
+    }
+
+    object_initialize_child(obj, "gic", &s->gic, TYPE_ARM_GICV3);
+
+    object_initialize_child(obj, "ccm", &s->ccm, TYPE_IMX8MP_CCM);
+
+    object_initialize_child(obj, "analog", &s->analog, TYPE_IMX8MP_ANALOG);
+
+    object_initialize_child(obj, "snvs", &s->snvs, TYPE_IMX7_SNVS);
+
+    for (i = 0; i < FSL_IMX8MP_NUM_UARTS; i++) {
+        g_autofree char *name = g_strdup_printf("uart%d", i + 1);
+        object_initialize_child(obj, name, &s->uart[i], TYPE_IMX_SERIAL);
+    }
+
+    for (i = 0; i < FSL_IMX8MP_NUM_GPTS; i++) {
+        g_autofree char *name = g_strdup_printf("gpt%d", i + 1);
+        object_initialize_child(obj, name, &s->gpt[i], TYPE_IMX8MP_GPT);
+    }
+    object_initialize_child(obj, "gpt5-gpt6-irq", &s->gpt5_gpt6_irq,
+                            TYPE_OR_IRQ);
+
+    for (i = 0; i < FSL_IMX8MP_NUM_I2CS; i++) {
+        g_autofree char *name = g_strdup_printf("i2c%d", i + 1);
+        object_initialize_child(obj, name, &s->i2c[i], TYPE_IMX_I2C);
+    }
+
+    for (i = 0; i < FSL_IMX8MP_NUM_GPIOS; i++) {
+        g_autofree char *name = g_strdup_printf("gpio%d", i + 1);
+        object_initialize_child(obj, name, &s->gpio[i], TYPE_IMX_GPIO);
+    }
+
+    for (i = 0; i < FSL_IMX8MP_NUM_USDHCS; i++) {
+        g_autofree char *name = g_strdup_printf("usdhc%d", i + 1);
+        object_initialize_child(obj, name, &s->usdhc[i], TYPE_IMX_USDHC);
+    }
+
+    for (i = 0; i < FSL_IMX8MP_NUM_USBS; i++) {
+        g_autofree char *name = g_strdup_printf("usb%d", i);
+        object_initialize_child(obj, name, &s->usb[i], TYPE_USB_DWC3);
+    }
+
+    for (i = 0; i < FSL_IMX8MP_NUM_ECSPIS; i++) {
+        g_autofree char *name = g_strdup_printf("spi%d", i + 1);
+        object_initialize_child(obj, name, &s->spi[i], TYPE_IMX_SPI);
+    }
+
+    for (i = 0; i < FSL_IMX8MP_NUM_WDTS; i++) {
+        g_autofree char *name = g_strdup_printf("wdt%d", i);
+        object_initialize_child(obj, name, &s->wdt[i], TYPE_IMX2_WDT);
+    }
+
+    object_initialize_child(obj, "eth0", &s->enet, TYPE_IMX_ENET);
+
+    object_initialize_child(obj, "pcie", &s->pcie, TYPE_DESIGNWARE_PCIE_HOST);
+    object_initialize_child(obj, "pcie_phy", &s->pcie_phy,
+                            TYPE_FSL_IMX8M_PCIE_PHY);
+}
+
+static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
+{
+    MachineState *ms = MACHINE(qdev_get_machine());
+    FslImx8mpState *s = FSL_IMX8MP(dev);
+    DeviceState *gicdev = DEVICE(&s->gic);
+    int i;
+
+    if (ms->smp.cpus > FSL_IMX8MP_NUM_CPUS) {
+        error_setg(errp, "%s: Only %d CPUs are supported (%d requested)",
+                   TYPE_FSL_IMX8MP, FSL_IMX8MP_NUM_CPUS, ms->smp.cpus);
+        return;
+    }
+
+    /* CPUs */
+    for (i = 0; i < ms->smp.cpus; i++) {
+        /* On uniprocessor, the CBAR is set to 0 */
+        if (ms->smp.cpus > 1) {
+            object_property_set_int(OBJECT(&s->cpu[i]), "reset-cbar",
+                                    fsl_imx8mp_memmap[FSL_IMX8MP_GIC_DIST].addr,
+                                    &error_abort);
+        }
+
+        /*
+         * CNTFID0 base frequency in Hz of system counter
+         */
+        object_property_set_int(OBJECT(&s->cpu[i]), "cntfrq", 8000000,
+                                &error_abort);
+
+        if (i) {
+            /*
+             * Secondary CPUs start in powered-down state (and can be
+             * powered up via the SRC system reset controller)
+             */
+            object_property_set_bool(OBJECT(&s->cpu[i]), "start-powered-off",
+                                     true, &error_abort);
+        }
+
+        if (!qdev_realize(DEVICE(&s->cpu[i]), NULL, errp)) {
+            return;
+        }
+    }
+
+    /* GIC */
+    {
+        SysBusDevice *gicsbd = SYS_BUS_DEVICE(&s->gic);
+        QList *redist_region_count;
+
+        qdev_prop_set_uint32(gicdev, "num-cpu", ms->smp.cpus);
+        qdev_prop_set_uint32(gicdev, "num-irq",
+                             FSL_IMX8MP_NUM_IRQS + GIC_INTERNAL);
+        redist_region_count = qlist_new();
+        qlist_append_int(redist_region_count, ms->smp.cpus);
+        qdev_prop_set_array(gicdev, "redist-region-count", redist_region_count);
+        object_property_set_link(OBJECT(&s->gic), "sysmem",
+                                 OBJECT(get_system_memory()), &error_fatal);
+        if (!sysbus_realize(gicsbd, errp)) {
+            return;
+        }
+        sysbus_mmio_map(gicsbd, 0, fsl_imx8mp_memmap[FSL_IMX8MP_GIC_DIST].addr);
+        sysbus_mmio_map(gicsbd, 1, fsl_imx8mp_memmap[FSL_IMX8MP_GIC_REDIST].addr);
+
+        /*
+         * Wire the outputs from each CPU's generic timer and the GICv3
+         * maintenance interrupt signal to the appropriate GIC PPI inputs, and
+         * the GIC's IRQ/FIQ interrupt outputs to the CPU's inputs.
+         */
+        for (i = 0; i < ms->smp.cpus; i++) {
+            DeviceState *cpudev = DEVICE(&s->cpu[i]);
+            int intidbase = FSL_IMX8MP_NUM_IRQS + i * GIC_INTERNAL;
+            qemu_irq irq;
+
+            /*
+             * Mapping from the output timer irq lines from the CPU to the
+             * GIC PPI inputs.
+             */
+            static const int timer_irqs[] = {
+                [GTIMER_PHYS] = ARCH_TIMER_NS_EL1_IRQ,
+                [GTIMER_VIRT] = ARCH_TIMER_VIRT_IRQ,
+                [GTIMER_HYP]  = ARCH_TIMER_NS_EL2_IRQ,
+                [GTIMER_SEC]  = ARCH_TIMER_S_EL1_IRQ,
+            };
+
+            for (int j = 0; j < ARRAY_SIZE(timer_irqs); j++) {
+                irq = qdev_get_gpio_in(gicdev, intidbase + timer_irqs[j]);
+                qdev_connect_gpio_out(cpudev, j, irq);
+            }
+
+            irq = qdev_get_gpio_in(gicdev, intidbase + ARCH_GIC_MAINT_IRQ);
+            qdev_connect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt",
+                                        0, irq);
+
+            irq = qdev_get_gpio_in(gicdev, intidbase + VIRTUAL_PMU_IRQ);
+            qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0, irq);
+
+            sysbus_connect_irq(gicsbd, i,
+                               qdev_get_gpio_in(cpudev, ARM_CPU_IRQ));
+            sysbus_connect_irq(gicsbd, i + ms->smp.cpus,
+                               qdev_get_gpio_in(cpudev, ARM_CPU_FIQ));
+        }
+    }
+
+    /* CCM */
+    if (!sysbus_realize(SYS_BUS_DEVICE(&s->ccm), errp)) {
+        return;
+    }
+    sysbus_mmio_map(SYS_BUS_DEVICE(&s->ccm), 0,
+                    fsl_imx8mp_memmap[FSL_IMX8MP_CCM].addr);
+
+    /* Analog */
+    if (!sysbus_realize(SYS_BUS_DEVICE(&s->analog), errp)) {
+        return;
+    }
+    sysbus_mmio_map(SYS_BUS_DEVICE(&s->analog), 0,
+                    fsl_imx8mp_memmap[FSL_IMX8MP_ANA_PLL].addr);
+
+    /* UARTs */
+    for (i = 0; i < FSL_IMX8MP_NUM_UARTS; i++) {
+        struct {
+            hwaddr addr;
+            unsigned int irq;
+        } serial_table[FSL_IMX8MP_NUM_UARTS] = {
+            { fsl_imx8mp_memmap[FSL_IMX8MP_UART1].addr, FSL_IMX8MP_UART1_IRQ },
+            { fsl_imx8mp_memmap[FSL_IMX8MP_UART2].addr, FSL_IMX8MP_UART2_IRQ },
+            { fsl_imx8mp_memmap[FSL_IMX8MP_UART3].addr, FSL_IMX8MP_UART3_IRQ },
+            { fsl_imx8mp_memmap[FSL_IMX8MP_UART4].addr, FSL_IMX8MP_UART4_IRQ },
+        };
+
+        qdev_prop_set_chr(DEVICE(&s->uart[i]), "chardev", serial_hd(i));
+        if (!sysbus_realize(SYS_BUS_DEVICE(&s->uart[i]), errp)) {
+            return;
+        }
+
+        sysbus_mmio_map(SYS_BUS_DEVICE(&s->uart[i]), 0, serial_table[i].addr);
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->uart[i]), 0,
+                           qdev_get_gpio_in(gicdev, serial_table[i].irq));
+    }
+
+    /* GPTs */
+    object_property_set_int(OBJECT(&s->gpt5_gpt6_irq), "num-lines", 2,
+                            &error_abort);
+    if (!qdev_realize(DEVICE(&s->gpt5_gpt6_irq), NULL, errp)) {
+        return;
+    }
+
+    qdev_connect_gpio_out(DEVICE(&s->gpt5_gpt6_irq), 0,
+                          qdev_get_gpio_in(gicdev, FSL_IMX8MP_GPT5_GPT6_IRQ));
+
+    for (i = 0; i < FSL_IMX8MP_NUM_GPTS; i++) {
+        hwaddr gpt_addrs[FSL_IMX8MP_NUM_GPTS] = {
+            fsl_imx8mp_memmap[FSL_IMX8MP_GPT1].addr,
+            fsl_imx8mp_memmap[FSL_IMX8MP_GPT2].addr,
+            fsl_imx8mp_memmap[FSL_IMX8MP_GPT3].addr,
+            fsl_imx8mp_memmap[FSL_IMX8MP_GPT4].addr,
+            fsl_imx8mp_memmap[FSL_IMX8MP_GPT5].addr,
+            fsl_imx8mp_memmap[FSL_IMX8MP_GPT6].addr,
+        };
+
+        s->gpt[i].ccm = IMX_CCM(&s->ccm);
+
+        if (!sysbus_realize(SYS_BUS_DEVICE(&s->gpt[i]), errp)) {
+            return;
+        }
+
+        sysbus_mmio_map(SYS_BUS_DEVICE(&s->gpt[i]), 0, gpt_addrs[i]);
+
+        if (i < FSL_IMX8MP_NUM_GPTS - 2) {
+            static const unsigned int gpt_irqs[FSL_IMX8MP_NUM_GPTS - 2] = {
+                FSL_IMX8MP_GPT1_IRQ,
+                FSL_IMX8MP_GPT2_IRQ,
+                FSL_IMX8MP_GPT3_IRQ,
+                FSL_IMX8MP_GPT4_IRQ,
+            };
+
+            sysbus_connect_irq(SYS_BUS_DEVICE(&s->gpt[i]), 0,
+                               qdev_get_gpio_in(gicdev, gpt_irqs[i]));
+        } else {
+            int irq = i - FSL_IMX8MP_NUM_GPTS + 2;
+
+            sysbus_connect_irq(SYS_BUS_DEVICE(&s->gpt[i]), 0,
+                               qdev_get_gpio_in(DEVICE(&s->gpt5_gpt6_irq), irq));
+        }
+    }
+
+    /* I2Cs */
+    for (i = 0; i < FSL_IMX8MP_NUM_I2CS; i++) {
+        struct {
+            hwaddr addr;
+            unsigned int irq;
+        } i2c_table[FSL_IMX8MP_NUM_I2CS] = {
+            { fsl_imx8mp_memmap[FSL_IMX8MP_I2C1].addr, FSL_IMX8MP_I2C1_IRQ },
+            { fsl_imx8mp_memmap[FSL_IMX8MP_I2C2].addr, FSL_IMX8MP_I2C2_IRQ },
+            { fsl_imx8mp_memmap[FSL_IMX8MP_I2C3].addr, FSL_IMX8MP_I2C3_IRQ },
+            { fsl_imx8mp_memmap[FSL_IMX8MP_I2C4].addr, FSL_IMX8MP_I2C4_IRQ },
+            { fsl_imx8mp_memmap[FSL_IMX8MP_I2C5].addr, FSL_IMX8MP_I2C5_IRQ },
+            { fsl_imx8mp_memmap[FSL_IMX8MP_I2C6].addr, FSL_IMX8MP_I2C6_IRQ },
+        };
+
+        if (!sysbus_realize(SYS_BUS_DEVICE(&s->i2c[i]), errp)) {
+            return;
+        }
+
+        sysbus_mmio_map(SYS_BUS_DEVICE(&s->i2c[i]), 0, i2c_table[i].addr);
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->i2c[i]), 0,
+                           qdev_get_gpio_in(gicdev, i2c_table[i].irq));
+    }
+
+    /* GPIOs */
+    for (i = 0; i < FSL_IMX8MP_NUM_GPIOS; i++) {
+        struct {
+            hwaddr addr;
+            unsigned int irq_low;
+            unsigned int irq_high;
+        } gpio_table[FSL_IMX8MP_NUM_GPIOS] = {
+            {
+                fsl_imx8mp_memmap[FSL_IMX8MP_GPIO1].addr,
+                FSL_IMX8MP_GPIO1_LOW_IRQ,
+                FSL_IMX8MP_GPIO1_HIGH_IRQ
+            },
+            {
+                fsl_imx8mp_memmap[FSL_IMX8MP_GPIO2].addr,
+                FSL_IMX8MP_GPIO2_LOW_IRQ,
+                FSL_IMX8MP_GPIO2_HIGH_IRQ
+            },
+            {
+                fsl_imx8mp_memmap[FSL_IMX8MP_GPIO3].addr,
+                FSL_IMX8MP_GPIO3_LOW_IRQ,
+                FSL_IMX8MP_GPIO3_HIGH_IRQ
+            },
+            {
+                fsl_imx8mp_memmap[FSL_IMX8MP_GPIO4].addr,
+                FSL_IMX8MP_GPIO4_LOW_IRQ,
+                FSL_IMX8MP_GPIO4_HIGH_IRQ
+            },
+            {
+                fsl_imx8mp_memmap[FSL_IMX8MP_GPIO5].addr,
+                FSL_IMX8MP_GPIO5_LOW_IRQ,
+                FSL_IMX8MP_GPIO5_HIGH_IRQ
+            },
+        };
+
+        object_property_set_bool(OBJECT(&s->gpio[i]), "has-edge-sel", true,
+                                 &error_abort);
+        object_property_set_bool(OBJECT(&s->gpio[i]), "has-upper-pin-irq",
+                                 true, &error_abort);
+        if (!sysbus_realize(SYS_BUS_DEVICE(&s->gpio[i]), errp)) {
+            return;
+        }
+
+        sysbus_mmio_map(SYS_BUS_DEVICE(&s->gpio[i]), 0, gpio_table[i].addr);
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->gpio[i]), 0,
+                           qdev_get_gpio_in(gicdev, gpio_table[i].irq_low));
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->gpio[i]), 1,
+                           qdev_get_gpio_in(gicdev, gpio_table[i].irq_high));
+    }
+
+    /* USDHCs */
+    for (i = 0; i < FSL_IMX8MP_NUM_USDHCS; i++) {
+        struct {
+            hwaddr addr;
+            unsigned int irq;
+        } usdhc_table[FSL_IMX8MP_NUM_USDHCS] = {
+            { fsl_imx8mp_memmap[FSL_IMX8MP_USDHC1].addr, FSL_IMX8MP_USDHC1_IRQ },
+            { fsl_imx8mp_memmap[FSL_IMX8MP_USDHC2].addr, FSL_IMX8MP_USDHC2_IRQ },
+            { fsl_imx8mp_memmap[FSL_IMX8MP_USDHC3].addr, FSL_IMX8MP_USDHC3_IRQ },
+        };
+
+        if (!sysbus_realize(SYS_BUS_DEVICE(&s->usdhc[i]), errp)) {
+            return;
+        }
+
+        sysbus_mmio_map(SYS_BUS_DEVICE(&s->usdhc[i]), 0, usdhc_table[i].addr);
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->usdhc[i]), 0,
+                           qdev_get_gpio_in(gicdev, usdhc_table[i].irq));
+    }
+
+    /* USBs */
+    for (i = 0; i < FSL_IMX8MP_NUM_USBS; i++) {
+        struct {
+            hwaddr addr;
+            unsigned int irq;
+        } usb_table[FSL_IMX8MP_NUM_USBS] = {
+            { fsl_imx8mp_memmap[FSL_IMX8MP_USB1].addr, FSL_IMX8MP_USB1_IRQ },
+            { fsl_imx8mp_memmap[FSL_IMX8MP_USB2].addr, FSL_IMX8MP_USB2_IRQ },
+        };
+
+        qdev_prop_set_uint32(DEVICE(&s->usb[i].sysbus_xhci), "p2", 1);
+        qdev_prop_set_uint32(DEVICE(&s->usb[i].sysbus_xhci), "p3", 1);
+        qdev_prop_set_uint32(DEVICE(&s->usb[i].sysbus_xhci), "slots", 2);
+        if (!sysbus_realize(SYS_BUS_DEVICE(&s->usb[i]), errp)) {
+            return;
+        }
+        sysbus_mmio_map(SYS_BUS_DEVICE(&s->usb[i]), 0, usb_table[i].addr);
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->usb[i].sysbus_xhci), 0,
+                           qdev_get_gpio_in(gicdev, usb_table[i].irq));
+    }
+
+    /* ECSPIs */
+    for (i = 0; i < FSL_IMX8MP_NUM_ECSPIS; i++) {
+        struct {
+            hwaddr addr;
+            unsigned int irq;
+        } spi_table[FSL_IMX8MP_NUM_ECSPIS] = {
+            { fsl_imx8mp_memmap[FSL_IMX8MP_ECSPI1].addr, FSL_IMX8MP_ECSPI1_IRQ },
+            { fsl_imx8mp_memmap[FSL_IMX8MP_ECSPI2].addr, FSL_IMX8MP_ECSPI2_IRQ },
+            { fsl_imx8mp_memmap[FSL_IMX8MP_ECSPI3].addr, FSL_IMX8MP_ECSPI3_IRQ },
+        };
+
+        if (!sysbus_realize(SYS_BUS_DEVICE(&s->spi[i]), errp)) {
+            return;
+        }
+
+        sysbus_mmio_map(SYS_BUS_DEVICE(&s->spi[i]), 0, spi_table[i].addr);
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->spi[i]), 0,
+                           qdev_get_gpio_in(gicdev, spi_table[i].irq));
+    }
+
+    /* ENET1 */
+    object_property_set_uint(OBJECT(&s->enet), "phy-num", s->phy_num,
+                             &error_abort);
+    object_property_set_uint(OBJECT(&s->enet), "tx-ring-num", 3, &error_abort);
+    qemu_configure_nic_device(DEVICE(&s->enet), true, NULL);
+    if (!sysbus_realize(SYS_BUS_DEVICE(&s->enet), errp)) {
+        return;
+    }
+    sysbus_mmio_map(SYS_BUS_DEVICE(&s->enet), 0,
+                    fsl_imx8mp_memmap[FSL_IMX8MP_ENET1].addr);
+    sysbus_connect_irq(SYS_BUS_DEVICE(&s->enet), 0,
+                       qdev_get_gpio_in(gicdev, FSL_IMX8MP_ENET1_MAC_IRQ));
+    sysbus_connect_irq(SYS_BUS_DEVICE(&s->enet), 1,
+                       qdev_get_gpio_in(gicdev, FSL_IMX6_ENET1_MAC_1588_IRQ));
+
+    /* SNVS */
+    if (!sysbus_realize(SYS_BUS_DEVICE(&s->snvs), errp)) {
+        return;
+    }
+    sysbus_mmio_map(SYS_BUS_DEVICE(&s->snvs), 0,
+                    fsl_imx8mp_memmap[FSL_IMX8MP_SNVS_HP].addr);
+
+    /* Watchdogs */
+    for (i = 0; i < FSL_IMX8MP_NUM_WDTS; i++) {
+        struct {
+            hwaddr addr;
+            unsigned int irq;
+        } wdog_table[FSL_IMX8MP_NUM_WDTS] = {
+            { fsl_imx8mp_memmap[FSL_IMX8MP_WDOG1].addr, FSL_IMX8MP_WDOG1_IRQ },
+            { fsl_imx8mp_memmap[FSL_IMX8MP_WDOG2].addr, FSL_IMX8MP_WDOG2_IRQ },
+            { fsl_imx8mp_memmap[FSL_IMX8MP_WDOG3].addr, FSL_IMX8MP_WDOG3_IRQ },
+        };
+
+        object_property_set_bool(OBJECT(&s->wdt[i]), "pretimeout-support",
+                                 true, &error_abort);
+        if (!sysbus_realize(SYS_BUS_DEVICE(&s->wdt[i]), errp)) {
+            return;
+        }
+
+        sysbus_mmio_map(SYS_BUS_DEVICE(&s->wdt[i]), 0, wdog_table[i].addr);
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->wdt[i]), 0,
+                           qdev_get_gpio_in(gicdev, wdog_table[i].irq));
+    }
+
+    /* PCIe */
+    if (!sysbus_realize(SYS_BUS_DEVICE(&s->pcie), errp)) {
+        return;
+    }
+    sysbus_mmio_map(SYS_BUS_DEVICE(&s->pcie), 0,
+                    fsl_imx8mp_memmap[FSL_IMX8MP_PCIE1].addr);
+
+    sysbus_connect_irq(SYS_BUS_DEVICE(&s->pcie), 0,
+                       qdev_get_gpio_in(gicdev, FSL_IMX8MP_PCI_INTA_IRQ));
+    sysbus_connect_irq(SYS_BUS_DEVICE(&s->pcie), 1,
+                       qdev_get_gpio_in(gicdev, FSL_IMX8MP_PCI_INTB_IRQ));
+    sysbus_connect_irq(SYS_BUS_DEVICE(&s->pcie), 2,
+                       qdev_get_gpio_in(gicdev, FSL_IMX8MP_PCI_INTC_IRQ));
+    sysbus_connect_irq(SYS_BUS_DEVICE(&s->pcie), 3,
+                       qdev_get_gpio_in(gicdev, FSL_IMX8MP_PCI_INTD_IRQ));
+    sysbus_connect_irq(SYS_BUS_DEVICE(&s->pcie), 4,
+                       qdev_get_gpio_in(gicdev, FSL_IMX8MP_PCI_MSI_IRQ));
+
+    if (!sysbus_realize(SYS_BUS_DEVICE(&s->pcie_phy), errp)) {
+        return;
+    }
+    sysbus_mmio_map(SYS_BUS_DEVICE(&s->pcie_phy), 0,
+                    fsl_imx8mp_memmap[FSL_IMX8MP_PCIE_PHY1].addr);
+
+    /* On-Chip RAM */
+    if (!memory_region_init_ram(&s->ocram, NULL, "imx8mp.ocram",
+                                fsl_imx8mp_memmap[FSL_IMX8MP_OCRAM].size,
+                                errp)) {
+        return;
+    }
+    memory_region_add_subregion(get_system_memory(),
+                                fsl_imx8mp_memmap[FSL_IMX8MP_OCRAM].addr,
+                                &s->ocram);
+
+    /* Unimplemented devices */
+    for (i = 0; i < ARRAY_SIZE(fsl_imx8mp_memmap); i++) {
+        switch (i) {
+        case FSL_IMX8MP_ANA_PLL:
+        case FSL_IMX8MP_CCM:
+        case FSL_IMX8MP_GIC_DIST:
+        case FSL_IMX8MP_GIC_REDIST:
+        case FSL_IMX8MP_GPIO1 ... FSL_IMX8MP_GPIO5:
+        case FSL_IMX8MP_ECSPI1 ... FSL_IMX8MP_ECSPI3:
+        case FSL_IMX8MP_ENET1:
+        case FSL_IMX8MP_I2C1 ... FSL_IMX8MP_I2C6:
+        case FSL_IMX8MP_OCRAM:
+        case FSL_IMX8MP_PCIE1:
+        case FSL_IMX8MP_PCIE_PHY1:
+        case FSL_IMX8MP_RAM:
+        case FSL_IMX8MP_SNVS_HP:
+        case FSL_IMX8MP_UART1 ... FSL_IMX8MP_UART4:
+        case FSL_IMX8MP_USB1 ... FSL_IMX8MP_USB2:
+        case FSL_IMX8MP_USDHC1 ... FSL_IMX8MP_USDHC3:
+        case FSL_IMX8MP_WDOG1 ... FSL_IMX8MP_WDOG3:
+            /* device implemented and treated above */
+            break;
+
+        default:
+            create_unimplemented_device(fsl_imx8mp_memmap[i].name,
+                                        fsl_imx8mp_memmap[i].addr,
+                                        fsl_imx8mp_memmap[i].size);
+            break;
+        }
+    }
+}
+
+static const Property fsl_imx8mp_properties[] = {
+    DEFINE_PROP_UINT32("fec1-phy-num", FslImx8mpState, phy_num, 0),
+    DEFINE_PROP_BOOL("fec1-phy-connected", FslImx8mpState, phy_connected, true),
+};
+
+static void fsl_imx8mp_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+
+    device_class_set_props(dc, fsl_imx8mp_properties);
+    dc->realize = fsl_imx8mp_realize;
+
+    dc->desc = "i.MX 8M Plus SoC";
+}
+
+static const TypeInfo fsl_imx8mp_types[] = {
+    {
+        .name = TYPE_FSL_IMX8MP,
+        .parent = TYPE_DEVICE,
+        .instance_size = sizeof(FslImx8mpState),
+        .instance_init = fsl_imx8mp_init,
+        .class_init = fsl_imx8mp_class_init,
+    },
+};
+
+DEFINE_TYPES(fsl_imx8mp_types)
diff --git a/hw/arm/highbank.c b/hw/arm/highbank.c
index 495704d97265b..0f3c207d54895 100644
--- a/hw/arm/highbank.c
+++ b/hw/arm/highbank.c
@@ -45,7 +45,7 @@
 #define MVBAR_ADDR              0x200
 #define BOARD_SETUP_ADDR        (MVBAR_ADDR + 8 * sizeof(uint32_t))
 
-#define NIRQ_GIC                160
+#define GIC_EXT_IRQS            128 /* EnergyCore ECX-1000 & ECX-2000 */
 
 /* Board init.  */
 
@@ -180,7 +180,7 @@ static void calxeda_init(MachineState *machine, enum cxmachines machine_id)
 {
     DeviceState *dev = NULL;
     SysBusDevice *busdev;
-    qemu_irq pic[128];
+    qemu_irq pic[GIC_EXT_IRQS];
     int n;
     unsigned int smp_cpus = machine->smp.cpus;
     qemu_irq cpu_irq[4];
@@ -260,7 +260,7 @@ static void calxeda_init(MachineState *machine, enum cxmachines machine_id)
         break;
     }
     qdev_prop_set_uint32(dev, "num-cpu", smp_cpus);
-    qdev_prop_set_uint32(dev, "num-irq", NIRQ_GIC);
+    qdev_prop_set_uint32(dev, "num-irq", GIC_EXT_IRQS + GIC_INTERNAL);
     busdev = SYS_BUS_DEVICE(dev);
     sysbus_realize_and_unref(busdev, &error_fatal);
     sysbus_mmio_map(busdev, 0, MPCORE_PERIPHBASE);
@@ -271,7 +271,7 @@ static void calxeda_init(MachineState *machine, enum cxmachines machine_id)
         sysbus_connect_irq(busdev, n + 3 * smp_cpus, cpu_vfiq[n]);
     }
 
-    for (n = 0; n < 128; n++) {
+    for (n = 0; n < GIC_EXT_IRQS; n++) {
         pic[n] = qdev_get_gpio_in(dev, n);
     }
 
diff --git a/hw/arm/imx25_pdk.c b/hw/arm/imx25_pdk.c
index c9c2e5dd3b1b5..e95ea5e4e189f 100644
--- a/hw/arm/imx25_pdk.c
+++ b/hw/arm/imx25_pdk.c
@@ -147,6 +147,7 @@ static void imx25_pdk_machine_init(MachineClass *mc)
     mc->init = imx25_pdk_init;
     mc->ignore_memory_transaction_failures = true;
     mc->default_ram_id = "imx25.ram";
+    mc->auto_create_sdcard = true;
 }
 
 DEFINE_MACHINE("imx25-pdk", imx25_pdk_machine_init)
diff --git a/hw/arm/imx8mp-evk.c b/hw/arm/imx8mp-evk.c
new file mode 100644
index 0000000000000..e1a7892fd7ce9
--- /dev/null
+++ b/hw/arm/imx8mp-evk.c
@@ -0,0 +1,74 @@
+/*
+ * NXP i.MX 8M Plus Evaluation Kit System Emulation
+ *
+ * Copyright (c) 2024, Bernhard Beschow <shentey@gmail.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "exec/address-spaces.h"
+#include "hw/arm/boot.h"
+#include "hw/arm/fsl-imx8mp.h"
+#include "hw/boards.h"
+#include "hw/qdev-properties.h"
+#include "system/qtest.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+
+static void imx8mp_evk_init(MachineState *machine)
+{
+    static struct arm_boot_info boot_info;
+    FslImx8mpState *s;
+
+    if (machine->ram_size > FSL_IMX8MP_RAM_SIZE_MAX) {
+        error_report("RAM size " RAM_ADDR_FMT " above max supported (%08" PRIx64 ")",
+                     machine->ram_size, FSL_IMX8MP_RAM_SIZE_MAX);
+        exit(1);
+    }
+
+    boot_info = (struct arm_boot_info) {
+        .loader_start = FSL_IMX8MP_RAM_START,
+        .board_id = -1,
+        .ram_size = machine->ram_size,
+        .psci_conduit = QEMU_PSCI_CONDUIT_SMC,
+    };
+
+    s = FSL_IMX8MP(object_new(TYPE_FSL_IMX8MP));
+    object_property_add_child(OBJECT(machine), "soc", OBJECT(s));
+    object_property_set_uint(OBJECT(s), "fec1-phy-num", 1, &error_fatal);
+    qdev_realize(DEVICE(s), NULL, &error_fatal);
+
+    memory_region_add_subregion(get_system_memory(), FSL_IMX8MP_RAM_START,
+                                machine->ram);
+
+    for (int i = 0; i < FSL_IMX8MP_NUM_USDHCS; i++) {
+        BusState *bus;
+        DeviceState *carddev;
+        BlockBackend *blk;
+        DriveInfo *di = drive_get(IF_SD, i, 0);
+
+        if (!di) {
+            continue;
+        }
+
+        blk = blk_by_legacy_dinfo(di);
+        bus = qdev_get_child_bus(DEVICE(&s->usdhc[i]), "sd-bus");
+        carddev = qdev_new(TYPE_SD_CARD);
+        qdev_prop_set_drive_err(carddev, "drive", blk, &error_fatal);
+        qdev_realize_and_unref(carddev, bus, &error_fatal);
+    }
+
+    if (!qtest_enabled()) {
+        arm_load_kernel(&s->cpu[0], machine, &boot_info);
+    }
+}
+
+static void imx8mp_evk_machine_init(MachineClass *mc)
+{
+    mc->desc = "NXP i.MX 8M Plus EVK Board";
+    mc->init = imx8mp_evk_init;
+    mc->max_cpus = FSL_IMX8MP_NUM_CPUS;
+    mc->default_ram_id = "imx8mp-evk.ram";
+}
+DEFINE_MACHINE("imx8mp-evk", imx8mp_evk_machine_init)
diff --git a/hw/arm/integratorcp.c b/hw/arm/integratorcp.c
index 4e1b8627d3246..8aa2e6e98e391 100644
--- a/hw/arm/integratorcp.c
+++ b/hw/arm/integratorcp.c
@@ -688,6 +688,7 @@ static void integratorcp_machine_init(MachineClass *mc)
     mc->ignore_memory_transaction_failures = true;
     mc->default_cpu_type = ARM_CPU_TYPE_NAME("arm926");
     mc->default_ram_id = "integrator.ram";
+    mc->auto_create_sdcard = true;
 
     machine_add_audiodev_property(mc);
 }
diff --git a/hw/arm/mcimx6ul-evk.c b/hw/arm/mcimx6ul-evk.c
index 690cb64ef3617..86982cb0772a8 100644
--- a/hw/arm/mcimx6ul-evk.c
+++ b/hw/arm/mcimx6ul-evk.c
@@ -74,5 +74,6 @@ static void mcimx6ul_evk_machine_init(MachineClass *mc)
     mc->init = mcimx6ul_evk_init;
     mc->max_cpus = FSL_IMX6UL_NUM_CPUS;
     mc->default_ram_id = "mcimx6ul-evk.ram";
+    mc->auto_create_sdcard = true;
 }
 DEFINE_MACHINE("mcimx6ul-evk", mcimx6ul_evk_machine_init)
diff --git a/hw/arm/mcimx7d-sabre.c b/hw/arm/mcimx7d-sabre.c
index b3e8e50779ffc..3311961011370 100644
--- a/hw/arm/mcimx7d-sabre.c
+++ b/hw/arm/mcimx7d-sabre.c
@@ -74,5 +74,6 @@ static void mcimx7d_sabre_machine_init(MachineClass *mc)
     mc->init = mcimx7d_sabre_init;
     mc->max_cpus = FSL_IMX7_NUM_CPUS;
     mc->default_ram_id = "mcimx7d-sabre.ram";
+    mc->auto_create_sdcard = true;
 }
 DEFINE_MACHINE("mcimx7d-sabre", mcimx7d_sabre_machine_init)
diff --git a/hw/arm/meson.build b/hw/arm/meson.build
index 490234b3b8472..ac473ce7cdac9 100644
--- a/hw/arm/meson.build
+++ b/hw/arm/meson.build
@@ -12,6 +12,7 @@ arm_ss.add(when: 'CONFIG_MUSICPAL', if_true: files('musicpal.c'))
 arm_ss.add(when: 'CONFIG_NETDUINOPLUS2', if_true: files('netduinoplus2.c'))
 arm_ss.add(when: 'CONFIG_OLIMEX_STM32_H405', if_true: files('olimex-stm32-h405.c'))
 arm_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx.c', 'npcm7xx_boards.c'))
+arm_ss.add(when: 'CONFIG_NPCM8XX', if_true: files('npcm8xx.c', 'npcm8xx_boards.c'))
 arm_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview.c'))
 arm_ss.add(when: 'CONFIG_SBSA_REF', if_true: files('sbsa-ref.c'))
 arm_ss.add(when: 'CONFIG_STELLARIS', if_true: files('stellaris.c'))
@@ -53,6 +54,8 @@ arm_ss.add(when: 'CONFIG_MSF2', if_true: files('msf2-soc.c'))
 arm_ss.add(when: 'CONFIG_MUSCA', if_true: files('musca.c'))
 arm_ss.add(when: 'CONFIG_ARMSSE', if_true: files('armsse.c'))
 arm_ss.add(when: 'CONFIG_FSL_IMX7', if_true: files('fsl-imx7.c', 'mcimx7d-sabre.c'))
+arm_ss.add(when: 'CONFIG_FSL_IMX8MP', if_true: files('fsl-imx8mp.c'))
+arm_ss.add(when: 'CONFIG_FSL_IMX8MP_EVK', if_true: files('imx8mp-evk.c'))
 arm_ss.add(when: 'CONFIG_ARM_SMMUV3', if_true: files('smmuv3.c'))
 arm_ss.add(when: 'CONFIG_FSL_IMX6UL', if_true: files('fsl-imx6ul.c', 'mcimx6ul-evk.c'))
 arm_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_soc.c'))
diff --git a/hw/arm/npcm7xx.c b/hw/arm/npcm7xx.c
index 386b2c35e9b81..2d6e08b72ba06 100644
--- a/hw/arm/npcm7xx.c
+++ b/hw/arm/npcm7xx.c
@@ -292,17 +292,21 @@ static const struct {
     hwaddr regs_addr;
     int cs_count;
     const hwaddr *flash_addr;
+    size_t flash_size;
 } npcm7xx_fiu[] = {
     {
         .name = "fiu0",
         .regs_addr = 0xfb000000,
         .cs_count = ARRAY_SIZE(npcm7xx_fiu0_flash_addr),
         .flash_addr = npcm7xx_fiu0_flash_addr,
+        .flash_size = 128 * MiB,
+
     }, {
         .name = "fiu3",
         .regs_addr = 0xc0000000,
         .cs_count = ARRAY_SIZE(npcm7xx_fiu3_flash_addr),
         .flash_addr = npcm7xx_fiu3_flash_addr,
+        .flash_size = 128 * MiB,
     },
 };
 
@@ -735,6 +739,8 @@ static void npcm7xx_realize(DeviceState *dev, Error **errp)
 
         object_property_set_int(OBJECT(sbd), "cs-count",
                                 npcm7xx_fiu[i].cs_count, &error_abort);
+        object_property_set_int(OBJECT(sbd), "flash-size",
+                                npcm7xx_fiu[i].flash_size, &error_abort);
         sysbus_realize(sbd, &error_abort);
 
         sysbus_mmio_map(sbd, 0, npcm7xx_fiu[i].regs_addr);
diff --git a/hw/arm/npcm7xx_boards.c b/hw/arm/npcm7xx_boards.c
index 7727e0dc4bba6..eb28b97ad83e7 100644
--- a/hw/arm/npcm7xx_boards.c
+++ b/hw/arm/npcm7xx_boards.c
@@ -481,6 +481,7 @@ static void npcm750_evb_machine_class_init(ObjectClass *oc, void *data)
 
     mc->desc = "Nuvoton NPCM750 Evaluation Board (Cortex-A9)";
     mc->init = npcm750_evb_init;
+    mc->auto_create_sdcard = true;
     mc->default_ram_size = 512 * MiB;
 };
 
@@ -493,6 +494,7 @@ static void gsj_machine_class_init(ObjectClass *oc, void *data)
 
     mc->desc = "Quanta GSJ (Cortex-A9)";
     mc->init = quanta_gsj_init;
+    mc->auto_create_sdcard = true;
     mc->default_ram_size = 512 * MiB;
 };
 
@@ -505,6 +507,7 @@ static void gbs_bmc_machine_class_init(ObjectClass *oc, void *data)
 
     mc->desc = "Quanta GBS (Cortex-A9)";
     mc->init = quanta_gbs_init;
+    mc->auto_create_sdcard = true;
     mc->default_ram_size = 1 * GiB;
 }
 
@@ -517,6 +520,7 @@ static void kudo_bmc_machine_class_init(ObjectClass *oc, void *data)
 
     mc->desc = "Kudo BMC (Cortex-A9)";
     mc->init = kudo_bmc_init;
+    mc->auto_create_sdcard = true;
     mc->default_ram_size = 1 * GiB;
 };
 
@@ -529,6 +533,7 @@ static void mori_bmc_machine_class_init(ObjectClass *oc, void *data)
 
     mc->desc = "Mori BMC (Cortex-A9)";
     mc->init = mori_bmc_init;
+    mc->auto_create_sdcard = true;
     mc->default_ram_size = 1 * GiB;
 }
 
diff --git a/hw/arm/npcm8xx.c b/hw/arm/npcm8xx.c
new file mode 100644
index 0000000000000..f182accc47cf5
--- /dev/null
+++ b/hw/arm/npcm8xx.c
@@ -0,0 +1,805 @@
+/*
+ * Nuvoton NPCM8xx SoC family.
+ *
+ * Copyright 2022 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "qemu/osdep.h"
+
+#include "hw/boards.h"
+#include "hw/arm/boot.h"
+#include "hw/arm/bsa.h"
+#include "hw/arm/npcm8xx.h"
+#include "hw/char/serial-mm.h"
+#include "hw/intc/arm_gic.h"
+#include "hw/loader.h"
+#include "hw/misc/unimp.h"
+#include "hw/qdev-clock.h"
+#include "hw/qdev-properties.h"
+#include "qapi/error.h"
+#include "qemu/units.h"
+#include "system/system.h"
+
+/*
+ * This covers the whole MMIO space. We'll use this to catch any MMIO accesses
+ * that aren't handled by a device.
+ */
+#define NPCM8XX_MMIO_BA         0x80000000
+#define NPCM8XX_MMIO_SZ         0x7ffd0000
+
+/* OTP fuse array */
+#define NPCM8XX_OTP_BA          0xf0189000
+
+/* GIC Distributor */
+#define NPCM8XX_GICD_BA         0xdfff9000
+#define NPCM8XX_GICC_BA         0xdfffa000
+
+/* Core system modules. */
+#define NPCM8XX_CPUP_BA         0xf03fe000
+#define NPCM8XX_GCR_BA          0xf0800000
+#define NPCM8XX_CLK_BA          0xf0801000
+#define NPCM8XX_MC_BA           0xf0824000
+#define NPCM8XX_RNG_BA          0xf000b000
+
+/* ADC Module */
+#define NPCM8XX_ADC_BA          0xf000c000
+
+/* Internal AHB SRAM */
+#define NPCM8XX_RAM3_BA         0xc0008000
+#define NPCM8XX_RAM3_SZ         (4 * KiB)
+
+/* Memory blocks at the end of the address space */
+#define NPCM8XX_RAM2_BA         0xfffb0000
+#define NPCM8XX_RAM2_SZ         (256 * KiB)
+#define NPCM8XX_ROM_BA          0xffff0100
+#define NPCM8XX_ROM_SZ          (64 * KiB)
+
+/* SDHCI Modules */
+#define NPCM8XX_MMC_BA          0xf0842000
+
+/* Run PLL1 at 1600 MHz */
+#define NPCM8XX_PLLCON1_FIXUP_VAL   0x00402101
+/* Run the CPU from PLL1 and UART from PLL2 */
+#define NPCM8XX_CLKSEL_FIXUP_VAL    0x004aaba9
+
+/* Clock configuration values to be fixed up when bypassing bootloader */
+
+/*
+ * Interrupt lines going into the GIC. This does not include internal Cortex-A35
+ * interrupts.
+ */
+enum NPCM8xxInterrupt {
+    NPCM8XX_ADC_IRQ             = 0,
+    NPCM8XX_PECI_IRQ            = 6,
+    NPCM8XX_KCS_HIB_IRQ         = 9,
+    NPCM8XX_MMC_IRQ             = 26,
+    NPCM8XX_TIMER0_IRQ          = 32,   /* Timer Module 0 */
+    NPCM8XX_TIMER1_IRQ,
+    NPCM8XX_TIMER2_IRQ,
+    NPCM8XX_TIMER3_IRQ,
+    NPCM8XX_TIMER4_IRQ,
+    NPCM8XX_TIMER5_IRQ,                 /* Timer Module 1 */
+    NPCM8XX_TIMER6_IRQ,
+    NPCM8XX_TIMER7_IRQ,
+    NPCM8XX_TIMER8_IRQ,
+    NPCM8XX_TIMER9_IRQ,
+    NPCM8XX_TIMER10_IRQ,                /* Timer Module 2 */
+    NPCM8XX_TIMER11_IRQ,
+    NPCM8XX_TIMER12_IRQ,
+    NPCM8XX_TIMER13_IRQ,
+    NPCM8XX_TIMER14_IRQ,
+    NPCM8XX_WDG0_IRQ            = 47,   /* Timer Module 0 Watchdog */
+    NPCM8XX_WDG1_IRQ,                   /* Timer Module 1 Watchdog */
+    NPCM8XX_WDG2_IRQ,                   /* Timer Module 2 Watchdog */
+    NPCM8XX_EHCI1_IRQ           = 61,
+    NPCM8XX_OHCI1_IRQ,
+    NPCM8XX_EHCI2_IRQ,
+    NPCM8XX_OHCI2_IRQ,
+    NPCM8XX_PWM0_IRQ            = 93,   /* PWM module 0 */
+    NPCM8XX_PWM1_IRQ,                   /* PWM module 1 */
+    NPCM8XX_MFT0_IRQ            = 96,   /* MFT module 0 */
+    NPCM8XX_MFT1_IRQ,                   /* MFT module 1 */
+    NPCM8XX_MFT2_IRQ,                   /* MFT module 2 */
+    NPCM8XX_MFT3_IRQ,                   /* MFT module 3 */
+    NPCM8XX_MFT4_IRQ,                   /* MFT module 4 */
+    NPCM8XX_MFT5_IRQ,                   /* MFT module 5 */
+    NPCM8XX_MFT6_IRQ,                   /* MFT module 6 */
+    NPCM8XX_MFT7_IRQ,                   /* MFT module 7 */
+    NPCM8XX_PCI_MBOX1_IRQ       = 105,
+    NPCM8XX_PCI_MBOX2_IRQ,
+    NPCM8XX_GPIO0_IRQ           = 116,
+    NPCM8XX_GPIO1_IRQ,
+    NPCM8XX_GPIO2_IRQ,
+    NPCM8XX_GPIO3_IRQ,
+    NPCM8XX_GPIO4_IRQ,
+    NPCM8XX_GPIO5_IRQ,
+    NPCM8XX_GPIO6_IRQ,
+    NPCM8XX_GPIO7_IRQ,
+    NPCM8XX_SMBUS0_IRQ          = 128,
+    NPCM8XX_SMBUS1_IRQ,
+    NPCM8XX_SMBUS2_IRQ,
+    NPCM8XX_SMBUS3_IRQ,
+    NPCM8XX_SMBUS4_IRQ,
+    NPCM8XX_SMBUS5_IRQ,
+    NPCM8XX_SMBUS6_IRQ,
+    NPCM8XX_SMBUS7_IRQ,
+    NPCM8XX_SMBUS8_IRQ,
+    NPCM8XX_SMBUS9_IRQ,
+    NPCM8XX_SMBUS10_IRQ,
+    NPCM8XX_SMBUS11_IRQ,
+    NPCM8XX_SMBUS12_IRQ,
+    NPCM8XX_SMBUS13_IRQ,
+    NPCM8XX_SMBUS14_IRQ,
+    NPCM8XX_SMBUS15_IRQ,
+    NPCM8XX_SMBUS16_IRQ,
+    NPCM8XX_SMBUS17_IRQ,
+    NPCM8XX_SMBUS18_IRQ,
+    NPCM8XX_SMBUS19_IRQ,
+    NPCM8XX_SMBUS20_IRQ,
+    NPCM8XX_SMBUS21_IRQ,
+    NPCM8XX_SMBUS22_IRQ,
+    NPCM8XX_SMBUS23_IRQ,
+    NPCM8XX_SMBUS24_IRQ,
+    NPCM8XX_SMBUS25_IRQ,
+    NPCM8XX_SMBUS26_IRQ,
+    NPCM8XX_UART0_IRQ           = 192,
+    NPCM8XX_UART1_IRQ,
+    NPCM8XX_UART2_IRQ,
+    NPCM8XX_UART3_IRQ,
+    NPCM8XX_UART4_IRQ,
+    NPCM8XX_UART5_IRQ,
+    NPCM8XX_UART6_IRQ,
+};
+
+/* Total number of GIC interrupts, including internal Cortex-A35 interrupts. */
+#define NPCM8XX_NUM_IRQ         (288)
+#define NPCM8XX_PPI_BASE(cpu)   \
+    ((NPCM8XX_NUM_IRQ - GIC_INTERNAL) + (cpu) * GIC_INTERNAL)
+
+/* Register base address for each Timer Module */
+static const hwaddr npcm8xx_tim_addr[] = {
+    0xf0008000,
+    0xf0009000,
+    0xf000a000,
+};
+
+/* Register base address for each 16550 UART */
+static const hwaddr npcm8xx_uart_addr[] = {
+    0xf0000000,
+    0xf0001000,
+    0xf0002000,
+    0xf0003000,
+    0xf0004000,
+    0xf0005000,
+    0xf0006000,
+};
+
+/* Direct memory-mapped access to SPI0 CS0-1. */
+static const hwaddr npcm8xx_fiu0_flash_addr[] = {
+    0x80000000, /* CS0 */
+    0x88000000, /* CS1 */
+};
+
+/* Direct memory-mapped access to SPI1 CS0-3. */
+static const hwaddr npcm8xx_fiu1_flash_addr[] = {
+    0x90000000, /* CS0 */
+    0x91000000, /* CS1 */
+    0x92000000, /* CS2 */
+    0x93000000, /* CS3 */
+};
+
+/* Direct memory-mapped access to SPI3 CS0-3. */
+static const hwaddr npcm8xx_fiu3_flash_addr[] = {
+    0xa0000000, /* CS0 */
+    0xa8000000, /* CS1 */
+    0xb0000000, /* CS2 */
+    0xb8000000, /* CS3 */
+};
+
+/* Register base address for each PWM Module */
+static const hwaddr npcm8xx_pwm_addr[] = {
+    0xf0103000,
+    0xf0104000,
+    0xf0105000,
+};
+
+/* Register base address for each MFT Module */
+static const hwaddr npcm8xx_mft_addr[] = {
+    0xf0180000,
+    0xf0181000,
+    0xf0182000,
+    0xf0183000,
+    0xf0184000,
+    0xf0185000,
+    0xf0186000,
+    0xf0187000,
+};
+
+/* Direct memory-mapped access to each SMBus Module. */
+static const hwaddr npcm8xx_smbus_addr[] = {
+    0xf0080000,
+    0xf0081000,
+    0xf0082000,
+    0xf0083000,
+    0xf0084000,
+    0xf0085000,
+    0xf0086000,
+    0xf0087000,
+    0xf0088000,
+    0xf0089000,
+    0xf008a000,
+    0xf008b000,
+    0xf008c000,
+    0xf008d000,
+    0xf008e000,
+    0xf008f000,
+    0xfff00000,
+    0xfff01000,
+    0xfff02000,
+    0xfff03000,
+    0xfff04000,
+    0xfff05000,
+    0xfff06000,
+    0xfff07000,
+    0xfff08000,
+    0xfff09000,
+    0xfff0a000,
+};
+
+/* Register base address for each USB host EHCI registers */
+static const hwaddr npcm8xx_ehci_addr[] = {
+    0xf0828100,
+    0xf082a100,
+};
+
+/* Register base address for each USB host OHCI registers */
+static const hwaddr npcm8xx_ohci_addr[] = {
+    0xf0829000,
+    0xf082b000,
+};
+
+static const struct {
+    hwaddr regs_addr;
+    uint32_t reset_pu;
+    uint32_t reset_pd;
+    uint32_t reset_osrc;
+    uint32_t reset_odsc;
+} npcm8xx_gpio[] = {
+    {
+        .regs_addr = 0xf0010000,
+        .reset_pu = 0x00000300,
+        .reset_pd = 0x000f0000,
+    }, {
+        .regs_addr = 0xf0011000,
+        .reset_pu = 0xe0fefe01,
+        .reset_pd = 0x07000000,
+    }, {
+        .regs_addr = 0xf0012000,
+        .reset_pu = 0xc00fffff,
+        .reset_pd = 0x3ff00000,
+    }, {
+        .regs_addr = 0xf0013000,
+        .reset_pd = 0x00003000,
+    }, {
+        .regs_addr = 0xf0014000,
+        .reset_pu = 0xffff0000,
+    }, {
+        .regs_addr = 0xf0015000,
+        .reset_pu = 0xff8387fe,
+        .reset_pd = 0x007c0001,
+        .reset_osrc = 0x08000000,
+    }, {
+        .regs_addr = 0xf0016000,
+        .reset_pu = 0x00000801,
+        .reset_pd = 0x00000302,
+    }, {
+        .regs_addr = 0xf0017000,
+        .reset_pu = 0x000002ff,
+        .reset_pd = 0x00000c00,
+    },
+};
+
+static const struct {
+    const char *name;
+    hwaddr regs_addr;
+    int cs_count;
+    const hwaddr *flash_addr;
+    size_t flash_size;
+} npcm8xx_fiu[] = {
+    {
+        .name = "fiu0",
+        .regs_addr = 0xfb000000,
+        .cs_count = ARRAY_SIZE(npcm8xx_fiu0_flash_addr),
+        .flash_addr = npcm8xx_fiu0_flash_addr,
+        .flash_size = 128 * MiB,
+    },
+    {
+        .name = "fiu1",
+        .regs_addr = 0xfb002000,
+        .cs_count = ARRAY_SIZE(npcm8xx_fiu1_flash_addr),
+        .flash_addr = npcm8xx_fiu1_flash_addr,
+        .flash_size = 16 * MiB,
+    }, {
+        .name = "fiu3",
+        .regs_addr = 0xc0000000,
+        .cs_count = ARRAY_SIZE(npcm8xx_fiu3_flash_addr),
+        .flash_addr = npcm8xx_fiu3_flash_addr,
+        .flash_size = 128 * MiB,
+    },
+};
+
+static struct arm_boot_info npcm8xx_binfo = {
+    .loader_start           = NPCM8XX_LOADER_START,
+    .smp_loader_start       = NPCM8XX_SMP_LOADER_START,
+    .smp_bootreg_addr       = NPCM8XX_SMP_BOOTREG_ADDR,
+    .gic_cpu_if_addr        = NPCM8XX_GICC_BA,
+    .secure_boot            = false,
+    .board_id               = -1,
+    .board_setup_addr       = NPCM8XX_BOARD_SETUP_ADDR,
+};
+
+void npcm8xx_load_kernel(MachineState *machine, NPCM8xxState *soc)
+{
+    npcm8xx_binfo.ram_size = machine->ram_size;
+
+    arm_load_kernel(&soc->cpu[0], machine, &npcm8xx_binfo);
+}
+
+static void npcm8xx_init_fuses(NPCM8xxState *s)
+{
+    NPCM8xxClass *nc = NPCM8XX_GET_CLASS(s);
+    uint32_t value;
+
+    /*
+     * The initial mask of disabled modules indicates the chip derivative (e.g.
+     * NPCM750 or NPCM730).
+     */
+    value = cpu_to_le32(nc->disabled_modules);
+    npcm7xx_otp_array_write(&s->fuse_array, &value, NPCM7XX_FUSE_DERIVATIVE,
+                            sizeof(value));
+}
+
+static void npcm8xx_write_adc_calibration(NPCM8xxState *s)
+{
+    /* Both ADC and the fuse array must have realized. */
+    QEMU_BUILD_BUG_ON(sizeof(s->adc.calibration_r_values) != 4);
+    npcm7xx_otp_array_write(&s->fuse_array, s->adc.calibration_r_values,
+            NPCM7XX_FUSE_ADC_CALIB, sizeof(s->adc.calibration_r_values));
+}
+
+static qemu_irq npcm8xx_irq(NPCM8xxState *s, int n)
+{
+    return qdev_get_gpio_in(DEVICE(&s->gic), n);
+}
+
+static void npcm8xx_init(Object *obj)
+{
+    NPCM8xxState *s = NPCM8XX(obj);
+    int i;
+
+    object_initialize_child(obj, "cpu-cluster", &s->cpu_cluster,
+                            TYPE_CPU_CLUSTER);
+    for (i = 0; i < NPCM8XX_MAX_NUM_CPUS; i++) {
+        object_initialize_child(OBJECT(&s->cpu_cluster), "cpu[*]", &s->cpu[i],
+                                ARM_CPU_TYPE_NAME("cortex-a35"));
+    }
+    object_initialize_child(obj, "gic", &s->gic, TYPE_ARM_GIC);
+    object_initialize_child(obj, "gcr", &s->gcr, TYPE_NPCM8XX_GCR);
+    object_property_add_alias(obj, "power-on-straps", OBJECT(&s->gcr),
+                              "power-on-straps");
+    object_initialize_child(obj, "clk", &s->clk, TYPE_NPCM8XX_CLK);
+    object_initialize_child(obj, "otp", &s->fuse_array,
+                            TYPE_NPCM7XX_FUSE_ARRAY);
+    object_initialize_child(obj, "mc", &s->mc, TYPE_NPCM7XX_MC);
+    object_initialize_child(obj, "rng", &s->rng, TYPE_NPCM7XX_RNG);
+    object_initialize_child(obj, "adc", &s->adc, TYPE_NPCM7XX_ADC);
+
+    for (i = 0; i < ARRAY_SIZE(s->tim); i++) {
+        object_initialize_child(obj, "tim[*]", &s->tim[i], TYPE_NPCM7XX_TIMER);
+    }
+
+    for (i = 0; i < ARRAY_SIZE(s->gpio); i++) {
+        object_initialize_child(obj, "gpio[*]", &s->gpio[i], TYPE_NPCM7XX_GPIO);
+    }
+
+
+    for (i = 0; i < ARRAY_SIZE(s->smbus); i++) {
+        object_initialize_child(obj, "smbus[*]", &s->smbus[i],
+                                TYPE_NPCM7XX_SMBUS);
+        DEVICE(&s->smbus[i])->id = g_strdup_printf("smbus[%d]", i);
+    }
+
+    for (i = 0; i < ARRAY_SIZE(s->ehci); i++) {
+        object_initialize_child(obj, "ehci[*]", &s->ehci[i], TYPE_NPCM7XX_EHCI);
+    }
+    for (i = 0; i < ARRAY_SIZE(s->ohci); i++) {
+        object_initialize_child(obj, "ohci[*]", &s->ohci[i], TYPE_SYSBUS_OHCI);
+    }
+
+    QEMU_BUILD_BUG_ON(ARRAY_SIZE(npcm8xx_fiu) != ARRAY_SIZE(s->fiu));
+    for (i = 0; i < ARRAY_SIZE(s->fiu); i++) {
+        object_initialize_child(obj, npcm8xx_fiu[i].name, &s->fiu[i],
+                                TYPE_NPCM7XX_FIU);
+    }
+
+    for (i = 0; i < ARRAY_SIZE(s->pwm); i++) {
+        object_initialize_child(obj, "pwm[*]", &s->pwm[i], TYPE_NPCM7XX_PWM);
+    }
+
+    for (i = 0; i < ARRAY_SIZE(s->mft); i++) {
+        object_initialize_child(obj, "mft[*]", &s->mft[i], TYPE_NPCM7XX_MFT);
+    }
+
+    object_initialize_child(obj, "mmc", &s->mmc, TYPE_NPCM7XX_SDHCI);
+}
+
+static void npcm8xx_realize(DeviceState *dev, Error **errp)
+{
+    NPCM8xxState *s = NPCM8XX(dev);
+    NPCM8xxClass *nc = NPCM8XX_GET_CLASS(s);
+    int i;
+
+    if (memory_region_size(s->dram) > NPCM8XX_DRAM_SZ) {
+        error_setg(errp, "%s: NPCM8xx cannot address more than %" PRIu64
+                   " MiB of DRAM", __func__, NPCM8XX_DRAM_SZ / MiB);
+        return;
+    }
+
+    /* CPUs */
+    for (i = 0; i < nc->num_cpus; i++) {
+        object_property_set_int(OBJECT(&s->cpu[i]), "mp-affinity",
+                                arm_build_mp_affinity(i, NPCM8XX_MAX_NUM_CPUS),
+                                &error_abort);
+        object_property_set_bool(OBJECT(&s->cpu[i]), "reset-hivecs", true,
+                                 &error_abort);
+        object_property_set_int(OBJECT(&s->cpu[i]), "core-count",
+                                nc->num_cpus, &error_abort);
+
+        /* Disable security extensions. */
+        object_property_set_bool(OBJECT(&s->cpu[i]), "has_el3", false,
+                                 &error_abort);
+
+        if (!qdev_realize(DEVICE(&s->cpu[i]), NULL, errp)) {
+            return;
+        }
+    }
+
+    /* ARM GIC for Cortex A35. Can only fail if we pass bad parameters here. */
+    object_property_set_uint(OBJECT(&s->gic), "num-cpu", nc->num_cpus, errp);
+    object_property_set_uint(OBJECT(&s->gic), "num-irq", NPCM8XX_NUM_IRQ, errp);
+    object_property_set_uint(OBJECT(&s->gic), "revision", 2, errp);
+    object_property_set_bool(OBJECT(&s->gic), "has-security-extensions", true,
+                             errp);
+    if (!sysbus_realize(SYS_BUS_DEVICE(&s->gic), errp)) {
+        return;
+    }
+    for (i = 0; i < nc->num_cpus; i++) {
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->gic), i,
+                           qdev_get_gpio_in(DEVICE(&s->cpu[i]), ARM_CPU_IRQ));
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->gic), i + nc->num_cpus,
+                           qdev_get_gpio_in(DEVICE(&s->cpu[i]), ARM_CPU_FIQ));
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->gic), i + nc->num_cpus * 2,
+                           qdev_get_gpio_in(DEVICE(&s->cpu[i]), ARM_CPU_VIRQ));
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->gic), i + nc->num_cpus * 3,
+                           qdev_get_gpio_in(DEVICE(&s->cpu[i]), ARM_CPU_VFIQ));
+
+        qdev_connect_gpio_out(DEVICE(&s->cpu[i]), GTIMER_PHYS,
+            qdev_get_gpio_in(DEVICE(&s->gic),
+                NPCM8XX_PPI_BASE(i) + ARCH_TIMER_NS_EL1_IRQ));
+        qdev_connect_gpio_out(DEVICE(&s->cpu[i]), GTIMER_VIRT,
+            qdev_get_gpio_in(DEVICE(&s->gic),
+                NPCM8XX_PPI_BASE(i) + ARCH_TIMER_VIRT_IRQ));
+        qdev_connect_gpio_out(DEVICE(&s->cpu[i]), GTIMER_HYP,
+            qdev_get_gpio_in(DEVICE(&s->gic),
+                NPCM8XX_PPI_BASE(i) + ARCH_TIMER_NS_EL2_IRQ));
+        qdev_connect_gpio_out(DEVICE(&s->cpu[i]), GTIMER_SEC,
+            qdev_get_gpio_in(DEVICE(&s->gic),
+                NPCM8XX_PPI_BASE(i) + ARCH_TIMER_S_EL1_IRQ));
+    }
+    sysbus_mmio_map(SYS_BUS_DEVICE(&s->gic), 0, NPCM8XX_GICD_BA);
+    sysbus_mmio_map(SYS_BUS_DEVICE(&s->gic), 1, NPCM8XX_GICC_BA);
+
+    /* CPU cluster */
+    qdev_prop_set_uint32(DEVICE(&s->cpu_cluster), "cluster-id", 0);
+    qdev_realize(DEVICE(&s->cpu_cluster), NULL, &error_fatal);
+
+    /* System Global Control Registers (GCR). Can fail due to user input. */
+    object_property_set_int(OBJECT(&s->gcr), "disabled-modules",
+                            nc->disabled_modules, &error_abort);
+    object_property_add_const_link(OBJECT(&s->gcr), "dram-mr", OBJECT(s->dram));
+    if (!sysbus_realize(SYS_BUS_DEVICE(&s->gcr), errp)) {
+        return;
+    }
+    sysbus_mmio_map(SYS_BUS_DEVICE(&s->gcr), 0, NPCM8XX_GCR_BA);
+
+    /* Clock Control Registers (CLK). Cannot fail. */
+    sysbus_realize(SYS_BUS_DEVICE(&s->clk), &error_abort);
+    sysbus_mmio_map(SYS_BUS_DEVICE(&s->clk), 0, NPCM8XX_CLK_BA);
+
+    /* OTP fuse strap array. Cannot fail. */
+    sysbus_realize(SYS_BUS_DEVICE(&s->fuse_array), &error_abort);
+    sysbus_mmio_map(SYS_BUS_DEVICE(&s->fuse_array), 0, NPCM8XX_OTP_BA);
+    npcm8xx_init_fuses(s);
+
+    /* Fake Memory Controller (MC). Cannot fail. */
+    sysbus_realize(SYS_BUS_DEVICE(&s->mc), &error_abort);
+    sysbus_mmio_map(SYS_BUS_DEVICE(&s->mc), 0, NPCM8XX_MC_BA);
+
+    /* ADC Modules. Cannot fail. */
+    qdev_connect_clock_in(DEVICE(&s->adc), "clock", qdev_get_clock_out(
+                          DEVICE(&s->clk), "adc-clock"));
+    sysbus_realize(SYS_BUS_DEVICE(&s->adc), &error_abort);
+    sysbus_mmio_map(SYS_BUS_DEVICE(&s->adc), 0, NPCM8XX_ADC_BA);
+    sysbus_connect_irq(SYS_BUS_DEVICE(&s->adc), 0,
+                       npcm8xx_irq(s, NPCM8XX_ADC_IRQ));
+    npcm8xx_write_adc_calibration(s);
+
+    /* Timer Modules (TIM). Cannot fail. */
+    QEMU_BUILD_BUG_ON(ARRAY_SIZE(npcm8xx_tim_addr) != ARRAY_SIZE(s->tim));
+    for (i = 0; i < ARRAY_SIZE(s->tim); i++) {
+        SysBusDevice *sbd = SYS_BUS_DEVICE(&s->tim[i]);
+        int first_irq;
+        int j;
+
+        /* Connect the timer clock. */
+        qdev_connect_clock_in(DEVICE(&s->tim[i]), "clock", qdev_get_clock_out(
+                    DEVICE(&s->clk), "timer-clock"));
+
+        sysbus_realize(sbd, &error_abort);
+        sysbus_mmio_map(sbd, 0, npcm8xx_tim_addr[i]);
+
+        first_irq = NPCM8XX_TIMER0_IRQ + i * NPCM7XX_TIMERS_PER_CTRL;
+        for (j = 0; j < NPCM7XX_TIMERS_PER_CTRL; j++) {
+            qemu_irq irq = npcm8xx_irq(s, first_irq + j);
+            sysbus_connect_irq(sbd, j, irq);
+        }
+
+        /* IRQ for watchdogs */
+        sysbus_connect_irq(sbd, NPCM7XX_TIMERS_PER_CTRL,
+                npcm8xx_irq(s, NPCM8XX_WDG0_IRQ + i));
+        /* GPIO that connects clk module with watchdog */
+        qdev_connect_gpio_out_named(DEVICE(&s->tim[i]),
+                NPCM7XX_WATCHDOG_RESET_GPIO_OUT, 0,
+                qdev_get_gpio_in_named(DEVICE(&s->clk),
+                        NPCM7XX_WATCHDOG_RESET_GPIO_IN, i));
+    }
+
+    /* UART0..6 (16550 compatible) */
+    for (i = 0; i < ARRAY_SIZE(npcm8xx_uart_addr); i++) {
+        serial_mm_init(get_system_memory(), npcm8xx_uart_addr[i], 2,
+                       npcm8xx_irq(s, NPCM8XX_UART0_IRQ + i), 115200,
+                       serial_hd(i), DEVICE_LITTLE_ENDIAN);
+    }
+
+    /* Random Number Generator. Cannot fail. */
+    sysbus_realize(SYS_BUS_DEVICE(&s->rng), &error_abort);
+    sysbus_mmio_map(SYS_BUS_DEVICE(&s->rng), 0, NPCM8XX_RNG_BA);
+
+    /* GPIO modules. Cannot fail. */
+    QEMU_BUILD_BUG_ON(ARRAY_SIZE(npcm8xx_gpio) != ARRAY_SIZE(s->gpio));
+    for (i = 0; i < ARRAY_SIZE(s->gpio); i++) {
+        Object *obj = OBJECT(&s->gpio[i]);
+
+        object_property_set_uint(obj, "reset-pullup",
+                                 npcm8xx_gpio[i].reset_pu, &error_abort);
+        object_property_set_uint(obj, "reset-pulldown",
+                                 npcm8xx_gpio[i].reset_pd, &error_abort);
+        object_property_set_uint(obj, "reset-osrc",
+                                 npcm8xx_gpio[i].reset_osrc, &error_abort);
+        object_property_set_uint(obj, "reset-odsc",
+                                 npcm8xx_gpio[i].reset_odsc, &error_abort);
+        sysbus_realize(SYS_BUS_DEVICE(obj), &error_abort);
+        sysbus_mmio_map(SYS_BUS_DEVICE(obj), 0, npcm8xx_gpio[i].regs_addr);
+        sysbus_connect_irq(SYS_BUS_DEVICE(obj), 0,
+                           npcm8xx_irq(s, NPCM8XX_GPIO0_IRQ + i));
+    }
+
+    /* SMBus modules. Cannot fail. */
+    QEMU_BUILD_BUG_ON(ARRAY_SIZE(npcm8xx_smbus_addr) != ARRAY_SIZE(s->smbus));
+    for (i = 0; i < ARRAY_SIZE(s->smbus); i++) {
+        Object *obj = OBJECT(&s->smbus[i]);
+
+        sysbus_realize(SYS_BUS_DEVICE(obj), &error_abort);
+        sysbus_mmio_map(SYS_BUS_DEVICE(obj), 0, npcm8xx_smbus_addr[i]);
+        sysbus_connect_irq(SYS_BUS_DEVICE(obj), 0,
+                           npcm8xx_irq(s, NPCM8XX_SMBUS0_IRQ + i));
+    }
+
+    /* USB Host */
+    QEMU_BUILD_BUG_ON(ARRAY_SIZE(s->ohci) != ARRAY_SIZE(s->ehci));
+    for (i = 0; i < ARRAY_SIZE(s->ehci); i++) {
+        object_property_set_bool(OBJECT(&s->ehci[i]), "companion-enable", true,
+                                 &error_abort);
+        sysbus_realize(SYS_BUS_DEVICE(&s->ehci[i]), &error_abort);
+        sysbus_mmio_map(SYS_BUS_DEVICE(&s->ehci[i]), 0, npcm8xx_ehci_addr[i]);
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->ehci[i]), 0,
+                           npcm8xx_irq(s, NPCM8XX_EHCI1_IRQ + 2 * i));
+    }
+    for (i = 0; i < ARRAY_SIZE(s->ohci); i++) {
+        object_property_set_str(OBJECT(&s->ohci[i]), "masterbus", "usb-bus.0",
+                                &error_abort);
+        object_property_set_uint(OBJECT(&s->ohci[i]), "num-ports", 1,
+                                 &error_abort);
+        object_property_set_uint(OBJECT(&s->ohci[i]), "firstport", i,
+                                 &error_abort);
+        sysbus_realize(SYS_BUS_DEVICE(&s->ohci[i]), &error_abort);
+        sysbus_mmio_map(SYS_BUS_DEVICE(&s->ohci[i]), 0, npcm8xx_ohci_addr[i]);
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->ohci[i]), 0,
+                           npcm8xx_irq(s, NPCM8XX_OHCI1_IRQ + 2 * i));
+    }
+
+    /* PWM Modules. Cannot fail. */
+    QEMU_BUILD_BUG_ON(ARRAY_SIZE(npcm8xx_pwm_addr) != ARRAY_SIZE(s->pwm));
+    for (i = 0; i < ARRAY_SIZE(s->pwm); i++) {
+        SysBusDevice *sbd = SYS_BUS_DEVICE(&s->pwm[i]);
+
+        qdev_connect_clock_in(DEVICE(&s->pwm[i]), "clock", qdev_get_clock_out(
+                    DEVICE(&s->clk), "apb3-clock"));
+        sysbus_realize(sbd, &error_abort);
+        sysbus_mmio_map(sbd, 0, npcm8xx_pwm_addr[i]);
+        sysbus_connect_irq(sbd, i, npcm8xx_irq(s, NPCM8XX_PWM0_IRQ + i));
+    }
+
+    /* MFT Modules. Cannot fail. */
+    QEMU_BUILD_BUG_ON(ARRAY_SIZE(npcm8xx_mft_addr) != ARRAY_SIZE(s->mft));
+    for (i = 0; i < ARRAY_SIZE(s->mft); i++) {
+        SysBusDevice *sbd = SYS_BUS_DEVICE(&s->mft[i]);
+
+        qdev_connect_clock_in(DEVICE(&s->mft[i]), "clock-in",
+                              qdev_get_clock_out(DEVICE(&s->clk),
+                                                 "apb4-clock"));
+        sysbus_realize(sbd, &error_abort);
+        sysbus_mmio_map(sbd, 0, npcm8xx_mft_addr[i]);
+        sysbus_connect_irq(sbd, 0, npcm8xx_irq(s, NPCM8XX_MFT0_IRQ + i));
+    }
+
+    /*
+     * Flash Interface Unit (FIU). Can fail if incorrect number of chip selects
+     * specified, but this is a programming error.
+     */
+    QEMU_BUILD_BUG_ON(ARRAY_SIZE(npcm8xx_fiu) != ARRAY_SIZE(s->fiu));
+    for (i = 0; i < ARRAY_SIZE(s->fiu); i++) {
+        SysBusDevice *sbd = SYS_BUS_DEVICE(&s->fiu[i]);
+        int j;
+
+        object_property_set_int(OBJECT(sbd), "cs-count",
+                                npcm8xx_fiu[i].cs_count, &error_abort);
+        object_property_set_int(OBJECT(sbd), "flash-size",
+                                npcm8xx_fiu[i].flash_size, &error_abort);
+        sysbus_realize(sbd, &error_abort);
+
+        sysbus_mmio_map(sbd, 0, npcm8xx_fiu[i].regs_addr);
+        for (j = 0; j < npcm8xx_fiu[i].cs_count; j++) {
+            sysbus_mmio_map(sbd, j + 1, npcm8xx_fiu[i].flash_addr[j]);
+        }
+    }
+
+    /* RAM2 (SRAM) */
+    memory_region_init_ram(&s->sram, OBJECT(dev), "ram2",
+                           NPCM8XX_RAM2_SZ, &error_abort);
+    memory_region_add_subregion(get_system_memory(), NPCM8XX_RAM2_BA, &s->sram);
+
+    /* RAM3 (SRAM) */
+    memory_region_init_ram(&s->ram3, OBJECT(dev), "ram3",
+                           NPCM8XX_RAM3_SZ, &error_abort);
+    memory_region_add_subregion(get_system_memory(), NPCM8XX_RAM3_BA, &s->ram3);
+
+    /* Internal ROM */
+    memory_region_init_rom(&s->irom, OBJECT(dev), "irom", NPCM8XX_ROM_SZ,
+                           &error_abort);
+    memory_region_add_subregion(get_system_memory(), NPCM8XX_ROM_BA, &s->irom);
+
+    /* SDHCI */
+    sysbus_realize(SYS_BUS_DEVICE(&s->mmc), &error_abort);
+    sysbus_mmio_map(SYS_BUS_DEVICE(&s->mmc), 0, NPCM8XX_MMC_BA);
+    sysbus_connect_irq(SYS_BUS_DEVICE(&s->mmc), 0,
+            npcm8xx_irq(s, NPCM8XX_MMC_IRQ));
+
+
+    create_unimplemented_device("npcm8xx.shm",          0xc0001000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.gicextra",     0xdfffa000,  24 * KiB);
+    create_unimplemented_device("npcm8xx.vdmx",         0xe0800000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.pcierc",       0xe1000000,  64 * KiB);
+    create_unimplemented_device("npcm8xx.rootc",        0xe8000000, 128 * MiB);
+    create_unimplemented_device("npcm8xx.kcs",          0xf0007000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.gfxi",         0xf000e000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.fsw",          0xf000f000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.bt",           0xf0030000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.espi",         0xf009f000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.peci",         0xf0100000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.siox[1]",      0xf0101000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.siox[2]",      0xf0102000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.tmps",         0xf0188000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.pspi",         0xf0201000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.viru1",        0xf0204000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.viru2",        0xf0205000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.jtm1",         0xf0208000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.jtm2",         0xf0209000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.flm0",         0xf0210000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.flm1",         0xf0211000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.flm2",         0xf0212000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.flm3",         0xf0213000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.ahbpci",       0xf0400000,   1 * MiB);
+    create_unimplemented_device("npcm8xx.dap",          0xf0500000, 960 * KiB);
+    create_unimplemented_device("npcm8xx.mcphy",        0xf05f0000,  64 * KiB);
+    create_unimplemented_device("npcm8xx.pcs",          0xf0780000, 256 * KiB);
+    create_unimplemented_device("npcm8xx.tsgen",        0xf07fc000,   8 * KiB);
+    create_unimplemented_device("npcm8xx.gmac1",        0xf0802000,   8 * KiB);
+    create_unimplemented_device("npcm8xx.gmac2",        0xf0804000,   8 * KiB);
+    create_unimplemented_device("npcm8xx.gmac3",        0xf0806000,   8 * KiB);
+    create_unimplemented_device("npcm8xx.gmac4",        0xf0808000,   8 * KiB);
+    create_unimplemented_device("npcm8xx.copctl",       0xf080c000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.tipctl",       0xf080d000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.rst",          0xf080e000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.vcd",          0xf0810000,  64 * KiB);
+    create_unimplemented_device("npcm8xx.ece",          0xf0820000,   8 * KiB);
+    create_unimplemented_device("npcm8xx.vdma",         0xf0822000,   8 * KiB);
+    create_unimplemented_device("npcm8xx.usbd[0]",      0xf0830000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.usbd[1]",      0xf0831000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.usbd[2]",      0xf0832000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.usbd[3]",      0xf0833000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.usbd[4]",      0xf0834000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.usbd[5]",      0xf0835000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.usbd[6]",      0xf0836000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.usbd[7]",      0xf0837000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.usbd[8]",      0xf0838000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.usbd[9]",      0xf0839000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.pci_mbox1",    0xf0848000,  64 * KiB);
+    create_unimplemented_device("npcm8xx.gdma0",        0xf0850000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.gdma1",        0xf0851000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.gdma2",        0xf0852000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.aes",          0xf0858000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.des",          0xf0859000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.sha",          0xf085a000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.pci_mbox2",    0xf0868000,  64 * KiB);
+    create_unimplemented_device("npcm8xx.i3c0",         0xfff10000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.i3c1",         0xfff11000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.i3c2",         0xfff12000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.i3c3",         0xfff13000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.i3c4",         0xfff14000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.i3c5",         0xfff15000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.spixcs0",      0xf8000000,  16 * MiB);
+    create_unimplemented_device("npcm8xx.spixcs1",      0xf9000000,  16 * MiB);
+    create_unimplemented_device("npcm8xx.spix",         0xfb001000,   4 * KiB);
+    create_unimplemented_device("npcm8xx.vect",         0xffff0000,   256);
+}
+
+static const Property npcm8xx_properties[] = {
+    DEFINE_PROP_LINK("dram-mr", NPCM8xxState, dram, TYPE_MEMORY_REGION,
+                     MemoryRegion *),
+};
+
+static void npcm8xx_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+    NPCM8xxClass *nc = NPCM8XX_CLASS(oc);
+
+    dc->realize = npcm8xx_realize;
+    dc->user_creatable = false;
+    nc->disabled_modules = 0x00000000;
+    nc->num_cpus = NPCM8XX_MAX_NUM_CPUS;
+    device_class_set_props(dc, npcm8xx_properties);
+}
+
+static const TypeInfo npcm8xx_soc_types[] = {
+    {
+        .name           = TYPE_NPCM8XX,
+        .parent         = TYPE_DEVICE,
+        .instance_size  = sizeof(NPCM8xxState),
+        .instance_init  = npcm8xx_init,
+        .class_size     = sizeof(NPCM8xxClass),
+        .class_init     = npcm8xx_class_init,
+    },
+};
+
+DEFINE_TYPES(npcm8xx_soc_types);
diff --git a/hw/arm/npcm8xx_boards.c b/hw/arm/npcm8xx_boards.c
new file mode 100644
index 0000000000000..3fb8478e72e7a
--- /dev/null
+++ b/hw/arm/npcm8xx_boards.c
@@ -0,0 +1,254 @@
+/*
+ * Machine definitions for boards featuring an NPCM8xx SoC.
+ *
+ * Copyright 2021 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "qemu/osdep.h"
+
+#include "chardev/char.h"
+#include "hw/boards.h"
+#include "hw/arm/npcm8xx.h"
+#include "hw/core/cpu.h"
+#include "hw/loader.h"
+#include "hw/qdev-core.h"
+#include "hw/qdev-properties.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/datadir.h"
+#include "qemu/units.h"
+
+#define NPCM845_EVB_POWER_ON_STRAPS 0x000017ff
+
+static const char npcm8xx_default_bootrom[] = "npcm8xx_bootrom.bin";
+
+static void npcm8xx_load_bootrom(MachineState *machine, NPCM8xxState *soc)
+{
+    const char *bios_name = machine->firmware ?: npcm8xx_default_bootrom;
+    g_autofree char *filename = NULL;
+    int ret;
+
+    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
+    if (!filename) {
+        error_report("Could not find ROM image '%s'", bios_name);
+        if (!machine->kernel_filename) {
+            /* We can't boot without a bootrom or a kernel image. */
+            exit(1);
+        }
+        return;
+    }
+    ret = load_image_mr(filename, machine->ram);
+    if (ret < 0) {
+        error_report("Failed to load ROM image '%s'", filename);
+        exit(1);
+    }
+}
+
+static void npcm8xx_connect_flash(NPCM7xxFIUState *fiu, int cs_no,
+                                  const char *flash_type, DriveInfo *dinfo)
+{
+    DeviceState *flash;
+    qemu_irq flash_cs;
+
+    flash = qdev_new(flash_type);
+    if (dinfo) {
+        qdev_prop_set_drive(flash, "drive", blk_by_legacy_dinfo(dinfo));
+    }
+    qdev_realize_and_unref(flash, BUS(fiu->spi), &error_fatal);
+
+    flash_cs = qdev_get_gpio_in_named(flash, SSI_GPIO_CS, 0);
+    qdev_connect_gpio_out_named(DEVICE(fiu), "cs", cs_no, flash_cs);
+}
+
+static void npcm8xx_connect_dram(NPCM8xxState *soc, MemoryRegion *dram)
+{
+    memory_region_add_subregion(get_system_memory(), NPCM8XX_DRAM_BA, dram);
+
+    object_property_set_link(OBJECT(soc), "dram-mr", OBJECT(dram),
+                             &error_abort);
+}
+
+static NPCM8xxState *npcm8xx_create_soc(MachineState *machine,
+                                        uint32_t hw_straps)
+{
+    NPCM8xxMachineClass *nmc = NPCM8XX_MACHINE_GET_CLASS(machine);
+    Object *obj;
+
+    obj = object_new_with_props(nmc->soc_type, OBJECT(machine), "soc",
+                                &error_abort, NULL);
+    object_property_set_uint(obj, "power-on-straps", hw_straps, &error_abort);
+
+    return NPCM8XX(obj);
+}
+
+static I2CBus *npcm8xx_i2c_get_bus(NPCM8xxState *soc, uint32_t num)
+{
+    g_assert(num < ARRAY_SIZE(soc->smbus));
+    return I2C_BUS(qdev_get_child_bus(DEVICE(&soc->smbus[num]), "i2c-bus"));
+}
+
+static void npcm8xx_init_pwm_splitter(NPCM8xxMachine *machine,
+                                      NPCM8xxState *soc, const int *fan_counts)
+{
+    SplitIRQ *splitters = machine->fan_splitter;
+
+    /*
+     * PWM 0~3 belong to module 0 output 0~3.
+     * PWM 4~7 belong to module 1 output 0~3.
+     */
+    for (int i = 0; i < NPCM8XX_NR_PWM_MODULES; ++i) {
+        for (int j = 0; j < NPCM7XX_PWM_PER_MODULE; ++j) {
+            int splitter_no = i * NPCM7XX_PWM_PER_MODULE + j;
+            DeviceState *splitter;
+
+            if (fan_counts[splitter_no] < 1) {
+                continue;
+            }
+            object_initialize_child(OBJECT(machine), "fan-splitter[*]",
+                                    &splitters[splitter_no], TYPE_SPLIT_IRQ);
+            splitter = DEVICE(&splitters[splitter_no]);
+            qdev_prop_set_uint16(splitter, "num-lines",
+                                 fan_counts[splitter_no]);
+            qdev_realize(splitter, NULL, &error_abort);
+            qdev_connect_gpio_out_named(DEVICE(&soc->pwm[i]), "duty-gpio-out",
+                                        j, qdev_get_gpio_in(splitter, 0));
+        }
+    }
+}
+
+static void npcm8xx_connect_pwm_fan(NPCM8xxState *soc, SplitIRQ *splitter,
+                                    int fan_no, int output_no)
+{
+    DeviceState *fan;
+    int fan_input;
+    qemu_irq fan_duty_gpio;
+
+    g_assert(fan_no >= 0 && fan_no <= NPCM7XX_MFT_MAX_FAN_INPUT);
+    /*
+     * Fan 0~1 belong to module 0 input 0~1.
+     * Fan 2~3 belong to module 1 input 0~1.
+     * ...
+     * Fan 14~15 belong to module 7 input 0~1.
+     * Fan 16~17 belong to module 0 input 2~3.
+     * Fan 18~19 belong to module 1 input 2~3.
+     */
+    if (fan_no < 16) {
+        fan = DEVICE(&soc->mft[fan_no / 2]);
+        fan_input = fan_no % 2;
+    } else {
+        fan = DEVICE(&soc->mft[(fan_no - 16) / 2]);
+        fan_input = fan_no % 2 + 2;
+    }
+
+    /* Connect the Fan to PWM module */
+    fan_duty_gpio = qdev_get_gpio_in_named(fan, "duty", fan_input);
+    qdev_connect_gpio_out(DEVICE(splitter), output_no, fan_duty_gpio);
+}
+
+static void npcm845_evb_i2c_init(NPCM8xxState *soc)
+{
+    /* tmp100 temperature sensor on SVB, tmp105 is compatible */
+    i2c_slave_create_simple(npcm8xx_i2c_get_bus(soc, 6), "tmp105", 0x48);
+}
+
+static void npcm845_evb_fan_init(NPCM8xxMachine *machine, NPCM8xxState *soc)
+{
+    SplitIRQ *splitter = machine->fan_splitter;
+    static const int fan_counts[] = {2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0};
+
+    npcm8xx_init_pwm_splitter(machine, soc, fan_counts);
+    npcm8xx_connect_pwm_fan(soc, &splitter[0], 0x00, 0);
+    npcm8xx_connect_pwm_fan(soc, &splitter[0], 0x01, 1);
+    npcm8xx_connect_pwm_fan(soc, &splitter[1], 0x02, 0);
+    npcm8xx_connect_pwm_fan(soc, &splitter[1], 0x03, 1);
+    npcm8xx_connect_pwm_fan(soc, &splitter[2], 0x04, 0);
+    npcm8xx_connect_pwm_fan(soc, &splitter[2], 0x05, 1);
+    npcm8xx_connect_pwm_fan(soc, &splitter[3], 0x06, 0);
+    npcm8xx_connect_pwm_fan(soc, &splitter[3], 0x07, 1);
+    npcm8xx_connect_pwm_fan(soc, &splitter[4], 0x08, 0);
+    npcm8xx_connect_pwm_fan(soc, &splitter[4], 0x09, 1);
+    npcm8xx_connect_pwm_fan(soc, &splitter[5], 0x0a, 0);
+    npcm8xx_connect_pwm_fan(soc, &splitter[5], 0x0b, 1);
+    npcm8xx_connect_pwm_fan(soc, &splitter[6], 0x0c, 0);
+    npcm8xx_connect_pwm_fan(soc, &splitter[6], 0x0d, 1);
+    npcm8xx_connect_pwm_fan(soc, &splitter[7], 0x0e, 0);
+    npcm8xx_connect_pwm_fan(soc, &splitter[7], 0x0f, 1);
+}
+
+static void npcm845_evb_init(MachineState *machine)
+{
+    NPCM8xxState *soc;
+
+    soc = npcm8xx_create_soc(machine, NPCM845_EVB_POWER_ON_STRAPS);
+    npcm8xx_connect_dram(soc, machine->ram);
+    qdev_realize(DEVICE(soc), NULL, &error_fatal);
+
+    npcm8xx_load_bootrom(machine, soc);
+    npcm8xx_connect_flash(&soc->fiu[0], 0, "w25q256", drive_get(IF_MTD, 0, 0));
+    npcm845_evb_i2c_init(soc);
+    npcm845_evb_fan_init(NPCM8XX_MACHINE(machine), soc);
+    npcm8xx_load_kernel(machine, soc);
+}
+
+static void npcm8xx_set_soc_type(NPCM8xxMachineClass *nmc, const char *type)
+{
+    NPCM8xxClass *sc = NPCM8XX_CLASS(object_class_by_name(type));
+    MachineClass *mc = MACHINE_CLASS(nmc);
+
+    nmc->soc_type = type;
+    mc->default_cpus = mc->min_cpus = mc->max_cpus = sc->num_cpus;
+}
+
+static void npcm8xx_machine_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+    static const char * const valid_cpu_types[] = {
+        ARM_CPU_TYPE_NAME("cortex-a9"),
+        NULL
+    };
+
+    mc->no_floppy = 1;
+    mc->no_cdrom = 1;
+    mc->no_parallel = 1;
+    mc->default_ram_id = "ram";
+    mc->valid_cpu_types = valid_cpu_types;
+}
+
+static void npcm845_evb_machine_class_init(ObjectClass *oc, void *data)
+{
+    NPCM8xxMachineClass *nmc = NPCM8XX_MACHINE_CLASS(oc);
+    MachineClass *mc = MACHINE_CLASS(oc);
+
+    npcm8xx_set_soc_type(nmc, TYPE_NPCM8XX);
+
+    mc->desc = "Nuvoton NPCM845 Evaluation Board (Cortex-A35)";
+    mc->init = npcm845_evb_init;
+    mc->default_ram_size = 1 * GiB;
+};
+
+static const TypeInfo npcm8xx_machine_types[] = {
+    {
+        .name           = TYPE_NPCM8XX_MACHINE,
+        .parent         = TYPE_MACHINE,
+        .instance_size  = sizeof(NPCM8xxMachine),
+        .class_size     = sizeof(NPCM8xxMachineClass),
+        .class_init     = npcm8xx_machine_class_init,
+        .abstract       = true,
+    }, {
+        .name           = MACHINE_TYPE_NAME("npcm845-evb"),
+        .parent         = TYPE_NPCM8XX_MACHINE,
+        .class_init     = npcm845_evb_machine_class_init,
+    },
+};
+
+DEFINE_TYPES(npcm8xx_machine_types)
diff --git a/hw/arm/omap1.c b/hw/arm/omap1.c
index ca2eb0d157615..3ee10b477704f 100644
--- a/hw/arm/omap1.c
+++ b/hw/arm/omap1.c
@@ -42,6 +42,7 @@
 #include "qemu/cutils.h"
 #include "qemu/bcd.h"
 #include "target/arm/cpu-qom.h"
+#include "trace.h"
 
 static inline void omap_log_badwidth(const char *funcname, hwaddr addr, int sz)
 {
@@ -1731,7 +1732,7 @@ static void omap_clkm_write(void *opaque, hwaddr addr,
     case 0x18:	/* ARM_SYSST */
         if ((s->clkm.clocking_scheme ^ (value >> 11)) & 7) {
             s->clkm.clocking_scheme = (value >> 11) & 7;
-            printf("%s: clocking scheme set to %s\n", __func__,
+            trace_omap1_pwl_clocking_scheme(
                    clkschemename[s->clkm.clocking_scheme]);
         }
         s->clkm.cold_start &= value & 0x3f;
@@ -2335,7 +2336,7 @@ static void omap_pwl_update(struct omap_pwl_s *s)
 
     if (output != s->output) {
         s->output = output;
-        printf("%s: Backlight now at %i/256\n", __func__, output);
+        trace_omap1_pwl_backlight(output);
     }
 }
 
@@ -2470,8 +2471,8 @@ static void omap_pwt_write(void *opaque, hwaddr addr,
         break;
     case 0x04:	/* VRC */
         if ((value ^ s->vrc) & 1) {
-            if (value & 1)
-                printf("%s: %iHz buzz on\n", __func__, (int)
+            if (value & 1) {
+                trace_omap1_pwt_buzz(
                                 /* 1.5 MHz from a 12-MHz or 13-MHz PWT_CLK */
                                 ((omap_clk_getrate(s->clk) >> 3) /
                                  /* Pre-multiplexer divider */
@@ -2487,8 +2488,9 @@ static void omap_pwt_write(void *opaque, hwaddr addr,
                                  /*  80/127 divider */
                                  ((value & (1 << 5)) ?  80 : 127) /
                                  (107 * 55 * 63 * 127)));
-            else
-                printf("%s: silence!\n", __func__);
+            } else {
+                trace_omap1_pwt_silence();
+            }
         }
         s->vrc = value & 0x7f;
         break;
@@ -2559,8 +2561,9 @@ static void omap_rtc_interrupts_update(struct omap_rtc_s *s)
 static void omap_rtc_alarm_update(struct omap_rtc_s *s)
 {
     s->alarm_ti = mktimegm(&s->alarm_tm);
-    if (s->alarm_ti == -1)
-        printf("%s: conversion failed\n", __func__);
+    if (s->alarm_ti == -1) {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: conversion failed\n", __func__);
+    }
 }
 
 static uint64_t omap_rtc_read(void *opaque, hwaddr addr, unsigned size)
@@ -2659,25 +2662,16 @@ static void omap_rtc_write(void *opaque, hwaddr addr,
 
     switch (offset) {
     case 0x00:	/* SECONDS_REG */
-#ifdef ALMDEBUG
-        printf("RTC SEC_REG <-- %02x\n", value);
-#endif
         s->ti -= s->current_tm.tm_sec;
         s->ti += from_bcd(value);
         return;
 
     case 0x04:	/* MINUTES_REG */
-#ifdef ALMDEBUG
-        printf("RTC MIN_REG <-- %02x\n", value);
-#endif
         s->ti -= s->current_tm.tm_min * 60;
         s->ti += from_bcd(value) * 60;
         return;
 
     case 0x08:	/* HOURS_REG */
-#ifdef ALMDEBUG
-        printf("RTC HRS_REG <-- %02x\n", value);
-#endif
         s->ti -= s->current_tm.tm_hour * 3600;
         if (s->pm_am) {
             s->ti += (from_bcd(value & 0x3f) & 12) * 3600;
@@ -2687,17 +2681,11 @@ static void omap_rtc_write(void *opaque, hwaddr addr,
         return;
 
     case 0x0c:	/* DAYS_REG */
-#ifdef ALMDEBUG
-        printf("RTC DAY_REG <-- %02x\n", value);
-#endif
         s->ti -= s->current_tm.tm_mday * 86400;
         s->ti += from_bcd(value) * 86400;
         return;
 
     case 0x10:	/* MONTHS_REG */
-#ifdef ALMDEBUG
-        printf("RTC MTH_REG <-- %02x\n", value);
-#endif
         memcpy(&new_tm, &s->current_tm, sizeof(new_tm));
         new_tm.tm_mon = from_bcd(value);
         ti[0] = mktimegm(&s->current_tm);
@@ -2714,9 +2702,6 @@ static void omap_rtc_write(void *opaque, hwaddr addr,
         return;
 
     case 0x14:	/* YEARS_REG */
-#ifdef ALMDEBUG
-        printf("RTC YRS_REG <-- %02x\n", value);
-#endif
         memcpy(&new_tm, &s->current_tm, sizeof(new_tm));
         new_tm.tm_year += from_bcd(value) - (new_tm.tm_year % 100);
         ti[0] = mktimegm(&s->current_tm);
@@ -2736,25 +2721,16 @@ static void omap_rtc_write(void *opaque, hwaddr addr,
         return;	/* Ignored */
 
     case 0x20:	/* ALARM_SECONDS_REG */
-#ifdef ALMDEBUG
-        printf("ALM SEC_REG <-- %02x\n", value);
-#endif
         s->alarm_tm.tm_sec = from_bcd(value);
         omap_rtc_alarm_update(s);
         return;
 
     case 0x24:	/* ALARM_MINUTES_REG */
-#ifdef ALMDEBUG
-        printf("ALM MIN_REG <-- %02x\n", value);
-#endif
         s->alarm_tm.tm_min = from_bcd(value);
         omap_rtc_alarm_update(s);
         return;
 
     case 0x28:	/* ALARM_HOURS_REG */
-#ifdef ALMDEBUG
-        printf("ALM HRS_REG <-- %02x\n", value);
-#endif
         if (s->pm_am)
             s->alarm_tm.tm_hour =
                     ((from_bcd(value & 0x3f)) % 12) +
@@ -2765,33 +2741,21 @@ static void omap_rtc_write(void *opaque, hwaddr addr,
         return;
 
     case 0x2c:	/* ALARM_DAYS_REG */
-#ifdef ALMDEBUG
-        printf("ALM DAY_REG <-- %02x\n", value);
-#endif
         s->alarm_tm.tm_mday = from_bcd(value);
         omap_rtc_alarm_update(s);
         return;
 
     case 0x30:	/* ALARM_MONTHS_REG */
-#ifdef ALMDEBUG
-        printf("ALM MON_REG <-- %02x\n", value);
-#endif
         s->alarm_tm.tm_mon = from_bcd(value);
         omap_rtc_alarm_update(s);
         return;
 
     case 0x34:	/* ALARM_YEARS_REG */
-#ifdef ALMDEBUG
-        printf("ALM YRS_REG <-- %02x\n", value);
-#endif
         s->alarm_tm.tm_year = from_bcd(value);
         omap_rtc_alarm_update(s);
         return;
 
     case 0x40:	/* RTC_CTRL_REG */
-#ifdef ALMDEBUG
-        printf("RTC CONTROL <-- %02x\n", value);
-#endif
         s->pm_am = (value >> 3) & 1;
         s->auto_comp = (value >> 2) & 1;
         s->round = (value >> 1) & 1;
@@ -2801,32 +2765,20 @@ static void omap_rtc_write(void *opaque, hwaddr addr,
         return;
 
     case 0x44:	/* RTC_STATUS_REG */
-#ifdef ALMDEBUG
-        printf("RTC STATUSL <-- %02x\n", value);
-#endif
         s->status &= ~((value & 0xc0) ^ 0x80);
         omap_rtc_interrupts_update(s);
         return;
 
     case 0x48:	/* RTC_INTERRUPTS_REG */
-#ifdef ALMDEBUG
-        printf("RTC INTRS <-- %02x\n", value);
-#endif
         s->interrupts = value;
         return;
 
     case 0x4c:	/* RTC_COMP_LSB_REG */
-#ifdef ALMDEBUG
-        printf("RTC COMPLSB <-- %02x\n", value);
-#endif
         s->comp_reg &= 0xff00;
         s->comp_reg |= 0x00ff & value;
         return;
 
     case 0x50:	/* RTC_COMP_MSB_REG */
-#ifdef ALMDEBUG
-        printf("RTC COMPMSB <-- %02x\n", value);
-#endif
         s->comp_reg &= 0x00ff;
         s->comp_reg |= 0xff00 & (value << 8);
         return;
@@ -3024,8 +2976,9 @@ static void omap_mcbsp_source_tick(void *opaque)
 
     if (!s->rx_rate)
         return;
-    if (s->rx_req)
-        printf("%s: Rx FIFO overrun\n", __func__);
+    if (s->rx_req) {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Rx FIFO overrun\n", __func__);
+    }
 
     s->rx_req = s->rx_rate << bps[(s->rcr[0] >> 5) & 7];
 
@@ -3070,8 +3023,9 @@ static void omap_mcbsp_sink_tick(void *opaque)
 
     if (!s->tx_rate)
         return;
-    if (s->tx_req)
-        printf("%s: Tx FIFO underrun\n", __func__);
+    if (s->tx_req) {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Tx FIFO underrun\n", __func__);
+    }
 
     s->tx_req = s->tx_rate << bps[(s->xcr[0] >> 5) & 7];
 
@@ -3173,7 +3127,7 @@ static uint64_t omap_mcbsp_read(void *opaque, hwaddr addr,
         /* Fall through.  */
     case 0x02:	/* DRR1 */
         if (s->rx_req < 2) {
-            printf("%s: Rx FIFO underrun\n", __func__);
+            qemu_log_mask(LOG_GUEST_ERROR, "%s: Rx FIFO underrun\n", __func__);
             omap_mcbsp_rx_done(s);
         } else {
             s->tx_req -= 2;
@@ -3278,8 +3232,9 @@ static void omap_mcbsp_writeh(void *opaque, hwaddr addr,
             }
             if (s->tx_req < 2)
                 omap_mcbsp_tx_done(s);
-        } else
-            printf("%s: Tx FIFO overrun\n", __func__);
+        } else {
+            qemu_log_mask(LOG_GUEST_ERROR, "%s: Tx FIFO overrun\n", __func__);
+        }
         return;
 
     case 0x08:	/* SPCR2 */
@@ -3293,8 +3248,11 @@ static void omap_mcbsp_writeh(void *opaque, hwaddr addr,
     case 0x0a:	/* SPCR1 */
         s->spcr[0] &= 0x0006;
         s->spcr[0] |= 0xf8f9 & value;
-        if (value & (1 << 15))				/* DLB */
-            printf("%s: Digital Loopback mode enable attempt\n", __func__);
+        if (value & (1 << 15)) {                        /* DLB */
+            qemu_log_mask(LOG_UNIMP,
+                          "%s: Digital Loopback mode enable attempt\n",
+                          __func__);
+        }
         if (~value & 1) {				/* RRST */
             s->spcr[0] &= ~6;
             s->rx_req = 0;
@@ -3325,13 +3283,19 @@ static void omap_mcbsp_writeh(void *opaque, hwaddr addr,
         return;
     case 0x18:	/* MCR2 */
         s->mcr[1] = value & 0x03e3;
-        if (value & 3)					/* XMCM */
-            printf("%s: Tx channel selection mode enable attempt\n", __func__);
+        if (value & 3) {                                /* XMCM */
+            qemu_log_mask(LOG_UNIMP,
+                          "%s: Tx channel selection mode enable attempt\n",
+                          __func__);
+        }
         return;
     case 0x1a:	/* MCR1 */
         s->mcr[0] = value & 0x03e1;
-        if (value & 1)					/* RMCM */
-            printf("%s: Rx channel selection mode enable attempt\n", __func__);
+        if (value & 1) {                                /* RMCM */
+            qemu_log_mask(LOG_UNIMP,
+                          "%s: Rx channel selection mode enable attempt\n",
+                          __func__);
+        }
         return;
     case 0x1c:	/* RCERA */
         s->rcer[0] = value & 0xffff;
@@ -3412,8 +3376,9 @@ static void omap_mcbsp_writew(void *opaque, hwaddr addr,
             }
             if (s->tx_req < 4)
                 omap_mcbsp_tx_done(s);
-        } else
-            printf("%s: Tx FIFO overrun\n", __func__);
+        } else {
+            qemu_log_mask(LOG_GUEST_ERROR, "%s: Tx FIFO overrun\n", __func__);
+        }
         return;
     }
 
@@ -3531,7 +3496,7 @@ static void omap_lpg_tick(void *opaque)
         timer_mod(s->tm, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + s->on);
 
     s->cycle = !s->cycle;
-    printf("%s: LED is %s\n", __func__, s->cycle ? "on" : "off");
+    trace_omap1_lpg_led(s->cycle ? "on" : "off");
 }
 
 static void omap_lpg_update(struct omap_lpg_s *s)
@@ -3551,11 +3516,11 @@ static void omap_lpg_update(struct omap_lpg_s *s)
     }
 
     timer_del(s->tm);
-    if (on == period && s->on < s->period)
-        printf("%s: LED is on\n", __func__);
-    else if (on == 0 && s->on)
-        printf("%s: LED is off\n", __func__);
-    else if (on && (on != s->on || period != s->period)) {
+    if (on == period && s->on < s->period) {
+        trace_omap1_lpg_led("on");
+    } else if (on == 0 && s->on) {
+        trace_omap1_lpg_led("off");
+    } else if (on && (on != s->on || period != s->period)) {
         s->cycle = 0;
         s->on = on;
         s->period = period;
diff --git a/hw/arm/omap_sx1.c b/hw/arm/omap_sx1.c
index 623ebd66395f7..24b404318328e 100644
--- a/hw/arm/omap_sx1.c
+++ b/hw/arm/omap_sx1.c
@@ -76,10 +76,6 @@ static uint64_t static_read(void *opaque, hwaddr offset,
 static void static_write(void *opaque, hwaddr offset,
                          uint64_t value, unsigned size)
 {
-#ifdef SPY
-    printf("%s: value %" PRIx64 " %u bytes written at 0x%x\n",
-                    __func__, value, size, (int)offset);
-#endif
 }
 
 static const MemoryRegionOps static_ops = {
@@ -216,6 +212,7 @@ static void sx1_machine_v2_class_init(ObjectClass *oc, void *data)
     mc->default_cpu_type = ARM_CPU_TYPE_NAME("ti925t");
     mc->default_ram_size = SDRAM_SIZE;
     mc->default_ram_id = "omap1.dram";
+    mc->auto_create_sdcard = true;
 }
 
 static const TypeInfo sx1_machine_v2_type = {
@@ -234,6 +231,7 @@ static void sx1_machine_v1_class_init(ObjectClass *oc, void *data)
     mc->default_cpu_type = ARM_CPU_TYPE_NAME("ti925t");
     mc->default_ram_size = SDRAM_SIZE;
     mc->default_ram_id = "omap1.dram";
+    mc->auto_create_sdcard = true;
 }
 
 static const TypeInfo sx1_machine_v1_type = {
diff --git a/hw/arm/orangepi.c b/hw/arm/orangepi.c
index 77e328191d73a..634af9b0a10fe 100644
--- a/hw/arm/orangepi.c
+++ b/hw/arm/orangepi.c
@@ -121,6 +121,7 @@ static void orangepi_machine_init(MachineClass *mc)
     mc->valid_cpu_types = valid_cpu_types;
     mc->default_ram_size = 1 * GiB;
     mc->default_ram_id = "orangepi.ram";
+    mc->auto_create_sdcard = true;
 }
 
 DEFINE_MACHINE("orangepi-pc", orangepi_machine_init)
diff --git a/hw/arm/raspi.c b/hw/arm/raspi.c
index a7a662f40db41..dce35ca11aa55 100644
--- a/hw/arm/raspi.c
+++ b/hw/arm/raspi.c
@@ -342,6 +342,7 @@ static void raspi0_machine_class_init(ObjectClass *oc, void *data)
     MachineClass *mc = MACHINE_CLASS(oc);
     RaspiBaseMachineClass *rmc = RASPI_BASE_MACHINE_CLASS(oc);
 
+    mc->auto_create_sdcard = true;
     rmc->board_rev = 0x920092; /* Revision 1.2 */
     raspi_machine_class_init(mc, rmc->board_rev);
 };
@@ -351,6 +352,7 @@ static void raspi1ap_machine_class_init(ObjectClass *oc, void *data)
     MachineClass *mc = MACHINE_CLASS(oc);
     RaspiBaseMachineClass *rmc = RASPI_BASE_MACHINE_CLASS(oc);
 
+    mc->auto_create_sdcard = true;
     rmc->board_rev = 0x900021; /* Revision 1.1 */
     raspi_machine_class_init(mc, rmc->board_rev);
 };
@@ -360,6 +362,7 @@ static void raspi2b_machine_class_init(ObjectClass *oc, void *data)
     MachineClass *mc = MACHINE_CLASS(oc);
     RaspiBaseMachineClass *rmc = RASPI_BASE_MACHINE_CLASS(oc);
 
+    mc->auto_create_sdcard = true;
     rmc->board_rev = 0xa21041;
     raspi_machine_class_init(mc, rmc->board_rev);
 };
@@ -370,6 +373,7 @@ static void raspi3ap_machine_class_init(ObjectClass *oc, void *data)
     MachineClass *mc = MACHINE_CLASS(oc);
     RaspiBaseMachineClass *rmc = RASPI_BASE_MACHINE_CLASS(oc);
 
+    mc->auto_create_sdcard = true;
     rmc->board_rev = 0x9020e0; /* Revision 1.0 */
     raspi_machine_class_init(mc, rmc->board_rev);
 };
@@ -379,6 +383,7 @@ static void raspi3b_machine_class_init(ObjectClass *oc, void *data)
     MachineClass *mc = MACHINE_CLASS(oc);
     RaspiBaseMachineClass *rmc = RASPI_BASE_MACHINE_CLASS(oc);
 
+    mc->auto_create_sdcard = true;
     rmc->board_rev = 0xa02082;
     raspi_machine_class_init(mc, rmc->board_rev);
 };
diff --git a/hw/arm/raspi4b.c b/hw/arm/raspi4b.c
index 1264e0d6eedb2..f6de103a3e1bb 100644
--- a/hw/arm/raspi4b.c
+++ b/hw/arm/raspi4b.c
@@ -118,6 +118,7 @@ static void raspi4b_machine_class_init(ObjectClass *oc, void *data)
     rmc->board_rev = 0xb03115; /* Revision 1.5, 2 Gb RAM */
 #endif
     raspi_machine_class_common_init(mc, rmc->board_rev);
+    mc->auto_create_sdcard = true;
     mc->init = raspi4b_machine_init;
 }
 
diff --git a/hw/arm/realview.c b/hw/arm/realview.c
index 9900a98f3b8c2..008eeaf049a79 100644
--- a/hw/arm/realview.c
+++ b/hw/arm/realview.c
@@ -35,6 +35,8 @@
 #define SMP_BOOT_ADDR 0xe0000000
 #define SMP_BOOTREG_ADDR 0x10000030
 
+#define GIC_EXT_IRQS 64 /* Realview PBX-A9 development board */
+
 /* Board init.  */
 
 static struct arm_boot_info realview_binfo = {
@@ -185,7 +187,12 @@ static void realview_init(MachineState *machine,
     sysbus_mmio_map(SYS_BUS_DEVICE(sysctl), 0, 0x10000000);
 
     if (is_mpcore) {
-        dev = qdev_new(is_pb ? TYPE_A9MPCORE_PRIV : "realview_mpcore");
+        if (is_pb) {
+            dev = qdev_new(TYPE_A9MPCORE_PRIV);
+            qdev_prop_set_uint32(dev, "num-irq", GIC_EXT_IRQS + GIC_INTERNAL);
+        } else {
+            dev = qdev_new("realview_mpcore");
+        }
         qdev_prop_set_uint32(dev, "num-cpu", smp_cpus);
         busdev = SYS_BUS_DEVICE(dev);
         sysbus_realize_and_unref(busdev, &error_fatal);
@@ -201,7 +208,7 @@ static void realview_init(MachineState *machine,
         /* For now just create the nIRQ GIC, and ignore the others.  */
         dev = sysbus_create_simple(TYPE_REALVIEW_GIC, gic_addr, cpu_irq[0]);
     }
-    for (n = 0; n < 64; n++) {
+    for (n = 0; n < GIC_EXT_IRQS; n++) {
         pic[n] = qdev_get_gpio_in(dev, n);
     }
 
@@ -415,6 +422,7 @@ static void realview_eb_class_init(ObjectClass *oc, void *data)
     mc->block_default_type = IF_SCSI;
     mc->ignore_memory_transaction_failures = true;
     mc->default_cpu_type = ARM_CPU_TYPE_NAME("arm926");
+    mc->auto_create_sdcard = true;
 
     machine_add_audiodev_property(mc);
 }
@@ -435,6 +443,7 @@ static void realview_eb_mpcore_class_init(ObjectClass *oc, void *data)
     mc->max_cpus = 4;
     mc->ignore_memory_transaction_failures = true;
     mc->default_cpu_type = ARM_CPU_TYPE_NAME("arm11mpcore");
+    mc->auto_create_sdcard = true;
 
     machine_add_audiodev_property(mc);
 }
@@ -453,6 +462,7 @@ static void realview_pb_a8_class_init(ObjectClass *oc, void *data)
     mc->init = realview_pb_a8_init;
     mc->ignore_memory_transaction_failures = true;
     mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a8");
+    mc->auto_create_sdcard = true;
 
     machine_add_audiodev_property(mc);
 }
@@ -472,6 +482,7 @@ static void realview_pbx_a9_class_init(ObjectClass *oc, void *data)
     mc->max_cpus = 4;
     mc->ignore_memory_transaction_failures = true;
     mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a9");
+    mc->auto_create_sdcard = true;
 
     machine_add_audiodev_property(mc);
 }
diff --git a/hw/arm/sabrelite.c b/hw/arm/sabrelite.c
index 1eb47042eca7b..df60d47c6fd1c 100644
--- a/hw/arm/sabrelite.c
+++ b/hw/arm/sabrelite.c
@@ -110,6 +110,7 @@ static void sabrelite_machine_init(MachineClass *mc)
     mc->max_cpus = FSL_IMX6_NUM_CPUS;
     mc->ignore_memory_transaction_failures = true;
     mc->default_ram_id = "sabrelite.ram";
+    mc->auto_create_sdcard = true;
 }
 
 DEFINE_MACHINE("sabrelite", sabrelite_machine_init)
diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c
index e720de3064193..aa09d7a091731 100644
--- a/hw/arm/sbsa-ref.c
+++ b/hw/arm/sbsa-ref.c
@@ -484,6 +484,8 @@ static void create_gic(SBSAMachineState *sms, MemoryRegion *mem)
             [GTIMER_HYP]  = ARCH_TIMER_NS_EL2_IRQ,
             [GTIMER_SEC]  = ARCH_TIMER_S_EL1_IRQ,
             [GTIMER_HYPVIRT] = ARCH_TIMER_NS_EL2_VIRT_IRQ,
+            [GTIMER_S_EL2_PHYS] = ARCH_TIMER_S_EL2_IRQ,
+            [GTIMER_S_EL2_VIRT] = ARCH_TIMER_S_EL2_VIRT_IRQ,
         };
 
         for (irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) {
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index dd74c2e5583e5..6e720e1b9a0b4 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -225,6 +225,27 @@ static gboolean smmu_hash_remove_by_vmid_ipa(gpointer key, gpointer value,
            ((entry->iova & ~info->mask) == info->iova);
 }
 
+static gboolean
+smmu_hash_remove_by_sid_range(gpointer key, gpointer value, gpointer user_data)
+{
+    SMMUDevice *sdev = (SMMUDevice *)key;
+    uint32_t sid = smmu_get_sid(sdev);
+    SMMUSIDRange *sid_range = (SMMUSIDRange *)user_data;
+
+    if (sid < sid_range->start || sid > sid_range->end) {
+        return false;
+    }
+    trace_smmu_config_cache_inv(sid);
+    return true;
+}
+
+void smmu_configs_inv_sid_range(SMMUState *s, SMMUSIDRange sid_range)
+{
+    trace_smmu_configs_inv_sid_range(sid_range.start, sid_range.end);
+    g_hash_table_foreach_remove(s->configs, smmu_hash_remove_by_sid_range,
+                                &sid_range);
+}
+
 void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova,
                          uint8_t tg, uint64_t num_pages, uint8_t ttl)
 {
@@ -924,7 +945,12 @@ static void smmu_base_realize(DeviceState *dev, Error **errp)
     }
 }
 
-static void smmu_base_reset_hold(Object *obj, ResetType type)
+/*
+ * Make sure the IOMMU is reset in 'exit' phase after
+ * all outstanding DMA requests have been quiesced during
+ * the 'enter' or 'hold' reset phases
+ */
+static void smmu_base_reset_exit(Object *obj, ResetType type)
 {
     SMMUState *s = ARM_SMMU(obj);
 
@@ -949,7 +975,7 @@ static void smmu_base_class_init(ObjectClass *klass, void *data)
     device_class_set_props(dc, smmu_dev_properties);
     device_class_set_parent_realize(dc, smmu_base_realize,
                                     &sbc->parent_realize);
-    rc->phases.hold = smmu_base_reset_hold;
+    rc->phases.exit = smmu_base_reset_exit;
 }
 
 static const TypeInfo smmu_base_info = {
diff --git a/hw/arm/smmu-internal.h b/hw/arm/smmu-internal.h
index 843bebb185d77..d143d296f343e 100644
--- a/hw/arm/smmu-internal.h
+++ b/hw/arm/smmu-internal.h
@@ -141,9 +141,4 @@ typedef struct SMMUIOTLBPageInvInfo {
     uint64_t mask;
 } SMMUIOTLBPageInvInfo;
 
-typedef struct SMMUSIDRange {
-    uint32_t start;
-    uint32_t end;
-} SMMUSIDRange;
-
 #endif
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index c0cf5df0f6e44..1a96287ba9d67 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -377,7 +377,7 @@ static int smmu_get_cd(SMMUv3State *s, STE *ste, SMMUTransCfg *cfg,
         qemu_log_mask(LOG_GUEST_ERROR,
                       "Cannot fetch pte at address=0x%"PRIx64"\n", addr);
         event->type = SMMU_EVT_F_CD_FETCH;
-        event->u.f_ste_fetch.addr = addr;
+        event->u.f_cd_fetch.addr = addr;
         return -EINVAL;
     }
     for (i = 0; i < ARRAY_SIZE(buf->word); i++) {
@@ -903,7 +903,7 @@ static void smmuv3_flush_config(SMMUDevice *sdev)
     SMMUv3State *s = sdev->smmu;
     SMMUState *bc = &s->smmu_state;
 
-    trace_smmuv3_config_cache_inv(smmu_get_sid(sdev));
+    trace_smmu_config_cache_inv(smmu_get_sid(sdev));
     g_hash_table_remove(bc->configs, sdev);
 }
 
@@ -1277,20 +1277,6 @@ static void smmuv3_range_inval(SMMUState *s, Cmd *cmd, SMMUStage stage)
     }
 }
 
-static gboolean
-smmuv3_invalidate_ste(gpointer key, gpointer value, gpointer user_data)
-{
-    SMMUDevice *sdev = (SMMUDevice *)key;
-    uint32_t sid = smmu_get_sid(sdev);
-    SMMUSIDRange *sid_range = (SMMUSIDRange *)user_data;
-
-    if (sid < sid_range->start || sid > sid_range->end) {
-        return false;
-    }
-    trace_smmuv3_config_cache_inv(sid);
-    return true;
-}
-
 static int smmuv3_cmdq_consume(SMMUv3State *s)
 {
     SMMUState *bs = ARM_SMMU(s);
@@ -1373,8 +1359,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
             sid_range.end = sid_range.start + mask;
 
             trace_smmuv3_cmdq_cfgi_ste_range(sid_range.start, sid_range.end);
-            g_hash_table_foreach_remove(bs->configs, smmuv3_invalidate_ste,
-                                        &sid_range);
+            smmu_configs_inv_sid_range(bs, sid_range);
             break;
         }
         case SMMU_CMD_CFGI_CD:
@@ -1870,13 +1855,19 @@ static void smmu_init_irq(SMMUv3State *s, SysBusDevice *dev)
     }
 }
 
-static void smmu_reset_hold(Object *obj, ResetType type)
+/*
+ * Make sure the IOMMU is reset in 'exit' phase after
+ * all outstanding DMA requests have been quiesced during
+ * the 'enter' or 'hold' reset phases
+ */
+static void smmu_reset_exit(Object *obj, ResetType type)
 {
     SMMUv3State *s = ARM_SMMUV3(obj);
     SMMUv3Class *c = ARM_SMMUV3_GET_CLASS(s);
 
-    if (c->parent_phases.hold) {
-        c->parent_phases.hold(obj, type);
+    trace_smmu_reset_exit();
+    if (c->parent_phases.exit) {
+        c->parent_phases.exit(obj, type);
     }
 
     smmuv3_init_regs(s);
@@ -1999,7 +1990,7 @@ static void smmuv3_class_init(ObjectClass *klass, void *data)
     SMMUv3Class *c = ARM_SMMUV3_CLASS(klass);
 
     dc->vmsd = &vmstate_smmuv3;
-    resettable_class_set_parent_phases(rc, NULL, smmu_reset_hold, NULL,
+    resettable_class_set_parent_phases(rc, NULL, NULL, smmu_reset_exit,
                                        &c->parent_phases);
     device_class_set_parent_realize(dc, smmu_realize,
                                     &c->parent_realize);
diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c
index 47c1cfa04855d..33611113602d2 100644
--- a/hw/arm/stellaris.c
+++ b/hw/arm/stellaris.c
@@ -1441,6 +1441,7 @@ static void lm3s6965evb_class_init(ObjectClass *oc, void *data)
     mc->init = lm3s6965evb_init;
     mc->ignore_memory_transaction_failures = true;
     mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-m3");
+    mc->auto_create_sdcard = true;
 }
 
 static const TypeInfo lm3s6965evb_type = {
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
index c64ad344bd119..f3386bd7ae149 100644
--- a/hw/arm/trace-events
+++ b/hw/arm/trace-events
@@ -1,5 +1,12 @@
 # See docs/devel/tracing.rst for syntax documentation.
 
+# omap1.c
+omap1_pwl_clocking_scheme(const char *scheme) "omap1 CLKM: clocking scheme set to %s"
+omap1_pwl_backlight(int output) "omap1 PWL: backlight now at %d/256"
+omap1_pwt_buzz(int freq) "omap1 PWT: %dHz buzz on"
+omap1_pwt_silence(void) "omap1 PWT: buzzer silenced"
+omap1_lpg_led(const char *onoff) "omap1 LPG: LED is %s"
+
 # virt-acpi-build.c
 virt_acpi_setup(void) "No fw cfg or ACPI disabled. Bailing out."
 
@@ -15,6 +22,8 @@ smmu_iotlb_inv_asid_vmid(int asid, int vmid) "IOTLB invalidate asid=%d vmid=%d"
 smmu_iotlb_inv_vmid(int vmid) "IOTLB invalidate vmid=%d"
 smmu_iotlb_inv_vmid_s1(int vmid) "IOTLB invalidate vmid=%d"
 smmu_iotlb_inv_iova(int asid, uint64_t addr) "IOTLB invalidate asid=%d addr=0x%"PRIx64
+smmu_configs_inv_sid_range(uint32_t start, uint32_t end) "Config cache INV SID range from 0x%x to 0x%x"
+smmu_config_cache_inv(uint32_t sid) "Config cache INV for sid=0x%x"
 smmu_inv_notifiers_mr(const char *name) "iommu mr=%s"
 smmu_iotlb_lookup_hit(int asid, int vmid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache HIT asid=%d vmid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d"
 smmu_iotlb_lookup_miss(int asid, int vmid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache MISS asid=%d vmid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d"
@@ -52,10 +61,10 @@ smmuv3_cmdq_tlbi_nh(int vmid) "vmid=%d"
 smmuv3_cmdq_tlbi_nsnh(void) ""
 smmuv3_cmdq_tlbi_nh_asid(int asid) "asid=%d"
 smmuv3_cmdq_tlbi_s12_vmid(int vmid) "vmid=%d"
-smmuv3_config_cache_inv(uint32_t sid) "Config cache INV for sid=0x%x"
 smmuv3_notify_flag_add(const char *iommu) "ADD SMMUNotifier node for iommu mr=%s"
 smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node for iommu mr=%s"
 smmuv3_inv_notifiers_iova(const char *name, int asid, int vmid, uint64_t iova, uint8_t tg, uint64_t num_pages, int stage) "iommu mr=%s asid=%d vmid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" stage=%d"
+smmu_reset_exit(void) ""
 
 # strongarm.c
 strongarm_uart_update_parameters(const char *label, int speed, char parity, int data_bits, int stop_bits) "%s speed=%d parity=%c data=%d stop=%d"
diff --git a/hw/arm/versatilepb.c b/hw/arm/versatilepb.c
index bc4522989ecd0..35766445fa478 100644
--- a/hw/arm/versatilepb.c
+++ b/hw/arm/versatilepb.c
@@ -27,6 +27,7 @@
 #include "qom/object.h"
 #include "audio/audio.h"
 #include "target/arm/cpu-qom.h"
+#include "qemu/log.h"
 
 #define VERSATILE_FLASH_ADDR 0x34000000
 #define VERSATILE_FLASH_SIZE (64 * 1024 * 1024)
@@ -110,7 +111,8 @@ static uint64_t vpb_sic_read(void *opaque, hwaddr offset,
     case 8: /* PICENABLE */
         return s->pic_enable;
     default:
-        printf ("vpb_sic_read: Bad register offset 0x%x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "vpb_sic_read: Bad register offset 0x%x\n", (int)offset);
         return 0;
     }
 }
@@ -144,7 +146,8 @@ static void vpb_sic_write(void *opaque, hwaddr offset,
         vpb_sic_update_pic(s);
         break;
     default:
-        printf ("vpb_sic_write: Bad register offset 0x%x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "vpb_sic_write: Bad register offset 0x%x\n", (int)offset);
         return;
     }
     vpb_sic_update(s);
@@ -419,6 +422,7 @@ static void versatilepb_class_init(ObjectClass *oc, void *data)
     mc->ignore_memory_transaction_failures = true;
     mc->default_cpu_type = ARM_CPU_TYPE_NAME("arm926");
     mc->default_ram_id = "versatile.ram";
+    mc->auto_create_sdcard = true;
 
     machine_add_audiodev_property(mc);
 }
@@ -439,6 +443,7 @@ static void versatileab_class_init(ObjectClass *oc, void *data)
     mc->ignore_memory_transaction_failures = true;
     mc->default_cpu_type = ARM_CPU_TYPE_NAME("arm926");
     mc->default_ram_id = "versatile.ram";
+    mc->auto_create_sdcard = true;
 
     machine_add_audiodev_property(mc);
 }
diff --git a/hw/arm/vexpress.c b/hw/arm/vexpress.c
index b886d16c0239b..76c6107766cf6 100644
--- a/hw/arm/vexpress.c
+++ b/hw/arm/vexpress.c
@@ -51,6 +51,8 @@
 #define VEXPRESS_FLASH_SIZE (64 * 1024 * 1024)
 #define VEXPRESS_FLASH_SECT_SIZE (256 * 1024)
 
+#define GIC_EXT_IRQS 64 /* Versatile Express A9 development board */
+
 /* Number of virtio transports to create (0..8; limited by
  * number of available IRQ lines).
  */
@@ -241,6 +243,7 @@ static void init_cpus(MachineState *ms, const char *cpu_type,
      */
     dev = qdev_new(privdev);
     qdev_prop_set_uint32(dev, "num-cpu", smp_cpus);
+    qdev_prop_set_uint32(dev, "num-irq", GIC_EXT_IRQS + GIC_INTERNAL);
     busdev = SYS_BUS_DEVICE(dev);
     sysbus_realize_and_unref(busdev, &error_fatal);
     sysbus_mmio_map(busdev, 0, periphbase);
@@ -251,7 +254,7 @@ static void init_cpus(MachineState *ms, const char *cpu_type,
      * external interrupts starting from 32 (because there
      * are internal interrupts 0..31).
      */
-    for (n = 0; n < 64; n++) {
+    for (n = 0; n < GIC_EXT_IRQS; n++) {
         pic[n] = qdev_get_gpio_in(dev, n);
     }
 
@@ -543,7 +546,7 @@ static void vexpress_common_init(MachineState *machine)
     VexpressMachineClass *vmc = VEXPRESS_MACHINE_GET_CLASS(machine);
     VEDBoardInfo *daughterboard = vmc->daughterboard;
     DeviceState *dev, *sysctl, *pl041;
-    qemu_irq pic[64];
+    qemu_irq pic[GIC_EXT_IRQS];
     uint32_t sys_id;
     DriveInfo *dinfo;
     PFlashCFI01 *pflash0;
@@ -803,6 +806,7 @@ static void vexpress_a9_class_init(ObjectClass *oc, void *data)
 
     mc->desc = "ARM Versatile Express for Cortex-A9";
     mc->valid_cpu_types = valid_cpu_types;
+    mc->auto_create_sdcard = true;
 
     vmc->daughterboard = &a9_daughterboard;
 }
@@ -818,6 +822,7 @@ static void vexpress_a15_class_init(ObjectClass *oc, void *data)
 
     mc->desc = "ARM Versatile Express for Cortex-A15";
     mc->valid_cpu_types = valid_cpu_types;
+    mc->auto_create_sdcard = true;
 
     vmc->daughterboard = &a15_daughterboard;
 
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 4a5a9666e9162..a96452f17a48f 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -53,6 +53,7 @@
 #include "hw/loader.h"
 #include "qapi/error.h"
 #include "qemu/bitops.h"
+#include "qemu/cutils.h"
 #include "qemu/error-report.h"
 #include "qemu/module.h"
 #include "hw/pci-host/gpex.h"
@@ -81,6 +82,7 @@
 #include "hw/mem/pc-dimm.h"
 #include "hw/mem/nvdimm.h"
 #include "hw/acpi/generic_event_device.h"
+#include "hw/uefi/var-service-api.h"
 #include "hw/virtio/virtio-md-pci.h"
 #include "hw/virtio/virtio-iommu.h"
 #include "hw/char/pl011.h"
@@ -192,6 +194,10 @@ static const MemMapEntry base_memmap[] = {
     [VIRT_MEM] =                { GiB, LEGACY_RAMLIMIT_BYTES },
 };
 
+/* Update the docs for highmem-mmio-size when changing this default */
+#define DEFAULT_HIGH_PCIE_MMIO_SIZE_GB 512
+#define DEFAULT_HIGH_PCIE_MMIO_SIZE (DEFAULT_HIGH_PCIE_MMIO_SIZE_GB * GiB)
+
 /*
  * Highmem IO Regions: This memory map is floating, located after the RAM.
  * Each MemMapEntry base (GPA) will be dynamically computed, depending on the
@@ -207,13 +213,16 @@ static const MemMapEntry base_memmap[] = {
  * PA space for one specific region is always reserved, even if the region
  * has been disabled or doesn't fit into the PA space. However, the PA space
  * for the region won't be reserved in these circumstances with compact layout.
+ *
+ * Note that the highmem-mmio-size property will update the high PCIE MMIO size
+ * field in this array.
  */
 static MemMapEntry extended_memmap[] = {
     /* Additional 64 MB redist region (can contain up to 512 redistributors) */
     [VIRT_HIGH_GIC_REDIST2] =   { 0x0, 64 * MiB },
     [VIRT_HIGH_PCIE_ECAM] =     { 0x0, 256 * MiB },
     /* Second PCIe window */
-    [VIRT_HIGH_PCIE_MMIO] =     { 0x0, 512 * GiB },
+    [VIRT_HIGH_PCIE_MMIO] =     { 0x0, DEFAULT_HIGH_PCIE_MMIO_SIZE },
 };
 
 static const int a15irqmap[] = {
@@ -873,6 +882,8 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem)
             [GTIMER_HYP]  = ARCH_TIMER_NS_EL2_IRQ,
             [GTIMER_SEC]  = ARCH_TIMER_S_EL1_IRQ,
             [GTIMER_HYPVIRT] = ARCH_TIMER_NS_EL2_VIRT_IRQ,
+            [GTIMER_S_EL2_PHYS] = ARCH_TIMER_S_EL2_IRQ,
+            [GTIMER_S_EL2_VIRT] = ARCH_TIMER_S_EL2_VIRT_IRQ,
         };
 
         for (unsigned irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) {
@@ -2550,6 +2561,40 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp)
     vms->highmem_mmio = value;
 }
 
+static void virt_get_highmem_mmio_size(Object *obj, Visitor *v,
+                                       const char *name, void *opaque,
+                                       Error **errp)
+{
+    uint64_t size = extended_memmap[VIRT_HIGH_PCIE_MMIO].size;
+
+    visit_type_size(v, name, &size, errp);
+}
+
+static void virt_set_highmem_mmio_size(Object *obj, Visitor *v,
+                                       const char *name, void *opaque,
+                                       Error **errp)
+{
+    uint64_t size;
+
+    if (!visit_type_size(v, name, &size, errp)) {
+        return;
+    }
+
+    if (!is_power_of_2(size)) {
+        error_setg(errp, "highmem-mmio-size is not a power of 2");
+        return;
+    }
+
+    if (size < DEFAULT_HIGH_PCIE_MMIO_SIZE) {
+        char *sz = size_to_str(DEFAULT_HIGH_PCIE_MMIO_SIZE);
+        error_setg(errp, "highmem-mmio-size cannot be set to a lower value "
+                         "than the default (%s)", sz);
+        g_free(sz);
+        return;
+    }
+
+    extended_memmap[VIRT_HIGH_PCIE_MMIO].size = size;
+}
 
 static bool virt_get_its(Object *obj, Error **errp)
 {
@@ -3120,6 +3165,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_AMD_XGBE);
     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE);
     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_PLATFORM);
+    machine_class_allow_dynamic_sysbus_dev(mc, TYPE_UEFI_VARS_SYSBUS);
 #ifdef CONFIG_TPM
     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS);
 #endif
@@ -3207,6 +3253,14 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
                                           "Set on/off to enable/disable high "
                                           "memory region for PCI MMIO");
 
+    object_class_property_add(oc, "highmem-mmio-size", "size",
+                                   virt_get_highmem_mmio_size,
+                                   virt_set_highmem_mmio_size,
+                                   NULL, NULL);
+    object_class_property_set_description(oc, "highmem-mmio-size",
+                                          "Set the high memory region size "
+                                          "for PCI MMIO");
+
     object_class_property_add_str(oc, "gic-version", virt_get_gic_version,
                                   virt_set_gic_version);
     object_class_property_set_description(oc, "gic-version",
diff --git a/hw/arm/xen-stubs.c b/hw/arm/xen-stubs.c
index 34beb8b08cb02..5551584dc208f 100644
--- a/hw/arm/xen-stubs.c
+++ b/hw/arm/xen-stubs.c
@@ -5,10 +5,9 @@
  */
 
 #include "qemu/osdep.h"
-#include "qemu/error-report.h"
 #include "qapi/qapi-commands-migration.h"
-#include "hw/boards.h"
-#include "system/system.h"
+#include "system/xen.h"
+#include "hw/hw.h"
 #include "hw/xen/xen-hvm-common.h"
 #include "hw/xen/arch_hvm.h"
 
diff --git a/hw/arm/xilinx_zynq.c b/hw/arm/xilinx_zynq.c
index 8477b8287450e..b8916665ed6bc 100644
--- a/hw/arm/xilinx_zynq.c
+++ b/hw/arm/xilinx_zynq.c
@@ -54,11 +54,11 @@ OBJECT_DECLARE_SIMPLE_TYPE(ZynqMachineState, ZYNQ_MACHINE)
 #define FLASH_SIZE (64 * 1024 * 1024)
 #define FLASH_SECTOR_SIZE (128 * 1024)
 
-#define IRQ_OFFSET 32 /* pic interrupts start from index 32 */
-
 #define MPCORE_PERIPHBASE 0xF8F00000
 #define ZYNQ_BOARD_MIDR 0x413FC090
 
+#define GIC_EXT_IRQS 64 /* Zynq 7000 SoC */
+
 static const int dma_irqs[8] = {
     46, 47, 48, 49, 72, 73, 74, 75
 };
@@ -207,7 +207,7 @@ static void zynq_init(MachineState *machine)
     MemoryRegion *ocm_ram = g_new(MemoryRegion, 1);
     DeviceState *dev, *slcr;
     SysBusDevice *busdev;
-    qemu_irq pic[64];
+    qemu_irq pic[GIC_EXT_IRQS];
     int n;
     unsigned int smp_cpus = machine->smp.cpus;
 
@@ -263,6 +263,7 @@ static void zynq_init(MachineState *machine)
 
     dev = qdev_new(TYPE_A9MPCORE_PRIV);
     qdev_prop_set_uint32(dev, "num-cpu", smp_cpus);
+    qdev_prop_set_uint32(dev, "num-irq", GIC_EXT_IRQS + GIC_INTERNAL);
     busdev = SYS_BUS_DEVICE(dev);
     sysbus_realize_and_unref(busdev, &error_fatal);
     sysbus_mmio_map(busdev, 0, MPCORE_PERIPHBASE);
@@ -277,16 +278,16 @@ static void zynq_init(MachineState *machine)
                            qdev_get_gpio_in(cpudev, ARM_CPU_FIQ));
     }
 
-    for (n = 0; n < 64; n++) {
+    for (n = 0; n < GIC_EXT_IRQS; n++) {
         pic[n] = qdev_get_gpio_in(dev, n);
     }
 
-    n = zynq_init_spi_flashes(0xE0006000, pic[58 - IRQ_OFFSET], false, 0);
-    n = zynq_init_spi_flashes(0xE0007000, pic[81 - IRQ_OFFSET], false, n);
-    n = zynq_init_spi_flashes(0xE000D000, pic[51 - IRQ_OFFSET], true, n);
+    n = zynq_init_spi_flashes(0xE0006000, pic[58 - GIC_INTERNAL], false, 0);
+    n = zynq_init_spi_flashes(0xE0007000, pic[81 - GIC_INTERNAL], false, n);
+    n = zynq_init_spi_flashes(0xE000D000, pic[51 - GIC_INTERNAL], true, n);
 
-    sysbus_create_simple(TYPE_CHIPIDEA, 0xE0002000, pic[53 - IRQ_OFFSET]);
-    sysbus_create_simple(TYPE_CHIPIDEA, 0xE0003000, pic[76 - IRQ_OFFSET]);
+    sysbus_create_simple(TYPE_CHIPIDEA, 0xE0002000, pic[53 - GIC_INTERNAL]);
+    sysbus_create_simple(TYPE_CHIPIDEA, 0xE0003000, pic[76 - GIC_INTERNAL]);
 
     dev = qdev_new(TYPE_CADENCE_UART);
     busdev = SYS_BUS_DEVICE(dev);
@@ -295,7 +296,7 @@ static void zynq_init(MachineState *machine)
                           qdev_get_clock_out(slcr, "uart0_ref_clk"));
     sysbus_realize_and_unref(busdev, &error_fatal);
     sysbus_mmio_map(busdev, 0, 0xE0000000);
-    sysbus_connect_irq(busdev, 0, pic[59 - IRQ_OFFSET]);
+    sysbus_connect_irq(busdev, 0, pic[59 - GIC_INTERNAL]);
     dev = qdev_new(TYPE_CADENCE_UART);
     busdev = SYS_BUS_DEVICE(dev);
     qdev_prop_set_chr(dev, "chardev", serial_hd(1));
@@ -303,15 +304,15 @@ static void zynq_init(MachineState *machine)
                           qdev_get_clock_out(slcr, "uart1_ref_clk"));
     sysbus_realize_and_unref(busdev, &error_fatal);
     sysbus_mmio_map(busdev, 0, 0xE0001000);
-    sysbus_connect_irq(busdev, 0, pic[82 - IRQ_OFFSET]);
+    sysbus_connect_irq(busdev, 0, pic[82 - GIC_INTERNAL]);
 
     sysbus_create_varargs("cadence_ttc", 0xF8001000,
-            pic[42-IRQ_OFFSET], pic[43-IRQ_OFFSET], pic[44-IRQ_OFFSET], NULL);
+            pic[42-GIC_INTERNAL], pic[43-GIC_INTERNAL], pic[44-GIC_INTERNAL], NULL);
     sysbus_create_varargs("cadence_ttc", 0xF8002000,
-            pic[69-IRQ_OFFSET], pic[70-IRQ_OFFSET], pic[71-IRQ_OFFSET], NULL);
+            pic[69-GIC_INTERNAL], pic[70-GIC_INTERNAL], pic[71-GIC_INTERNAL], NULL);
 
-    gem_init(0xE000B000, pic[54 - IRQ_OFFSET]);
-    gem_init(0xE000C000, pic[77 - IRQ_OFFSET]);
+    gem_init(0xE000B000, pic[54 - GIC_INTERNAL]);
+    gem_init(0xE000C000, pic[77 - GIC_INTERNAL]);
 
     for (n = 0; n < 2; n++) {
         int hci_irq = n ? 79 : 56;
@@ -330,7 +331,7 @@ static void zynq_init(MachineState *machine)
         qdev_prop_set_uint64(dev, "capareg", ZYNQ_SDHCI_CAPABILITIES);
         sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
         sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, hci_addr);
-        sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[hci_irq - IRQ_OFFSET]);
+        sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[hci_irq - GIC_INTERNAL]);
 
         di = drive_get(IF_SD, 0, n);
         blk = di ? blk_by_legacy_dinfo(di) : NULL;
@@ -343,7 +344,7 @@ static void zynq_init(MachineState *machine)
     dev = qdev_new(TYPE_ZYNQ_XADC);
     sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
     sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0xF8007100);
-    sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[39-IRQ_OFFSET]);
+    sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[39-GIC_INTERNAL]);
 
     dev = qdev_new("pl330");
     object_property_set_link(OBJECT(dev), "memory",
@@ -363,15 +364,15 @@ static void zynq_init(MachineState *machine)
     busdev = SYS_BUS_DEVICE(dev);
     sysbus_realize_and_unref(busdev, &error_fatal);
     sysbus_mmio_map(busdev, 0, 0xF8003000);
-    sysbus_connect_irq(busdev, 0, pic[45-IRQ_OFFSET]); /* abort irq line */
+    sysbus_connect_irq(busdev, 0, pic[45-GIC_INTERNAL]); /* abort irq line */
     for (n = 0; n < ARRAY_SIZE(dma_irqs); ++n) { /* event irqs */
-        sysbus_connect_irq(busdev, n + 1, pic[dma_irqs[n] - IRQ_OFFSET]);
+        sysbus_connect_irq(busdev, n + 1, pic[dma_irqs[n] - GIC_INTERNAL]);
     }
 
     dev = qdev_new("xlnx.ps7-dev-cfg");
     busdev = SYS_BUS_DEVICE(dev);
     sysbus_realize_and_unref(busdev, &error_fatal);
-    sysbus_connect_irq(busdev, 0, pic[40 - IRQ_OFFSET]);
+    sysbus_connect_irq(busdev, 0, pic[40 - GIC_INTERNAL]);
     sysbus_mmio_map(busdev, 0, 0xF8007000);
 
     /*
@@ -460,10 +461,9 @@ static void zynq_machine_class_init(ObjectClass *oc, void *data)
     };
     MachineClass *mc = MACHINE_CLASS(oc);
     ObjectProperty *prop;
-    mc->desc = "Xilinx Zynq Platform Baseboard for Cortex-A9";
+    mc->desc = "Xilinx Zynq 7000 Platform Baseboard for Cortex-A9";
     mc->init = zynq_init;
     mc->max_cpus = ZYNQ_MAX_CPUS;
-    mc->no_sdcard = 1;
     mc->ignore_memory_transaction_failures = true;
     mc->valid_cpu_types = valid_cpu_types;
     mc->default_ram_id = "zynq.ext_ram";
diff --git a/hw/arm/xlnx-versal-virt.c b/hw/arm/xlnx-versal-virt.c
index 1401d37959e4e..0c6f0359e3d9e 100644
--- a/hw/arm/xlnx-versal-virt.c
+++ b/hw/arm/xlnx-versal-virt.c
@@ -818,6 +818,7 @@ static void versal_virt_machine_class_init(ObjectClass *oc, void *data)
     mc->max_cpus = XLNX_VERSAL_NR_ACPUS + XLNX_VERSAL_NR_RCPUS;
     mc->default_cpus = XLNX_VERSAL_NR_ACPUS + XLNX_VERSAL_NR_RCPUS;
     mc->no_cdrom = true;
+    mc->auto_create_sdcard = true;
     mc->default_ram_id = "ddr";
     object_class_property_add_str(oc, "ospi-flash", versal_get_ospi_model,
                                    versal_set_ospi_model);
diff --git a/hw/arm/xlnx-zcu102.c b/hw/arm/xlnx-zcu102.c
index 70fb444bbd9d1..4fdb153e4d8b3 100644
--- a/hw/arm/xlnx-zcu102.c
+++ b/hw/arm/xlnx-zcu102.c
@@ -280,6 +280,7 @@ static void xlnx_zcu102_machine_class_init(ObjectClass *oc, void *data)
     mc->max_cpus = XLNX_ZYNQMP_NUM_APU_CPUS + XLNX_ZYNQMP_NUM_RPU_CPUS;
     mc->default_cpus = XLNX_ZYNQMP_NUM_APU_CPUS;
     mc->default_ram_id = "ddr-ram";
+    mc->auto_create_sdcard = true;
 
     machine_add_audiodev_property(mc);
     object_class_property_add_bool(oc, "secure", zcu102_get_secure,
diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c
index bd5b0dd5e7623..d6022ff2d3d64 100644
--- a/hw/arm/xlnx-zynqmp.c
+++ b/hw/arm/xlnx-zynqmp.c
@@ -689,16 +689,10 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp)
          * - SDIO Specification Version 3.0
          * - eMMC Specification Version 4.51
          */
-        if (!object_property_set_uint(sdhci, "sd-spec-version", 3, errp)) {
-            return;
-        }
-        if (!object_property_set_uint(sdhci, "capareg", SDHCI_CAPABILITIES,
-                                      errp)) {
-            return;
-        }
-        if (!object_property_set_uint(sdhci, "uhs", UHS_I, errp)) {
-            return;
-        }
+        object_property_set_uint(sdhci, "sd-spec-version", 3, &error_abort);
+        object_property_set_uint(sdhci, "capareg", SDHCI_CAPABILITIES,
+                                 &error_abort);
+        object_property_set_uint(sdhci, "uhs", UHS_I, &error_abort);
         if (!sysbus_realize(SYS_BUS_DEVICE(sdhci), errp)) {
             return;
         }
@@ -763,14 +757,10 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp)
     xlnx_zynqmp_create_unimp_mmio(s);
 
     for (i = 0; i < XLNX_ZYNQMP_NUM_GDMA_CH; i++) {
-        if (!object_property_set_uint(OBJECT(&s->gdma[i]), "bus-width", 128,
-                                      errp)) {
-            return;
-        }
-        if (!object_property_set_link(OBJECT(&s->gdma[i]), "dma",
-                                      OBJECT(system_memory), errp)) {
-            return;
-        }
+        object_property_set_uint(OBJECT(&s->gdma[i]), "bus-width", 128,
+                                 &error_abort);
+        object_property_set_link(OBJECT(&s->gdma[i]), "dma",
+                                 OBJECT(system_memory), &error_abort);
         if (!sysbus_realize(SYS_BUS_DEVICE(&s->gdma[i]), errp)) {
             return;
         }
@@ -811,10 +801,8 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp)
     sysbus_connect_irq(SYS_BUS_DEVICE(&s->qspi_dma), 0,
                        qdev_get_gpio_in(DEVICE(&s->qspi_irq_orgate), 0));
 
-    if (!object_property_set_link(OBJECT(&s->qspi), "stream-connected-dma",
-                                  OBJECT(&s->qspi_dma), errp)) {
-         return;
-    }
+    object_property_set_link(OBJECT(&s->qspi), "stream-connected-dma",
+                             OBJECT(&s->qspi_dma), &error_abort);
     if (!sysbus_realize(SYS_BUS_DEVICE(&s->qspi), errp)) {
         return;
     }
@@ -833,10 +821,8 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp)
     }
 
     for (i = 0; i < XLNX_ZYNQMP_NUM_USB; i++) {
-        if (!object_property_set_link(OBJECT(&s->usb[i].sysbus_xhci), "dma",
-                                      OBJECT(system_memory), errp)) {
-            return;
-        }
+        object_property_set_link(OBJECT(&s->usb[i].sysbus_xhci), "dma",
+                                 OBJECT(system_memory), &error_abort);
 
         qdev_prop_set_uint32(DEVICE(&s->usb[i].sysbus_xhci), "intrs", 4);
         qdev_prop_set_uint32(DEVICE(&s->usb[i].sysbus_xhci), "slots", 2);
diff --git a/hw/block/block.c b/hw/block/block.c
index 1d405e02bf875..2e10611d95a60 100644
--- a/hw/block/block.c
+++ b/hw/block/block.c
@@ -12,6 +12,7 @@
 #include "system/blockdev.h"
 #include "system/block-backend.h"
 #include "hw/block/block.h"
+#include "migration/cpr.h"
 #include "qapi/error.h"
 #include "qapi/qapi-types-block.h"
 
@@ -66,6 +67,10 @@ bool blk_check_size_and_read_all(BlockBackend *blk, DeviceState *dev,
     int ret;
     g_autofree char *dev_id = NULL;
 
+    if (cpr_is_incoming()) {
+        return true;
+    }
+
     blk_len = blk_getlength(blk);
     if (blk_len < 0) {
         error_setg_errno(errp, -blk_len,
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index a1829e3abdf73..5077793e5e335 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -33,6 +33,7 @@
 #endif
 #include "hw/virtio/virtio-bus.h"
 #include "migration/qemu-file-types.h"
+#include "hw/virtio/iothread-vq-mapping.h"
 #include "hw/virtio/virtio-access.h"
 #include "hw/virtio/virtio-blk-common.h"
 #include "qemu/coroutine.h"
@@ -50,7 +51,7 @@ static void virtio_blk_init_request(VirtIOBlock *s, VirtQueue *vq,
     req->mr_next = NULL;
 }
 
-static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status)
+void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status)
 {
     VirtIOBlock *s = req->dev;
     VirtIODevice *vdev = VIRTIO_DEVICE(s);
@@ -961,8 +962,18 @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
         break;
     }
     default:
-        virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
-        g_free(req);
+    {
+        /*
+         * Give subclasses a chance to handle unknown requests. This way the
+         * class lookup is not in the hot path.
+         */
+        VirtIOBlkClass *vbk = VIRTIO_BLK_GET_CLASS(s);
+        if (!vbk->handle_unknown_request ||
+            !vbk->handle_unknown_request(req, mrb, type)) {
+            virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
+            g_free(req);
+        }
+    }
     }
     return 0;
 }
@@ -1413,128 +1424,6 @@ static const BlockDevOps virtio_block_ops = {
     .drained_end   = virtio_blk_drained_end,
 };
 
-static bool
-validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list,
-        uint16_t num_queues, Error **errp)
-{
-    g_autofree unsigned long *vqs = bitmap_new(num_queues);
-    g_autoptr(GHashTable) iothreads =
-        g_hash_table_new(g_str_hash, g_str_equal);
-
-    for (IOThreadVirtQueueMappingList *node = list; node; node = node->next) {
-        const char *name = node->value->iothread;
-        uint16List *vq;
-
-        if (!iothread_by_id(name)) {
-            error_setg(errp, "IOThread \"%s\" object does not exist", name);
-            return false;
-        }
-
-        if (!g_hash_table_add(iothreads, (gpointer)name)) {
-            error_setg(errp,
-                    "duplicate IOThread name \"%s\" in iothread-vq-mapping",
-                    name);
-            return false;
-        }
-
-        if (node != list) {
-            if (!!node->value->vqs != !!list->value->vqs) {
-                error_setg(errp, "either all items in iothread-vq-mapping "
-                                 "must have vqs or none of them must have it");
-                return false;
-            }
-        }
-
-        for (vq = node->value->vqs; vq; vq = vq->next) {
-            if (vq->value >= num_queues) {
-                error_setg(errp, "vq index %u for IOThread \"%s\" must be "
-                        "less than num_queues %u in iothread-vq-mapping",
-                        vq->value, name, num_queues);
-                return false;
-            }
-
-            if (test_and_set_bit(vq->value, vqs)) {
-                error_setg(errp, "cannot assign vq %u to IOThread \"%s\" "
-                        "because it is already assigned", vq->value, name);
-                return false;
-            }
-        }
-    }
-
-    if (list->value->vqs) {
-        for (uint16_t i = 0; i < num_queues; i++) {
-            if (!test_bit(i, vqs)) {
-                error_setg(errp,
-                        "missing vq %u IOThread assignment in iothread-vq-mapping",
-                        i);
-                return false;
-            }
-        }
-    }
-
-    return true;
-}
-
-/**
- * apply_iothread_vq_mapping:
- * @iothread_vq_mapping_list: The mapping of virtqueues to IOThreads.
- * @vq_aio_context: The array of AioContext pointers to fill in.
- * @num_queues: The length of @vq_aio_context.
- * @errp: If an error occurs, a pointer to the area to store the error.
- *
- * Fill in the AioContext for each virtqueue in the @vq_aio_context array given
- * the iothread-vq-mapping parameter in @iothread_vq_mapping_list.
- *
- * Returns: %true on success, %false on failure.
- **/
-static bool apply_iothread_vq_mapping(
-        IOThreadVirtQueueMappingList *iothread_vq_mapping_list,
-        AioContext **vq_aio_context,
-        uint16_t num_queues,
-        Error **errp)
-{
-    IOThreadVirtQueueMappingList *node;
-    size_t num_iothreads = 0;
-    size_t cur_iothread = 0;
-
-    if (!validate_iothread_vq_mapping_list(iothread_vq_mapping_list,
-                                           num_queues, errp)) {
-        return false;
-    }
-
-    for (node = iothread_vq_mapping_list; node; node = node->next) {
-        num_iothreads++;
-    }
-
-    for (node = iothread_vq_mapping_list; node; node = node->next) {
-        IOThread *iothread = iothread_by_id(node->value->iothread);
-        AioContext *ctx = iothread_get_aio_context(iothread);
-
-        /* Released in virtio_blk_vq_aio_context_cleanup() */
-        object_ref(OBJECT(iothread));
-
-        if (node->value->vqs) {
-            uint16List *vq;
-
-            /* Explicit vq:IOThread assignment */
-            for (vq = node->value->vqs; vq; vq = vq->next) {
-                assert(vq->value < num_queues);
-                vq_aio_context[vq->value] = ctx;
-            }
-        } else {
-            /* Round-robin vq:IOThread assignment */
-            for (unsigned i = cur_iothread; i < num_queues;
-                 i += num_iothreads) {
-                vq_aio_context[i] = ctx;
-            }
-        }
-
-        cur_iothread++;
-    }
-
-    return true;
-}
-
 /* Context: BQL held */
 static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp)
 {
@@ -1567,7 +1456,7 @@ static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp)
     s->vq_aio_context = g_new(AioContext *, conf->num_queues);
 
     if (conf->iothread_vq_mapping_list) {
-        if (!apply_iothread_vq_mapping(conf->iothread_vq_mapping_list,
+        if (!iothread_vq_mapping_apply(conf->iothread_vq_mapping_list,
                                        s->vq_aio_context,
                                        conf->num_queues,
                                        errp)) {
@@ -1601,12 +1490,7 @@ static void virtio_blk_vq_aio_context_cleanup(VirtIOBlock *s)
     assert(!s->ioeventfd_started);
 
     if (conf->iothread_vq_mapping_list) {
-        IOThreadVirtQueueMappingList *node;
-
-        for (node = conf->iothread_vq_mapping_list; node; node = node->next) {
-            IOThread *iothread = iothread_by_id(node->value->iothread);
-            object_unref(OBJECT(iothread));
-        }
+        iothread_vq_mapping_cleanup(conf->iothread_vq_mapping_list);
     }
 
     if (conf->iothread) {
@@ -2029,6 +1913,7 @@ static const TypeInfo virtio_blk_info = {
     .instance_size = sizeof(VirtIOBlock),
     .instance_init = virtio_blk_instance_init,
     .class_init = virtio_blk_class_init,
+    .class_size = sizeof(VirtIOBlkClass),
 };
 
 static void virtio_register_types(void)
diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c
index 6c26052561a75..ec04102b669ed 100644
--- a/hw/block/xen-block.c
+++ b/hw/block/xen-block.c
@@ -408,6 +408,8 @@ static void xen_block_realize(XenDevice *xendev, Error **errp)
     }
 
     xen_device_backend_printf(xendev, "info", "%u", blockdev->info);
+    xen_device_backend_printf(xendev, "mode",
+                              (blockdev->info & VDISK_READONLY) ? "r" : "w");
 
     xen_device_frontend_printf(xendev, "virtual-device", "%lu",
                                vdev->number);
@@ -661,8 +663,8 @@ static void xen_block_set_vdev(Object *obj, Visitor *v, const char *name,
  * https://xenbits.xen.org/docs/unstable/man/xen-vbd-interface.7.html
  */
 static const PropertyInfo xen_block_prop_vdev = {
-    .name  = "str",
-    .description = "Virtual Disk specifier: d*p*/xvd*/hd*/sd*",
+    .type  = "str",
+    .description = "Virtual Disk specifier (d*p*/xvd*/hd*/sd*)",
     .get = xen_block_get_vdev,
     .set = xen_block_set_vdev,
 };
diff --git a/hw/char/bcm2835_aux.c b/hw/char/bcm2835_aux.c
index 73ad593406736..c6e7eccf7dd7b 100644
--- a/hw/char/bcm2835_aux.c
+++ b/hw/char/bcm2835_aux.c
@@ -221,7 +221,7 @@ static int bcm2835_aux_can_receive(void *opaque)
 {
     BCM2835AuxState *s = opaque;
 
-    return s->read_count < BCM2835_AUX_RX_FIFO_LEN;
+    return BCM2835_AUX_RX_FIFO_LEN - s->read_count;
 }
 
 static void bcm2835_aux_put_fifo(void *opaque, uint8_t value)
@@ -243,7 +243,9 @@ static void bcm2835_aux_put_fifo(void *opaque, uint8_t value)
 
 static void bcm2835_aux_receive(void *opaque, const uint8_t *buf, int size)
 {
-    bcm2835_aux_put_fifo(opaque, *buf);
+    for (int i = 0; i < size; i++) {
+        bcm2835_aux_put_fifo(opaque, buf[i]);
+    }
 }
 
 static const MemoryRegionOps bcm2835_aux_ops = {
diff --git a/hw/char/imx_serial.c b/hw/char/imx_serial.c
index 38b4865157ed7..6f14f8403a9a7 100644
--- a/hw/char/imx_serial.c
+++ b/hw/char/imx_serial.c
@@ -386,7 +386,8 @@ static void imx_serial_write(void *opaque, hwaddr offset,
 static int imx_can_receive(void *opaque)
 {
     IMXSerialState *s = (IMXSerialState *)opaque;
-    return s->ucr2 & UCR2_RXEN && fifo32_num_used(&s->rx_fifo) < FIFO_SIZE;
+
+    return s->ucr2 & UCR2_RXEN ? fifo32_num_free(&s->rx_fifo) : 0;
 }
 
 static void imx_put_data(void *opaque, uint32_t value)
@@ -417,7 +418,10 @@ static void imx_receive(void *opaque, const uint8_t *buf, int size)
     IMXSerialState *s = (IMXSerialState *)opaque;
 
     s->usr2 |= USR2_WAKE;
-    imx_put_data(opaque, *buf);
+
+    for (int i = 0; i < size; i++) {
+        imx_put_data(opaque, buf[i]);
+    }
 }
 
 static void imx_event(void *opaque, QEMUChrEvent event)
diff --git a/hw/char/mcf_uart.c b/hw/char/mcf_uart.c
index 980a12fcb7dd5..529c26be93a39 100644
--- a/hw/char/mcf_uart.c
+++ b/hw/char/mcf_uart.c
@@ -17,6 +17,8 @@
 #include "chardev/char-fe.h"
 #include "qom/object.h"
 
+#define FIFO_DEPTH 4
+
 struct mcf_uart_state {
     SysBusDevice parent_obj;
 
@@ -27,7 +29,7 @@ struct mcf_uart_state {
     uint8_t imr;
     uint8_t bg1;
     uint8_t bg2;
-    uint8_t fifo[4];
+    uint8_t fifo[FIFO_DEPTH];
     uint8_t tb;
     int current_mr;
     int fifo_len;
@@ -247,14 +249,16 @@ static void mcf_uart_reset(DeviceState *dev)
 static void mcf_uart_push_byte(mcf_uart_state *s, uint8_t data)
 {
     /* Break events overwrite the last byte if the fifo is full.  */
-    if (s->fifo_len == 4)
+    if (s->fifo_len == FIFO_DEPTH) {
         s->fifo_len--;
+    }
 
     s->fifo[s->fifo_len] = data;
     s->fifo_len++;
     s->sr |= MCF_UART_RxRDY;
-    if (s->fifo_len == 4)
+    if (s->fifo_len == FIFO_DEPTH) {
         s->sr |= MCF_UART_FFULL;
+    }
 
     mcf_uart_update(s);
 }
@@ -277,14 +281,16 @@ static int mcf_uart_can_receive(void *opaque)
 {
     mcf_uart_state *s = (mcf_uart_state *)opaque;
 
-    return s->rx_enabled && (s->sr & MCF_UART_FFULL) == 0;
+    return s->rx_enabled ? FIFO_DEPTH - s->fifo_len : 0;
 }
 
 static void mcf_uart_receive(void *opaque, const uint8_t *buf, int size)
 {
     mcf_uart_state *s = (mcf_uart_state *)opaque;
 
-    mcf_uart_push_byte(s, buf[0]);
+    for (int i = 0; i < size; i++) {
+        mcf_uart_push_byte(s, buf[i]);
+    }
 }
 
 static const MemoryRegionOps mcf_uart_ops = {
diff --git a/hw/char/pl011.c b/hw/char/pl011.c
index 06ce851044d9f..0e9ec1301d3fd 100644
--- a/hw/char/pl011.c
+++ b/hw/char/pl011.c
@@ -85,6 +85,7 @@ DeviceState *pl011_create(hwaddr addr, qemu_irq irq, Chardev *chr)
 #define CR_OUT1     (1 << 12)
 #define CR_RTS      (1 << 11)
 #define CR_DTR      (1 << 10)
+#define CR_RXE      (1 << 9)
 #define CR_TXE      (1 << 8)
 #define CR_LBE      (1 << 7)
 #define CR_UARTEN   (1 << 0)
@@ -184,7 +185,7 @@ static void pl011_fifo_rx_put(void *opaque, uint32_t value)
     s->read_fifo[slot] = value;
     s->read_count++;
     s->flags &= ~PL011_FLAG_RXFE;
-    trace_pl011_fifo_rx_put(value, s->read_count);
+    trace_pl011_fifo_rx_put(value, s->read_count, pipe_depth);
     if (s->read_count == pipe_depth) {
         trace_pl011_fifo_rx_full();
         s->flags |= PL011_FLAG_RXFF;
@@ -247,12 +248,13 @@ static void pl011_write_txdata(PL011State *s, uint8_t data)
 static uint32_t pl011_read_rxdata(PL011State *s)
 {
     uint32_t c;
+    unsigned fifo_depth = pl011_get_fifo_depth(s);
 
     s->flags &= ~PL011_FLAG_RXFF;
     c = s->read_fifo[s->read_pos];
     if (s->read_count > 0) {
         s->read_count--;
-        s->read_pos = (s->read_pos + 1) & (pl011_get_fifo_depth(s) - 1);
+        s->read_pos = (s->read_pos + 1) & (fifo_depth - 1);
     }
     if (s->read_count == 0) {
         s->flags |= PL011_FLAG_RXFE;
@@ -260,7 +262,7 @@ static uint32_t pl011_read_rxdata(PL011State *s)
     if (s->read_count == s->read_trigger - 1) {
         s->int_level &= ~INT_RX;
     }
-    trace_pl011_read_fifo(s->read_count);
+    trace_pl011_read_fifo(s->read_count, fifo_depth);
     s->rsr = c >> 8;
     pl011_update(s);
     qemu_chr_fe_accept_input(&s->chr);
@@ -485,15 +487,26 @@ static void pl011_write(void *opaque, hwaddr offset,
 static int pl011_can_receive(void *opaque)
 {
     PL011State *s = (PL011State *)opaque;
-    int r;
+    unsigned fifo_depth = pl011_get_fifo_depth(s);
+    unsigned fifo_available = fifo_depth - s->read_count;
 
-    r = s->read_count < pl011_get_fifo_depth(s);
-    trace_pl011_can_receive(s->lcr, s->read_count, r);
-    return r;
+    /*
+     * In theory we should check the UART and RX enable bits here and
+     * return 0 if they are not set (so the guest can't receive data
+     * until you have enabled the UART). In practice we suspect there
+     * is at least some guest code out there which has been tested only
+     * on QEMU and which never bothers to enable the UART because we
+     * historically never enforced that. So we effectively keep the
+     * UART continuously enabled regardless of the enable bits.
+     */
+
+    trace_pl011_can_receive(s->lcr, s->read_count, fifo_depth, fifo_available);
+    return fifo_available;
 }
 
 static void pl011_receive(void *opaque, const uint8_t *buf, int size)
 {
+    trace_pl011_receive(size);
     /*
      * In loopback mode, the RX input signal is internally disconnected
      * from the entire receiving logics; thus, all inputs are ignored,
@@ -503,7 +516,9 @@ static void pl011_receive(void *opaque, const uint8_t *buf, int size)
         return;
     }
 
-    pl011_fifo_rx_put(opaque, *buf);
+    for (int i = 0; i < size; i++) {
+        pl011_fifo_rx_put(opaque, buf[i]);
+    }
 }
 
 static void pl011_event(void *opaque, QEMUChrEvent event)
diff --git a/hw/char/sh_serial.c b/hw/char/sh_serial.c
index 247aeb071ac6b..41c8175a638f7 100644
--- a/hw/char/sh_serial.c
+++ b/hw/char/sh_serial.c
@@ -320,7 +320,7 @@ static uint64_t sh_serial_read(void *opaque, hwaddr offs,
 
 static int sh_serial_can_receive(SHSerialState *s)
 {
-    return s->scr & (1 << 4);
+    return s->scr & (1 << 4) ? SH_RX_FIFO_LENGTH - s->rx_head : 0;
 }
 
 static void sh_serial_receive_break(SHSerialState *s)
@@ -353,22 +353,20 @@ static void sh_serial_receive1(void *opaque, const uint8_t *buf, int size)
     if (s->feat & SH_SERIAL_FEAT_SCIF) {
         int i;
         for (i = 0; i < size; i++) {
-            if (s->rx_cnt < SH_RX_FIFO_LENGTH) {
-                s->rx_fifo[s->rx_head++] = buf[i];
-                if (s->rx_head == SH_RX_FIFO_LENGTH) {
-                    s->rx_head = 0;
-                }
-                s->rx_cnt++;
-                if (s->rx_cnt >= s->rtrg) {
-                    s->flags |= SH_SERIAL_FLAG_RDF;
-                    if (s->scr & (1 << 6) && s->rxi) {
-                        timer_del(&s->fifo_timeout_timer);
-                        qemu_set_irq(s->rxi, 1);
-                    }
-                } else {
-                    timer_mod(&s->fifo_timeout_timer,
-                        qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 15 * s->etu);
+            s->rx_fifo[s->rx_head++] = buf[i];
+            if (s->rx_head == SH_RX_FIFO_LENGTH) {
+                s->rx_head = 0;
+            }
+            s->rx_cnt++;
+            if (s->rx_cnt >= s->rtrg) {
+                s->flags |= SH_SERIAL_FLAG_RDF;
+                if (s->scr & (1 << 6) && s->rxi) {
+                    timer_del(&s->fifo_timeout_timer);
+                    qemu_set_irq(s->rxi, 1);
                 }
+            } else {
+                timer_mod(&s->fifo_timeout_timer,
+                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 15 * s->etu);
             }
         }
     } else {
diff --git a/hw/char/sifive_uart.c b/hw/char/sifive_uart.c
index 4bc5767284b43..b45e6c098c44b 100644
--- a/hw/char/sifive_uart.c
+++ b/hw/char/sifive_uart.c
@@ -251,6 +251,23 @@ static int sifive_uart_be_change(void *opaque)
     return 0;
 }
 
+static void sifive_uart_reset_enter(Object *obj, ResetType type)
+{
+    SiFiveUARTState *s = SIFIVE_UART(obj);
+
+    s->txfifo = 0;
+    s->ie = 0;
+    s->ip = 0;
+    s->txctrl = 0;
+    s->rxctrl = 0;
+    s->div = 0;
+
+    s->rx_fifo_len = 0;
+
+    memset(s->rx_fifo, 0, SIFIVE_UART_RX_FIFO_SIZE);
+    fifo8_reset(&s->tx_fifo);
+}
+
 static const Property sifive_uart_properties[] = {
     DEFINE_PROP_CHR("chardev", SiFiveUARTState, chr),
 };
@@ -270,30 +287,24 @@ static void sifive_uart_realize(DeviceState *dev, Error **errp)
 {
     SiFiveUARTState *s = SIFIVE_UART(dev);
 
+    fifo8_create(&s->tx_fifo, SIFIVE_UART_TX_FIFO_SIZE);
+
     s->fifo_trigger_handle = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                           fifo_trigger_update, s);
 
-    qemu_chr_fe_set_handlers(&s->chr, sifive_uart_can_rx, sifive_uart_rx,
-                             sifive_uart_event, sifive_uart_be_change, s,
-                             NULL, true);
+    if (qemu_chr_fe_backend_connected(&s->chr)) {
+        qemu_chr_fe_set_handlers(&s->chr, sifive_uart_can_rx, sifive_uart_rx,
+                                 sifive_uart_event, sifive_uart_be_change, s,
+                                 NULL, true);
+    }
 
 }
 
-static void sifive_uart_reset_enter(Object *obj, ResetType type)
+static void sifive_uart_unrealize(DeviceState *dev)
 {
-    SiFiveUARTState *s = SIFIVE_UART(obj);
-
-    s->txfifo = 0;
-    s->ie = 0;
-    s->ip = 0;
-    s->txctrl = 0;
-    s->rxctrl = 0;
-    s->div = 0;
-
-    s->rx_fifo_len = 0;
+    SiFiveUARTState *s = SIFIVE_UART(dev);
 
-    memset(s->rx_fifo, 0, SIFIVE_UART_RX_FIFO_SIZE);
-    fifo8_create(&s->tx_fifo, SIFIVE_UART_TX_FIFO_SIZE);
+    fifo8_destroy(&s->tx_fifo);
 }
 
 static void sifive_uart_reset_hold(Object *obj, ResetType type)
@@ -329,6 +340,7 @@ static void sifive_uart_class_init(ObjectClass *oc, void *data)
     ResettableClass *rc = RESETTABLE_CLASS(oc);
 
     dc->realize = sifive_uart_realize;
+    dc->unrealize = sifive_uart_unrealize;
     dc->vmsd = &vmstate_sifive_uart;
     rc->phases.enter = sifive_uart_reset_enter;
     rc->phases.hold  = sifive_uart_reset_hold;
diff --git a/hw/char/trace-events b/hw/char/trace-events
index b2e3d25ae346d..05a33036c1207 100644
--- a/hw/char/trace-events
+++ b/hw/char/trace-events
@@ -60,12 +60,13 @@ imx_serial_put_data(const char *chrname, uint32_t value) "%s: 0x%" PRIx32
 # pl011.c
 pl011_irq_state(int level) "irq state %d"
 pl011_read(uint32_t addr, uint32_t value, const char *regname) "addr 0x%03x value 0x%08x reg %s"
-pl011_read_fifo(int read_count) "FIFO read, read_count now %d"
+pl011_read_fifo(unsigned rx_fifo_used, size_t rx_fifo_depth) "RX FIFO read, used %u/%zu"
 pl011_write(uint32_t addr, uint32_t value, const char *regname) "addr 0x%03x value 0x%08x reg %s"
-pl011_can_receive(uint32_t lcr, int read_count, int r) "LCR 0x%08x read_count %d returning %d"
-pl011_fifo_rx_put(uint32_t c, int read_count) "new char 0x%02x read_count now %d"
+pl011_can_receive(uint32_t lcr, unsigned rx_fifo_used, size_t rx_fifo_depth, unsigned rx_fifo_available) "LCR 0x%02x, RX FIFO used %u/%zu, can_receive %u chars"
+pl011_fifo_rx_put(uint32_t c, unsigned read_count, size_t rx_fifo_depth) "RX FIFO push char [0x%02x] %d/%zu depth used"
 pl011_fifo_rx_full(void) "RX FIFO now full, RXFF set"
 pl011_baudrate_change(unsigned int baudrate, uint64_t clock, uint32_t ibrd, uint32_t fbrd) "new baudrate %u (clk: %" PRIu64 "hz, ibrd: %" PRIu32 ", fbrd: %" PRIu32 ")"
+pl011_receive(int size) "recv %d chars"
 
 # cmsdk-apb-uart.c
 cmsdk_apb_uart_read(uint64_t offset, uint64_t data, unsigned size) "CMSDK APB UART read: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u"
diff --git a/hw/char/xilinx_uartlite.c b/hw/char/xilinx_uartlite.c
index 56955e0d74afd..4037c937eebb9 100644
--- a/hw/char/xilinx_uartlite.c
+++ b/hw/char/xilinx_uartlite.c
@@ -24,6 +24,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu/log.h"
+#include "qapi/error.h"
 #include "hw/char/xilinx_uartlite.h"
 #include "hw/irq.h"
 #include "hw/qdev-properties.h"
@@ -57,6 +58,7 @@
 struct XilinxUARTLite {
     SysBusDevice parent_obj;
 
+    EndianMode model_endianness;
     MemoryRegion mmio;
     CharBackend chr;
     qemu_irq irq;
@@ -166,17 +168,21 @@ uart_write(void *opaque, hwaddr addr,
     uart_update_irq(s);
 }
 
-static const MemoryRegionOps uart_ops = {
-    .read = uart_read,
-    .write = uart_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
-    .valid = {
-        .min_access_size = 1,
-        .max_access_size = 4
-    }
+static const MemoryRegionOps uart_ops[2] = {
+    [0 ... 1] = {
+        .read = uart_read,
+        .write = uart_write,
+        .valid = {
+            .min_access_size = 1,
+            .max_access_size = 4,
+        },
+    },
+    [0].endianness = DEVICE_LITTLE_ENDIAN,
+    [1].endianness = DEVICE_BIG_ENDIAN,
 };
 
 static const Property xilinx_uartlite_properties[] = {
+    DEFINE_PROP_ENDIAN_NODEFAULT("endianness", XilinxUARTLite, model_endianness),
     DEFINE_PROP_CHR("chardev", XilinxUARTLite, chr),
 };
 
@@ -214,6 +220,15 @@ static void xilinx_uartlite_realize(DeviceState *dev, Error **errp)
 {
     XilinxUARTLite *s = XILINX_UARTLITE(dev);
 
+    if (s->model_endianness == ENDIAN_MODE_UNSPECIFIED) {
+        error_setg(errp, TYPE_XILINX_UARTLITE " property 'endianness'"
+                         " must be set to 'big' or 'little'");
+        return;
+    }
+
+    memory_region_init_io(&s->mmio, OBJECT(dev),
+                          &uart_ops[s->model_endianness == ENDIAN_MODE_BIG],
+                          s, "xlnx.xps-uartlite", R_MAX * 4);
     qemu_chr_fe_set_handlers(&s->chr, uart_can_rx, uart_rx,
                              uart_event, NULL, s, NULL, true);
 }
@@ -223,9 +238,6 @@ static void xilinx_uartlite_init(Object *obj)
     XilinxUARTLite *s = XILINX_UARTLITE(obj);
 
     sysbus_init_irq(SYS_BUS_DEVICE(obj), &s->irq);
-
-    memory_region_init_io(&s->mmio, obj, &uart_ops, s,
-                          "xlnx.xps-uartlite", R_MAX * 4);
     sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio);
 }
 
diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
index cb79566cc51bc..9064dd24f827c 100644
--- a/hw/core/cpu-common.c
+++ b/hw/core/cpu-common.c
@@ -40,9 +40,7 @@ CPUState *cpu_by_arch_id(int64_t id)
     CPUState *cpu;
 
     CPU_FOREACH(cpu) {
-        CPUClass *cc = CPU_GET_CLASS(cpu);
-
-        if (cc->get_arch_id(cpu) == id) {
+        if (cpu->cc->get_arch_id(cpu) == id) {
             return cpu;
         }
     }
@@ -101,11 +99,9 @@ static int cpu_common_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg)
 
 void cpu_dump_state(CPUState *cpu, FILE *f, int flags)
 {
-    CPUClass *cc = CPU_GET_CLASS(cpu);
-
-    if (cc->dump_state) {
+    if (cpu->cc->dump_state) {
         cpu_synchronize_state(cpu);
-        cc->dump_state(cpu, f, flags);
+        cpu->cc->dump_state(cpu, f, flags);
     }
 }
 
@@ -119,11 +115,10 @@ void cpu_reset(CPUState *cpu)
 static void cpu_common_reset_hold(Object *obj, ResetType type)
 {
     CPUState *cpu = CPU(obj);
-    CPUClass *cc = CPU_GET_CLASS(cpu);
 
     if (qemu_loglevel_mask(CPU_LOG_RESET)) {
         qemu_log("CPU Reset (CPU %d)\n", cpu->cpu_index);
-        log_cpu_state(cpu, cc->reset_dump_flags);
+        log_cpu_state(cpu, cpu->cc->reset_dump_flags);
     }
 
     cpu->interrupt_request = 0;
@@ -139,11 +134,6 @@ static void cpu_common_reset_hold(Object *obj, ResetType type)
     cpu_exec_reset_hold(cpu);
 }
 
-static bool cpu_common_has_work(CPUState *cs)
-{
-    return false;
-}
-
 ObjectClass *cpu_class_by_name(const char *typename, const char *cpu_model)
 {
     ObjectClass *oc;
@@ -193,6 +183,20 @@ static void cpu_common_parse_features(const char *typename, char *features,
     }
 }
 
+bool cpu_exec_realizefn(CPUState *cpu, Error **errp)
+{
+    if (!accel_cpu_common_realize(cpu, errp)) {
+        return false;
+    }
+
+    /* Wait until cpu initialization complete before exposing cpu. */
+    cpu_list_add(cpu);
+
+    cpu_vmstate_register(cpu);
+
+    return true;
+}
+
 static void cpu_common_realizefn(DeviceState *dev, Error **errp)
 {
     CPUState *cpu = CPU(dev);
@@ -234,17 +238,35 @@ static void cpu_common_unrealizefn(DeviceState *dev)
     cpu_exec_unrealizefn(cpu);
 }
 
+void cpu_exec_unrealizefn(CPUState *cpu)
+{
+    cpu_vmstate_unregister(cpu);
+
+    cpu_list_remove(cpu);
+    /*
+     * Now that the vCPU has been removed from the RCU list, we can call
+     * accel_cpu_common_unrealize, which may free fields using call_rcu.
+     */
+    accel_cpu_common_unrealize(cpu);
+}
+
 static void cpu_common_initfn(Object *obj)
 {
     CPUState *cpu = CPU(obj);
 
+    cpu_exec_class_post_init(CPU_GET_CLASS(obj));
+
+    /* cache the cpu class for the hotpath */
+    cpu->cc = CPU_GET_CLASS(cpu);
+
     gdb_init_cpu(cpu);
     cpu->cpu_index = UNASSIGNED_CPU_INDEX;
     cpu->cluster_index = UNASSIGNED_CLUSTER_INDEX;
+    cpu->as = NULL;
+    cpu->num_ases = 0;
     /* user-mode doesn't have configurable SMP topology */
     /* the default value is changed by qemu_init_vcpu() for system-mode */
     cpu->nr_threads = 1;
-    cpu->cflags_next_tb = -1;
 
     /* allocate storage for thread info, initialise condition variables */
     cpu->thread = g_new0(QemuThread, 1);
@@ -306,7 +328,6 @@ static void cpu_common_class_init(ObjectClass *klass, void *data)
 
     k->parse_features = cpu_common_parse_features;
     k->get_arch_id = cpu_common_get_arch_id;
-    k->has_work = cpu_common_has_work;
     k->gdb_read_register = cpu_common_gdb_read_register;
     k->gdb_write_register = cpu_common_gdb_write_register;
     set_bit(DEVICE_CATEGORY_CPU, dc->categories);
diff --git a/hw/core/cpu-system.c b/hw/core/cpu-system.c
index 6aae28a349a7a..aed5076ec78b9 100644
--- a/hw/core/cpu-system.c
+++ b/hw/core/cpu-system.c
@@ -20,15 +20,26 @@
 
 #include "qemu/osdep.h"
 #include "qapi/error.h"
+#include "exec/address-spaces.h"
+#include "exec/cputlb.h"
+#include "exec/memory.h"
+#include "exec/tb-flush.h"
 #include "exec/tswap.h"
+#include "hw/qdev-core.h"
+#include "hw/qdev-properties.h"
 #include "hw/core/sysemu-cpu-ops.h"
+#include "migration/vmstate.h"
+#include "system/tcg.h"
 
-bool cpu_paging_enabled(const CPUState *cpu)
+bool cpu_has_work(CPUState *cpu)
 {
-    CPUClass *cc = CPU_GET_CLASS(cpu);
+    return cpu->cc->sysemu_ops->has_work(cpu);
+}
 
-    if (cc->sysemu_ops->get_paging_enabled) {
-        return cc->sysemu_ops->get_paging_enabled(cpu);
+bool cpu_paging_enabled(const CPUState *cpu)
+{
+    if (cpu->cc->sysemu_ops->get_paging_enabled) {
+        return cpu->cc->sysemu_ops->get_paging_enabled(cpu);
     }
 
     return false;
@@ -37,10 +48,8 @@ bool cpu_paging_enabled(const CPUState *cpu)
 bool cpu_get_memory_mapping(CPUState *cpu, MemoryMappingList *list,
                             Error **errp)
 {
-    CPUClass *cc = CPU_GET_CLASS(cpu);
-
-    if (cc->sysemu_ops->get_memory_mapping) {
-        return cc->sysemu_ops->get_memory_mapping(cpu, list, errp);
+    if (cpu->cc->sysemu_ops->get_memory_mapping) {
+        return cpu->cc->sysemu_ops->get_memory_mapping(cpu, list, errp);
     }
 
     error_setg(errp, "Obtaining memory mappings is unsupported on this CPU.");
@@ -50,14 +59,19 @@ bool cpu_get_memory_mapping(CPUState *cpu, MemoryMappingList *list,
 hwaddr cpu_get_phys_page_attrs_debug(CPUState *cpu, vaddr addr,
                                      MemTxAttrs *attrs)
 {
-    CPUClass *cc = CPU_GET_CLASS(cpu);
+    hwaddr paddr;
 
-    if (cc->sysemu_ops->get_phys_page_attrs_debug) {
-        return cc->sysemu_ops->get_phys_page_attrs_debug(cpu, addr, attrs);
+    if (cpu->cc->sysemu_ops->get_phys_page_attrs_debug) {
+        paddr = cpu->cc->sysemu_ops->get_phys_page_attrs_debug(cpu, addr,
+                                                               attrs);
+    } else {
+        /* Fallback for CPUs which don't implement the _attrs_ hook */
+        *attrs = MEMTXATTRS_UNSPECIFIED;
+        paddr = cpu->cc->sysemu_ops->get_phys_page_debug(cpu, addr);
     }
-    /* Fallback for CPUs which don't implement the _attrs_ hook */
-    *attrs = MEMTXATTRS_UNSPECIFIED;
-    return cc->sysemu_ops->get_phys_page_debug(cpu, addr);
+    /* Indicate that this is a debug access. */
+    attrs->debug = 1;
+    return paddr;
 }
 
 hwaddr cpu_get_phys_page_debug(CPUState *cpu, vaddr addr)
@@ -81,64 +95,211 @@ int cpu_asidx_from_attrs(CPUState *cpu, MemTxAttrs attrs)
 int cpu_write_elf32_qemunote(WriteCoreDumpFunction f, CPUState *cpu,
                              void *opaque)
 {
-    CPUClass *cc = CPU_GET_CLASS(cpu);
-
-    if (!cc->sysemu_ops->write_elf32_qemunote) {
+    if (!cpu->cc->sysemu_ops->write_elf32_qemunote) {
         return 0;
     }
-    return (*cc->sysemu_ops->write_elf32_qemunote)(f, cpu, opaque);
+    return (*cpu->cc->sysemu_ops->write_elf32_qemunote)(f, cpu, opaque);
 }
 
 int cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cpu,
                          int cpuid, void *opaque)
 {
-    CPUClass *cc = CPU_GET_CLASS(cpu);
-
-    if (!cc->sysemu_ops->write_elf32_note) {
+    if (!cpu->cc->sysemu_ops->write_elf32_note) {
         return -1;
     }
-    return (*cc->sysemu_ops->write_elf32_note)(f, cpu, cpuid, opaque);
+    return (*cpu->cc->sysemu_ops->write_elf32_note)(f, cpu, cpuid, opaque);
 }
 
 int cpu_write_elf64_qemunote(WriteCoreDumpFunction f, CPUState *cpu,
                              void *opaque)
 {
-    CPUClass *cc = CPU_GET_CLASS(cpu);
-
-    if (!cc->sysemu_ops->write_elf64_qemunote) {
+    if (!cpu->cc->sysemu_ops->write_elf64_qemunote) {
         return 0;
     }
-    return (*cc->sysemu_ops->write_elf64_qemunote)(f, cpu, opaque);
+    return (*cpu->cc->sysemu_ops->write_elf64_qemunote)(f, cpu, opaque);
 }
 
 int cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cpu,
                          int cpuid, void *opaque)
 {
-    CPUClass *cc = CPU_GET_CLASS(cpu);
-
-    if (!cc->sysemu_ops->write_elf64_note) {
+    if (!cpu->cc->sysemu_ops->write_elf64_note) {
         return -1;
     }
-    return (*cc->sysemu_ops->write_elf64_note)(f, cpu, cpuid, opaque);
+    return (*cpu->cc->sysemu_ops->write_elf64_note)(f, cpu, cpuid, opaque);
 }
 
 bool cpu_virtio_is_big_endian(CPUState *cpu)
 {
-    CPUClass *cc = CPU_GET_CLASS(cpu);
-
-    if (cc->sysemu_ops->virtio_is_big_endian) {
-        return cc->sysemu_ops->virtio_is_big_endian(cpu);
+    if (cpu->cc->sysemu_ops->virtio_is_big_endian) {
+        return cpu->cc->sysemu_ops->virtio_is_big_endian(cpu);
     }
     return target_words_bigendian();
 }
 
 GuestPanicInformation *cpu_get_crash_info(CPUState *cpu)
 {
-    CPUClass *cc = CPU_GET_CLASS(cpu);
     GuestPanicInformation *res = NULL;
 
-    if (cc->sysemu_ops->get_crash_info) {
-        res = cc->sysemu_ops->get_crash_info(cpu);
+    if (cpu->cc->sysemu_ops->get_crash_info) {
+        res = cpu->cc->sysemu_ops->get_crash_info(cpu);
     }
     return res;
 }
+
+static const Property cpu_system_props[] = {
+    /*
+     * Create a memory property for system CPU object, so users can
+     * wire up its memory.  The default if no link is set up is to use
+     * the system address space.
+     */
+    DEFINE_PROP_LINK("memory", CPUState, memory, TYPE_MEMORY_REGION,
+                     MemoryRegion *),
+};
+
+static bool cpu_get_start_powered_off(Object *obj, Error **errp)
+{
+    CPUState *cpu = CPU(obj);
+    return cpu->start_powered_off;
+}
+
+static void cpu_set_start_powered_off(Object *obj, bool value, Error **errp)
+{
+    CPUState *cpu = CPU(obj);
+    cpu->start_powered_off = value;
+}
+
+void cpu_class_init_props(DeviceClass *dc)
+{
+    ObjectClass *oc = OBJECT_CLASS(dc);
+
+    /*
+     * We can't use DEFINE_PROP_BOOL in the Property array for this
+     * property, because we want this to be settable after realize.
+     */
+    object_class_property_add_bool(oc, "start-powered-off",
+                                   cpu_get_start_powered_off,
+                                   cpu_set_start_powered_off);
+
+    device_class_set_props(dc, cpu_system_props);
+}
+
+void cpu_exec_class_post_init(CPUClass *cc)
+{
+    /* Check mandatory SysemuCPUOps handlers */
+    g_assert(cc->sysemu_ops->has_work);
+}
+
+void cpu_exec_initfn(CPUState *cpu)
+{
+    cpu->memory = get_system_memory();
+    object_ref(OBJECT(cpu->memory));
+}
+
+static int cpu_common_post_load(void *opaque, int version_id)
+{
+    if (tcg_enabled()) {
+        CPUState *cpu = opaque;
+
+        /*
+         * 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
+         * version_id is increased.
+         */
+        cpu->interrupt_request &= ~0x01;
+
+        tlb_flush(cpu);
+
+        /*
+         * loadvm has just updated the content of RAM, bypassing the
+         * usual mechanisms that ensure we flush TBs for writes to
+         * memory we've translated code from. So we must flush all TBs,
+         * which will now be stale.
+         */
+        tb_flush(cpu);
+    }
+
+    return 0;
+}
+
+static int cpu_common_pre_load(void *opaque)
+{
+    CPUState *cpu = opaque;
+
+    cpu->exception_index = -1;
+
+    return 0;
+}
+
+static bool cpu_common_exception_index_needed(void *opaque)
+{
+    CPUState *cpu = opaque;
+
+    return tcg_enabled() && cpu->exception_index != -1;
+}
+
+static const VMStateDescription vmstate_cpu_common_exception_index = {
+    .name = "cpu_common/exception_index",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = cpu_common_exception_index_needed,
+    .fields = (const VMStateField[]) {
+        VMSTATE_INT32(exception_index, CPUState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static bool cpu_common_crash_occurred_needed(void *opaque)
+{
+    CPUState *cpu = opaque;
+
+    return cpu->crash_occurred;
+}
+
+static const VMStateDescription vmstate_cpu_common_crash_occurred = {
+    .name = "cpu_common/crash_occurred",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = cpu_common_crash_occurred_needed,
+    .fields = (const VMStateField[]) {
+        VMSTATE_BOOL(crash_occurred, CPUState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+const VMStateDescription vmstate_cpu_common = {
+    .name = "cpu_common",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .pre_load = cpu_common_pre_load,
+    .post_load = cpu_common_post_load,
+    .fields = (const VMStateField[]) {
+        VMSTATE_UINT32(halted, CPUState),
+        VMSTATE_UINT32(interrupt_request, CPUState),
+        VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * const []) {
+        &vmstate_cpu_common_exception_index,
+        &vmstate_cpu_common_crash_occurred,
+        NULL
+    }
+};
+
+void cpu_vmstate_register(CPUState *cpu)
+{
+    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
+        vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
+    }
+    if (cpu->cc->sysemu_ops->legacy_vmsd != NULL) {
+        vmstate_register(NULL, cpu->cpu_index,
+                         cpu->cc->sysemu_ops->legacy_vmsd, cpu);
+    }
+}
+
+void cpu_vmstate_unregister(CPUState *cpu)
+{
+    if (cpu->cc->sysemu_ops->legacy_vmsd != NULL) {
+        vmstate_unregister(NULL, cpu->cc->sysemu_ops->legacy_vmsd, cpu);
+    }
+    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
+        vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
+    }
+}
diff --git a/hw/core/cpu-user.c b/hw/core/cpu-user.c
new file mode 100644
index 0000000000000..7176791851b44
--- /dev/null
+++ b/hw/core/cpu-user.c
@@ -0,0 +1,49 @@
+/*
+ * QEMU CPU model (user specific)
+ *
+ * Copyright (c) Linaro, Ltd.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "hw/qdev-core.h"
+#include "hw/qdev-properties.h"
+#include "hw/core/cpu.h"
+#include "migration/vmstate.h"
+
+static const Property cpu_user_props[] = {
+    /*
+     * Create a property for the user-only object, so users can
+     * adjust prctl(PR_SET_UNALIGN) from the command-line.
+     * Has no effect if the target does not support the feature.
+     */
+    DEFINE_PROP_BOOL("prctl-unalign-sigbus", CPUState,
+                     prctl_unalign_sigbus, false),
+};
+
+void cpu_class_init_props(DeviceClass *dc)
+{
+    device_class_set_props(dc, cpu_user_props);
+}
+
+void cpu_exec_class_post_init(CPUClass *cc)
+{
+    /* nothing to do */
+}
+
+void cpu_exec_initfn(CPUState *cpu)
+{
+    /* nothing to do */
+}
+
+void cpu_vmstate_register(CPUState *cpu)
+{
+    assert(qdev_get_vmsd(DEVICE(cpu)) == NULL ||
+           qdev_get_vmsd(DEVICE(cpu))->unmigratable);
+}
+
+void cpu_vmstate_unregister(CPUState *cpu)
+{
+    /* nothing to do */
+}
diff --git a/hw/core/generic-loader.c b/hw/core/generic-loader.c
index d9f5c2e832570..d3a426a1a26c5 100644
--- a/hw/core/generic-loader.c
+++ b/hw/core/generic-loader.c
@@ -47,11 +47,8 @@ static void generic_loader_reset(void *opaque)
     GenericLoaderState *s = GENERIC_LOADER(opaque);
 
     if (s->set_pc) {
-        CPUClass *cc = CPU_GET_CLASS(s->cpu);
         cpu_reset(s->cpu);
-        if (cc) {
-            cc->set_pc(s->cpu, s->addr);
-        }
+        cpu_set_pc(s->cpu, s->addr);
     }
 
     if (s->data_len) {
diff --git a/hw/core/loader-fit.c b/hw/core/loader-fit.c
index 9bdd4fa17c6aa..6eb66406b0736 100644
--- a/hw/core/loader-fit.c
+++ b/hw/core/loader-fit.c
@@ -32,8 +32,8 @@
 
 #define FIT_LOADER_MAX_PATH (128)
 
-static const void *fit_load_image_alloc(const void *itb, const char *name,
-                                        int *poff, size_t *psz, Error **errp)
+static void *fit_load_image_alloc(const void *itb, const char *name,
+                                  int *poff, size_t *psz, Error **errp)
 {
     const void *data;
     const char *comp;
@@ -80,11 +80,11 @@ static const void *fit_load_image_alloc(const void *itb, const char *name,
             return NULL;
         }
 
-        data = g_realloc(uncomp_data, uncomp_len);
+        uncomp_data = g_realloc(uncomp_data, uncomp_len);
         if (psz) {
             *psz = uncomp_len;
         }
-        return data;
+        return uncomp_data;
     }
 
     error_setg(errp, "unknown compression '%s'", comp);
@@ -177,13 +177,12 @@ static int fit_load_kernel(const struct fit_loader *ldr, const void *itb,
 
 static int fit_load_fdt(const struct fit_loader *ldr, const void *itb,
                         int cfg, void *opaque, const void *match_data,
-                        hwaddr kernel_end, Error **errp)
+                        hwaddr kernel_end, void **pfdt, Error **errp)
 {
     ERRP_GUARD();
     Error *err = NULL;
     const char *name;
-    const void *data;
-    const void *load_data;
+    void *data;
     hwaddr load_addr;
     int img_off;
     size_t sz;
@@ -194,7 +193,7 @@ static int fit_load_fdt(const struct fit_loader *ldr, const void *itb,
         return 0;
     }
 
-    load_data = data = fit_load_image_alloc(itb, name, &img_off, &sz, errp);
+    data = fit_load_image_alloc(itb, name, &img_off, &sz, errp);
     if (!data) {
         error_prepend(errp, "unable to load FDT image from FIT: ");
         return -EINVAL;
@@ -211,19 +210,23 @@ static int fit_load_fdt(const struct fit_loader *ldr, const void *itb,
     }
 
     if (ldr->fdt_filter) {
-        load_data = ldr->fdt_filter(opaque, data, match_data, &load_addr);
+        void *filtered_data;
+
+        filtered_data = ldr->fdt_filter(opaque, data, match_data, &load_addr);
+        if (filtered_data != data) {
+            g_free(data);
+            data = filtered_data;
+        }
     }
 
     load_addr = ldr->addr_to_phys(opaque, load_addr);
-    sz = fdt_totalsize(load_data);
-    rom_add_blob_fixed(name, load_data, sz, load_addr);
+    sz = fdt_totalsize(data);
+    rom_add_blob_fixed(name, data, sz, load_addr);
 
-    ret = 0;
+    *pfdt = data;
+    return 0;
 out:
     g_free((void *) data);
-    if (data != load_data) {
-        g_free((void *) load_data);
-    }
     return ret;
 }
 
@@ -259,7 +262,8 @@ static bool fit_cfg_compatible(const void *itb, int cfg, const char *compat)
     return ret;
 }
 
-int load_fit(const struct fit_loader *ldr, const char *filename, void *opaque)
+int load_fit(const struct fit_loader *ldr, const char *filename,
+             void **pfdt, void *opaque)
 {
     Error *err = NULL;
     const struct fit_loader_match *match;
@@ -323,7 +327,7 @@ int load_fit(const struct fit_loader *ldr, const char *filename, void *opaque)
         goto out;
     }
 
-    ret = fit_load_fdt(ldr, itb, cfg_off, opaque, match_data, kernel_end,
+    ret = fit_load_fdt(ldr, itb, cfg_off, opaque, match_data, kernel_end, pfdt,
                        &err);
     if (ret) {
         error_report_err(err);
diff --git a/hw/core/loader.c b/hw/core/loader.c
index fd25c5e01bd9f..ce6ff1b52e3bc 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -51,6 +51,7 @@
 #include "trace.h"
 #include "hw/hw.h"
 #include "disas/disas.h"
+#include "migration/cpr.h"
 #include "migration/vmstate.h"
 #include "monitor/monitor.h"
 #include "system/reset.h"
@@ -144,7 +145,7 @@ ssize_t load_image_mr(const char *filename, MemoryRegion *mr)
 {
     ssize_t size;
 
-    if (!memory_access_is_direct(mr, false)) {
+    if (!memory_access_is_direct(mr, false, MEMTXATTRS_UNSPECIFIED)) {
         /* Can only load an image into RAM or ROM */
         return -1;
     }
@@ -1029,7 +1030,9 @@ static void *rom_set_mr(Rom *rom, Object *owner, const char *name, bool ro)
     vmstate_register_ram_global(rom->mr);
 
     data = memory_region_get_ram_ptr(rom->mr);
-    memcpy(data, rom->data, rom->datasize);
+    if (!cpr_is_incoming()) {
+        memcpy(data, rom->data, rom->datasize);
+    }
 
     return data;
 }
diff --git a/hw/core/machine-smp.c b/hw/core/machine-smp.c
index b954eb8490272..0be0ac044c228 100644
--- a/hw/core/machine-smp.c
+++ b/hw/core/machine-smp.c
@@ -321,10 +321,19 @@ bool machine_parse_smp_cache(MachineState *ms,
             return false;
         }
 
+        if (props->topology == CPU_TOPOLOGY_LEVEL_THREAD) {
+            error_setg(errp,
+                       "%s level cache not supported by this machine",
+                       CpuTopologyLevel_str(props->topology));
+            return false;
+        }
+
         if (!machine_check_topo_support(ms, props->topology, errp)) {
             return false;
         }
     }
+
+    mc->smp_props.has_caches = true;
     return true;
 }
 
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 254cc20c4cb8a..f52a4f2273b2f 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -19,6 +19,7 @@
 #include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "qapi/qapi-visit-machine.h"
+#include "qapi/qapi-commands-machine.h"
 #include "qemu/madvise.h"
 #include "qom/object_interfaces.h"
 #include "system/cpus.h"
@@ -42,6 +43,9 @@ GlobalProperty hw_compat_9_2[] = {
     { "virtio-balloon-pci-transitional", "vectors", "0" },
     { "virtio-balloon-pci-non-transitional", "vectors", "0" },
     { "virtio-mem-pci", "vectors", "0" },
+    { "migration", "multifd-clean-tls-termination", "false" },
+    { "migration", "send-switchover-start", "off"},
+    { "vfio-pci", "x-migration-multifd-transfer", "off" },
 };
 const size_t hw_compat_9_2_len = G_N_ELEMENTS(hw_compat_9_2);
 
@@ -1695,6 +1699,22 @@ void qemu_remove_machine_init_done_notifier(Notifier *notify)
     notifier_remove(notify);
 }
 
+static void handle_machine_dumpdtb(MachineState *ms)
+{
+    if (!ms->dumpdtb) {
+        return;
+    }
+#ifdef CONFIG_FDT
+    qmp_dumpdtb(ms->dumpdtb, &error_fatal);
+    exit(0);
+#else
+    error_report("This machine doesn't have an FDT");
+    error_printf("(this machine type definitely doesn't use FDT, and "
+                 "this QEMU doesn't have FDT support compiled in)\n");
+    exit(1);
+#endif
+}
+
 void qdev_machine_creation_done(void)
 {
     cpu_synchronize_all_post_init();
@@ -1711,6 +1731,12 @@ void qdev_machine_creation_done(void)
     phase_advance(PHASE_MACHINE_READY);
     qdev_assert_realized_properly();
 
+    /*
+     * If the user used -machine dumpdtb=file.dtb to request that we
+     * dump the DTB to a file,  do it now, and exit.
+     */
+    handle_machine_dumpdtb(current_machine);
+
     /* TODO: once all bus devices are qdevified, this should be done
      * when bus is created by qdev.c */
     /*
diff --git a/hw/core/meson.build b/hw/core/meson.build
index 65a1698ed1fbd..b5a545a0edd63 100644
--- a/hw/core/meson.build
+++ b/hw/core/meson.build
@@ -46,4 +46,7 @@ system_ss.add(files(
   'vm-change-state-handler.c',
   'clock-vmstate.c',
 ))
-user_ss.add(files('qdev-user.c'))
+user_ss.add(files(
+  'cpu-user.c',
+  'qdev-user.c',
+))
diff --git a/hw/core/null-machine.c b/hw/core/null-machine.c
index f586a4bef5436..7f1fb562bebcb 100644
--- a/hw/core/null-machine.c
+++ b/hw/core/null-machine.c
@@ -53,7 +53,6 @@ static void machine_none_machine_init(MachineClass *mc)
     mc->no_parallel = 1;
     mc->no_floppy = 1;
     mc->no_cdrom = 1;
-    mc->no_sdcard = 1;
 }
 
 DEFINE_MACHINE("none", machine_none_machine_init)
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index a96675beb0dc4..a7dde73c29bf9 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -235,7 +235,7 @@ static void release_drive(Object *obj, const char *name, void *opaque)
 }
 
 const PropertyInfo qdev_prop_drive = {
-    .name  = "str",
+    .type  = "str",
     .description = "Node name or ID of a block device to use as a backend",
     .realized_set_allowed = true,
     .get   = get_drive,
@@ -244,7 +244,7 @@ const PropertyInfo qdev_prop_drive = {
 };
 
 const PropertyInfo qdev_prop_drive_iothread = {
-    .name  = "str",
+    .type  = "str",
     .description = "Node name or ID of a block device to use as a backend",
     .realized_set_allowed = true,
     .get   = get_drive,
@@ -312,7 +312,7 @@ static void release_chr(Object *obj, const char *name, void *opaque)
 }
 
 const PropertyInfo qdev_prop_chr = {
-    .name  = "str",
+    .type  = "str",
     .description = "ID of a chardev to use as a backend",
     .get   = get_chr,
     .set   = set_chr,
@@ -386,7 +386,7 @@ static void set_mac(Object *obj, Visitor *v, const char *name, void *opaque,
 }
 
 const PropertyInfo qdev_prop_macaddr = {
-    .name  = "str",
+    .type  = "str",
     .description = "Ethernet 6-byte MAC Address, example: 52:54:00:12:34:56",
     .get   = get_mac,
     .set   = set_mac,
@@ -474,7 +474,7 @@ static void set_netdev(Object *obj, Visitor *v, const char *name,
 }
 
 const PropertyInfo qdev_prop_netdev = {
-    .name  = "str",
+    .type  = "str",
     .description = "ID of a netdev to use as a backend",
     .get   = get_netdev,
     .set   = set_netdev,
@@ -512,7 +512,7 @@ static void set_audiodev(Object *obj, Visitor *v, const char* name,
 }
 
 const PropertyInfo qdev_prop_audiodev = {
-    .name = "str",
+    .type = "str",
     .description = "ID of an audiodev to use as a backend",
     /* release done on shutdown */
     .get = get_audiodev,
@@ -602,7 +602,8 @@ static void qdev_propinfo_set_losttickpolicy(Object *obj, Visitor *v,
 QEMU_BUILD_BUG_ON(sizeof(LostTickPolicy) != sizeof(int));
 
 const PropertyInfo qdev_prop_losttickpolicy = {
-    .name  = "LostTickPolicy",
+    .type  = "LostTickPolicy",
+    .description = "Policy for handling lost ticks (discard/delay/slew)",
     .enum_table  = &LostTickPolicy_lookup,
     .get   = qdev_propinfo_get_enum,
     .set   = qdev_propinfo_set_losttickpolicy,
@@ -628,7 +629,7 @@ static void set_blocksize(Object *obj, Visitor *v, const char *name,
 }
 
 const PropertyInfo qdev_prop_blocksize = {
-    .name  = "size",
+    .type  = "size",
     .description = "A power of two between " MIN_BLOCK_SIZE_STR
                    " and " MAX_BLOCK_SIZE_STR,
     .get   = qdev_propinfo_get_size32,
@@ -641,9 +642,8 @@ const PropertyInfo qdev_prop_blocksize = {
 QEMU_BUILD_BUG_ON(sizeof(BlockdevOnError) != sizeof(int));
 
 const PropertyInfo qdev_prop_blockdev_on_error = {
-    .name = "BlockdevOnError",
-    .description = "Error handling policy, "
-                   "report/ignore/enospc/stop/auto",
+    .type = "BlockdevOnError",
+    .description = "Error handling policy (report/ignore/enospc/stop/auto)",
     .enum_table = &BlockdevOnError_lookup,
     .get = qdev_propinfo_get_enum,
     .set = qdev_propinfo_set_enum,
@@ -655,9 +655,9 @@ const PropertyInfo qdev_prop_blockdev_on_error = {
 QEMU_BUILD_BUG_ON(sizeof(BiosAtaTranslation) != sizeof(int));
 
 const PropertyInfo qdev_prop_bios_chs_trans = {
-    .name = "BiosAtaTranslation",
-    .description = "Logical CHS translation algorithm, "
-                   "auto/none/lba/large/rechs",
+    .type = "BiosAtaTranslation",
+    .description = "Logical CHS translation algorithm "
+                   " (auto/none/lba/large/rechs)",
     .enum_table = &BiosAtaTranslation_lookup,
     .get = qdev_propinfo_get_enum,
     .set = qdev_propinfo_set_enum,
@@ -667,9 +667,8 @@ const PropertyInfo qdev_prop_bios_chs_trans = {
 /* --- FDC default drive types */
 
 const PropertyInfo qdev_prop_fdc_drive_type = {
-    .name = "FdcDriveType",
-    .description = "FDC drive type, "
-                   "144/288/120/none/auto",
+    .type = "FloppyDriveType",
+    .description = "Floppy drive type (144/288/120/none/auto)",
     .enum_table = &FloppyDriveType_lookup,
     .get = qdev_propinfo_get_enum,
     .set = qdev_propinfo_set_enum,
@@ -679,9 +678,9 @@ const PropertyInfo qdev_prop_fdc_drive_type = {
 /* --- MultiFDCompression --- */
 
 const PropertyInfo qdev_prop_multifd_compression = {
-    .name = "MultiFDCompression",
-    .description = "multifd_compression values, "
-                   "none/zlib/zstd/qpl/uadk/qatzip",
+    .type = "MultiFDCompression",
+    .description = "multifd_compression values"
+                   " (none/zlib/zstd/qpl/uadk/qatzip)",
     .enum_table = &MultiFDCompression_lookup,
     .get = qdev_propinfo_get_enum,
     .set = qdev_propinfo_set_enum,
@@ -693,9 +692,8 @@ const PropertyInfo qdev_prop_multifd_compression = {
 QEMU_BUILD_BUG_ON(sizeof(MigMode) != sizeof(int));
 
 const PropertyInfo qdev_prop_mig_mode = {
-    .name = "MigMode",
-    .description = "mig_mode values, "
-                   "normal,cpr-reboot",
+    .type = "MigMode",
+    .description = "Migration mode (normal/cpr-reboot)",
     .enum_table = &MigMode_lookup,
     .get = qdev_propinfo_get_enum,
     .set = qdev_propinfo_set_enum,
@@ -707,9 +705,8 @@ const PropertyInfo qdev_prop_mig_mode = {
 QEMU_BUILD_BUG_ON(sizeof(GranuleMode) != sizeof(int));
 
 const PropertyInfo qdev_prop_granule_mode = {
-    .name = "GranuleMode",
-    .description = "granule_mode values, "
-                   "4k, 8k, 16k, 64k, host",
+    .type = "GranuleMode",
+    .description = "Granule page size (4k/8k/16k/64k/host)",
     .enum_table = &GranuleMode_lookup,
     .get = qdev_propinfo_get_enum,
     .set = qdev_propinfo_set_enum,
@@ -717,9 +714,8 @@ const PropertyInfo qdev_prop_granule_mode = {
 };
 
 const PropertyInfo qdev_prop_zero_page_detection = {
-    .name = "ZeroPageDetection",
-    .description = "zero_page_detection values, "
-                   "none,legacy,multifd",
+    .type = "ZeroPageDetection",
+    .description = "Zero page detection (none/legacy/multifd)",
     .enum_table = &ZeroPageDetection_lookup,
     .get = qdev_propinfo_get_enum,
     .set = qdev_propinfo_set_enum,
@@ -801,7 +797,7 @@ static void set_reserved_region(Object *obj, Visitor *v, const char *name,
 }
 
 const PropertyInfo qdev_prop_reserved_region = {
-    .name  = "reserved_region",
+    .type  = "str",
     .description = "Reserved Region, example: 0xFEE00000:0xFEEFFFFF:0",
     .get   = get_reserved_region,
     .set   = set_reserved_region,
@@ -882,7 +878,7 @@ static int print_pci_devfn(Object *obj, const Property *prop, char *dest,
 }
 
 const PropertyInfo qdev_prop_pci_devfn = {
-    .name  = "int32",
+    .type  = "str",
     .description = "Slot and optional function number, example: 06.0 or 06",
     .print = print_pci_devfn,
     .get   = qdev_propinfo_get_int32,
@@ -988,8 +984,8 @@ static void set_pci_host_devaddr(Object *obj, Visitor *v, const char *name,
 }
 
 const PropertyInfo qdev_prop_pci_host_devaddr = {
-    .name = "str",
-    .description = "Address (bus/device/function) of "
+    .type = "str",
+    .description = "Address (bus:device.function) of "
                    "the host device, example: 04:10.0",
     .get = get_pci_host_devaddr,
     .set = set_pci_host_devaddr,
@@ -998,7 +994,7 @@ const PropertyInfo qdev_prop_pci_host_devaddr = {
 /* --- OffAutoPCIBAR off/auto/bar0/bar1/bar2/bar3/bar4/bar5 --- */
 
 const PropertyInfo qdev_prop_off_auto_pcibar = {
-    .name = "OffAutoPCIBAR",
+    .type = "OffAutoPCIBAR",
     .description = "off/auto/bar0/bar1/bar2/bar3/bar4/bar5",
     .enum_table = &OffAutoPCIBAR_lookup,
     .get = qdev_propinfo_get_enum,
@@ -1080,7 +1076,7 @@ static void set_prop_pcielinkspeed(Object *obj, Visitor *v, const char *name,
 }
 
 const PropertyInfo qdev_prop_pcie_link_speed = {
-    .name = "PCIELinkSpeed",
+    .type = "PCIELinkSpeed",
     .description = "2_5/5/8/16/32/64",
     .enum_table = &PCIELinkSpeed_lookup,
     .get = get_prop_pcielinkspeed,
@@ -1168,7 +1164,7 @@ static void set_prop_pcielinkwidth(Object *obj, Visitor *v, const char *name,
 }
 
 const PropertyInfo qdev_prop_pcie_link_width = {
-    .name = "PCIELinkWidth",
+    .type = "PCIELinkWidth",
     .description = "1/2/4/8/12/16/32",
     .enum_table = &PCIELinkWidth_lookup,
     .get = get_prop_pcielinkwidth,
@@ -1218,7 +1214,7 @@ static void set_default_uuid_auto(ObjectProperty *op, const Property *prop)
 }
 
 const PropertyInfo qdev_prop_uuid = {
-    .name  = "str",
+    .type  = "str",
     .description = "UUID (aka GUID) or \"" UUID_VALUE_AUTO
         "\" for random value (default)",
     .get   = get_uuid,
@@ -1231,8 +1227,8 @@ const PropertyInfo qdev_prop_uuid = {
 QEMU_BUILD_BUG_ON(sizeof(S390CpuEntitlement) != sizeof(int));
 
 const PropertyInfo qdev_prop_cpus390entitlement = {
-    .name  = "S390CpuEntitlement",
-    .description = "low/medium (default)/high",
+    .type  = "S390CpuEntitlement",
+    .description = "auto/low/medium/high (default medium)",
     .enum_table  = &S390CpuEntitlement_lookup,
     .get   = qdev_propinfo_get_enum,
     .set   = qdev_propinfo_set_enum,
@@ -1276,10 +1272,30 @@ static void release_iothread_vq_mapping_list(Object *obj,
 }
 
 const PropertyInfo qdev_prop_iothread_vq_mapping_list = {
-    .name = "IOThreadVirtQueueMappingList",
+    .type = "IOThreadVirtQueueMappingList",
     .description = "IOThread virtqueue mapping list [{\"iothread\":\"<id>\", "
                    "\"vqs\":[1,2,3,...]},...]",
     .get = get_iothread_vq_mapping_list,
     .set = set_iothread_vq_mapping_list,
     .release = release_iothread_vq_mapping_list,
 };
+
+/* --- Endian modes */
+
+const PropertyInfo qdev_prop_endian_mode = {
+    .type = "EndianMode",
+    .description = "Endian mode, big/little/unspecified",
+    .enum_table = &EndianMode_lookup,
+    .get = qdev_propinfo_get_enum,
+    .set = qdev_propinfo_set_enum,
+    .set_default_value = qdev_propinfo_set_default_value_enum,
+};
+
+const PropertyInfo qdev_prop_vmapple_virtio_blk_variant = {
+    .type  = "VMAppleVirtioBlkVariant",
+    .description = "unspecified/root/aux",
+    .enum_table  = &VMAppleVirtioBlkVariant_lookup,
+    .get   = qdev_propinfo_get_enum,
+    .set   = qdev_propinfo_set_enum,
+    .set_default_value = qdev_propinfo_set_default_value_enum,
+};
diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c
index 0b52aad555160..c04df3b337ae1 100644
--- a/hw/core/qdev-properties.c
+++ b/hw/core/qdev-properties.c
@@ -122,13 +122,6 @@ void qdev_propinfo_set_default_value_enum(ObjectProperty *op,
         qapi_enum_lookup(prop->info->enum_table, prop->defval.i));
 }
 
-const PropertyInfo qdev_prop_enum = {
-    .name  = "enum",
-    .get   = qdev_propinfo_get_enum,
-    .set   = qdev_propinfo_set_enum,
-    .set_default_value = qdev_propinfo_set_default_value_enum,
-};
-
 /* Bit */
 
 static uint32_t qdev_get_prop_mask(const Property *prop)
@@ -176,7 +169,7 @@ static void set_default_value_bool(ObjectProperty *op, const Property *prop)
 }
 
 const PropertyInfo qdev_prop_bit = {
-    .name  = "bool",
+    .type  = "bool",
     .description = "on/off",
     .get   = prop_get_bit,
     .set   = prop_set_bit,
@@ -225,7 +218,7 @@ static void prop_set_bit64(Object *obj, Visitor *v, const char *name,
 }
 
 const PropertyInfo qdev_prop_bit64 = {
-    .name  = "bool",
+    .type  = "bool",
     .description = "on/off",
     .get   = prop_get_bit64,
     .set   = prop_set_bit64,
@@ -253,7 +246,8 @@ static void set_bool(Object *obj, Visitor *v, const char *name, void *opaque,
 }
 
 const PropertyInfo qdev_prop_bool = {
-    .name  = "bool",
+    .type  = "bool",
+    .description = "on/off",
     .get   = get_bool,
     .set   = set_bool,
     .set_default_value = set_default_value_bool,
@@ -292,7 +286,7 @@ void qdev_propinfo_set_default_value_uint(ObjectProperty *op,
 }
 
 const PropertyInfo qdev_prop_uint8 = {
-    .name  = "uint8",
+    .type  = "uint8",
     .get   = get_uint8,
     .set   = set_uint8,
     .set_default_value = qdev_propinfo_set_default_value_uint,
@@ -319,7 +313,7 @@ static void set_uint16(Object *obj, Visitor *v, const char *name,
 }
 
 const PropertyInfo qdev_prop_uint16 = {
-    .name  = "uint16",
+    .type  = "uint16",
     .get   = get_uint16,
     .set   = set_uint16,
     .set_default_value = qdev_propinfo_set_default_value_uint,
@@ -364,14 +358,14 @@ static void set_int32(Object *obj, Visitor *v, const char *name, void *opaque,
 }
 
 const PropertyInfo qdev_prop_uint32 = {
-    .name  = "uint32",
+    .type  = "uint32",
     .get   = get_uint32,
     .set   = set_uint32,
     .set_default_value = qdev_propinfo_set_default_value_uint,
 };
 
 const PropertyInfo qdev_prop_int32 = {
-    .name  = "int32",
+    .type  = "int32",
     .get   = qdev_propinfo_get_int32,
     .set   = set_int32,
     .set_default_value = qdev_propinfo_set_default_value_int,
@@ -416,14 +410,14 @@ static void set_int64(Object *obj, Visitor *v, const char *name,
 }
 
 const PropertyInfo qdev_prop_uint64 = {
-    .name  = "uint64",
+    .type  = "uint64",
     .get   = get_uint64,
     .set   = set_uint64,
     .set_default_value = qdev_propinfo_set_default_value_uint,
 };
 
 const PropertyInfo qdev_prop_int64 = {
-    .name  = "int64",
+    .type  = "int64",
     .get   = get_int64,
     .set   = set_int64,
     .set_default_value = qdev_propinfo_set_default_value_int,
@@ -443,7 +437,7 @@ static void set_uint64_checkmask(Object *obj, Visitor *v, const char *name,
 }
 
 const PropertyInfo qdev_prop_uint64_checkmask = {
-    .name  = "uint64",
+    .type  = "uint64",
     .get   = get_uint64,
     .set   = set_uint64_checkmask,
 };
@@ -485,7 +479,7 @@ static void set_string(Object *obj, Visitor *v, const char *name,
 }
 
 const PropertyInfo qdev_prop_string = {
-    .name  = "str",
+    .type  = "str",
     .release = release_string,
     .get   = get_string,
     .set   = set_string,
@@ -494,7 +488,7 @@ const PropertyInfo qdev_prop_string = {
 /* --- on/off/auto --- */
 
 const PropertyInfo qdev_prop_on_off_auto = {
-    .name = "OnOffAuto",
+    .type = "OnOffAuto",
     .description = "on/off/auto",
     .enum_table = &OnOffAuto_lookup,
     .get = qdev_propinfo_get_enum,
@@ -537,7 +531,7 @@ static void set_size32(Object *obj, Visitor *v, const char *name, void *opaque,
 }
 
 const PropertyInfo qdev_prop_size32 = {
-    .name  = "size",
+    .type  = "size",
     .get = qdev_propinfo_get_size32,
     .set = set_size32,
     .set_default_value = qdev_propinfo_set_default_value_uint,
@@ -740,7 +734,7 @@ static void default_prop_array(ObjectProperty *op, const Property *prop)
 }
 
 const PropertyInfo qdev_prop_array = {
-    .name = "list",
+    .type = "list",
     .get = get_prop_array,
     .set = set_prop_array,
     .release = release_prop_array,
@@ -944,7 +938,7 @@ static void set_size(Object *obj, Visitor *v, const char *name, void *opaque,
 }
 
 const PropertyInfo qdev_prop_size = {
-    .name  = "size",
+    .type  = "size",
     .get = get_size,
     .set = set_size,
     .set_default_value = qdev_propinfo_set_default_value_uint,
@@ -962,7 +956,7 @@ static ObjectProperty *create_link_property(ObjectClass *oc, const char *name,
 }
 
 const PropertyInfo qdev_prop_link = {
-    .name = "link",
+    .type = "link",
     .create = create_link_property,
 };
 
@@ -973,7 +967,7 @@ void qdev_property_add_static(DeviceState *dev, const Property *prop)
 
     assert(!prop->info->create);
 
-    op = object_property_add(obj, prop->name, prop->info->name,
+    op = object_property_add(obj, prop->name, prop->info->type,
                              field_prop_getter(prop->info),
                              field_prop_setter(prop->info),
                              prop->info->release,
@@ -1000,7 +994,7 @@ static void qdev_class_add_property(DeviceClass *klass, const char *name,
         op = prop->info->create(oc, name, prop);
     } else {
         op = object_class_property_add(oc,
-                                       name, prop->info->name,
+                                       name, prop->info->type,
                                        field_prop_getter(prop->info),
                                        field_prop_setter(prop->info),
                                        prop->info->release,
diff --git a/hw/core/sysbus-fdt.c b/hw/core/sysbus-fdt.c
index 774c0aed41b5e..e85066b905637 100644
--- a/hw/core/sysbus-fdt.c
+++ b/hw/core/sysbus-fdt.c
@@ -36,6 +36,7 @@
 #include "hw/vfio/vfio-calxeda-xgmac.h"
 #include "hw/vfio/vfio-amd-xgbe.h"
 #include "hw/display/ramfb.h"
+#include "hw/uefi/var-service-api.h"
 #include "hw/arm/fdt.h"
 
 /*
@@ -471,6 +472,28 @@ static int add_tpm_tis_fdt_node(SysBusDevice *sbdev, void *opaque)
 }
 #endif
 
+static int add_uefi_vars_node(SysBusDevice *sbdev, void *opaque)
+{
+    PlatformBusFDTData *data = opaque;
+    PlatformBusDevice *pbus = data->pbus;
+    const char *parent_node = data->pbus_node_name;
+    void *fdt = data->fdt;
+    uint64_t mmio_base;
+    char *nodename;
+
+    mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, 0);
+    nodename = g_strdup_printf("%s/%s@%" PRIx64, parent_node,
+                               UEFI_VARS_FDT_NODE, mmio_base);
+    qemu_fdt_add_subnode(fdt, nodename);
+    qemu_fdt_setprop_string(fdt, nodename,
+                            "compatible", UEFI_VARS_FDT_COMPAT);
+    qemu_fdt_setprop_sized_cells(fdt, nodename, "reg",
+                                 1, mmio_base,
+                                 1, UEFI_VARS_REGS_SIZE);
+    g_free(nodename);
+    return 0;
+}
+
 static int no_fdt_node(SysBusDevice *sbdev, void *opaque)
 {
     return 0;
@@ -495,6 +518,7 @@ static const BindingEntry bindings[] = {
     TYPE_BINDING(TYPE_TPM_TIS_SYSBUS, add_tpm_tis_fdt_node),
 #endif
     TYPE_BINDING(TYPE_RAMFB_DEVICE, no_fdt_node),
+    TYPE_BINDING(TYPE_UEFI_VARS_SYSBUS, add_uefi_vars_node),
     TYPE_BINDING("", NULL), /* last element */
 };
 
diff --git a/hw/core/sysbus.c b/hw/core/sysbus.c
index 9355849ff0a46..98819d5dc61ef 100644
--- a/hw/core/sysbus.c
+++ b/hw/core/sysbus.c
@@ -19,7 +19,6 @@
 
 #include "qemu/osdep.h"
 #include "qapi/error.h"
-#include "qemu/module.h"
 #include "hw/sysbus.h"
 #include "monitor/monitor.h"
 #include "exec/address-spaces.h"
@@ -80,13 +79,6 @@ static void system_bus_class_init(ObjectClass *klass, void *data)
     k->get_fw_dev_path = sysbus_get_fw_dev_path;
 }
 
-static const TypeInfo system_bus_info = {
-    .name = TYPE_SYSTEM_BUS,
-    .parent = TYPE_BUS,
-    .instance_size = sizeof(BusState),
-    .class_init = system_bus_class_init,
-};
-
 /* Check whether an IRQ source exists */
 bool sysbus_has_irq(SysBusDevice *dev, int n)
 {
@@ -306,15 +298,6 @@ static void sysbus_device_class_init(ObjectClass *klass, void *data)
     k->user_creatable = false;
 }
 
-static const TypeInfo sysbus_device_type_info = {
-    .name = TYPE_SYS_BUS_DEVICE,
-    .parent = TYPE_DEVICE,
-    .instance_size = sizeof(SysBusDevice),
-    .abstract = true,
-    .class_size = sizeof(SysBusDeviceClass),
-    .class_init = sysbus_device_class_init,
-};
-
 static BusState *main_system_bus;
 
 static void main_system_bus_create(void)
@@ -323,8 +306,8 @@ static void main_system_bus_create(void)
      * assign main_system_bus before qbus_init()
      * in order to make "if (bus != sysbus_get_default())" work
      */
-    main_system_bus = g_malloc0(system_bus_info.instance_size);
-    qbus_init(main_system_bus, system_bus_info.instance_size,
+    main_system_bus = g_new0(BusState, 1);
+    qbus_init(main_system_bus, sizeof(BusState),
               TYPE_SYSTEM_BUS, NULL, "main-system-bus");
     OBJECT(main_system_bus)->free = g_free;
 }
@@ -337,10 +320,35 @@ BusState *sysbus_get_default(void)
     return main_system_bus;
 }
 
-static void sysbus_register_types(void)
+static void dynamic_sysbus_device_class_init(ObjectClass *klass, void *data)
 {
-    type_register_static(&system_bus_info);
-    type_register_static(&sysbus_device_type_info);
+    DeviceClass *k = DEVICE_CLASS(klass);
+
+    k->user_creatable = true;
+    k->hotpluggable = false;
 }
 
-type_init(sysbus_register_types)
+static const TypeInfo sysbus_types[] = {
+    {
+        .name           = TYPE_SYSTEM_BUS,
+        .parent         = TYPE_BUS,
+        .instance_size  = sizeof(BusState),
+        .class_init     = system_bus_class_init,
+    },
+    {
+        .name           = TYPE_SYS_BUS_DEVICE,
+        .parent         = TYPE_DEVICE,
+        .instance_size  = sizeof(SysBusDevice),
+        .abstract       = true,
+        .class_size     = sizeof(SysBusDeviceClass),
+        .class_init     = sysbus_device_class_init,
+    },
+    {
+        .name           = TYPE_DYNAMIC_SYS_BUS_DEVICE,
+        .parent         = TYPE_SYS_BUS_DEVICE,
+        .class_init     = dynamic_sysbus_device_class_init,
+        .abstract       = true,
+    }
+};
+
+DEFINE_TYPES(sysbus_types)
diff --git a/hw/cpu/a15mpcore.c b/hw/cpu/a15mpcore.c
index d24ab0a6ab251..676f65a0af4f4 100644
--- a/hw/cpu/a15mpcore.c
+++ b/hw/cpu/a15mpcore.c
@@ -58,6 +58,11 @@ static void a15mp_priv_realize(DeviceState *dev, Error **errp)
     bool has_el2 = false;
     Object *cpuobj;
 
+    if (s->num_irq < 32 || s->num_irq > 256) {
+        error_setg(errp, "Property 'num-irq' must be between 32 and 256");
+        return;
+    }
+
     gicdev = DEVICE(&s->gic);
     qdev_prop_set_uint32(gicdev, "num-cpu", s->num_cpu);
     qdev_prop_set_uint32(gicdev, "num-irq", s->num_irq);
@@ -146,13 +151,14 @@ static void a15mp_priv_realize(DeviceState *dev, Error **errp)
 
 static const Property a15mp_priv_properties[] = {
     DEFINE_PROP_UINT32("num-cpu", A15MPPrivState, num_cpu, 1),
-    /* The Cortex-A15MP may have anything from 0 to 224 external interrupt
-     * IRQ lines (with another 32 internal). We default to 128+32, which
-     * is the number provided by the Cortex-A15MP test chip in the
-     * Versatile Express A15 development board.
-     * Other boards may differ and should set this property appropriately.
+    /*
+     * The Cortex-A15MP may have anything from 0 to 224 external interrupt
+     * lines, plus always 32 internal IRQs. This property sets the total
+     * of internal + external, so the valid range is from 32 to 256.
+     * The board model must set this to whatever the configuration
+     * used for the CPU on that board or SoC is.
      */
-    DEFINE_PROP_UINT32("num-irq", A15MPPrivState, num_irq, 160),
+    DEFINE_PROP_UINT32("num-irq", A15MPPrivState, num_irq, 0),
 };
 
 static void a15mp_priv_class_init(ObjectClass *klass, void *data)
diff --git a/hw/cpu/a9mpcore.c b/hw/cpu/a9mpcore.c
index 25416c5032b44..1b9f2bef93c38 100644
--- a/hw/cpu/a9mpcore.c
+++ b/hw/cpu/a9mpcore.c
@@ -56,6 +56,11 @@ static void a9mp_priv_realize(DeviceState *dev, Error **errp)
     CPUState *cpu0;
     Object *cpuobj;
 
+    if (s->num_irq < 32 || s->num_irq > 256) {
+        error_setg(errp, "Property 'num-irq' must be between 32 and 256");
+        return;
+    }
+
     cpu0 = qemu_get_cpu(0);
     cpuobj = OBJECT(cpu0);
     if (strcmp(object_get_typename(cpuobj), ARM_CPU_TYPE_NAME("cortex-a9"))) {
@@ -160,13 +165,14 @@ static void a9mp_priv_realize(DeviceState *dev, Error **errp)
 
 static const Property a9mp_priv_properties[] = {
     DEFINE_PROP_UINT32("num-cpu", A9MPPrivState, num_cpu, 1),
-    /* The Cortex-A9MP may have anything from 0 to 224 external interrupt
-     * IRQ lines (with another 32 internal). We default to 64+32, which
-     * is the number provided by the Cortex-A9MP test chip in the
-     * Realview PBX-A9 and Versatile Express A9 development boards.
-     * Other boards may differ and should set this property appropriately.
+    /*
+     * The Cortex-A9MP may have anything from 0 to 224 external interrupt
+     * lines, plus always 32 internal IRQs. This property sets the total
+     * of internal + external, so the valid range is from 32 to 256.
+     * The board model must set this to whatever the configuration
+     * used for the CPU on that board or SoC is.
      */
-    DEFINE_PROP_UINT32("num-irq", A9MPPrivState, num_irq, 96),
+    DEFINE_PROP_UINT32("num-irq", A9MPPrivState, num_irq, 0),
 };
 
 static void a9mp_priv_class_init(ObjectClass *klass, void *data)
diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c
index cd116c0401241..473895948b3d4 100644
--- a/hw/cxl/cxl-component-utils.c
+++ b/hw/cxl/cxl-component-utils.c
@@ -243,8 +243,13 @@ static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
     ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, INTERLEAVE_4K, 1);
     ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY,
                      POISON_ON_ERR_CAP, 0);
-    ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, 3_6_12_WAY, 0);
-    ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, 16_WAY, 0);
+    if (type == CXL2_TYPE3_DEVICE) {
+        ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, 3_6_12_WAY, 1);
+        ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, 16_WAY, 1);
+    } else {
+        ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, 3_6_12_WAY, 0);
+        ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, 16_WAY, 0);
+    }
     ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, UIO, 0);
     ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY,
                      UIO_DECODER_COUNT, 0);
diff --git a/hw/cxl/cxl-device-utils.c b/hw/cxl/cxl-device-utils.c
index 035d034f6dd8e..52ad1e4c3f7a2 100644
--- a/hw/cxl/cxl-device-utils.c
+++ b/hw/cxl/cxl-device-utils.c
@@ -352,10 +352,8 @@ static void device_reg_init_common(CXLDeviceState *cxl_dstate)
     }
 }
 
-static void mailbox_reg_init_common(CXLDeviceState *cxl_dstate)
+static void mailbox_reg_init_common(CXLDeviceState *cxl_dstate, int msi_n)
 {
-    const uint8_t msi_n = 9;
-
     /* 2048 payload size */
     ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
                      PAYLOAD_SIZE, CXL_MAILBOX_PAYLOAD_SHIFT);
@@ -382,7 +380,7 @@ static void memdev_reg_init_common(CXLDeviceState *cxl_dstate)
     cxl_dstate->memdev_status = memdev_status_reg;
 }
 
-void cxl_device_register_init_t3(CXLType3Dev *ct3d)
+void cxl_device_register_init_t3(CXLType3Dev *ct3d, int msi_n)
 {
     CXLDeviceState *cxl_dstate = &ct3d->cxl_dstate;
     uint64_t *cap_h = cxl_dstate->caps_reg_state64;
@@ -398,7 +396,7 @@ void cxl_device_register_init_t3(CXLType3Dev *ct3d)
     device_reg_init_common(cxl_dstate);
 
     cxl_device_cap_init(cxl_dstate, MAILBOX, 2, CXL_DEV_MAILBOX_VERSION);
-    mailbox_reg_init_common(cxl_dstate);
+    mailbox_reg_init_common(cxl_dstate, msi_n);
 
     cxl_device_cap_init(cxl_dstate, MEMORY_DEVICE, 0x4000,
         CXL_MEM_DEV_STATUS_VERSION);
@@ -408,7 +406,7 @@ void cxl_device_register_init_t3(CXLType3Dev *ct3d)
                               CXL_MAILBOX_MAX_PAYLOAD_SIZE);
 }
 
-void cxl_device_register_init_swcci(CSWMBCCIDev *sw)
+void cxl_device_register_init_swcci(CSWMBCCIDev *sw, int msi_n)
 {
     CXLDeviceState *cxl_dstate = &sw->cxl_dstate;
     uint64_t *cap_h = cxl_dstate->caps_reg_state64;
@@ -423,7 +421,7 @@ void cxl_device_register_init_swcci(CSWMBCCIDev *sw)
     device_reg_init_common(cxl_dstate);
 
     cxl_device_cap_init(cxl_dstate, MAILBOX, 2, 1);
-    mailbox_reg_init_common(cxl_dstate);
+    mailbox_reg_init_common(cxl_dstate, msi_n);
 
     cxl_device_cap_init(cxl_dstate, MEMORY_DEVICE, 0x4000, 1);
     memdev_reg_init_common(cxl_dstate);
diff --git a/hw/cxl/switch-mailbox-cci.c b/hw/cxl/switch-mailbox-cci.c
index 65cdac6cc1392..833b8246195ca 100644
--- a/hw/cxl/switch-mailbox-cci.c
+++ b/hw/cxl/switch-mailbox-cci.c
@@ -17,10 +17,12 @@
 #include "hw/qdev-properties.h"
 #include "hw/cxl/cxl.h"
 
+#define CXL_SWCCI_MSIX_MBOX 3
+
 static void cswmbcci_reset(DeviceState *dev)
 {
     CSWMBCCIDev *cswmb = CXL_SWITCH_MAILBOX_CCI(dev);
-    cxl_device_register_init_swcci(cswmb);
+    cxl_device_register_init_swcci(cswmb, CXL_SWCCI_MSIX_MBOX);
 }
 
 static void cswbcci_realize(PCIDevice *pci_dev, Error **errp)
diff --git a/hw/display/Kconfig b/hw/display/Kconfig
index 2b53dfd7d26c2..1e95ab28ef405 100644
--- a/hw/display/Kconfig
+++ b/hw/display/Kconfig
@@ -141,10 +141,6 @@ config XLNX_DISPLAYPORT
 config DM163
     bool
 
-config MAC_PVG
-    bool
-    default y
-
 config MAC_PVG_MMIO
     bool
     depends on MAC_PVG && AARCH64
diff --git a/hw/display/apple-gfx.m b/hw/display/apple-gfx.m
index aa1455b629557..c4323574e1219 100644
--- a/hw/display/apple-gfx.m
+++ b/hw/display/apple-gfx.m
@@ -137,7 +137,8 @@ static void apple_gfx_destroy_task(AppleGFXState *s, PGTask_t *task)
                                          MEMTXATTRS_UNSPECIFIED);
 
     if (!ram_region || ram_region_length < length ||
-        !memory_access_is_direct(ram_region, !read_only)) {
+        !memory_access_is_direct(ram_region, !read_only,
+				 MEMTXATTRS_UNSPECIFIED)) {
         return NULL;
     }
 
@@ -870,7 +871,7 @@ static void apple_gfx_set_display_mode(Object *obj, Visitor *v,
 }
 
 const PropertyInfo qdev_prop_apple_gfx_display_mode = {
-    .name  = "display_mode",
+    .type  = "display_mode",
     .description =
         "Display mode in pixels and Hertz, as <width>x<height>@<refresh-rate> "
         "Example: 3840x2160@60",
diff --git a/hw/display/meson.build b/hw/display/meson.build
index 94f4f05d36f4b..90e6c041bdbc1 100644
--- a/hw/display/meson.build
+++ b/hw/display/meson.build
@@ -61,13 +61,8 @@ system_ss.add(when: 'CONFIG_ARTIST', if_true: files('artist.c'))
 
 system_ss.add(when: 'CONFIG_ATI_VGA', if_true: [files('ati.c', 'ati_2d.c', 'ati_dbg.c'), pixman])
 
-if host_os == 'darwin'
-  system_ss.add(when: 'CONFIG_MAC_PVG',         if_true: [files('apple-gfx.m'), pvg, metal])
-  system_ss.add(when: 'CONFIG_MAC_PVG_PCI',     if_true: [files('apple-gfx-pci.m'), pvg, metal])
-  if cpu == 'aarch64'
-    system_ss.add(when: 'CONFIG_MAC_PVG_MMIO',  if_true: [files('apple-gfx-mmio.m'), pvg, metal])
-  endif
-endif
+system_ss.add(when: [pvg, 'CONFIG_MAC_PVG_PCI'],     if_true: [files('apple-gfx.m', 'apple-gfx-pci.m')])
+system_ss.add(when: [pvg, 'CONFIG_MAC_PVG_MMIO'],    if_true: [files('apple-gfx.m', 'apple-gfx-mmio.m')])
 
 if config_all_devices.has_key('CONFIG_VIRTIO_GPU')
   virtio_gpu_ss = ss.source_set()
diff --git a/hw/display/qxl.c b/hw/display/qxl.c
index 2efdc77e61330..da14da520910b 100644
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -30,6 +30,7 @@
 #include "qemu/module.h"
 #include "hw/qdev-properties.h"
 #include "system/runstate.h"
+#include "migration/cpr.h"
 #include "migration/vmstate.h"
 #include "trace.h"
 
@@ -333,6 +334,10 @@ static void init_qxl_rom(PCIQXLDevice *d)
     uint32_t fb;
     int i, n;
 
+    if (cpr_is_incoming()) {
+        goto skip_init;
+    }
+
     memset(rom, 0, d->rom_size);
 
     rom->magic         = cpu_to_le32(QXL_ROM_MAGIC);
@@ -390,6 +395,7 @@ static void init_qxl_rom(PCIQXLDevice *d)
             sizeof(rom->client_monitors_config));
     }
 
+skip_init:
     d->shadow_rom = *rom;
     d->rom        = rom;
     d->modes      = modes;
@@ -403,6 +409,9 @@ static void init_qxl_ram(PCIQXLDevice *d)
 
     buf = d->vga.vram_ptr;
     d->ram = (QXLRam *)(buf + le32_to_cpu(d->shadow_rom.ram_header_offset));
+    if (cpr_is_incoming()) {
+        return;
+    }
     d->ram->magic       = cpu_to_le32(QXL_RAM_MAGIC);
     d->ram->int_pending = cpu_to_le32(0);
     d->ram->int_mask    = cpu_to_le32(0);
@@ -539,6 +548,10 @@ static void interface_set_compression_level(QXLInstance *sin, int level)
 
     trace_qxl_interface_set_compression_level(qxl->id, level);
     qxl->shadow_rom.compression_level = cpu_to_le32(level);
+    if (cpr_is_incoming()) {
+        assert(qxl->rom->compression_level == cpu_to_le32(level));
+        return;
+    }
     qxl->rom->compression_level = cpu_to_le32(level);
     qxl_rom_set_dirty(qxl);
 }
@@ -997,7 +1010,8 @@ static void interface_set_client_capabilities(QXLInstance *sin,
     }
 
     if (runstate_check(RUN_STATE_INMIGRATE) ||
-        runstate_check(RUN_STATE_POSTMIGRATE)) {
+        runstate_check(RUN_STATE_POSTMIGRATE) ||
+        cpr_is_incoming()) {
         return;
     }
 
@@ -1200,6 +1214,10 @@ static void qxl_reset_state(PCIQXLDevice *d)
 {
     QXLRom *rom = d->rom;
 
+    if (cpr_is_incoming()) {
+        return;
+    }
+
     qxl_check_state(d);
     d->shadow_rom.update_id = cpu_to_le32(0);
     *rom = d->shadow_rom;
@@ -1370,8 +1388,11 @@ static int qxl_add_memslot(PCIQXLDevice *d, uint32_t slot_id, uint64_t delta,
     memslot.virt_start = virt_start + (guest_start - pci_start);
     memslot.virt_end   = virt_start + (guest_end   - pci_start);
     memslot.addr_delta = memslot.virt_start - delta;
-    memslot.generation = d->rom->slot_generation = 0;
-    qxl_rom_set_dirty(d);
+    if (!cpr_is_incoming()) {
+        d->rom->slot_generation = 0;
+        qxl_rom_set_dirty(d);
+    }
+    memslot.generation = d->rom->slot_generation;
 
     qemu_spice_add_memslot(&d->ssd, &memslot, async);
     d->guest_slots[slot_id].mr = mr;
diff --git a/hw/display/ramfb-standalone.c b/hw/display/ramfb-standalone.c
index 6c35028965d6f..1be106b57f203 100644
--- a/hw/display/ramfb-standalone.c
+++ b/hw/display/ramfb-standalone.c
@@ -72,13 +72,12 @@ static void ramfb_class_initfn(ObjectClass *klass, void *data)
     dc->vmsd = &ramfb_dev_vmstate;
     dc->realize = ramfb_realizefn;
     dc->desc = "ram framebuffer standalone device";
-    dc->user_creatable = true;
     device_class_set_props(dc, ramfb_properties);
 }
 
 static const TypeInfo ramfb_info = {
     .name          = TYPE_RAMFB_DEVICE,
-    .parent        = TYPE_SYS_BUS_DEVICE,
+    .parent        = TYPE_DYNAMIC_SYS_BUS_DEVICE,
     .instance_size = sizeof(RAMFBStandaloneState),
     .class_init    = ramfb_class_initfn,
 };
diff --git a/hw/gpio/Kconfig b/hw/gpio/Kconfig
index c423e10f59f54..a209294c20c4a 100644
--- a/hw/gpio/Kconfig
+++ b/hw/gpio/Kconfig
@@ -16,6 +16,14 @@ config SIFIVE_GPIO
 config STM32L4X5_GPIO
     bool
 
+config PCA9552
+    bool
+    depends on I2C
+
+config PCA9554
+    bool
+    depends on I2C
+
 config PCF8574
     bool
     depends on I2C
diff --git a/hw/gpio/npcm7xx_gpio.c b/hw/gpio/npcm7xx_gpio.c
index 23e67424c9f49..2916056fae6d8 100644
--- a/hw/gpio/npcm7xx_gpio.c
+++ b/hw/gpio/npcm7xx_gpio.c
@@ -220,8 +220,6 @@ static void npcm7xx_gpio_regs_write(void *opaque, hwaddr addr, uint64_t v,
         return;
     }
 
-    diff = s->regs[reg] ^ value;
-
     switch (reg) {
     case NPCM7XX_GPIO_TLOCK1:
     case NPCM7XX_GPIO_TLOCK2:
@@ -242,6 +240,7 @@ static void npcm7xx_gpio_regs_write(void *opaque, hwaddr addr, uint64_t v,
     case NPCM7XX_GPIO_PU:
     case NPCM7XX_GPIO_PD:
     case NPCM7XX_GPIO_IEM:
+        diff = s->regs[reg] ^ value;
         s->regs[reg] = value;
         npcm7xx_gpio_update_pins(s, diff);
         break;
diff --git a/hw/hexagon/Kconfig b/hw/hexagon/Kconfig
new file mode 100644
index 0000000000000..9a2369974e097
--- /dev/null
+++ b/hw/hexagon/Kconfig
@@ -0,0 +1,15 @@
+config HEX_DSP
+    bool
+    default y
+    depends on HEXAGON && TCG
+    imply PTIMER
+    select L2VIC  # Vector PIC
+    select ARM_COMPATIBLE_SEMIHOSTING
+
+config HEX_VIRT
+    bool
+    default y
+    depends on HEX_DSP && FDT
+    select DEVICE_TREE
+    select VIRTIO_MMIO
+    select PL011
diff --git a/hw/hexagon/hexagon_dsp.c b/hw/hexagon/hexagon_dsp.c
new file mode 100644
index 0000000000000..f4440de80ce03
--- /dev/null
+++ b/hw/hexagon/hexagon_dsp.c
@@ -0,0 +1,206 @@
+/*
+ * Hexagon DSP Subsystem emulation.  This represents a generic DSP
+ * subsystem with few peripherals, like the Compute DSP.
+ *
+ * Copyright (c) 2020-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "exec/address-spaces.h"
+#include "hw/hw.h"
+#include "hw/boards.h"
+#include "hw/qdev-properties.h"
+#include "hw/hexagon/hexagon.h"
+#include "hw/loader.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/log.h"
+#include "elf.h"
+#include "cpu.h"
+#include "include/migration/cpu.h"
+#include "include/system/system.h"
+#include "target/hexagon/internal.h"
+#include "system/reset.h"
+#include "include/semihosting/semihost.h"
+
+#include "machine_cfg_v66g_1024.h.inc"
+
+static hwaddr isdb_secure_flag;
+static hwaddr isdb_trusted_flag;
+static void hex_symbol_callback(const char *st_name, int st_info,
+                                uint64_t st_value, uint64_t st_size)
+{
+    if (!g_strcmp0("isdb_secure_flag", st_name)) {
+        isdb_secure_flag = st_value;
+    }
+    if (!g_strcmp0("isdb_trusted_flag", st_name)) {
+        isdb_trusted_flag = st_value;
+    }
+}
+
+/* Board init.  */
+static struct hexagon_board_boot_info hexagon_binfo;
+
+static void hexagon_load_kernel(HexagonCPU *cpu)
+{
+    uint64_t pentry;
+    long kernel_size;
+
+    kernel_size = load_elf_ram_sym(hexagon_binfo.kernel_filename, NULL, NULL,
+                      NULL, &pentry, NULL, NULL,
+                      &hexagon_binfo.kernel_elf_flags, 0, EM_HEXAGON, 0, 0,
+                      &address_space_memory, false, hex_symbol_callback);
+
+    if (kernel_size <= 0) {
+        error_report("no kernel file '%s'",
+            hexagon_binfo.kernel_filename);
+        exit(1);
+    }
+
+    qdev_prop_set_uint32(DEVICE(cpu), "exec-start-addr", pentry);
+}
+
+static void hexagon_init_bootstrap(MachineState *machine, HexagonCPU *cpu)
+{
+    if (machine->kernel_filename) {
+        hexagon_load_kernel(cpu);
+        uint32_t mem = 1;
+        if (isdb_secure_flag) {
+            cpu_physical_memory_write(isdb_secure_flag, &mem, sizeof(mem));
+        }
+        if (isdb_trusted_flag) {
+            cpu_physical_memory_write(isdb_trusted_flag, &mem, sizeof(mem));
+        }
+    }
+}
+
+static void do_cpu_reset(void *opaque)
+{
+    HexagonCPU *cpu = opaque;
+    CPUState *cs = CPU(cpu);
+    cpu_reset(cs);
+}
+
+static void hexagon_common_init(MachineState *machine, Rev_t rev,
+                                hexagon_machine_config *m_cfg)
+{
+    memset(&hexagon_binfo, 0, sizeof(hexagon_binfo));
+    if (machine->kernel_filename) {
+        hexagon_binfo.ram_size = machine->ram_size;
+        hexagon_binfo.kernel_filename = machine->kernel_filename;
+    }
+
+    machine->enable_graphics = 0;
+
+    MemoryRegion *address_space = get_system_memory();
+
+    MemoryRegion *config_table_rom = g_new(MemoryRegion, 1);
+    memory_region_init_rom(config_table_rom, NULL, "config_table.rom",
+                           sizeof(m_cfg->cfgtable), &error_fatal);
+    memory_region_add_subregion(address_space, m_cfg->cfgbase,
+                                config_table_rom);
+
+    MemoryRegion *sram = g_new(MemoryRegion, 1);
+    memory_region_init_ram(sram, NULL, "ddr.ram",
+        machine->ram_size, &error_fatal);
+    memory_region_add_subregion(address_space, 0x0, sram);
+
+    Error **errp = NULL;
+
+    for (int i = 0; i < machine->smp.cpus; i++) {
+        HexagonCPU *cpu = HEXAGON_CPU(object_new(machine->cpu_type));
+        CPUHexagonState *env = &cpu->env;
+        qemu_register_reset(do_cpu_reset, cpu);
+
+        /*
+         * CPU #0 is the only CPU running at boot, others must be
+         * explicitly enabled via start instruction.
+         */
+        qdev_prop_set_bit(DEVICE(cpu), "start-powered-off", (i != 0));
+        qdev_prop_set_uint32(DEVICE(cpu), "l2vic-base-addr", m_cfg->l2vic_base);
+        qdev_prop_set_uint32(DEVICE(cpu), "config-table-addr", m_cfg->cfgbase);
+        qdev_prop_set_uint32(DEVICE(cpu), "qtimer-base-addr", m_cfg->qtmr_region);
+        qdev_prop_set_uint32(DEVICE(cpu), "hvx-contexts",
+                             m_cfg->cfgtable.ext_contexts);
+        qdev_prop_set_uint32(DEVICE(cpu), "jtlb-entries",
+                             m_cfg->cfgtable.jtlb_size_entries);
+
+
+        if (i == 0) {
+            hexagon_init_bootstrap(machine, cpu);
+            if (!qdev_realize_and_unref(DEVICE(cpu), NULL, errp)) {
+                return;
+            }
+            DeviceState *l2vic_dev;
+            l2vic_dev = sysbus_create_varargs("l2vic", m_cfg->l2vic_base,
+                    /* IRQ#, Evnt#,CauseCode */
+                    qdev_get_gpio_in(DEVICE(cpu), 0),
+                    qdev_get_gpio_in(DEVICE(cpu), 1),
+                    qdev_get_gpio_in(DEVICE(cpu), 2),
+                    qdev_get_gpio_in(DEVICE(cpu), 3),
+                    qdev_get_gpio_in(DEVICE(cpu), 4),
+                    qdev_get_gpio_in(DEVICE(cpu), 5),
+                    qdev_get_gpio_in(DEVICE(cpu), 6),
+                    qdev_get_gpio_in(DEVICE(cpu), 7),
+                    NULL);
+            sysbus_mmio_map(SYS_BUS_DEVICE(l2vic_dev), 1,
+                m_cfg->cfgtable.fastl2vic_base << 16);
+        } else if (!qdev_realize_and_unref(DEVICE(cpu), NULL, errp)) {
+            env->dir_list = NULL;
+            return;
+        }
+
+    }
+
+    rom_add_blob_fixed_as("config_table.rom", &m_cfg->cfgtable,
+                          sizeof(m_cfg->cfgtable), m_cfg->cfgbase,
+                          &address_space_memory);
+}
+
+static void init_mc(MachineClass *mc)
+{
+    mc->block_default_type = IF_SD;
+    mc->default_ram_size = 4 * GiB;
+    mc->no_parallel = 1;
+    mc->no_floppy = 1;
+    mc->no_cdrom = 1;
+    mc->no_serial = 1;
+    mc->is_default = false;
+    mc->max_cpus = 8;
+    qemu_semihosting_enable();
+}
+
+/* ----------------------------------------------------------------- */
+/* Core-specific configuration settings are defined below this line. */
+/* Config table values defined in machine_configs.h.inc              */
+/* ----------------------------------------------------------------- */
+
+static void v66g_1024_config_init(MachineState *machine)
+{
+    hexagon_common_init(machine, v66_rev, &v66g_1024);
+}
+
+static void v66g_1024_init(ObjectClass *oc, void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+
+    mc->desc = "Hexagon V66G_1024";
+    mc->init = v66g_1024_config_init;
+    init_mc(mc);
+    mc->is_default = true;
+    mc->default_cpu_type = TYPE_HEXAGON_CPU_V66;
+    mc->default_cpus = 4;
+}
+
+static const TypeInfo hexagon_machine_types[] = {
+    {
+        .name = MACHINE_TYPE_NAME("V66G_1024"),
+        .parent = TYPE_MACHINE,
+        .class_init = v66g_1024_init,
+    },
+};
+
+DEFINE_TYPES(hexagon_machine_types)
diff --git a/hw/hexagon/machine_cfg_sa8775_cdsp0.h.inc b/hw/hexagon/machine_cfg_sa8775_cdsp0.h.inc
new file mode 100644
index 0000000000000..70b1eabfe9617
--- /dev/null
+++ b/hw/hexagon/machine_cfg_sa8775_cdsp0.h.inc
@@ -0,0 +1,63 @@
+
+static hexagon_machine_config SA8775P_cdsp0 = {
+    .cfgbase =         0x24000000 + 0x180000,
+    .l2tcm_size =      0x00000000,
+    .l2vic_base =      0x26300000 + 0x90000,
+    .l2vic_size =      0x00001000,
+    .csr_base =        0x26300000,
+    .qtmr_region =     0x26300000 + 0xA1000,
+    .cfgtable = {
+        .l2tcm_base = 0x00002400,
+        .reserved0 = 0x00000000,
+        .subsystem_base = 0x00002638,
+        .etm_base = 0x00002419,
+        .l2cfg_base = 0x0000241a,
+        .reserved1 = 0x0000241b,
+        .l1s0_base = 0x00002500,
+        .axi2_lowaddr = 0x00000000,
+        .streamer_base = 0x00000000,
+        .reserved2 = 0x00000000,
+        .fastl2vic_base = 0x0000241e,
+        .jtlb_size_entries = 0x00000080,
+        .coproc_present = 0x00000001,
+        .ext_contexts = 0x00000004,
+        .vtcm_base = 0x00002500,
+        .vtcm_size_kb = 0x00002000,
+        .l2tag_size = 0x00000400,
+        .l2ecomem_size = 0x00000000,
+        .thread_enable_mask = 0x0000003f,
+        .eccreg_base = 0x0000241f,
+        .l2line_size = 0x00000080,
+        .tiny_core = 0x00000000,
+        .l2itcm_size = 0x00000000,
+        .l2itcm_base = 0x00002400,
+        .reserved3 = 0x00000000,
+        .dtm_present = 0x00000000,
+        .dma_version = 0x00000003,
+        .hvx_vec_log_length = 0x00000007,
+        .core_id = 0x00000000,
+        .core_count = 0x00000000,
+        .coproc2_reg0 = 0x00000040,
+        .coproc2_reg1 = 0x00000020,
+        .v2x_mode = 0x00000001,
+        .coproc2_reg2 = 0x00000008,
+        .coproc2_reg3 = 0x00000020,
+        .coproc2_reg4 = 0x00000000,
+        .coproc2_reg5 = 0x00000002,
+        .coproc2_reg6 = 0x00000016,
+        .coproc2_reg7 = 0x00000006,
+        .acd_preset = 0x00000001,
+        .mnd_preset = 0x00000000,
+        .l1d_size_kb = 0x00000010,
+        .l1i_size_kb = 0x00000020,
+        .l1d_write_policy = 0x00000002,
+        .vtcm_bank_width = 0x00000080,
+        .reserved3 = 0x00000001,
+        .reserved4 = 0x00000000,
+        .reserved5 = 0x00000003,
+        .coproc2_cvt_mpy_size = 0x0000000a,
+        .consistency_domain = 0x000000e0,
+        .capacity_domain = 0x00000080,
+        .axi3_lowaddr = 0x00000000,
+    },
+};
diff --git a/hw/hexagon/machine_cfg_v66g_1024.h.inc b/hw/hexagon/machine_cfg_v66g_1024.h.inc
new file mode 100644
index 0000000000000..8f2a593bb8602
--- /dev/null
+++ b/hw/hexagon/machine_cfg_v66g_1024.h.inc
@@ -0,0 +1,63 @@
+
+static hexagon_machine_config v66g_1024 = {
+    .cfgbase =        0xd8180000,
+    .l2tcm_size =     0x00000000,
+    .l2vic_base =     0xfc910000,
+    .l2vic_size =     0x00001000,
+    .csr_base =       0xfc900000,
+    .qtmr_region =    0xfc921000,
+    .cfgtable = {
+        .l2tcm_base = 0x0000d800,
+        .reserved0 = 0x0000d400,
+        .subsystem_base = 0x0000fc90,
+        .etm_base = 0x0000d805,
+        .l2cfg_base = 0x0000d81a,
+        .reserved1 = 0x00000000,
+        .l1s0_base = 0x0000d820,
+        .axi2_lowaddr = 0x00003000,
+        .streamer_base = 0x00000000,
+        .reserved2 = 0x0000d819,
+        .fastl2vic_base = 0x0000d81e,
+        .jtlb_size_entries = 0x00000080,
+        .coproc_present = 0x00000001,
+        .ext_contexts = 0x00000004,
+        .vtcm_base = 0x0000d820,
+        .vtcm_size_kb = 0x00000100,
+        .l2tag_size = 0x00000400,
+        .l2ecomem_size = 0x00000400,
+        .thread_enable_mask = 0x0000000f,
+        .eccreg_base = 0x0000d81f,
+        .l2line_size = 0x00000080,
+        .tiny_core = 0x00000000,
+        .l2itcm_size = 0x00000000,
+        .l2itcm_base = 0x0000d820,
+        .reserved3 = 0x00000000,
+        .dtm_present = 0x00000000,
+        .dma_version = 0x00000000,
+        .hvx_vec_log_length = 0x00000080,
+        .core_id = 0x00000000,
+        .core_count = 0x00000000,
+        .coproc2_reg0 = 0x00000000,
+        .coproc2_reg1 = 0x00000000,
+        .v2x_mode = 0x00000000,
+        .coproc2_reg2 = 0x00000000,
+        .coproc2_reg3 = 0x00000000,
+        .coproc2_reg4 = 0x00000000,
+        .coproc2_reg5 = 0x00000000,
+        .coproc2_reg6 = 0x00000000,
+        .coproc2_reg7 = 0x00000000,
+        .acd_preset = 0x00000000,
+        .mnd_preset = 0x00000000,
+        .l1d_size_kb = 0x00000000,
+        .l1i_size_kb = 0x00000000,
+        .l1d_write_policy = 0x00000000,
+        .vtcm_bank_width = 0x00000000,
+        .reserved3 = 0x00000000,
+        .reserved4 = 0x00000000,
+        .reserved5 = 0x00000000,
+        .coproc2_cvt_mpy_size = 0x00000000,
+        .consistency_domain = 0x00000000,
+        .capacity_domain = 0x00000000,
+        .axi3_lowaddr = 0x00000000,
+    },
+};
diff --git a/hw/hexagon/machine_cfg_v68n_1024.h.inc b/hw/hexagon/machine_cfg_v68n_1024.h.inc
new file mode 100644
index 0000000000000..257c133df8f34
--- /dev/null
+++ b/hw/hexagon/machine_cfg_v68n_1024.h.inc
@@ -0,0 +1,64 @@
+
+static hexagon_machine_config v68n_1024 = {
+    .cfgbase =           0xde000000,
+    .l2tcm_size =        0x00000000,
+    .l2vic_base =        0xfc910000,
+    .l2vic_size =        0x00001000,
+    .csr_base =          0xfc900000,
+    .qtmr_region =       0xfc921000,
+    .cfgtable = {
+        .l2tcm_base = 0x0000d800,
+        .reserved0 = 0x00000000,
+        .subsystem_base = 0x0000fc90,
+        .etm_base = 0x0000d819,
+        .l2cfg_base = 0x0000d81a,
+        .reserved1 = 0x00000000,
+        .l1s0_base = 0x0000d840,
+        .axi2_lowaddr = 0x00003000,
+        .streamer_base = 0x0000d81c,
+        .reserved2 = 0x0000d81d,
+        .fastl2vic_base = 0x0000d81e,
+        .jtlb_size_entries = 0x00000080,
+        .coproc_present = 0x00000001,
+        .ext_contexts = 0x00000004,
+        .vtcm_base = 0x0000d840,
+        .vtcm_size_kb = 0x00001000,
+        .l2tag_size = 0x00000400,
+        .l2ecomem_size = 0x00000400,
+        .thread_enable_mask = 0x0000003f,
+        .eccreg_base = 0x0000d81f,
+        .l2line_size = 0x00000080,
+        .tiny_core = 0x00000000,
+        .l2itcm_size = 0x00000000,
+        .l2itcm_base = 0x0000d820,
+        .reserved3 = 0x00000000,
+        .dtm_present = 0x00000000,
+        .dma_version = 0x00000001,
+        .hvx_vec_log_length = 0x00000007,
+        .core_id = 0x00000000,
+        .core_count = 0x00000000,
+        .coproc2_reg0 = 0x00000040,
+        .coproc2_reg1 = 0x00000020,
+        .v2x_mode = 0x1f1f1f1f,
+        .coproc2_reg2 = 0x1f1f1f1f,
+        .coproc2_reg3 = 0x1f1f1f1f,
+        .coproc2_reg4 = 0x1f1f1f1f,
+        .coproc2_reg5 = 0x1f1f1f1f,
+        .coproc2_reg6 = 0x1f1f1f1f,
+        .coproc2_reg7 = 0x1f1f1f1f,
+        .acd_preset = 0x1f1f1f1f,
+        .mnd_preset = 0x1f1f1f1f,
+        .l1d_size_kb = 0x1f1f1f1f,
+        .l1i_size_kb = 0x1f1f1f1f,
+        .l1d_write_policy = 0x1f1f1f1f,
+        .vtcm_bank_width = 0x1f1f1f1f,
+        .reserved3 = 0x1f1f1f1f,
+        .reserved4 = 0x1f1f1f1f,
+        .reserved5 = 0x1f1f1f1f,
+        .coproc2_cvt_mpy_size = 0x1f1f1f1f,
+        .consistency_domain = 0x1f1f1f1f,
+        .capacity_domain = 0x1f1f1f1f,
+        .axi3_lowaddr = 0x1f1f1f1f,
+    },
+};
+
diff --git a/hw/hexagon/meson.build b/hw/hexagon/meson.build
new file mode 100644
index 0000000000000..649ad6dc02b3f
--- /dev/null
+++ b/hw/hexagon/meson.build
@@ -0,0 +1,7 @@
+hexagon_ss = ss.source_set()
+hexagon_ss.add(when: 'CONFIG_HEX_DSP', if_true: files('hexagon_dsp.c',))
+
+hw_arch += {'hexagon': hexagon_ss}
+
+hexagon_ss.add(when: 'CONFIG_HEX_VIRT', if_true: files('virt.c',))
+
diff --git a/hw/hexagon/virt.c b/hw/hexagon/virt.c
new file mode 100644
index 0000000000000..1e7ac4e5b70b6
--- /dev/null
+++ b/hw/hexagon/virt.c
@@ -0,0 +1,415 @@
+/*
+ * Hexagon virt emulation
+ *
+ * Copyright (c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "exec/address-spaces.h"
+#include "hw/char/pl011.h"
+#include "hw/core/sysbus-fdt.h"
+#include "hw/hexagon/hexagon.h"
+#include "hw/hexagon/virt.h"
+#include "hw/loader.h"
+#include "hw/qdev-properties.h"
+#include "hw/register.h"
+#include "hw/timer/qct-qtimer.h"
+#include "qemu/error-report.h"
+#include "qemu/guest-random.h"
+#include "qemu/units.h"
+#include "elf.h"
+#include "machine_cfg_v68n_1024.h.inc"
+#include "system/device_tree.h"
+#include "system/reset.h"
+#include "system/system.h"
+#include <libfdt.h>
+
+static const int VIRTIO_DEV_COUNT = 2;
+
+static const MemMapEntry base_memmap[] = {
+    [VIRT_UART0] = { 0x10000000, 0x00000200 },
+    [VIRT_MMIO] = { 0x11000000, 0x1000000, },
+    [VIRT_GPT] = { 0xab000000, 0x00001000 },
+    [VIRT_FDT] = { 0x99900000, 0x00000200 },
+};
+
+static const int irqmap[] = {
+    [VIRT_MMIO] = 18, /* ...to 18 + VIRTIO_DEV_COUNT - 1 */
+    [VIRT_GPT] = 12,
+    [VIRT_UART0] = 15,
+    [VIRT_QTMR0] = 2,
+    [VIRT_QTMR1] = 4,
+};
+
+
+static void create_fdt(HexagonVirtMachineState *vms)
+{
+    MachineState *ms = MACHINE(vms);
+    void *fdt = create_device_tree(&vms->fdt_size);
+
+    if (!fdt) {
+        error_report("create_device_tree() failed");
+        exit(1);
+    }
+
+    ms->fdt = fdt;
+
+    qemu_fdt_setprop_string(fdt, "/", "compatible", "linux,hexagon-virt");
+    qemu_fdt_setprop_cell(fdt, "/", "#address-cells", 0x2);
+    qemu_fdt_setprop_cell(fdt, "/", "#size-cells", 0x1);
+    qemu_fdt_setprop_string(fdt, "/", "model", "linux,hexagon-virt");
+
+    qemu_fdt_setprop_string(fdt, "/", "model", "hexagon-virt,qemu");
+    qemu_fdt_setprop_string(fdt, "/", "compatible", "qcom,sm8150");
+
+    qemu_fdt_add_subnode(fdt, "/soc");
+    qemu_fdt_setprop_cell(fdt, "/soc", "#address-cells", 0x2);
+    qemu_fdt_setprop_cell(fdt, "/soc", "#size-cells", 0x1);
+    qemu_fdt_setprop(fdt, "/soc", "ranges", NULL, 0);
+
+    qemu_fdt_add_subnode(fdt, "/chosen");
+
+    uint8_t rng_seed[32];
+    qemu_guest_getrandom_nofail(rng_seed, sizeof(rng_seed));
+    qemu_fdt_setprop(fdt, "/chosen", "rng-seed", rng_seed, sizeof(rng_seed));
+}
+
+static void fdt_add_hvx(HexagonVirtMachineState *vms,
+                        const hexagon_machine_config *m_cfg, Error **errp)
+{
+    const MachineState *ms = MACHINE(vms);
+    uint32_t vtcm_size_bytes = m_cfg->cfgtable.vtcm_size_kb * 1024;
+    if (vtcm_size_bytes > 0) {
+        memory_region_init_ram(&vms->vtcm, NULL, "vtcm.ram", vtcm_size_bytes,
+                               errp);
+        memory_region_add_subregion(vms->sys, m_cfg->cfgtable.vtcm_base << 16,
+                                    &vms->vtcm);
+
+        qemu_fdt_add_subnode(ms->fdt, "/soc/vtcm");
+        qemu_fdt_setprop_string(ms->fdt, "/soc/vtcm", "compatible",
+                                "qcom,hexagon_vtcm");
+
+        assert(sizeof(m_cfg->cfgtable.vtcm_base) == sizeof(uint32_t));
+        qemu_fdt_setprop_cells(ms->fdt, "/soc/vtcm", "reg", 0,
+                               m_cfg->cfgtable.vtcm_base << 16,
+                               vtcm_size_bytes);
+    }
+
+    if (m_cfg->cfgtable.ext_contexts > 0) {
+        qemu_fdt_add_subnode(ms->fdt, "/soc/hvx");
+        qemu_fdt_setprop_string(ms->fdt, "/soc/hvx", "compatible",
+                                "qcom,hexagon-hvx");
+        qemu_fdt_setprop_cells(ms->fdt, "/soc/hvx", "qcom,hvx-max-ctxts",
+                               m_cfg->cfgtable.ext_contexts);
+        qemu_fdt_setprop_cells(ms->fdt, "/soc/hvx", "qcom,hvx-vlength",
+                               m_cfg->cfgtable.hvx_vec_log_length);
+    }
+}
+
+static int32_t irq_hvm_ic_phandle = -1;
+static void fdt_add_hvm_pic_node(HexagonVirtMachineState *vms,
+                                 const hexagon_machine_config *m_cfg)
+{
+    MachineState *ms = MACHINE(vms);
+    irq_hvm_ic_phandle = qemu_fdt_alloc_phandle(ms->fdt);
+
+    qemu_fdt_setprop_cell(ms->fdt, "/soc", "interrupt-parent",
+                          irq_hvm_ic_phandle);
+
+    qemu_fdt_add_subnode(ms->fdt, "/soc/interrupt-controller");
+    qemu_fdt_setprop_cell(ms->fdt, "/soc/interrupt-controller",
+                          "#address-cells", 2);
+    qemu_fdt_setprop_cell(ms->fdt, "/soc/interrupt-controller",
+                          "#interrupt-cells", 2);
+    qemu_fdt_setprop_string(ms->fdt, "/soc/interrupt-controller", "compatible",
+                            "qcom,h2-pic,hvm-pic");
+    qemu_fdt_setprop(ms->fdt, "/soc/interrupt-controller",
+                     "interrupt-controller", NULL, 0);
+    qemu_fdt_setprop_cell(ms->fdt, "/soc/interrupt-controller", "phandle",
+                          irq_hvm_ic_phandle);
+
+    sysbus_mmio_map(SYS_BUS_DEVICE(vms->l2vic), 1,
+                    m_cfg->cfgtable.fastl2vic_base << 16);
+}
+
+
+static void fdt_add_gpt_node(HexagonVirtMachineState *vms)
+{
+    g_autofree char *name = NULL;
+    MachineState *ms = MACHINE(vms);
+
+    name = g_strdup_printf("/soc/gpt@%" PRIx64,
+                           (int64_t)base_memmap[VIRT_GPT].base);
+    qemu_fdt_add_subnode(ms->fdt, name);
+    qemu_fdt_setprop_string(ms->fdt, name, "compatible",
+                            "qcom,h2-timer,hvm-timer");
+    qemu_fdt_setprop_cells(ms->fdt, name, "interrupts", irqmap[VIRT_GPT], 0);
+    qemu_fdt_setprop_cells(ms->fdt, name, "reg", 0x0,
+                           base_memmap[VIRT_GPT].base,
+                           base_memmap[VIRT_GPT].size);
+}
+
+static int32_t clock_phandle = -1;
+static void fdt_add_clocks(const HexagonVirtMachineState *vms)
+{
+    MachineState *ms = MACHINE(vms);
+    clock_phandle = qemu_fdt_alloc_phandle(ms->fdt);
+    qemu_fdt_add_subnode(ms->fdt, "/apb-pclk");
+    qemu_fdt_setprop_string(ms->fdt, "/apb-pclk", "compatible", "fixed-clock");
+    qemu_fdt_setprop_cell(ms->fdt, "/apb-pclk", "#clock-cells", 0x0);
+    qemu_fdt_setprop_cell(ms->fdt, "/apb-pclk", "clock-frequency", 24000000);
+    qemu_fdt_setprop_string(ms->fdt, "/apb-pclk", "clock-output-names",
+                            "clk24mhz");
+    qemu_fdt_setprop_cell(ms->fdt, "/apb-pclk", "phandle", clock_phandle);
+}
+
+static void fdt_add_uart(const HexagonVirtMachineState *vms, int uart)
+{
+    char *nodename;
+    hwaddr base = base_memmap[uart].base;
+    hwaddr size = base_memmap[uart].size;
+    assert(uart == 0);
+    int irq = irqmap[VIRT_UART0 + uart];
+    const char compat[] = "arm,pl011\0arm,primecell";
+    const char clocknames[] = "uartclk\0apb_pclk";
+    MachineState *ms = MACHINE(vms);
+
+    pl011_create(base, qdev_get_gpio_in(vms->l2vic, irq), serial_hd(0));
+
+    nodename = g_strdup_printf("/pl011@%" PRIx64, base);
+    qemu_fdt_add_subnode(ms->fdt, nodename);
+
+    /* Note that we can't use setprop_string because of the embedded NUL */
+    qemu_fdt_setprop(ms->fdt, nodename, "compatible", compat, sizeof(compat));
+    qemu_fdt_setprop_cells(ms->fdt, nodename, "reg", 0, base, size);
+    qemu_fdt_setprop_cells(ms->fdt, nodename, "interrupts", 32 + irq, 0);
+    qemu_fdt_setprop_cells(ms->fdt, nodename, "clocks", clock_phandle,
+                           clock_phandle);
+    qemu_fdt_setprop(ms->fdt, nodename, "clock-names", clocknames,
+                     sizeof(clocknames));
+    qemu_fdt_setprop_cell(ms->fdt, nodename, "interrupt-parent",
+                          irq_hvm_ic_phandle);
+
+    qemu_fdt_setprop_string(ms->fdt, "/chosen", "stdout-path", nodename);
+    qemu_fdt_add_subnode(ms->fdt, "/aliases");
+    qemu_fdt_setprop_string(ms->fdt, "/aliases", "serial0", nodename);
+
+    g_free(nodename);
+}
+
+static void fdt_add_cpu_nodes(const HexagonVirtMachineState *vms)
+{
+    MachineState *ms = MACHINE(vms);
+    qemu_fdt_add_subnode(ms->fdt, "/cpus");
+    qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#address-cells", 0x1);
+    qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#size-cells", 0x0);
+
+    /* cpu nodes */
+    for (int num = ms->smp.cpus - 1; num >= 0; num--) {
+        char *nodename = g_strdup_printf("/cpus/cpu@%d", num);
+        qemu_fdt_add_subnode(ms->fdt, nodename);
+        qemu_fdt_setprop_string(ms->fdt, nodename, "device_type", "cpu");
+        qemu_fdt_setprop_cell(ms->fdt, nodename, "reg", num);
+        qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle",
+                              qemu_fdt_alloc_phandle(ms->fdt));
+        g_free(nodename);
+    }
+}
+
+
+static void fdt_add_virtio_devices(const HexagonVirtMachineState *vms)
+{
+    MachineState *ms = MACHINE(vms);
+    /* VirtIO MMIO devices */
+    for (int i = 0; i < VIRTIO_DEV_COUNT; i++) {
+        char *nodename;
+        int irq = irqmap[VIRT_MMIO] + i;
+        size_t size = base_memmap[VIRT_MMIO].size;
+        hwaddr base = base_memmap[VIRT_MMIO].base + i * size;
+
+        nodename = g_strdup_printf("/virtio_mmio@%" PRIx64, base);
+        qemu_fdt_add_subnode(ms->fdt, nodename);
+        qemu_fdt_setprop_string(ms->fdt, nodename, "compatible", "virtio,mmio");
+        qemu_fdt_setprop_sized_cells(ms->fdt, nodename, "reg", 2, base, 1,
+                                     size);
+        qemu_fdt_setprop_cells(ms->fdt, nodename, "interrupts", irq, 0);
+        qemu_fdt_setprop_cell(ms->fdt, nodename, "interrupt-parent",
+                              irq_hvm_ic_phandle);
+
+        sysbus_create_simple(
+            "virtio-mmio", base,
+            qdev_get_gpio_in(vms->l2vic, irqmap[VIRT_MMIO] + i));
+
+        g_free(nodename);
+    }
+}
+
+static void create_qtimer(HexagonVirtMachineState *vms,
+                          const hexagon_machine_config *m_cfg)
+{
+    Error **errp = NULL;
+    QCTQtimerState *qtimer = QCT_QTIMER(qdev_new(TYPE_QCT_QTIMER));
+
+    object_property_set_uint(OBJECT(qtimer), "nr_frames", 2, errp);
+    object_property_set_uint(OBJECT(qtimer), "nr_views", 1, errp);
+    object_property_set_uint(OBJECT(qtimer), "cnttid", 0x111, errp);
+    sysbus_realize_and_unref(SYS_BUS_DEVICE(qtimer), errp);
+
+
+    sysbus_mmio_map(SYS_BUS_DEVICE(qtimer), 1, m_cfg->qtmr_region);
+    sysbus_connect_irq(SYS_BUS_DEVICE(qtimer), 0,
+                       qdev_get_gpio_in(vms->l2vic, irqmap[VIRT_QTMR0]));
+    sysbus_connect_irq(SYS_BUS_DEVICE(qtimer), 1,
+                       qdev_get_gpio_in(vms->l2vic, irqmap[VIRT_QTMR1]));
+}
+
+static void virt_instance_init(Object *obj)
+{
+    HexagonVirtMachineState *vms = HEXAGON_VIRT_MACHINE(obj);
+
+    create_fdt(vms);
+}
+
+void hexagon_load_fdt(const HexagonVirtMachineState *vms)
+{
+    MachineState *ms = MACHINE(vms);
+    hwaddr fdt_addr = base_memmap[VIRT_FDT].base;
+    uint32_t fdtsize = vms->fdt_size;
+
+    /* copy in the device tree */
+    rom_add_blob_fixed_as("fdt", ms->fdt, fdtsize, fdt_addr,
+                          &address_space_memory);
+    qemu_register_reset_nosnapshotload(
+        qemu_fdt_randomize_seeds,
+        rom_ptr_for_as(&address_space_memory, fdt_addr, fdtsize));
+}
+
+static uint64_t load_kernel(const HexagonVirtMachineState *vms)
+{
+    MachineState *ms = MACHINE(vms);
+    uint64_t entry = 0;
+    if (load_elf_ram_sym(ms->kernel_filename, NULL, NULL, NULL, &entry, NULL,
+                         NULL, NULL, 0, EM_HEXAGON, 0, 0, &address_space_memory,
+                         false, NULL) > 0) {
+        return entry;
+    }
+    error_report("error loading '%s'", ms->kernel_filename);
+    exit(1);
+}
+
+static void do_cpu_reset(void *opaque)
+{
+    HexagonCPU *cpu = opaque;
+    CPUState *cs = CPU(cpu);
+    cpu_reset(cs);
+}
+
+static void virt_init(MachineState *ms)
+{
+    HexagonVirtMachineState *vms = HEXAGON_VIRT_MACHINE(ms);
+    Error **errp = NULL;
+    const hexagon_machine_config *m_cfg = &v68n_1024;
+
+    qemu_fdt_setprop_string(ms->fdt, "/chosen", "bootargs", ms->kernel_cmdline);
+
+    vms->sys = get_system_memory();
+
+    memory_region_init_ram(&vms->ram, NULL, "ddr.ram", ms->ram_size, errp);
+    memory_region_add_subregion(vms->sys, 0x0, &vms->ram);
+
+    if (m_cfg->l2tcm_size) {
+        memory_region_init_ram(&vms->tcm, NULL, "tcm.ram", m_cfg->l2tcm_size,
+                               errp);
+        memory_region_add_subregion(vms->sys, m_cfg->cfgtable.l2tcm_base << 16,
+                                    &vms->tcm);
+    }
+
+    memory_region_init_rom(&vms->cfgtable, NULL, "config_table.rom",
+                           sizeof(m_cfg->cfgtable), errp);
+    memory_region_add_subregion(vms->sys, m_cfg->cfgbase, &vms->cfgtable);
+    fdt_add_hvx(vms, m_cfg, errp);
+    const char *cpu_model = ms->cpu_type;
+
+    if (!cpu_model) {
+        cpu_model = HEXAGON_CPU_TYPE_NAME("v73");
+    }
+
+    HexagonCPU *cpu_0 = NULL;
+    for (int i = 0; i < ms->smp.cpus; i++) {
+        HexagonCPU *cpu = HEXAGON_CPU(object_new(ms->cpu_type));
+        qemu_register_reset(do_cpu_reset, cpu);
+
+        if (i == 0) {
+            cpu_0 = cpu;
+            if (ms->kernel_filename) {
+                uint64_t entry = load_kernel(vms);
+
+                qdev_prop_set_uint32(DEVICE(cpu_0), "exec-start-addr", entry);
+            }
+        }
+        qdev_prop_set_bit(DEVICE(cpu), "start-powered-off", (i != 0));
+        qdev_prop_set_uint32(DEVICE(cpu), "hvx-contexts",
+                             m_cfg->cfgtable.ext_contexts);
+        qdev_prop_set_uint32(DEVICE(cpu), "config-table-addr", m_cfg->cfgbase);
+        qdev_prop_set_uint32(DEVICE(cpu), "l2vic-base-addr", m_cfg->l2vic_base);
+        qdev_prop_set_uint32(DEVICE(cpu), "qtimer-base-addr", m_cfg->qtmr_region);
+        qdev_prop_set_uint32(DEVICE(cpu), "jtlb-entries",
+                             m_cfg->cfgtable.jtlb_size_entries);
+
+        if (!qdev_realize_and_unref(DEVICE(cpu), NULL, errp)) {
+            return;
+        }
+    }
+    vms->l2vic = sysbus_create_varargs(
+        "l2vic", m_cfg->l2vic_base, qdev_get_gpio_in(DEVICE(cpu_0), 0),
+        qdev_get_gpio_in(DEVICE(cpu_0), 1), qdev_get_gpio_in(DEVICE(cpu_0), 2),
+        qdev_get_gpio_in(DEVICE(cpu_0), 3), qdev_get_gpio_in(DEVICE(cpu_0), 4),
+        qdev_get_gpio_in(DEVICE(cpu_0), 5), qdev_get_gpio_in(DEVICE(cpu_0), 6),
+        qdev_get_gpio_in(DEVICE(cpu_0), 7), NULL);
+
+    fdt_add_hvm_pic_node(vms, m_cfg);
+    fdt_add_virtio_devices(vms);
+    fdt_add_cpu_nodes(vms);
+    fdt_add_clocks(vms);
+    fdt_add_uart(vms, VIRT_UART0);
+    fdt_add_gpt_node(vms);
+    create_qtimer(vms, m_cfg);
+
+    rom_add_blob_fixed_as("config_table.rom", &m_cfg->cfgtable,
+                          sizeof(m_cfg->cfgtable), m_cfg->cfgbase,
+                          &address_space_memory);
+
+
+    hexagon_load_fdt(vms);
+}
+
+
+static void virt_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+
+    mc->init = virt_init;
+    mc->default_cpu_type = HEXAGON_CPU_TYPE_NAME("v73");
+    mc->default_ram_size = 4 * GiB;
+    mc->max_cpus = 8;
+    mc->default_cpus = 8;
+    mc->is_default = false;
+    mc->default_kernel_irqchip_split = false;
+    mc->block_default_type = IF_VIRTIO;
+    mc->default_boot_order = NULL;
+    mc->no_cdrom = 1;
+    mc->numa_mem_supported = false;
+    mc->default_nic = "virtio-mmio-bus";
+}
+
+
+static const TypeInfo virt_machine_types[] = { {
+    .name = TYPE_HEXAGON_VIRT_MACHINE,
+    .parent = TYPE_MACHINE,
+    .instance_size = sizeof(HexagonVirtMachineState),
+    .class_init = virt_class_init,
+    .instance_init = virt_instance_init,
+} };
+
+DEFINE_TYPES(virt_machine_types)
diff --git a/hw/i2c/allwinner-i2c.c b/hw/i2c/allwinner-i2c.c
index 16f1d6d40e7b2..66d6431c5083d 100644
--- a/hw/i2c/allwinner-i2c.c
+++ b/hw/i2c/allwinner-i2c.c
@@ -407,7 +407,7 @@ static const MemoryRegionOps allwinner_i2c_ops = {
     .write = allwinner_i2c_write,
     .valid.min_access_size = 1,
     .valid.max_access_size = 4,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
 static const VMStateDescription allwinner_i2c_vmstate = {
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 6b13ce894b1aa..5b21cf134a3b7 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -1309,15 +1309,15 @@ static int amdvi_int_remap_msi(AMDVIState *iommu,
         ret = -AMDVI_IR_ERR;
         break;
     case AMDVI_IOAPIC_INT_TYPE_NMI:
-        pass = dte[3] & AMDVI_DEV_NMI_PASS_MASK;
+        pass = dte[2] & AMDVI_DEV_NMI_PASS_MASK;
         trace_amdvi_ir_delivery_mode("nmi");
         break;
     case AMDVI_IOAPIC_INT_TYPE_INIT:
-        pass = dte[3] & AMDVI_DEV_INT_PASS_MASK;
+        pass = dte[2] & AMDVI_DEV_INT_PASS_MASK;
         trace_amdvi_ir_delivery_mode("init");
         break;
     case AMDVI_IOAPIC_INT_TYPE_EINT:
-        pass = dte[3] & AMDVI_DEV_EINT_PASS_MASK;
+        pass = dte[2] & AMDVI_DEV_EINT_PASS_MASK;
         trace_amdvi_ir_delivery_mode("eint");
         break;
     default:
@@ -1593,9 +1593,9 @@ static void amdvi_pci_realize(PCIDevice *pdev, Error **errp)
     /* reset AMDVI specific capabilities, all r/o */
     pci_set_long(pdev->config + s->capab_offset, AMDVI_CAPAB_FEATURES);
     pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_BAR_LOW,
-                 AMDVI_BASE_ADDR & ~(0xffff0000));
+                 AMDVI_BASE_ADDR & MAKE_64BIT_MASK(14, 18));
     pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_BAR_HIGH,
-                (AMDVI_BASE_ADDR & ~(0xffff)) >> 16);
+                AMDVI_BASE_ADDR >> 32);
     pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_RANGE,
                  0xff000000);
     pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_MISC, 0);
@@ -1687,8 +1687,6 @@ static void amdvi_sysbus_class_init(ObjectClass *klass, void *data)
     dc->hotpluggable = false;
     dc_class->realize = amdvi_sysbus_realize;
     dc_class->int_remap = amdvi_int_remap;
-    /* Supported by the pc-q35-* machine types */
-    dc->user_creatable = true;
     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
     dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device";
     device_class_set_props(dc, amdvi_properties);
diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index e0dac4d9a96c0..28125130c6fc7 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -187,7 +187,7 @@
         AMDVI_CAPAB_FLAG_HTTUNNEL |  AMDVI_CAPAB_EFR_SUP)
 
 /* AMDVI default address */
-#define AMDVI_BASE_ADDR 0xfed80000
+#define AMDVI_BASE_ADDR 0xfed80000ULL
 
 /* page management constants */
 #define AMDVI_PAGE_SHIFT 12
diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c
index 91bf1df0f2e4a..a7f1b60b98c0e 100644
--- a/hw/i386/fw_cfg.c
+++ b/hw/i386/fw_cfg.c
@@ -26,7 +26,9 @@
 #include CONFIG_DEVICES
 #include "target/i386/cpu.h"
 
-struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX};
+#if !defined(CONFIG_HPET) && !defined(CONFIG_X_HPET_RUST)
+struct hpet_fw_config hpet_fw_cfg = {.count = UINT8_MAX};
+#endif
 
 const char *fw_cfg_arch_key_name(uint16_t key)
 {
@@ -143,13 +145,13 @@ FWCfgState *fw_cfg_arch_create(MachineState *ms,
      */
     fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, apic_id_limit);
     fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, ms->ram_size);
-#ifdef CONFIG_ACPI
-    fw_cfg_add_bytes(fw_cfg, FW_CFG_ACPI_TABLES,
-                     acpi_tables, acpi_tables_len);
-#endif
+    if (acpi_builtin()) {
+        fw_cfg_add_bytes(fw_cfg, FW_CFG_ACPI_TABLES,
+                         acpi_tables, acpi_tables_len);
+    }
     fw_cfg_add_i32(fw_cfg, FW_CFG_IRQ0_OVERRIDE, 1);
 
-    fw_cfg_add_bytes(fw_cfg, FW_CFG_HPET, &hpet_cfg, sizeof(hpet_cfg));
+    fw_cfg_add_bytes(fw_cfg, FW_CFG_HPET, &hpet_fw_cfg, sizeof(hpet_fw_cfg));
     /* allocate memory for the NUMA channel: one (64bit) word for the number
      * of nodes, one word for each VCPU->node and one word for each node to
      * hold the amount of memory.
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index f366c223d0e1b..dffd7ee885214 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -4697,10 +4697,11 @@ static void vtd_init(IntelIOMMUState *s)
 /* Should not reset address_spaces when reset because devices will still use
  * the address space they got at first (won't ask the bus again).
  */
-static void vtd_reset(DeviceState *dev)
+static void vtd_reset_exit(Object *obj, ResetType type)
 {
-    IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);
+    IntelIOMMUState *s = INTEL_IOMMU_DEVICE(obj);
 
+    trace_vtd_reset_exit();
     vtd_init(s);
     vtd_address_space_refresh_all(s);
 }
@@ -4864,15 +4865,18 @@ static void vtd_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     X86IOMMUClass *x86_class = X86_IOMMU_DEVICE_CLASS(klass);
+    ResettableClass *rc = RESETTABLE_CLASS(klass);
 
-    device_class_set_legacy_reset(dc, vtd_reset);
+    /*
+     * Use 'exit' reset phase to make sure all DMA requests
+     * have been quiesced during 'enter' or 'hold' phase
+     */
+    rc->phases.exit = vtd_reset_exit;
     dc->vmsd = &vtd_vmstate;
     device_class_set_props(dc, vtd_properties);
     dc->hotpluggable = false;
     x86_class->realize = vtd_realize;
     x86_class->int_remap = vtd_int_remap;
-    /* Supported by the pc-q35-* machine types */
-    dc->user_creatable = true;
     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
     dc->desc = "Intel IOMMU (VT-d) DMA Remapping device";
 }
diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
index a8d354aabe533..d0a236c74f369 100644
--- a/hw/i386/microvm.c
+++ b/hw/i386/microvm.c
@@ -451,11 +451,44 @@ static HotplugHandler *microvm_get_hotplug_handler(MachineState *machine,
     return NULL;
 }
 
+static void microvm_machine_done(Notifier *notifier, void *data)
+{
+    MicrovmMachineState *mms = container_of(notifier, MicrovmMachineState,
+                                            machine_done);
+    X86MachineState *x86ms = X86_MACHINE(mms);
+
+    acpi_setup_microvm(mms);
+    dt_setup_microvm(mms);
+    fw_cfg_add_e820(x86ms->fw_cfg);
+}
+
+static void microvm_powerdown_req(Notifier *notifier, void *data)
+{
+    MicrovmMachineState *mms = container_of(notifier, MicrovmMachineState,
+                                            powerdown_req);
+    X86MachineState *x86ms = X86_MACHINE(mms);
+
+    if (x86ms->acpi_dev) {
+        Object *obj = OBJECT(x86ms->acpi_dev);
+        AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(obj);
+        adevc->send_event(ACPI_DEVICE_IF(x86ms->acpi_dev),
+                          ACPI_POWER_DOWN_STATUS);
+    }
+}
+
 static void microvm_machine_state_init(MachineState *machine)
 {
     MicrovmMachineState *mms = MICROVM_MACHINE(machine);
     X86MachineState *x86ms = X86_MACHINE(machine);
 
+    /* State */
+    mms->kernel_cmdline_fixed = false;
+
+    mms->machine_done.notify = microvm_machine_done;
+    qemu_add_machine_init_done_notifier(&mms->machine_done);
+    mms->powerdown_req.notify = microvm_powerdown_req;
+    qemu_register_powerdown_notifier(&mms->powerdown_req);
+
     microvm_memory_init(mms);
 
     x86_cpus_init(x86ms, CPU_VERSION_LATEST);
@@ -581,31 +614,6 @@ static void microvm_machine_set_auto_kernel_cmdline(Object *obj, bool value,
     mms->auto_kernel_cmdline = value;
 }
 
-static void microvm_machine_done(Notifier *notifier, void *data)
-{
-    MicrovmMachineState *mms = container_of(notifier, MicrovmMachineState,
-                                            machine_done);
-    X86MachineState *x86ms = X86_MACHINE(mms);
-
-    acpi_setup_microvm(mms);
-    dt_setup_microvm(mms);
-    fw_cfg_add_e820(x86ms->fw_cfg);
-}
-
-static void microvm_powerdown_req(Notifier *notifier, void *data)
-{
-    MicrovmMachineState *mms = container_of(notifier, MicrovmMachineState,
-                                            powerdown_req);
-    X86MachineState *x86ms = X86_MACHINE(mms);
-
-    if (x86ms->acpi_dev) {
-        Object *obj = OBJECT(x86ms->acpi_dev);
-        AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(obj);
-        adevc->send_event(ACPI_DEVICE_IF(x86ms->acpi_dev),
-                          ACPI_POWER_DOWN_STATUS);
-    }
-}
-
 static void microvm_machine_initfn(Object *obj)
 {
     MicrovmMachineState *mms = MICROVM_MACHINE(obj);
@@ -617,14 +625,6 @@ static void microvm_machine_initfn(Object *obj)
     mms->isa_serial = true;
     mms->option_roms = true;
     mms->auto_kernel_cmdline = true;
-
-    /* State */
-    mms->kernel_cmdline_fixed = false;
-
-    mms->machine_done.notify = microvm_machine_done;
-    qemu_add_machine_init_done_notifier(&mms->machine_done);
-    mms->powerdown_req.notify = microvm_powerdown_req;
-    qemu_register_powerdown_notifier(&mms->powerdown_req);
 }
 
 GlobalProperty microvm_properties[] = {
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 0eb52d315bb77..63a96cd23f841 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1241,6 +1241,9 @@ void pc_basic_device_init(struct PCMachineState *pcms,
     /* Super I/O */
     pc_superio_init(isa_bus, create_fdctrl, pcms->i8042_enabled,
                     pcms->vmport != ON_OFF_AUTO_ON, &error_fatal);
+
+    pcms->machine_done.notify = pc_machine_done;
+    qemu_add_machine_init_done_notifier(&pcms->machine_done);
 }
 
 void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus)
@@ -1701,7 +1704,7 @@ static void pc_machine_initfn(Object *obj)
     pcms->sata_enabled = true;
     pcms->i8042_enabled = true;
     pcms->max_fw_size = 8 * MiB;
-#ifdef CONFIG_HPET
+#if defined(CONFIG_HPET) || defined(CONFIG_X_HPET_RUST)
     pcms->hpet_enabled = true;
 #endif
     pcms->fd_bootchk = true;
@@ -1714,9 +1717,6 @@ static void pc_machine_initfn(Object *obj)
     if (pcmc->pci_enabled) {
         cxl_machine_init(obj, &pcms->cxl_devices_state);
     }
-
-    pcms->machine_done.notify = pc_machine_done;
-    qemu_add_machine_init_done_notifier(&pcms->machine_done);
 }
 
 static void pc_machine_reset(MachineState *machine, ResetType type)
@@ -1798,6 +1798,10 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
     mc->nvdimm_supported = true;
     mc->smp_props.dies_supported = true;
     mc->smp_props.modules_supported = true;
+    mc->smp_props.cache_supported[CACHE_LEVEL_AND_TYPE_L1D] = true;
+    mc->smp_props.cache_supported[CACHE_LEVEL_AND_TYPE_L1I] = true;
+    mc->smp_props.cache_supported[CACHE_LEVEL_AND_TYPE_L2] = true;
+    mc->smp_props.cache_supported[CACHE_LEVEL_AND_TYPE_L3] = true;
     mc->default_ram_id = "pc.ram";
     pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_AUTO;
 
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 04d2957adcd7e..6c91e2d29298b 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -65,6 +65,7 @@
 #include "system/numa.h"
 #include "hw/hyperv/vmbus-bridge.h"
 #include "hw/mem/nvdimm.h"
+#include "hw/uefi/var-service-api.h"
 #include "hw/i386/acpi-build.h"
 #include "target/i386/cpu.h"
 
@@ -468,6 +469,7 @@ static void pc_i440fx_machine_options(MachineClass *m)
     m->no_parallel = !module_object_class_by_name(TYPE_ISA_PARALLEL);
     machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE);
     machine_class_allow_dynamic_sysbus_dev(m, TYPE_VMBUS_BRIDGE);
+    machine_class_allow_dynamic_sysbus_dev(m, TYPE_UEFI_VARS_X64);
 
     object_class_property_add_enum(oc, "x-south-bridge", "PCSouthBridgeOption",
                                    &PCSouthBridgeOption_lookup,
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 77536dd697f5f..fd96d0345c7df 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -58,6 +58,7 @@
 #include "system/numa.h"
 #include "hw/hyperv/vmbus-bridge.h"
 #include "hw/mem/nvdimm.h"
+#include "hw/uefi/var-service-api.h"
 #include "hw/i386/acpi-build.h"
 #include "target/i386/cpu.h"
 
@@ -355,6 +356,7 @@ static void pc_q35_machine_options(MachineClass *m)
     machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE);
     machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE);
     machine_class_allow_dynamic_sysbus_dev(m, TYPE_VMBUS_BRIDGE);
+    machine_class_allow_dynamic_sysbus_dev(m, TYPE_UEFI_VARS_X64);
     compat_props_add(m->compat_props,
                      pc_q35_compat_defaults, pc_q35_compat_defaults_len);
 }
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index 53c02d7ac85ad..ac9e1a10aa456 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -68,6 +68,7 @@ vtd_frr_new(int index, uint64_t hi, uint64_t lo) "index %d high 0x%"PRIx64" low
 vtd_warn_invalid_qi_tail(uint16_t tail) "tail 0x%"PRIx16
 vtd_warn_ir_vector(uint16_t sid, int index, int vec, int target) "sid 0x%"PRIx16" index %d vec %d (should be: %d)"
 vtd_warn_ir_trigger(uint16_t sid, int index, int trig, int target) "sid 0x%"PRIx16" index %d trigger %d (should be: %d)"
+vtd_reset_exit(void) ""
 
 # amd_iommu.c
 amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" +  offset 0x%"PRIx32
diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c
index fed34b2fcfa7d..5cdd165af0db8 100644
--- a/hw/i386/x86-iommu.c
+++ b/hw/i386/x86-iommu.c
@@ -146,7 +146,7 @@ bool x86_iommu_ir_supported(X86IOMMUState *s)
 
 static const TypeInfo x86_iommu_info = {
     .name          = TYPE_X86_IOMMU_DEVICE,
-    .parent        = TYPE_SYS_BUS_DEVICE,
+    .parent        = TYPE_DYNAMIC_SYS_BUS_DEVICE,
     .instance_size = sizeof(X86IOMMUState),
     .class_init    = x86_iommu_class_init,
     .class_size    = sizeof(X86IOMMUClass),
diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
index d3df488c48330..d4516acec699d 100644
--- a/hw/i386/xen/xen-hvm.c
+++ b/hw/i386/xen/xen-hvm.c
@@ -10,10 +10,12 @@
 
 #include "qemu/osdep.h"
 #include "qemu/units.h"
+#include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "qapi/qapi-commands-migration.h"
 #include "trace.h"
 
+#include "hw/hw.h"
 #include "hw/i386/pc.h"
 #include "hw/irq.h"
 #include "hw/i386/apic-msidef.h"
@@ -24,6 +26,10 @@
 #include "hw/xen/arch_hvm.h"
 #include <xen/hvm/e820.h>
 #include "exec/target_page.h"
+#include "target/i386/cpu.h"
+#include "system/runstate.h"
+#include "system/xen-mapcache.h"
+#include "system/xen.h"
 
 static MemoryRegion ram_640k, ram_lo, ram_hi;
 static MemoryRegion *framebuffer;
diff --git a/hw/i386/xen/xen-pvh.c b/hw/i386/xen/xen-pvh.c
index 33c102797639d..f6356f2a7ed31 100644
--- a/hw/i386/xen/xen-pvh.c
+++ b/hw/i386/xen/xen-pvh.c
@@ -14,6 +14,7 @@
 #include "hw/xen/arch_hvm.h"
 #include <xen/hvm/hvm_info_table.h>
 #include "hw/xen/xen-pvh-common.h"
+#include "target/i386/cpu.h"
 
 #define TYPE_XEN_PVH_X86  MACHINE_TYPE_NAME("xenpvh")
 OBJECT_DECLARE_SIMPLE_TYPE(XenPVHx86State, XEN_PVH_X86)
diff --git a/hw/ide/core.c b/hw/ide/core.c
index f9baba59e9364..b14983ec54faf 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -968,8 +968,7 @@ static void ide_dma_cb(void *opaque, int ret)
                                            BDRV_SECTOR_SIZE, ide_dma_cb, s);
         break;
     case IDE_DMA_TRIM:
-        s->bus->dma->aiocb = dma_blk_io(blk_get_aio_context(s->blk),
-                                        &s->sg, offset, BDRV_SECTOR_SIZE,
+        s->bus->dma->aiocb = dma_blk_io(&s->sg, offset, BDRV_SECTOR_SIZE,
                                         ide_issue_trim, s, ide_dma_cb, s,
                                         DMA_DIRECTION_TO_DEVICE);
         break;
diff --git a/hw/ide/macio.c b/hw/ide/macio.c
index 5fe764b49b588..c8e8e44cc9a08 100644
--- a/hw/ide/macio.c
+++ b/hw/ide/macio.c
@@ -187,8 +187,7 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
                                            pmac_ide_transfer_cb, io);
         break;
     case IDE_DMA_TRIM:
-        s->bus->dma->aiocb = dma_blk_io(blk_get_aio_context(s->blk), &s->sg,
-                                        offset, 0x1, ide_issue_trim, s,
+        s->bus->dma->aiocb = dma_blk_io(&s->sg, offset, 0x1, ide_issue_trim, s,
                                         pmac_ide_transfer_cb, io,
                                         DMA_DIRECTION_TO_DEVICE);
         break;
diff --git a/hw/intc/Kconfig b/hw/intc/Kconfig
index dd405bdb5d21f..a5b136e2fa72f 100644
--- a/hw/intc/Kconfig
+++ b/hw/intc/Kconfig
@@ -8,6 +8,9 @@ config I8259
 config PL190
     bool
 
+config L2VIC
+    bool
+
 config IOAPIC
     bool
     select I8259
@@ -23,13 +26,13 @@ config APIC
 
 config ARM_GIC
     bool
-    select ARM_GICV3_TCG if TCG
+    select ARM_GICV3 if TCG
     select ARM_GIC_KVM if KVM
     select MSI_NONBROKEN
 
-config ARM_GICV3_TCG
+config ARM_GICV3
     bool
-    depends on ARM_GIC && TCG
+    depends on ARM_GIC
 
 config ARM_GIC_KVM
     bool
diff --git a/hw/intc/allwinner-a10-pic.c b/hw/intc/allwinner-a10-pic.c
index c0f30092cd603..93a604f7a0409 100644
--- a/hw/intc/allwinner-a10-pic.c
+++ b/hw/intc/allwinner-a10-pic.c
@@ -135,7 +135,7 @@ static void aw_a10_pic_write(void *opaque, hwaddr offset, uint64_t value,
 static const MemoryRegionOps aw_a10_pic_ops = {
     .read = aw_a10_pic_read,
     .write = aw_a10_pic_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
 static const VMStateDescription vmstate_aw_a10_pic = {
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index d1d343d421697..d18c1dbf2cb78 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -350,9 +350,8 @@ static int apic_set_base(APICCommonState *s, uint64_t val)
         return -1;
     }
 
-    s->apicbase = (val & 0xfffff000) |
+    s->apicbase = (val & MSR_IA32_APICBASE_BASE) |
         (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE));
-    /* if disabled, cannot be enabled again */
     if (!(val & MSR_IA32_APICBASE_ENABLE)) {
         s->apicbase &= ~MSR_IA32_APICBASE_ENABLE;
         cpu_clear_apic_feature(&s->cpu->env);
diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c
index 9cad8313a3acf..de37465bc8791 100644
--- a/hw/intc/arm_gicv3_cpuif.c
+++ b/hw/intc/arm_gicv3_cpuif.c
@@ -22,6 +22,7 @@
 #include "cpu.h"
 #include "target/arm/cpregs.h"
 #include "target/arm/cpu-features.h"
+#include "target/arm/internals.h"
 #include "system/tcg.h"
 #include "system/qtest.h"
 
@@ -2291,7 +2292,7 @@ static CPAccessResult gicv3_irqfiq_access(CPUARMState *env,
             r = CP_ACCESS_TRAP_EL3;
             break;
         case 3:
-            if (!is_a64(env) && !arm_is_el3_or_mon(env)) {
+            if (!arm_is_el3_or_mon(env)) {
                 r = CP_ACCESS_TRAP_EL3;
             }
             break;
@@ -2300,9 +2301,6 @@ static CPAccessResult gicv3_irqfiq_access(CPUARMState *env,
         }
     }
 
-    if (r == CP_ACCESS_TRAP_EL3 && !arm_el_is_aa64(env, 3)) {
-        r = CP_ACCESS_TRAP;
-    }
     return r;
 }
 
@@ -2356,7 +2354,7 @@ static CPAccessResult gicv3_fiq_access(CPUARMState *env,
             r = CP_ACCESS_TRAP_EL3;
             break;
         case 3:
-            if (!is_a64(env) && !arm_is_el3_or_mon(env)) {
+            if (!arm_is_el3_or_mon(env)) {
                 r = CP_ACCESS_TRAP_EL3;
             }
             break;
@@ -2365,9 +2363,6 @@ static CPAccessResult gicv3_fiq_access(CPUARMState *env,
         }
     }
 
-    if (r == CP_ACCESS_TRAP_EL3 && !arm_el_is_aa64(env, 3)) {
-        r = CP_ACCESS_TRAP;
-    }
     return r;
 }
 
@@ -2395,7 +2390,7 @@ static CPAccessResult gicv3_irq_access(CPUARMState *env,
             r = CP_ACCESS_TRAP_EL3;
             break;
         case 3:
-            if (!is_a64(env) && !arm_is_el3_or_mon(env)) {
+            if (!arm_is_el3_or_mon(env)) {
                 r = CP_ACCESS_TRAP_EL3;
             }
             break;
@@ -2404,9 +2399,6 @@ static CPAccessResult gicv3_irq_access(CPUARMState *env,
         }
     }
 
-    if (r == CP_ACCESS_TRAP_EL3 && !arm_el_is_aa64(env, 3)) {
-        r = CP_ACCESS_TRAP;
-    }
     return r;
 }
 
diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c
index 5fd0760982437..7212c87c68ecc 100644
--- a/hw/intc/armv7m_nvic.c
+++ b/hw/intc/armv7m_nvic.c
@@ -22,7 +22,7 @@
 #include "system/runstate.h"
 #include "target/arm/cpu.h"
 #include "target/arm/cpu-features.h"
-#include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "exec/memop.h"
 #include "qemu/log.h"
 #include "qemu/module.h"
diff --git a/hw/intc/aspeed_intc.c b/hw/intc/aspeed_intc.c
index 126b711b9430a..3fd417084f5b0 100644
--- a/hw/intc/aspeed_intc.c
+++ b/hw/intc/aspeed_intc.c
@@ -14,72 +14,202 @@
 #include "hw/registerfields.h"
 #include "qapi/error.h"
 
-/* INTC Registers */
-REG32(GICINT128_EN,         0x1000)
-REG32(GICINT128_STATUS,     0x1004)
-REG32(GICINT129_EN,         0x1100)
-REG32(GICINT129_STATUS,     0x1104)
-REG32(GICINT130_EN,         0x1200)
-REG32(GICINT130_STATUS,     0x1204)
-REG32(GICINT131_EN,         0x1300)
-REG32(GICINT131_STATUS,     0x1304)
-REG32(GICINT132_EN,         0x1400)
-REG32(GICINT132_STATUS,     0x1404)
-REG32(GICINT133_EN,         0x1500)
-REG32(GICINT133_STATUS,     0x1504)
-REG32(GICINT134_EN,         0x1600)
-REG32(GICINT134_STATUS,     0x1604)
-REG32(GICINT135_EN,         0x1700)
-REG32(GICINT135_STATUS,     0x1704)
-REG32(GICINT136_EN,         0x1800)
-REG32(GICINT136_STATUS,     0x1804)
-
-#define GICINT_STATUS_BASE     R_GICINT128_STATUS
-
-static void aspeed_intc_update(AspeedINTCState *s, int irq, int level)
+/*
+ * INTC Registers
+ *
+ * values below are offset by - 0x1000 from datasheet
+ * because its memory region is start at 0x1000
+ *
+ */
+REG32(GICINT128_EN,         0x000)
+REG32(GICINT128_STATUS,     0x004)
+REG32(GICINT129_EN,         0x100)
+REG32(GICINT129_STATUS,     0x104)
+REG32(GICINT130_EN,         0x200)
+REG32(GICINT130_STATUS,     0x204)
+REG32(GICINT131_EN,         0x300)
+REG32(GICINT131_STATUS,     0x304)
+REG32(GICINT132_EN,         0x400)
+REG32(GICINT132_STATUS,     0x404)
+REG32(GICINT133_EN,         0x500)
+REG32(GICINT133_STATUS,     0x504)
+REG32(GICINT134_EN,         0x600)
+REG32(GICINT134_STATUS,     0x604)
+REG32(GICINT135_EN,         0x700)
+REG32(GICINT135_STATUS,     0x704)
+REG32(GICINT136_EN,         0x800)
+REG32(GICINT136_STATUS,     0x804)
+REG32(GICINT192_201_EN,         0xB00)
+REG32(GICINT192_201_STATUS,     0xB04)
+
+/*
+ * INTCIO Registers
+ *
+ * values below are offset by - 0x100 from datasheet
+ * because its memory region is start at 0x100
+ *
+ */
+REG32(GICINT192_EN,         0x00)
+REG32(GICINT192_STATUS,     0x04)
+REG32(GICINT193_EN,         0x10)
+REG32(GICINT193_STATUS,     0x14)
+REG32(GICINT194_EN,         0x20)
+REG32(GICINT194_STATUS,     0x24)
+REG32(GICINT195_EN,         0x30)
+REG32(GICINT195_STATUS,     0x34)
+REG32(GICINT196_EN,         0x40)
+REG32(GICINT196_STATUS,     0x44)
+REG32(GICINT197_EN,         0x50)
+REG32(GICINT197_STATUS,     0x54)
+
+static const AspeedINTCIRQ *aspeed_intc_get_irq(AspeedINTCClass *aic,
+                                                uint32_t reg)
+{
+    int i;
+
+    for (i = 0; i < aic->irq_table_count; i++) {
+        if (aic->irq_table[i].enable_reg == reg ||
+            aic->irq_table[i].status_reg == reg) {
+            return &aic->irq_table[i];
+        }
+    }
+
+    /*
+     * Invalid reg.
+     */
+    g_assert_not_reached();
+}
+
+/*
+ * Update the state of an interrupt controller pin by setting
+ * the specified output pin to the given level.
+ * The input pin index should be between 0 and the number of input pins.
+ * The output pin index should be between 0 and the number of output pins.
+ */
+static void aspeed_intc_update(AspeedINTCState *s, int inpin_idx,
+                               int outpin_idx, int level)
 {
     AspeedINTCClass *aic = ASPEED_INTC_GET_CLASS(s);
+    const char *name = object_get_typename(OBJECT(s));
 
-    if (irq >= aic->num_ints) {
-        qemu_log_mask(LOG_GUEST_ERROR, "%s: Invalid interrupt number: %d\n",
-                      __func__, irq);
-        return;
+    assert((outpin_idx < aic->num_outpins) && (inpin_idx < aic->num_inpins));
+
+    trace_aspeed_intc_update_irq(name, inpin_idx, outpin_idx, level);
+    qemu_set_irq(s->output_pins[outpin_idx], level);
+}
+
+static void aspeed_intc_set_irq_handler(AspeedINTCState *s,
+                                        const AspeedINTCIRQ *intc_irq,
+                                        uint32_t select)
+{
+    const char *name = object_get_typename(OBJECT(s));
+    uint32_t status_reg;
+    int outpin_idx;
+    int inpin_idx;
+
+    status_reg = intc_irq->status_reg;
+    outpin_idx = intc_irq->outpin_idx;
+    inpin_idx = intc_irq->inpin_idx;
+
+    if (s->mask[inpin_idx] || s->regs[status_reg]) {
+        /*
+         * a. mask is not 0 means in ISR mode
+         * sources interrupt routine are executing.
+         * b. status register value is not 0 means previous
+         * source interrupt does not be executed, yet.
+         *
+         * save source interrupt to pending variable.
+         */
+        s->pending[inpin_idx] |= select;
+        trace_aspeed_intc_pending_irq(name, inpin_idx, s->pending[inpin_idx]);
+    } else {
+        /*
+         * notify firmware which source interrupt are coming
+         * by setting status register
+         */
+        s->regs[status_reg] = select;
+        trace_aspeed_intc_trigger_irq(name, inpin_idx, outpin_idx,
+                                      s->regs[status_reg]);
+        aspeed_intc_update(s, inpin_idx, outpin_idx, 1);
     }
+}
+
+static void aspeed_intc_set_irq_handler_multi_outpins(AspeedINTCState *s,
+                                 const AspeedINTCIRQ *intc_irq, uint32_t select)
+{
+    const char *name = object_get_typename(OBJECT(s));
+    uint32_t status_reg;
+    int num_outpins;
+    int outpin_idx;
+    int inpin_idx;
+    int i;
 
-    trace_aspeed_intc_update_irq(irq, level);
-    qemu_set_irq(s->output_pins[irq], level);
+    num_outpins = intc_irq->num_outpins;
+    status_reg = intc_irq->status_reg;
+    outpin_idx = intc_irq->outpin_idx;
+    inpin_idx = intc_irq->inpin_idx;
+
+    for (i = 0; i < num_outpins; i++) {
+        if (select & BIT(i)) {
+            if (s->mask[inpin_idx] & BIT(i) ||
+                s->regs[status_reg] & BIT(i)) {
+                /*
+                 * a. mask bit is not 0 means in ISR mode sources interrupt
+                 * routine are executing.
+                 * b. status bit is not 0 means previous source interrupt
+                 * does not be executed, yet.
+                 *
+                 * save source interrupt to pending bit.
+                 */
+                 s->pending[inpin_idx] |= BIT(i);
+                 trace_aspeed_intc_pending_irq(name, inpin_idx,
+                                               s->pending[inpin_idx]);
+            } else {
+                /*
+                 * notify firmware which source interrupt are coming
+                 * by setting status bit
+                 */
+                s->regs[status_reg] |= BIT(i);
+                trace_aspeed_intc_trigger_irq(name, inpin_idx, outpin_idx + i,
+                                              s->regs[status_reg]);
+                aspeed_intc_update(s, inpin_idx, outpin_idx + i, 1);
+            }
+        }
+    }
 }
 
 /*
- * The address of GICINT128 to GICINT136 are from 0x1000 to 0x1804.
- * Utilize "address & 0x0f00" to get the irq and irq output pin index
- * The value of irq should be 0 to num_ints.
- * The irq 0 indicates GICINT128, irq 1 indicates GICINT129 and so on.
+ * GICINT192_201 maps 1:10 to input IRQ 0 and output IRQs 0 to 9.
+ * GICINT128 to GICINT136 map 1:1 to input IRQs 1 to 9 and output
+ * IRQs 10 to 18. The value of input IRQ should be between 0 and
+ * the number of input pins.
  */
 static void aspeed_intc_set_irq(void *opaque, int irq, int level)
 {
     AspeedINTCState *s = (AspeedINTCState *)opaque;
     AspeedINTCClass *aic = ASPEED_INTC_GET_CLASS(s);
-    uint32_t status_addr = GICINT_STATUS_BASE + ((0x100 * irq) >> 2);
+    const char *name = object_get_typename(OBJECT(s));
+    const AspeedINTCIRQ *intc_irq;
     uint32_t select = 0;
     uint32_t enable;
+    int num_outpins;
+    int inpin_idx;
     int i;
 
-    if (irq >= aic->num_ints) {
-        qemu_log_mask(LOG_GUEST_ERROR, "%s: Invalid interrupt number: %d\n",
-                      __func__, irq);
-        return;
-    }
+    assert(irq < aic->num_inpins);
 
-    trace_aspeed_intc_set_irq(irq, level);
-    enable = s->enable[irq];
+    intc_irq = &aic->irq_table[irq];
+    num_outpins = intc_irq->num_outpins;
+    inpin_idx = intc_irq->inpin_idx;
+    trace_aspeed_intc_set_irq(name, inpin_idx, level);
+    enable = s->enable[inpin_idx];
 
     if (!level) {
         return;
     }
 
     for (i = 0; i < aic->num_lines; i++) {
-        if (s->orgates[irq].levels[i]) {
+        if (s->orgates[inpin_idx].levels[i]) {
             if (enable & BIT(i)) {
                 select |= BIT(i);
             }
@@ -90,45 +220,190 @@ static void aspeed_intc_set_irq(void *opaque, int irq, int level)
         return;
     }
 
-    trace_aspeed_intc_select(select);
+    trace_aspeed_intc_select(name, select);
+    if (num_outpins > 1) {
+        aspeed_intc_set_irq_handler_multi_outpins(s, intc_irq, select);
+    } else {
+        aspeed_intc_set_irq_handler(s, intc_irq, select);
+    }
+}
 
-    if (s->mask[irq] || s->regs[status_addr]) {
-        /*
-         * a. mask is not 0 means in ISR mode
-         * sources interrupt routine are executing.
-         * b. status register value is not 0 means previous
-         * source interrupt does not be executed, yet.
-         *
-         * save source interrupt to pending variable.
-         */
-        s->pending[irq] |= select;
-        trace_aspeed_intc_pending_irq(irq, s->pending[irq]);
+static void aspeed_intc_enable_handler(AspeedINTCState *s, hwaddr offset,
+                                       uint64_t data)
+{
+    AspeedINTCClass *aic = ASPEED_INTC_GET_CLASS(s);
+    const char *name = object_get_typename(OBJECT(s));
+    const AspeedINTCIRQ *intc_irq;
+    uint32_t reg = offset >> 2;
+    uint32_t old_enable;
+    uint32_t change;
+    int inpin_idx;
+
+    intc_irq = aspeed_intc_get_irq(aic, reg);
+    inpin_idx = intc_irq->inpin_idx;
+
+    assert(inpin_idx < aic->num_inpins);
+
+    /*
+     * The enable registers are used to enable source interrupts.
+     * They also handle masking and unmasking of source interrupts
+     * during the execution of the source ISR.
+     */
+
+    /* disable all source interrupt */
+    if (!data && !s->enable[inpin_idx]) {
+        s->regs[reg] = data;
+        return;
+    }
+
+    old_enable = s->enable[inpin_idx];
+    s->enable[inpin_idx] |= data;
+
+    /* enable new source interrupt */
+    if (old_enable != s->enable[inpin_idx]) {
+        trace_aspeed_intc_enable(name, s->enable[inpin_idx]);
+        s->regs[reg] = data;
+        return;
+    }
+
+    /* mask and unmask source interrupt */
+    change = s->regs[reg] ^ data;
+    if (change & data) {
+        s->mask[inpin_idx] &= ~change;
+        trace_aspeed_intc_unmask(name, change, s->mask[inpin_idx]);
     } else {
-        /*
-         * notify firmware which source interrupt are coming
-         * by setting status register
-         */
-        s->regs[status_addr] = select;
-        trace_aspeed_intc_trigger_irq(irq, s->regs[status_addr]);
-        aspeed_intc_update(s, irq, 1);
+        s->mask[inpin_idx] |= change;
+        trace_aspeed_intc_mask(name, change, s->mask[inpin_idx]);
+    }
+
+    s->regs[reg] = data;
+}
+
+static void aspeed_intc_status_handler(AspeedINTCState *s, hwaddr offset,
+                                       uint64_t data)
+{
+    AspeedINTCClass *aic = ASPEED_INTC_GET_CLASS(s);
+    const char *name = object_get_typename(OBJECT(s));
+    const AspeedINTCIRQ *intc_irq;
+    uint32_t reg = offset >> 2;
+    int outpin_idx;
+    int inpin_idx;
+
+    if (!data) {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Invalid data 0\n", __func__);
+        return;
+    }
+
+    intc_irq = aspeed_intc_get_irq(aic, reg);
+    outpin_idx = intc_irq->outpin_idx;
+    inpin_idx = intc_irq->inpin_idx;
+
+    assert(inpin_idx < aic->num_inpins);
+
+    /* clear status */
+    s->regs[reg] &= ~data;
+
+    /*
+     * These status registers are used for notify sources ISR are executed.
+     * If one source ISR is executed, it will clear one bit.
+     * If it clear all bits, it means to initialize this register status
+     * rather than sources ISR are executed.
+     */
+    if (data == 0xffffffff) {
+        return;
+    }
+
+    /* All source ISR execution are done */
+    if (!s->regs[reg]) {
+        trace_aspeed_intc_all_isr_done(name, inpin_idx);
+        if (s->pending[inpin_idx]) {
+            /*
+             * handle pending source interrupt
+             * notify firmware which source interrupt are pending
+             * by setting status register
+             */
+            s->regs[reg] = s->pending[inpin_idx];
+            s->pending[inpin_idx] = 0;
+            trace_aspeed_intc_trigger_irq(name, inpin_idx, outpin_idx,
+                                          s->regs[reg]);
+            aspeed_intc_update(s, inpin_idx, outpin_idx, 1);
+        } else {
+            /* clear irq */
+            trace_aspeed_intc_clear_irq(name, inpin_idx, outpin_idx, 0);
+            aspeed_intc_update(s, inpin_idx, outpin_idx, 0);
+        }
+    }
+}
+
+static void aspeed_intc_status_handler_multi_outpins(AspeedINTCState *s,
+                                                hwaddr offset, uint64_t data)
+{
+    const char *name = object_get_typename(OBJECT(s));
+    AspeedINTCClass *aic = ASPEED_INTC_GET_CLASS(s);
+    const AspeedINTCIRQ *intc_irq;
+    uint32_t reg = offset >> 2;
+    int num_outpins;
+    int outpin_idx;
+    int inpin_idx;
+    int i;
+
+    if (!data) {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Invalid data 0\n", __func__);
+        return;
+    }
+
+    intc_irq = aspeed_intc_get_irq(aic, reg);
+    num_outpins = intc_irq->num_outpins;
+    outpin_idx = intc_irq->outpin_idx;
+    inpin_idx = intc_irq->inpin_idx;
+    assert(inpin_idx < aic->num_inpins);
+
+    /* clear status */
+    s->regs[reg] &= ~data;
+
+    /*
+     * The status registers are used for notify sources ISR are executed.
+     * If one source ISR is executed, it will clear one bit.
+     * If it clear all bits, it means to initialize this register status
+     * rather than sources ISR are executed.
+     */
+    if (data == 0xffffffff) {
+        return;
+    }
+
+    for (i = 0; i < num_outpins; i++) {
+        /* All source ISR executions are done from a specific bit */
+        if (data & BIT(i)) {
+            trace_aspeed_intc_all_isr_done_bit(name, inpin_idx, i);
+            if (s->pending[inpin_idx] & BIT(i)) {
+                /*
+                 * Handle pending source interrupt.
+                 * Notify firmware which source interrupt is pending
+                 * by setting the status bit.
+                 */
+                s->regs[reg] |= BIT(i);
+                s->pending[inpin_idx] &= ~BIT(i);
+                trace_aspeed_intc_trigger_irq(name, inpin_idx, outpin_idx + i,
+                                              s->regs[reg]);
+                aspeed_intc_update(s, inpin_idx, outpin_idx + i, 1);
+            } else {
+                /* clear irq for the specific bit */
+                trace_aspeed_intc_clear_irq(name, inpin_idx, outpin_idx + i, 0);
+                aspeed_intc_update(s, inpin_idx, outpin_idx + i, 0);
+            }
+        }
     }
 }
 
 static uint64_t aspeed_intc_read(void *opaque, hwaddr offset, unsigned int size)
 {
     AspeedINTCState *s = ASPEED_INTC(opaque);
-    uint32_t addr = offset >> 2;
+    const char *name = object_get_typename(OBJECT(s));
+    uint32_t reg = offset >> 2;
     uint32_t value = 0;
 
-    if (addr >= ASPEED_INTC_NR_REGS) {
-        qemu_log_mask(LOG_GUEST_ERROR,
-                      "%s: Out-of-bounds read at offset 0x%" HWADDR_PRIx "\n",
-                      __func__, offset);
-        return 0;
-    }
-
-    value = s->regs[addr];
-    trace_aspeed_intc_read(offset, size, value);
+    value = s->regs[reg];
+    trace_aspeed_intc_read(name, offset, size, value);
 
     return value;
 }
@@ -137,22 +412,12 @@ static void aspeed_intc_write(void *opaque, hwaddr offset, uint64_t data,
                                         unsigned size)
 {
     AspeedINTCState *s = ASPEED_INTC(opaque);
-    AspeedINTCClass *aic = ASPEED_INTC_GET_CLASS(s);
-    uint32_t addr = offset >> 2;
-    uint32_t old_enable;
-    uint32_t change;
-    uint32_t irq;
-
-    if (addr >= ASPEED_INTC_NR_REGS) {
-        qemu_log_mask(LOG_GUEST_ERROR,
-                      "%s: Out-of-bounds write at offset 0x%" HWADDR_PRIx "\n",
-                      __func__, offset);
-        return;
-    }
+    const char *name = object_get_typename(OBJECT(s));
+    uint32_t reg = offset >> 2;
 
-    trace_aspeed_intc_write(offset, size, data);
+    trace_aspeed_intc_write(name, offset, size, data);
 
-    switch (addr) {
+    switch (reg) {
     case R_GICINT128_EN:
     case R_GICINT129_EN:
     case R_GICINT130_EN:
@@ -162,45 +427,8 @@ static void aspeed_intc_write(void *opaque, hwaddr offset, uint64_t data,
     case R_GICINT134_EN:
     case R_GICINT135_EN:
     case R_GICINT136_EN:
-        irq = (offset & 0x0f00) >> 8;
-
-        if (irq >= aic->num_ints) {
-            qemu_log_mask(LOG_GUEST_ERROR, "%s: Invalid interrupt number: %d\n",
-                          __func__, irq);
-            return;
-        }
-
-        /*
-         * These registers are used for enable sources interrupt and
-         * mask and unmask source interrupt while executing source ISR.
-         */
-
-        /* disable all source interrupt */
-        if (!data && !s->enable[irq]) {
-            s->regs[addr] = data;
-            return;
-        }
-
-        old_enable = s->enable[irq];
-        s->enable[irq] |= data;
-
-        /* enable new source interrupt */
-        if (old_enable != s->enable[irq]) {
-            trace_aspeed_intc_enable(s->enable[irq]);
-            s->regs[addr] = data;
-            return;
-        }
-
-        /* mask and unmask source interrupt */
-        change = s->regs[addr] ^ data;
-        if (change & data) {
-            s->mask[irq] &= ~change;
-            trace_aspeed_intc_unmask(change, s->mask[irq]);
-        } else {
-            s->mask[irq] |= change;
-            trace_aspeed_intc_mask(change, s->mask[irq]);
-        }
-        s->regs[addr] = data;
+    case R_GICINT192_201_EN:
+        aspeed_intc_enable_handler(s, offset, data);
         break;
     case R_GICINT128_STATUS:
     case R_GICINT129_STATUS:
@@ -211,55 +439,68 @@ static void aspeed_intc_write(void *opaque, hwaddr offset, uint64_t data,
     case R_GICINT134_STATUS:
     case R_GICINT135_STATUS:
     case R_GICINT136_STATUS:
-        irq = (offset & 0x0f00) >> 8;
+        aspeed_intc_status_handler(s, offset, data);
+        break;
+    case R_GICINT192_201_STATUS:
+        aspeed_intc_status_handler_multi_outpins(s, offset, data);
+        break;
+    default:
+        s->regs[reg] = data;
+        break;
+    }
 
-        if (irq >= aic->num_ints) {
-            qemu_log_mask(LOG_GUEST_ERROR, "%s: Invalid interrupt number: %d\n",
-                          __func__, irq);
-            return;
-        }
+    return;
+}
+
+static uint64_t aspeed_intcio_read(void *opaque, hwaddr offset,
+                                   unsigned int size)
+{
+    AspeedINTCState *s = ASPEED_INTC(opaque);
+    const char *name = object_get_typename(OBJECT(s));
+    uint32_t reg = offset >> 2;
+    uint32_t value = 0;
 
-        /* clear status */
-        s->regs[addr] &= ~data;
+    value = s->regs[reg];
+    trace_aspeed_intc_read(name, offset, size, value);
 
-        /*
-         * These status registers are used for notify sources ISR are executed.
-         * If one source ISR is executed, it will clear one bit.
-         * If it clear all bits, it means to initialize this register status
-         * rather than sources ISR are executed.
-         */
-        if (data == 0xffffffff) {
-            return;
-        }
+    return value;
+}
 
-        /* All source ISR execution are done */
-        if (!s->regs[addr]) {
-            trace_aspeed_intc_all_isr_done(irq);
-            if (s->pending[irq]) {
-                /*
-                 * handle pending source interrupt
-                 * notify firmware which source interrupt are pending
-                 * by setting status register
-                 */
-                s->regs[addr] = s->pending[irq];
-                s->pending[irq] = 0;
-                trace_aspeed_intc_trigger_irq(irq, s->regs[addr]);
-                aspeed_intc_update(s, irq, 1);
-            } else {
-                /* clear irq */
-                trace_aspeed_intc_clear_irq(irq, 0);
-                aspeed_intc_update(s, irq, 0);
-            }
-        }
+static void aspeed_intcio_write(void *opaque, hwaddr offset, uint64_t data,
+                                unsigned size)
+{
+    AspeedINTCState *s = ASPEED_INTC(opaque);
+    const char *name = object_get_typename(OBJECT(s));
+    uint32_t reg = offset >> 2;
+
+    trace_aspeed_intc_write(name, offset, size, data);
+
+    switch (reg) {
+    case R_GICINT192_EN:
+    case R_GICINT193_EN:
+    case R_GICINT194_EN:
+    case R_GICINT195_EN:
+    case R_GICINT196_EN:
+    case R_GICINT197_EN:
+        aspeed_intc_enable_handler(s, offset, data);
+        break;
+    case R_GICINT192_STATUS:
+    case R_GICINT193_STATUS:
+    case R_GICINT194_STATUS:
+    case R_GICINT195_STATUS:
+    case R_GICINT196_STATUS:
+    case R_GICINT197_STATUS:
+        aspeed_intc_status_handler(s, offset, data);
         break;
     default:
-        s->regs[addr] = data;
+        s->regs[reg] = data;
         break;
     }
 
     return;
 }
 
+
 static const MemoryRegionOps aspeed_intc_ops = {
     .read = aspeed_intc_read,
     .write = aspeed_intc_write,
@@ -270,14 +511,24 @@ static const MemoryRegionOps aspeed_intc_ops = {
     }
 };
 
+static const MemoryRegionOps aspeed_intcio_ops = {
+    .read = aspeed_intcio_read,
+    .write = aspeed_intcio_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    }
+};
+
 static void aspeed_intc_instance_init(Object *obj)
 {
     AspeedINTCState *s = ASPEED_INTC(obj);
     AspeedINTCClass *aic = ASPEED_INTC_GET_CLASS(s);
     int i;
 
-    assert(aic->num_ints <= ASPEED_INTC_NR_INTS);
-    for (i = 0; i < aic->num_ints; i++) {
+    assert(aic->num_inpins <= ASPEED_INTC_MAX_INPINS);
+    for (i = 0; i < aic->num_inpins; i++) {
         object_initialize_child(obj, "intc-orgates[*]", &s->orgates[i],
                                 TYPE_OR_IRQ);
         object_property_set_int(OBJECT(&s->orgates[i]), "num-lines",
@@ -288,8 +539,9 @@ static void aspeed_intc_instance_init(Object *obj)
 static void aspeed_intc_reset(DeviceState *dev)
 {
     AspeedINTCState *s = ASPEED_INTC(dev);
+    AspeedINTCClass *aic = ASPEED_INTC_GET_CLASS(s);
 
-    memset(s->regs, 0, sizeof(s->regs));
+    memset(s->regs, 0, aic->nr_regs << 2);
     memset(s->enable, 0, sizeof(s->enable));
     memset(s->mask, 0, sizeof(s->mask));
     memset(s->pending, 0, sizeof(s->pending));
@@ -302,28 +554,51 @@ static void aspeed_intc_realize(DeviceState *dev, Error **errp)
     AspeedINTCClass *aic = ASPEED_INTC_GET_CLASS(s);
     int i;
 
-    memory_region_init_io(&s->iomem, OBJECT(s), &aspeed_intc_ops, s,
-                          TYPE_ASPEED_INTC ".regs", ASPEED_INTC_NR_REGS << 2);
+    memory_region_init(&s->iomem_container, OBJECT(s),
+            TYPE_ASPEED_INTC ".container", aic->mem_size);
+
+    sysbus_init_mmio(sbd, &s->iomem_container);
 
-    sysbus_init_mmio(sbd, &s->iomem);
-    qdev_init_gpio_in(dev, aspeed_intc_set_irq, aic->num_ints);
+    s->regs = g_new(uint32_t, aic->nr_regs);
+    memory_region_init_io(&s->iomem, OBJECT(s), aic->reg_ops, s,
+                          TYPE_ASPEED_INTC ".regs", aic->nr_regs << 2);
 
-    for (i = 0; i < aic->num_ints; i++) {
+    memory_region_add_subregion(&s->iomem_container, aic->reg_offset,
+                                &s->iomem);
+
+    qdev_init_gpio_in(dev, aspeed_intc_set_irq, aic->num_inpins);
+
+    for (i = 0; i < aic->num_inpins; i++) {
         if (!qdev_realize(DEVICE(&s->orgates[i]), NULL, errp)) {
             return;
         }
+    }
+
+    for (i = 0; i < aic->num_outpins; i++) {
         sysbus_init_irq(sbd, &s->output_pins[i]);
     }
 }
 
+static void aspeed_intc_unrealize(DeviceState *dev)
+{
+    AspeedINTCState *s = ASPEED_INTC(dev);
+
+    g_free(s->regs);
+    s->regs = NULL;
+}
+
 static void aspeed_intc_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
+    AspeedINTCClass *aic = ASPEED_INTC_CLASS(klass);
 
     dc->desc = "ASPEED INTC Controller";
     dc->realize = aspeed_intc_realize;
+    dc->unrealize = aspeed_intc_unrealize;
     device_class_set_legacy_reset(dc, aspeed_intc_reset);
     dc->vmsd = NULL;
+
+    aic->reg_ops = &aspeed_intc_ops;
 }
 
 static const TypeInfo aspeed_intc_info = {
@@ -336,6 +611,19 @@ static const TypeInfo aspeed_intc_info = {
     .abstract = true,
 };
 
+static AspeedINTCIRQ aspeed_2700_intc_irqs[ASPEED_INTC_MAX_INPINS] = {
+    {0, 0, 10, R_GICINT192_201_EN, R_GICINT192_201_STATUS},
+    {1, 10, 1, R_GICINT128_EN, R_GICINT128_STATUS},
+    {2, 11, 1, R_GICINT129_EN, R_GICINT129_STATUS},
+    {3, 12, 1, R_GICINT130_EN, R_GICINT130_STATUS},
+    {4, 13, 1, R_GICINT131_EN, R_GICINT131_STATUS},
+    {5, 14, 1, R_GICINT132_EN, R_GICINT132_STATUS},
+    {6, 15, 1, R_GICINT133_EN, R_GICINT133_STATUS},
+    {7, 16, 1, R_GICINT134_EN, R_GICINT134_STATUS},
+    {8, 17, 1, R_GICINT135_EN, R_GICINT135_STATUS},
+    {9, 18, 1, R_GICINT136_EN, R_GICINT136_STATUS},
+};
+
 static void aspeed_2700_intc_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
@@ -343,7 +631,13 @@ static void aspeed_2700_intc_class_init(ObjectClass *klass, void *data)
 
     dc->desc = "ASPEED 2700 INTC Controller";
     aic->num_lines = 32;
-    aic->num_ints = 9;
+    aic->num_inpins = 10;
+    aic->num_outpins = 19;
+    aic->mem_size = 0x4000;
+    aic->nr_regs = 0xB08 >> 2;
+    aic->reg_offset = 0x1000;
+    aic->irq_table = aspeed_2700_intc_irqs;
+    aic->irq_table_count = ARRAY_SIZE(aspeed_2700_intc_irqs);
 }
 
 static const TypeInfo aspeed_2700_intc_info = {
@@ -352,10 +646,43 @@ static const TypeInfo aspeed_2700_intc_info = {
     .class_init = aspeed_2700_intc_class_init,
 };
 
+static AspeedINTCIRQ aspeed_2700_intcio_irqs[ASPEED_INTC_MAX_INPINS] = {
+    {0, 0, 1, R_GICINT192_EN, R_GICINT192_STATUS},
+    {1, 1, 1, R_GICINT193_EN, R_GICINT193_STATUS},
+    {2, 2, 1, R_GICINT194_EN, R_GICINT194_STATUS},
+    {3, 3, 1, R_GICINT195_EN, R_GICINT195_STATUS},
+    {4, 4, 1, R_GICINT196_EN, R_GICINT196_STATUS},
+    {5, 5, 1, R_GICINT197_EN, R_GICINT197_STATUS},
+};
+
+static void aspeed_2700_intcio_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    AspeedINTCClass *aic = ASPEED_INTC_CLASS(klass);
+
+    dc->desc = "ASPEED 2700 INTC IO Controller";
+    aic->num_lines = 32;
+    aic->num_inpins = 6;
+    aic->num_outpins = 6;
+    aic->mem_size = 0x400;
+    aic->nr_regs = 0x58 >> 2;
+    aic->reg_offset = 0x100;
+    aic->reg_ops = &aspeed_intcio_ops;
+    aic->irq_table = aspeed_2700_intcio_irqs;
+    aic->irq_table_count = ARRAY_SIZE(aspeed_2700_intcio_irqs);
+}
+
+static const TypeInfo aspeed_2700_intcio_info = {
+    .name = TYPE_ASPEED_2700_INTCIO,
+    .parent = TYPE_ASPEED_INTC,
+    .class_init = aspeed_2700_intcio_class_init,
+};
+
 static void aspeed_intc_register_types(void)
 {
     type_register_static(&aspeed_intc_info);
     type_register_static(&aspeed_2700_intc_info);
+    type_register_static(&aspeed_2700_intcio_info);
 }
 
 type_init(aspeed_intc_register_types);
diff --git a/hw/intc/l2vic.c b/hw/intc/l2vic.c
new file mode 100644
index 0000000000000..1c450179dd6d6
--- /dev/null
+++ b/hw/intc/l2vic.c
@@ -0,0 +1,417 @@
+/*
+ * QEMU L2VIC Interrupt Controller
+ *
+ * Arm PrimeCell PL190 Vector Interrupt Controller was used as a reference.
+ * Copyright(c) 2020-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "hw/irq.h"
+#include "hw/sysbus.h"
+#include "migration/vmstate.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "hw/intc/l2vic.h"
+#include "trace.h"
+
+#define L2VICA(s, n) (s[(n) >> 2])
+
+#define TYPE_L2VIC "l2vic"
+OBJECT_DECLARE_SIMPLE_TYPE(L2VICState, L2VIC)
+
+#define SLICE_MAX (L2VIC_INTERRUPT_MAX / 32)
+
+typedef struct L2VICState {
+    SysBusDevice parent_obj;
+
+    QemuMutex active;
+    MemoryRegion iomem;
+    MemoryRegion fast_iomem;
+    uint32_t level;
+    /*
+     * offset 0:vid group 0 etc, 10 bits in each group
+     * are used:
+     */
+    uint32_t vid_group[4];
+    uint32_t vid0;
+    /* Clear Status of Active Edge interrupt, not used: */
+    uint32_t int_clear[SLICE_MAX] QEMU_ALIGNED(16);
+    /* Enable interrupt source */
+    uint32_t int_enable[SLICE_MAX] QEMU_ALIGNED(16);
+    /* Clear (set to 0) corresponding bit in int_enable */
+    uint32_t int_enable_clear;
+    /* Set (to 1) corresponding bit in int_enable */
+    uint32_t int_enable_set;
+    /* Present for debugging, not used */
+    uint32_t int_pending[SLICE_MAX] QEMU_ALIGNED(16);
+    /* Generate an interrupt */
+    uint32_t int_soft;
+    /* Which enabled interrupt is active */
+    uint32_t int_status[SLICE_MAX] QEMU_ALIGNED(16);
+    /* Edge or Level interrupt */
+    uint32_t int_type[SLICE_MAX] QEMU_ALIGNED(16);
+    /* L2 interrupt group 0-3 0x600-0x7FF */
+    uint32_t int_group_n0[SLICE_MAX] QEMU_ALIGNED(16);
+    uint32_t int_group_n1[SLICE_MAX] QEMU_ALIGNED(16);
+    uint32_t int_group_n2[SLICE_MAX] QEMU_ALIGNED(16);
+    uint32_t int_group_n3[SLICE_MAX] QEMU_ALIGNED(16);
+    qemu_irq irq[8];
+} L2VICState;
+
+
+/*
+ * Find out if this irq is associated with a group other than
+ * the default group
+ */
+static uint32_t *get_int_group(L2VICState *s, int irq)
+{
+    int n = irq & 0x1f;
+    if (n < 8) {
+        return s->int_group_n0;
+    }
+    if (n < 16) {
+        return s->int_group_n1;
+    }
+    if (n < 24) {
+        return s->int_group_n2;
+    }
+    return s->int_group_n3;
+}
+
+static int find_slice(int irq)
+{
+    return irq / 32;
+}
+
+static int get_vid(L2VICState *s, int irq)
+{
+    uint32_t *group = get_int_group(s, irq);
+    uint32_t slice = group[find_slice(irq)];
+    /* Mask with 0x7 to remove the GRP:EN bit */
+    uint32_t val = slice >> ((irq & 0x7) * 4);
+    if (val & 0x8) {
+        return val & 0x7;
+    } else {
+        return 0;
+    }
+}
+
+static inline bool vid_active(L2VICState *s)
+
+{
+    /* scan all 1024 bits in int_status arrary */
+    const int size = sizeof(s->int_status) * CHAR_BIT;
+    const int active_irq = find_first_bit((unsigned long *)s->int_status, size);
+    return ((active_irq != size)) ? true : false;
+}
+
+static bool l2vic_update(L2VICState *s, int irq)
+{
+    if (vid_active(s)) {
+        return true;
+    }
+
+    bool pending = test_bit(irq, (unsigned long *)s->int_pending);
+    bool enable = test_bit(irq, (unsigned long *)s->int_enable);
+    if (pending && enable) {
+        int vid = get_vid(s, irq);
+        set_bit(irq, (unsigned long *)s->int_status);
+        clear_bit(irq, (unsigned long *)s->int_pending);
+        clear_bit(irq, (unsigned long *)s->int_enable);
+        /* ensure the irq line goes low after going high */
+        s->vid0 = irq;
+        s->vid_group[get_vid(s, irq)] = irq;
+
+        /* already low: now call pulse */
+        /*     pulse: calls qemu_upper() and then qemu_lower()) */
+        qemu_irq_pulse(s->irq[vid + 2]);
+        trace_l2vic_delivered(irq, vid);
+        return true;
+    }
+    return false;
+}
+
+static void l2vic_update_all(L2VICState *s)
+{
+    for (int i = 0; i < L2VIC_INTERRUPT_MAX; i++) {
+        if (l2vic_update(s, i) == true) {
+            /* once vid is active, no-one else can set it until ciad */
+            return;
+        }
+    }
+}
+
+static void l2vic_set_irq(void *opaque, int irq, int level)
+{
+    L2VICState *s = (L2VICState *)opaque;
+    if (level) {
+        qemu_mutex_lock(&s->active);
+        set_bit(irq, (unsigned long *)s->int_pending);
+        qemu_mutex_unlock(&s->active);
+    }
+    l2vic_update(s, irq);
+}
+
+static void l2vic_write(void *opaque, hwaddr offset, uint64_t val,
+                        unsigned size)
+{
+    L2VICState *s = (L2VICState *)opaque;
+    qemu_mutex_lock(&s->active);
+    trace_l2vic_reg_write((unsigned)offset, (uint32_t)val);
+
+    if (offset == L2VIC_VID_0) {
+        if ((int)val != L2VIC_CIAD_INSTRUCTION) {
+            s->vid0 = val;
+        } else {
+            /* ciad issued: clear int_status */
+            clear_bit(s->vid0, (unsigned long *)s->int_status);
+        }
+    } else if (offset >= L2VIC_INT_ENABLEn &&
+               offset < (L2VIC_INT_ENABLE_CLEARn)) {
+        L2VICA(s->int_enable, offset - L2VIC_INT_ENABLEn) = val;
+    } else if (offset >= L2VIC_INT_ENABLE_CLEARn &&
+               offset < L2VIC_INT_ENABLE_SETn) {
+        L2VICA(s->int_enable, offset - L2VIC_INT_ENABLE_CLEARn) &= ~val;
+    } else if (offset >= L2VIC_INT_ENABLE_SETn && offset < L2VIC_INT_TYPEn) {
+        L2VICA(s->int_enable, offset - L2VIC_INT_ENABLE_SETn) |= val;
+    } else if (offset >= L2VIC_INT_TYPEn && offset < L2VIC_INT_TYPEn + 0x80) {
+        L2VICA(s->int_type, offset - L2VIC_INT_TYPEn) = val;
+    } else if (offset >= L2VIC_INT_STATUSn && offset < L2VIC_INT_CLEARn) {
+        L2VICA(s->int_status, offset - L2VIC_INT_STATUSn) = val;
+    } else if (offset >= L2VIC_INT_CLEARn && offset < L2VIC_SOFT_INTn) {
+        L2VICA(s->int_clear, offset - L2VIC_INT_CLEARn) = val;
+    } else if (offset >= L2VIC_INT_PENDINGn &&
+               offset < L2VIC_INT_PENDINGn + 0x80) {
+        L2VICA(s->int_pending, offset - L2VIC_INT_PENDINGn) = val;
+    } else if (offset >= L2VIC_SOFT_INTn && offset < L2VIC_INT_PENDINGn) {
+        L2VICA(s->int_enable, offset - L2VIC_SOFT_INTn) |= val;
+        /*
+         *  Need to reverse engineer the actual irq number.
+         */
+        int irq = find_first_bit((unsigned long *)&val,
+                                 sizeof(s->int_enable[0]) * CHAR_BIT);
+        hwaddr byteoffset = offset - L2VIC_SOFT_INTn;
+        g_assert(irq != sizeof(s->int_enable[0]) * CHAR_BIT);
+        irq += byteoffset * 8;
+
+        /* The soft-int interface only works with edge-triggered interrupts */
+        if (test_bit(irq, (unsigned long *)s->int_type)) {
+            qemu_mutex_unlock(&s->active);
+            l2vic_set_irq(opaque, irq, 1);
+            qemu_mutex_lock(&s->active);
+        }
+    } else if (offset >= L2VIC_INT_GRPn_0 && offset < L2VIC_INT_GRPn_1) {
+        L2VICA(s->int_group_n0, offset - L2VIC_INT_GRPn_0) = val;
+    } else if (offset >= L2VIC_INT_GRPn_1 && offset < L2VIC_INT_GRPn_2) {
+        L2VICA(s->int_group_n1, offset - L2VIC_INT_GRPn_1) = val;
+    } else if (offset >= L2VIC_INT_GRPn_2 && offset < L2VIC_INT_GRPn_3) {
+        L2VICA(s->int_group_n2, offset - L2VIC_INT_GRPn_2) = val;
+    } else if (offset >= L2VIC_INT_GRPn_3 && offset < L2VIC_INT_GRPn_3 + 0x80) {
+        L2VICA(s->int_group_n3, offset - L2VIC_INT_GRPn_3) = val;
+    } else {
+        qemu_log_mask(LOG_UNIMP, "%s: offset %x unimplemented\n", __func__,
+                      (int)offset);
+    }
+    l2vic_update_all(s);
+    qemu_mutex_unlock(&s->active);
+    return;
+}
+
+static uint64_t l2vic_read(void *opaque, hwaddr offset, unsigned size)
+{
+    uint64_t value;
+    L2VICState *s = (L2VICState *)opaque;
+    qemu_mutex_lock(&s->active);
+
+    if (offset == L2VIC_VID_GRP_0) {
+        value = s->vid_group[0];
+    } else if (offset == L2VIC_VID_GRP_1) {
+        value = s->vid_group[1];
+    } else if (offset == L2VIC_VID_GRP_2) {
+        value = s->vid_group[2];
+    } else if (offset == L2VIC_VID_GRP_3) {
+        value = s->vid_group[3];
+    } else if (offset == L2VIC_VID_0) {
+        value = s->vid0;
+    } else if (offset >= L2VIC_INT_ENABLEn &&
+               offset < L2VIC_INT_ENABLE_CLEARn) {
+        value = L2VICA(s->int_enable, offset - L2VIC_INT_ENABLEn);
+    } else if (offset >= L2VIC_INT_ENABLE_CLEARn &&
+               offset < L2VIC_INT_ENABLE_SETn) {
+        value = 0;
+    } else if (offset >= L2VIC_INT_ENABLE_SETn && offset < L2VIC_INT_TYPEn) {
+        value = 0;
+    } else if (offset >= L2VIC_INT_TYPEn && offset < L2VIC_INT_TYPEn + 0x80) {
+        value = L2VICA(s->int_type, offset - L2VIC_INT_TYPEn);
+    } else if (offset >= L2VIC_INT_STATUSn && offset < L2VIC_INT_CLEARn) {
+        value = L2VICA(s->int_status, offset - L2VIC_INT_STATUSn);
+    } else if (offset >= L2VIC_INT_CLEARn && offset < L2VIC_SOFT_INTn) {
+        value = L2VICA(s->int_clear, offset - L2VIC_INT_CLEARn);
+    } else if (offset >= L2VIC_SOFT_INTn && offset < L2VIC_INT_PENDINGn) {
+        value = 0;
+    } else if (offset >= L2VIC_INT_PENDINGn &&
+               offset < L2VIC_INT_PENDINGn + 0x80) {
+        value = L2VICA(s->int_pending, offset - L2VIC_INT_PENDINGn);
+    } else if (offset >= L2VIC_INT_GRPn_0 && offset < L2VIC_INT_GRPn_1) {
+        value = L2VICA(s->int_group_n0, offset - L2VIC_INT_GRPn_0);
+    } else if (offset >= L2VIC_INT_GRPn_1 && offset < L2VIC_INT_GRPn_2) {
+        value = L2VICA(s->int_group_n1, offset - L2VIC_INT_GRPn_1);
+    } else if (offset >= L2VIC_INT_GRPn_2 && offset < L2VIC_INT_GRPn_3) {
+        value = L2VICA(s->int_group_n2, offset - L2VIC_INT_GRPn_2);
+    } else if (offset >= L2VIC_INT_GRPn_3 && offset < L2VIC_INT_GRPn_3 + 0x80) {
+        value = L2VICA(s->int_group_n3, offset - L2VIC_INT_GRPn_3);
+    } else {
+        value = 0;
+        qemu_log_mask(LOG_GUEST_ERROR, "L2VIC: %s: offset 0x%x\n", __func__,
+                      (int)offset);
+    }
+
+    trace_l2vic_reg_read((unsigned)offset, value);
+    qemu_mutex_unlock(&s->active);
+
+    return value;
+}
+
+static const MemoryRegionOps l2vic_ops = {
+    .read = l2vic_read,
+    .write = l2vic_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid.min_access_size = 4,
+    .valid.max_access_size = 4,
+    .valid.unaligned = false,
+};
+
+#define FASTL2VIC_ENABLE 0x0
+#define FASTL2VIC_DISABLE 0x1
+#define FASTL2VIC_INT 0x2
+
+static void fastl2vic_write(void *opaque, hwaddr offset, uint64_t val,
+                            unsigned size)
+{
+    if (offset == 0) {
+        uint32_t cmd = (val >> 16) & 0x3;
+        uint32_t irq = val & 0x3ff;
+        uint32_t slice = (irq / 32) * 4;
+        val = 1 << (irq % 32);
+
+        if (cmd == FASTL2VIC_ENABLE) {
+            l2vic_write(opaque, L2VIC_INT_ENABLE_SETn + slice, val, size);
+        } else if (cmd == FASTL2VIC_DISABLE) {
+            l2vic_write(opaque, L2VIC_INT_ENABLE_CLEARn + slice, val, size);
+        } else if (cmd == FASTL2VIC_INT) {
+            l2vic_write(opaque, L2VIC_SOFT_INTn + slice, val, size);
+        }
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid write cmd %" PRId32 "\n",
+            __func__, cmd);
+        return;
+    }
+    qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid write offset 0x%08" HWADDR_PRIx
+            "\n", __func__, offset);
+}
+
+static const MemoryRegionOps fastl2vic_ops = {
+    .write = fastl2vic_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid.min_access_size = 4,
+    .valid.max_access_size = 4,
+    .valid.unaligned = false,
+};
+
+static void l2vic_reset_hold(Object *obj, G_GNUC_UNUSED ResetType res_type)
+{
+    L2VICState *s = L2VIC(obj);
+    memset(s->int_clear, 0, sizeof(s->int_clear));
+    memset(s->int_enable, 0, sizeof(s->int_enable));
+    memset(s->int_pending, 0, sizeof(s->int_pending));
+    memset(s->int_status, 0, sizeof(s->int_status));
+    memset(s->int_type, 0, sizeof(s->int_type));
+    memset(s->int_group_n0, 0, sizeof(s->int_group_n0));
+    memset(s->int_group_n1, 0, sizeof(s->int_group_n1));
+    memset(s->int_group_n2, 0, sizeof(s->int_group_n2));
+    memset(s->int_group_n3, 0, sizeof(s->int_group_n3));
+    s->int_soft = 0;
+    s->vid0 = 0;
+
+    l2vic_update_all(s);
+}
+
+
+static void reset_irq_handler(void *opaque, int irq, int level)
+{
+    L2VICState *s = (L2VICState *)opaque;
+    Object *obj = OBJECT(opaque);
+    if (level) {
+        l2vic_reset_hold(obj, RESET_TYPE_COLD);
+    }
+    l2vic_update_all(s);
+}
+
+static void l2vic_init(Object *obj)
+{
+    DeviceState *dev = DEVICE(obj);
+    L2VICState *s = L2VIC(obj);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
+    int i;
+
+    memory_region_init_io(&s->iomem, obj, &l2vic_ops, s, "l2vic", 0x1000);
+    sysbus_init_mmio(sbd, &s->iomem);
+    memory_region_init_io(&s->fast_iomem, obj, &fastl2vic_ops, s, "fast",
+                          0x10000);
+    sysbus_init_mmio(sbd, &s->fast_iomem);
+
+    qdev_init_gpio_in(dev, l2vic_set_irq, L2VIC_INTERRUPT_MAX);
+    qdev_init_gpio_in_named(dev, reset_irq_handler, "reset", 1);
+    for (i = 0; i < 8; i++) {
+        sysbus_init_irq(sbd, &s->irq[i]);
+    }
+    qemu_mutex_init(&s->active); /* TODO: Remove this is an experiment */
+}
+
+static const VMStateDescription vmstate_l2vic = {
+    .name = "l2vic",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields =
+        (VMStateField[]){
+            VMSTATE_UINT32(level, L2VICState),
+            VMSTATE_UINT32_ARRAY(vid_group, L2VICState, 4),
+            VMSTATE_UINT32(vid0, L2VICState),
+            VMSTATE_UINT32_ARRAY(int_enable, L2VICState, SLICE_MAX),
+            VMSTATE_UINT32(int_enable_clear, L2VICState),
+            VMSTATE_UINT32(int_enable_set, L2VICState),
+            VMSTATE_UINT32_ARRAY(int_type, L2VICState, SLICE_MAX),
+            VMSTATE_UINT32_ARRAY(int_status, L2VICState, SLICE_MAX),
+            VMSTATE_UINT32_ARRAY(int_clear, L2VICState, SLICE_MAX),
+            VMSTATE_UINT32(int_soft, L2VICState),
+            VMSTATE_UINT32_ARRAY(int_pending, L2VICState, SLICE_MAX),
+            VMSTATE_UINT32_ARRAY(int_group_n0, L2VICState, SLICE_MAX),
+            VMSTATE_UINT32_ARRAY(int_group_n1, L2VICState, SLICE_MAX),
+            VMSTATE_UINT32_ARRAY(int_group_n2, L2VICState, SLICE_MAX),
+            VMSTATE_UINT32_ARRAY(int_group_n3, L2VICState, SLICE_MAX),
+            VMSTATE_END_OF_LIST() }
+};
+
+static void l2vic_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    ResettableClass *rc = RESETTABLE_CLASS(klass);
+
+    dc->vmsd = &vmstate_l2vic;
+    rc->phases.hold = l2vic_reset_hold;
+}
+
+static const TypeInfo l2vic_info = {
+    .name = TYPE_L2VIC,
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(L2VICState),
+    .instance_init = l2vic_init,
+    .class_init = l2vic_class_init,
+};
+
+static void l2vic_register_types(void)
+{
+    type_register_static(&l2vic_info);
+}
+
+type_init(l2vic_register_types)
diff --git a/hw/intc/loongarch_extioi.c b/hw/intc/loongarch_extioi.c
index f3055ec4d26d1..a51a215e6ec23 100644
--- a/hw/intc/loongarch_extioi.c
+++ b/hw/intc/loongarch_extioi.c
@@ -343,7 +343,7 @@ static void loongarch_extioi_realize(DeviceState *dev, Error **errp)
     LoongArchExtIOIClass *lec = LOONGARCH_EXTIOI_GET_CLASS(dev);
     SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
     Error *local_err = NULL;
-    int i, pin;
+    int i;
 
     lec->parent_realize(dev, &local_err);
     if (local_err) {
@@ -368,12 +368,6 @@ static void loongarch_extioi_realize(DeviceState *dev, Error **errp)
     } else {
         s->status |= BIT(EXTIOI_ENABLE);
     }
-
-    for (i = 0; i < s->num_cpu; i++) {
-        for (pin = 0; pin < LS3A_INTC_IP; pin++) {
-            qdev_init_gpio_out(dev, &s->cpu[i].parent_irq[pin], 1);
-        }
-    }
 }
 
 static void loongarch_extioi_unrealize(DeviceState *dev)
diff --git a/hw/intc/loongarch_extioi_common.c b/hw/intc/loongarch_extioi_common.c
index fd56253d10b94..ff3974f2a10d0 100644
--- a/hw/intc/loongarch_extioi_common.c
+++ b/hw/intc/loongarch_extioi_common.c
@@ -4,11 +4,82 @@
  * Copyright (C) 2024 Loongson Technology Corporation Limited
  */
 #include "qemu/osdep.h"
+#include "qemu/error-report.h"
 #include "qemu/module.h"
 #include "qapi/error.h"
 #include "hw/qdev-properties.h"
 #include "hw/intc/loongarch_extioi_common.h"
 #include "migration/vmstate.h"
+#include "target/loongarch/cpu.h"
+
+static ExtIOICore *loongarch_extioi_get_cpu(LoongArchExtIOICommonState *s,
+                                            DeviceState *dev)
+{
+    CPUClass *k = CPU_GET_CLASS(dev);
+    uint64_t arch_id = k->get_arch_id(CPU(dev));
+    int i;
+
+    for (i = 0; i < s->num_cpu; i++) {
+        if (s->cpu[i].arch_id == arch_id) {
+            return &s->cpu[i];
+        }
+    }
+
+    return NULL;
+}
+
+static void loongarch_extioi_cpu_plug(HotplugHandler *hotplug_dev,
+                                      DeviceState *dev, Error **errp)
+{
+    LoongArchExtIOICommonState *s = LOONGARCH_EXTIOI_COMMON(hotplug_dev);
+    Object *obj = OBJECT(dev);
+    ExtIOICore *core;
+    int pin, index;
+
+    if (!object_dynamic_cast(obj, TYPE_LOONGARCH_CPU)) {
+        warn_report("LoongArch extioi: Invalid %s device type",
+                                       object_get_typename(obj));
+        return;
+    }
+
+    core = loongarch_extioi_get_cpu(s, dev);
+    if (!core) {
+        return;
+    }
+
+    core->cpu = CPU(dev);
+    index = core - s->cpu;
+
+    /*
+     * connect extioi irq to the cpu irq
+     * cpu_pin[LS3A_INTC_IP + 2 : 2] <= intc_pin[LS3A_INTC_IP : 0]
+     */
+    for (pin = 0; pin < LS3A_INTC_IP; pin++) {
+        qdev_connect_gpio_out(DEVICE(s), index * LS3A_INTC_IP + pin,
+                              qdev_get_gpio_in(dev, pin + 2));
+    }
+}
+
+static void loongarch_extioi_cpu_unplug(HotplugHandler *hotplug_dev,
+                                        DeviceState *dev, Error **errp)
+{
+    LoongArchExtIOICommonState *s = LOONGARCH_EXTIOI_COMMON(hotplug_dev);
+    Object *obj = OBJECT(dev);
+    ExtIOICore *core;
+
+    if (!object_dynamic_cast(obj, TYPE_LOONGARCH_CPU)) {
+        warn_report("LoongArch extioi: Invalid %s device type",
+                                       object_get_typename(obj));
+        return;
+    }
+
+    core = loongarch_extioi_get_cpu(s, dev);
+    if (!core) {
+        return;
+    }
+
+    core->cpu = NULL;
+}
 
 static void loongarch_extioi_common_realize(DeviceState *dev, Error **errp)
 {
@@ -16,7 +87,7 @@ static void loongarch_extioi_common_realize(DeviceState *dev, Error **errp)
     MachineState *machine = MACHINE(qdev_get_machine());
     MachineClass *mc = MACHINE_GET_CLASS(machine);
     const CPUArchIdList *id_list;
-    int i;
+    int i, pin;
 
     assert(mc->possible_cpu_arch_ids);
     id_list = mc->possible_cpu_arch_ids(machine);
@@ -30,6 +101,10 @@ static void loongarch_extioi_common_realize(DeviceState *dev, Error **errp)
     for (i = 0; i < s->num_cpu; i++) {
         s->cpu[i].arch_id = id_list->cpus[i].arch_id;
         s->cpu[i].cpu = CPU(id_list->cpus[i].cpu);
+
+        for (pin = 0; pin < LS3A_INTC_IP; pin++) {
+            qdev_init_gpio_out(dev, &s->cpu[i].parent_irq[pin], 1);
+        }
     }
 }
 
@@ -103,11 +178,14 @@ static void loongarch_extioi_common_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     LoongArchExtIOICommonClass *lecc = LOONGARCH_EXTIOI_COMMON_CLASS(klass);
+    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);
 
     device_class_set_parent_realize(dc, loongarch_extioi_common_realize,
                                     &lecc->parent_realize);
     device_class_set_props(dc, extioi_properties);
     dc->vmsd = &vmstate_loongarch_extioi;
+    hc->plug = loongarch_extioi_cpu_plug;
+    hc->unplug = loongarch_extioi_cpu_unplug;
 }
 
 static const TypeInfo loongarch_extioi_common_types[] = {
@@ -117,6 +195,10 @@ static const TypeInfo loongarch_extioi_common_types[] = {
         .instance_size      = sizeof(LoongArchExtIOICommonState),
         .class_size         = sizeof(LoongArchExtIOICommonClass),
         .class_init         = loongarch_extioi_common_class_init,
+        .interfaces         = (InterfaceInfo[]) {
+            { TYPE_HOTPLUG_HANDLER },
+            { }
+        },
         .abstract           = true,
     }
 };
diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c
index 5376f1e084058..b10641dd03345 100644
--- a/hw/intc/loongarch_ipi.c
+++ b/hw/intc/loongarch_ipi.c
@@ -6,6 +6,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/error-report.h"
 #include "hw/boards.h"
 #include "qapi/error.h"
 #include "hw/intc/loongarch_ipi.h"
@@ -48,6 +49,22 @@ static int loongarch_cpu_by_arch_id(LoongsonIPICommonState *lics,
     return MEMTX_ERROR;
 }
 
+static IPICore *loongarch_ipi_get_cpu(LoongsonIPICommonState *lics,
+                                      DeviceState *dev)
+{
+    CPUClass *k = CPU_GET_CLASS(dev);
+    uint64_t arch_id = k->get_arch_id(CPU(dev));
+    int i;
+
+    for (i = 0; i < lics->num_cpu; i++) {
+        if (lics->cpu[i].arch_id == arch_id) {
+            return &lics->cpu[i];
+        }
+    }
+
+    return NULL;
+}
+
 static void loongarch_ipi_realize(DeviceState *dev, Error **errp)
 {
     LoongsonIPICommonState *lics = LOONGSON_IPI_COMMON(dev);
@@ -76,9 +93,57 @@ static void loongarch_ipi_realize(DeviceState *dev, Error **errp)
     }
 }
 
+static void loongarch_ipi_cpu_plug(HotplugHandler *hotplug_dev,
+                                   DeviceState *dev, Error **errp)
+{
+    LoongsonIPICommonState *lics = LOONGSON_IPI_COMMON(hotplug_dev);
+    Object *obj = OBJECT(dev);
+    IPICore *core;
+    int index;
+
+    if (!object_dynamic_cast(obj, TYPE_LOONGARCH_CPU)) {
+        warn_report("LoongArch extioi: Invalid %s device type",
+                                       object_get_typename(obj));
+        return;
+    }
+
+    core = loongarch_ipi_get_cpu(lics, dev);
+    if (!core) {
+        return;
+    }
+
+    core->cpu = CPU(dev);
+    index = core - lics->cpu;
+
+    /* connect ipi irq to cpu irq */
+    qdev_connect_gpio_out(DEVICE(lics), index, qdev_get_gpio_in(dev, IRQ_IPI));
+}
+
+static void loongarch_ipi_cpu_unplug(HotplugHandler *hotplug_dev,
+                                     DeviceState *dev, Error **errp)
+{
+    LoongsonIPICommonState *lics = LOONGSON_IPI_COMMON(hotplug_dev);
+    Object *obj = OBJECT(dev);
+    IPICore *core;
+
+    if (!object_dynamic_cast(obj, TYPE_LOONGARCH_CPU)) {
+        warn_report("LoongArch extioi: Invalid %s device type",
+                                       object_get_typename(obj));
+        return;
+    }
+
+    core = loongarch_ipi_get_cpu(lics, dev);
+    if (!core) {
+        return;
+    }
+
+    core->cpu = NULL;
+}
+
 static void loongarch_ipi_class_init(ObjectClass *klass, void *data)
 {
     LoongsonIPICommonClass *licc = LOONGSON_IPI_COMMON_CLASS(klass);
+    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);
     LoongarchIPIClass *lic = LOONGARCH_IPI_CLASS(klass);
     DeviceClass *dc = DEVICE_CLASS(klass);
 
@@ -86,6 +151,8 @@ static void loongarch_ipi_class_init(ObjectClass *klass, void *data)
                                     &lic->parent_realize);
     licc->get_iocsr_as = get_iocsr_as;
     licc->cpu_by_arch_id = loongarch_cpu_by_arch_id;
+    hc->plug = loongarch_ipi_cpu_plug;
+    hc->unplug = loongarch_ipi_cpu_unplug;
 }
 
 static const TypeInfo loongarch_ipi_types[] = {
@@ -95,6 +162,10 @@ static const TypeInfo loongarch_ipi_types[] = {
         .instance_size      = sizeof(LoongarchIPIState),
         .class_size         = sizeof(LoongarchIPIClass),
         .class_init         = loongarch_ipi_class_init,
+        .interfaces         = (InterfaceInfo[]) {
+            { TYPE_HOTPLUG_HANDLER },
+            { }
+        },
     }
 };
 
diff --git a/hw/intc/meson.build b/hw/intc/meson.build
index 510fdfb68865a..35f4a7bad5efe 100644
--- a/hw/intc/meson.build
+++ b/hw/intc/meson.build
@@ -6,7 +6,7 @@ system_ss.add(when: 'CONFIG_ARM_GIC', if_true: files(
   'arm_gicv3_common.c',
   'arm_gicv3_its_common.c',
 ))
-system_ss.add(when: 'CONFIG_ARM_GICV3_TCG', if_true: files(
+system_ss.add(when: 'CONFIG_ARM_GICV3', if_true: files(
   'arm_gicv3.c',
   'arm_gicv3_dist.c',
   'arm_gicv3_its.c',
@@ -39,7 +39,7 @@ endif
 
 specific_ss.add(when: 'CONFIG_APIC', if_true: files('apic.c', 'apic_common.c'))
 specific_ss.add(when: 'CONFIG_ARM_GIC', if_true: files('arm_gicv3_cpuif_common.c'))
-specific_ss.add(when: 'CONFIG_ARM_GICV3_TCG', if_true: files('arm_gicv3_cpuif.c'))
+specific_ss.add(when: 'CONFIG_ARM_GICV3', if_true: files('arm_gicv3_cpuif.c'))
 specific_ss.add(when: 'CONFIG_ARM_GIC_KVM', if_true: files('arm_gic_kvm.c'))
 specific_ss.add(when: ['CONFIG_ARM_GIC_KVM', 'TARGET_AARCH64'], if_true: files('arm_gicv3_kvm.c', 'arm_gicv3_its_kvm.c'))
 specific_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m_nvic.c'))
@@ -67,6 +67,8 @@ specific_ss.add(when: 'CONFIG_PSERIES', if_true: files('xics_spapr.c', 'spapr_xi
 specific_ss.add(when: 'CONFIG_XIVE', if_true: files('xive.c'))
 specific_ss.add(when: ['CONFIG_KVM', 'CONFIG_XIVE'],
 		if_true: files('spapr_xive_kvm.c'))
+
+specific_ss.add(when: 'CONFIG_L2VIC', if_true: files('l2vic.c'))
 specific_ss.add(when: 'CONFIG_M68K_IRQC', if_true: files('m68k_irqc.c'))
 specific_ss.add(when: 'CONFIG_LOONGSON_IPI_COMMON', if_true: files('loongson_ipi_common.c'))
 specific_ss.add(when: 'CONFIG_LOONGSON_IPI', if_true: files('loongson_ipi.c'))
diff --git a/hw/intc/pnv_xive.c b/hw/intc/pnv_xive.c
index b755ddf0ffab0..ccbe95a58ea01 100644
--- a/hw/intc/pnv_xive.c
+++ b/hw/intc/pnv_xive.c
@@ -1,10 +1,9 @@
 /*
  * QEMU PowerPC XIVE interrupt controller model
  *
- * Copyright (c) 2017-2019, IBM Corporation.
+ * Copyright (c) 2017-2024, IBM Corporation.
  *
- * This code is licensed under the GPL version 2 or later. See the
- * COPYING file in the top-level directory.
+ * SPDX-License-Identifier: GPL-2.0-or-later
  */
 
 #include "qemu/osdep.h"
@@ -473,7 +472,7 @@ static bool pnv_xive_is_cpu_enabled(PnvXive *xive, PowerPCCPU *cpu)
 
 static int pnv_xive_match_nvt(XivePresenter *xptr, uint8_t format,
                               uint8_t nvt_blk, uint32_t nvt_idx,
-                              bool cam_ignore, uint8_t priority,
+                              bool crowd, bool cam_ignore, uint8_t priority,
                               uint32_t logic_serv, XiveTCTXMatch *match)
 {
     PnvXive *xive = PNV_XIVE(xptr);
@@ -500,7 +499,8 @@ static int pnv_xive_match_nvt(XivePresenter *xptr, uint8_t format,
              * Check the thread context CAM lines and record matches.
              */
             ring = xive_presenter_tctx_match(xptr, tctx, format, nvt_blk,
-                                             nvt_idx, cam_ignore, logic_serv);
+                                             nvt_idx, cam_ignore,
+                                             logic_serv);
             /*
              * Save the context and follow on to catch duplicates, that we
              * don't support yet.
diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c
index 9ed759417e001..0b81dad6ba59e 100644
--- a/hw/intc/pnv_xive2.c
+++ b/hw/intc/pnv_xive2.c
@@ -1,10 +1,9 @@
 /*
  * QEMU PowerPC XIVE2 interrupt controller model  (POWER10)
  *
- * Copyright (c) 2019-2022, IBM Corporation.
+ * Copyright (c) 2019-2024, IBM Corporation.
  *
- * This code is licensed under the GPL version 2 or later. See the
- * COPYING file in the top-level directory.
+ * SPDX-License-Identifier: GPL-2.0-or-later
  */
 
 #include "qemu/osdep.h"
@@ -625,7 +624,7 @@ static bool pnv_xive2_is_cpu_enabled(PnvXive2 *xive, PowerPCCPU *cpu)
 
 static int pnv_xive2_match_nvt(XivePresenter *xptr, uint8_t format,
                                uint8_t nvt_blk, uint32_t nvt_idx,
-                               bool cam_ignore, uint8_t priority,
+                               bool crowd, bool cam_ignore, uint8_t priority,
                                uint32_t logic_serv, XiveTCTXMatch *match)
 {
     PnvXive2 *xive = PNV_XIVE2(xptr);
@@ -656,25 +655,38 @@ static int pnv_xive2_match_nvt(XivePresenter *xptr, uint8_t format,
                                                  logic_serv);
             } else {
                 ring = xive2_presenter_tctx_match(xptr, tctx, format, nvt_blk,
-                                                   nvt_idx, cam_ignore,
-                                                   logic_serv);
+                                                  nvt_idx, crowd, cam_ignore,
+                                                  logic_serv);
             }
 
-            /*
-             * Save the context and follow on to catch duplicates,
-             * that we don't support yet.
-             */
             if (ring != -1) {
-                if (match->tctx) {
+                /*
+                 * For VP-specific match, finding more than one is a
+                 * problem. For group notification, it's possible.
+                 */
+                if (!cam_ignore && match->tctx) {
                     qemu_log_mask(LOG_GUEST_ERROR, "XIVE: already found a "
                                   "thread context NVT %x/%x\n",
                                   nvt_blk, nvt_idx);
-                    return false;
+                    /* Should set a FIR if we ever model it */
+                    return -1;
+                }
+                /*
+                 * For a group notification, we need to know if the
+                 * match is precluded first by checking the current
+                 * thread priority. If the interrupt can be delivered,
+                 * we always notify the first match (for now).
+                 */
+                if (cam_ignore &&
+                    xive2_tm_irq_precluded(tctx, ring, priority)) {
+                        match->precluded = true;
+                } else {
+                    if (!match->tctx) {
+                        match->ring = ring;
+                        match->tctx = tctx;
+                    }
+                    count++;
                 }
-
-                match->ring = ring;
-                match->tctx = tctx;
-                count++;
             }
         }
     }
@@ -693,6 +705,47 @@ static uint32_t pnv_xive2_presenter_get_config(XivePresenter *xptr)
     return cfg;
 }
 
+static int pnv_xive2_broadcast(XivePresenter *xptr,
+                               uint8_t nvt_blk, uint32_t nvt_idx,
+                               bool crowd, bool ignore, uint8_t priority)
+{
+    PnvXive2 *xive = PNV_XIVE2(xptr);
+    PnvChip *chip = xive->chip;
+    int i, j;
+    bool gen1_tima_os =
+        xive->cq_regs[CQ_XIVE_CFG >> 3] & CQ_XIVE_CFG_GEN1_TIMA_OS;
+
+    for (i = 0; i < chip->nr_cores; i++) {
+        PnvCore *pc = chip->cores[i];
+        CPUCore *cc = CPU_CORE(pc);
+
+        for (j = 0; j < cc->nr_threads; j++) {
+            PowerPCCPU *cpu = pc->threads[j];
+            XiveTCTX *tctx;
+            int ring;
+
+            if (!pnv_xive2_is_cpu_enabled(xive, cpu)) {
+                continue;
+            }
+
+            tctx = XIVE_TCTX(pnv_cpu_state(cpu)->intc);
+
+            if (gen1_tima_os) {
+                ring = xive_presenter_tctx_match(xptr, tctx, 0, nvt_blk,
+                                                 nvt_idx, ignore, 0);
+            } else {
+                ring = xive2_presenter_tctx_match(xptr, tctx, 0, nvt_blk,
+                                                  nvt_idx, crowd, ignore, 0);
+            }
+
+            if (ring != -1) {
+                xive2_tm_set_lsmfb(tctx, ring, priority);
+            }
+        }
+    }
+    return 0;
+}
+
 static uint8_t pnv_xive2_get_block_id(Xive2Router *xrtr)
 {
     return pnv_xive2_block_id(PNV_XIVE2(xrtr));
@@ -2149,21 +2202,40 @@ static const MemoryRegionOps pnv_xive2_tm_ops = {
     },
 };
 
-static uint64_t pnv_xive2_nvc_read(void *opaque, hwaddr offset,
+static uint64_t pnv_xive2_nvc_read(void *opaque, hwaddr addr,
                                    unsigned size)
 {
     PnvXive2 *xive = PNV_XIVE2(opaque);
+    XivePresenter *xptr = XIVE_PRESENTER(xive);
+    uint32_t page = addr >> xive->nvpg_shift;
+    uint16_t op = addr & 0xFFF;
+    uint8_t blk = pnv_xive2_block_id(xive);
 
-    xive2_error(xive, "NVC: invalid read @%"HWADDR_PRIx, offset);
-    return -1;
+    if (size != 2) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid nvc load size %d\n",
+                      size);
+        return -1;
+    }
+
+    return xive2_presenter_nvgc_backlog_op(xptr, true, blk, page, op, 1);
 }
 
-static void pnv_xive2_nvc_write(void *opaque, hwaddr offset,
+static void pnv_xive2_nvc_write(void *opaque, hwaddr addr,
                                 uint64_t val, unsigned size)
 {
     PnvXive2 *xive = PNV_XIVE2(opaque);
+    XivePresenter *xptr = XIVE_PRESENTER(xive);
+    uint32_t page = addr >> xive->nvc_shift;
+    uint16_t op = addr & 0xFFF;
+    uint8_t blk = pnv_xive2_block_id(xive);
 
-    xive2_error(xive, "NVC: invalid write @%"HWADDR_PRIx, offset);
+    if (size != 1) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid nvc write size %d\n",
+                      size);
+        return;
+    }
+
+    (void)xive2_presenter_nvgc_backlog_op(xptr, true, blk, page, op, val);
 }
 
 static const MemoryRegionOps pnv_xive2_nvc_ops = {
@@ -2171,30 +2243,63 @@ static const MemoryRegionOps pnv_xive2_nvc_ops = {
     .write = pnv_xive2_nvc_write,
     .endianness = DEVICE_BIG_ENDIAN,
     .valid = {
-        .min_access_size = 8,
+        .min_access_size = 1,
         .max_access_size = 8,
     },
     .impl = {
-        .min_access_size = 8,
+        .min_access_size = 1,
         .max_access_size = 8,
     },
 };
 
-static uint64_t pnv_xive2_nvpg_read(void *opaque, hwaddr offset,
+static uint64_t pnv_xive2_nvpg_read(void *opaque, hwaddr addr,
                                     unsigned size)
 {
     PnvXive2 *xive = PNV_XIVE2(opaque);
+    XivePresenter *xptr = XIVE_PRESENTER(xive);
+    uint32_t page = addr >> xive->nvpg_shift;
+    uint16_t op = addr & 0xFFF;
+    uint32_t index = page >> 1;
+    uint8_t blk = pnv_xive2_block_id(xive);
 
-    xive2_error(xive, "NVPG: invalid read @%"HWADDR_PRIx, offset);
-    return -1;
+    if (size != 2) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid nvpg load size %d\n",
+                      size);
+        return -1;
+    }
+
+    if (page % 2) {
+        /* odd page - NVG */
+        return xive2_presenter_nvgc_backlog_op(xptr, false, blk, index, op, 1);
+    } else {
+        /* even page - NVP */
+        return xive2_presenter_nvp_backlog_op(xptr, blk, index, op);
+    }
 }
 
-static void pnv_xive2_nvpg_write(void *opaque, hwaddr offset,
+static void pnv_xive2_nvpg_write(void *opaque, hwaddr addr,
                                  uint64_t val, unsigned size)
 {
     PnvXive2 *xive = PNV_XIVE2(opaque);
+    XivePresenter *xptr = XIVE_PRESENTER(xive);
+    uint32_t page = addr >> xive->nvpg_shift;
+    uint16_t op = addr & 0xFFF;
+    uint32_t index = page >> 1;
+    uint8_t blk = pnv_xive2_block_id(xive);
 
-    xive2_error(xive, "NVPG: invalid write @%"HWADDR_PRIx, offset);
+    if (size != 1) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid nvpg write size %d\n",
+                      size);
+        return;
+    }
+
+    if (page % 2) {
+        /* odd page - NVG */
+        (void)xive2_presenter_nvgc_backlog_op(xptr, false, blk, index, op, val);
+    } else {
+        /* even page - NVP */
+        (void)xive2_presenter_nvp_backlog_op(xptr, blk, index, op);
+    }
 }
 
 static const MemoryRegionOps pnv_xive2_nvpg_ops = {
@@ -2202,11 +2307,11 @@ static const MemoryRegionOps pnv_xive2_nvpg_ops = {
     .write = pnv_xive2_nvpg_write,
     .endianness = DEVICE_BIG_ENDIAN,
     .valid = {
-        .min_access_size = 8,
+        .min_access_size = 1,
         .max_access_size = 8,
     },
     .impl = {
-        .min_access_size = 8,
+        .min_access_size = 1,
         .max_access_size = 8,
     },
 };
@@ -2432,6 +2537,7 @@ static void pnv_xive2_class_init(ObjectClass *klass, void *data)
 
     xpc->match_nvt  = pnv_xive2_match_nvt;
     xpc->get_config = pnv_xive2_presenter_get_config;
+    xpc->broadcast  = pnv_xive2_broadcast;
 };
 
 static const TypeInfo pnv_xive2_info = {
diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c
index 4866649115801..5964cde7e09e0 100644
--- a/hw/intc/riscv_aplic.c
+++ b/hw/intc/riscv_aplic.c
@@ -181,8 +181,10 @@ void riscv_aplic_set_kvm_msicfgaddr(RISCVAPLICState *aplic, hwaddr addr)
 {
 #ifdef CONFIG_KVM
     if (riscv_use_emulated_aplic(aplic->msimode)) {
+        addr >>= APLIC_xMSICFGADDR_PPN_SHIFT;
         aplic->kvm_msicfgaddr = extract64(addr, 0, 32);
-        aplic->kvm_msicfgaddrH = extract64(addr, 32, 32);
+        aplic->kvm_msicfgaddrH = extract64(addr, 32, 32) &
+                                 APLIC_xMSICFGADDRH_VALID_MASK;
     }
 #endif
 }
@@ -403,12 +405,17 @@ static void riscv_aplic_msi_send(RISCVAPLICState *aplic,
         }
     }
 
-    if (aplic->mmode) {
-        msicfgaddr = aplic_m->mmsicfgaddr;
-        msicfgaddrH = aplic_m->mmsicfgaddrH;
+    if (aplic->kvm_splitmode) {
+        msicfgaddr = aplic->kvm_msicfgaddr;
+        msicfgaddrH = ((uint64_t)aplic->kvm_msicfgaddrH << 32);
     } else {
-        msicfgaddr = aplic_m->smsicfgaddr;
-        msicfgaddrH = aplic_m->smsicfgaddrH;
+        if (aplic->mmode) {
+            msicfgaddr = aplic_m->mmsicfgaddr;
+            msicfgaddrH = aplic_m->mmsicfgaddrH;
+        } else {
+            msicfgaddr = aplic_m->smsicfgaddr;
+            msicfgaddrH = aplic_m->smsicfgaddrH;
+        }
     }
 
     lhxs = (msicfgaddrH >> APLIC_xMSICFGADDRH_LHXS_SHIFT) &
@@ -421,7 +428,6 @@ static void riscv_aplic_msi_send(RISCVAPLICState *aplic,
             APLIC_xMSICFGADDRH_HHXW_MASK;
 
     group_idx = hart_idx >> lhxw;
-    hart_idx &= APLIC_xMSICFGADDR_PPN_LHX_MASK(lhxw);
 
     addr = msicfgaddr;
     addr |= ((uint64_t)(msicfgaddrH & APLIC_xMSICFGADDRH_BAPPN_MASK)) << 32;
@@ -432,11 +438,6 @@ static void riscv_aplic_msi_send(RISCVAPLICState *aplic,
     addr |= (uint64_t)(guest_idx & APLIC_xMSICFGADDR_PPN_HART(lhxs));
     addr <<= APLIC_xMSICFGADDR_PPN_SHIFT;
 
-    if (aplic->kvm_splitmode) {
-        addr |= aplic->kvm_msicfgaddr;
-        addr |= ((uint64_t)aplic->kvm_msicfgaddrH << 32);
-    }
-
     address_space_stl_le(&address_space_memory, addr,
                          eiid, MEMTXATTRS_UNSPECIFIED, &result);
     if (result != MEMTX_OK) {
@@ -894,6 +895,26 @@ static void riscv_aplic_realize(DeviceState *dev, Error **errp)
     RISCVAPLICState *aplic = RISCV_APLIC(dev);
 
     if (riscv_use_emulated_aplic(aplic->msimode)) {
+        /* Create output IRQ lines for non-MSI mode */
+        if (!aplic->msimode) {
+            /* Claim the CPU interrupt to be triggered by this APLIC */
+            for (i = 0; i < aplic->num_harts; i++) {
+                RISCVCPU *cpu;
+
+                cpu = RISCV_CPU(cpu_by_arch_id(aplic->hartid_base + i));
+                if (riscv_cpu_claim_interrupts(cpu,
+                    (aplic->mmode) ? MIP_MEIP : MIP_SEIP) < 0) {
+                    error_report("%s already claimed",
+                                 (aplic->mmode) ? "MEIP" : "SEIP");
+                    exit(1);
+                }
+            }
+
+            aplic->external_irqs = g_malloc(sizeof(qemu_irq) *
+                                            aplic->num_harts);
+            qdev_init_gpio_out(dev, aplic->external_irqs, aplic->num_harts);
+        }
+
         aplic->bitfield_words = (aplic->num_irqs + 31) >> 5;
         aplic->sourcecfg = g_new0(uint32_t, aplic->num_irqs);
         aplic->state = g_new0(uint32_t, aplic->num_irqs);
@@ -928,23 +949,6 @@ static void riscv_aplic_realize(DeviceState *dev, Error **errp)
         }
     }
 
-    /* Create output IRQ lines for non-MSI mode */
-    if (!aplic->msimode) {
-        aplic->external_irqs = g_malloc(sizeof(qemu_irq) * aplic->num_harts);
-        qdev_init_gpio_out(dev, aplic->external_irqs, aplic->num_harts);
-
-        /* Claim the CPU interrupt to be triggered by this APLIC */
-        for (i = 0; i < aplic->num_harts; i++) {
-            RISCVCPU *cpu = RISCV_CPU(cpu_by_arch_id(aplic->hartid_base + i));
-            if (riscv_cpu_claim_interrupts(cpu,
-                (aplic->mmode) ? MIP_MEIP : MIP_SEIP) < 0) {
-                error_report("%s already claimed",
-                             (aplic->mmode) ? "MEIP" : "SEIP");
-                exit(1);
-            }
-        }
-    }
-
     msi_nonbroken = true;
 }
 
@@ -1068,15 +1072,15 @@ DeviceState *riscv_aplic_create(hwaddr addr, hwaddr size,
 
     if (riscv_use_emulated_aplic(msimode)) {
         sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr);
-    }
 
-    if (!msimode) {
-        for (i = 0; i < num_harts; i++) {
-            CPUState *cpu = cpu_by_arch_id(hartid_base + i);
+        if (!msimode) {
+            for (i = 0; i < num_harts; i++) {
+                CPUState *cpu = cpu_by_arch_id(hartid_base + i);
 
-            qdev_connect_gpio_out_named(dev, NULL, i,
-                                        qdev_get_gpio_in(DEVICE(cpu),
+                qdev_connect_gpio_out_named(dev, NULL, i,
+                                            qdev_get_gpio_in(DEVICE(cpu),
                                             (mmode) ? IRQ_M_EXT : IRQ_S_EXT));
+            }
         }
     }
 
diff --git a/hw/intc/riscv_imsic.c b/hw/intc/riscv_imsic.c
index dc8162c0a7c9c..241b12fef09f4 100644
--- a/hw/intc/riscv_imsic.c
+++ b/hw/intc/riscv_imsic.c
@@ -349,7 +349,19 @@ static void riscv_imsic_realize(DeviceState *dev, Error **errp)
     CPUState *cpu = cpu_by_arch_id(imsic->hartid);
     CPURISCVState *env = cpu ? cpu_env(cpu) : NULL;
 
+    /* Claim the CPU interrupt to be triggered by this IMSIC */
+    if (riscv_cpu_claim_interrupts(rcpu,
+            (imsic->mmode) ? MIP_MEIP : MIP_SEIP) < 0) {
+        error_setg(errp, "%s already claimed",
+                   (imsic->mmode) ? "MEIP" : "SEIP");
+        return;
+    }
+
     if (!kvm_irqchip_in_kernel()) {
+        /* Create output IRQ lines */
+        imsic->external_irqs = g_malloc(sizeof(qemu_irq) * imsic->num_pages);
+        qdev_init_gpio_out(dev, imsic->external_irqs, imsic->num_pages);
+
         imsic->num_eistate = imsic->num_pages * imsic->num_irqs;
         imsic->eidelivery = g_new0(uint32_t, imsic->num_pages);
         imsic->eithreshold = g_new0(uint32_t, imsic->num_pages);
@@ -361,18 +373,6 @@ static void riscv_imsic_realize(DeviceState *dev, Error **errp)
                           IMSIC_MMIO_SIZE(imsic->num_pages));
     sysbus_init_mmio(SYS_BUS_DEVICE(dev), &imsic->mmio);
 
-    /* Claim the CPU interrupt to be triggered by this IMSIC */
-    if (riscv_cpu_claim_interrupts(rcpu,
-            (imsic->mmode) ? MIP_MEIP : MIP_SEIP) < 0) {
-        error_setg(errp, "%s already claimed",
-                   (imsic->mmode) ? "MEIP" : "SEIP");
-        return;
-    }
-
-    /* Create output IRQ lines */
-    imsic->external_irqs = g_malloc(sizeof(qemu_irq) * imsic->num_pages);
-    qdev_init_gpio_out(dev, imsic->external_irqs, imsic->num_pages);
-
     /* Force select AIA feature and setup CSR read-modify-write callback */
     if (env) {
         if (!imsic->mmode) {
@@ -381,8 +381,11 @@ static void riscv_imsic_realize(DeviceState *dev, Error **errp)
         } else {
             rcpu->cfg.ext_smaia = true;
         }
-        riscv_cpu_set_aia_ireg_rmw_fn(env, (imsic->mmode) ? PRV_M : PRV_S,
-                                      riscv_imsic_rmw, imsic);
+
+        if (!kvm_irqchip_in_kernel()) {
+            riscv_cpu_set_aia_ireg_rmw_fn(env, (imsic->mmode) ? PRV_M : PRV_S,
+                                          riscv_imsic_rmw, imsic);
+        }
     }
 
     msi_nonbroken = true;
@@ -464,15 +467,17 @@ DeviceState *riscv_imsic_create(hwaddr addr, uint32_t hartid, bool mmode,
     sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
     sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr);
 
-    for (i = 0; i < num_pages; i++) {
-        if (!i) {
-            qdev_connect_gpio_out_named(dev, NULL, i,
-                                        qdev_get_gpio_in(DEVICE(cpu),
+    if (!kvm_irqchip_in_kernel()) {
+        for (i = 0; i < num_pages; i++) {
+            if (!i) {
+                qdev_connect_gpio_out_named(dev, NULL, i,
+                                            qdev_get_gpio_in(DEVICE(cpu),
                                             (mmode) ? IRQ_M_EXT : IRQ_S_EXT));
-        } else {
-            qdev_connect_gpio_out_named(dev, NULL, i,
-                                        qdev_get_gpio_in(DEVICE(cpu),
+            } else {
+                qdev_connect_gpio_out_named(dev, NULL, i,
+                                            qdev_get_gpio_in(DEVICE(cpu),
                                             IRQ_LOCAL_MAX + i - 1));
+            }
         }
     }
 
diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
index a764c0bb575f6..ce734b03ab53a 100644
--- a/hw/intc/spapr_xive.c
+++ b/hw/intc/spapr_xive.c
@@ -1,10 +1,9 @@
 /*
  * QEMU PowerPC sPAPR XIVE interrupt controller model
  *
- * Copyright (c) 2017-2018, IBM Corporation.
+ * Copyright (c) 2017-2024, IBM Corporation.
  *
- * This code is licensed under the GPL version 2 or later. See the
- * COPYING file in the top-level directory.
+ * SPDX-License-Identifier: GPL-2.0-or-later
  */
 
 #include "qemu/osdep.h"
@@ -431,7 +430,8 @@ static int spapr_xive_write_nvt(XiveRouter *xrtr, uint8_t nvt_blk,
 
 static int spapr_xive_match_nvt(XivePresenter *xptr, uint8_t format,
                                 uint8_t nvt_blk, uint32_t nvt_idx,
-                                bool cam_ignore, uint8_t priority,
+                                bool crowd, bool cam_ignore,
+                                uint8_t priority,
                                 uint32_t logic_serv, XiveTCTXMatch *match)
 {
     CPUState *cs;
diff --git a/hw/intc/trace-events b/hw/intc/trace-events
index 3dcf147198337..daffdbc0f4827 100644
--- a/hw/intc/trace-events
+++ b/hw/intc/trace-events
@@ -80,18 +80,19 @@ aspeed_vic_update_irq(int flags) "Raising IRQ: %d"
 aspeed_vic_read(uint64_t offset, unsigned size, uint32_t value) "From 0x%" PRIx64 " of size %u: 0x%" PRIx32
 aspeed_vic_write(uint64_t offset, unsigned size, uint32_t data) "To 0x%" PRIx64 " of size %u: 0x%" PRIx32
 # aspeed_intc.c
-aspeed_intc_read(uint64_t offset, unsigned size, uint32_t value) "From 0x%" PRIx64 " of size %u: 0x%" PRIx32
-aspeed_intc_write(uint64_t offset, unsigned size, uint32_t data) "To 0x%" PRIx64 " of size %u: 0x%" PRIx32
-aspeed_intc_set_irq(int irq, int level) "Set IRQ %d: %d"
-aspeed_intc_clear_irq(int irq, int level) "Clear IRQ %d: %d"
-aspeed_intc_update_irq(int irq, int level) "Update IRQ: %d: %d"
-aspeed_intc_pending_irq(int irq, uint32_t value) "Pending IRQ: %d: 0x%x"
-aspeed_intc_trigger_irq(int irq, uint32_t value) "Trigger IRQ: %d: 0x%x"
-aspeed_intc_all_isr_done(int irq) "All source ISR execution are done: %d"
-aspeed_intc_enable(uint32_t value) "Enable: 0x%x"
-aspeed_intc_select(uint32_t value) "Select: 0x%x"
-aspeed_intc_mask(uint32_t change, uint32_t value) "Mask: 0x%x: 0x%x"
-aspeed_intc_unmask(uint32_t change, uint32_t value) "UnMask: 0x%x: 0x%x"
+aspeed_intc_read(const char *s, uint64_t offset, unsigned size, uint32_t value) "%s: From 0x%" PRIx64 " of size %u: 0x%" PRIx32
+aspeed_intc_write(const char *s, uint64_t offset, unsigned size, uint32_t data) "%s: To 0x%" PRIx64 " of size %u: 0x%" PRIx32
+aspeed_intc_set_irq(const char *s, int inpin_idx, int level) "%s: Set IRQ %d: %d"
+aspeed_intc_clear_irq(const char *s, int inpin_idx, int outpin_idx, int level) "%s: Clear IRQ %d-%d: %d"
+aspeed_intc_update_irq(const char *s, int inpin_idx, int outpin_idx, int level) "%s: Update IRQ: %d-%d: %d"
+aspeed_intc_pending_irq(const char *s, int inpin_idx, uint32_t value) "%s: Pending IRQ: %d: 0x%x"
+aspeed_intc_trigger_irq(const char *s, int inpin_idx, int outpin_idx, uint32_t value) "%s: Trigger IRQ: %d-%d: 0x%x"
+aspeed_intc_all_isr_done(const char *s, int inpin_idx) "%s: All source ISR execution are done: %d"
+aspeed_intc_enable(const char *s, uint32_t value) "%s: Enable: 0x%x"
+aspeed_intc_select(const char *s, uint32_t value) "%s: Select: 0x%x"
+aspeed_intc_mask(const char *s, uint32_t change, uint32_t value) "%s: Mask: 0x%x: 0x%x"
+aspeed_intc_unmask(const char *s, uint32_t change, uint32_t value) "%s: UnMask: 0x%x: 0x%x"
+aspeed_intc_all_isr_done_bit(const char *s, int inpin_idx, int bit) "%s: All source ISR execution are done from specific bit: %d-%d"
 
 # arm_gic.c
 gic_enable_irq(int irq) "irq %d enabled"
@@ -282,9 +283,13 @@ xive_router_end_notify(uint8_t end_blk, uint32_t end_idx, uint32_t end_data) "EN
 xive_router_end_escalate(uint8_t end_blk, uint32_t end_idx, uint8_t esc_blk, uint32_t esc_idx, uint32_t end_data) "END 0x%02x/0x%04x -> escalate END 0x%02x/0x%04x data 0x%08x"
 xive_tctx_tm_write(uint32_t index, uint64_t offset, unsigned int size, uint64_t value) "target=%d @0x%"PRIx64" sz=%d val=0x%" PRIx64
 xive_tctx_tm_read(uint32_t index, uint64_t offset, unsigned int size, uint64_t value) "target=%d @0x%"PRIx64" sz=%d val=0x%" PRIx64
-xive_presenter_notify(uint8_t nvt_blk, uint32_t nvt_idx, uint8_t ring) "found NVT 0x%x/0x%x ring=0x%x"
+xive_presenter_notify(uint8_t nvt_blk, uint32_t nvt_idx, uint8_t ring, uint8_t group_level) "found NVT 0x%x/0x%x ring=0x%x group_level=%d"
 xive_end_source_read(uint8_t end_blk, uint32_t end_idx, uint64_t addr) "END 0x%x/0x%x @0x%"PRIx64
 
+# xive2.c
+xive_nvp_backlog_op(uint8_t blk, uint32_t idx, uint8_t op, uint8_t priority, uint8_t rc) "NVP 0x%x/0x%x operation=%d priority=%d rc=%d"
+xive_nvgc_backlog_op(bool c, uint8_t blk, uint32_t idx, uint8_t op, uint8_t priority, uint32_t rc) "NVGC crowd=%d 0x%x/0x%x operation=%d priority=%d rc=%d"
+
 # pnv_xive.c
 pnv_xive_ic_hw_trigger(uint64_t addr, uint64_t val) "@0x%"PRIx64" val=0x%"PRIx64
 
@@ -303,6 +308,10 @@ sh_intc_register(const char *s, int id, unsigned short v, int c, int m) "%s %u -
 sh_intc_read(unsigned size, uint64_t offset, unsigned long val) "size %u 0x%" PRIx64 " -> 0x%lx"
 sh_intc_write(unsigned size, uint64_t offset, unsigned long val) "size %u 0x%" PRIx64 " <- 0x%lx"
 sh_intc_set(int id, int enable) "setting interrupt group %d to %d"
+# l2vic.c
+l2vic_reg_write(unsigned int addr, uint32_t value) "addr: 0x%03x value: 0x%08"PRIx32
+l2vic_reg_read(unsigned int addr, uint32_t value) "addr: 0x%03x value: 0x%08"PRIx32
+l2vic_delivered(int irq, int vid) "l2vic: delivered %d (vid %d)"
 
 # loongson_ipi.c
 loongson_ipi_read(unsigned size, uint64_t addr, uint64_t val) "size: %u addr: 0x%"PRIx64 "val: 0x%"PRIx64
diff --git a/hw/intc/xilinx_intc.c b/hw/intc/xilinx_intc.c
index 6930f83907aca..ab1c4a3222170 100644
--- a/hw/intc/xilinx_intc.c
+++ b/hw/intc/xilinx_intc.c
@@ -3,6 +3,9 @@
  *
  * Copyright (c) 2009 Edgar E. Iglesias.
  *
+ * https://docs.amd.com/v/u/en-US/xps_intc
+ * DS572: LogiCORE IP XPS Interrupt Controller (v2.01a)
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
  * in the Software without restriction, including without limitation the rights
@@ -23,10 +26,12 @@
  */
 
 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "hw/sysbus.h"
 #include "qemu/module.h"
 #include "hw/irq.h"
 #include "hw/qdev-properties.h"
+#include "hw/qdev-properties-system.h"
 #include "qom/object.h"
 
 #define D(x)
@@ -49,6 +54,7 @@ struct XpsIntc
 {
     SysBusDevice parent_obj;
 
+    EndianMode model_endianness;
     MemoryRegion mmio;
     qemu_irq parent_irq;
 
@@ -140,18 +146,28 @@ static void pic_write(void *opaque, hwaddr addr,
     update_irq(p);
 }
 
-static const MemoryRegionOps pic_ops = {
-    .read = pic_read,
-    .write = pic_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
-    .impl = {
-        .min_access_size = 4,
-        .max_access_size = 4,
+static const MemoryRegionOps pic_ops[2] = {
+    [0 ... 1] = {
+        .read = pic_read,
+        .write = pic_write,
+        .impl = {
+            .min_access_size = 4,
+            .max_access_size = 4,
+        },
+        .valid = {
+            /*
+             * All XPS INTC registers are accessed through the PLB interface.
+             * The base address for these registers is provided by the
+             * configuration parameter, C_BASEADDR. Each register is 32 bits
+             * although some bits may be unused and is accessed on a 4-byte
+             * boundary offset from the base address.
+             */
+            .min_access_size = 4,
+            .max_access_size = 4,
+        },
     },
-    .valid = {
-        .min_access_size = 4,
-        .max_access_size = 4
-    }
+    [0].endianness = DEVICE_LITTLE_ENDIAN,
+    [1].endianness = DEVICE_BIG_ENDIAN,
 };
 
 static void irq_handler(void *opaque, int irq, int level)
@@ -174,13 +190,27 @@ static void xilinx_intc_init(Object *obj)
 
     qdev_init_gpio_in(DEVICE(obj), irq_handler, 32);
     sysbus_init_irq(SYS_BUS_DEVICE(obj), &p->parent_irq);
+    sysbus_init_mmio(SYS_BUS_DEVICE(obj), &p->mmio);
+}
 
-    memory_region_init_io(&p->mmio, obj, &pic_ops, p, "xlnx.xps-intc",
+static void xilinx_intc_realize(DeviceState *dev, Error **errp)
+{
+    XpsIntc *p = XILINX_INTC(dev);
+
+    if (p->model_endianness == ENDIAN_MODE_UNSPECIFIED) {
+        error_setg(errp, TYPE_XILINX_INTC " property 'endianness'"
+                         " must be set to 'big' or 'little'");
+        return;
+    }
+
+    memory_region_init_io(&p->mmio, OBJECT(dev),
+                          &pic_ops[p->model_endianness == ENDIAN_MODE_BIG],
+                          p, "xlnx.xps-intc",
                           R_MAX * 4);
-    sysbus_init_mmio(SYS_BUS_DEVICE(obj), &p->mmio);
 }
 
 static const Property xilinx_intc_properties[] = {
+    DEFINE_PROP_ENDIAN_NODEFAULT("endianness", XpsIntc, model_endianness),
     DEFINE_PROP_UINT32("kind-of-intr", XpsIntc, c_kind_of_intr, 0),
 };
 
@@ -188,6 +218,7 @@ static void xilinx_intc_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
 
+    dc->realize = xilinx_intc_realize;
     device_class_set_props(dc, xilinx_intc_properties);
 }
 
diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index 139cfdf9bfaf1..c77df2c1f8cc6 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -3,8 +3,7 @@
  *
  * Copyright (c) 2017-2018, IBM Corporation.
  *
- * This code is licensed under the GPL version 2 or later. See the
- * COPYING file in the top-level directory.
+ * SPDX-License-Identifier: GPL-2.0-or-later
  */
 
 #include "qemu/osdep.h"
@@ -27,28 +26,6 @@
  * XIVE Thread Interrupt Management context
  */
 
-/*
- * Convert an Interrupt Pending Buffer (IPB) register to a Pending
- * Interrupt Priority Register (PIPR), which contains the priority of
- * the most favored pending notification.
- */
-static uint8_t ipb_to_pipr(uint8_t ibp)
-{
-    return ibp ? clz32((uint32_t)ibp << 24) : 0xff;
-}
-
-static uint8_t exception_mask(uint8_t ring)
-{
-    switch (ring) {
-    case TM_QW1_OS:
-        return TM_QW1_NSR_EO;
-    case TM_QW3_HV_PHYS:
-        return TM_QW3_NSR_HE;
-    default:
-        g_assert_not_reached();
-    }
-}
-
 static qemu_irq xive_tctx_output(XiveTCTX *tctx, uint8_t ring)
 {
         switch (ring) {
@@ -68,11 +45,10 @@ static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t ring)
 {
     uint8_t *regs = &tctx->regs[ring];
     uint8_t nsr = regs[TM_NSR];
-    uint8_t mask = exception_mask(ring);
 
     qemu_irq_lower(xive_tctx_output(tctx, ring));
 
-    if (regs[TM_NSR] & mask) {
+    if (regs[TM_NSR] != 0) {
         uint8_t cppr = regs[TM_PIPR];
         uint8_t alt_ring;
         uint8_t *alt_regs;
@@ -87,11 +63,18 @@ static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t ring)
 
         regs[TM_CPPR] = cppr;
 
-        /* Reset the pending buffer bit */
-        alt_regs[TM_IPB] &= ~xive_priority_to_ipb(cppr);
+        /*
+         * If the interrupt was for a specific VP, reset the pending
+         * buffer bit, otherwise clear the logical server indicator
+         */
+        if (regs[TM_NSR] & TM_NSR_GRP_LVL) {
+            regs[TM_NSR] &= ~TM_NSR_GRP_LVL;
+        } else {
+            alt_regs[TM_IPB] &= ~xive_priority_to_ipb(cppr);
+        }
 
-        /* Drop Exception bit */
-        regs[TM_NSR] &= ~mask;
+        /* Drop the exception bit and any group/crowd */
+        regs[TM_NSR] = 0;
 
         trace_xive_tctx_accept(tctx->cs->cpu_index, alt_ring,
                                alt_regs[TM_IPB], regs[TM_PIPR],
@@ -101,7 +84,7 @@ static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t ring)
     return ((uint64_t)nsr << 8) | regs[TM_CPPR];
 }
 
-static void xive_tctx_notify(XiveTCTX *tctx, uint8_t ring)
+void xive_tctx_notify(XiveTCTX *tctx, uint8_t ring, uint8_t group_level)
 {
     /* HV_POOL ring uses HV_PHYS NSR, CPPR and PIPR registers */
     uint8_t alt_ring = (ring == TM_QW2_HV_POOL) ? TM_QW3_HV_PHYS : ring;
@@ -111,13 +94,13 @@ static void xive_tctx_notify(XiveTCTX *tctx, uint8_t ring)
     if (alt_regs[TM_PIPR] < alt_regs[TM_CPPR]) {
         switch (ring) {
         case TM_QW1_OS:
-            regs[TM_NSR] |= TM_QW1_NSR_EO;
+            regs[TM_NSR] = TM_QW1_NSR_EO | (group_level & 0x3F);
             break;
         case TM_QW2_HV_POOL:
-            alt_regs[TM_NSR] = (TM_QW3_NSR_HE_POOL << 6);
+            alt_regs[TM_NSR] = (TM_QW3_NSR_HE_POOL << 6) | (group_level & 0x3F);
             break;
         case TM_QW3_HV_PHYS:
-            regs[TM_NSR] |= (TM_QW3_NSR_HE_PHYS << 6);
+            regs[TM_NSR] = (TM_QW3_NSR_HE_PHYS << 6) | (group_level & 0x3F);
             break;
         default:
             g_assert_not_reached();
@@ -159,7 +142,7 @@ static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)
      * Recompute the PIPR based on local pending interrupts.  The PHYS
      * ring must take the minimum of both the PHYS and POOL PIPR values.
      */
-    pipr_min = ipb_to_pipr(regs[TM_IPB]);
+    pipr_min = xive_ipb_to_pipr(regs[TM_IPB]);
     ring_min = ring;
 
     /* PHYS updates also depend on POOL values */
@@ -169,7 +152,7 @@ static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)
         /* POOL values only matter if POOL ctx is valid */
         if (pool_regs[TM_WORD2] & 0x80) {
 
-            uint8_t pool_pipr = ipb_to_pipr(pool_regs[TM_IPB]);
+            uint8_t pool_pipr = xive_ipb_to_pipr(pool_regs[TM_IPB]);
 
             /*
              * Determine highest priority interrupt and
@@ -185,17 +168,27 @@ static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)
     regs[TM_PIPR] = pipr_min;
 
     /* CPPR has changed, check if we need to raise a pending exception */
-    xive_tctx_notify(tctx, ring_min);
+    xive_tctx_notify(tctx, ring_min, 0);
 }
 
-void xive_tctx_ipb_update(XiveTCTX *tctx, uint8_t ring, uint8_t ipb)
-{
+void xive_tctx_pipr_update(XiveTCTX *tctx, uint8_t ring, uint8_t priority,
+                           uint8_t group_level)
+ {
+    /* HV_POOL ring uses HV_PHYS NSR, CPPR and PIPR registers */
+    uint8_t alt_ring = (ring == TM_QW2_HV_POOL) ? TM_QW3_HV_PHYS : ring;
+    uint8_t *alt_regs = &tctx->regs[alt_ring];
     uint8_t *regs = &tctx->regs[ring];
 
-    regs[TM_IPB] |= ipb;
-    regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]);
-    xive_tctx_notify(tctx, ring);
-}
+    if (group_level == 0) {
+        /* VP-specific */
+        regs[TM_IPB] |= xive_priority_to_ipb(priority);
+        alt_regs[TM_PIPR] = xive_ipb_to_pipr(regs[TM_IPB]);
+    } else {
+        /* VP-group */
+        alt_regs[TM_PIPR] = xive_priority_to_pipr(priority);
+    }
+    xive_tctx_notify(tctx, ring, group_level);
+ }
 
 /*
  * XIVE Thread Interrupt Management Area (TIMA)
@@ -411,13 +404,13 @@ static void xive_tm_set_os_lgs(XivePresenter *xptr, XiveTCTX *tctx,
 }
 
 /*
- * Adjust the IPB to allow a CPU to process event queues of other
+ * Adjust the PIPR to allow a CPU to process event queues of other
  * priorities during one physical interrupt cycle.
  */
 static void xive_tm_set_os_pending(XivePresenter *xptr, XiveTCTX *tctx,
                                    hwaddr offset, uint64_t value, unsigned size)
 {
-    xive_tctx_ipb_update(tctx, TM_QW1_OS, xive_priority_to_ipb(value & 0xff));
+    xive_tctx_pipr_update(tctx, TM_QW1_OS, value & 0xff, 0);
 }
 
 static void xive_os_cam_decode(uint32_t cam, uint8_t *nvt_blk,
@@ -495,16 +488,20 @@ static void xive_tctx_need_resend(XiveRouter *xrtr, XiveTCTX *tctx,
         /* Reset the NVT value */
         nvt.w4 = xive_set_field32(NVT_W4_IPB, nvt.w4, 0);
         xive_router_write_nvt(xrtr, nvt_blk, nvt_idx, &nvt, 4);
+
+        uint8_t *regs = &tctx->regs[TM_QW1_OS];
+        regs[TM_IPB] |= ipb;
     }
+
     /*
-     * Always call xive_tctx_ipb_update(). Even if there were no
+     * Always call xive_tctx_pipr_update(). Even if there were no
      * escalation triggered, there could be a pending interrupt which
      * was saved when the context was pulled and that we need to take
      * into account by recalculating the PIPR (which is not
      * saved/restored).
      * It will also raise the External interrupt signal if needed.
      */
-    xive_tctx_ipb_update(tctx, TM_QW1_OS, ipb);
+    xive_tctx_pipr_update(tctx, TM_QW1_OS, 0xFF, 0); /* fxb */
 }
 
 /*
@@ -592,7 +589,7 @@ static const XiveTmOp xive2_tm_operations[] = {
      * MMIOs below 2K : raw values and special operations without side
      * effects
      */
-    { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR,       1, xive_tm_set_os_cppr,
+    { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR,       1, xive2_tm_set_os_cppr,
                                                      NULL },
     { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2,      4, xive2_tm_push_os_ctx,
                                                      NULL },
@@ -600,7 +597,7 @@ static const XiveTmOp xive2_tm_operations[] = {
                                                      NULL },
     { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_LGS,        1, xive_tm_set_os_lgs,
                                                      NULL },
-    { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR,  1, xive_tm_set_hv_cppr,
+    { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR,  1, xive2_tm_set_hv_cppr,
                                                      NULL },
     { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, xive_tm_vt_push,
                                                      NULL },
@@ -841,9 +838,9 @@ void xive_tctx_reset(XiveTCTX *tctx)
      * CPPR is first set.
      */
     tctx->regs[TM_QW1_OS + TM_PIPR] =
-        ipb_to_pipr(tctx->regs[TM_QW1_OS + TM_IPB]);
+        xive_ipb_to_pipr(tctx->regs[TM_QW1_OS + TM_IPB]);
     tctx->regs[TM_QW3_HV_PHYS + TM_PIPR] =
-        ipb_to_pipr(tctx->regs[TM_QW3_HV_PHYS + TM_IPB]);
+        xive_ipb_to_pipr(tctx->regs[TM_QW3_HV_PHYS + TM_IPB]);
 }
 
 static void xive_tctx_realize(DeviceState *dev, Error **errp)
@@ -1658,6 +1655,54 @@ static uint32_t xive_tctx_hw_cam_line(XivePresenter *xptr, XiveTCTX *tctx)
     return xive_nvt_cam_line(blk, 1 << 7 | (pir & 0x7f));
 }
 
+uint32_t xive_get_vpgroup_size(uint32_t nvp_index)
+{
+    /*
+     * Group size is a power of 2. The position of the first 0
+     * (starting with the least significant bits) in the NVP index
+     * gives the size of the group.
+     */
+    return 1 << (ctz32(~nvp_index) + 1);
+}
+
+static uint8_t xive_get_group_level(bool crowd, bool ignore,
+                                    uint32_t nvp_blk, uint32_t nvp_index)
+{
+    uint8_t level;
+
+    if (!ignore) {
+        g_assert(!crowd);
+        return 0;
+    }
+
+    level = (ctz32(~nvp_index) + 1) & 0b1111;
+    if (crowd) {
+        uint32_t blk;
+
+        /* crowd level is bit position of first 0 from the right in nvp_blk */
+        blk = ctz32(~nvp_blk) + 1;
+
+        /*
+         * Supported crowd sizes are 2^1, 2^2, and 2^4. 2^3 is not supported.
+         * HW will encode level 4 as the value 3.  See xive2_pgofnext().
+         */
+        switch (level) {
+        case 1:
+        case 2:
+            break;
+        case 4:
+            blk = 3;
+            break;
+        default:
+            g_assert_not_reached();
+        }
+
+        /* Crowd level bits reside in upper 2 bits of the 6 bit group level */
+        level |= blk << 4;
+    }
+    return level;
+}
+
 /*
  * The thread context register words are in big-endian format.
  */
@@ -1724,31 +1769,41 @@ int xive_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx,
 /*
  * This is our simple Xive Presenter Engine model. It is merged in the
  * Router as it does not require an extra object.
- *
- * It receives notification requests sent by the IVRE to find one
- * matching NVT (or more) dispatched on the processor threads. In case
- * of a single NVT notification, the process is abbreviated and the
- * thread is signaled if a match is found. In case of a logical server
- * notification (bits ignored at the end of the NVT identifier), the
- * IVPE and IVRE select a winning thread using different filters. This
- * involves 2 or 3 exchanges on the PowerBus that the model does not
- * support.
- *
- * The parameters represent what is sent on the PowerBus
  */
 bool xive_presenter_notify(XiveFabric *xfb, uint8_t format,
                            uint8_t nvt_blk, uint32_t nvt_idx,
-                           bool cam_ignore, uint8_t priority,
-                           uint32_t logic_serv)
+                           bool crowd, bool cam_ignore, uint8_t priority,
+                           uint32_t logic_serv, bool *precluded)
 {
     XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xfb);
-    XiveTCTXMatch match = { .tctx = NULL, .ring = 0 };
+    XiveTCTXMatch match = { .tctx = NULL, .ring = 0, .precluded = false };
+    uint8_t group_level;
     int count;
 
     /*
-     * Ask the machine to scan the interrupt controllers for a match
+     * Ask the machine to scan the interrupt controllers for a match.
+     *
+     * For VP-specific notification, we expect at most one match and
+     * one call to the presenters is all we need (abbreviated notify
+     * sequence documented by the architecture).
+     *
+     * For VP-group notification, match_nvt() is the equivalent of the
+     * "histogram" and "poll" commands sent to the power bus to the
+     * presenters. 'count' could be more than one, but we always
+     * select the first match for now. 'precluded' tells if (at least)
+     * one thread matches but can't take the interrupt now because
+     * it's running at a more favored priority. We return the
+     * information to the router so that it can take appropriate
+     * actions (backlog, escalation, broadcast, etc...)
+     *
+     * If we were to implement a better way of dispatching the
+     * interrupt in case of multiple matches (instead of the first
+     * match), we would need a heuristic to elect a thread (for
+     * example, the hardware keeps track of an 'age' in the TIMA) and
+     * a new command to the presenters (the equivalent of the "assign"
+     * power bus command in the documented full notify sequence.
      */
-    count = xfc->match_nvt(xfb, format, nvt_blk, nvt_idx, cam_ignore,
+    count = xfc->match_nvt(xfb, format, nvt_blk, nvt_idx, crowd, cam_ignore,
                            priority, logic_serv, &match);
     if (count < 0) {
         return false;
@@ -1756,9 +1811,11 @@ bool xive_presenter_notify(XiveFabric *xfb, uint8_t format,
 
     /* handle CPU exception delivery */
     if (count) {
-        trace_xive_presenter_notify(nvt_blk, nvt_idx, match.ring);
-        xive_tctx_ipb_update(match.tctx, match.ring,
-                             xive_priority_to_ipb(priority));
+        group_level = xive_get_group_level(crowd, cam_ignore, nvt_blk, nvt_idx);
+        trace_xive_presenter_notify(nvt_blk, nvt_idx, match.ring, group_level);
+        xive_tctx_pipr_update(match.tctx, match.ring, priority, group_level);
+    } else {
+        *precluded = match.precluded;
     }
 
     return !!count;
@@ -1798,7 +1855,7 @@ void xive_router_end_notify(XiveRouter *xrtr, XiveEAS *eas)
     uint8_t nvt_blk;
     uint32_t nvt_idx;
     XiveNVT nvt;
-    bool found;
+    bool found, precluded;
 
     uint8_t end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
     uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
@@ -1879,10 +1936,12 @@ void xive_router_end_notify(XiveRouter *xrtr, XiveEAS *eas)
     }
 
     found = xive_presenter_notify(xrtr->xfb, format, nvt_blk, nvt_idx,
+                          false /* crowd */,
                           xive_get_field32(END_W7_F0_IGNORE, end.w7),
                           priority,
-                          xive_get_field32(END_W7_F1_LOG_SERVER_ID, end.w7));
-
+                          xive_get_field32(END_W7_F1_LOG_SERVER_ID, end.w7),
+                          &precluded);
+    /* we don't support VP-group notification on P9, so precluded is not used */
     /* TODO: Auto EOI. */
 
     if (found) {
diff --git a/hw/intc/xive2.c b/hw/intc/xive2.c
index fc5aed3315800..f8ef615487824 100644
--- a/hw/intc/xive2.c
+++ b/hw/intc/xive2.c
@@ -1,10 +1,9 @@
 /*
  * QEMU PowerPC XIVE2 interrupt controller model (POWER10)
  *
- * Copyright (c) 2019-2022, IBM Corporation..
+ * Copyright (c) 2019-2024, IBM Corporation..
  *
- * This code is licensed under the GPL version 2 or later. See the
- * COPYING file in the top-level directory.
+ * SPDX-License-Identifier: GPL-2.0-or-later
  */
 
 #include "qemu/osdep.h"
@@ -18,6 +17,7 @@
 #include "hw/ppc/xive.h"
 #include "hw/ppc/xive2.h"
 #include "hw/ppc/xive2_regs.h"
+#include "trace.h"
 
 uint32_t xive2_router_get_config(Xive2Router *xrtr)
 {
@@ -54,7 +54,8 @@ static uint32_t xive2_nvgc_get_backlog(Xive2Nvgc *nvgc, uint8_t priority)
 
     /*
      * The per-priority backlog counters are 24-bit and the structure
-     * is stored in big endian
+     * is stored in big endian. NVGC is 32-bytes long, so 24-bytes from
+     * w2, which fits 8 priorities * 24-bits per priority.
      */
     ptr = (uint8_t *)&nvgc->w2 + priority * 3;
     for (i = 0; i < 3; i++, ptr++) {
@@ -63,6 +64,117 @@ static uint32_t xive2_nvgc_get_backlog(Xive2Nvgc *nvgc, uint8_t priority)
     return val;
 }
 
+static void xive2_nvgc_set_backlog(Xive2Nvgc *nvgc, uint8_t priority,
+                                   uint32_t val)
+{
+    uint8_t *ptr, i;
+    uint32_t shift;
+
+    if (priority > 7) {
+        return;
+    }
+
+    if (val > 0xFFFFFF) {
+        val = 0xFFFFFF;
+    }
+    /*
+     * The per-priority backlog counters are 24-bit and the structure
+     * is stored in big endian
+     */
+    ptr = (uint8_t *)&nvgc->w2 + priority * 3;
+    for (i = 0; i < 3; i++, ptr++) {
+        shift = 8 * (2 - i);
+        *ptr = (val >> shift) & 0xFF;
+    }
+}
+
+uint64_t xive2_presenter_nvgc_backlog_op(XivePresenter *xptr,
+                                         bool crowd,
+                                         uint8_t blk, uint32_t idx,
+                                         uint16_t offset, uint16_t val)
+{
+    Xive2Router *xrtr = XIVE2_ROUTER(xptr);
+    uint8_t priority = GETFIELD(NVx_BACKLOG_PRIO, offset);
+    uint8_t op = GETFIELD(NVx_BACKLOG_OP, offset);
+    Xive2Nvgc nvgc;
+    uint32_t count, old_count;
+
+    if (xive2_router_get_nvgc(xrtr, crowd, blk, idx, &nvgc)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No %s %x/%x\n",
+                      crowd ? "NVC" : "NVG", blk, idx);
+        return -1;
+    }
+    if (!xive2_nvgc_is_valid(&nvgc)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid NVG %x/%x\n", blk, idx);
+        return -1;
+    }
+
+    old_count = xive2_nvgc_get_backlog(&nvgc, priority);
+    count = old_count;
+    /*
+     * op:
+     * 0b00 => increment
+     * 0b01 => decrement
+     * 0b1- => read
+     */
+    if (op == 0b00 || op == 0b01) {
+        if (op == 0b00) {
+            count += val;
+        } else {
+            if (count > val) {
+                count -= val;
+            } else {
+                count = 0;
+            }
+        }
+        xive2_nvgc_set_backlog(&nvgc, priority, count);
+        xive2_router_write_nvgc(xrtr, crowd, blk, idx, &nvgc);
+    }
+    trace_xive_nvgc_backlog_op(crowd, blk, idx, op, priority, old_count);
+    return old_count;
+}
+
+uint64_t xive2_presenter_nvp_backlog_op(XivePresenter *xptr,
+                                        uint8_t blk, uint32_t idx,
+                                        uint16_t offset)
+{
+    Xive2Router *xrtr = XIVE2_ROUTER(xptr);
+    uint8_t priority = GETFIELD(NVx_BACKLOG_PRIO, offset);
+    uint8_t op = GETFIELD(NVx_BACKLOG_OP, offset);
+    Xive2Nvp nvp;
+    uint8_t ipb, old_ipb, rc;
+
+    if (xive2_router_get_nvp(xrtr, blk, idx, &nvp)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No NVP %x/%x\n", blk, idx);
+        return -1;
+    }
+    if (!xive2_nvp_is_valid(&nvp)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid NVP %x/%x\n", blk, idx);
+        return -1;
+    }
+
+    old_ipb = xive_get_field32(NVP2_W2_IPB, nvp.w2);
+    ipb = old_ipb;
+    /*
+     * op:
+     * 0b00 => set priority bit
+     * 0b01 => reset priority bit
+     * 0b1- => read
+     */
+    if (op == 0b00 || op == 0b01) {
+        if (op == 0b00) {
+            ipb |= xive_priority_to_ipb(priority);
+        } else {
+            ipb &= ~xive_priority_to_ipb(priority);
+        }
+        nvp.w2 = xive_set_field32(NVP2_W2_IPB, nvp.w2, ipb);
+        xive2_router_write_nvp(xrtr, blk, idx, &nvp, 2);
+    }
+    rc = !!(old_ipb & xive_priority_to_ipb(priority));
+    trace_xive_nvp_backlog_op(blk, idx, op, priority, rc);
+    return rc;
+}
+
 void xive2_eas_pic_print_info(Xive2Eas *eas, uint32_t lisn, GString *buf)
 {
     if (!xive2_eas_is_valid(eas)) {
@@ -114,8 +226,8 @@ void xive2_end_pic_print_info(Xive2End *end, uint32_t end_idx, GString *buf)
     uint32_t qsize = xive_get_field32(END2_W3_QSIZE, end->w3);
     uint32_t qentries = 1 << (qsize + 10);
 
-    uint32_t nvp_blk = xive_get_field32(END2_W6_VP_BLOCK, end->w6);
-    uint32_t nvp_idx = xive_get_field32(END2_W6_VP_OFFSET, end->w6);
+    uint32_t nvx_blk = xive_get_field32(END2_W6_VP_BLOCK, end->w6);
+    uint32_t nvx_idx = xive_get_field32(END2_W6_VP_OFFSET, end->w6);
     uint8_t priority = xive_get_field32(END2_W7_F0_PRIORITY, end->w7);
     uint8_t pq;
 
@@ -144,7 +256,7 @@ void xive2_end_pic_print_info(Xive2End *end, uint32_t end_idx, GString *buf)
                            xive2_end_is_firmware2(end)   ? 'F' : '-',
                            xive2_end_is_ignore(end) ? 'i' : '-',
                            xive2_end_is_crowd(end)  ? 'c' : '-',
-                           priority, nvp_blk, nvp_idx);
+                           priority, nvx_blk, nvx_idx);
 
     if (qaddr_base) {
         g_string_append_printf(buf, " eq:@%08"PRIx64"% 6d/%5d ^%d",
@@ -255,6 +367,115 @@ static void xive2_end_enqueue(Xive2End *end, uint32_t data)
     end->w1 = xive_set_field32(END2_W1_PAGE_OFF, end->w1, qindex);
 }
 
+static void xive2_pgofnext(uint8_t *nvgc_blk, uint32_t *nvgc_idx,
+                           uint8_t next_level)
+{
+    uint32_t mask, next_idx;
+    uint8_t next_blk;
+
+    /*
+     * Adjust the block and index of a VP for the next group/crowd
+     * size (PGofFirst/PGofNext field in the NVP and NVGC structures).
+     *
+     * The 6-bit group level is split into a 2-bit crowd and 4-bit
+     * group levels. Encoding is similar. However, we don't support
+     * crowd size of 8. So a crowd level of 0b11 is bumped to a crowd
+     * size of 16.
+     */
+    next_blk = NVx_CROWD_LVL(next_level);
+    if (next_blk == 3) {
+        next_blk = 4;
+    }
+    mask = (1 << next_blk) - 1;
+    *nvgc_blk &= ~mask;
+    *nvgc_blk |= mask >> 1;
+
+    next_idx = NVx_GROUP_LVL(next_level);
+    mask = (1 << next_idx) - 1;
+    *nvgc_idx &= ~mask;
+    *nvgc_idx |= mask >> 1;
+}
+
+/*
+ * Scan the group chain and return the highest priority and group
+ * level of pending group interrupts.
+ */
+static uint8_t xive2_presenter_backlog_scan(XivePresenter *xptr,
+                                            uint8_t nvx_blk, uint32_t nvx_idx,
+                                            uint8_t first_group,
+                                            uint8_t *out_level)
+{
+    Xive2Router *xrtr = XIVE2_ROUTER(xptr);
+    uint32_t nvgc_idx;
+    uint32_t current_level, count;
+    uint8_t nvgc_blk, prio;
+    Xive2Nvgc nvgc;
+
+    for (prio = 0; prio <= XIVE_PRIORITY_MAX; prio++) {
+        current_level = first_group & 0x3F;
+        nvgc_blk = nvx_blk;
+        nvgc_idx = nvx_idx;
+
+        while (current_level) {
+            xive2_pgofnext(&nvgc_blk, &nvgc_idx, current_level);
+
+            if (xive2_router_get_nvgc(xrtr, NVx_CROWD_LVL(current_level),
+                                      nvgc_blk, nvgc_idx, &nvgc)) {
+                qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No NVGC %x/%x\n",
+                              nvgc_blk, nvgc_idx);
+                return 0xFF;
+            }
+            if (!xive2_nvgc_is_valid(&nvgc)) {
+                qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid NVGC %x/%x\n",
+                              nvgc_blk, nvgc_idx);
+                return 0xFF;
+            }
+
+            count = xive2_nvgc_get_backlog(&nvgc, prio);
+            if (count) {
+                *out_level = current_level;
+                return prio;
+            }
+            current_level = xive_get_field32(NVGC2_W0_PGONEXT, nvgc.w0) & 0x3F;
+        }
+    }
+    return 0xFF;
+}
+
+static void xive2_presenter_backlog_decr(XivePresenter *xptr,
+                                         uint8_t nvx_blk, uint32_t nvx_idx,
+                                         uint8_t group_prio,
+                                         uint8_t group_level)
+{
+    Xive2Router *xrtr = XIVE2_ROUTER(xptr);
+    uint32_t nvgc_idx, count;
+    uint8_t nvgc_blk;
+    Xive2Nvgc nvgc;
+
+    nvgc_blk = nvx_blk;
+    nvgc_idx = nvx_idx;
+    xive2_pgofnext(&nvgc_blk, &nvgc_idx, group_level);
+
+    if (xive2_router_get_nvgc(xrtr, NVx_CROWD_LVL(group_level),
+                              nvgc_blk, nvgc_idx, &nvgc)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No NVGC %x/%x\n",
+                      nvgc_blk, nvgc_idx);
+        return;
+    }
+    if (!xive2_nvgc_is_valid(&nvgc)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid NVGC %x/%x\n",
+                      nvgc_blk, nvgc_idx);
+        return;
+    }
+    count = xive2_nvgc_get_backlog(&nvgc, group_prio);
+    if (!count) {
+        return;
+    }
+    xive2_nvgc_set_backlog(&nvgc, group_prio, count - 1);
+    xive2_router_write_nvgc(xrtr, NVx_CROWD_LVL(group_level),
+                            nvgc_blk, nvgc_idx, &nvgc);
+}
+
 /*
  * XIVE Thread Interrupt Management Area (TIMA) - Gen2 mode
  *
@@ -313,7 +534,19 @@ static void xive2_tctx_save_ctx(Xive2Router *xrtr, XiveTCTX *tctx,
 
     nvp.w2 = xive_set_field32(NVP2_W2_IPB, nvp.w2, regs[TM_IPB]);
     nvp.w2 = xive_set_field32(NVP2_W2_CPPR, nvp.w2, regs[TM_CPPR]);
-    nvp.w2 = xive_set_field32(NVP2_W2_LSMFB, nvp.w2, regs[TM_LSMFB]);
+    if (nvp.w0 & NVP2_W0_L) {
+        /*
+         * Typically not used. If LSMFB is restored with 0, it will
+         * force a backlog rescan
+         */
+        nvp.w2 = xive_set_field32(NVP2_W2_LSMFB, nvp.w2, regs[TM_LSMFB]);
+    }
+    if (nvp.w0 & NVP2_W0_G) {
+        nvp.w2 = xive_set_field32(NVP2_W2_LGS, nvp.w2, regs[TM_LGS]);
+    }
+    if (nvp.w0 & NVP2_W0_T) {
+        nvp.w2 = xive_set_field32(NVP2_W2_T, nvp.w2, regs[TM_T]);
+    }
     xive2_router_write_nvp(xrtr, nvp_blk, nvp_idx, &nvp, 2);
 
     nvp.w1 = xive_set_field32(NVP2_W1_CO, nvp.w1, 0);
@@ -527,7 +760,9 @@ static uint8_t xive2_tctx_restore_os_ctx(Xive2Router *xrtr, XiveTCTX *tctx,
     xive2_router_write_nvp(xrtr, nvp_blk, nvp_idx, nvp, 2);
 
     tctx->regs[TM_QW1_OS + TM_CPPR] = cppr;
-    /* we don't model LSMFB */
+    tctx->regs[TM_QW1_OS + TM_LSMFB] = xive_get_field32(NVP2_W2_LSMFB, nvp->w2);
+    tctx->regs[TM_QW1_OS + TM_LGS] = xive_get_field32(NVP2_W2_LGS, nvp->w2);
+    tctx->regs[TM_QW1_OS + TM_T] = xive_get_field32(NVP2_W2_T, nvp->w2);
 
     nvp->w1 = xive_set_field32(NVP2_W1_CO, nvp->w1, 1);
     nvp->w1 = xive_set_field32(NVP2_W1_CO_THRID_VALID, nvp->w1, 1);
@@ -550,8 +785,15 @@ static void xive2_tctx_need_resend(Xive2Router *xrtr, XiveTCTX *tctx,
                                    uint8_t nvp_blk, uint32_t nvp_idx,
                                    bool do_restore)
 {
-    Xive2Nvp nvp;
+    XivePresenter *xptr = XIVE_PRESENTER(xrtr);
     uint8_t ipb;
+    uint8_t backlog_level;
+    uint8_t group_level;
+    uint8_t first_group;
+    uint8_t backlog_prio;
+    uint8_t group_prio;
+    uint8_t *regs = &tctx->regs[TM_QW1_OS];
+    Xive2Nvp nvp;
 
     /*
      * Grab the associated thread interrupt context registers in the
@@ -580,15 +822,29 @@ static void xive2_tctx_need_resend(Xive2Router *xrtr, XiveTCTX *tctx,
         nvp.w2 = xive_set_field32(NVP2_W2_IPB, nvp.w2, 0);
         xive2_router_write_nvp(xrtr, nvp_blk, nvp_idx, &nvp, 2);
     }
+    regs[TM_IPB] |= ipb;
+    backlog_prio = xive_ipb_to_pipr(ipb);
+    backlog_level = 0;
+
+    first_group = xive_get_field32(NVP2_W0_PGOFIRST, nvp.w0);
+    if (first_group && regs[TM_LSMFB] < backlog_prio) {
+        group_prio = xive2_presenter_backlog_scan(xptr, nvp_blk, nvp_idx,
+                                                  first_group, &group_level);
+        regs[TM_LSMFB] = group_prio;
+        if (regs[TM_LGS] && group_prio < backlog_prio) {
+            /* VP can take a group interrupt */
+            xive2_presenter_backlog_decr(xptr, nvp_blk, nvp_idx,
+                                         group_prio, group_level);
+            backlog_prio = group_prio;
+            backlog_level = group_level;
+        }
+    }
+
     /*
-     * Always call xive_tctx_ipb_update(). Even if there were no
-     * escalation triggered, there could be a pending interrupt which
-     * was saved when the context was pulled and that we need to take
-     * into account by recalculating the PIPR (which is not
-     * saved/restored).
-     * It will also raise the External interrupt signal if needed.
+     * Compute the PIPR based on the restored state.
+     * It will raise the External interrupt signal if needed.
      */
-    xive_tctx_ipb_update(tctx, TM_QW1_OS, ipb);
+    xive_tctx_pipr_update(tctx, TM_QW1_OS, backlog_prio, backlog_level);
 }
 
 /*
@@ -630,6 +886,172 @@ void xive2_tm_push_os_ctx(XivePresenter *xptr, XiveTCTX *tctx,
     }
 }
 
+static int xive2_tctx_get_nvp_indexes(XiveTCTX *tctx, uint8_t ring,
+                                      uint32_t *nvp_blk, uint32_t *nvp_idx)
+{
+    uint32_t w2, cam;
+
+    w2 = xive_tctx_word2(&tctx->regs[ring]);
+    switch (ring) {
+    case TM_QW1_OS:
+        if (!(be32_to_cpu(w2) & TM2_QW1W2_VO)) {
+            return -1;
+        }
+        cam = xive_get_field32(TM2_QW1W2_OS_CAM, w2);
+        break;
+    case TM_QW2_HV_POOL:
+        if (!(be32_to_cpu(w2) & TM2_QW2W2_VP)) {
+            return -1;
+        }
+        cam = xive_get_field32(TM2_QW2W2_POOL_CAM, w2);
+        break;
+    case TM_QW3_HV_PHYS:
+        if (!(be32_to_cpu(w2) & TM2_QW3W2_VT)) {
+            return -1;
+        }
+        cam = xive2_tctx_hw_cam_line(tctx->xptr, tctx);
+        break;
+    default:
+        return -1;
+    }
+    *nvp_blk = xive2_nvp_blk(cam);
+    *nvp_idx = xive2_nvp_idx(cam);
+    return 0;
+}
+
+static void xive2_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)
+{
+    uint8_t *regs = &tctx->regs[ring];
+    Xive2Router *xrtr = XIVE2_ROUTER(tctx->xptr);
+    uint8_t old_cppr, backlog_prio, first_group, group_level = 0;
+    uint8_t pipr_min, lsmfb_min, ring_min;
+    bool group_enabled;
+    uint32_t nvp_blk, nvp_idx;
+    Xive2Nvp nvp;
+    int rc;
+
+    trace_xive_tctx_set_cppr(tctx->cs->cpu_index, ring,
+                             regs[TM_IPB], regs[TM_PIPR],
+                             cppr, regs[TM_NSR]);
+
+    if (cppr > XIVE_PRIORITY_MAX) {
+        cppr = 0xff;
+    }
+
+    old_cppr = regs[TM_CPPR];
+    regs[TM_CPPR] = cppr;
+
+    /*
+     * Recompute the PIPR based on local pending interrupts. It will
+     * be adjusted below if needed in case of pending group interrupts.
+     */
+    pipr_min = xive_ipb_to_pipr(regs[TM_IPB]);
+    group_enabled = !!regs[TM_LGS];
+    lsmfb_min = (group_enabled) ? regs[TM_LSMFB] : 0xff;
+    ring_min = ring;
+
+    /* PHYS updates also depend on POOL values */
+    if (ring == TM_QW3_HV_PHYS) {
+        uint8_t *pregs = &tctx->regs[TM_QW2_HV_POOL];
+
+        /* POOL values only matter if POOL ctx is valid */
+        if (pregs[TM_WORD2] & 0x80) {
+
+            uint8_t pool_pipr = xive_ipb_to_pipr(pregs[TM_IPB]);
+            uint8_t pool_lsmfb = pregs[TM_LSMFB];
+
+            /*
+             * Determine highest priority interrupt and
+             * remember which ring has it.
+             */
+            if (pool_pipr < pipr_min) {
+                pipr_min = pool_pipr;
+                if (pool_pipr < lsmfb_min) {
+                    ring_min = TM_QW2_HV_POOL;
+                }
+            }
+
+            /* Values needed for group priority calculation */
+            if (pregs[TM_LGS] && (pool_lsmfb < lsmfb_min)) {
+                group_enabled = true;
+                lsmfb_min = pool_lsmfb;
+                if (lsmfb_min < pipr_min) {
+                    ring_min = TM_QW2_HV_POOL;
+                }
+            }
+        }
+    }
+    regs[TM_PIPR] = pipr_min;
+
+    rc = xive2_tctx_get_nvp_indexes(tctx, ring_min, &nvp_blk, &nvp_idx);
+    if (rc) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: set CPPR on invalid context\n");
+        return;
+    }
+
+    if (cppr < old_cppr) {
+        /*
+         * FIXME: check if there's a group interrupt being presented
+         * and if the new cppr prevents it. If so, then the group
+         * interrupt needs to be re-added to the backlog and
+         * re-triggered (see re-trigger END info in the NVGC
+         * structure)
+         */
+    }
+
+    if (group_enabled &&
+        lsmfb_min < cppr &&
+        lsmfb_min < regs[TM_PIPR]) {
+        /*
+         * Thread has seen a group interrupt with a higher priority
+         * than the new cppr or pending local interrupt. Check the
+         * backlog
+         */
+        if (xive2_router_get_nvp(xrtr, nvp_blk, nvp_idx, &nvp)) {
+            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No NVP %x/%x\n",
+                          nvp_blk, nvp_idx);
+            return;
+        }
+
+        if (!xive2_nvp_is_valid(&nvp)) {
+            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid NVP %x/%x\n",
+                          nvp_blk, nvp_idx);
+            return;
+        }
+
+        first_group = xive_get_field32(NVP2_W0_PGOFIRST, nvp.w0);
+        if (!first_group) {
+            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid NVP %x/%x\n",
+                          nvp_blk, nvp_idx);
+            return;
+        }
+
+        backlog_prio = xive2_presenter_backlog_scan(tctx->xptr,
+                                                    nvp_blk, nvp_idx,
+                                                    first_group, &group_level);
+        tctx->regs[ring_min + TM_LSMFB] = backlog_prio;
+        if (backlog_prio != 0xFF) {
+            xive2_presenter_backlog_decr(tctx->xptr, nvp_blk, nvp_idx,
+                                         backlog_prio, group_level);
+            regs[TM_PIPR] = backlog_prio;
+        }
+    }
+    /* CPPR has changed, check if we need to raise a pending exception */
+    xive_tctx_notify(tctx, ring_min, group_level);
+}
+
+void xive2_tm_set_hv_cppr(XivePresenter *xptr, XiveTCTX *tctx,
+                          hwaddr offset, uint64_t value, unsigned size)
+{
+    xive2_tctx_set_cppr(tctx, TM_QW3_HV_PHYS, value & 0xff);
+}
+
+void xive2_tm_set_os_cppr(XivePresenter *xptr, XiveTCTX *tctx,
+                          hwaddr offset, uint64_t value, unsigned size)
+{
+    xive2_tctx_set_cppr(tctx, TM_QW1_OS, value & 0xff);
+}
+
 static void xive2_tctx_set_target(XiveTCTX *tctx, uint8_t ring, uint8_t target)
 {
     uint8_t *regs = &tctx->regs[ring];
@@ -723,13 +1145,46 @@ int xive2_router_write_nvgc(Xive2Router *xrtr, bool crowd,
    return xrc->write_nvgc(xrtr, crowd, nvgc_blk, nvgc_idx, nvgc);
 }
 
+static bool xive2_vp_match_mask(uint32_t cam1, uint32_t cam2,
+                                uint32_t vp_mask)
+{
+    return (cam1 & vp_mask) == (cam2 & vp_mask);
+}
+
+static uint8_t xive2_get_vp_block_mask(uint32_t nvt_blk, bool crowd)
+{
+    uint8_t size, block_mask = 0b1111;
+
+    /* 3 supported crowd sizes: 2, 4, 16 */
+    if (crowd) {
+        size = xive_get_vpgroup_size(nvt_blk);
+        if (size == 8) {
+            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid crowd size of 8n");
+            return block_mask;
+        }
+        block_mask &= ~(size - 1);
+    }
+    return block_mask;
+}
+
+static uint32_t xive2_get_vp_index_mask(uint32_t nvt_index, bool cam_ignore)
+{
+    uint32_t index_mask = 0xFFFFFF; /* 24 bits */
+
+    if (cam_ignore) {
+        index_mask &= ~(xive_get_vpgroup_size(nvt_index) - 1);
+    }
+    return index_mask;
+}
+
 /*
  * The thread context register words are in big-endian format.
  */
 int xive2_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx,
                                uint8_t format,
                                uint8_t nvt_blk, uint32_t nvt_idx,
-                               bool cam_ignore, uint32_t logic_serv)
+                               bool crowd, bool cam_ignore,
+                               uint32_t logic_serv)
 {
     uint32_t cam =   xive2_nvp_cam_line(nvt_blk, nvt_idx);
     uint32_t qw3w2 = xive_tctx_word2(&tctx->regs[TM_QW3_HV_PHYS]);
@@ -737,44 +1192,51 @@ int xive2_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx,
     uint32_t qw1w2 = xive_tctx_word2(&tctx->regs[TM_QW1_OS]);
     uint32_t qw0w2 = xive_tctx_word2(&tctx->regs[TM_QW0_USER]);
 
-    /*
-     * TODO (PowerNV): ignore mode. The low order bits of the NVT
-     * identifier are ignored in the "CAM" match.
-     */
+    uint32_t index_mask, vp_mask;
+    uint8_t block_mask;
 
     if (format == 0) {
-        if (cam_ignore == true) {
-            /*
-             * F=0 & i=1: Logical server notification (bits ignored at
-             * the end of the NVT identifier)
-             */
-            qemu_log_mask(LOG_UNIMP, "XIVE: no support for LS NVT %x/%x\n",
-                          nvt_blk, nvt_idx);
-            return -1;
-        }
+        /*
+         * i=0: Specific NVT notification
+         * i=1: VP-group notification (bits ignored at the end of the
+         *      NVT identifier)
+         */
+        block_mask = xive2_get_vp_block_mask(nvt_blk, crowd);
+        index_mask = xive2_get_vp_index_mask(nvt_idx, cam_ignore);
+        vp_mask = xive2_nvp_cam_line(block_mask, index_mask);
 
-        /* F=0 & i=0: Specific NVT notification */
+        /* For VP-group notifications, threads with LGS=0 are excluded */
 
         /* PHYS ring */
         if ((be32_to_cpu(qw3w2) & TM2_QW3W2_VT) &&
-            cam == xive2_tctx_hw_cam_line(xptr, tctx)) {
+            !(cam_ignore && tctx->regs[TM_QW3_HV_PHYS + TM_LGS] == 0) &&
+            xive2_vp_match_mask(cam,
+                                xive2_tctx_hw_cam_line(xptr, tctx),
+                                vp_mask)) {
             return TM_QW3_HV_PHYS;
         }
 
         /* HV POOL ring */
         if ((be32_to_cpu(qw2w2) & TM2_QW2W2_VP) &&
-            cam == xive_get_field32(TM2_QW2W2_POOL_CAM, qw2w2)) {
+            !(cam_ignore && tctx->regs[TM_QW2_HV_POOL + TM_LGS] == 0) &&
+            xive2_vp_match_mask(cam,
+                                xive_get_field32(TM2_QW2W2_POOL_CAM, qw2w2),
+                                vp_mask)) {
             return TM_QW2_HV_POOL;
         }
 
         /* OS ring */
         if ((be32_to_cpu(qw1w2) & TM2_QW1W2_VO) &&
-            cam == xive_get_field32(TM2_QW1W2_OS_CAM, qw1w2)) {
+            !(cam_ignore && tctx->regs[TM_QW1_OS + TM_LGS] == 0) &&
+            xive2_vp_match_mask(cam,
+                                xive_get_field32(TM2_QW1W2_OS_CAM, qw1w2),
+                                vp_mask)) {
             return TM_QW1_OS;
         }
     } else {
         /* F=1 : User level Event-Based Branch (EBB) notification */
 
+        /* FIXME: what if cam_ignore and LGS = 0 ? */
         /* USER ring */
         if  ((be32_to_cpu(qw1w2) & TM2_QW1W2_VO) &&
              (cam == xive_get_field32(TM2_QW1W2_OS_CAM, qw1w2)) &&
@@ -786,6 +1248,37 @@ int xive2_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx,
     return -1;
 }
 
+bool xive2_tm_irq_precluded(XiveTCTX *tctx, int ring, uint8_t priority)
+{
+    /* HV_POOL ring uses HV_PHYS NSR, CPPR and PIPR registers */
+    uint8_t alt_ring = (ring == TM_QW2_HV_POOL) ? TM_QW3_HV_PHYS : ring;
+    uint8_t *alt_regs = &tctx->regs[alt_ring];
+
+    /*
+     * The xive2_presenter_tctx_match() above tells if there's a match
+     * but for VP-group notification, we still need to look at the
+     * priority to know if the thread can take the interrupt now or if
+     * it is precluded.
+     */
+    if (priority < alt_regs[TM_CPPR]) {
+        return false;
+    }
+    return true;
+}
+
+void xive2_tm_set_lsmfb(XiveTCTX *tctx, int ring, uint8_t priority)
+{
+    uint8_t *regs = &tctx->regs[ring];
+
+    /*
+     * Called by the router during a VP-group notification when the
+     * thread matches but can't take the interrupt because it's
+     * already running at a more favored priority. It then stores the
+     * new interrupt priority in the LSMFB field.
+     */
+    regs[TM_LSMFB] = priority;
+}
+
 static void xive2_router_realize(DeviceState *dev, Error **errp)
 {
     Xive2Router *xrtr = XIVE2_ROUTER(dev);
@@ -825,10 +1318,9 @@ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk,
     Xive2End end;
     uint8_t priority;
     uint8_t format;
-    bool found;
-    Xive2Nvp nvp;
-    uint8_t nvp_blk;
-    uint32_t nvp_idx;
+    bool found, precluded;
+    uint8_t nvx_blk;
+    uint32_t nvx_idx;
 
     /* END cache lookup */
     if (xive2_router_get_end(xrtr, end_blk, end_idx, &end)) {
@@ -843,6 +1335,12 @@ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk,
         return;
     }
 
+    if (xive2_end_is_crowd(&end) & !xive2_end_is_ignore(&end)) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "XIVE: invalid END, 'crowd' bit requires 'ignore' bit\n");
+        return;
+    }
+
     if (xive2_end_is_enqueue(&end)) {
         xive2_end_enqueue(&end, end_data);
         /* Enqueuing event data modifies the EQ toggle and index */
@@ -887,26 +1385,14 @@ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk,
     /*
      * Follows IVPE notification
      */
-    nvp_blk = xive_get_field32(END2_W6_VP_BLOCK, end.w6);
-    nvp_idx = xive_get_field32(END2_W6_VP_OFFSET, end.w6);
+    nvx_blk = xive_get_field32(END2_W6_VP_BLOCK, end.w6);
+    nvx_idx = xive_get_field32(END2_W6_VP_OFFSET, end.w6);
 
-    /* NVP cache lookup */
-    if (xive2_router_get_nvp(xrtr, nvp_blk, nvp_idx, &nvp)) {
-        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: no NVP %x/%x\n",
-                      nvp_blk, nvp_idx);
-        return;
-    }
-
-    if (!xive2_nvp_is_valid(&nvp)) {
-        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: NVP %x/%x is invalid\n",
-                      nvp_blk, nvp_idx);
-        return;
-    }
-
-    found = xive_presenter_notify(xrtr->xfb, format, nvp_blk, nvp_idx,
-                          xive2_end_is_ignore(&end),
+    found = xive_presenter_notify(xrtr->xfb, format, nvx_blk, nvx_idx,
+                          xive2_end_is_crowd(&end), xive2_end_is_ignore(&end),
                           priority,
-                          xive_get_field32(END2_W7_F1_LOG_SERVER_ID, end.w7));
+                          xive_get_field32(END2_W7_F1_LOG_SERVER_ID, end.w7),
+                          &precluded);
 
     /* TODO: Auto EOI. */
 
@@ -917,10 +1403,9 @@ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk,
     /*
      * If no matching NVP is dispatched on a HW thread :
      * - specific VP: update the NVP structure if backlog is activated
-     * - logical server : forward request to IVPE (not supported)
+     * - VP-group: update the backlog counter for that priority in the NVG
      */
     if (xive2_end_is_backlog(&end)) {
-        uint8_t ipb;
 
         if (format == 1) {
             qemu_log_mask(LOG_GUEST_ERROR,
@@ -929,19 +1414,82 @@ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk,
             return;
         }
 
-        /*
-         * Record the IPB in the associated NVP structure for later
-         * use. The presenter will resend the interrupt when the vCPU
-         * is dispatched again on a HW thread.
-         */
-        ipb = xive_get_field32(NVP2_W2_IPB, nvp.w2) |
-            xive_priority_to_ipb(priority);
-        nvp.w2 = xive_set_field32(NVP2_W2_IPB, nvp.w2, ipb);
-        xive2_router_write_nvp(xrtr, nvp_blk, nvp_idx, &nvp, 2);
+        if (!xive2_end_is_ignore(&end)) {
+            uint8_t ipb;
+            Xive2Nvp nvp;
 
-        /*
-         * On HW, follows a "Broadcast Backlog" to IVPEs
-         */
+            /* NVP cache lookup */
+            if (xive2_router_get_nvp(xrtr, nvx_blk, nvx_idx, &nvp)) {
+                qemu_log_mask(LOG_GUEST_ERROR, "XIVE: no NVP %x/%x\n",
+                              nvx_blk, nvx_idx);
+                return;
+            }
+
+            if (!xive2_nvp_is_valid(&nvp)) {
+                qemu_log_mask(LOG_GUEST_ERROR, "XIVE: NVP %x/%x is invalid\n",
+                              nvx_blk, nvx_idx);
+                return;
+            }
+
+            /*
+             * Record the IPB in the associated NVP structure for later
+             * use. The presenter will resend the interrupt when the vCPU
+             * is dispatched again on a HW thread.
+             */
+            ipb = xive_get_field32(NVP2_W2_IPB, nvp.w2) |
+                xive_priority_to_ipb(priority);
+            nvp.w2 = xive_set_field32(NVP2_W2_IPB, nvp.w2, ipb);
+            xive2_router_write_nvp(xrtr, nvx_blk, nvx_idx, &nvp, 2);
+        } else {
+            Xive2Nvgc nvgc;
+            uint32_t backlog;
+            bool crowd;
+
+            crowd = xive2_end_is_crowd(&end);
+
+            /*
+             * For groups and crowds, the per-priority backlog
+             * counters are stored in the NVG/NVC structures
+             */
+            if (xive2_router_get_nvgc(xrtr, crowd,
+                                      nvx_blk, nvx_idx, &nvgc)) {
+                qemu_log_mask(LOG_GUEST_ERROR, "XIVE: no %s %x/%x\n",
+                              crowd ? "NVC" : "NVG", nvx_blk, nvx_idx);
+                return;
+            }
+
+            if (!xive2_nvgc_is_valid(&nvgc)) {
+                qemu_log_mask(LOG_GUEST_ERROR, "XIVE: NVG %x/%x is invalid\n",
+                              nvx_blk, nvx_idx);
+                return;
+            }
+
+            /*
+             * Increment the backlog counter for that priority.
+             * We only call broadcast the first time the counter is
+             * incremented. broadcast will set the LSMFB field of the TIMA of
+             * relevant threads so that they know an interrupt is pending.
+             */
+            backlog = xive2_nvgc_get_backlog(&nvgc, priority) + 1;
+            xive2_nvgc_set_backlog(&nvgc, priority, backlog);
+            xive2_router_write_nvgc(xrtr, crowd, nvx_blk, nvx_idx, &nvgc);
+
+            if (backlog == 1) {
+                XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xrtr->xfb);
+                xfc->broadcast(xrtr->xfb, nvx_blk, nvx_idx,
+                               xive2_end_is_crowd(&end),
+                               xive2_end_is_ignore(&end),
+                               priority);
+
+                if (!xive2_end_is_precluded_escalation(&end)) {
+                    /*
+                     * The interrupt will be picked up when the
+                     * matching thread lowers its priority level
+                     */
+                    return;
+                }
+            }
+        }
     }
 
 do_escalation:
diff --git a/hw/isa/vt82c686.c b/hw/isa/vt82c686.c
index 6f44b381a5f70..43bd67eeef25a 100644
--- a/hw/isa/vt82c686.c
+++ b/hw/isa/vt82c686.c
@@ -224,7 +224,7 @@ static void via_pm_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
-    ViaPMInitInfo *info = data;
+    const ViaPMInitInfo *info = data;
 
     k->realize = via_pm_realize;
     k->config_write = pm_write_config;
diff --git a/hw/loongarch/Kconfig b/hw/loongarch/Kconfig
index fe1c6feac1387..bb2838b7b53a2 100644
--- a/hw/loongarch/Kconfig
+++ b/hw/loongarch/Kconfig
@@ -17,6 +17,7 @@ config LOONGARCH_VIRT
     select LOONGARCH_EXTIOI
     select LS7A_RTC
     select SMBIOS
+    select ACPI_CPU_HOTPLUG
     select ACPI_PCI
     select ACPI_HW_REDUCED
     select FW_CFG_DMA
diff --git a/hw/loongarch/virt-acpi-build.c b/hw/loongarch/virt-acpi-build.c
index 9ca88d63aebd8..fced6c445ac6d 100644
--- a/hw/loongarch/virt-acpi-build.c
+++ b/hw/loongarch/virt-acpi-build.c
@@ -47,6 +47,22 @@
 #define ACPI_BUILD_DPRINTF(fmt, ...)
 #endif
 
+static void virt_madt_cpu_entry(int uid,
+                                const CPUArchIdList *apic_ids,
+                                GArray *entry, bool force_enabled)
+{
+    uint32_t flags, apic_id = apic_ids->cpus[uid].arch_id;
+
+    flags = apic_ids->cpus[uid].cpu || force_enabled ? 1 /* Enabled */ : 0;
+
+    /* Rev 1.0b, Table 5-13 Processor Local APIC Structure */
+    build_append_int_noprefix(entry, 0, 1);       /* Type */
+    build_append_int_noprefix(entry, 8, 1);       /* Length */
+    build_append_int_noprefix(entry, uid, 1);     /* ACPI Processor ID */
+    build_append_int_noprefix(entry, apic_id, 1); /* APIC ID */
+    build_append_int_noprefix(entry, flags, 4); /* Flags */
+}
+
 /* build FADT */
 static void init_common_fadt_data(AcpiFadtData *data)
 {
@@ -112,7 +128,7 @@ build_madt(GArray *table_data, BIOSLinker *linker,
     MachineState *ms = MACHINE(lvms);
     MachineClass *mc = MACHINE_GET_CLASS(ms);
     const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(ms);
-    int i, arch_id;
+    int i, arch_id, flags;
     AcpiTable table = { .sig = "APIC", .rev = 1, .oem_id = lvms->oem_id,
                         .oem_table_id = lvms->oem_table_id };
 
@@ -125,13 +141,13 @@ build_madt(GArray *table_data, BIOSLinker *linker,
     for (i = 0; i < arch_ids->len; i++) {
         /* Processor Core Interrupt Controller Structure */
         arch_id = arch_ids->cpus[i].arch_id;
-
+        flags   = arch_ids->cpus[i].cpu ? 1 : 0;
         build_append_int_noprefix(table_data, 17, 1);    /* Type */
         build_append_int_noprefix(table_data, 15, 1);    /* Length */
         build_append_int_noprefix(table_data, 1, 1);     /* Version */
         build_append_int_noprefix(table_data, i, 4);     /* ACPI Processor ID */
         build_append_int_noprefix(table_data, arch_id, 4); /* Core ID */
-        build_append_int_noprefix(table_data, 1, 4);     /* Flags */
+        build_append_int_noprefix(table_data, flags, 4); /* Flags */
     }
 
     /* Extend I/O Interrupt Controller Structure */
@@ -338,6 +354,7 @@ build_la_ged_aml(Aml *dsdt, MachineState *machine)
 {
     uint32_t event;
     LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(machine);
+    CPUHotplugFeatures opts;
 
     build_ged_aml(dsdt, "\\_SB."GED_DEVICE,
                   HOTPLUG_HANDLER(lvms->acpi_ged),
@@ -350,6 +367,18 @@ build_la_ged_aml(Aml *dsdt, MachineState *machine)
                                  AML_SYSTEM_MEMORY,
                                  VIRT_GED_MEM_ADDR);
     }
+
+    if (event & ACPI_GED_CPU_HOTPLUG_EVT) {
+        opts.acpi_1_compatible = false;
+        opts.has_legacy_cphp = false;
+        opts.fw_unplugs_cpu = false;
+        opts.smi_path = NULL;
+
+        build_cpus_aml(dsdt, machine, opts, virt_madt_cpu_entry,
+                       VIRT_GED_CPUHP_ADDR, "\\_SB",
+                       AML_GED_EVT_CPU_SCAN_METHOD, AML_SYSTEM_MEMORY);
+    }
+
     acpi_dsdt_add_power_button(dsdt);
 }
 
diff --git a/hw/loongarch/virt-fdt-build.c b/hw/loongarch/virt-fdt-build.c
index dbc269afba4fe..728ce466996f8 100644
--- a/hw/loongarch/virt-fdt-build.c
+++ b/hw/loongarch/virt-fdt-build.c
@@ -527,7 +527,6 @@ void virt_fdt_setup(LoongArchVirtMachineState *lvms)
      * Put the FDT into the memory map as a ROM image: this will ensure
      * the FDT is copied again upon reset, even if addr points into RAM.
      */
-    qemu_fdt_dumpdtb(machine->fdt, lvms->fdt_size);
     rom_add_blob_fixed_as("fdt", machine->fdt, lvms->fdt_size, FDT_BASE,
                           &address_space_memory);
     qemu_register_reset_nosnapshotload(qemu_fdt_randomize_seeds,
diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
index f2aa0a9782e79..a5840ff96857e 100644
--- a/hw/loongarch/virt.c
+++ b/hw/loongarch/virt.c
@@ -45,14 +45,6 @@
 #include "hw/virtio/virtio-iommu.h"
 #include "qemu/error-report.h"
 
-static bool virt_is_veiointc_enabled(LoongArchVirtMachineState *lvms)
-{
-    if (lvms->veiointc == ON_OFF_AUTO_OFF) {
-        return false;
-    }
-    return true;
-}
-
 static void virt_get_veiointc(Object *obj, Visitor *v, const char *name,
                               void *opaque, Error **errp)
 {
@@ -195,11 +187,17 @@ static DeviceState *create_acpi_ged(DeviceState *pch_pic,
 {
     DeviceState *dev;
     MachineState *ms = MACHINE(lvms);
+    MachineClass *mc = MACHINE_GET_CLASS(lvms);
     uint32_t event = ACPI_GED_PWR_DOWN_EVT;
 
     if (ms->ram_slots) {
         event |= ACPI_GED_MEM_HOTPLUG_EVT;
     }
+
+    if (mc->has_hotpluggable_cpus) {
+        event |= ACPI_GED_CPU_HOTPLUG_EVT;
+    }
+
     dev = qdev_new(TYPE_ACPI_GED);
     qdev_prop_set_uint32(dev, "ged-event", event);
     sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
@@ -211,6 +209,10 @@ static DeviceState *create_acpi_ged(DeviceState *pch_pic,
     /* ged regs used for reset and power down */
     sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, VIRT_GED_REG_ADDR);
 
+    if (mc->has_hotpluggable_cpus) {
+        sysbus_mmio_map(SYS_BUS_DEVICE(dev), 3, VIRT_GED_CPUHP_ADDR);
+    }
+
     sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0,
                        qdev_get_gpio_in(pch_pic, VIRT_SCI_IRQ - VIRT_GSI_BASE));
     return dev;
@@ -320,11 +322,12 @@ static void virt_devices_init(DeviceState *pch_pic,
 
 static void virt_cpu_irq_init(LoongArchVirtMachineState *lvms)
 {
-    int num, pin;
+    int num;
     MachineState *ms = MACHINE(lvms);
     MachineClass *mc = MACHINE_GET_CLASS(ms);
     const CPUArchIdList *possible_cpus;
     CPUState *cs;
+    Error *err = NULL;
 
     /* cpu nodes */
     possible_cpus = mc->possible_cpu_arch_ids(ms);
@@ -334,18 +337,8 @@ static void virt_cpu_irq_init(LoongArchVirtMachineState *lvms)
             continue;
         }
 
-        /* connect ipi irq to cpu irq */
-        qdev_connect_gpio_out(lvms->ipi, num,
-                              qdev_get_gpio_in(DEVICE(cs), IRQ_IPI));
-
-        /*
-         * connect ext irq to the cpu irq
-         * cpu_pin[9:2] <= intc_pin[7:0]
-         */
-        for (pin = 0; pin < LS3A_INTC_IP; pin++) {
-            qdev_connect_gpio_out(lvms->extioi, (num * LS3A_INTC_IP + pin),
-                                  qdev_get_gpio_in(DEVICE(cs), pin + 2));
-        }
+        hotplug_handler_plug(HOTPLUG_HANDLER(lvms->ipi), DEVICE(cs), &err);
+        hotplug_handler_plug(HOTPLUG_HANDLER(lvms->extioi), DEVICE(cs), &err);
     }
 }
 
@@ -664,15 +657,13 @@ static void fw_cfg_add_memory(MachineState *ms)
 
 static void virt_init(MachineState *machine)
 {
-    LoongArchCPU *lacpu;
     const char *cpu_model = machine->cpu_type;
     MemoryRegion *address_space_mem = get_system_memory();
     LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(machine);
     int i;
     hwaddr base, size, ram_size = machine->ram_size;
-    const CPUArchIdList *possible_cpus;
     MachineClass *mc = MACHINE_GET_CLASS(machine);
-    CPUState *cpu;
+    Object *cpuobj;
 
     if (!cpu_model) {
         cpu_model = LOONGARCH_CPU_TYPE_NAME("la464");
@@ -688,14 +679,15 @@ static void virt_init(MachineState *machine)
     memory_region_add_subregion(&lvms->system_iocsr, 0, &lvms->iocsr_mem);
 
     /* Init CPUs */
-    possible_cpus = mc->possible_cpu_arch_ids(machine);
-    for (i = 0; i < possible_cpus->len; i++) {
-        cpu = cpu_create(machine->cpu_type);
-        cpu->cpu_index = i;
-        machine->possible_cpus->cpus[i].cpu = cpu;
-        lacpu = LOONGARCH_CPU(cpu);
-        lacpu->phy_id = machine->possible_cpus->cpus[i].arch_id;
-        lacpu->env.address_space_iocsr = &lvms->as_iocsr;
+    mc->possible_cpu_arch_ids(machine);
+    for (i = 0; i < machine->smp.cpus; i++) {
+        cpuobj = object_new(machine->cpu_type);
+        if (cpuobj == NULL) {
+            error_report("Fail to create object with type %s ",
+                         machine->cpu_type);
+            exit(EXIT_FAILURE);
+        }
+        qdev_realize_and_unref(DEVICE(cpuobj), NULL, &error_fatal);
     }
     fw_cfg_add_memory(machine);
 
@@ -791,6 +783,232 @@ static void virt_initfn(Object *obj)
     virt_flash_create(lvms);
 }
 
+static void virt_get_topo_from_index(MachineState *ms,
+                                     LoongArchCPUTopo *topo, int index)
+{
+    topo->socket_id = index / (ms->smp.cores * ms->smp.threads);
+    topo->core_id = index / ms->smp.threads % ms->smp.cores;
+    topo->thread_id = index % ms->smp.threads;
+}
+
+static unsigned int topo_align_up(unsigned int count)
+{
+    g_assert(count >= 1);
+    count -= 1;
+    return BIT(count ? 32 - clz32(count) : 0);
+}
+
+/*
+ * LoongArch Reference Manual Vol1, Chapter 7.4.12 CPU Identity
+ *  For CPU architecture, bit0 .. bit8 is valid for CPU id, max cpuid is 512
+ *  However for IPI/Eiointc interrupt controller, max supported cpu id for
+ *  irq routingis 256
+ *
+ *  Here max cpu id is 256 for virt machine
+ */
+static int virt_get_arch_id_from_topo(MachineState *ms, LoongArchCPUTopo *topo)
+{
+    int arch_id, threads, cores, sockets;
+
+    threads = topo_align_up(ms->smp.threads);
+    cores = topo_align_up(ms->smp.cores);
+    sockets = topo_align_up(ms->smp.sockets);
+    if ((threads * cores * sockets) > 256) {
+        error_report("Exceeding max cpuid 256 with sockets[%d] cores[%d]"
+                     " threads[%d]", ms->smp.sockets, ms->smp.cores,
+                     ms->smp.threads);
+        exit(1);
+    }
+
+    arch_id = topo->thread_id + topo->core_id * threads;
+    arch_id += topo->socket_id * threads * cores;
+    return arch_id;
+}
+
+/* Find cpu slot in machine->possible_cpus by arch_id */
+static CPUArchId *virt_find_cpu_slot(MachineState *ms, int arch_id)
+{
+    int n;
+    for (n = 0; n < ms->possible_cpus->len; n++) {
+        if (ms->possible_cpus->cpus[n].arch_id == arch_id) {
+            return &ms->possible_cpus->cpus[n];
+        }
+    }
+
+    return NULL;
+}
+
+/* Find cpu slot for cold-plut CPU object where cpu is NULL */
+static CPUArchId *virt_find_empty_cpu_slot(MachineState *ms)
+{
+    int n;
+    for (n = 0; n < ms->possible_cpus->len; n++) {
+        if (ms->possible_cpus->cpus[n].cpu == NULL) {
+            return &ms->possible_cpus->cpus[n];
+        }
+    }
+
+    return NULL;
+}
+
+static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev,
+                              DeviceState *dev, Error **errp)
+{
+    LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(hotplug_dev);
+    MachineState *ms = MACHINE(OBJECT(hotplug_dev));
+    LoongArchCPU *cpu = LOONGARCH_CPU(dev);
+    CPUState *cs = CPU(dev);
+    CPUArchId *cpu_slot;
+    Error *err = NULL;
+    LoongArchCPUTopo topo;
+    int arch_id;
+
+    if (lvms->acpi_ged) {
+        if ((cpu->thread_id < 0) || (cpu->thread_id >= ms->smp.threads)) {
+            error_setg(&err,
+                       "Invalid thread-id %u specified, must be in range 1:%u",
+                       cpu->thread_id, ms->smp.threads - 1);
+            goto out;
+        }
+
+        if ((cpu->core_id < 0) || (cpu->core_id >= ms->smp.cores)) {
+            error_setg(&err,
+                       "Invalid core-id %u specified, must be in range 1:%u",
+                       cpu->core_id, ms->smp.cores - 1);
+            goto out;
+        }
+
+        if ((cpu->socket_id < 0) || (cpu->socket_id >= ms->smp.sockets)) {
+            error_setg(&err,
+                       "Invalid socket-id %u specified, must be in range 1:%u",
+                       cpu->socket_id, ms->smp.sockets - 1);
+            goto out;
+        }
+
+        topo.socket_id = cpu->socket_id;
+        topo.core_id = cpu->core_id;
+        topo.thread_id = cpu->thread_id;
+        arch_id =  virt_get_arch_id_from_topo(ms, &topo);
+        cpu_slot = virt_find_cpu_slot(ms, arch_id);
+        if (CPU(cpu_slot->cpu)) {
+            error_setg(&err,
+                       "cpu(id%d=%d:%d:%d) with arch-id %" PRIu64 " exists",
+                       cs->cpu_index, cpu->socket_id, cpu->core_id,
+                       cpu->thread_id, cpu_slot->arch_id);
+            goto out;
+        }
+    } else {
+        /* For cold-add cpu, find empty cpu slot */
+        cpu_slot = virt_find_empty_cpu_slot(ms);
+        topo.socket_id = cpu_slot->props.socket_id;
+        topo.core_id = cpu_slot->props.core_id;
+        topo.thread_id = cpu_slot->props.thread_id;
+        object_property_set_int(OBJECT(dev), "socket-id", topo.socket_id, NULL);
+        object_property_set_int(OBJECT(dev), "core-id", topo.core_id, NULL);
+        object_property_set_int(OBJECT(dev), "thread-id", topo.thread_id, NULL);
+    }
+
+    cpu->env.address_space_iocsr = &lvms->as_iocsr;
+    cpu->phy_id = cpu_slot->arch_id;
+    cs->cpu_index = cpu_slot - ms->possible_cpus->cpus;
+    numa_cpu_pre_plug(cpu_slot, dev, &err);
+out:
+    if (err) {
+        error_propagate(errp, err);
+    }
+}
+
+static void virt_cpu_unplug_request(HotplugHandler *hotplug_dev,
+                                    DeviceState *dev, Error **errp)
+{
+    LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(hotplug_dev);
+    Error *err = NULL;
+    LoongArchCPU *cpu = LOONGARCH_CPU(dev);
+    CPUState *cs = CPU(dev);
+
+    if (cs->cpu_index == 0) {
+        error_setg(&err, "hot-unplug of boot cpu(id%d=%d:%d:%d) not supported",
+                   cs->cpu_index, cpu->socket_id,
+                   cpu->core_id, cpu->thread_id);
+        error_propagate(errp, err);
+        return;
+    }
+
+    hotplug_handler_unplug_request(HOTPLUG_HANDLER(lvms->acpi_ged), dev, &err);
+    if (err) {
+        error_propagate(errp, err);
+    }
+}
+
+static void virt_cpu_unplug(HotplugHandler *hotplug_dev,
+                            DeviceState *dev, Error **errp)
+{
+    CPUArchId *cpu_slot;
+    Error *err = NULL;
+    LoongArchCPU *cpu = LOONGARCH_CPU(dev);
+    LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(hotplug_dev);
+
+    /* Notify ipi and extioi irqchip to remove interrupt routing to CPU */
+    hotplug_handler_unplug(HOTPLUG_HANDLER(lvms->ipi), dev, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    hotplug_handler_unplug(HOTPLUG_HANDLER(lvms->extioi), dev, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    /* Notify acpi ged CPU removed */
+    hotplug_handler_unplug(HOTPLUG_HANDLER(lvms->acpi_ged), dev, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    cpu_slot = virt_find_cpu_slot(MACHINE(lvms), cpu->phy_id);
+    cpu_slot->cpu = NULL;
+    return;
+}
+
+static void virt_cpu_plug(HotplugHandler *hotplug_dev,
+                          DeviceState *dev, Error **errp)
+{
+    CPUArchId *cpu_slot;
+    LoongArchCPU *cpu = LOONGARCH_CPU(dev);
+    LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(hotplug_dev);
+    Error *err = NULL;
+
+    cpu_slot = virt_find_cpu_slot(MACHINE(lvms), cpu->phy_id);
+    cpu_slot->cpu = CPU(dev);
+    if (lvms->ipi) {
+        hotplug_handler_plug(HOTPLUG_HANDLER(lvms->ipi), dev, &err);
+        if (err) {
+            error_propagate(errp, err);
+            return;
+        }
+    }
+
+    if (lvms->extioi) {
+        hotplug_handler_plug(HOTPLUG_HANDLER(lvms->extioi), dev, &err);
+        if (err) {
+            error_propagate(errp, err);
+            return;
+        }
+    }
+
+    if (lvms->acpi_ged) {
+        hotplug_handler_plug(HOTPLUG_HANDLER(lvms->acpi_ged), dev, &err);
+        if (err) {
+            error_propagate(errp, err);
+        }
+    }
+
+    return;
+}
+
 static bool memhp_type_supported(DeviceState *dev)
 {
     /* we only support pc dimm now */
@@ -809,6 +1027,8 @@ static void virt_device_pre_plug(HotplugHandler *hotplug_dev,
 {
     if (memhp_type_supported(dev)) {
         virt_mem_pre_plug(hotplug_dev, dev, errp);
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_LOONGARCH_CPU)) {
+        virt_cpu_pre_plug(hotplug_dev, dev, errp);
     }
 }
 
@@ -827,6 +1047,8 @@ static void virt_device_unplug_request(HotplugHandler *hotplug_dev,
 {
     if (memhp_type_supported(dev)) {
         virt_mem_unplug_request(hotplug_dev, dev, errp);
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_LOONGARCH_CPU)) {
+        virt_cpu_unplug_request(hotplug_dev, dev, errp);
     }
 }
 
@@ -845,6 +1067,8 @@ static void virt_device_unplug(HotplugHandler *hotplug_dev,
 {
     if (memhp_type_supported(dev)) {
         virt_mem_unplug(hotplug_dev, dev, errp);
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_LOONGARCH_CPU)) {
+        virt_cpu_unplug(hotplug_dev, dev, errp);
     }
 }
 
@@ -872,6 +1096,8 @@ static void virt_device_plug_cb(HotplugHandler *hotplug_dev,
         }
     } else if (memhp_type_supported(dev)) {
         virt_mem_plug(hotplug_dev, dev, errp);
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_LOONGARCH_CPU)) {
+        virt_cpu_plug(hotplug_dev, dev, errp);
     }
 }
 
@@ -881,6 +1107,7 @@ static HotplugHandler *virt_get_hotplug_handler(MachineState *machine,
     MachineClass *mc = MACHINE_GET_CLASS(machine);
 
     if (device_is_dynamic_sysbus(mc, dev) ||
+        object_dynamic_cast(OBJECT(dev), TYPE_LOONGARCH_CPU) ||
         object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI) ||
         memhp_type_supported(dev)) {
         return HOTPLUG_HANDLER(machine);
@@ -890,8 +1117,9 @@ static HotplugHandler *virt_get_hotplug_handler(MachineState *machine,
 
 static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
 {
-    int n;
+    int n, arch_id;
     unsigned int max_cpus = ms->smp.max_cpus;
+    LoongArchCPUTopo topo;
 
     if (ms->possible_cpus) {
         assert(ms->possible_cpus->len == max_cpus);
@@ -902,17 +1130,17 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
                                   sizeof(CPUArchId) * max_cpus);
     ms->possible_cpus->len = max_cpus;
     for (n = 0; n < ms->possible_cpus->len; n++) {
+        virt_get_topo_from_index(ms, &topo, n);
+        arch_id = virt_get_arch_id_from_topo(ms, &topo);
         ms->possible_cpus->cpus[n].type = ms->cpu_type;
-        ms->possible_cpus->cpus[n].arch_id = n;
-
+        ms->possible_cpus->cpus[n].arch_id = arch_id;
+        ms->possible_cpus->cpus[n].vcpus_count = 1;
         ms->possible_cpus->cpus[n].props.has_socket_id = true;
-        ms->possible_cpus->cpus[n].props.socket_id  =
-                                   n / (ms->smp.cores * ms->smp.threads);
+        ms->possible_cpus->cpus[n].props.socket_id = topo.socket_id;
         ms->possible_cpus->cpus[n].props.has_core_id = true;
-        ms->possible_cpus->cpus[n].props.core_id =
-                                   n / ms->smp.threads % ms->smp.cores;
+        ms->possible_cpus->cpus[n].props.core_id = topo.core_id;
         ms->possible_cpus->cpus[n].props.has_thread_id = true;
-        ms->possible_cpus->cpus[n].props.thread_id = n % ms->smp.threads;
+        ms->possible_cpus->cpus[n].props.thread_id = topo.thread_id;
     }
     return ms->possible_cpus;
 }
@@ -960,6 +1188,7 @@ static void virt_class_init(ObjectClass *oc, void *data)
     mc->numa_mem_supported = true;
     mc->auto_enable_numa_with_memhp = true;
     mc->auto_enable_numa_with_memdev = true;
+    mc->has_hotpluggable_cpus = true;
     mc->get_hotplug_handler = virt_get_hotplug_handler;
     mc->default_nic = "virtio-net-pci";
     hc->plug = virt_device_plug_cb;
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 0ae1704a345cf..6fffa21ead1f8 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -30,6 +30,14 @@
 #include "hw/cxl/cxl.h"
 #include "hw/pci/msix.h"
 
+/* type3 device private */
+enum CXL_T3_MSIX_VECTOR {
+    CXL_T3_MSIX_PCIE_DOE_TABLE_ACCESS = 0,
+    CXL_T3_MSIX_EVENT_START = 2,
+    CXL_T3_MSIX_MBOX = CXL_T3_MSIX_EVENT_START + CXL_EVENT_TYPE_MAX,
+    CXL_T3_MSIX_VECTOR_NR
+};
+
 #define DWORD_BYTE 4
 #define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
 
@@ -843,7 +851,6 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
     ComponentRegisters *regs = &cxl_cstate->crb;
     MemoryRegion *mr = &regs->component_registers;
     uint8_t *pci_conf = pci_dev->config;
-    unsigned short msix_num = 10;
     int i, rc;
     uint16_t count;
 
@@ -884,31 +891,32 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
                      &ct3d->cxl_dstate.device_registers);
 
     /* MSI(-X) Initialization */
-    rc = msix_init_exclusive_bar(pci_dev, msix_num, 4, NULL);
+    rc = msix_init_exclusive_bar(pci_dev, CXL_T3_MSIX_VECTOR_NR, 4, errp);
     if (rc) {
-        goto err_address_space_free;
+        goto err_free_special_ops;
     }
-    for (i = 0; i < msix_num; i++) {
+    for (i = 0; i < CXL_T3_MSIX_VECTOR_NR; i++) {
         msix_vector_use(pci_dev, i);
     }
 
     /* DOE Initialization */
-    pcie_doe_init(pci_dev, &ct3d->doe_cdat, 0x190, doe_cdat_prot, true, 0);
+    pcie_doe_init(pci_dev, &ct3d->doe_cdat, 0x190, doe_cdat_prot, true,
+                  CXL_T3_MSIX_PCIE_DOE_TABLE_ACCESS);
 
     cxl_cstate->cdat.build_cdat_table = ct3_build_cdat_table;
     cxl_cstate->cdat.free_cdat_table = ct3_free_cdat_table;
     cxl_cstate->cdat.private = ct3d;
     if (!cxl_doe_cdat_init(cxl_cstate, errp)) {
-        goto err_free_special_ops;
+        goto err_msix_uninit;
     }
 
     pcie_cap_deverr_init(pci_dev);
     /* Leave a bit of room for expansion */
-    rc = pcie_aer_init(pci_dev, PCI_ERR_VER, 0x200, PCI_ERR_SIZEOF, NULL);
+    rc = pcie_aer_init(pci_dev, PCI_ERR_VER, 0x200, PCI_ERR_SIZEOF, errp);
     if (rc) {
         goto err_release_cdat;
     }
-    cxl_event_init(&ct3d->cxl_dstate, 2);
+    cxl_event_init(&ct3d->cxl_dstate, CXL_T3_MSIX_EVENT_START);
 
     /* Set default value for patrol scrub attributes */
     ct3d->patrol_scrub_attrs.scrub_cycle_cap =
@@ -935,9 +943,10 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
 
 err_release_cdat:
     cxl_doe_cdat_release(cxl_cstate);
+err_msix_uninit:
+    msix_uninit_exclusive_bar(pci_dev);
 err_free_special_ops:
     g_free(regs->special_ops);
-err_address_space_free:
     if (ct3d->dc.host_dc) {
         cxl_destroy_dc_regions(ct3d);
         address_space_destroy(&ct3d->dc.host_dc_as);
@@ -959,6 +968,7 @@ static void ct3_exit(PCIDevice *pci_dev)
 
     pcie_aer_exit(pci_dev);
     cxl_doe_cdat_release(cxl_cstate);
+    msix_uninit_exclusive_bar(pci_dev);
     g_free(regs->special_ops);
     if (ct3d->dc.host_dc) {
         cxl_destroy_dc_regions(ct3d);
@@ -1090,10 +1100,17 @@ static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
             continue;
         }
 
-        *dpa = dpa_base +
-            ((MAKE_64BIT_MASK(0, 8 + ig) & hpa_offset) |
-             ((MAKE_64BIT_MASK(8 + ig + iw, 64 - 8 - ig - iw) & hpa_offset)
-              >> iw));
+        if (iw < 8) {
+            *dpa = dpa_base +
+                ((MAKE_64BIT_MASK(0, 8 + ig) & hpa_offset) |
+                 ((MAKE_64BIT_MASK(8 + ig + iw, 64 - 8 - ig - iw) & hpa_offset)
+                  >> iw));
+        } else {
+            *dpa = dpa_base +
+                ((MAKE_64BIT_MASK(0, 8 + ig) & hpa_offset) |
+                 ((((MAKE_64BIT_MASK(ig + iw, 64 - ig - iw) & hpa_offset)
+                   >> (ig + iw)) / 3) << (ig + 8)));
+        }
 
         return true;
     }
@@ -1202,7 +1219,7 @@ static void ct3d_reset(DeviceState *dev)
 
     pcie_cap_fill_link_ep_usp(PCI_DEVICE(dev), ct3d->width, ct3d->speed);
     cxl_component_register_init_common(reg_state, write_msk, CXL2_TYPE3_DEVICE);
-    cxl_device_register_init_t3(ct3d);
+    cxl_device_register_init_t3(ct3d, CXL_T3_MSIX_MBOX);
 
     /*
      * Bring up an endpoint to target with MCTP over VDM.
diff --git a/hw/meson.build b/hw/meson.build
index b827c82c5d7b9..6aaf469f95e47 100644
--- a/hw/meson.build
+++ b/hw/meson.build
@@ -35,10 +35,12 @@ subdir('smbios')
 subdir('ssi')
 subdir('timer')
 subdir('tpm')
+subdir('uefi')
 subdir('ufs')
 subdir('usb')
 subdir('vfio')
 subdir('virtio')
+subdir('vmapple')
 subdir('watchdog')
 subdir('xen')
 subdir('xenpv')
@@ -64,3 +66,4 @@ subdir('sparc')
 subdir('sparc64')
 subdir('tricore')
 subdir('xtensa')
+subdir('hexagon')
diff --git a/hw/microblaze/petalogix_ml605_mmu.c b/hw/microblaze/petalogix_ml605_mmu.c
index 8b44be75a2203..21ad215e442dc 100644
--- a/hw/microblaze/petalogix_ml605_mmu.c
+++ b/hw/microblaze/petalogix_ml605_mmu.c
@@ -80,6 +80,8 @@ petalogix_ml605_init(MachineState *machine)
     MemoryRegion *phys_lmb_bram = g_new(MemoryRegion, 1);
     MemoryRegion *phys_ram = g_new(MemoryRegion, 1);
     qemu_irq irq[32];
+    EndianMode endianness = TARGET_BIG_ENDIAN ? ENDIAN_MODE_BIG
+                                              : ENDIAN_MODE_LITTLE;
 
     /* init CPUs */
     cpu = MICROBLAZE_CPU(object_new(TYPE_MICROBLAZE_CPU));
@@ -111,6 +113,7 @@ petalogix_ml605_init(MachineState *machine)
 
 
     dev = qdev_new("xlnx.xps-intc");
+    qdev_prop_set_enum(dev, "endianness", endianness);
     qdev_prop_set_uint32(dev, "kind-of-intr", 1 << TIMER_IRQ);
     sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
     sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, INTC_BASEADDR);
@@ -126,6 +129,7 @@ petalogix_ml605_init(MachineState *machine)
 
     /* 2 timers at irq 2 @ 100 Mhz.  */
     dev = qdev_new("xlnx.xps-timer");
+    qdev_prop_set_enum(dev, "endianness", endianness);
     qdev_prop_set_uint32(dev, "one-timer-only", 0);
     qdev_prop_set_uint32(dev, "clock-frequency", 100 * 1000000);
     sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
@@ -173,6 +177,7 @@ petalogix_ml605_init(MachineState *machine)
         SSIBus *spi;
 
         dev = qdev_new("xlnx.xps-spi");
+        qdev_prop_set_enum(dev, "endianness", endianness);
         qdev_prop_set_uint8(dev, "num-ss-bits", NUM_SPI_FLASHES);
         busdev = SYS_BUS_DEVICE(dev);
         sysbus_realize_and_unref(busdev, &error_fatal);
diff --git a/hw/microblaze/petalogix_s3adsp1800_mmu.c b/hw/microblaze/petalogix_s3adsp1800_mmu.c
index 2c0d8c34cd23c..bdba2006b7222 100644
--- a/hw/microblaze/petalogix_s3adsp1800_mmu.c
+++ b/hw/microblaze/petalogix_s3adsp1800_mmu.c
@@ -71,6 +71,8 @@ petalogix_s3adsp1800_init(MachineState *machine)
     MemoryRegion *phys_ram = g_new(MemoryRegion, 1);
     qemu_irq irq[32];
     MemoryRegion *sysmem = get_system_memory();
+    EndianMode endianness = TARGET_BIG_ENDIAN ? ENDIAN_MODE_BIG
+                                              : ENDIAN_MODE_LITTLE;
 
     cpu = MICROBLAZE_CPU(object_new(TYPE_MICROBLAZE_CPU));
     object_property_set_str(OBJECT(cpu), "version", "7.10.d", &error_abort);
@@ -95,6 +97,7 @@ petalogix_s3adsp1800_init(MachineState *machine)
                           64 * KiB, 1, 0x89, 0x18, 0x0000, 0x0, 1);
 
     dev = qdev_new("xlnx.xps-intc");
+    qdev_prop_set_enum(dev, "endianness", endianness);
     qdev_prop_set_uint32(dev, "kind-of-intr",
                          1 << ETHLITE_IRQ | 1 << UARTLITE_IRQ);
     sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
@@ -106,6 +109,7 @@ petalogix_s3adsp1800_init(MachineState *machine)
     }
 
     dev = qdev_new(TYPE_XILINX_UARTLITE);
+    qdev_prop_set_enum(dev, "endianness", endianness);
     qdev_prop_set_chr(dev, "chardev", serial_hd(0));
     sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
     sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, UARTLITE_BASEADDR);
@@ -113,6 +117,7 @@ petalogix_s3adsp1800_init(MachineState *machine)
 
     /* 2 timers at irq 2 @ 62 Mhz.  */
     dev = qdev_new("xlnx.xps-timer");
+    qdev_prop_set_enum(dev, "endianness", endianness);
     qdev_prop_set_uint32(dev, "one-timer-only", 0);
     qdev_prop_set_uint32(dev, "clock-frequency", 62 * 1000000);
     sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
@@ -120,6 +125,7 @@ petalogix_s3adsp1800_init(MachineState *machine)
     sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, irq[TIMER_IRQ]);
 
     dev = qdev_new("xlnx.xps-ethernetlite");
+    qdev_prop_set_enum(dev, "endianness", endianness);
     qemu_configure_nic_device(dev, true, NULL);
     qdev_prop_set_uint32(dev, "tx-ping-pong", 0);
     qdev_prop_set_uint32(dev, "rx-ping-pong", 0);
diff --git a/hw/mips/boston.c b/hw/mips/boston.c
index 364c328032a1e..149a263bd5aa8 100644
--- a/hw/mips/boston.c
+++ b/hw/mips/boston.c
@@ -220,7 +220,7 @@ static void boston_lcd_write(void *opaque, hwaddr addr,
 static const MemoryRegionOps boston_lcd_ops = {
     .read = boston_lcd_read,
     .write = boston_lcd_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
 static uint64_t boston_platreg_read(void *opaque, hwaddr addr,
@@ -299,7 +299,7 @@ static void boston_platreg_write(void *opaque, hwaddr addr,
 static const MemoryRegionOps boston_platreg_ops = {
     .read = boston_platreg_read,
     .write = boston_platreg_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
 static void mips_boston_instance_init(Object *obj)
@@ -358,8 +358,8 @@ static void gen_firmware(void *p, hwaddr kernel_entry, hwaddr fdt_addr)
                        kernel_entry);
 }
 
-static const void *boston_fdt_filter(void *opaque, const void *fdt_orig,
-                                     const void *match_data, hwaddr *load_addr)
+static void *boston_fdt_filter(void *opaque, const void *fdt_orig,
+                               const void *match_data, hwaddr *load_addr)
 {
     BostonState *s = BOSTON(opaque);
     MachineState *machine = s->mach;
@@ -395,7 +395,6 @@ static const void *boston_fdt_filter(void *opaque, const void *fdt_orig,
                         1, ram_high_sz);
 
     fdt = g_realloc(fdt, fdt_totalsize(fdt));
-    qemu_fdt_dumpdtb(fdt, fdt_sz);
 
     s->fdt_base = *load_addr;
 
@@ -758,7 +757,7 @@ static void boston_mach_init(MachineState *machine)
 
     s->uart = serial_mm_init(sys_mem, boston_memmap[BOSTON_UART].base, 2,
                              get_cps_irq(&s->cps, 3), 10000000,
-                             serial_hd(0), DEVICE_NATIVE_ENDIAN);
+                             serial_hd(0), DEVICE_LITTLE_ENDIAN);
 
     lcd = g_new(MemoryRegion, 1);
     memory_region_init_io(lcd, NULL, &boston_lcd_ops, s, "boston-lcd", 0x8);
@@ -797,7 +796,7 @@ static void boston_mach_init(MachineState *machine)
         if (kernel_size > 0) {
             int dt_size;
             g_autofree const void *dtb_file_data = NULL;
-            g_autofree const void *dtb_load_data = NULL;
+            void *dtb_load_data = NULL;
             hwaddr dtb_paddr = QEMU_ALIGN_UP(kernel_high, 64 * KiB);
             hwaddr dtb_vaddr = cpu_mips_phys_to_kseg0(NULL, dtb_paddr);
 
@@ -810,6 +809,12 @@ static void boston_mach_init(MachineState *machine)
 
             dtb_load_data = boston_fdt_filter(s, dtb_file_data,
                                               NULL, &dtb_vaddr);
+            if (!dtb_load_data) {
+                /* boston_fdt_filter() already printed the error for us */
+                exit(1);
+            }
+
+            machine->fdt = dtb_load_data;
 
             /* Calculate real fdt size after filter */
             dt_size = fdt_totalsize(dtb_load_data);
@@ -818,7 +823,8 @@ static void boston_mach_init(MachineState *machine)
                                 rom_ptr(dtb_paddr, dt_size));
         } else {
             /* Try to load file as FIT */
-            fit_err = load_fit(&boston_fit_loader, machine->kernel_filename, s);
+            fit_err = load_fit(&boston_fit_loader, machine->kernel_filename,
+                               &machine->fdt, s);
             if (fit_err) {
                 error_report("unable to load kernel image");
                 exit(1);
diff --git a/hw/mips/jazz.c b/hw/mips/jazz.c
index c89610639a9e0..1700c3765ded0 100644
--- a/hw/mips/jazz.c
+++ b/hw/mips/jazz.c
@@ -50,7 +50,7 @@
 #include "qemu/error-report.h"
 #include "qemu/help_option.h"
 #ifdef CONFIG_TCG
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 #endif /* CONFIG_TCG */
 #include "cpu.h"
 
diff --git a/hw/mips/loongson3_virt.c b/hw/mips/loongson3_virt.c
index 831fddb1bd74a..db1cc5131471a 100644
--- a/hw/mips/loongson3_virt.c
+++ b/hw/mips/loongson3_virt.c
@@ -144,7 +144,7 @@ static void loongson3_pm_write(void *opaque, hwaddr addr,
 static const MemoryRegionOps loongson3_pm_ops = {
     .read  = loongson3_pm_read,
     .write = loongson3_pm_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
     .valid = {
         .min_access_size = 1,
         .max_access_size = 1
@@ -560,7 +560,7 @@ static void mips_loongson3_virt_init(MachineState *machine)
 
     serial_mm_init(address_space_mem, virt_memmap[VIRT_UART].base, 0,
                    qdev_get_gpio_in(liointc, UART_IRQ), 115200, serial_hd(0),
-                   DEVICE_NATIVE_ENDIAN);
+                   DEVICE_LITTLE_ENDIAN);
 
     sysbus_create_simple("goldfish_rtc", virt_memmap[VIRT_RTC].base,
                          qdev_get_gpio_in(liointc, RTC_IRQ));
diff --git a/hw/misc/Kconfig b/hw/misc/Kconfig
index 8f9ce2f68c3fb..ec0fa5aa9f8c4 100644
--- a/hw/misc/Kconfig
+++ b/hw/misc/Kconfig
@@ -30,14 +30,6 @@ config EDU
     default y if TEST_DEVICES
     depends on PCI && MSI_NONBROKEN
 
-config PCA9552
-    bool
-    depends on I2C
-
-config PCA9554
-    bool
-    depends on I2C
-
 config I2C_ECHO
     bool
     default y if TEST_DEVICES
@@ -86,6 +78,12 @@ config IMX
     select SSI
     select USB_EHCI_SYSBUS
 
+config FSL_IMX8MP_ANALOG
+    bool
+
+config FSL_IMX8MP_CCM
+    bool
+
 config STM32_RCC
     bool
 
@@ -150,6 +148,10 @@ config PVPANIC_ISA
     depends on ISA_BUS
     select PVPANIC_COMMON
 
+config PVPANIC_MMIO
+    bool
+    select PVPANIC_COMMON
+
 config AUX
     bool
     select I2C
diff --git a/hw/misc/allwinner-a10-ccm.c b/hw/misc/allwinner-a10-ccm.c
index 575b0189524a8..6ca1daaff8a3c 100644
--- a/hw/misc/allwinner-a10-ccm.c
+++ b/hw/misc/allwinner-a10-ccm.c
@@ -147,7 +147,7 @@ static void allwinner_a10_ccm_write(void *opaque, hwaddr offset,
 static const MemoryRegionOps allwinner_a10_ccm_ops = {
     .read = allwinner_a10_ccm_read,
     .write = allwinner_a10_ccm_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
     .valid = {
         .min_access_size = 4,
         .max_access_size = 4,
diff --git a/hw/misc/allwinner-a10-dramc.c b/hw/misc/allwinner-a10-dramc.c
index a7c58fa6d060d..badc4c56eb73b 100644
--- a/hw/misc/allwinner-a10-dramc.c
+++ b/hw/misc/allwinner-a10-dramc.c
@@ -114,7 +114,7 @@ static void allwinner_a10_dramc_write(void *opaque, hwaddr offset,
 static const MemoryRegionOps allwinner_a10_dramc_ops = {
     .read = allwinner_a10_dramc_read,
     .write = allwinner_a10_dramc_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
     .valid = {
         .min_access_size = 4,
         .max_access_size = 4,
diff --git a/hw/misc/allwinner-cpucfg.c b/hw/misc/allwinner-cpucfg.c
index 022f63ddf34f4..a4f7a011419b6 100644
--- a/hw/misc/allwinner-cpucfg.c
+++ b/hw/misc/allwinner-cpucfg.c
@@ -217,7 +217,7 @@ static void allwinner_cpucfg_write(void *opaque, hwaddr offset,
 static const MemoryRegionOps allwinner_cpucfg_ops = {
     .read = allwinner_cpucfg_read,
     .write = allwinner_cpucfg_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
     .valid = {
         .min_access_size = 4,
         .max_access_size = 4,
diff --git a/hw/misc/allwinner-h3-ccu.c b/hw/misc/allwinner-h3-ccu.c
index 92e579a99181c..e765f4c54b41c 100644
--- a/hw/misc/allwinner-h3-ccu.c
+++ b/hw/misc/allwinner-h3-ccu.c
@@ -155,7 +155,7 @@ static void allwinner_h3_ccu_write(void *opaque, hwaddr offset,
 static const MemoryRegionOps allwinner_h3_ccu_ops = {
     .read = allwinner_h3_ccu_read,
     .write = allwinner_h3_ccu_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
     .valid = {
         .min_access_size = 4,
         .max_access_size = 4,
diff --git a/hw/misc/allwinner-h3-dramc.c b/hw/misc/allwinner-h3-dramc.c
index 13bba26d0e4ed..c4f3eb92747b6 100644
--- a/hw/misc/allwinner-h3-dramc.c
+++ b/hw/misc/allwinner-h3-dramc.c
@@ -219,7 +219,7 @@ static void allwinner_h3_dramphy_write(void *opaque, hwaddr offset,
 static const MemoryRegionOps allwinner_h3_dramcom_ops = {
     .read = allwinner_h3_dramcom_read,
     .write = allwinner_h3_dramcom_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
     .valid = {
         .min_access_size = 4,
         .max_access_size = 4,
@@ -230,7 +230,7 @@ static const MemoryRegionOps allwinner_h3_dramcom_ops = {
 static const MemoryRegionOps allwinner_h3_dramctl_ops = {
     .read = allwinner_h3_dramctl_read,
     .write = allwinner_h3_dramctl_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
     .valid = {
         .min_access_size = 4,
         .max_access_size = 4,
@@ -241,7 +241,7 @@ static const MemoryRegionOps allwinner_h3_dramctl_ops = {
 static const MemoryRegionOps allwinner_h3_dramphy_ops = {
     .read = allwinner_h3_dramphy_read,
     .write = allwinner_h3_dramphy_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
     .valid = {
         .min_access_size = 4,
         .max_access_size = 4,
diff --git a/hw/misc/allwinner-h3-sysctrl.c b/hw/misc/allwinner-h3-sysctrl.c
index 40059e8cb0c0d..32a0ceb01a3e5 100644
--- a/hw/misc/allwinner-h3-sysctrl.c
+++ b/hw/misc/allwinner-h3-sysctrl.c
@@ -78,7 +78,7 @@ static void allwinner_h3_sysctrl_write(void *opaque, hwaddr offset,
 static const MemoryRegionOps allwinner_h3_sysctrl_ops = {
     .read = allwinner_h3_sysctrl_read,
     .write = allwinner_h3_sysctrl_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
     .valid = {
         .min_access_size = 4,
         .max_access_size = 4,
diff --git a/hw/misc/allwinner-r40-ccu.c b/hw/misc/allwinner-r40-ccu.c
index 005a15b2daebc..8f37a9213c030 100644
--- a/hw/misc/allwinner-r40-ccu.c
+++ b/hw/misc/allwinner-r40-ccu.c
@@ -129,7 +129,7 @@ static void allwinner_r40_ccu_write(void *opaque, hwaddr offset,
 static const MemoryRegionOps allwinner_r40_ccu_ops = {
     .read = allwinner_r40_ccu_read,
     .write = allwinner_r40_ccu_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
     .valid = {
         .min_access_size = 4,
         .max_access_size = 4,
diff --git a/hw/misc/allwinner-r40-dramc.c b/hw/misc/allwinner-r40-dramc.c
index 97c3664e3a3cc..96e1848c21fc3 100644
--- a/hw/misc/allwinner-r40-dramc.c
+++ b/hw/misc/allwinner-r40-dramc.c
@@ -297,7 +297,7 @@ static void allwinner_r40_dramphy_write(void *opaque, hwaddr offset,
 static const MemoryRegionOps allwinner_r40_dramcom_ops = {
     .read = allwinner_r40_dramcom_read,
     .write = allwinner_r40_dramcom_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
     .valid = {
         .min_access_size = 4,
         .max_access_size = 4,
@@ -308,7 +308,7 @@ static const MemoryRegionOps allwinner_r40_dramcom_ops = {
 static const MemoryRegionOps allwinner_r40_dramctl_ops = {
     .read = allwinner_r40_dramctl_read,
     .write = allwinner_r40_dramctl_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
     .valid = {
         .min_access_size = 4,
         .max_access_size = 4,
@@ -319,7 +319,7 @@ static const MemoryRegionOps allwinner_r40_dramctl_ops = {
 static const MemoryRegionOps allwinner_r40_dramphy_ops = {
     .read = allwinner_r40_dramphy_read,
     .write = allwinner_r40_dramphy_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
     .valid = {
         .min_access_size = 4,
         .max_access_size = 4,
@@ -358,7 +358,7 @@ static void allwinner_r40_detect_write(void *opaque, hwaddr offset,
 static const MemoryRegionOps allwinner_r40_detect_ops = {
     .read = allwinner_r40_detect_read,
     .write = allwinner_r40_detect_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
     .valid = {
         .min_access_size = 4,
         .max_access_size = 4,
@@ -393,7 +393,7 @@ static uint64_t allwinner_r40_dualrank_detect_read(void *opaque, hwaddr offset,
 
 static const MemoryRegionOps allwinner_r40_dualrank_detect_ops = {
     .read = allwinner_r40_dualrank_detect_read,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
     .valid = {
         .min_access_size = 4,
         .max_access_size = 4,
diff --git a/hw/misc/allwinner-sid.c b/hw/misc/allwinner-sid.c
index 042b747f30b7b..2bb81f9c540a4 100644
--- a/hw/misc/allwinner-sid.c
+++ b/hw/misc/allwinner-sid.c
@@ -99,7 +99,7 @@ static void allwinner_sid_write(void *opaque, hwaddr offset,
 static const MemoryRegionOps allwinner_sid_ops = {
     .read = allwinner_sid_read,
     .write = allwinner_sid_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
     .valid = {
         .min_access_size = 4,
         .max_access_size = 4,
diff --git a/hw/misc/allwinner-sramc.c b/hw/misc/allwinner-sramc.c
index a20b0b4c5cb15..51df5e45aa29d 100644
--- a/hw/misc/allwinner-sramc.c
+++ b/hw/misc/allwinner-sramc.c
@@ -104,7 +104,7 @@ static void allwinner_sramc_write(void *opaque, hwaddr offset,
 static const MemoryRegionOps allwinner_sramc_ops = {
     .read = allwinner_sramc_read,
     .write = allwinner_sramc_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
     .valid = {
         .min_access_size = 4,
         .max_access_size = 4,
diff --git a/hw/misc/aspeed_hace.c b/hw/misc/aspeed_hace.c
index e3f7df2e862a2..32a5dbded3c61 100644
--- a/hw/misc/aspeed_hace.c
+++ b/hw/misc/aspeed_hace.c
@@ -59,6 +59,7 @@
 /* Other cmd bits */
 #define  HASH_IRQ_EN                    BIT(9)
 #define  HASH_SG_EN                     BIT(18)
+#define  CRYPT_IRQ_EN                   BIT(12)
 /* Scatter-gather data list */
 #define SG_LIST_LEN_SIZE                4
 #define SG_LIST_LEN_MASK                0x0FFFFFFF
@@ -75,9 +76,12 @@ static const struct {
     { HASH_ALGO_SHA1, QCRYPTO_HASH_ALGO_SHA1 },
     { HASH_ALGO_SHA224, QCRYPTO_HASH_ALGO_SHA224 },
     { HASH_ALGO_SHA256, QCRYPTO_HASH_ALGO_SHA256 },
-    { HASH_ALGO_SHA512_SERIES | HASH_ALGO_SHA512_SHA512, QCRYPTO_HASH_ALGO_SHA512 },
-    { HASH_ALGO_SHA512_SERIES | HASH_ALGO_SHA512_SHA384, QCRYPTO_HASH_ALGO_SHA384 },
-    { HASH_ALGO_SHA512_SERIES | HASH_ALGO_SHA512_SHA256, QCRYPTO_HASH_ALGO_SHA256 },
+    { HASH_ALGO_SHA512_SERIES | HASH_ALGO_SHA512_SHA512,
+      QCRYPTO_HASH_ALGO_SHA512 },
+    { HASH_ALGO_SHA512_SERIES | HASH_ALGO_SHA512_SHA384,
+      QCRYPTO_HASH_ALGO_SHA384 },
+    { HASH_ALGO_SHA512_SERIES | HASH_ALGO_SHA512_SHA256,
+      QCRYPTO_HASH_ALGO_SHA256 },
 };
 
 static int hash_algo_lookup(uint32_t reg)
@@ -201,7 +205,8 @@ static void do_hash_operation(AspeedHACEState *s, int algo, bool sg_mode,
             haddr = address_space_map(&s->dram_as, addr, &plen, false,
                                       MEMTXATTRS_UNSPECIFIED);
             if (haddr == NULL) {
-                qemu_log_mask(LOG_GUEST_ERROR, "%s: qcrypto failed\n", __func__);
+                qemu_log_mask(LOG_GUEST_ERROR,
+                              "%s: qcrypto failed\n", __func__);
                 return;
             }
             iov[i].iov_base = haddr;
@@ -339,6 +344,15 @@ static void aspeed_hace_write(void *opaque, hwaddr addr, uint64_t data,
                 qemu_irq_lower(s->irq);
             }
         }
+        if (ahc->raise_crypt_interrupt_workaround) {
+            if (data & CRYPT_IRQ) {
+                data &= ~CRYPT_IRQ;
+
+                if (s->regs[addr] & CRYPT_IRQ) {
+                    qemu_irq_lower(s->irq);
+                }
+            }
+        }
         break;
     case R_HASH_SRC:
         data &= ahc->src_mask;
@@ -384,6 +398,12 @@ static void aspeed_hace_write(void *opaque, hwaddr addr, uint64_t data,
     case R_CRYPT_CMD:
         qemu_log_mask(LOG_UNIMP, "%s: Crypt commands not implemented\n",
                        __func__);
+        if (ahc->raise_crypt_interrupt_workaround) {
+            s->regs[R_STATUS] |= CRYPT_IRQ;
+            if (data & CRYPT_IRQ_EN) {
+                qemu_irq_raise(s->irq);
+            }
+        }
         break;
     default:
         break;
@@ -548,12 +568,39 @@ static const TypeInfo aspeed_ast1030_hace_info = {
     .class_init = aspeed_ast1030_hace_class_init,
 };
 
+static void aspeed_ast2700_hace_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    AspeedHACEClass *ahc = ASPEED_HACE_CLASS(klass);
+
+    dc->desc = "AST2700 Hash and Crypto Engine";
+
+    ahc->src_mask = 0x7FFFFFFF;
+    ahc->dest_mask = 0x7FFFFFF8;
+    ahc->key_mask = 0x7FFFFFF8;
+    ahc->hash_mask = 0x00147FFF;
+
+    /*
+     * Currently, it does not support the CRYPT command. Instead, it only
+     * sends an interrupt to notify the firmware that the crypt command
+     * has completed. It is a temporary workaround.
+     */
+    ahc->raise_crypt_interrupt_workaround = true;
+}
+
+static const TypeInfo aspeed_ast2700_hace_info = {
+    .name = TYPE_ASPEED_AST2700_HACE,
+    .parent = TYPE_ASPEED_HACE,
+    .class_init = aspeed_ast2700_hace_class_init,
+};
+
 static void aspeed_hace_register_types(void)
 {
     type_register_static(&aspeed_ast2400_hace_info);
     type_register_static(&aspeed_ast2500_hace_info);
     type_register_static(&aspeed_ast2600_hace_info);
     type_register_static(&aspeed_ast1030_hace_info);
+    type_register_static(&aspeed_ast2700_hace_info);
     type_register_static(&aspeed_hace_info);
 }
 
diff --git a/hw/misc/aspeed_scu.c b/hw/misc/aspeed_scu.c
index bac1441b061a2..76cfd9167161f 100644
--- a/hw/misc/aspeed_scu.c
+++ b/hw/misc/aspeed_scu.c
@@ -157,6 +157,7 @@
 #define AST2700_SCU_FREQ_CNTR       TO_REG(0x3b0)
 #define AST2700_SCU_CPU_SCRATCH_0   TO_REG(0x780)
 #define AST2700_SCU_CPU_SCRATCH_1   TO_REG(0x784)
+#define AST2700_SCU_VGA_SCRATCH_0   TO_REG(0x900)
 
 #define AST2700_SCUIO_CLK_STOP_CTL_1    TO_REG(0x240)
 #define AST2700_SCUIO_CLK_STOP_CLR_1    TO_REG(0x244)
@@ -559,6 +560,8 @@ static uint32_t aspeed_silicon_revs[] = {
     AST2700_A0_SILICON_REV,
     AST2720_A0_SILICON_REV,
     AST2750_A0_SILICON_REV,
+    AST2700_A1_SILICON_REV,
+    AST2750_A1_SILICON_REV,
 };
 
 bool is_supported_silicon_rev(uint32_t silicon_rev)
@@ -909,8 +912,6 @@ static const MemoryRegionOps aspeed_ast2700_scu_ops = {
 };
 
 static const uint32_t ast2700_a0_resets[ASPEED_AST2700_SCU_NR_REGS] = {
-    [AST2700_SILICON_REV]           = AST2700_A0_SILICON_REV,
-    [AST2700_HW_STRAP1]             = 0x00000800,
     [AST2700_HW_STRAP1_CLR]         = 0xFFF0FFF0,
     [AST2700_HW_STRAP1_LOCK]        = 0x00000FFF,
     [AST2700_HW_STRAP1_SEC1]        = 0x000000FF,
@@ -930,6 +931,7 @@ static const uint32_t ast2700_a0_resets[ASPEED_AST2700_SCU_NR_REGS] = {
     [AST2700_SCU_FREQ_CNTR]         = 0x000375eb,
     [AST2700_SCU_CPU_SCRATCH_0]     = 0x00000000,
     [AST2700_SCU_CPU_SCRATCH_1]     = 0x00000004,
+    [AST2700_SCU_VGA_SCRATCH_0]     = 0x00000040,
 };
 
 static void aspeed_ast2700_scu_reset(DeviceState *dev)
@@ -938,6 +940,8 @@ static void aspeed_ast2700_scu_reset(DeviceState *dev)
     AspeedSCUClass *asc = ASPEED_SCU_GET_CLASS(dev);
 
     memcpy(s->regs, asc->resets, asc->nr_regs * 4);
+    s->regs[AST2700_SILICON_REV] = s->silicon_rev;
+    s->regs[AST2700_HW_STRAP1] = s->hw_strap1;
 }
 
 static void aspeed_2700_scu_class_init(ObjectClass *klass, void *data)
@@ -1030,8 +1034,6 @@ static const MemoryRegionOps aspeed_ast2700_scuio_ops = {
 };
 
 static const uint32_t ast2700_a0_resets_io[ASPEED_AST2700_SCU_NR_REGS] = {
-    [AST2700_SILICON_REV]               = 0x06000003,
-    [AST2700_HW_STRAP1]                 = 0x00000504,
     [AST2700_HW_STRAP1_CLR]             = 0xFFF0FFF0,
     [AST2700_HW_STRAP1_LOCK]            = 0x00000FFF,
     [AST2700_HW_STRAP1_SEC1]            = 0x000000FF,
diff --git a/hw/misc/imx8mp_analog.c b/hw/misc/imx8mp_analog.c
new file mode 100644
index 0000000000000..f7e7c83cc495a
--- /dev/null
+++ b/hw/misc/imx8mp_analog.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2025 Bernhard Beschow <shentey@gmail.com>
+ *
+ * i.MX 8M Plus ANALOG IP block emulation code
+ *
+ * Based on hw/misc/imx7_ccm.c
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+
+#include "hw/misc/imx8mp_analog.h"
+#include "migration/vmstate.h"
+
+#define ANALOG_PLL_LOCK BIT(31)
+
+static void imx8mp_analog_reset(DeviceState *dev)
+{
+    IMX8MPAnalogState *s = IMX8MP_ANALOG(dev);
+
+    memset(s->analog, 0, sizeof(s->analog));
+
+    s->analog[ANALOG_AUDIO_PLL1_GEN_CTRL] = 0x00002010;
+    s->analog[ANALOG_AUDIO_PLL1_FDIV_CTL0] = 0x00145032;
+    s->analog[ANALOG_AUDIO_PLL1_FDIV_CTL1] = 0x00000000;
+    s->analog[ANALOG_AUDIO_PLL1_SSCG_CTRL] = 0x00000000;
+    s->analog[ANALOG_AUDIO_PLL1_MNIT_CTRL] = 0x00100103;
+    s->analog[ANALOG_AUDIO_PLL2_GEN_CTRL] = 0x00002010;
+    s->analog[ANALOG_AUDIO_PLL2_FDIV_CTL0] = 0x00145032;
+    s->analog[ANALOG_AUDIO_PLL2_FDIV_CTL1] = 0x00000000;
+    s->analog[ANALOG_AUDIO_PLL2_SSCG_CTRL] = 0x00000000;
+    s->analog[ANALOG_AUDIO_PLL2_MNIT_CTRL] = 0x00100103;
+    s->analog[ANALOG_VIDEO_PLL1_GEN_CTRL] = 0x00002010;
+    s->analog[ANALOG_VIDEO_PLL1_FDIV_CTL0] = 0x00145032;
+    s->analog[ANALOG_VIDEO_PLL1_FDIV_CTL1] = 0x00000000;
+    s->analog[ANALOG_VIDEO_PLL1_SSCG_CTRL] = 0x00000000;
+    s->analog[ANALOG_VIDEO_PLL1_MNIT_CTRL] = 0x00100103;
+    s->analog[ANALOG_DRAM_PLL_GEN_CTRL] = 0x00002010;
+    s->analog[ANALOG_DRAM_PLL_FDIV_CTL0] = 0x0012c032;
+    s->analog[ANALOG_DRAM_PLL_FDIV_CTL1] = 0x00000000;
+    s->analog[ANALOG_DRAM_PLL_SSCG_CTRL] = 0x00000000;
+    s->analog[ANALOG_DRAM_PLL_MNIT_CTRL] = 0x00100103;
+    s->analog[ANALOG_GPU_PLL_GEN_CTRL] = 0x00000810;
+    s->analog[ANALOG_GPU_PLL_FDIV_CTL0] = 0x000c8031;
+    s->analog[ANALOG_GPU_PLL_LOCKD_CTRL] = 0x0010003f;
+    s->analog[ANALOG_GPU_PLL_MNIT_CTRL] = 0x00280081;
+    s->analog[ANALOG_VPU_PLL_GEN_CTRL] = 0x00000810;
+    s->analog[ANALOG_VPU_PLL_FDIV_CTL0] = 0x0012c032;
+    s->analog[ANALOG_VPU_PLL_LOCKD_CTRL] = 0x0010003f;
+    s->analog[ANALOG_VPU_PLL_MNIT_CTRL] = 0x00280081;
+    s->analog[ANALOG_ARM_PLL_GEN_CTRL] = 0x00000810;
+    s->analog[ANALOG_ARM_PLL_FDIV_CTL0] = 0x000fa031;
+    s->analog[ANALOG_ARM_PLL_LOCKD_CTRL] = 0x0010003f;
+    s->analog[ANALOG_ARM_PLL_MNIT_CTRL] = 0x00280081;
+    s->analog[ANALOG_SYS_PLL1_GEN_CTRL] = 0x0aaaa810;
+    s->analog[ANALOG_SYS_PLL1_FDIV_CTL0] = 0x00190032;
+    s->analog[ANALOG_SYS_PLL1_LOCKD_CTRL] = 0x0010003f;
+    s->analog[ANALOG_SYS_PLL1_MNIT_CTRL] = 0x00280081;
+    s->analog[ANALOG_SYS_PLL2_GEN_CTRL] = 0x0aaaa810;
+    s->analog[ANALOG_SYS_PLL2_FDIV_CTL0] = 0x000fa031;
+    s->analog[ANALOG_SYS_PLL2_LOCKD_CTRL] = 0x0010003f;
+    s->analog[ANALOG_SYS_PLL2_MNIT_CTRL] = 0x00280081;
+    s->analog[ANALOG_SYS_PLL3_GEN_CTRL] = 0x00000810;
+    s->analog[ANALOG_SYS_PLL3_FDIV_CTL0] = 0x000fa031;
+    s->analog[ANALOG_SYS_PLL3_LOCKD_CTRL] = 0x0010003f;
+    s->analog[ANALOG_SYS_PLL3_MNIT_CTRL] = 0x00280081;
+    s->analog[ANALOG_OSC_MISC_CFG] = 0x00000000;
+    s->analog[ANALOG_ANAMIX_PLL_MNIT_CTL] = 0x00000000;
+    s->analog[ANALOG_DIGPROG] = 0x00824010;
+
+    /* all PLLs need to be locked */
+    s->analog[ANALOG_AUDIO_PLL1_GEN_CTRL] |= ANALOG_PLL_LOCK;
+    s->analog[ANALOG_AUDIO_PLL2_GEN_CTRL] |= ANALOG_PLL_LOCK;
+    s->analog[ANALOG_VIDEO_PLL1_GEN_CTRL] |= ANALOG_PLL_LOCK;
+    s->analog[ANALOG_DRAM_PLL_GEN_CTRL] |= ANALOG_PLL_LOCK;
+    s->analog[ANALOG_GPU_PLL_GEN_CTRL] |= ANALOG_PLL_LOCK;
+    s->analog[ANALOG_VPU_PLL_GEN_CTRL] |= ANALOG_PLL_LOCK;
+    s->analog[ANALOG_ARM_PLL_GEN_CTRL] |= ANALOG_PLL_LOCK;
+    s->analog[ANALOG_SYS_PLL1_GEN_CTRL] |= ANALOG_PLL_LOCK;
+    s->analog[ANALOG_SYS_PLL2_GEN_CTRL] |= ANALOG_PLL_LOCK;
+    s->analog[ANALOG_SYS_PLL3_GEN_CTRL] |= ANALOG_PLL_LOCK;
+}
+
+static uint64_t imx8mp_analog_read(void *opaque, hwaddr offset, unsigned size)
+{
+    IMX8MPAnalogState *s = opaque;
+
+    return s->analog[offset >> 2];
+}
+
+static void imx8mp_analog_write(void *opaque, hwaddr offset,
+                                uint64_t value, unsigned size)
+{
+    IMX8MPAnalogState *s = opaque;
+
+    if (offset >> 2 == ANALOG_DIGPROG) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "Guest write to read-only ANALOG_DIGPROG register\n");
+    } else {
+        s->analog[offset >> 2] = value;
+    }
+}
+
+static const struct MemoryRegionOps imx8mp_analog_ops = {
+    .read = imx8mp_analog_read,
+    .write = imx8mp_analog_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+        .unaligned = false,
+    },
+};
+
+static void imx8mp_analog_init(Object *obj)
+{
+    IMX8MPAnalogState *s = IMX8MP_ANALOG(obj);
+    SysBusDevice *sd = SYS_BUS_DEVICE(obj);
+
+    memory_region_init(&s->mmio.container, obj, TYPE_IMX8MP_ANALOG, 0x10000);
+
+    memory_region_init_io(&s->mmio.analog, obj, &imx8mp_analog_ops, s,
+                          TYPE_IMX8MP_ANALOG, sizeof(s->analog));
+    memory_region_add_subregion(&s->mmio.container, 0, &s->mmio.analog);
+
+    sysbus_init_mmio(sd, &s->mmio.container);
+}
+
+static const VMStateDescription imx8mp_analog_vmstate = {
+    .name = TYPE_IMX8MP_ANALOG,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (const VMStateField[]) {
+        VMSTATE_UINT32_ARRAY(analog, IMX8MPAnalogState, ANALOG_MAX),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static void imx8mp_analog_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    device_class_set_legacy_reset(dc, imx8mp_analog_reset);
+    dc->vmsd  = &imx8mp_analog_vmstate;
+    dc->desc  = "i.MX 8M Plus Analog Module";
+}
+
+static const TypeInfo imx8mp_analog_types[] = {
+    {
+        .name          = TYPE_IMX8MP_ANALOG,
+        .parent        = TYPE_SYS_BUS_DEVICE,
+        .instance_size = sizeof(IMX8MPAnalogState),
+        .instance_init = imx8mp_analog_init,
+        .class_init    = imx8mp_analog_class_init,
+    }
+};
+
+DEFINE_TYPES(imx8mp_analog_types);
diff --git a/hw/misc/imx8mp_ccm.c b/hw/misc/imx8mp_ccm.c
new file mode 100644
index 0000000000000..1a1c932427a4e
--- /dev/null
+++ b/hw/misc/imx8mp_ccm.c
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2025 Bernhard Beschow <shentey@gmail.com>
+ *
+ * i.MX 8M Plus CCM IP block emulation code
+ *
+ * Based on hw/misc/imx7_ccm.c
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+
+#include "hw/misc/imx8mp_ccm.h"
+#include "migration/vmstate.h"
+
+#include "trace.h"
+
+#define CKIH_FREQ 16000000 /* 16MHz crystal input */
+
+static void imx8mp_ccm_reset(DeviceState *dev)
+{
+    IMX8MPCCMState *s = IMX8MP_CCM(dev);
+
+    memset(s->ccm, 0, sizeof(s->ccm));
+}
+
+#define CCM_INDEX(offset)   (((offset) & ~(hwaddr)0xF) / sizeof(uint32_t))
+#define CCM_BITOP(offset)   ((offset) & (hwaddr)0xF)
+
+enum {
+    CCM_BITOP_NONE = 0x00,
+    CCM_BITOP_SET  = 0x04,
+    CCM_BITOP_CLR  = 0x08,
+    CCM_BITOP_TOG  = 0x0C,
+};
+
+static uint64_t imx8mp_set_clr_tog_read(void *opaque, hwaddr offset,
+                                        unsigned size)
+{
+    const uint32_t *mmio = opaque;
+
+    return mmio[CCM_INDEX(offset)];
+}
+
+static void imx8mp_set_clr_tog_write(void *opaque, hwaddr offset,
+                                     uint64_t value, unsigned size)
+{
+    const uint8_t  bitop = CCM_BITOP(offset);
+    const uint32_t index = CCM_INDEX(offset);
+    uint32_t *mmio = opaque;
+
+    switch (bitop) {
+    case CCM_BITOP_NONE:
+        mmio[index]  = value;
+        break;
+    case CCM_BITOP_SET:
+        mmio[index] |= value;
+        break;
+    case CCM_BITOP_CLR:
+        mmio[index] &= ~value;
+        break;
+    case CCM_BITOP_TOG:
+        mmio[index] ^= value;
+        break;
+    };
+}
+
+static const struct MemoryRegionOps imx8mp_set_clr_tog_ops = {
+    .read = imx8mp_set_clr_tog_read,
+    .write = imx8mp_set_clr_tog_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .impl = {
+        /*
+         * Our device would not work correctly if the guest was doing
+         * unaligned access. This might not be a limitation on the real
+         * device but in practice there is no reason for a guest to access
+         * this device unaligned.
+         */
+        .min_access_size = 4,
+        .max_access_size = 4,
+        .unaligned = false,
+    },
+};
+
+static void imx8mp_ccm_init(Object *obj)
+{
+    SysBusDevice *sd = SYS_BUS_DEVICE(obj);
+    IMX8MPCCMState *s = IMX8MP_CCM(obj);
+
+    memory_region_init_io(&s->iomem,
+                          obj,
+                          &imx8mp_set_clr_tog_ops,
+                          s->ccm,
+                          TYPE_IMX8MP_CCM ".ccm",
+                          sizeof(s->ccm));
+
+    sysbus_init_mmio(sd, &s->iomem);
+}
+
+static const VMStateDescription imx8mp_ccm_vmstate = {
+    .name = TYPE_IMX8MP_CCM,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (const VMStateField[]) {
+        VMSTATE_UINT32_ARRAY(ccm, IMX8MPCCMState, CCM_MAX),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static uint32_t imx8mp_ccm_get_clock_frequency(IMXCCMState *dev, IMXClk clock)
+{
+    /*
+     * This function is "consumed" by GPT emulation code. Some clocks
+     * have fixed frequencies and we can provide requested frequency
+     * easily. However for CCM provided clocks (like IPG) each GPT
+     * timer can have its own clock root.
+     * This means we need additional information when calling this
+     * function to know the requester's identity.
+     */
+    uint32_t freq = 0;
+
+    switch (clock) {
+    case CLK_NONE:
+        break;
+    case CLK_32k:
+        freq = CKIL_FREQ;
+        break;
+    case CLK_HIGH:
+        freq = CKIH_FREQ;
+        break;
+    case CLK_IPG:
+    case CLK_IPG_HIGH:
+        /*
+         * For now we don't have a way to figure out the device this
+         * function is called for. Until then the IPG derived clocks
+         * are left unimplemented.
+         */
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Clock %d Not implemented\n",
+                      TYPE_IMX8MP_CCM, __func__, clock);
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: unsupported clock %d\n",
+                      TYPE_IMX8MP_CCM, __func__, clock);
+        break;
+    }
+
+    trace_ccm_clock_freq(clock, freq);
+
+    return freq;
+}
+
+static void imx8mp_ccm_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    IMXCCMClass *ccm = IMX_CCM_CLASS(klass);
+
+    device_class_set_legacy_reset(dc, imx8mp_ccm_reset);
+    dc->vmsd  = &imx8mp_ccm_vmstate;
+    dc->desc  = "i.MX 8M Plus Clock Control Module";
+
+    ccm->get_clock_frequency = imx8mp_ccm_get_clock_frequency;
+}
+
+static const TypeInfo imx8mp_ccm_types[] = {
+    {
+        .name          = TYPE_IMX8MP_CCM,
+        .parent        = TYPE_IMX_CCM,
+        .instance_size = sizeof(IMX8MPCCMState),
+        .instance_init = imx8mp_ccm_init,
+        .class_init    = imx8mp_ccm_class_init,
+    },
+};
+
+DEFINE_TYPES(imx8mp_ccm_types);
diff --git a/hw/misc/macio/gpio.c b/hw/misc/macio/gpio.c
index 7cad62819a069..e87bfca1f5d37 100644
--- a/hw/misc/macio/gpio.c
+++ b/hw/misc/macio/gpio.c
@@ -34,6 +34,11 @@
 #include "qemu/module.h"
 #include "trace.h"
 
+enum MacioGPIORegisterBits {
+    OUT_DATA   = 1,
+    IN_DATA    = 2,
+    OUT_ENABLE = 4,
+};
 
 void macio_set_gpio(MacIOGPIOState *s, uint32_t gpio, bool state)
 {
@@ -41,14 +46,14 @@ void macio_set_gpio(MacIOGPIOState *s, uint32_t gpio, bool state)
 
     trace_macio_set_gpio(gpio, state);
 
-    if (s->gpio_regs[gpio] & 4) {
+    if (s->gpio_regs[gpio] & OUT_ENABLE) {
         qemu_log_mask(LOG_GUEST_ERROR,
                       "GPIO: Setting GPIO %d while it's an output\n", gpio);
     }
 
-    new_reg = s->gpio_regs[gpio] & ~2;
+    new_reg = s->gpio_regs[gpio] & ~IN_DATA;
     if (state) {
-        new_reg |= 2;
+        new_reg |= IN_DATA;
     }
 
     if (new_reg == s->gpio_regs[gpio]) {
@@ -107,12 +112,12 @@ static void macio_gpio_write(void *opaque, hwaddr addr, uint64_t value,
 
     addr -= 8;
     if (addr < 36) {
-        value &= ~2;
+        value &= ~IN_DATA;
 
-        if (value & 4) {
-            ibit = (value & 1) << 1;
+        if (value & OUT_ENABLE) {
+            ibit = (value & OUT_DATA) << 1;
         } else {
-            ibit = s->gpio_regs[addr] & 2;
+            ibit = s->gpio_regs[addr] & IN_DATA;
         }
 
         s->gpio_regs[addr] = value | ibit;
@@ -135,7 +140,7 @@ static uint64_t macio_gpio_read(void *opaque, hwaddr addr, unsigned size)
         }
     }
 
-    trace_macio_gpio_write(addr, val);
+    trace_macio_gpio_read(addr, val);
     return val;
 }
 
diff --git a/hw/misc/macio/trace-events b/hw/misc/macio/trace-events
index ad4b9d1c08eae..055a407aebb66 100644
--- a/hw/misc/macio/trace-events
+++ b/hw/misc/macio/trace-events
@@ -18,7 +18,8 @@ macio_timer_read(uint64_t addr, unsigned len, uint32_t val) "read addr 0x%"PRIx6
 macio_set_gpio(int gpio, bool state) "setting GPIO %d to %d"
 macio_gpio_irq_assert(int gpio) "asserting GPIO %d"
 macio_gpio_irq_deassert(int gpio) "deasserting GPIO %d"
-macio_gpio_write(uint64_t addr, uint64_t val) "addr: 0x%"PRIx64" value: 0x%"PRIx64
+macio_gpio_write(uint64_t addr, uint64_t val) "addr 0x%"PRIx64" val 0x%"PRIx64
+macio_gpio_read(uint64_t addr, uint64_t val) "addr 0x%"PRIx64" val 0x%"PRIx64
 
 # pmu.c
 pmu_adb_poll(int olen) "ADB autopoll, olen=%d"
diff --git a/hw/misc/meson.build b/hw/misc/meson.build
index 55f493521bec0..6d47de482c588 100644
--- a/hw/misc/meson.build
+++ b/hw/misc/meson.build
@@ -55,6 +55,8 @@ system_ss.add(when: 'CONFIG_AXP2XX_PMU', if_true: files('axp2xx.c'))
 system_ss.add(when: 'CONFIG_REALVIEW', if_true: files('arm_sysctl.c'))
 system_ss.add(when: 'CONFIG_ECCMEMCTL', if_true: files('eccmemctl.c'))
 system_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4210_pmu.c', 'exynos4210_clk.c', 'exynos4210_rng.c'))
+system_ss.add(when: 'CONFIG_FSL_IMX8MP_ANALOG', if_true: files('imx8mp_analog.c'))
+system_ss.add(when: 'CONFIG_FSL_IMX8MP_CCM', if_true: files('imx8mp_ccm.c'))
 system_ss.add(when: 'CONFIG_IMX', if_true: files(
   'imx25_ccm.c',
   'imx31_ccm.c',
@@ -69,8 +71,8 @@ system_ss.add(when: 'CONFIG_IMX', if_true: files(
   'imx_rngc.c',
 ))
 system_ss.add(when: 'CONFIG_NPCM7XX', if_true: files(
-  'npcm7xx_clk.c',
-  'npcm7xx_gcr.c',
+  'npcm_clk.c',
+  'npcm_gcr.c',
   'npcm7xx_mft.c',
   'npcm7xx_pwm.c',
   'npcm7xx_rng.c',
@@ -124,6 +126,7 @@ system_ss.add(when: 'CONFIG_ARMSSE_MHU', if_true: files('armsse-mhu.c'))
 
 system_ss.add(when: 'CONFIG_PVPANIC_ISA', if_true: files('pvpanic-isa.c'))
 system_ss.add(when: 'CONFIG_PVPANIC_PCI', if_true: files('pvpanic-pci.c'))
+system_ss.add(when: 'CONFIG_PVPANIC_MMIO', if_true: files('pvpanic-mmio.c'))
 system_ss.add(when: 'CONFIG_AUX', if_true: files('auxbus.c'))
 system_ss.add(when: 'CONFIG_ASPEED_SOC', if_true: files(
   'aspeed_hace.c',
diff --git a/hw/misc/mps2-fpgaio.c b/hw/misc/mps2-fpgaio.c
index d07568248d6ed..04a3da5db051f 100644
--- a/hw/misc/mps2-fpgaio.c
+++ b/hw/misc/mps2-fpgaio.c
@@ -198,7 +198,7 @@ static void mps2_fpgaio_write(void *opaque, hwaddr offset, uint64_t value,
 
             s->led0 = value & MAKE_64BIT_MASK(0, s->num_leds);
             for (i = 0; i < s->num_leds; i++) {
-                led_set_state(s->led[i], value & (1 << i));
+                led_set_state(s->led[i], extract64(value, i, 1));
             }
         }
         break;
diff --git a/hw/misc/npcm7xx_gcr.c b/hw/misc/npcm7xx_gcr.c
deleted file mode 100644
index 07464a4dc93e6..0000000000000
--- a/hw/misc/npcm7xx_gcr.c
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
- * Nuvoton NPCM7xx System Global Control Registers.
- *
- * Copyright 2020 Google LLC
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * for more details.
- */
-
-#include "qemu/osdep.h"
-
-#include "hw/misc/npcm7xx_gcr.h"
-#include "hw/qdev-properties.h"
-#include "migration/vmstate.h"
-#include "qapi/error.h"
-#include "qemu/cutils.h"
-#include "qemu/log.h"
-#include "qemu/module.h"
-#include "qemu/units.h"
-
-#include "trace.h"
-
-#define NPCM7XX_GCR_MIN_DRAM_SIZE   (128 * MiB)
-#define NPCM7XX_GCR_MAX_DRAM_SIZE   (2 * GiB)
-
-enum NPCM7xxGCRRegisters {
-    NPCM7XX_GCR_PDID,
-    NPCM7XX_GCR_PWRON,
-    NPCM7XX_GCR_MFSEL1          = 0x0c / sizeof(uint32_t),
-    NPCM7XX_GCR_MFSEL2,
-    NPCM7XX_GCR_MISCPE,
-    NPCM7XX_GCR_SPSWC           = 0x038 / sizeof(uint32_t),
-    NPCM7XX_GCR_INTCR,
-    NPCM7XX_GCR_INTSR,
-    NPCM7XX_GCR_HIFCR           = 0x050 / sizeof(uint32_t),
-    NPCM7XX_GCR_INTCR2          = 0x060 / sizeof(uint32_t),
-    NPCM7XX_GCR_MFSEL3,
-    NPCM7XX_GCR_SRCNT,
-    NPCM7XX_GCR_RESSR,
-    NPCM7XX_GCR_RLOCKR1,
-    NPCM7XX_GCR_FLOCKR1,
-    NPCM7XX_GCR_DSCNT,
-    NPCM7XX_GCR_MDLR,
-    NPCM7XX_GCR_SCRPAD3,
-    NPCM7XX_GCR_SCRPAD2,
-    NPCM7XX_GCR_DAVCLVLR        = 0x098 / sizeof(uint32_t),
-    NPCM7XX_GCR_INTCR3,
-    NPCM7XX_GCR_VSINTR          = 0x0ac / sizeof(uint32_t),
-    NPCM7XX_GCR_MFSEL4,
-    NPCM7XX_GCR_CPBPNTR         = 0x0c4 / sizeof(uint32_t),
-    NPCM7XX_GCR_CPCTL           = 0x0d0 / sizeof(uint32_t),
-    NPCM7XX_GCR_CP2BST,
-    NPCM7XX_GCR_B2CPNT,
-    NPCM7XX_GCR_CPPCTL,
-    NPCM7XX_GCR_I2CSEGSEL,
-    NPCM7XX_GCR_I2CSEGCTL,
-    NPCM7XX_GCR_VSRCR,
-    NPCM7XX_GCR_MLOCKR,
-    NPCM7XX_GCR_SCRPAD          = 0x013c / sizeof(uint32_t),
-    NPCM7XX_GCR_USB1PHYCTL,
-    NPCM7XX_GCR_USB2PHYCTL,
-    NPCM7XX_GCR_REGS_END,
-};
-
-static const uint32_t cold_reset_values[NPCM7XX_GCR_NR_REGS] = {
-    [NPCM7XX_GCR_PDID]          = 0x04a92750,   /* Poleg A1 */
-    [NPCM7XX_GCR_MISCPE]        = 0x0000ffff,
-    [NPCM7XX_GCR_SPSWC]         = 0x00000003,
-    [NPCM7XX_GCR_INTCR]         = 0x0000035e,
-    [NPCM7XX_GCR_HIFCR]         = 0x0000004e,
-    [NPCM7XX_GCR_INTCR2]        = (1U << 19),   /* DDR initialized */
-    [NPCM7XX_GCR_RESSR]         = 0x80000000,
-    [NPCM7XX_GCR_DSCNT]         = 0x000000c0,
-    [NPCM7XX_GCR_DAVCLVLR]      = 0x5a00f3cf,
-    [NPCM7XX_GCR_SCRPAD]        = 0x00000008,
-    [NPCM7XX_GCR_USB1PHYCTL]    = 0x034730e4,
-    [NPCM7XX_GCR_USB2PHYCTL]    = 0x034730e4,
-};
-
-static uint64_t npcm7xx_gcr_read(void *opaque, hwaddr offset, unsigned size)
-{
-    uint32_t reg = offset / sizeof(uint32_t);
-    NPCM7xxGCRState *s = opaque;
-
-    if (reg >= NPCM7XX_GCR_NR_REGS) {
-        qemu_log_mask(LOG_GUEST_ERROR,
-                      "%s: offset 0x%04" HWADDR_PRIx " out of range\n",
-                      __func__, offset);
-        return 0;
-    }
-
-    trace_npcm7xx_gcr_read(offset, s->regs[reg]);
-
-    return s->regs[reg];
-}
-
-static void npcm7xx_gcr_write(void *opaque, hwaddr offset,
-                              uint64_t v, unsigned size)
-{
-    uint32_t reg = offset / sizeof(uint32_t);
-    NPCM7xxGCRState *s = opaque;
-    uint32_t value = v;
-
-    trace_npcm7xx_gcr_write(offset, value);
-
-    if (reg >= NPCM7XX_GCR_NR_REGS) {
-        qemu_log_mask(LOG_GUEST_ERROR,
-                      "%s: offset 0x%04" HWADDR_PRIx " out of range\n",
-                      __func__, offset);
-        return;
-    }
-
-    switch (reg) {
-    case NPCM7XX_GCR_PDID:
-    case NPCM7XX_GCR_PWRON:
-    case NPCM7XX_GCR_INTSR:
-        qemu_log_mask(LOG_GUEST_ERROR,
-                      "%s: register @ 0x%04" HWADDR_PRIx " is read-only\n",
-                      __func__, offset);
-        return;
-
-    case NPCM7XX_GCR_RESSR:
-    case NPCM7XX_GCR_CP2BST:
-        /* Write 1 to clear */
-        value = s->regs[reg] & ~value;
-        break;
-
-    case NPCM7XX_GCR_RLOCKR1:
-    case NPCM7XX_GCR_MDLR:
-        /* Write 1 to set */
-        value |= s->regs[reg];
-        break;
-    };
-
-    s->regs[reg] = value;
-}
-
-static const struct MemoryRegionOps npcm7xx_gcr_ops = {
-    .read       = npcm7xx_gcr_read,
-    .write      = npcm7xx_gcr_write,
-    .endianness = DEVICE_LITTLE_ENDIAN,
-    .valid      = {
-        .min_access_size        = 4,
-        .max_access_size        = 4,
-        .unaligned              = false,
-    },
-};
-
-static void npcm7xx_gcr_enter_reset(Object *obj, ResetType type)
-{
-    NPCM7xxGCRState *s = NPCM7XX_GCR(obj);
-
-    QEMU_BUILD_BUG_ON(sizeof(s->regs) != sizeof(cold_reset_values));
-
-    memcpy(s->regs, cold_reset_values, sizeof(s->regs));
-    s->regs[NPCM7XX_GCR_PWRON] = s->reset_pwron;
-    s->regs[NPCM7XX_GCR_MDLR] = s->reset_mdlr;
-    s->regs[NPCM7XX_GCR_INTCR3] = s->reset_intcr3;
-}
-
-static void npcm7xx_gcr_realize(DeviceState *dev, Error **errp)
-{
-    ERRP_GUARD();
-    NPCM7xxGCRState *s = NPCM7XX_GCR(dev);
-    uint64_t dram_size;
-    Object *obj;
-
-    obj = object_property_get_link(OBJECT(dev), "dram-mr", errp);
-    if (!obj) {
-        error_prepend(errp, "%s: required dram-mr link not found: ", __func__);
-        return;
-    }
-    dram_size = memory_region_size(MEMORY_REGION(obj));
-    if (!is_power_of_2(dram_size) ||
-        dram_size < NPCM7XX_GCR_MIN_DRAM_SIZE ||
-        dram_size > NPCM7XX_GCR_MAX_DRAM_SIZE) {
-        g_autofree char *sz = size_to_str(dram_size);
-        g_autofree char *min_sz = size_to_str(NPCM7XX_GCR_MIN_DRAM_SIZE);
-        g_autofree char *max_sz = size_to_str(NPCM7XX_GCR_MAX_DRAM_SIZE);
-        error_setg(errp, "%s: unsupported DRAM size %s", __func__, sz);
-        error_append_hint(errp,
-                          "DRAM size must be a power of two between %s and %s,"
-                          " inclusive.\n", min_sz, max_sz);
-        return;
-    }
-
-    /* Power-on reset value */
-    s->reset_intcr3 = 0x00001002;
-
-    /*
-     * The GMMAP (Graphics Memory Map) field is used by u-boot to detect the
-     * DRAM size, and is normally initialized by the boot block as part of DRAM
-     * training. However, since we don't have a complete emulation of the
-     * memory controller and try to make it look like it has already been
-     * initialized, the boot block will skip this initialization, and we need
-     * to make sure this field is set correctly up front.
-     *
-     * WARNING: some versions of u-boot only looks at bits 8 and 9, so 2 GiB of
-     * DRAM will be interpreted as 128 MiB.
-     *
-     * https://github.com/Nuvoton-Israel/u-boot/blob/2aef993bd2aafeb5408dbaad0f3ce099ee40c4aa/board/nuvoton/poleg/poleg.c#L244
-     */
-    s->reset_intcr3 |= ctz64(dram_size / NPCM7XX_GCR_MIN_DRAM_SIZE) << 8;
-}
-
-static void npcm7xx_gcr_init(Object *obj)
-{
-    NPCM7xxGCRState *s = NPCM7XX_GCR(obj);
-
-    memory_region_init_io(&s->iomem, obj, &npcm7xx_gcr_ops, s,
-                          TYPE_NPCM7XX_GCR, 4 * KiB);
-    sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem);
-}
-
-static const VMStateDescription vmstate_npcm7xx_gcr = {
-    .name = "npcm7xx-gcr",
-    .version_id = 0,
-    .minimum_version_id = 0,
-    .fields = (const VMStateField[]) {
-        VMSTATE_UINT32_ARRAY(regs, NPCM7xxGCRState, NPCM7XX_GCR_NR_REGS),
-        VMSTATE_END_OF_LIST(),
-    },
-};
-
-static const Property npcm7xx_gcr_properties[] = {
-    DEFINE_PROP_UINT32("disabled-modules", NPCM7xxGCRState, reset_mdlr, 0),
-    DEFINE_PROP_UINT32("power-on-straps", NPCM7xxGCRState, reset_pwron, 0),
-};
-
-static void npcm7xx_gcr_class_init(ObjectClass *klass, void *data)
-{
-    ResettableClass *rc = RESETTABLE_CLASS(klass);
-    DeviceClass *dc = DEVICE_CLASS(klass);
-
-    QEMU_BUILD_BUG_ON(NPCM7XX_GCR_REGS_END > NPCM7XX_GCR_NR_REGS);
-
-    dc->desc = "NPCM7xx System Global Control Registers";
-    dc->realize = npcm7xx_gcr_realize;
-    dc->vmsd = &vmstate_npcm7xx_gcr;
-    rc->phases.enter = npcm7xx_gcr_enter_reset;
-
-    device_class_set_props(dc, npcm7xx_gcr_properties);
-}
-
-static const TypeInfo npcm7xx_gcr_info = {
-    .name               = TYPE_NPCM7XX_GCR,
-    .parent             = TYPE_SYS_BUS_DEVICE,
-    .instance_size      = sizeof(NPCM7xxGCRState),
-    .instance_init      = npcm7xx_gcr_init,
-    .class_init         = npcm7xx_gcr_class_init,
-};
-
-static void npcm7xx_gcr_register_type(void)
-{
-    type_register_static(&npcm7xx_gcr_info);
-}
-type_init(npcm7xx_gcr_register_type);
diff --git a/hw/misc/npcm7xx_clk.c b/hw/misc/npcm_clk.c
similarity index 81%
rename from hw/misc/npcm7xx_clk.c
rename to hw/misc/npcm_clk.c
index 46f907b61c2c9..0e85974cf96dc 100644
--- a/hw/misc/npcm7xx_clk.c
+++ b/hw/misc/npcm_clk.c
@@ -1,5 +1,5 @@
 /*
- * Nuvoton NPCM7xx Clock Control Registers.
+ * Nuvoton NPCM7xx/8xx Clock Control Registers.
  *
  * Copyright 2020 Google LLC
  *
@@ -16,7 +16,7 @@
 
 #include "qemu/osdep.h"
 
-#include "hw/misc/npcm7xx_clk.h"
+#include "hw/misc/npcm_clk.h"
 #include "hw/timer/npcm7xx_timer.h"
 #include "hw/qdev-clock.h"
 #include "migration/vmstate.h"
@@ -72,7 +72,57 @@ enum NPCM7xxCLKRegisters {
     NPCM7XX_CLK_AHBCKFI,
     NPCM7XX_CLK_SECCNT,
     NPCM7XX_CLK_CNTR25M,
-    NPCM7XX_CLK_REGS_END,
+};
+
+enum NPCM8xxCLKRegisters {
+    NPCM8XX_CLK_CLKEN1,
+    NPCM8XX_CLK_CLKSEL,
+    NPCM8XX_CLK_CLKDIV1,
+    NPCM8XX_CLK_PLLCON0,
+    NPCM8XX_CLK_PLLCON1,
+    NPCM8XX_CLK_SWRSTR,
+    NPCM8XX_CLK_IPSRST1         = 0x20 / sizeof(uint32_t),
+    NPCM8XX_CLK_IPSRST2,
+    NPCM8XX_CLK_CLKEN2,
+    NPCM8XX_CLK_CLKDIV2,
+    NPCM8XX_CLK_CLKEN3,
+    NPCM8XX_CLK_IPSRST3,
+    NPCM8XX_CLK_WD0RCR,
+    NPCM8XX_CLK_WD1RCR,
+    NPCM8XX_CLK_WD2RCR,
+    NPCM8XX_CLK_SWRSTC1,
+    NPCM8XX_CLK_SWRSTC2,
+    NPCM8XX_CLK_SWRSTC3,
+    NPCM8XX_CLK_TIPRSTC,
+    NPCM8XX_CLK_PLLCON2,
+    NPCM8XX_CLK_CLKDIV3,
+    NPCM8XX_CLK_CORSTC,
+    NPCM8XX_CLK_PLLCONG,
+    NPCM8XX_CLK_AHBCKFI,
+    NPCM8XX_CLK_SECCNT,
+    NPCM8XX_CLK_CNTR25M,
+    /* Registers unique to NPCM8XX SoC */
+    NPCM8XX_CLK_CLKEN4,
+    NPCM8XX_CLK_IPSRST4,
+    NPCM8XX_CLK_BUSTO,
+    NPCM8XX_CLK_CLKDIV4,
+    NPCM8XX_CLK_WD0RCRB,
+    NPCM8XX_CLK_WD1RCRB,
+    NPCM8XX_CLK_WD2RCRB,
+    NPCM8XX_CLK_SWRSTC1B,
+    NPCM8XX_CLK_SWRSTC2B,
+    NPCM8XX_CLK_SWRSTC3B,
+    NPCM8XX_CLK_TIPRSTCB,
+    NPCM8XX_CLK_CORSTCB,
+    NPCM8XX_CLK_IPSRSTDIS1,
+    NPCM8XX_CLK_IPSRSTDIS2,
+    NPCM8XX_CLK_IPSRSTDIS3,
+    NPCM8XX_CLK_IPSRSTDIS4,
+    NPCM8XX_CLK_CLKENDIS1,
+    NPCM8XX_CLK_CLKENDIS2,
+    NPCM8XX_CLK_CLKENDIS3,
+    NPCM8XX_CLK_CLKENDIS4,
+    NPCM8XX_CLK_THRTL_CNT,
 };
 
 /*
@@ -81,7 +131,7 @@ enum NPCM7xxCLKRegisters {
  * All are loaded on power-up reset. CLKENx and SWRSTR should also be loaded on
  * core domain reset, but this reset type is not yet supported by QEMU.
  */
-static const uint32_t cold_reset_values[NPCM7XX_CLK_NR_REGS] = {
+static const uint32_t npcm7xx_cold_reset_values[NPCM7XX_CLK_NR_REGS] = {
     [NPCM7XX_CLK_CLKEN1]        = 0xffffffff,
     [NPCM7XX_CLK_CLKSEL]        = 0x004aaaaa,
     [NPCM7XX_CLK_CLKDIV1]       = 0x5413f855,
@@ -103,6 +153,46 @@ static const uint32_t cold_reset_values[NPCM7XX_CLK_NR_REGS] = {
     [NPCM7XX_CLK_AHBCKFI]       = 0x000000c8,
 };
 
+/*
+ * These reset values were taken from version 0.92 of the NPCM8xx data sheet.
+ */
+static const uint32_t npcm8xx_cold_reset_values[NPCM8XX_CLK_NR_REGS] = {
+    [NPCM8XX_CLK_CLKEN1]        = 0xffffffff,
+    [NPCM8XX_CLK_CLKSEL]        = 0x154aaaaa,
+    [NPCM8XX_CLK_CLKDIV1]       = 0x5413f855,
+    [NPCM8XX_CLK_PLLCON0]       = 0x00222101 | PLLCON_LOKI,
+    [NPCM8XX_CLK_PLLCON1]       = 0x00202101 | PLLCON_LOKI,
+    [NPCM8XX_CLK_IPSRST1]       = 0x00001000,
+    [NPCM8XX_CLK_IPSRST2]       = 0x80000000,
+    [NPCM8XX_CLK_CLKEN2]        = 0xffffffff,
+    [NPCM8XX_CLK_CLKDIV2]       = 0xaa4f8f9f,
+    [NPCM8XX_CLK_CLKEN3]        = 0xffffffff,
+    [NPCM8XX_CLK_IPSRST3]       = 0x03000000,
+    [NPCM8XX_CLK_WD0RCR]        = 0xffffffff,
+    [NPCM8XX_CLK_WD1RCR]        = 0xffffffff,
+    [NPCM8XX_CLK_WD2RCR]        = 0xffffffff,
+    [NPCM8XX_CLK_SWRSTC1]       = 0x00000003,
+    [NPCM8XX_CLK_SWRSTC2]       = 0x00000001,
+    [NPCM8XX_CLK_SWRSTC3]       = 0x00000001,
+    [NPCM8XX_CLK_TIPRSTC]       = 0x00000001,
+    [NPCM8XX_CLK_PLLCON2]       = 0x00c02105 | PLLCON_LOKI,
+    [NPCM8XX_CLK_CLKDIV3]       = 0x00009100,
+    [NPCM8XX_CLK_CORSTC]        = 0x04000003,
+    [NPCM8XX_CLK_PLLCONG]       = 0x01228606 | PLLCON_LOKI,
+    [NPCM8XX_CLK_AHBCKFI]       = 0x000000c8,
+    [NPCM8XX_CLK_CLKEN4]        = 0xffffffff,
+    [NPCM8XX_CLK_CLKDIV4]       = 0x70009000,
+    [NPCM8XX_CLK_IPSRST4]       = 0x02000000,
+    [NPCM8XX_CLK_WD0RCRB]       = 0xfffffe71,
+    [NPCM8XX_CLK_WD1RCRB]       = 0xfffffe71,
+    [NPCM8XX_CLK_WD2RCRB]       = 0xfffffe71,
+    [NPCM8XX_CLK_SWRSTC1B]      = 0xfffffe71,
+    [NPCM8XX_CLK_SWRSTC2B]      = 0xfffffe71,
+    [NPCM8XX_CLK_SWRSTC3B]      = 0xfffffe71,
+    [NPCM8XX_CLK_TIPRSTCB]      = 0xfffffe71,
+    [NPCM8XX_CLK_CORSTCB]       = 0xfffffe71,
+};
+
 /* The number of watchdogs that can trigger a reset. */
 #define NPCM7XX_NR_WATCHDOGS    (3)
 
@@ -198,7 +288,7 @@ static NPCM7xxClockPLL find_pll_by_reg(enum NPCM7xxCLKRegisters reg)
     }
 }
 
-static void npcm7xx_clk_update_all_plls(NPCM7xxCLKState *clk)
+static void npcm7xx_clk_update_all_plls(NPCMCLKState *clk)
 {
     int i;
 
@@ -207,7 +297,7 @@ static void npcm7xx_clk_update_all_plls(NPCM7xxCLKState *clk)
     }
 }
 
-static void npcm7xx_clk_update_all_sels(NPCM7xxCLKState *clk)
+static void npcm7xx_clk_update_all_sels(NPCMCLKState *clk)
 {
     int i;
 
@@ -216,7 +306,7 @@ static void npcm7xx_clk_update_all_sels(NPCM7xxCLKState *clk)
     }
 }
 
-static void npcm7xx_clk_update_all_dividers(NPCM7xxCLKState *clk)
+static void npcm7xx_clk_update_all_dividers(NPCMCLKState *clk)
 {
     int i;
 
@@ -225,7 +315,7 @@ static void npcm7xx_clk_update_all_dividers(NPCM7xxCLKState *clk)
     }
 }
 
-static void npcm7xx_clk_update_all_clocks(NPCM7xxCLKState *clk)
+static void npcm7xx_clk_update_all_clocks(NPCMCLKState *clk)
 {
     clock_update_hz(clk->clkref, NPCM7XX_CLOCK_REF_HZ);
     npcm7xx_clk_update_all_plls(clk);
@@ -635,7 +725,7 @@ static void npcm7xx_clk_divider_init(Object *obj)
 }
 
 static void npcm7xx_init_clock_pll(NPCM7xxClockPLLState *pll,
-        NPCM7xxCLKState *clk, const PLLInitInfo *init_info)
+        NPCMCLKState *clk, const PLLInitInfo *init_info)
 {
     pll->name = init_info->name;
     pll->clk = clk;
@@ -647,7 +737,7 @@ static void npcm7xx_init_clock_pll(NPCM7xxClockPLLState *pll,
 }
 
 static void npcm7xx_init_clock_sel(NPCM7xxClockSELState *sel,
-        NPCM7xxCLKState *clk, const SELInitInfo *init_info)
+        NPCMCLKState *clk, const SELInitInfo *init_info)
 {
     int input_size = init_info->input_size;
 
@@ -664,7 +754,7 @@ static void npcm7xx_init_clock_sel(NPCM7xxClockSELState *sel,
 }
 
 static void npcm7xx_init_clock_divider(NPCM7xxClockDividerState *div,
-        NPCM7xxCLKState *clk, const DividerInitInfo *init_info)
+        NPCMCLKState *clk, const DividerInitInfo *init_info)
 {
     div->name = init_info->name;
     div->clk = clk;
@@ -683,7 +773,7 @@ static void npcm7xx_init_clock_divider(NPCM7xxClockDividerState *div,
     }
 }
 
-static Clock *npcm7xx_get_clock(NPCM7xxCLKState *clk, ClockSrcType type,
+static Clock *npcm7xx_get_clock(NPCMCLKState *clk, ClockSrcType type,
         int index)
 {
     switch (type) {
@@ -700,7 +790,7 @@ static Clock *npcm7xx_get_clock(NPCM7xxCLKState *clk, ClockSrcType type,
     }
 }
 
-static void npcm7xx_connect_clocks(NPCM7xxCLKState *clk)
+static void npcm7xx_connect_clocks(NPCMCLKState *clk)
 {
     int i, j;
     Clock *src;
@@ -724,14 +814,15 @@ static void npcm7xx_connect_clocks(NPCM7xxCLKState *clk)
     }
 }
 
-static uint64_t npcm7xx_clk_read(void *opaque, hwaddr offset, unsigned size)
+static uint64_t npcm_clk_read(void *opaque, hwaddr offset, unsigned size)
 {
     uint32_t reg = offset / sizeof(uint32_t);
-    NPCM7xxCLKState *s = opaque;
+    NPCMCLKState *s = opaque;
+    NPCMCLKClass *c = NPCM_CLK_GET_CLASS(s);
     int64_t now_ns;
     uint32_t value = 0;
 
-    if (reg >= NPCM7XX_CLK_NR_REGS) {
+    if (reg >= c->nr_regs) {
         qemu_log_mask(LOG_GUEST_ERROR,
                       "%s: offset 0x%04" HWADDR_PRIx " out of range\n",
                       __func__, offset);
@@ -766,21 +857,22 @@ static uint64_t npcm7xx_clk_read(void *opaque, hwaddr offset, unsigned size)
         break;
     };
 
-    trace_npcm7xx_clk_read(offset, value);
+    trace_npcm_clk_read(offset, value);
 
     return value;
 }
 
-static void npcm7xx_clk_write(void *opaque, hwaddr offset,
+static void npcm_clk_write(void *opaque, hwaddr offset,
                               uint64_t v, unsigned size)
 {
     uint32_t reg = offset / sizeof(uint32_t);
-    NPCM7xxCLKState *s = opaque;
+    NPCMCLKState *s = opaque;
+    NPCMCLKClass *c = NPCM_CLK_GET_CLASS(s);
     uint32_t value = v;
 
-    trace_npcm7xx_clk_write(offset, value);
+    trace_npcm_clk_write(offset, value);
 
-    if (reg >= NPCM7XX_CLK_NR_REGS) {
+    if (reg >= c->nr_regs) {
         qemu_log_mask(LOG_GUEST_ERROR,
                       "%s: offset 0x%04" HWADDR_PRIx " out of range\n",
                       __func__, offset);
@@ -842,7 +934,7 @@ static void npcm7xx_clk_write(void *opaque, hwaddr offset,
 static void npcm7xx_clk_perform_watchdog_reset(void *opaque, int n,
         int level)
 {
-    NPCM7xxCLKState *clk = NPCM7XX_CLK(opaque);
+    NPCMCLKState *clk = NPCM_CLK(opaque);
     uint32_t rcr;
 
     g_assert(n >= 0 && n <= NPCM7XX_NR_WATCHDOGS);
@@ -856,9 +948,9 @@ static void npcm7xx_clk_perform_watchdog_reset(void *opaque, int n,
     }
 }
 
-static const struct MemoryRegionOps npcm7xx_clk_ops = {
-    .read       = npcm7xx_clk_read,
-    .write      = npcm7xx_clk_write,
+static const struct MemoryRegionOps npcm_clk_ops = {
+    .read       = npcm_clk_read,
+    .write      = npcm_clk_write,
     .endianness = DEVICE_LITTLE_ENDIAN,
     .valid      = {
         .min_access_size        = 4,
@@ -867,13 +959,14 @@ static const struct MemoryRegionOps npcm7xx_clk_ops = {
     },
 };
 
-static void npcm7xx_clk_enter_reset(Object *obj, ResetType type)
+static void npcm_clk_enter_reset(Object *obj, ResetType type)
 {
-    NPCM7xxCLKState *s = NPCM7XX_CLK(obj);
+    NPCMCLKState *s = NPCM_CLK(obj);
+    NPCMCLKClass *c = NPCM_CLK_GET_CLASS(s);
 
-    QEMU_BUILD_BUG_ON(sizeof(s->regs) != sizeof(cold_reset_values));
-
-    memcpy(s->regs, cold_reset_values, sizeof(cold_reset_values));
+    size_t sizeof_regs = c->nr_regs * sizeof(uint32_t);
+    g_assert(sizeof(s->regs) >= sizeof_regs);
+    memcpy(s->regs, c->cold_reset_values, sizeof_regs);
     s->ref_ns = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
     npcm7xx_clk_update_all_clocks(s);
     /*
@@ -882,7 +975,7 @@ static void npcm7xx_clk_enter_reset(Object *obj, ResetType type)
      */
 }
 
-static void npcm7xx_clk_init_clock_hierarchy(NPCM7xxCLKState *s)
+static void npcm7xx_clk_init_clock_hierarchy(NPCMCLKState *s)
 {
     int i;
 
@@ -918,19 +1011,19 @@ static void npcm7xx_clk_init_clock_hierarchy(NPCM7xxCLKState *s)
     clock_update_hz(s->clkref, NPCM7XX_CLOCK_REF_HZ);
 }
 
-static void npcm7xx_clk_init(Object *obj)
+static void npcm_clk_init(Object *obj)
 {
-    NPCM7xxCLKState *s = NPCM7XX_CLK(obj);
+    NPCMCLKState *s = NPCM_CLK(obj);
 
-    memory_region_init_io(&s->iomem, obj, &npcm7xx_clk_ops, s,
-                          TYPE_NPCM7XX_CLK, 4 * KiB);
+    memory_region_init_io(&s->iomem, obj, &npcm_clk_ops, s,
+                          TYPE_NPCM_CLK, 4 * KiB);
     sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem);
 }
 
-static int npcm7xx_clk_post_load(void *opaque, int version_id)
+static int npcm_clk_post_load(void *opaque, int version_id)
 {
     if (version_id >= 1) {
-        NPCM7xxCLKState *clk = opaque;
+        NPCMCLKState *clk = opaque;
 
         npcm7xx_clk_update_all_clocks(clk);
     }
@@ -938,10 +1031,10 @@ static int npcm7xx_clk_post_load(void *opaque, int version_id)
     return 0;
 }
 
-static void npcm7xx_clk_realize(DeviceState *dev, Error **errp)
+static void npcm_clk_realize(DeviceState *dev, Error **errp)
 {
     int i;
-    NPCM7xxCLKState *s = NPCM7XX_CLK(dev);
+    NPCMCLKState *s = NPCM_CLK(dev);
 
     qdev_init_gpio_in_named(DEVICE(s), npcm7xx_clk_perform_watchdog_reset,
             NPCM7XX_WATCHDOG_RESET_GPIO_IN, NPCM7XX_NR_WATCHDOGS);
@@ -996,15 +1089,15 @@ static const VMStateDescription vmstate_npcm7xx_clk_divider = {
     },
 };
 
-static const VMStateDescription vmstate_npcm7xx_clk = {
-    .name = "npcm7xx-clk",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .post_load = npcm7xx_clk_post_load,
+static const VMStateDescription vmstate_npcm_clk = {
+    .name = "npcm-clk",
+    .version_id = 3,
+    .minimum_version_id = 3,
+    .post_load = npcm_clk_post_load,
     .fields = (const VMStateField[]) {
-        VMSTATE_UINT32_ARRAY(regs, NPCM7xxCLKState, NPCM7XX_CLK_NR_REGS),
-        VMSTATE_INT64(ref_ns, NPCM7xxCLKState),
-        VMSTATE_CLOCK(clkref, NPCM7xxCLKState),
+        VMSTATE_UINT32_ARRAY(regs, NPCMCLKState, NPCM_CLK_MAX_NR_REGS),
+        VMSTATE_INT64(ref_ns, NPCMCLKState),
+        VMSTATE_CLOCK(clkref, NPCMCLKState),
         VMSTATE_END_OF_LIST(),
     },
 };
@@ -1033,17 +1126,34 @@ static void npcm7xx_clk_divider_class_init(ObjectClass *klass, void *data)
     dc->vmsd = &vmstate_npcm7xx_clk_divider;
 }
 
-static void npcm7xx_clk_class_init(ObjectClass *klass, void *data)
+static void npcm_clk_class_init(ObjectClass *klass, void *data)
 {
     ResettableClass *rc = RESETTABLE_CLASS(klass);
     DeviceClass *dc = DEVICE_CLASS(klass);
 
-    QEMU_BUILD_BUG_ON(NPCM7XX_CLK_REGS_END > NPCM7XX_CLK_NR_REGS);
+    dc->vmsd = &vmstate_npcm_clk;
+    dc->realize = npcm_clk_realize;
+    rc->phases.enter = npcm_clk_enter_reset;
+}
+
+static void npcm7xx_clk_class_init(ObjectClass *klass, void *data)
+{
+    NPCMCLKClass *c = NPCM_CLK_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->desc = "NPCM7xx Clock Control Registers";
-    dc->vmsd = &vmstate_npcm7xx_clk;
-    dc->realize = npcm7xx_clk_realize;
-    rc->phases.enter = npcm7xx_clk_enter_reset;
+    c->nr_regs = NPCM7XX_CLK_NR_REGS;
+    c->cold_reset_values = npcm7xx_cold_reset_values;
+}
+
+static void npcm8xx_clk_class_init(ObjectClass *klass, void *data)
+{
+    NPCMCLKClass *c = NPCM_CLK_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->desc = "NPCM8xx Clock Control Registers";
+    c->nr_regs = NPCM8XX_CLK_NR_REGS;
+    c->cold_reset_values = npcm8xx_cold_reset_values;
 }
 
 static const TypeInfo npcm7xx_clk_pll_info = {
@@ -1070,19 +1180,35 @@ static const TypeInfo npcm7xx_clk_divider_info = {
     .class_init         = npcm7xx_clk_divider_class_init,
 };
 
+static const TypeInfo npcm_clk_info = {
+    .name               = TYPE_NPCM_CLK,
+    .parent             = TYPE_SYS_BUS_DEVICE,
+    .instance_size      = sizeof(NPCMCLKState),
+    .instance_init      = npcm_clk_init,
+    .class_size         = sizeof(NPCMCLKClass),
+    .class_init         = npcm_clk_class_init,
+    .abstract           = true,
+};
+
 static const TypeInfo npcm7xx_clk_info = {
     .name               = TYPE_NPCM7XX_CLK,
-    .parent             = TYPE_SYS_BUS_DEVICE,
-    .instance_size      = sizeof(NPCM7xxCLKState),
-    .instance_init      = npcm7xx_clk_init,
+    .parent             = TYPE_NPCM_CLK,
     .class_init         = npcm7xx_clk_class_init,
 };
 
+static const TypeInfo npcm8xx_clk_info = {
+    .name               = TYPE_NPCM8XX_CLK,
+    .parent             = TYPE_NPCM_CLK,
+    .class_init         = npcm8xx_clk_class_init,
+};
+
 static void npcm7xx_clk_register_type(void)
 {
     type_register_static(&npcm7xx_clk_pll_info);
     type_register_static(&npcm7xx_clk_sel_info);
     type_register_static(&npcm7xx_clk_divider_info);
+    type_register_static(&npcm_clk_info);
     type_register_static(&npcm7xx_clk_info);
+    type_register_static(&npcm8xx_clk_info);
 }
 type_init(npcm7xx_clk_register_type);
diff --git a/hw/misc/npcm_gcr.c b/hw/misc/npcm_gcr.c
new file mode 100644
index 0000000000000..4e8ce2cb89dd4
--- /dev/null
+++ b/hw/misc/npcm_gcr.c
@@ -0,0 +1,482 @@
+/*
+ * Nuvoton NPCM7xx/8xx System Global Control Registers.
+ *
+ * Copyright 2020 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "qemu/osdep.h"
+
+#include "hw/misc/npcm_gcr.h"
+#include "hw/qdev-properties.h"
+#include "migration/vmstate.h"
+#include "qapi/error.h"
+#include "qemu/cutils.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "qemu/units.h"
+
+#include "trace.h"
+
+#define NPCM7XX_GCR_MIN_DRAM_SIZE   (128 * MiB)
+#define NPCM7XX_GCR_MAX_DRAM_SIZE   (2 * GiB)
+
+enum NPCM7xxGCRRegisters {
+    NPCM7XX_GCR_PDID,
+    NPCM7XX_GCR_PWRON,
+    NPCM7XX_GCR_MFSEL1          = 0x0c / sizeof(uint32_t),
+    NPCM7XX_GCR_MFSEL2,
+    NPCM7XX_GCR_MISCPE,
+    NPCM7XX_GCR_SPSWC           = 0x038 / sizeof(uint32_t),
+    NPCM7XX_GCR_INTCR,
+    NPCM7XX_GCR_INTSR,
+    NPCM7XX_GCR_HIFCR           = 0x050 / sizeof(uint32_t),
+    NPCM7XX_GCR_INTCR2          = 0x060 / sizeof(uint32_t),
+    NPCM7XX_GCR_MFSEL3,
+    NPCM7XX_GCR_SRCNT,
+    NPCM7XX_GCR_RESSR,
+    NPCM7XX_GCR_RLOCKR1,
+    NPCM7XX_GCR_FLOCKR1,
+    NPCM7XX_GCR_DSCNT,
+    NPCM7XX_GCR_MDLR,
+    NPCM7XX_GCR_SCRPAD3,
+    NPCM7XX_GCR_SCRPAD2,
+    NPCM7XX_GCR_DAVCLVLR        = 0x098 / sizeof(uint32_t),
+    NPCM7XX_GCR_INTCR3,
+    NPCM7XX_GCR_VSINTR          = 0x0ac / sizeof(uint32_t),
+    NPCM7XX_GCR_MFSEL4,
+    NPCM7XX_GCR_CPBPNTR         = 0x0c4 / sizeof(uint32_t),
+    NPCM7XX_GCR_CPCTL           = 0x0d0 / sizeof(uint32_t),
+    NPCM7XX_GCR_CP2BST,
+    NPCM7XX_GCR_B2CPNT,
+    NPCM7XX_GCR_CPPCTL,
+    NPCM7XX_GCR_I2CSEGSEL,
+    NPCM7XX_GCR_I2CSEGCTL,
+    NPCM7XX_GCR_VSRCR,
+    NPCM7XX_GCR_MLOCKR,
+    NPCM7XX_GCR_SCRPAD          = 0x013c / sizeof(uint32_t),
+    NPCM7XX_GCR_USB1PHYCTL,
+    NPCM7XX_GCR_USB2PHYCTL,
+};
+
+static const uint32_t npcm7xx_cold_reset_values[NPCM7XX_GCR_NR_REGS] = {
+    [NPCM7XX_GCR_PDID]          = 0x04a92750,   /* Poleg A1 */
+    [NPCM7XX_GCR_MISCPE]        = 0x0000ffff,
+    [NPCM7XX_GCR_SPSWC]         = 0x00000003,
+    [NPCM7XX_GCR_INTCR]         = 0x0000035e,
+    [NPCM7XX_GCR_HIFCR]         = 0x0000004e,
+    [NPCM7XX_GCR_INTCR2]        = (1U << 19),   /* DDR initialized */
+    [NPCM7XX_GCR_RESSR]         = 0x80000000,
+    [NPCM7XX_GCR_DSCNT]         = 0x000000c0,
+    [NPCM7XX_GCR_DAVCLVLR]      = 0x5a00f3cf,
+    [NPCM7XX_GCR_SCRPAD]        = 0x00000008,
+    [NPCM7XX_GCR_USB1PHYCTL]    = 0x034730e4,
+    [NPCM7XX_GCR_USB2PHYCTL]    = 0x034730e4,
+};
+
+enum NPCM8xxGCRRegisters {
+    NPCM8XX_GCR_PDID,
+    NPCM8XX_GCR_PWRON,
+    NPCM8XX_GCR_MISCPE          = 0x014 / sizeof(uint32_t),
+    NPCM8XX_GCR_FLOCKR2         = 0x020 / sizeof(uint32_t),
+    NPCM8XX_GCR_FLOCKR3,
+    NPCM8XX_GCR_A35_MODE        = 0x034 / sizeof(uint32_t),
+    NPCM8XX_GCR_SPSWC,
+    NPCM8XX_GCR_INTCR,
+    NPCM8XX_GCR_INTSR,
+    NPCM8XX_GCR_HIFCR           = 0x050 / sizeof(uint32_t),
+    NPCM8XX_GCR_INTCR2          = 0x060 / sizeof(uint32_t),
+    NPCM8XX_GCR_SRCNT           = 0x068 / sizeof(uint32_t),
+    NPCM8XX_GCR_RESSR,
+    NPCM8XX_GCR_RLOCKR1,
+    NPCM8XX_GCR_FLOCKR1,
+    NPCM8XX_GCR_DSCNT,
+    NPCM8XX_GCR_MDLR,
+    NPCM8XX_GCR_SCRPAD_C        = 0x080 / sizeof(uint32_t),
+    NPCM8XX_GCR_SCRPAD_B,
+    NPCM8XX_GCR_DAVCLVLR        = 0x098 / sizeof(uint32_t),
+    NPCM8XX_GCR_INTCR3,
+    NPCM8XX_GCR_PCIRCTL         = 0x0a0 / sizeof(uint32_t),
+    NPCM8XX_GCR_VSINTR,
+    NPCM8XX_GCR_SD2SUR1         = 0x0b4 / sizeof(uint32_t),
+    NPCM8XX_GCR_SD2SUR2,
+    NPCM8XX_GCR_INTCR4          = 0x0c0 / sizeof(uint32_t),
+    NPCM8XX_GCR_CPCTL           = 0x0d0 / sizeof(uint32_t),
+    NPCM8XX_GCR_CP2BST,
+    NPCM8XX_GCR_B2CPNT,
+    NPCM8XX_GCR_CPPCTL,
+    NPCM8XX_GCR_I2CSEGSEL       = 0x0e0 / sizeof(uint32_t),
+    NPCM8XX_GCR_I2CSEGCTL,
+    NPCM8XX_GCR_VSRCR,
+    NPCM8XX_GCR_MLOCKR,
+    NPCM8XX_GCR_SCRPAD          = 0x13c / sizeof(uint32_t),
+    NPCM8XX_GCR_USB1PHYCTL,
+    NPCM8XX_GCR_USB2PHYCTL,
+    NPCM8XX_GCR_USB3PHYCTL,
+    NPCM8XX_GCR_MFSEL1          = 0x260 / sizeof(uint32_t),
+    NPCM8XX_GCR_MFSEL2,
+    NPCM8XX_GCR_MFSEL3,
+    NPCM8XX_GCR_MFSEL4,
+    NPCM8XX_GCR_MFSEL5,
+    NPCM8XX_GCR_MFSEL6,
+    NPCM8XX_GCR_MFSEL7,
+    NPCM8XX_GCR_MFSEL_LK1       = 0x280 / sizeof(uint32_t),
+    NPCM8XX_GCR_MFSEL_LK2,
+    NPCM8XX_GCR_MFSEL_LK3,
+    NPCM8XX_GCR_MFSEL_LK4,
+    NPCM8XX_GCR_MFSEL_LK5,
+    NPCM8XX_GCR_MFSEL_LK6,
+    NPCM8XX_GCR_MFSEL_LK7,
+    NPCM8XX_GCR_MFSEL_SET1      = 0x2a0 / sizeof(uint32_t),
+    NPCM8XX_GCR_MFSEL_SET2,
+    NPCM8XX_GCR_MFSEL_SET3,
+    NPCM8XX_GCR_MFSEL_SET4,
+    NPCM8XX_GCR_MFSEL_SET5,
+    NPCM8XX_GCR_MFSEL_SET6,
+    NPCM8XX_GCR_MFSEL_SET7,
+    NPCM8XX_GCR_MFSEL_CLR1      = 0x2c0 / sizeof(uint32_t),
+    NPCM8XX_GCR_MFSEL_CLR2,
+    NPCM8XX_GCR_MFSEL_CLR3,
+    NPCM8XX_GCR_MFSEL_CLR4,
+    NPCM8XX_GCR_MFSEL_CLR5,
+    NPCM8XX_GCR_MFSEL_CLR6,
+    NPCM8XX_GCR_MFSEL_CLR7,
+    NPCM8XX_GCR_WD0RCRLK        = 0x400 / sizeof(uint32_t),
+    NPCM8XX_GCR_WD1RCRLK,
+    NPCM8XX_GCR_WD2RCRLK,
+    NPCM8XX_GCR_SWRSTC1LK,
+    NPCM8XX_GCR_SWRSTC2LK,
+    NPCM8XX_GCR_SWRSTC3LK,
+    NPCM8XX_GCR_TIPRSTCLK,
+    NPCM8XX_GCR_CORSTCLK,
+    NPCM8XX_GCR_WD0RCRBLK,
+    NPCM8XX_GCR_WD1RCRBLK,
+    NPCM8XX_GCR_WD2RCRBLK,
+    NPCM8XX_GCR_SWRSTC1BLK,
+    NPCM8XX_GCR_SWRSTC2BLK,
+    NPCM8XX_GCR_SWRSTC3BLK,
+    NPCM8XX_GCR_TIPRSTCBLK,
+    NPCM8XX_GCR_CORSTCBLK,
+    /* 64 scratch pad registers start here. 0xe00 ~ 0xefc */
+    NPCM8XX_GCR_SCRPAD_00       = 0xe00 / sizeof(uint32_t),
+    /* 32 semaphore registers start here. 0xf00 ~ 0xf7c */
+    NPCM8XX_GCR_GP_SEMFR_00     = 0xf00 / sizeof(uint32_t),
+    NPCM8XX_GCR_GP_SEMFR_31     = 0xf7c / sizeof(uint32_t),
+};
+
+static const uint32_t npcm8xx_cold_reset_values[NPCM8XX_GCR_NR_REGS] = {
+    [NPCM8XX_GCR_PDID]          = 0x04a35850,   /* Arbel A1 */
+    [NPCM8XX_GCR_MISCPE]        = 0x0000ffff,
+    [NPCM8XX_GCR_A35_MODE]      = 0xfff4ff30,
+    [NPCM8XX_GCR_SPSWC]         = 0x00000003,
+    [NPCM8XX_GCR_INTCR]         = 0x0010035e,
+    [NPCM8XX_GCR_HIFCR]         = 0x0000004e,
+    [NPCM8XX_GCR_SD2SUR1]       = 0xfdc80000,
+    [NPCM8XX_GCR_SD2SUR2]       = 0x5200b130,
+    [NPCM8XX_GCR_INTCR2]        = (1U << 19),   /* DDR initialized */
+    [NPCM8XX_GCR_RESSR]         = 0x80000000,
+    [NPCM8XX_GCR_DAVCLVLR]      = 0x5a00f3cf,
+    [NPCM8XX_GCR_INTCR3]        = 0x5e001002,
+    [NPCM8XX_GCR_VSRCR]         = 0x00004800,
+    [NPCM8XX_GCR_SCRPAD]        = 0x00000008,
+    [NPCM8XX_GCR_USB1PHYCTL]    = 0x034730e4,
+    [NPCM8XX_GCR_USB2PHYCTL]    = 0x034730e4,
+    [NPCM8XX_GCR_USB3PHYCTL]    = 0x034730e4,
+    /* All 32 semaphores should be initialized to 1. */
+    [NPCM8XX_GCR_GP_SEMFR_00...NPCM8XX_GCR_GP_SEMFR_31] = 0x00000001,
+};
+
+static uint64_t npcm_gcr_read(void *opaque, hwaddr offset, unsigned size)
+{
+    uint32_t reg = offset / sizeof(uint32_t);
+    NPCMGCRState *s = opaque;
+    NPCMGCRClass *c = NPCM_GCR_GET_CLASS(s);
+    uint64_t value;
+
+    if (reg >= c->nr_regs) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: offset 0x%04" HWADDR_PRIx " out of range\n",
+                      __func__, offset);
+        return 0;
+    }
+
+    switch (size) {
+    case 4:
+        value = s->regs[reg];
+        break;
+
+    case 8:
+        g_assert(!(reg & 1));
+        value = deposit64(s->regs[reg], 32, 32, s->regs[reg + 1]);
+        break;
+
+    default:
+        g_assert_not_reached();
+    }
+
+    trace_npcm_gcr_read(offset, value);
+    return value;
+}
+
+static void npcm_gcr_write(void *opaque, hwaddr offset,
+                              uint64_t v, unsigned size)
+{
+    uint32_t reg = offset / sizeof(uint32_t);
+    NPCMGCRState *s = opaque;
+    NPCMGCRClass *c = NPCM_GCR_GET_CLASS(s);
+    uint32_t value = v;
+
+    trace_npcm_gcr_write(offset, v);
+
+    if (reg >= c->nr_regs) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: offset 0x%04" HWADDR_PRIx " out of range\n",
+                      __func__, offset);
+        return;
+    }
+
+    switch (size) {
+    case 4:
+        switch (reg) {
+        case NPCM7XX_GCR_PDID:
+        case NPCM7XX_GCR_PWRON:
+        case NPCM7XX_GCR_INTSR:
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: register @ 0x%04" HWADDR_PRIx " is read-only\n",
+                          __func__, offset);
+            return;
+
+        case NPCM7XX_GCR_RESSR:
+        case NPCM7XX_GCR_CP2BST:
+            /* Write 1 to clear */
+            value = s->regs[reg] & ~value;
+            break;
+
+        case NPCM7XX_GCR_RLOCKR1:
+        case NPCM7XX_GCR_MDLR:
+            /* Write 1 to set */
+            value |= s->regs[reg];
+            break;
+        };
+        s->regs[reg] = value;
+        break;
+
+    case 8:
+        g_assert(!(reg & 1));
+        s->regs[reg] = value;
+        s->regs[reg + 1] = extract64(v, 32, 32);
+        break;
+
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static bool npcm_gcr_check_mem_op(void *opaque, hwaddr offset,
+                                  unsigned size, bool is_write,
+                                  MemTxAttrs attrs)
+{
+    NPCMGCRClass *c = NPCM_GCR_GET_CLASS(opaque);
+
+    if (offset >= c->nr_regs * sizeof(uint32_t)) {
+        return false;
+    }
+
+    switch (size) {
+    case 4:
+        return true;
+    case 8:
+        if (offset >= NPCM8XX_GCR_SCRPAD_00 * sizeof(uint32_t) &&
+            offset < (NPCM8XX_GCR_NR_REGS - 1) * sizeof(uint32_t)) {
+            return true;
+        } else {
+            return false;
+        }
+    default:
+        return false;
+    }
+}
+
+static const struct MemoryRegionOps npcm_gcr_ops = {
+    .read       = npcm_gcr_read,
+    .write      = npcm_gcr_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid      = {
+        .min_access_size        = 4,
+        .max_access_size        = 8,
+        .accepts                = npcm_gcr_check_mem_op,
+        .unaligned              = false,
+    },
+};
+
+static void npcm7xx_gcr_enter_reset(Object *obj, ResetType type)
+{
+    NPCMGCRState *s = NPCM_GCR(obj);
+    NPCMGCRClass *c = NPCM_GCR_GET_CLASS(obj);
+
+    g_assert(sizeof(s->regs) >= sizeof(c->cold_reset_values));
+    g_assert(sizeof(s->regs) >= c->nr_regs * sizeof(uint32_t));
+    memcpy(s->regs, c->cold_reset_values, c->nr_regs * sizeof(uint32_t));
+    /* These 3 registers are at the same location in both 7xx and 8xx. */
+    s->regs[NPCM7XX_GCR_PWRON] = s->reset_pwron;
+    s->regs[NPCM7XX_GCR_MDLR] = s->reset_mdlr;
+    s->regs[NPCM7XX_GCR_INTCR3] = s->reset_intcr3;
+}
+
+static void npcm8xx_gcr_enter_reset(Object *obj, ResetType type)
+{
+    NPCMGCRState *s = NPCM_GCR(obj);
+    NPCMGCRClass *c = NPCM_GCR_GET_CLASS(obj);
+
+    memcpy(s->regs, c->cold_reset_values, c->nr_regs * sizeof(uint32_t));
+    /* These 3 registers are at the same location in both 7xx and 8xx. */
+    s->regs[NPCM8XX_GCR_PWRON] = s->reset_pwron;
+    s->regs[NPCM8XX_GCR_MDLR] = s->reset_mdlr;
+    s->regs[NPCM8XX_GCR_INTCR3] = s->reset_intcr3;
+    s->regs[NPCM8XX_GCR_SCRPAD_B] = s->reset_scrpad_b;
+}
+
+static void npcm_gcr_realize(DeviceState *dev, Error **errp)
+{
+    ERRP_GUARD();
+    NPCMGCRState *s = NPCM_GCR(dev);
+    uint64_t dram_size;
+    Object *obj;
+
+    obj = object_property_get_link(OBJECT(dev), "dram-mr", errp);
+    if (!obj) {
+        error_prepend(errp, "%s: required dram-mr link not found: ", __func__);
+        return;
+    }
+    dram_size = memory_region_size(MEMORY_REGION(obj));
+    if (!is_power_of_2(dram_size) ||
+        dram_size < NPCM7XX_GCR_MIN_DRAM_SIZE ||
+        dram_size > NPCM7XX_GCR_MAX_DRAM_SIZE) {
+        g_autofree char *sz = size_to_str(dram_size);
+        g_autofree char *min_sz = size_to_str(NPCM7XX_GCR_MIN_DRAM_SIZE);
+        g_autofree char *max_sz = size_to_str(NPCM7XX_GCR_MAX_DRAM_SIZE);
+        error_setg(errp, "%s: unsupported DRAM size %s", __func__, sz);
+        error_append_hint(errp,
+                          "DRAM size must be a power of two between %s and %s,"
+                          " inclusive.\n", min_sz, max_sz);
+        return;
+    }
+
+    /* Power-on reset value */
+    s->reset_intcr3 = 0x00001002;
+
+    /*
+     * The GMMAP (Graphics Memory Map) field is used by u-boot to detect the
+     * DRAM size, and is normally initialized by the boot block as part of DRAM
+     * training. However, since we don't have a complete emulation of the
+     * memory controller and try to make it look like it has already been
+     * initialized, the boot block will skip this initialization, and we need
+     * to make sure this field is set correctly up front.
+     *
+     * WARNING: some versions of u-boot only looks at bits 8 and 9, so 2 GiB of
+     * DRAM will be interpreted as 128 MiB.
+     *
+     * https://github.com/Nuvoton-Israel/u-boot/blob/2aef993bd2aafeb5408dbaad0f3ce099ee40c4aa/board/nuvoton/poleg/poleg.c#L244
+     */
+    s->reset_intcr3 |= ctz64(dram_size / NPCM7XX_GCR_MIN_DRAM_SIZE) << 8;
+
+    /*
+     * The boot block starting from 0.0.6 for NPCM8xx SoCs stores the DRAM size
+     * in the SCRPAD2 registers. We need to set this field correctly since
+     * the initialization is skipped as we mentioned above.
+     * https://github.com/Nuvoton-Israel/u-boot/blob/npcm8mnx-v2019.01_tmp/board/nuvoton/arbel/arbel.c#L737
+     */
+    s->reset_scrpad_b = dram_size;
+}
+
+static void npcm_gcr_init(Object *obj)
+{
+    NPCMGCRState *s = NPCM_GCR(obj);
+
+    memory_region_init_io(&s->iomem, obj, &npcm_gcr_ops, s,
+                          TYPE_NPCM_GCR, 4 * KiB);
+    sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem);
+}
+
+static const VMStateDescription vmstate_npcm_gcr = {
+    .name = "npcm-gcr",
+    .version_id = 2,
+    .minimum_version_id = 2,
+    .fields = (const VMStateField[]) {
+        VMSTATE_UINT32_ARRAY(regs, NPCMGCRState, NPCM_GCR_MAX_NR_REGS),
+        VMSTATE_END_OF_LIST(),
+    },
+};
+
+static const Property npcm_gcr_properties[] = {
+    DEFINE_PROP_UINT32("disabled-modules", NPCMGCRState, reset_mdlr, 0),
+    DEFINE_PROP_UINT32("power-on-straps", NPCMGCRState, reset_pwron, 0),
+};
+
+static void npcm_gcr_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->realize = npcm_gcr_realize;
+    dc->vmsd = &vmstate_npcm_gcr;
+
+    device_class_set_props(dc, npcm_gcr_properties);
+}
+
+static void npcm7xx_gcr_class_init(ObjectClass *klass, void *data)
+{
+    NPCMGCRClass *c = NPCM_GCR_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    ResettableClass *rc = RESETTABLE_CLASS(klass);
+
+    dc->desc = "NPCM7xx System Global Control Registers";
+    rc->phases.enter = npcm7xx_gcr_enter_reset;
+
+    c->nr_regs = NPCM7XX_GCR_NR_REGS;
+    c->cold_reset_values = npcm7xx_cold_reset_values;
+    rc->phases.enter = npcm7xx_gcr_enter_reset;
+}
+
+static void npcm8xx_gcr_class_init(ObjectClass *klass, void *data)
+{
+    NPCMGCRClass *c = NPCM_GCR_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    ResettableClass *rc = RESETTABLE_CLASS(klass);
+
+    dc->desc = "NPCM8xx System Global Control Registers";
+    c->nr_regs = NPCM8XX_GCR_NR_REGS;
+    c->cold_reset_values = npcm8xx_cold_reset_values;
+    rc->phases.enter = npcm8xx_gcr_enter_reset;
+}
+
+static const TypeInfo npcm_gcr_info[] = {
+    {
+        .name               = TYPE_NPCM_GCR,
+        .parent             = TYPE_SYS_BUS_DEVICE,
+        .instance_size      = sizeof(NPCMGCRState),
+        .instance_init      = npcm_gcr_init,
+        .class_size         = sizeof(NPCMGCRClass),
+        .class_init         = npcm_gcr_class_init,
+        .abstract           = true,
+    },
+    {
+        .name               = TYPE_NPCM7XX_GCR,
+        .parent             = TYPE_NPCM_GCR,
+        .class_init         = npcm7xx_gcr_class_init,
+    },
+    {
+        .name               = TYPE_NPCM8XX_GCR,
+        .parent             = TYPE_NPCM_GCR,
+        .class_init         = npcm8xx_gcr_class_init,
+    },
+};
+DEFINE_TYPES(npcm_gcr_info)
diff --git a/hw/misc/pvpanic-mmio.c b/hw/misc/pvpanic-mmio.c
new file mode 100644
index 0000000000000..70097cecc7437
--- /dev/null
+++ b/hw/misc/pvpanic-mmio.c
@@ -0,0 +1,60 @@
+/*
+ * QEMU simulated pvpanic device (MMIO frontend)
+ *
+ * Copyright © 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+
+#include "hw/qdev-properties.h"
+#include "hw/misc/pvpanic.h"
+#include "hw/sysbus.h"
+#include "standard-headers/misc/pvpanic.h"
+
+OBJECT_DECLARE_SIMPLE_TYPE(PVPanicMMIOState, PVPANIC_MMIO_DEVICE)
+
+#define PVPANIC_MMIO_SIZE 0x2
+
+struct PVPanicMMIOState {
+    SysBusDevice parent_obj;
+
+    PVPanicState pvpanic;
+};
+
+static void pvpanic_mmio_initfn(Object *obj)
+{
+    PVPanicMMIOState *s = PVPANIC_MMIO_DEVICE(obj);
+
+    pvpanic_setup_io(&s->pvpanic, DEVICE(s), PVPANIC_MMIO_SIZE);
+    sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->pvpanic.mr);
+}
+
+static const Property pvpanic_mmio_properties[] = {
+    DEFINE_PROP_UINT8("events", PVPanicMMIOState, pvpanic.events,
+                      PVPANIC_PANICKED | PVPANIC_CRASH_LOADED),
+};
+
+static void pvpanic_mmio_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    device_class_set_props(dc, pvpanic_mmio_properties);
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+}
+
+static const TypeInfo pvpanic_mmio_info = {
+    .name          = TYPE_PVPANIC_MMIO_DEVICE,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(PVPanicMMIOState),
+    .instance_init = pvpanic_mmio_initfn,
+    .class_init    = pvpanic_mmio_class_init,
+};
+
+static void pvpanic_register_types(void)
+{
+    type_register_static(&pvpanic_mmio_info);
+}
+
+type_init(pvpanic_register_types)
diff --git a/hw/misc/trace-events b/hw/misc/trace-events
index b35b0e77f7dbc..4383808d7ad03 100644
--- a/hw/misc/trace-events
+++ b/hw/misc/trace-events
@@ -130,13 +130,13 @@ mos6522_set_sr_int(void) "set sr_int"
 mos6522_write(uint64_t addr, const char *name, uint64_t val) "reg=0x%"PRIx64 " [%s] val=0x%"PRIx64
 mos6522_read(uint64_t addr, const char *name, unsigned val) "reg=0x%"PRIx64 " [%s] val=0x%x"
 
-# npcm7xx_clk.c
-npcm7xx_clk_read(uint64_t offset, uint32_t value) " offset: 0x%04" PRIx64 " value: 0x%08" PRIx32
-npcm7xx_clk_write(uint64_t offset, uint32_t value) "offset: 0x%04" PRIx64 " value: 0x%08" PRIx32
+# npcm_clk.c
+npcm_clk_read(uint64_t offset, uint32_t value) " offset: 0x%04" PRIx64 " value: 0x%08" PRIx32
+npcm_clk_write(uint64_t offset, uint32_t value) "offset: 0x%04" PRIx64 " value: 0x%08" PRIx32
 
-# npcm7xx_gcr.c
-npcm7xx_gcr_read(uint64_t offset, uint32_t value) " offset: 0x%04" PRIx64 " value: 0x%08" PRIx32
-npcm7xx_gcr_write(uint64_t offset, uint32_t value) "offset: 0x%04" PRIx64 " value: 0x%08" PRIx32
+# npcm_gcr.c
+npcm_gcr_read(uint64_t offset, uint64_t value) " offset: 0x%04" PRIx64 " value: 0x%08" PRIx64
+npcm_gcr_write(uint64_t offset, uint64_t value) "offset: 0x%04" PRIx64 " value: 0x%08" PRIx64
 
 # npcm7xx_mft.c
 npcm7xx_mft_read(const char *name, uint64_t offset, uint16_t value) "%s: offset: 0x%04" PRIx64 " value: 0x%04" PRIx16
diff --git a/hw/misc/xlnx-versal-trng.c b/hw/misc/xlnx-versal-trng.c
index dbd9b58a4ece0..ba93f93cab948 100644
--- a/hw/misc/xlnx-versal-trng.c
+++ b/hw/misc/xlnx-versal-trng.c
@@ -652,7 +652,7 @@ static void trng_prop_fault_event_set(Object *obj, Visitor *v,
 }
 
 static const PropertyInfo trng_prop_fault_events = {
-    .name = "uint32:bits",
+    .type = "uint32",
     .description = "Set to trigger TRNG fault events",
     .set = trng_prop_fault_event_set,
     .realized_set_allowed = true,
diff --git a/hw/net/allwinner-sun8i-emac.c b/hw/net/allwinner-sun8i-emac.c
index ff44554e95716..5adb41dc4690b 100644
--- a/hw/net/allwinner-sun8i-emac.c
+++ b/hw/net/allwinner-sun8i-emac.c
@@ -784,7 +784,7 @@ static void allwinner_sun8i_emac_set_link(NetClientState *nc)
 static const MemoryRegionOps allwinner_sun8i_emac_mem_ops = {
     .read = allwinner_sun8i_emac_read,
     .write = allwinner_sun8i_emac_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
     .valid = {
         .min_access_size = 4,
         .max_access_size = 4,
diff --git a/hw/net/allwinner_emac.c b/hw/net/allwinner_emac.c
index 3eb9e09dc5c35..47f1e7f086c7d 100644
--- a/hw/net/allwinner_emac.c
+++ b/hw/net/allwinner_emac.c
@@ -421,7 +421,7 @@ static void aw_emac_set_link(NetClientState *nc)
 static const MemoryRegionOps aw_emac_mem_ops = {
     .read = aw_emac_read,
     .write = aw_emac_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
     .valid = {
         .min_access_size = 4,
         .max_access_size = 4,
diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c
index f637853073e26..b72cbab7e8890 100644
--- a/hw/net/e1000e.c
+++ b/hw/net/e1000e.c
@@ -372,8 +372,7 @@ static int
 e1000e_add_pm_capability(PCIDevice *pdev, uint8_t offset, uint16_t pmc)
 {
     Error *local_err = NULL;
-    int ret = pci_add_capability(pdev, PCI_CAP_ID_PM, offset,
-                                 PCI_PM_SIZEOF, &local_err);
+    int ret = pci_pm_init(pdev, offset, &local_err);
 
     if (local_err) {
         error_report_err(local_err);
diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c
index 6d853229aec2e..29a39865a6086 100644
--- a/hw/net/eepro100.c
+++ b/hw/net/eepro100.c
@@ -551,9 +551,7 @@ static void e100_pci_reset(EEPRO100State *s, Error **errp)
     if (info->power_management) {
         /* Power Management Capabilities */
         int cfg_offset = 0xdc;
-        int r = pci_add_capability(&s->dev, PCI_CAP_ID_PM,
-                                   cfg_offset, PCI_PM_SIZEOF,
-                                   errp);
+        int r = pci_pm_init(&s->dev, cfg_offset, errp);
         if (r < 0) {
             return;
         }
diff --git a/hw/net/fsl_etsec/etsec.c b/hw/net/fsl_etsec/etsec.c
index 781b9003954bb..adde644892653 100644
--- a/hw/net/fsl_etsec/etsec.c
+++ b/hw/net/fsl_etsec/etsec.c
@@ -423,16 +423,16 @@ static void etsec_class_init(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->realize = etsec_realize;
+    dc->desc = "Freescale Enhanced Three-Speed Ethernet Controller";
     device_class_set_legacy_reset(dc, etsec_reset);
     device_class_set_props(dc, etsec_properties);
-    /* Supported by ppce500 machine */
-    dc->user_creatable = true;
+    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
 }
 
 static const TypeInfo etsec_types[] = {
     {
         .name          = TYPE_ETSEC_COMMON,
-        .parent        = TYPE_SYS_BUS_DEVICE,
+        .parent        = TYPE_DYNAMIC_SYS_BUS_DEVICE,
         .instance_size = sizeof(eTSEC),
         .class_init    = etsec_class_init,
         .instance_init = etsec_instance_init,
diff --git a/hw/net/igb.c b/hw/net/igb.c
index 4d93ce629f954..e318df40e0138 100644
--- a/hw/net/igb.c
+++ b/hw/net/igb.c
@@ -356,8 +356,7 @@ static int
 igb_add_pm_capability(PCIDevice *pdev, uint8_t offset, uint16_t pmc)
 {
     Error *local_err = NULL;
-    int ret = pci_add_capability(pdev, PCI_CAP_ID_PM, offset,
-                                 PCI_PM_SIZEOF, &local_err);
+    int ret = pci_pm_init(pdev, offset, &local_err);
 
     if (local_err) {
         error_report_err(local_err);
@@ -446,9 +445,13 @@ static void igb_pci_realize(PCIDevice *pci_dev, Error **errp)
 
     pcie_ari_init(pci_dev, 0x150);
 
-    pcie_sriov_pf_init(pci_dev, IGB_CAP_SRIOV_OFFSET, TYPE_IGBVF,
-        IGB_82576_VF_DEV_ID, IGB_MAX_VF_FUNCTIONS, IGB_MAX_VF_FUNCTIONS,
-        IGB_VF_OFFSET, IGB_VF_STRIDE);
+    if (!pcie_sriov_pf_init(pci_dev, IGB_CAP_SRIOV_OFFSET, TYPE_IGBVF,
+                            IGB_82576_VF_DEV_ID, IGB_MAX_VF_FUNCTIONS,
+                            IGB_MAX_VF_FUNCTIONS, IGB_VF_OFFSET, IGB_VF_STRIDE,
+                            errp)) {
+        igb_cleanup_msix(s);
+        return;
+    }
 
     pcie_sriov_pf_init_vf_bar(pci_dev, IGBVF_MMIO_BAR_IDX,
         PCI_BASE_ADDRESS_MEM_TYPE_64 | PCI_BASE_ADDRESS_MEM_PREFETCH,
diff --git a/hw/net/meson.build b/hw/net/meson.build
index 3bb5d749a8356..e6759e26ca6cc 100644
--- a/hw/net/meson.build
+++ b/hw/net/meson.build
@@ -40,6 +40,7 @@ system_ss.add(when: 'CONFIG_SUNHME', if_true: files('sunhme.c'))
 system_ss.add(when: 'CONFIG_FTGMAC100', if_true: files('ftgmac100.c'))
 system_ss.add(when: 'CONFIG_SUNGEM', if_true: files('sungem.c'))
 system_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_emc.c', 'npcm_gmac.c'))
+system_ss.add(when: 'CONFIG_NPCM8XX', if_true: files('npcm_pcs.c'))
 
 system_ss.add(when: 'CONFIG_COLDFIRE', if_true: files('mcf_fec.c'))
 specific_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr_llan.c'))
diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
index 1f79b82b77f46..903238dca24d0 100644
--- a/hw/net/net_tx_pkt.c
+++ b/hw/net/net_tx_pkt.c
@@ -141,10 +141,6 @@ bool net_tx_pkt_update_sctp_checksum(struct NetTxPkt *pkt)
     uint32_t csum = 0;
     struct iovec *pl_start_frag = pkt->vec + NET_TX_PKT_PL_START_FRAG;
 
-    if (iov_size(pl_start_frag, pkt->payload_frags) < 8 + sizeof(csum)) {
-        return false;
-    }
-
     if (iov_from_buf(pl_start_frag, pkt->payload_frags, 8, &csum, sizeof(csum)) < sizeof(csum)) {
         return false;
     }
diff --git a/hw/net/npcm_pcs.c b/hw/net/npcm_pcs.c
new file mode 100644
index 0000000000000..ce5034e234350
--- /dev/null
+++ b/hw/net/npcm_pcs.c
@@ -0,0 +1,410 @@
+/*
+ * Nuvoton NPCM8xx PCS Module
+ *
+ * Copyright 2022 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+/*
+ * Disclaimer:
+ * Currently we only implemented the default values of the registers and
+ * the soft reset feature. These are required to boot up the GMAC module
+ * in Linux kernel for NPCM845 boards. Other functionalities are not modeled.
+ */
+
+#include "qemu/osdep.h"
+
+#include "exec/hwaddr.h"
+#include "hw/registerfields.h"
+#include "hw/net/npcm_pcs.h"
+#include "migration/vmstate.h"
+#include "qemu/log.h"
+#include "qemu/units.h"
+#include "trace.h"
+
+#define NPCM_PCS_IND_AC_BA      0x1fe
+#define NPCM_PCS_IND_SR_CTL     0x1e00
+#define NPCM_PCS_IND_SR_MII     0x1f00
+#define NPCM_PCS_IND_SR_TIM     0x1f07
+#define NPCM_PCS_IND_VR_MII     0x1f80
+
+REG16(NPCM_PCS_SR_CTL_ID1, 0x08)
+REG16(NPCM_PCS_SR_CTL_ID2, 0x0a)
+REG16(NPCM_PCS_SR_CTL_STS, 0x10)
+
+REG16(NPCM_PCS_SR_MII_CTRL, 0x00)
+REG16(NPCM_PCS_SR_MII_STS, 0x02)
+REG16(NPCM_PCS_SR_MII_DEV_ID1, 0x04)
+REG16(NPCM_PCS_SR_MII_DEV_ID2, 0x06)
+REG16(NPCM_PCS_SR_MII_AN_ADV, 0x08)
+REG16(NPCM_PCS_SR_MII_LP_BABL, 0x0a)
+REG16(NPCM_PCS_SR_MII_AN_EXPN, 0x0c)
+REG16(NPCM_PCS_SR_MII_EXT_STS, 0x1e)
+
+REG16(NPCM_PCS_SR_TIM_SYNC_ABL, 0x10)
+REG16(NPCM_PCS_SR_TIM_SYNC_TX_MAX_DLY_LWR, 0x12)
+REG16(NPCM_PCS_SR_TIM_SYNC_TX_MAX_DLY_UPR, 0x14)
+REG16(NPCM_PCS_SR_TIM_SYNC_TX_MIN_DLY_LWR, 0x16)
+REG16(NPCM_PCS_SR_TIM_SYNC_TX_MIN_DLY_UPR, 0x18)
+REG16(NPCM_PCS_SR_TIM_SYNC_RX_MAX_DLY_LWR, 0x1a)
+REG16(NPCM_PCS_SR_TIM_SYNC_RX_MAX_DLY_UPR, 0x1c)
+REG16(NPCM_PCS_SR_TIM_SYNC_RX_MIN_DLY_LWR, 0x1e)
+REG16(NPCM_PCS_SR_TIM_SYNC_RX_MIN_DLY_UPR, 0x20)
+
+REG16(NPCM_PCS_VR_MII_MMD_DIG_CTRL1, 0x000)
+REG16(NPCM_PCS_VR_MII_AN_CTRL, 0x002)
+REG16(NPCM_PCS_VR_MII_AN_INTR_STS, 0x004)
+REG16(NPCM_PCS_VR_MII_TC, 0x006)
+REG16(NPCM_PCS_VR_MII_DBG_CTRL, 0x00a)
+REG16(NPCM_PCS_VR_MII_EEE_MCTRL0, 0x00c)
+REG16(NPCM_PCS_VR_MII_EEE_TXTIMER, 0x010)
+REG16(NPCM_PCS_VR_MII_EEE_RXTIMER, 0x012)
+REG16(NPCM_PCS_VR_MII_LINK_TIMER_CTRL, 0x014)
+REG16(NPCM_PCS_VR_MII_EEE_MCTRL1, 0x016)
+REG16(NPCM_PCS_VR_MII_DIG_STS, 0x020)
+REG16(NPCM_PCS_VR_MII_ICG_ERRCNT1, 0x022)
+REG16(NPCM_PCS_VR_MII_MISC_STS, 0x030)
+REG16(NPCM_PCS_VR_MII_RX_LSTS, 0x040)
+REG16(NPCM_PCS_VR_MII_MP_TX_BSTCTRL0, 0x070)
+REG16(NPCM_PCS_VR_MII_MP_TX_LVLCTRL0, 0x074)
+REG16(NPCM_PCS_VR_MII_MP_TX_GENCTRL0, 0x07a)
+REG16(NPCM_PCS_VR_MII_MP_TX_GENCTRL1, 0x07c)
+REG16(NPCM_PCS_VR_MII_MP_TX_STS, 0x090)
+REG16(NPCM_PCS_VR_MII_MP_RX_GENCTRL0, 0x0b0)
+REG16(NPCM_PCS_VR_MII_MP_RX_GENCTRL1, 0x0b2)
+REG16(NPCM_PCS_VR_MII_MP_RX_LOS_CTRL0, 0x0ba)
+REG16(NPCM_PCS_VR_MII_MP_MPLL_CTRL0, 0x0f0)
+REG16(NPCM_PCS_VR_MII_MP_MPLL_CTRL1, 0x0f2)
+REG16(NPCM_PCS_VR_MII_MP_MPLL_STS, 0x110)
+REG16(NPCM_PCS_VR_MII_MP_MISC_CTRL2, 0x126)
+REG16(NPCM_PCS_VR_MII_MP_LVL_CTRL, 0x130)
+REG16(NPCM_PCS_VR_MII_MP_MISC_CTRL0, 0x132)
+REG16(NPCM_PCS_VR_MII_MP_MISC_CTRL1, 0x134)
+REG16(NPCM_PCS_VR_MII_DIG_CTRL2, 0x1c2)
+REG16(NPCM_PCS_VR_MII_DIG_ERRCNT_SEL, 0x1c4)
+
+/* Register Fields */
+#define NPCM_PCS_SR_MII_CTRL_RST            BIT(15)
+
+static const uint16_t npcm_pcs_sr_ctl_cold_reset_values[NPCM_PCS_NR_SR_CTLS] = {
+    [R_NPCM_PCS_SR_CTL_ID1]                 = 0x699e,
+    [R_NPCM_PCS_SR_CTL_STS]                 = 0x8000,
+};
+
+static const uint16_t npcm_pcs_sr_mii_cold_reset_values[NPCM_PCS_NR_SR_MIIS] = {
+    [R_NPCM_PCS_SR_MII_CTRL]                = 0x1140,
+    [R_NPCM_PCS_SR_MII_STS]                 = 0x0109,
+    [R_NPCM_PCS_SR_MII_DEV_ID1]             = 0x699e,
+    [R_NPCM_PCS_SR_MII_DEV_ID2]             = 0xced0,
+    [R_NPCM_PCS_SR_MII_AN_ADV]              = 0x0020,
+    [R_NPCM_PCS_SR_MII_EXT_STS]             = 0xc000,
+};
+
+static const uint16_t npcm_pcs_sr_tim_cold_reset_values[NPCM_PCS_NR_SR_TIMS] = {
+    [R_NPCM_PCS_SR_TIM_SYNC_ABL]            = 0x0003,
+    [R_NPCM_PCS_SR_TIM_SYNC_TX_MAX_DLY_LWR] = 0x0038,
+    [R_NPCM_PCS_SR_TIM_SYNC_TX_MIN_DLY_LWR] = 0x0038,
+    [R_NPCM_PCS_SR_TIM_SYNC_RX_MAX_DLY_LWR] = 0x0058,
+    [R_NPCM_PCS_SR_TIM_SYNC_RX_MIN_DLY_LWR] = 0x0048,
+};
+
+static const uint16_t npcm_pcs_vr_mii_cold_reset_values[NPCM_PCS_NR_VR_MIIS] = {
+    [R_NPCM_PCS_VR_MII_MMD_DIG_CTRL1]         = 0x2400,
+    [R_NPCM_PCS_VR_MII_AN_INTR_STS]           = 0x000a,
+    [R_NPCM_PCS_VR_MII_EEE_MCTRL0]            = 0x899c,
+    [R_NPCM_PCS_VR_MII_DIG_STS]               = 0x0010,
+    [R_NPCM_PCS_VR_MII_MP_TX_BSTCTRL0]        = 0x000a,
+    [R_NPCM_PCS_VR_MII_MP_TX_LVLCTRL0]        = 0x007f,
+    [R_NPCM_PCS_VR_MII_MP_TX_GENCTRL0]        = 0x0001,
+    [R_NPCM_PCS_VR_MII_MP_RX_GENCTRL0]        = 0x0100,
+    [R_NPCM_PCS_VR_MII_MP_RX_GENCTRL1]        = 0x1100,
+    [R_NPCM_PCS_VR_MII_MP_RX_LOS_CTRL0]       = 0x000e,
+    [R_NPCM_PCS_VR_MII_MP_MPLL_CTRL0]         = 0x0100,
+    [R_NPCM_PCS_VR_MII_MP_MPLL_CTRL1]         = 0x0032,
+    [R_NPCM_PCS_VR_MII_MP_MPLL_STS]           = 0x0001,
+    [R_NPCM_PCS_VR_MII_MP_LVL_CTRL]           = 0x0019,
+};
+
+static void npcm_pcs_soft_reset(NPCMPCSState *s)
+{
+    memcpy(s->sr_ctl, npcm_pcs_sr_ctl_cold_reset_values,
+           NPCM_PCS_NR_SR_CTLS * sizeof(uint16_t));
+    memcpy(s->sr_mii, npcm_pcs_sr_mii_cold_reset_values,
+           NPCM_PCS_NR_SR_MIIS * sizeof(uint16_t));
+    memcpy(s->sr_tim, npcm_pcs_sr_tim_cold_reset_values,
+           NPCM_PCS_NR_SR_TIMS * sizeof(uint16_t));
+    memcpy(s->vr_mii, npcm_pcs_vr_mii_cold_reset_values,
+           NPCM_PCS_NR_VR_MIIS * sizeof(uint16_t));
+}
+
+static uint16_t npcm_pcs_read_sr_ctl(NPCMPCSState *s, hwaddr offset)
+{
+    hwaddr regno = offset / sizeof(uint16_t);
+
+    if (regno >= NPCM_PCS_NR_SR_CTLS) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: SR_CTL read offset 0x%04" HWADDR_PRIx
+                      " is out of range.\n",
+                      DEVICE(s)->canonical_path, offset);
+        return 0;
+    }
+
+    return s->sr_ctl[regno];
+}
+
+static uint16_t npcm_pcs_read_sr_mii(NPCMPCSState *s, hwaddr offset)
+{
+    hwaddr regno = offset / sizeof(uint16_t);
+
+    if (regno >= NPCM_PCS_NR_SR_MIIS) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: SR_MII read offset 0x%04" HWADDR_PRIx
+                      " is out of range.\n",
+                      DEVICE(s)->canonical_path, offset);
+        return 0;
+    }
+
+    return s->sr_mii[regno];
+}
+
+static uint16_t npcm_pcs_read_sr_tim(NPCMPCSState *s, hwaddr offset)
+{
+    hwaddr regno = offset / sizeof(uint16_t);
+
+    if (regno >= NPCM_PCS_NR_SR_TIMS) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: SR_TIM read offset 0x%04" HWADDR_PRIx
+                      " is out of range.\n",
+                      DEVICE(s)->canonical_path, offset);
+        return 0;
+    }
+
+    return s->sr_tim[regno];
+}
+
+static uint16_t npcm_pcs_read_vr_mii(NPCMPCSState *s, hwaddr offset)
+{
+    hwaddr regno = offset / sizeof(uint16_t);
+
+    if (regno >= NPCM_PCS_NR_VR_MIIS) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: VR_MII read offset 0x%04" HWADDR_PRIx
+                      " is out of range.\n",
+                      DEVICE(s)->canonical_path, offset);
+        return 0;
+    }
+
+    return s->vr_mii[regno];
+}
+
+static void npcm_pcs_write_sr_ctl(NPCMPCSState *s, hwaddr offset, uint16_t v)
+{
+    hwaddr regno = offset / sizeof(uint16_t);
+
+    if (regno >= NPCM_PCS_NR_SR_CTLS) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: SR_CTL write offset 0x%04" HWADDR_PRIx
+                      " is out of range.\n",
+                      DEVICE(s)->canonical_path, offset);
+        return;
+    }
+
+    s->sr_ctl[regno] = v;
+}
+
+static void npcm_pcs_write_sr_mii(NPCMPCSState *s, hwaddr offset, uint16_t v)
+{
+    hwaddr regno = offset / sizeof(uint16_t);
+
+    if (regno >= NPCM_PCS_NR_SR_MIIS) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: SR_MII write offset 0x%04" HWADDR_PRIx
+                      " is out of range.\n",
+                      DEVICE(s)->canonical_path, offset);
+        return;
+    }
+
+    s->sr_mii[regno] = v;
+
+    if ((offset == A_NPCM_PCS_SR_MII_CTRL) && (v & NPCM_PCS_SR_MII_CTRL_RST)) {
+        /* Trigger a soft reset */
+        npcm_pcs_soft_reset(s);
+    }
+}
+
+static void npcm_pcs_write_sr_tim(NPCMPCSState *s, hwaddr offset, uint16_t v)
+{
+    hwaddr regno = offset / sizeof(uint16_t);
+
+    if (regno >= NPCM_PCS_NR_SR_TIMS) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: SR_TIM write offset 0x%04" HWADDR_PRIx
+                      " is out of range.\n",
+                      DEVICE(s)->canonical_path, offset);
+        return;
+    }
+
+    s->sr_tim[regno] = v;
+}
+
+static void npcm_pcs_write_vr_mii(NPCMPCSState *s, hwaddr offset, uint16_t v)
+{
+    hwaddr regno = offset / sizeof(uint16_t);
+
+    if (regno >= NPCM_PCS_NR_VR_MIIS) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: VR_MII write offset 0x%04" HWADDR_PRIx
+                      " is out of range.\n",
+                      DEVICE(s)->canonical_path, offset);
+        return;
+    }
+
+    s->vr_mii[regno] = v;
+}
+
+static uint64_t npcm_pcs_read(void *opaque, hwaddr offset, unsigned size)
+{
+    NPCMPCSState *s = opaque;
+    uint16_t v = 0;
+
+    if (offset == NPCM_PCS_IND_AC_BA) {
+        v = s->indirect_access_base;
+    } else {
+        switch (s->indirect_access_base) {
+        case NPCM_PCS_IND_SR_CTL:
+            v = npcm_pcs_read_sr_ctl(s, offset);
+            break;
+
+        case NPCM_PCS_IND_SR_MII:
+            v = npcm_pcs_read_sr_mii(s, offset);
+            break;
+
+        case NPCM_PCS_IND_SR_TIM:
+            v = npcm_pcs_read_sr_tim(s, offset);
+            break;
+
+        case NPCM_PCS_IND_VR_MII:
+            v = npcm_pcs_read_vr_mii(s, offset);
+            break;
+
+        default:
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: Read with invalid indirect address base: 0x%"
+                          PRIx16 "\n", DEVICE(s)->canonical_path,
+                          s->indirect_access_base);
+        }
+    }
+
+    trace_npcm_pcs_reg_read(DEVICE(s)->canonical_path, s->indirect_access_base,
+                            offset, v);
+    return v;
+}
+
+static void npcm_pcs_write(void *opaque, hwaddr offset,
+                              uint64_t v, unsigned size)
+{
+    NPCMPCSState *s = opaque;
+
+    trace_npcm_pcs_reg_write(DEVICE(s)->canonical_path, s->indirect_access_base,
+                             offset, v);
+    if (offset == NPCM_PCS_IND_AC_BA) {
+        s->indirect_access_base = v;
+    } else {
+        switch (s->indirect_access_base) {
+        case NPCM_PCS_IND_SR_CTL:
+            npcm_pcs_write_sr_ctl(s, offset, v);
+            break;
+
+        case NPCM_PCS_IND_SR_MII:
+            npcm_pcs_write_sr_mii(s, offset, v);
+            break;
+
+        case NPCM_PCS_IND_SR_TIM:
+            npcm_pcs_write_sr_tim(s, offset, v);
+            break;
+
+        case NPCM_PCS_IND_VR_MII:
+            npcm_pcs_write_vr_mii(s, offset, v);
+            break;
+
+        default:
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: Write with invalid indirect address base: 0x%02"
+                          PRIx16 "\n", DEVICE(s)->canonical_path,
+                          s->indirect_access_base);
+        }
+    }
+}
+
+static void npcm_pcs_enter_reset(Object *obj, ResetType type)
+{
+    NPCMPCSState *s = NPCM_PCS(obj);
+
+    npcm_pcs_soft_reset(s);
+}
+
+static const struct MemoryRegionOps npcm_pcs_ops = {
+    .read = npcm_pcs_read,
+    .write = npcm_pcs_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid = {
+        .min_access_size = 2,
+        .max_access_size = 2,
+        .unaligned = false,
+    },
+};
+
+static void npcm_pcs_realize(DeviceState *dev, Error **errp)
+{
+    NPCMPCSState *pcs = NPCM_PCS(dev);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+
+    memory_region_init_io(&pcs->iomem, OBJECT(pcs), &npcm_pcs_ops, pcs,
+                          TYPE_NPCM_PCS, 8 * KiB);
+    sysbus_init_mmio(sbd, &pcs->iomem);
+}
+
+static const VMStateDescription vmstate_npcm_pcs = {
+    .name = TYPE_NPCM_PCS,
+    .version_id = 0,
+    .minimum_version_id = 0,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT16(indirect_access_base, NPCMPCSState),
+        VMSTATE_UINT16_ARRAY(sr_ctl, NPCMPCSState, NPCM_PCS_NR_SR_CTLS),
+        VMSTATE_UINT16_ARRAY(sr_mii, NPCMPCSState, NPCM_PCS_NR_SR_MIIS),
+        VMSTATE_UINT16_ARRAY(sr_tim, NPCMPCSState, NPCM_PCS_NR_SR_TIMS),
+        VMSTATE_UINT16_ARRAY(vr_mii, NPCMPCSState, NPCM_PCS_NR_VR_MIIS),
+        VMSTATE_END_OF_LIST(),
+    },
+};
+
+static void npcm_pcs_class_init(ObjectClass *klass, void *data)
+{
+    ResettableClass *rc = RESETTABLE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+    dc->desc = "NPCM PCS Controller";
+    dc->realize = npcm_pcs_realize;
+    dc->vmsd = &vmstate_npcm_pcs;
+    rc->phases.enter = npcm_pcs_enter_reset;
+}
+
+static const TypeInfo npcm_pcs_types[] = {
+    {
+        .name = TYPE_NPCM_PCS,
+        .parent = TYPE_SYS_BUS_DEVICE,
+        .instance_size = sizeof(NPCMPCSState),
+        .class_init = npcm_pcs_class_init,
+    },
+};
+DEFINE_TYPES(npcm_pcs_types)
diff --git a/hw/net/smc91c111.c b/hw/net/smc91c111.c
index b18d5c23c39ac..9ce42b56155b4 100644
--- a/hw/net/smc91c111.c
+++ b/hw/net/smc91c111.c
@@ -13,6 +13,7 @@
 #include "net/net.h"
 #include "hw/irq.h"
 #include "hw/net/smc91c111.h"
+#include "hw/registerfields.h"
 #include "hw/qdev-properties.h"
 #include "qapi/error.h"
 #include "qemu/log.h"
@@ -22,6 +23,13 @@
 
 /* Number of 2k memory pages available.  */
 #define NUM_PACKETS 4
+/*
+ * Maximum size of a data frame, including the leading status word
+ * and byte count fields and the trailing CRC, last data byte
+ * and control byte (per figure 8-1 in the Microchip Technology
+ * LAN91C111 datasheet).
+ */
+#define MAX_PACKET_SIZE 2048
 
 #define TYPE_SMC91C111 "smc91c111"
 OBJECT_DECLARE_SIMPLE_TYPE(smc91c111_state, SMC91C111)
@@ -51,7 +59,7 @@ struct smc91c111_state {
     int tx_fifo_done_len;
     int tx_fifo_done[NUM_PACKETS];
     /* Packet buffer memory.  */
-    uint8_t data[NUM_PACKETS][2048];
+    uint8_t data[NUM_PACKETS][MAX_PACKET_SIZE];
     uint8_t int_level;
     uint8_t int_mask;
     MemoryRegion mmio;
@@ -79,7 +87,8 @@ static const VMStateDescription vmstate_smc91c111 = {
         VMSTATE_INT32_ARRAY(rx_fifo, smc91c111_state, NUM_PACKETS),
         VMSTATE_INT32(tx_fifo_done_len, smc91c111_state),
         VMSTATE_INT32_ARRAY(tx_fifo_done, smc91c111_state, NUM_PACKETS),
-        VMSTATE_BUFFER_UNSAFE(data, smc91c111_state, 0, NUM_PACKETS * 2048),
+        VMSTATE_BUFFER_UNSAFE(data, smc91c111_state, 0,
+                              NUM_PACKETS * MAX_PACKET_SIZE),
         VMSTATE_UINT8(int_level, smc91c111_state),
         VMSTATE_UINT8(int_mask, smc91c111_state),
         VMSTATE_END_OF_LIST()
@@ -118,6 +127,18 @@ static const VMStateDescription vmstate_smc91c111 = {
 #define RS_TOOSHORT     0x0400
 #define RS_MULTICAST    0x0001
 
+FIELD(PTR, PTR, 0, 11)
+FIELD(PTR, NOT_EMPTY, 11, 1)
+FIELD(PTR, RESERVED, 12, 1)
+FIELD(PTR, READ, 13, 1)
+FIELD(PTR, AUTOINCR, 14, 1)
+FIELD(PTR, RCV, 15, 1)
+
+static inline bool packetnum_valid(int packet_num)
+{
+    return packet_num >= 0 && packet_num < NUM_PACKETS;
+}
+
 /* Update interrupt status.  */
 static void smc91c111_update(smc91c111_state *s)
 {
@@ -182,6 +203,15 @@ static void smc91c111_pop_rx_fifo(smc91c111_state *s)
 {
     int i;
 
+    if (s->rx_fifo_len == 0) {
+        /*
+         * The datasheet doesn't document what the behaviour is if the
+         * guest tries to pop an empty RX FIFO, and there's no obvious
+         * error status register to report it. Just ignore the attempt.
+         */
+        return;
+    }
+
     s->rx_fifo_len--;
     if (s->rx_fifo_len) {
         for (i = 0; i < s->rx_fifo_len; i++)
@@ -209,12 +239,33 @@ static void smc91c111_pop_tx_fifo_done(smc91c111_state *s)
 /* Release the memory allocated to a packet.  */
 static void smc91c111_release_packet(smc91c111_state *s, int packet)
 {
+    if (!packetnum_valid(packet)) {
+        /*
+         * Data sheet doesn't document behaviour in this guest error
+         * case, and there is no error status register to report it.
+         * Log and ignore the attempt.
+         */
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "smc91c111: attempt to release invalid packet %d\n",
+                      packet);
+        return;
+    }
     s->allocated &= ~(1 << packet);
     if (s->tx_alloc == 0x80)
         smc91c111_tx_alloc(s);
     smc91c111_flush_queued_packets(s);
 }
 
+static void smc91c111_complete_tx_packet(smc91c111_state *s, int packetnum)
+{
+    if (s->ctr & CTR_AUTO_RELEASE) {
+        /* Race?  */
+        smc91c111_release_packet(s, packetnum);
+    } else if (s->tx_fifo_done_len < NUM_PACKETS) {
+        s->tx_fifo_done[s->tx_fifo_done_len++] = packetnum;
+    }
+}
+
 /* Flush the TX FIFO.  */
 static void smc91c111_do_tx(smc91c111_state *s)
 {
@@ -230,12 +281,25 @@ static void smc91c111_do_tx(smc91c111_state *s)
         return;
     for (i = 0; i < s->tx_fifo_len; i++) {
         packetnum = s->tx_fifo[i];
+        /* queue_tx checked the packet number was valid */
+        assert(packetnum_valid(packetnum));
         p = &s->data[packetnum][0];
         /* Set status word.  */
         *(p++) = 0x01;
         *(p++) = 0x40;
         len = *(p++);
         len |= ((int)*(p++)) << 8;
+        if (len > MAX_PACKET_SIZE) {
+            /*
+             * Datasheet doesn't say what to do here, and there is no
+             * relevant tx error condition listed. Log, and drop the packet.
+             */
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "smc91c111: tx packet with bad length %d, dropping\n",
+                          len);
+            smc91c111_complete_tx_packet(s, packetnum);
+            continue;
+        }
         len -= 6;
         control = p[len + 1];
         if (control & 0x20)
@@ -264,11 +328,7 @@ static void smc91c111_do_tx(smc91c111_state *s)
             }
         }
 #endif
-        if (s->ctr & CTR_AUTO_RELEASE)
-            /* Race?  */
-            smc91c111_release_packet(s, packetnum);
-        else if (s->tx_fifo_done_len < NUM_PACKETS)
-            s->tx_fifo_done[s->tx_fifo_done_len++] = packetnum;
+        smc91c111_complete_tx_packet(s, packetnum);
         qemu_send_packet(qemu_get_queue(s->nic), p, len);
     }
     s->tx_fifo_len = 0;
@@ -278,6 +338,17 @@ static void smc91c111_do_tx(smc91c111_state *s)
 /* Add a packet to the TX FIFO.  */
 static void smc91c111_queue_tx(smc91c111_state *s, int packet)
 {
+    if (!packetnum_valid(packet)) {
+        /*
+         * Datasheet doesn't document behaviour in this error case, and
+         * there's no error status register we could report it in.
+         * Log and ignore.
+         */
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "smc91c111: attempt to queue invalid packet %d\n",
+                      packet);
+        return;
+    }
     if (s->tx_fifo_len == NUM_PACKETS)
         return;
     s->tx_fifo[s->tx_fifo_len++] = packet;
@@ -309,6 +380,49 @@ static void smc91c111_reset(DeviceState *dev)
 #define SET_LOW(name, val) s->name = (s->name & 0xff00) | val
 #define SET_HIGH(name, val) s->name = (s->name & 0xff) | (val << 8)
 
+/*
+ * The pointer register's pointer is an 11 bit value (so it exactly
+ * indexes a 2048-byte data frame). Add the specified offset to it,
+ * wrapping around at the 2048 byte mark, and return the resulting
+ * wrapped value. There are flag bits in the top part of the register,
+ * but we can ignore them here as the mask will mask them out.
+ */
+static int ptr_reg_add(smc91c111_state *s, int offset)
+{
+    return (s->ptr + offset) & R_PTR_PTR_MASK;
+}
+
+/*
+ * For an access to the Data Register at @offset, return the
+ * required offset into the packet's data frame. This will
+ * perform the pointer register autoincrement if required, and
+ * guarantees to return an in-bounds offset.
+ */
+static int data_reg_ptr(smc91c111_state *s, int offset)
+{
+    int p;
+
+    if (s->ptr & R_PTR_AUTOINCR_MASK) {
+        /*
+         * Autoincrement: use the current pointer value, and
+         * increment the pointer register's pointer field.
+         */
+        p = FIELD_EX32(s->ptr, PTR, PTR);
+        s->ptr = FIELD_DP32(s->ptr, PTR, PTR, ptr_reg_add(s, 1));
+    } else {
+        /*
+         * No autoincrement: register offset determines which
+         * byte we're addressing. Setting the pointer to the top
+         * of the data buffer and then using the pointer wrapping
+         * to read the bottom byte of the buffer is not something
+         * sensible guest software will do, but the datasheet
+         * doesn't say what the behaviour is, so we don't forbid it.
+         */
+        p = ptr_reg_add(s, offset & 3);
+    }
+    return p;
+}
+
 static void smc91c111_writeb(void *opaque, hwaddr offset,
                              uint32_t value)
 {
@@ -448,12 +562,14 @@ static void smc91c111_writeb(void *opaque, hwaddr offset,
                     n = s->rx_fifo[0];
                 else
                     n = s->packet_num;
-                p = s->ptr & 0x07ff;
-                if (s->ptr & 0x4000) {
-                    s->ptr = (s->ptr & 0xf800) | ((s->ptr + 1) & 0x7ff);
-                } else {
-                    p += (offset & 3);
+                if (!packetnum_valid(n)) {
+                    /* Datasheet doesn't document what to do here */
+                    qemu_log_mask(LOG_GUEST_ERROR,
+                                  "smc91c111: attempt to write data to invalid packet %d\n",
+                                  n);
+                    return;
                 }
+                p = data_reg_ptr(s, offset);
                 s->data[n][p] = value;
             }
             return;
@@ -596,12 +712,14 @@ static uint32_t smc91c111_readb(void *opaque, hwaddr offset)
                     n = s->rx_fifo[0];
                 else
                     n = s->packet_num;
-                p = s->ptr & 0x07ff;
-                if (s->ptr & 0x4000) {
-                    s->ptr = (s->ptr & 0xf800) | ((s->ptr + 1) & 0x07ff);
-                } else {
-                    p += (offset & 3);
+                if (!packetnum_valid(n)) {
+                    /* Datasheet doesn't document what to do here */
+                    qemu_log_mask(LOG_GUEST_ERROR,
+                                  "smc91c111: attempt to read data from invalid packet %d\n",
+                                  n);
+                    return 0;
                 }
+                p = data_reg_ptr(s, offset);
                 return s->data[n][p];
             }
         case 12: /* Interrupt status.  */
@@ -697,13 +815,16 @@ static ssize_t smc91c111_receive(NetClientState *nc, const uint8_t *buf, size_t
     if (crc)
         packetsize += 4;
     /* TODO: Flag overrun and receive errors.  */
-    if (packetsize > 2048)
+    if (packetsize > MAX_PACKET_SIZE) {
         return -1;
+    }
     packetnum = smc91c111_allocate_packet(s);
     if (packetnum == 0x80)
         return -1;
     s->rx_fifo[s->rx_fifo_len++] = packetnum;
 
+    /* allocate_packet() will not hand us back an invalid packet number */
+    assert(packetnum_valid(packetnum));
     p = &s->data[packetnum][0];
     /* ??? Multicast packets?  */
     status = 0;
diff --git a/hw/net/trace-events b/hw/net/trace-events
index c35bfb2eb8d07..72b69c4a8bb89 100644
--- a/hw/net/trace-events
+++ b/hw/net/trace-events
@@ -483,8 +483,8 @@ npcm_gmac_packet_tx_desc_data(const char* name, uint32_t tdes0, uint32_t tdes1)
 npcm_gmac_tx_desc_owner(const char* name, uint32_t desc_addr) "%s: TX Descriptor @0x%04" PRIX32 " is owned by software"
 
 # npcm_pcs.c
-npcm_pcs_reg_read(const char *name, uint16_t indirect_access_baes, uint64_t offset, uint16_t value) "%s: IND: 0x%02" PRIx16 " offset: 0x%04" PRIx64 " value: 0x%04" PRIx16
-npcm_pcs_reg_write(const char *name, uint16_t indirect_access_baes, uint64_t offset, uint16_t value) "%s: IND: 0x%02" PRIx16 " offset: 0x%04" PRIx64 " value: 0x%04" PRIx16
+npcm_pcs_reg_read(const char *name, uint16_t indirect_access_base, uint64_t offset, uint16_t value) "%s: IND: 0x%02" PRIx16 " offset: 0x%04" PRIx64 " value: 0x%04" PRIx16
+npcm_pcs_reg_write(const char *name, uint16_t indirect_access_base, uint64_t offset, uint16_t value) "%s: IND: 0x%02" PRIx16 " offset: 0x%04" PRIx64 " value: 0x%04" PRIx16
 
 # dp8398x.c
 dp8393x_raise_irq(int isr) "raise irq, isr is 0x%04x"
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index d847429b1c33f..de87cfadffe18 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1352,18 +1352,25 @@ static bool virtio_net_load_ebpf_fds(VirtIONet *n, Error **errp)
 
 static bool virtio_net_load_ebpf(VirtIONet *n, Error **errp)
 {
-    bool ret = false;
+    if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
+        return true;
+    }
 
-    if (virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
-        trace_virtio_net_rss_load(n, n->nr_ebpf_rss_fds, n->ebpf_rss_fds);
-        if (n->ebpf_rss_fds) {
-            ret = virtio_net_load_ebpf_fds(n, errp);
-        } else {
-            ret = ebpf_rss_load(&n->ebpf_rss, errp);
-        }
+    trace_virtio_net_rss_load(n, n->nr_ebpf_rss_fds, n->ebpf_rss_fds);
+
+    /*
+     * If user explicitly gave QEMU RSS FDs to use, then
+     * failing to use them must be considered a fatal
+     * error. If no RSS FDs were provided, QEMU is trying
+     * eBPF on a "best effort" basis only, so report a
+     * warning and allow fallback to software RSS.
+     */
+    if (n->ebpf_rss_fds) {
+        return virtio_net_load_ebpf_fds(n, errp);
     }
 
-    return ret;
+    ebpf_rss_load(&n->ebpf_rss, &error_warn);
+    return true;
 }
 
 static void virtio_net_unload_ebpf(VirtIONet *n)
@@ -3913,23 +3920,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
     net_rx_pkt_init(&n->rx_pkt);
 
     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
-        Error *err = NULL;
-        if (!virtio_net_load_ebpf(n, &err)) {
-            /*
-             * If user explicitly gave QEMU RSS FDs to use, then
-             * failing to use them must be considered a fatal
-             * error. If no RSS FDs were provided, QEMU is trying
-             * eBPF on a "best effort" basis only, so report a
-             * warning and allow fallback to software RSS.
-             */
-            if (n->ebpf_rss_fds) {
-                error_propagate(errp, err);
-            } else {
-                warn_report("unable to load eBPF RSS: %s",
-                            error_get_pretty(err));
-                error_free(err);
-            }
-        }
+        virtio_net_load_ebpf(n, errp);
     }
 }
 
diff --git a/hw/net/xilinx_ethlite.c b/hw/net/xilinx_ethlite.c
index 14bf2b2e17a49..15d9b95aa80c2 100644
--- a/hw/net/xilinx_ethlite.c
+++ b/hw/net/xilinx_ethlite.c
@@ -34,6 +34,7 @@
 #include "hw/sysbus.h"
 #include "hw/irq.h"
 #include "hw/qdev-properties.h"
+#include "hw/qdev-properties-system.h"
 #include "hw/misc/unimp.h"
 #include "net/net.h"
 #include "trace.h"
@@ -85,6 +86,7 @@ struct XlnxXpsEthLite
 {
     SysBusDevice parent_obj;
 
+    EndianMode model_endianness;
     MemoryRegion container;
     qemu_irq irq;
     NICState *nic;
@@ -183,10 +185,10 @@ static void port_tx_write(void *opaque, hwaddr addr, uint64_t value,
     }
 }
 
-static const MemoryRegionOps eth_porttx_ops = {
+static const MemoryRegionOps eth_porttx_ops[2] = {
+    [0 ... 1] = {
         .read = port_tx_read,
         .write = port_tx_write,
-        .endianness = DEVICE_NATIVE_ENDIAN,
         .impl = {
             .min_access_size = 4,
             .max_access_size = 4,
@@ -195,6 +197,9 @@ static const MemoryRegionOps eth_porttx_ops = {
             .min_access_size = 4,
             .max_access_size = 4,
         },
+    },
+    [0].endianness = DEVICE_LITTLE_ENDIAN,
+    [1].endianness = DEVICE_BIG_ENDIAN,
 };
 
 static uint64_t port_rx_read(void *opaque, hwaddr addr, unsigned int size)
@@ -232,10 +237,10 @@ static void port_rx_write(void *opaque, hwaddr addr, uint64_t value,
     }
 }
 
-static const MemoryRegionOps eth_portrx_ops = {
+static const MemoryRegionOps eth_portrx_ops[2] = {
+    [0 ... 1] = {
         .read = port_rx_read,
         .write = port_rx_write,
-        .endianness = DEVICE_NATIVE_ENDIAN,
         .impl = {
             .min_access_size = 4,
             .max_access_size = 4,
@@ -244,6 +249,9 @@ static const MemoryRegionOps eth_portrx_ops = {
             .min_access_size = 4,
             .max_access_size = 4,
         },
+    },
+    [0].endianness = DEVICE_LITTLE_ENDIAN,
+    [1].endianness = DEVICE_BIG_ENDIAN,
 };
 
 static bool eth_can_rx(NetClientState *nc)
@@ -300,6 +308,14 @@ static NetClientInfo net_xilinx_ethlite_info = {
 static void xilinx_ethlite_realize(DeviceState *dev, Error **errp)
 {
     XlnxXpsEthLite *s = XILINX_ETHLITE(dev);
+    unsigned ops_index;
+
+    if (s->model_endianness == ENDIAN_MODE_UNSPECIFIED) {
+        error_setg(errp, TYPE_XILINX_ETHLITE " property 'endianness'"
+                         " must be set to 'big' or 'little'");
+        return;
+    }
+    ops_index = s->model_endianness == ENDIAN_MODE_BIG ? 1 : 0;
 
     memory_region_init(&s->container, OBJECT(dev),
                        "xlnx.xps-ethernetlite", 0x2000);
@@ -328,7 +344,7 @@ static void xilinx_ethlite_realize(DeviceState *dev, Error **errp)
                                BUFSZ_MAX, &error_abort);
         memory_region_add_subregion(&s->container, 0x0800 * i, &s->port[i].txbuf);
         memory_region_init_io(&s->port[i].txio, OBJECT(dev),
-                              &eth_porttx_ops, s,
+                              &eth_porttx_ops[ops_index], s,
                               i ? "ethlite.tx[1]io" : "ethlite.tx[0]io",
                               4 * TX_MAX);
         memory_region_add_subregion(&s->container, i ? A_TX_BASE1 : A_TX_BASE0,
@@ -340,7 +356,7 @@ static void xilinx_ethlite_realize(DeviceState *dev, Error **errp)
         memory_region_add_subregion(&s->container, 0x1000 + 0x0800 * i,
                                     &s->port[i].rxbuf);
         memory_region_init_io(&s->port[i].rxio, OBJECT(dev),
-                              &eth_portrx_ops, s,
+                              &eth_portrx_ops[ops_index], s,
                               i ? "ethlite.rx[1]io" : "ethlite.rx[0]io",
                               4 * RX_MAX);
         memory_region_add_subregion(&s->container, i ? A_RX_BASE1 : A_RX_BASE0,
@@ -363,6 +379,7 @@ static void xilinx_ethlite_init(Object *obj)
 }
 
 static const Property xilinx_ethlite_properties[] = {
+    DEFINE_PROP_ENDIAN_NODEFAULT("endianness", XlnxXpsEthLite, model_endianness),
     DEFINE_PROP_UINT32("tx-ping-pong", XlnxXpsEthLite, c_tx_pingpong, 1),
     DEFINE_PROP_UINT32("rx-ping-pong", XlnxXpsEthLite, c_rx_pingpong, 1),
     DEFINE_NIC_PROPERTIES(XlnxXpsEthLite, conf),
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 68903d1d70673..518d02dc66706 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -266,7 +266,7 @@ static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
     [NVME_FDP_EVENTS]               = NVME_FEAT_CAP_CHANGE | NVME_FEAT_CAP_NS,
 };
 
-static const uint32_t nvme_cse_acs[256] = {
+static const uint32_t nvme_cse_acs_default[256] = {
     [NVME_ADM_CMD_DELETE_SQ]        = NVME_CMD_EFF_CSUPP,
     [NVME_ADM_CMD_CREATE_SQ]        = NVME_CMD_EFF_CSUPP,
     [NVME_ADM_CMD_GET_LOG_PAGE]     = NVME_CMD_EFF_CSUPP,
@@ -277,17 +277,14 @@ static const uint32_t nvme_cse_acs[256] = {
     [NVME_ADM_CMD_SET_FEATURES]     = NVME_CMD_EFF_CSUPP,
     [NVME_ADM_CMD_GET_FEATURES]     = NVME_CMD_EFF_CSUPP,
     [NVME_ADM_CMD_ASYNC_EV_REQ]     = NVME_CMD_EFF_CSUPP,
-    [NVME_ADM_CMD_NS_ATTACHMENT]    = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC,
-    [NVME_ADM_CMD_VIRT_MNGMT]       = NVME_CMD_EFF_CSUPP,
-    [NVME_ADM_CMD_DBBUF_CONFIG]     = NVME_CMD_EFF_CSUPP,
+    [NVME_ADM_CMD_NS_ATTACHMENT]    = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC |
+                                      NVME_CMD_EFF_CCC,
     [NVME_ADM_CMD_FORMAT_NVM]       = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
     [NVME_ADM_CMD_DIRECTIVE_RECV]   = NVME_CMD_EFF_CSUPP,
     [NVME_ADM_CMD_DIRECTIVE_SEND]   = NVME_CMD_EFF_CSUPP,
 };
 
-static const uint32_t nvme_cse_iocs_none[256];
-
-static const uint32_t nvme_cse_iocs_nvm[256] = {
+static const uint32_t nvme_cse_iocs_nvm_default[256] = {
     [NVME_CMD_FLUSH]                = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
     [NVME_CMD_WRITE_ZEROES]         = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
     [NVME_CMD_WRITE]                = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
@@ -300,7 +297,7 @@ static const uint32_t nvme_cse_iocs_nvm[256] = {
     [NVME_CMD_IO_MGMT_SEND]         = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
 };
 
-static const uint32_t nvme_cse_iocs_zoned[256] = {
+static const uint32_t nvme_cse_iocs_zoned_default[256] = {
     [NVME_CMD_FLUSH]                = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
     [NVME_CMD_WRITE_ZEROES]         = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
     [NVME_CMD_WRITE]                = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
@@ -309,6 +306,9 @@ static const uint32_t nvme_cse_iocs_zoned[256] = {
     [NVME_CMD_VERIFY]               = NVME_CMD_EFF_CSUPP,
     [NVME_CMD_COPY]                 = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
     [NVME_CMD_COMPARE]              = NVME_CMD_EFF_CSUPP,
+    [NVME_CMD_IO_MGMT_RECV]         = NVME_CMD_EFF_CSUPP,
+    [NVME_CMD_IO_MGMT_SEND]         = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
+
     [NVME_CMD_ZONE_APPEND]          = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
     [NVME_CMD_ZONE_MGMT_SEND]       = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
     [NVME_CMD_ZONE_MGMT_RECV]       = NVME_CMD_EFF_CSUPP,
@@ -1762,47 +1762,6 @@ static uint16_t nvme_check_dulbe(NvmeNamespace *ns, uint64_t slba,
     return NVME_SUCCESS;
 }
 
-static void nvme_aio_err(NvmeRequest *req, int ret)
-{
-    uint16_t status = NVME_SUCCESS;
-    Error *local_err = NULL;
-
-    switch (req->cmd.opcode) {
-    case NVME_CMD_READ:
-        status = NVME_UNRECOVERED_READ;
-        break;
-    case NVME_CMD_FLUSH:
-    case NVME_CMD_WRITE:
-    case NVME_CMD_WRITE_ZEROES:
-    case NVME_CMD_ZONE_APPEND:
-    case NVME_CMD_COPY:
-        status = NVME_WRITE_FAULT;
-        break;
-    default:
-        status = NVME_INTERNAL_DEV_ERROR;
-        break;
-    }
-
-    if (ret == -ECANCELED) {
-        status = NVME_CMD_ABORT_REQ;
-    }
-
-    trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), status);
-
-    error_setg_errno(&local_err, -ret, "aio failed");
-    error_report_err(local_err);
-
-    /*
-     * Set the command status code to the first encountered error but allow a
-     * subsequent Internal Device Error to trump it.
-     */
-    if (req->status && status != NVME_INTERNAL_DEV_ERROR) {
-        return;
-    }
-
-    req->status = status;
-}
-
 static inline uint32_t nvme_zone_idx(NvmeNamespace *ns, uint64_t slba)
 {
     return ns->zone_size_log2 > 0 ? slba >> ns->zone_size_log2 :
@@ -2161,11 +2120,16 @@ static inline bool nvme_is_write(NvmeRequest *req)
 static void nvme_misc_cb(void *opaque, int ret)
 {
     NvmeRequest *req = opaque;
+    uint16_t cid = nvme_cid(req);
 
-    trace_pci_nvme_misc_cb(nvme_cid(req));
+    trace_pci_nvme_misc_cb(cid);
 
     if (ret) {
-        nvme_aio_err(req, ret);
+        if (!req->status) {
+            req->status = NVME_INTERNAL_DEV_ERROR;
+        }
+
+        trace_pci_nvme_err_aio(cid, strerror(-ret), req->status);
     }
 
     nvme_enqueue_req_completion(nvme_cq(req), req);
@@ -2182,8 +2146,30 @@ void nvme_rw_complete_cb(void *opaque, int ret)
     trace_pci_nvme_rw_complete_cb(nvme_cid(req), blk_name(blk));
 
     if (ret) {
+        Error *err = NULL;
+
         block_acct_failed(stats, acct);
-        nvme_aio_err(req, ret);
+
+        switch (req->cmd.opcode) {
+        case NVME_CMD_READ:
+            req->status = NVME_UNRECOVERED_READ;
+            break;
+
+        case NVME_CMD_WRITE:
+        case NVME_CMD_WRITE_ZEROES:
+        case NVME_CMD_ZONE_APPEND:
+            req->status = NVME_WRITE_FAULT;
+            break;
+
+        default:
+            req->status = NVME_INTERNAL_DEV_ERROR;
+            break;
+        }
+
+        trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), req->status);
+
+        error_setg_errno(&err, -ret, "aio failed");
+        error_report_err(err);
     } else {
         block_acct_done(stats, acct);
     }
@@ -2268,7 +2254,10 @@ static void nvme_verify_cb(void *opaque, int ret)
 
     if (ret) {
         block_acct_failed(stats, acct);
-        nvme_aio_err(req, ret);
+        req->status = NVME_UNRECOVERED_READ;
+
+        trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), req->status);
+
         goto out;
     }
 
@@ -2367,7 +2356,10 @@ static void nvme_compare_mdata_cb(void *opaque, int ret)
 
     if (ret) {
         block_acct_failed(stats, acct);
-        nvme_aio_err(req, ret);
+        req->status = NVME_UNRECOVERED_READ;
+
+        trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), req->status);
+
         goto out;
     }
 
@@ -2449,7 +2441,10 @@ static void nvme_compare_data_cb(void *opaque, int ret)
 
     if (ret) {
         block_acct_failed(stats, acct);
-        nvme_aio_err(req, ret);
+        req->status = NVME_UNRECOVERED_READ;
+
+        trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), req->status);
+
         goto out;
     }
 
@@ -2928,6 +2923,7 @@ static void nvme_copy_out_completed_cb(void *opaque, int ret)
 
     if (ret < 0) {
         iocb->ret = ret;
+        req->status = NVME_WRITE_FAULT;
         goto out;
     } else if (iocb->ret < 0) {
         goto out;
@@ -2992,6 +2988,7 @@ static void nvme_copy_in_completed_cb(void *opaque, int ret)
 
     if (ret < 0) {
         iocb->ret = ret;
+        req->status = NVME_UNRECOVERED_READ;
         goto out;
     } else if (iocb->ret < 0) {
         goto out;
@@ -3514,6 +3511,7 @@ static void nvme_flush_ns_cb(void *opaque, int ret)
 
     if (ret < 0) {
         iocb->ret = ret;
+        iocb->req->status = NVME_WRITE_FAULT;
         goto out;
     } else if (iocb->ret < 0) {
         goto out;
@@ -4605,6 +4603,61 @@ static uint16_t nvme_io_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
     };
 }
 
+static uint16_t __nvme_io_cmd_nvm(NvmeCtrl *n, NvmeRequest *req)
+{
+    switch (req->cmd.opcode) {
+    case NVME_CMD_WRITE:
+        return nvme_write(n, req);
+    case NVME_CMD_READ:
+        return nvme_read(n, req);
+    case NVME_CMD_COMPARE:
+        return nvme_compare(n, req);
+    case NVME_CMD_WRITE_ZEROES:
+        return nvme_write_zeroes(n, req);
+    case NVME_CMD_DSM:
+        return nvme_dsm(n, req);
+    case NVME_CMD_VERIFY:
+        return nvme_verify(n, req);
+    case NVME_CMD_COPY:
+        return nvme_copy(n, req);
+    case NVME_CMD_IO_MGMT_RECV:
+        return nvme_io_mgmt_recv(n, req);
+    case NVME_CMD_IO_MGMT_SEND:
+        return nvme_io_mgmt_send(n, req);
+    }
+
+    g_assert_not_reached();
+}
+
+static uint16_t nvme_io_cmd_nvm(NvmeCtrl *n, NvmeRequest *req)
+{
+    if (!(n->cse.iocs.nvm[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) {
+        trace_pci_nvme_err_invalid_opc(req->cmd.opcode);
+        return NVME_INVALID_OPCODE | NVME_DNR;
+    }
+
+    return __nvme_io_cmd_nvm(n, req);
+}
+
+static uint16_t nvme_io_cmd_zoned(NvmeCtrl *n, NvmeRequest *req)
+{
+    if (!(n->cse.iocs.zoned[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) {
+        trace_pci_nvme_err_invalid_opc(req->cmd.opcode);
+        return NVME_INVALID_OPCODE | NVME_DNR;
+    }
+
+    switch (req->cmd.opcode) {
+    case NVME_CMD_ZONE_APPEND:
+        return nvme_zone_append(n, req);
+    case NVME_CMD_ZONE_MGMT_SEND:
+        return nvme_zone_mgmt_send(n, req);
+    case NVME_CMD_ZONE_MGMT_RECV:
+        return nvme_zone_mgmt_recv(n, req);
+    }
+
+    return __nvme_io_cmd_nvm(n, req);
+}
+
 static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
 {
     NvmeNamespace *ns;
@@ -4646,11 +4699,6 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
         return NVME_INVALID_FIELD | NVME_DNR;
     }
 
-    if (!(ns->iocs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) {
-        trace_pci_nvme_err_invalid_opc(req->cmd.opcode);
-        return NVME_INVALID_OPCODE | NVME_DNR;
-    }
-
     if (ns->status) {
         return ns->status;
     }
@@ -4661,36 +4709,14 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
 
     req->ns = ns;
 
-    switch (req->cmd.opcode) {
-    case NVME_CMD_WRITE_ZEROES:
-        return nvme_write_zeroes(n, req);
-    case NVME_CMD_ZONE_APPEND:
-        return nvme_zone_append(n, req);
-    case NVME_CMD_WRITE:
-        return nvme_write(n, req);
-    case NVME_CMD_READ:
-        return nvme_read(n, req);
-    case NVME_CMD_COMPARE:
-        return nvme_compare(n, req);
-    case NVME_CMD_DSM:
-        return nvme_dsm(n, req);
-    case NVME_CMD_VERIFY:
-        return nvme_verify(n, req);
-    case NVME_CMD_COPY:
-        return nvme_copy(n, req);
-    case NVME_CMD_ZONE_MGMT_SEND:
-        return nvme_zone_mgmt_send(n, req);
-    case NVME_CMD_ZONE_MGMT_RECV:
-        return nvme_zone_mgmt_recv(n, req);
-    case NVME_CMD_IO_MGMT_RECV:
-        return nvme_io_mgmt_recv(n, req);
-    case NVME_CMD_IO_MGMT_SEND:
-        return nvme_io_mgmt_send(n, req);
-    default:
-        g_assert_not_reached();
+    switch (ns->csi) {
+    case NVME_CSI_NVM:
+        return nvme_io_cmd_nvm(n, req);
+    case NVME_CSI_ZONED:
+        return nvme_io_cmd_zoned(n, req);
     }
 
-    return NVME_INVALID_OPCODE | NVME_DNR;
+    g_assert_not_reached();
 }
 
 static void nvme_cq_notifier(EventNotifier *e)
@@ -4799,6 +4825,7 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req)
     while (!QTAILQ_EMPTY(&sq->out_req_list)) {
         r = QTAILQ_FIRST(&sq->out_req_list);
         assert(r->aiocb);
+        r->status = NVME_CMD_ABORT_SQ_DEL;
         blk_aio_cancel(r->aiocb);
     }
 
@@ -4917,6 +4944,45 @@ static void nvme_set_blk_stats(NvmeNamespace *ns, struct nvme_stats *stats)
     stats->write_commands += s->nr_ops[BLOCK_ACCT_WRITE];
 }
 
+static uint16_t nvme_ocp_extended_smart_info(NvmeCtrl *n, uint8_t rae,
+                                             uint32_t buf_len, uint64_t off,
+                                             NvmeRequest *req)
+{
+    NvmeNamespace *ns = NULL;
+    NvmeSmartLogExtended smart_l = { 0 };
+    struct nvme_stats stats = { 0 };
+    uint32_t trans_len;
+
+    if (off >= sizeof(smart_l)) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    /* accumulate all stats from all namespaces */
+    for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) {
+        ns = nvme_ns(n, i);
+        if (ns) {
+            nvme_set_blk_stats(ns, &stats);
+        }
+    }
+
+    smart_l.physical_media_units_written[0] = cpu_to_le64(stats.units_written);
+    smart_l.physical_media_units_read[0] = cpu_to_le64(stats.units_read);
+    smart_l.log_page_version = 0x0005;
+
+    static const uint8_t guid[16] = {
+        0xC5, 0xAF, 0x10, 0x28, 0xEA, 0xBF, 0xF2, 0xA4,
+        0x9C, 0x4F, 0x6F, 0x7C, 0xC9, 0x14, 0xD5, 0xAF
+    };
+    memcpy(smart_l.log_page_guid, guid, sizeof(smart_l.log_page_guid));
+
+    if (!rae) {
+        nvme_clear_events(n, NVME_AER_TYPE_SMART);
+    }
+
+    trans_len = MIN(sizeof(smart_l) - off, buf_len);
+    return nvme_c2h(n, (uint8_t *) &smart_l + off, trans_len, req);
+}
+
 static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
                                 uint64_t off, NvmeRequest *req)
 {
@@ -5110,7 +5176,7 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len,
                                  uint64_t off, NvmeRequest *req)
 {
     NvmeEffectsLog log = {};
-    const uint32_t *src_iocs = NULL;
+    const uint32_t *iocs = NULL;
     uint32_t trans_len;
 
     if (off >= sizeof(log)) {
@@ -5120,25 +5186,26 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len,
 
     switch (NVME_CC_CSS(ldl_le_p(&n->bar.cc))) {
     case NVME_CC_CSS_NVM:
-        src_iocs = nvme_cse_iocs_nvm;
-        /* fall through */
-    case NVME_CC_CSS_ADMIN_ONLY:
+        iocs = n->cse.iocs.nvm;
         break;
-    case NVME_CC_CSS_CSI:
+
+    case NVME_CC_CSS_ALL:
         switch (csi) {
         case NVME_CSI_NVM:
-            src_iocs = nvme_cse_iocs_nvm;
+            iocs = n->cse.iocs.nvm;
             break;
         case NVME_CSI_ZONED:
-            src_iocs = nvme_cse_iocs_zoned;
+            iocs = n->cse.iocs.zoned;
             break;
         }
+
+        break;
     }
 
-    memcpy(log.acs, nvme_cse_acs, sizeof(nvme_cse_acs));
+    memcpy(log.acs, n->cse.acs, sizeof(log.acs));
 
-    if (src_iocs) {
-        memcpy(log.iocs, src_iocs, sizeof(log.iocs));
+    if (iocs) {
+        memcpy(log.iocs, iocs, sizeof(log.iocs));
     }
 
     trans_len = MIN(sizeof(log) - off, buf_len);
@@ -5146,6 +5213,23 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len,
     return nvme_c2h(n, ((uint8_t *)&log) + off, trans_len, req);
 }
 
+static uint16_t nvme_vendor_specific_log(NvmeCtrl *n, uint8_t rae,
+                                         uint32_t buf_len, uint64_t off,
+                                         NvmeRequest *req, uint8_t lid)
+{
+    switch (lid) {
+    case NVME_OCP_EXTENDED_SMART_INFO:
+        if (n->params.ocp) {
+            return nvme_ocp_extended_smart_info(n, rae, buf_len, off, req);
+        }
+        break;
+        /* add a case for each additional vendor specific log id */
+    }
+
+    trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid);
+    return NVME_INVALID_FIELD | NVME_DNR;
+}
+
 static size_t sizeof_fdp_conf_descr(size_t nruh, size_t vss)
 {
     size_t entry_siz = sizeof(NvmeFdpDescrHdr) + nruh * sizeof(NvmeRuhDescr)
@@ -5396,6 +5480,8 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req)
         return nvme_smart_info(n, rae, len, off, req);
     case NVME_LOG_FW_SLOT_INFO:
         return nvme_fw_log_info(n, len, off, req);
+    case NVME_LOG_VENDOR_START...NVME_LOG_VENDOR_END:
+        return nvme_vendor_specific_log(n, rae, len, off, req, lid);
     case NVME_LOG_CHANGED_NSLIST:
         return nvme_changed_nslist(n, rae, len, off, req);
     case NVME_LOG_CMD_EFFECTS:
@@ -5583,7 +5669,9 @@ static uint16_t nvme_identify_ctrl_csi(NvmeCtrl *n, NvmeRequest *req)
     switch (c->csi) {
     case NVME_CSI_NVM:
         id_nvm->vsl = n->params.vsl;
+        id_nvm->dmrl = NVME_ID_CTRL_NVM_DMRL_MAX;
         id_nvm->dmrsl = cpu_to_le32(n->dmrsl);
+        id_nvm->dmsl = NVME_ID_CTRL_NVM_DMRL_MAX * n->dmrsl;
         break;
 
     case NVME_CSI_ZONED:
@@ -5625,7 +5713,7 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active)
         return nvme_c2h(n, (uint8_t *)&ns->id_ns, sizeof(NvmeIdNs), req);
     }
 
-    return NVME_INVALID_CMD_SET | NVME_DNR;
+    return NVME_INVALID_IOCS | NVME_DNR;
 }
 
 static uint16_t nvme_identify_ctrl_list(NvmeCtrl *n, NvmeRequest *req,
@@ -6048,6 +6136,7 @@ static uint16_t nvme_abort(NvmeCtrl *n, NvmeRequest *req)
     QTAILQ_FOREACH_SAFE(r, &sq->out_req_list, entry, next) {
         if (r->cqe.cid == cid) {
             if (r->aiocb) {
+                r->status = NVME_CMD_ABORT_REQ;
                 blk_aio_cancel_async(r->aiocb);
             }
             break;
@@ -6591,7 +6680,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req)
     case NVME_COMMAND_SET_PROFILE:
         if (dw11 & 0x1ff) {
             trace_pci_nvme_err_invalid_iocsci(dw11 & 0x1ff);
-            return NVME_CMD_SET_CMB_REJECTED | NVME_DNR;
+            return NVME_IOCS_COMBINATION_REJECTED | NVME_DNR;
         }
         break;
     case NVME_FDP_MODE:
@@ -6640,40 +6729,49 @@ static uint16_t nvme_aer(NvmeCtrl *n, NvmeRequest *req)
     return NVME_NO_COMPLETE;
 }
 
-static void nvme_update_dmrsl(NvmeCtrl *n)
+static void nvme_update_dsm_limits(NvmeCtrl *n, NvmeNamespace *ns)
 {
-    int nsid;
+    if (ns) {
+        n->dmrsl =
+            MIN_NON_ZERO(n->dmrsl, BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1));
 
-    for (nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) {
-        NvmeNamespace *ns = nvme_ns(n, nsid);
+        return;
+    }
+
+    for (uint32_t nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) {
+        ns = nvme_ns(n, nsid);
         if (!ns) {
             continue;
         }
 
-        n->dmrsl = MIN_NON_ZERO(n->dmrsl,
-                                BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1));
+        n->dmrsl =
+            MIN_NON_ZERO(n->dmrsl, BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1));
     }
 }
 
-static void nvme_select_iocs_ns(NvmeCtrl *n, NvmeNamespace *ns)
+static bool nvme_csi_supported(NvmeCtrl *n, uint8_t csi)
 {
-    uint32_t cc = ldl_le_p(&n->bar.cc);
+    uint32_t cc;
 
-    ns->iocs = nvme_cse_iocs_none;
-    switch (ns->csi) {
+    switch (csi) {
     case NVME_CSI_NVM:
-        if (NVME_CC_CSS(cc) != NVME_CC_CSS_ADMIN_ONLY) {
-            ns->iocs = nvme_cse_iocs_nvm;
-        }
-        break;
+        return true;
+
     case NVME_CSI_ZONED:
-        if (NVME_CC_CSS(cc) == NVME_CC_CSS_CSI) {
-            ns->iocs = nvme_cse_iocs_zoned;
-        } else if (NVME_CC_CSS(cc) == NVME_CC_CSS_NVM) {
-            ns->iocs = nvme_cse_iocs_nvm;
-        }
-        break;
+        cc = ldl_le_p(&n->bar.cc);
+
+        return NVME_CC_CSS(cc) == NVME_CC_CSS_ALL;
     }
+
+    g_assert_not_reached();
+}
+
+static void nvme_detach_ns(NvmeCtrl *n, NvmeNamespace *ns)
+{
+    assert(ns->attached > 0);
+
+    n->namespaces[ns->params.nsid] = NULL;
+    ns->attached--;
 }
 
 static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req)
@@ -6718,7 +6816,7 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req)
 
         switch (sel) {
         case NVME_NS_ATTACHMENT_ATTACH:
-            if (nvme_ns(ctrl, nsid)) {
+            if (nvme_ns(n, nsid)) {
                 return NVME_NS_ALREADY_ATTACHED | NVME_DNR;
             }
 
@@ -6726,20 +6824,18 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req)
                 return NVME_NS_PRIVATE | NVME_DNR;
             }
 
+            if (!nvme_csi_supported(n, ns->csi)) {
+                return NVME_IOCS_NOT_SUPPORTED | NVME_DNR;
+            }
+
             nvme_attach_ns(ctrl, ns);
-            nvme_select_iocs_ns(ctrl, ns);
+            nvme_update_dsm_limits(ctrl, ns);
 
             break;
 
         case NVME_NS_ATTACHMENT_DETACH:
-            if (!nvme_ns(ctrl, nsid)) {
-                return NVME_NS_NOT_ATTACHED | NVME_DNR;
-            }
-
-            ctrl->namespaces[nsid] = NULL;
-            ns->attached--;
-
-            nvme_update_dmrsl(ctrl);
+            nvme_detach_ns(ctrl, ns);
+            nvme_update_dsm_limits(ctrl, NULL);
 
             break;
 
@@ -7242,7 +7338,7 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req)
     trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode,
                              nvme_adm_opc_str(req->cmd.opcode));
 
-    if (!(nvme_cse_acs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) {
+    if (!(n->cse.acs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) {
         trace_pci_nvme_err_invalid_admin_opc(req->cmd.opcode);
         return NVME_INVALID_OPCODE | NVME_DNR;
     }
@@ -7589,21 +7685,6 @@ static void nvme_ctrl_shutdown(NvmeCtrl *n)
     }
 }
 
-static void nvme_select_iocs(NvmeCtrl *n)
-{
-    NvmeNamespace *ns;
-    int i;
-
-    for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
-        ns = nvme_ns(n, i);
-        if (!ns) {
-            continue;
-        }
-
-        nvme_select_iocs_ns(n, ns);
-    }
-}
-
 static int nvme_start_ctrl(NvmeCtrl *n)
 {
     uint64_t cap = ldq_le_p(&n->bar.cap);
@@ -7670,7 +7751,18 @@ static int nvme_start_ctrl(NvmeCtrl *n)
 
     nvme_set_timestamp(n, 0ULL);
 
-    nvme_select_iocs(n);
+    /* verify that the command sets of attached namespaces are supported */
+    for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) {
+        NvmeNamespace *ns = nvme_subsys_ns(n->subsys, i);
+
+        if (ns && nvme_csi_supported(n, ns->csi) && !ns->params.detached) {
+            if (!ns->attached || ns->params.shared) {
+                nvme_attach_ns(n, ns);
+            }
+        }
+    }
+
+    nvme_update_dsm_limits(n, NULL);
 
     return 0;
 }
@@ -8481,7 +8573,8 @@ static uint64_t nvme_mbar_size(unsigned total_queues, unsigned total_irqs,
     return pow2ceil(bar_size);
 }
 
-static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset)
+static bool nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset,
+                            Error **errp)
 {
     uint16_t vf_dev_id = n->params.use_intel_id ?
                          PCI_DEVICE_ID_INTEL_NVME : PCI_DEVICE_ID_REDHAT_NVME;
@@ -8490,12 +8583,16 @@ static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset)
                                       le16_to_cpu(cap->vifrsm),
                                       NULL, NULL);
 
-    pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id,
-                       n->params.sriov_max_vfs, n->params.sriov_max_vfs,
-                       NVME_VF_OFFSET, NVME_VF_STRIDE);
+    if (!pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id,
+                            n->params.sriov_max_vfs, n->params.sriov_max_vfs,
+                            NVME_VF_OFFSET, NVME_VF_STRIDE, errp)) {
+        return false;
+    }
 
     pcie_sriov_pf_init_vf_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
                               PCI_BASE_ADDRESS_MEM_TYPE_64, bar_size);
+
+    return true;
 }
 
 static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset)
@@ -8503,8 +8600,7 @@ static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset)
     Error *err = NULL;
     int ret;
 
-    ret = pci_add_capability(pci_dev, PCI_CAP_ID_PM, offset,
-                             PCI_PM_SIZEOF, &err);
+    ret = pci_pm_init(pci_dev, offset, &err);
     if (err) {
         error_report_err(err);
         return ret;
@@ -8620,6 +8716,11 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
         return false;
     }
 
+    if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs &&
+        !nvme_init_sriov(n, pci_dev, 0x120, errp)) {
+        return false;
+    }
+
     nvme_update_msixcap_ts(pci_dev, n->conf_msix_qsize);
 
     pcie_cap_deverr_init(pci_dev);
@@ -8649,10 +8750,6 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
         nvme_init_pmr(n, pci_dev);
     }
 
-    if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs) {
-        nvme_init_sriov(n, pci_dev, 0x120);
-    }
-
     return true;
 }
 
@@ -8676,6 +8773,12 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
     uint64_t cap = ldq_le_p(&n->bar.cap);
     NvmeSecCtrlEntry *sctrl = nvme_sctrl(n);
     uint32_t ctratt;
+    uint16_t oacs;
+
+    memcpy(n->cse.acs, nvme_cse_acs_default, sizeof(n->cse.acs));
+    memcpy(n->cse.iocs.nvm, nvme_cse_iocs_nvm_default, sizeof(n->cse.iocs.nvm));
+    memcpy(n->cse.iocs.zoned, nvme_cse_iocs_zoned_default,
+           sizeof(n->cse.iocs.zoned));
 
     id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
     id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
@@ -8706,9 +8809,23 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
 
     id->mdts = n->params.mdts;
     id->ver = cpu_to_le32(NVME_SPEC_VER);
-    id->oacs =
-        cpu_to_le16(NVME_OACS_NS_MGMT | NVME_OACS_FORMAT | NVME_OACS_DBBUF |
-                    NVME_OACS_DIRECTIVES);
+
+    oacs = NVME_OACS_NMS | NVME_OACS_FORMAT | NVME_OACS_DIRECTIVES;
+
+    if (n->params.dbcs) {
+        oacs |= NVME_OACS_DBCS;
+
+        n->cse.acs[NVME_ADM_CMD_DBBUF_CONFIG] = NVME_CMD_EFF_CSUPP;
+    }
+
+    if (n->params.sriov_max_vfs) {
+        oacs |= NVME_OACS_VMS;
+
+        n->cse.acs[NVME_ADM_CMD_VIRT_MNGMT] = NVME_CMD_EFF_CSUPP;
+    }
+
+    id->oacs = cpu_to_le16(oacs);
+
     id->cntrltype = 0x1;
 
     /*
@@ -8759,15 +8876,13 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
     id->psd[0].enlat = cpu_to_le32(0x10);
     id->psd[0].exlat = cpu_to_le32(0x4);
 
-    if (n->subsys) {
-        id->cmic |= NVME_CMIC_MULTI_CTRL;
-        ctratt |= NVME_CTRATT_ENDGRPS;
+    id->cmic |= NVME_CMIC_MULTI_CTRL;
+    ctratt |= NVME_CTRATT_ENDGRPS;
 
-        id->endgidmax = cpu_to_le16(0x1);
+    id->endgidmax = cpu_to_le16(0x1);
 
-        if (n->subsys->endgrp.fdp.enabled) {
-            ctratt |= NVME_CTRATT_FDPS;
-        }
+    if (n->subsys->endgrp.fdp.enabled) {
+        ctratt |= NVME_CTRATT_FDPS;
     }
 
     id->ctratt = cpu_to_le32(ctratt);
@@ -8775,9 +8890,8 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
     NVME_CAP_SET_MQES(cap, n->params.mqes);
     NVME_CAP_SET_CQR(cap, 1);
     NVME_CAP_SET_TO(cap, 0xf);
-    NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_NVM);
-    NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_CSI_SUPP);
-    NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_ADMIN_ONLY);
+    NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_NCSS);
+    NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_IOCSS);
     NVME_CAP_SET_MPSMAX(cap, 4);
     NVME_CAP_SET_CMBS(cap, n->params.cmb_size_mb ? 1 : 0);
     NVME_CAP_SET_PMRS(cap, n->pmr.dev ? 1 : 0);
@@ -8796,7 +8910,15 @@ static int nvme_init_subsys(NvmeCtrl *n, Error **errp)
     int cntlid;
 
     if (!n->subsys) {
-        return 0;
+        DeviceState *dev = qdev_new(TYPE_NVME_SUBSYS);
+
+        qdev_prop_set_string(dev, "nqn", n->params.serial);
+
+        if (!qdev_realize(dev, NULL, errp)) {
+            return -1;
+        }
+
+        n->subsys = NVME_SUBSYS(dev);
     }
 
     cntlid = nvme_subsys_register_ctrl(n, errp);
@@ -8816,9 +8938,6 @@ void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns)
 
     n->namespaces[nsid] = ns;
     ns->attached++;
-
-    n->dmrsl = MIN_NON_ZERO(n->dmrsl,
-                            BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1));
 }
 
 static void nvme_realize(PCIDevice *pci_dev, Error **errp)
@@ -8874,7 +8993,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
             return;
         }
 
-        nvme_attach_ns(n, ns);
+        n->subsys->namespaces[ns->params.nsid] = ns;
     }
 }
 
@@ -8886,17 +9005,15 @@ static void nvme_exit(PCIDevice *pci_dev)
 
     nvme_ctrl_reset(n, NVME_RESET_FUNCTION);
 
-    if (n->subsys) {
-        for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
-            ns = nvme_ns(n, i);
-            if (ns) {
-                ns->attached--;
-            }
+    for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
+        ns = nvme_ns(n, i);
+        if (ns) {
+            ns->attached--;
         }
-
-        nvme_subsys_unregister_ctrl(n->subsys, n);
     }
 
+    nvme_subsys_unregister_ctrl(n->subsys, n);
+
     g_free(n->cq);
     g_free(n->sq);
     g_free(n->aer_reqs);
@@ -8945,6 +9062,7 @@ static const Property nvme_props[] = {
     DEFINE_PROP_BOOL("use-intel-id", NvmeCtrl, params.use_intel_id, false),
     DEFINE_PROP_BOOL("legacy-cmb", NvmeCtrl, params.legacy_cmb, false),
     DEFINE_PROP_BOOL("ioeventfd", NvmeCtrl, params.ioeventfd, false),
+    DEFINE_PROP_BOOL("dbcs", NvmeCtrl, params.dbcs, true),
     DEFINE_PROP_UINT8("zoned.zasl", NvmeCtrl, params.zasl, 0),
     DEFINE_PROP_BOOL("zoned.auto_transition", NvmeCtrl,
                      params.auto_transition_zones, true),
@@ -8965,6 +9083,7 @@ static const Property nvme_props[] = {
     DEFINE_PROP_BOOL("atomic.dn", NvmeCtrl, params.atomic_dn, 0),
     DEFINE_PROP_UINT16("atomic.awun", NvmeCtrl, params.atomic_awun, 0),
     DEFINE_PROP_UINT16("atomic.awupf", NvmeCtrl, params.atomic_awupf, 0),
+    DEFINE_PROP_BOOL("ocp", NvmeCtrl, params.ocp, false),
 };
 
 static void nvme_get_smart_warning(Object *obj, Visitor *v, const char *name,
diff --git a/hw/nvme/nguid.c b/hw/nvme/nguid.c
index be63cb75e168c..4cd6fad6ac9b3 100644
--- a/hw/nvme/nguid.c
+++ b/hw/nvme/nguid.c
@@ -179,7 +179,7 @@ static void set_nguid(Object *obj, Visitor *v, const char *name, void *opaque,
 }
 
 const PropertyInfo qdev_prop_nguid = {
-    .name  = "str",
+    .type  = "str",
     .description =
         "NGUID or \"" NGUID_VALUE_AUTO "\" for random value",
     .get   = get_nguid,
diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
index 410df29591924..98c1e75a5d296 100644
--- a/hw/nvme/ns.c
+++ b/hw/nvme/ns.c
@@ -727,25 +727,14 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
     uint32_t nsid = ns->params.nsid;
     int i;
 
-    if (!n->subsys) {
-        /* If no subsys, the ns cannot be attached to more than one ctrl. */
-        ns->params.shared = false;
-        if (ns->params.detached) {
-            error_setg(errp, "detached requires that the nvme device is "
-                       "linked to an nvme-subsys device");
-            return;
-        }
-    } else {
-        /*
-         * If this namespace belongs to a subsystem (through a link on the
-         * controller device), reparent the device.
-         */
-        if (!qdev_set_parent_bus(dev, &subsys->bus.parent_bus, errp)) {
-            return;
-        }
-        ns->subsys = subsys;
-        ns->endgrp = &subsys->endgrp;
+    assert(subsys);
+
+    /* reparent to subsystem bus */
+    if (!qdev_set_parent_bus(dev, &subsys->bus.parent_bus, errp)) {
+        return;
     }
+    ns->subsys = subsys;
+    ns->endgrp = &subsys->endgrp;
 
     if (nvme_ns_setup(ns, errp)) {
         return;
@@ -753,7 +742,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
 
     if (!nsid) {
         for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
-            if (nvme_ns(n, i) || nvme_subsys_ns(subsys, i)) {
+            if (nvme_subsys_ns(subsys, i)) {
                 continue;
             }
 
@@ -765,38 +754,15 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
             error_setg(errp, "no free namespace id");
             return;
         }
-    } else {
-        if (nvme_ns(n, nsid) || nvme_subsys_ns(subsys, nsid)) {
-            error_setg(errp, "namespace id '%d' already allocated", nsid);
-            return;
-        }
+    } else if (nvme_subsys_ns(subsys, nsid)) {
+        error_setg(errp, "namespace id '%d' already allocated", nsid);
+        return;
     }
 
-    if (subsys) {
-        subsys->namespaces[nsid] = ns;
-
-        ns->id_ns.endgid = cpu_to_le16(0x1);
-        ns->id_ns_ind.endgrpid = cpu_to_le16(0x1);
-
-        if (ns->params.detached) {
-            return;
-        }
-
-        if (ns->params.shared) {
-            for (i = 0; i < ARRAY_SIZE(subsys->ctrls); i++) {
-                NvmeCtrl *ctrl = subsys->ctrls[i];
-
-                if (ctrl && ctrl != SUBSYS_SLOT_RSVD) {
-                    nvme_attach_ns(ctrl, ns);
-                }
-            }
-
-            return;
-        }
-
-    }
+    subsys->namespaces[nsid] = ns;
 
-    nvme_attach_ns(n, ns);
+    ns->id_ns.endgid = cpu_to_le16(0x1);
+    ns->id_ns_ind.endgrpid = cpu_to_le16(0x1);
 }
 
 static const Property nvme_ns_props[] = {
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 7242206910570..6f782ba18826d 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -237,7 +237,6 @@ typedef struct NvmeNamespace {
     NvmeLBAF     lbaf;
     unsigned int nlbaf;
     size_t       lbasz;
-    const uint32_t *iocs;
     uint8_t      csi;
     uint16_t     status;
     int          attached;
@@ -539,12 +538,14 @@ typedef struct NvmeParams {
     bool     auto_transition_zones;
     bool     legacy_cmb;
     bool     ioeventfd;
+    bool     dbcs;
     uint16_t  sriov_max_vfs;
     uint16_t sriov_vq_flexible;
     uint16_t sriov_vi_flexible;
     uint32_t  sriov_max_vq_per_vf;
     uint32_t  sriov_max_vi_per_vf;
     bool     msix_exclusive_bar;
+    bool     ocp;
 
     struct {
         bool mem;
@@ -583,6 +584,14 @@ typedef struct NvmeCtrl {
     uint64_t    dbbuf_eis;
     bool        dbbuf_enabled;
 
+    struct {
+        uint32_t acs[256];
+        struct {
+            uint32_t nvm[256];
+            uint32_t zoned[256];
+        } iocs;
+    } cse;
+
     struct {
         MemoryRegion mem;
         uint8_t      *buf;
diff --git a/hw/nvram/eeprom_at24c.c b/hw/nvram/eeprom_at24c.c
index a40cc5dd15ab5..ff7a21eee7f42 100644
--- a/hw/nvram/eeprom_at24c.c
+++ b/hw/nvram/eeprom_at24c.c
@@ -10,6 +10,7 @@
 #include "qemu/osdep.h"
 
 #include "qapi/error.h"
+#include "qemu/error-report.h"
 #include "qemu/module.h"
 #include "hw/i2c/i2c.h"
 #include "hw/nvram/eeprom_at24c.h"
@@ -26,13 +27,8 @@
 #define DPRINTK(FMT, ...) do {} while (0)
 #endif
 
-#define ERR(FMT, ...) fprintf(stderr, TYPE_AT24C_EE " : " FMT, \
-                            ## __VA_ARGS__)
-
 #define TYPE_AT24C_EE "at24c-eeprom"
-typedef struct EEPROMState EEPROMState;
-DECLARE_INSTANCE_CHECKER(EEPROMState, AT24C_EE,
-                         TYPE_AT24C_EE)
+OBJECT_DECLARE_SIMPLE_TYPE(EEPROMState, AT24C_EE)
 
 struct EEPROMState {
     I2CSlave parent_obj;
@@ -77,8 +73,7 @@ int at24c_eeprom_event(I2CSlave *s, enum i2c_event event)
         if (ee->blk && ee->changed) {
             int ret = blk_pwrite(ee->blk, 0, ee->rsize, ee->mem, 0);
             if (ret < 0) {
-                ERR(TYPE_AT24C_EE
-                        " : failed to write backing file\n");
+                error_report("%s: failed to write backing file", __func__);
             }
             DPRINTK("Wrote to backing file\n");
         }
@@ -195,20 +190,18 @@ static void at24c_eeprom_realize(DeviceState *dev, Error **errp)
     }
 
     ee->mem = g_malloc0(ee->rsize);
-    memset(ee->mem, 0, ee->rsize);
-
-    if (ee->init_rom) {
-        memcpy(ee->mem, ee->init_rom, MIN(ee->init_rom_size, ee->rsize));
-    }
 
     if (ee->blk) {
         int ret = blk_pread(ee->blk, 0, ee->rsize, ee->mem, 0);
 
         if (ret < 0) {
-            ERR(TYPE_AT24C_EE
-                    " : Failed initial sync with backing file\n");
+            error_setg(errp, "%s: Failed initial sync with backing file",
+                       TYPE_AT24C_EE);
+            return;
         }
         DPRINTK("Reset read backing file\n");
+    } else if (ee->init_rom) {
+        memcpy(ee->mem, ee->init_rom, MIN(ee->init_rom_size, ee->rsize));
     }
 
     /*
diff --git a/hw/nvram/xlnx-bbram.c b/hw/nvram/xlnx-bbram.c
index 0e8552ce6561d..14cc9073c782b 100644
--- a/hw/nvram/xlnx-bbram.c
+++ b/hw/nvram/xlnx-bbram.c
@@ -502,7 +502,7 @@ static void bbram_prop_release_drive(Object *obj, const char *name,
 }
 
 static const PropertyInfo bbram_prop_drive = {
-    .name  = "str",
+    .type  = "str",
     .description = "Node name or ID of a block device to use as BBRAM backend",
     .realized_set_allowed = true,
     .get = bbram_prop_get_drive,
diff --git a/hw/nvram/xlnx-efuse.c b/hw/nvram/xlnx-efuse.c
index e2e8311a48cb9..29e7dd539ec6d 100644
--- a/hw/nvram/xlnx-efuse.c
+++ b/hw/nvram/xlnx-efuse.c
@@ -257,7 +257,7 @@ static void efuse_prop_release_drive(Object *obj, const char *name,
 }
 
 static const PropertyInfo efuse_prop_drive = {
-    .name  = "str",
+    .type  = "str",
     .description = "Node name or ID of a block device to use as eFUSE backend",
     .realized_set_allowed = true,
     .get = efuse_prop_get_drive,
diff --git a/hw/openrisc/boot.c b/hw/openrisc/boot.c
index 0f08df812dcf2..0a5881be314fe 100644
--- a/hw/openrisc/boot.c
+++ b/hw/openrisc/boot.c
@@ -90,8 +90,8 @@ hwaddr openrisc_load_initrd(void *fdt, const char *filename,
     return start + size;
 }
 
-uint32_t openrisc_load_fdt(void *fdt, hwaddr load_start,
-                           uint64_t mem_size)
+uint32_t openrisc_load_fdt(MachineState *ms, void *fdt,
+                           hwaddr load_start, uint64_t mem_size)
 {
     uint32_t fdt_addr;
     int ret;
@@ -109,7 +109,9 @@ uint32_t openrisc_load_fdt(void *fdt, hwaddr load_start,
     /* Should only fail if we've built a corrupted tree */
     g_assert(ret == 0);
     /* copy in the device tree */
-    qemu_fdt_dumpdtb(fdt, fdtsize);
+
+    /* Save FDT for dumpdtb monitor command */
+    ms->fdt = fdt;
 
     rom_add_blob_fixed_as("fdt", fdt, fdtsize, fdt_addr,
                           &address_space_memory);
diff --git a/hw/openrisc/openrisc_sim.c b/hw/openrisc/openrisc_sim.c
index e0da4067ba396..83d7c2a8afc69 100644
--- a/hw/openrisc/openrisc_sim.c
+++ b/hw/openrisc/openrisc_sim.c
@@ -306,8 +306,6 @@ static void openrisc_sim_init(MachineState *machine)
             exit(1);
         }
 
-        cpu_openrisc_clock_init(cpus[n]);
-
         qemu_register_reset(main_cpu_reset, cpus[n]);
     }
 
@@ -354,7 +352,7 @@ static void openrisc_sim_init(MachineState *machine)
                                              machine->initrd_filename,
                                              load_addr, machine->ram_size);
         }
-        boot_info.fdt_addr = openrisc_load_fdt(state->fdt, load_addr,
+        boot_info.fdt_addr = openrisc_load_fdt(machine, state->fdt, load_addr,
                                                machine->ram_size);
     }
 }
diff --git a/hw/openrisc/virt.c b/hw/openrisc/virt.c
index 7b60bf85094ca..3055306783e1d 100644
--- a/hw/openrisc/virt.c
+++ b/hw/openrisc/virt.c
@@ -487,8 +487,6 @@ static void openrisc_virt_init(MachineState *machine)
             exit(1);
         }
 
-        cpu_openrisc_clock_init(cpus[n]);
-
         qemu_register_reset(main_cpu_reset, cpus[n]);
     }
 
@@ -540,7 +538,7 @@ static void openrisc_virt_init(MachineState *machine)
                                              machine->initrd_filename,
                                              load_addr, machine->ram_size);
         }
-        boot_info.fdt_addr = openrisc_load_fdt(state->fdt, load_addr,
+        boot_info.fdt_addr = openrisc_load_fdt(machine, state->fdt, load_addr,
                                                machine->ram_size);
     }
 }
diff --git a/hw/pci-bridge/pcie_pci_bridge.c b/hw/pci-bridge/pcie_pci_bridge.c
index fd4514a595ce0..2429503cfbbf5 100644
--- a/hw/pci-bridge/pcie_pci_bridge.c
+++ b/hw/pci-bridge/pcie_pci_bridge.c
@@ -52,11 +52,10 @@ static void pcie_pci_bridge_realize(PCIDevice *d, Error **errp)
         goto cap_error;
     }
 
-    pos = pci_add_capability(d, PCI_CAP_ID_PM, 0, PCI_PM_SIZEOF, errp);
+    pos = pci_pm_init(d, 0, errp);
     if (pos < 0) {
         goto pm_error;
     }
-    d->exp.pm_cap = pos;
     pci_set_word(d->config + pos + PCI_PM_PMC, 0x3);
 
     pcie_cap_arifwd_init(d);
diff --git a/hw/pci-host/Kconfig b/hw/pci-host/Kconfig
index c91880b2370e9..35c0415242824 100644
--- a/hw/pci-host/Kconfig
+++ b/hw/pci-host/Kconfig
@@ -99,6 +99,9 @@ config ASTRO
     bool
     select PCI
 
+config PCI_EXPRESS_FSL_IMX8M_PHY
+    bool
+
 config GT64120
     bool
     select PCI
diff --git a/hw/pci-host/designware.c b/hw/pci-host/designware.c
index 3e8c36e6a76ba..c07740bfaa455 100644
--- a/hw/pci-host/designware.c
+++ b/hw/pci-host/designware.c
@@ -55,6 +55,17 @@
 #define DESIGNWARE_PCIE_ATU_DEVFN(x)               (((x) >> 16) & 0xff)
 #define DESIGNWARE_PCIE_ATU_UPPER_TARGET           0x91C
 
+static void designware_pcie_root_bus_class_init(ObjectClass *klass, void *data)
+{
+    BusClass *k = BUS_CLASS(klass);
+
+    /*
+     * Designware has only a single root complex. Enforce the limit on the
+     * parent bus
+     */
+    k->max_dev = 1;
+}
+
 static DesignwarePCIEHost *
 designware_pcie_root_to_host(DesignwarePCIERoot *root)
 {
@@ -699,7 +710,7 @@ static void designware_pcie_host_realize(DeviceState *dev, Error **errp)
                                      &s->pci.memory,
                                      &s->pci.io,
                                      0, 4,
-                                     TYPE_PCIE_BUS);
+                                     TYPE_DESIGNWARE_PCIE_ROOT_BUS);
     pci->bus->flags |= PCI_BUS_EXTENDED_CONFIG_SPACE;
 
     memory_region_init(&s->pci.address_space_root,
@@ -754,6 +765,11 @@ static void designware_pcie_host_init(Object *obj)
 
 static const TypeInfo designware_pcie_types[] = {
     {
+        .name           = TYPE_DESIGNWARE_PCIE_ROOT_BUS,
+        .parent         = TYPE_PCIE_BUS,
+        .instance_size  = sizeof(DesignwarePCIERootBus),
+        .class_init     = designware_pcie_root_bus_class_init,
+    }, {
         .name           = TYPE_DESIGNWARE_PCIE_HOST,
         .parent         = TYPE_PCI_HOST_BRIDGE,
         .instance_size  = sizeof(DesignwarePCIEHost),
diff --git a/hw/pci-host/fsl_imx8m_phy.c b/hw/pci-host/fsl_imx8m_phy.c
new file mode 100644
index 0000000000000..aa304b102b321
--- /dev/null
+++ b/hw/pci-host/fsl_imx8m_phy.c
@@ -0,0 +1,98 @@
+/*
+ * i.MX8 PCIe PHY emulation
+ *
+ * Copyright (c) 2025 Bernhard Beschow <shentey@gmail.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "hw/pci-host/fsl_imx8m_phy.h"
+#include "hw/resettable.h"
+#include "migration/vmstate.h"
+
+#define CMN_REG075 0x1d4
+#define ANA_PLL_LOCK_DONE BIT(1)
+#define ANA_PLL_AFC_DONE BIT(0)
+
+static uint64_t fsl_imx8m_pcie_phy_read(void *opaque, hwaddr offset,
+                                        unsigned size)
+{
+    FslImx8mPciePhyState *s = opaque;
+
+    if (offset == CMN_REG075) {
+        return s->data[offset] | ANA_PLL_LOCK_DONE | ANA_PLL_AFC_DONE;
+    }
+
+    return s->data[offset];
+}
+
+static void fsl_imx8m_pcie_phy_write(void *opaque, hwaddr offset,
+                                     uint64_t value, unsigned size)
+{
+    FslImx8mPciePhyState *s = opaque;
+
+    s->data[offset] = value;
+}
+
+static const MemoryRegionOps fsl_imx8m_pcie_phy_ops = {
+    .read = fsl_imx8m_pcie_phy_read,
+    .write = fsl_imx8m_pcie_phy_write,
+    .impl = {
+        .min_access_size = 1,
+        .max_access_size = 1,
+    },
+    .valid = {
+        .min_access_size = 1,
+        .max_access_size = 8,
+    },
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static void fsl_imx8m_pcie_phy_realize(DeviceState *dev, Error **errp)
+{
+    FslImx8mPciePhyState *s = FSL_IMX8M_PCIE_PHY(dev);
+
+    memory_region_init_io(&s->iomem, OBJECT(s), &fsl_imx8m_pcie_phy_ops, s,
+                          TYPE_FSL_IMX8M_PCIE_PHY, ARRAY_SIZE(s->data));
+    sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem);
+}
+
+static void fsl_imx8m_pcie_phy_reset_hold(Object *obj, ResetType type)
+{
+    FslImx8mPciePhyState *s = FSL_IMX8M_PCIE_PHY(obj);
+
+    memset(s->data, 0, sizeof(s->data));
+}
+
+static const VMStateDescription fsl_imx8m_pcie_phy_vmstate = {
+    .name = "fsl-imx8m-pcie-phy",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (const VMStateField[]) {
+        VMSTATE_UINT8_ARRAY(data, FslImx8mPciePhyState,
+                            FSL_IMX8M_PCIE_PHY_DATA_SIZE),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void fsl_imx8m_pcie_phy_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    ResettableClass *rc = RESETTABLE_CLASS(klass);
+
+    dc->realize = fsl_imx8m_pcie_phy_realize;
+    dc->vmsd = &fsl_imx8m_pcie_phy_vmstate;
+    rc->phases.hold = fsl_imx8m_pcie_phy_reset_hold;
+}
+
+static const TypeInfo fsl_imx8m_pcie_phy_types[] = {
+    {
+        .name = TYPE_FSL_IMX8M_PCIE_PHY,
+        .parent = TYPE_SYS_BUS_DEVICE,
+        .instance_size = sizeof(FslImx8mPciePhyState),
+        .class_init = fsl_imx8m_pcie_phy_class_init,
+    }
+};
+
+DEFINE_TYPES(fsl_imx8m_pcie_phy_types)
diff --git a/hw/pci-host/meson.build b/hw/pci-host/meson.build
index 3001e93a43718..937a0f72acf94 100644
--- a/hw/pci-host/meson.build
+++ b/hw/pci-host/meson.build
@@ -28,6 +28,7 @@ pci_ss.add(when: 'CONFIG_ARTICIA', if_true: files('articia.c'))
 pci_ss.add(when: 'CONFIG_MV64361', if_true: files('mv64361.c'))
 
 # ARM devices
+pci_ss.add(when: 'CONFIG_PCI_EXPRESS_FSL_IMX8M_PHY', if_true: files('fsl_imx8m_phy.c'))
 pci_ss.add(when: 'CONFIG_VERSATILE_PCI', if_true: files('versatile.c'))
 
 # HPPA devices
diff --git a/hw/pci-host/pnv_phb4_pec.c b/hw/pci-host/pnv_phb4_pec.c
index a156839caf5c9..cb8a7e3afa776 100644
--- a/hw/pci-host/pnv_phb4_pec.c
+++ b/hw/pci-host/pnv_phb4_pec.c
@@ -197,6 +197,9 @@ static PnvPHB *pnv_pec_default_phb_realize(PnvPhb4PecState *pec,
     return phb;
 }
 
+#define   XPEC_P9_PCI_LANE_CFG                  PPC_BITMASK(10, 11)
+#define   XPEC_P10_PCI_LANE_CFG                 PPC_BITMASK(0, 1)
+
 static void pnv_pec_realize(DeviceState *dev, Error **errp)
 {
     PnvPhb4PecState *pec = PNV_PHB4_PEC(dev);
@@ -211,6 +214,43 @@ static void pnv_pec_realize(DeviceState *dev, Error **errp)
 
     pec->num_phbs = pecc->num_phbs[pec->index];
 
+    /* Pervasive chiplet */
+    object_initialize_child(OBJECT(pec), "nest-pervasive-common",
+                            &pec->nest_pervasive,
+                            TYPE_PNV_NEST_CHIPLET_PERVASIVE);
+    if (!qdev_realize(DEVICE(&pec->nest_pervasive), NULL, errp)) {
+        return;
+    }
+
+    /* Set up pervasive chiplet registers */
+    /*
+     * Most registers are not set up, this just sets the PCI CONF1 link-width
+     * field because skiboot probes it.
+     */
+    if (pecc->version == PNV_PHB4_VERSION) {
+        /*
+         * On P9, PEC2 has configurable 1/2/3-furcation).
+         * Make it trifurcated (x8, x4, x4) to match pnv_pec_num_phbs.
+         */
+        if (pec->index == 2) {
+            pec->nest_pervasive.control_regs.cplt_cfg1 =
+                    SETFIELD(XPEC_P9_PCI_LANE_CFG,
+                             pec->nest_pervasive.control_regs.cplt_cfg1,
+                             0b10);
+        }
+    } else if (pecc->version == PNV_PHB5_VERSION) {
+        /*
+         * On P10, both PECs are configurable 1/2/3-furcation).
+         * Both are trifurcated to match pnv_phb5_pec_num_stacks.
+         */
+        pec->nest_pervasive.control_regs.cplt_cfg1 =
+                SETFIELD(XPEC_P10_PCI_LANE_CFG,
+                         pec->nest_pervasive.control_regs.cplt_cfg1,
+                         0b10);
+    } else {
+        g_assert_not_reached();
+    }
+
     /* Create PHBs if running with defaults */
     if (defaults_enabled()) {
         g_assert(pec->num_phbs <= MAX_PHBS_PER_PEC);
@@ -290,9 +330,16 @@ static const Property pnv_pec_properties[] = {
                      PnvChip *),
 };
 
+#define XPEC_PCI_CPLT_OFFSET                        0x1000000ULL
+
+static uint32_t pnv_pec_xscom_cplt_base(PnvPhb4PecState *pec)
+{
+    return PNV9_XSCOM_PEC_NEST_CPLT_BASE + XPEC_PCI_CPLT_OFFSET * pec->index;
+}
+
 static uint32_t pnv_pec_xscom_pci_base(PnvPhb4PecState *pec)
 {
-    return PNV9_XSCOM_PEC_PCI_BASE + 0x1000000 * pec->index;
+    return PNV9_XSCOM_PEC_PCI_BASE + XPEC_PCI_CPLT_OFFSET * pec->index;
 }
 
 static uint32_t pnv_pec_xscom_nest_base(PnvPhb4PecState *pec)
@@ -321,6 +368,7 @@ static void pnv_pec_class_init(ObjectClass *klass, void *data)
     device_class_set_props(dc, pnv_pec_properties);
     dc->user_creatable = false;
 
+    pecc->xscom_cplt_base = pnv_pec_xscom_cplt_base;
     pecc->xscom_nest_base = pnv_pec_xscom_nest_base;
     pecc->xscom_pci_base  = pnv_pec_xscom_pci_base;
     pecc->xscom_nest_size = PNV9_XSCOM_PEC_NEST_SIZE;
@@ -349,6 +397,10 @@ static const TypeInfo pnv_pec_type_info = {
 /*
  * POWER10 definitions
  */
+static uint32_t pnv_phb5_pec_xscom_cplt_base(PnvPhb4PecState *pec)
+{
+    return PNV10_XSCOM_PEC_NEST_CPLT_BASE + XPEC_PCI_CPLT_OFFSET * pec->index;
+}
 
 static uint32_t pnv_phb5_pec_xscom_pci_base(PnvPhb4PecState *pec)
 {
@@ -373,6 +425,7 @@ static void pnv_phb5_pec_class_init(ObjectClass *klass, void *data)
     static const char compat[] = "ibm,power10-pbcq";
     static const char stk_compat[] = "ibm,power10-phb-stack";
 
+    pecc->xscom_cplt_base = pnv_phb5_pec_xscom_cplt_base;
     pecc->xscom_nest_base = pnv_phb5_pec_xscom_nest_base;
     pecc->xscom_pci_base  = pnv_phb5_pec_xscom_pci_base;
     pecc->xscom_nest_size = PNV10_XSCOM_PEC_NEST_SIZE;
diff --git a/hw/pci-host/versatile.c b/hw/pci-host/versatile.c
index c3fbf4cbf943c..33a8ceb3b54e1 100644
--- a/hw/pci-host/versatile.c
+++ b/hw/pci-host/versatile.c
@@ -246,7 +246,7 @@ static uint64_t pci_vpb_reg_read(void *opaque, hwaddr addr,
 static const MemoryRegionOps pci_vpb_reg_ops = {
     .read = pci_vpb_reg_read,
     .write = pci_vpb_reg_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
     .valid = {
         .min_access_size = 4,
         .max_access_size = 4,
@@ -312,7 +312,7 @@ static uint64_t pci_vpb_config_read(void *opaque, hwaddr addr,
 static const MemoryRegionOps pci_vpb_config_ops = {
     .read = pci_vpb_config_read,
     .write = pci_vpb_config_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
 static int pci_vpb_map_irq(PCIDevice *d, int irq_num)
diff --git a/hw/pci/msix.c b/hw/pci/msix.c
index 57ec7084a475b..66f27b9d7120f 100644
--- a/hw/pci/msix.c
+++ b/hw/pci/msix.c
@@ -15,6 +15,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/log.h"
 #include "hw/pci/msi.h"
 #include "hw/pci/msix.h"
 #include "hw/pci/pci.h"
@@ -260,6 +261,14 @@ static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr,
 static void msix_pba_mmio_write(void *opaque, hwaddr addr,
                                 uint64_t val, unsigned size)
 {
+    PCIDevice *dev = opaque;
+
+    qemu_log_mask(LOG_GUEST_ERROR,
+                  "PCI [%s:%02x:%02x.%x] attempt to write to MSI-X "
+                  "PBA at 0x%" FMT_PCIBUS ", ignoring.\n",
+                  pci_root_bus_path(dev), pci_dev_bus_num(dev),
+                  PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
+                  addr);
 }
 
 static const MemoryRegionOps msix_pba_mmio_ops = {
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 2afa423925c52..2844ec55560f7 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -77,7 +77,7 @@ static void prop_pci_busnr_get(Object *obj, Visitor *v, const char *name,
 }
 
 static const PropertyInfo prop_pci_busnr = {
-    .name = "busnr",
+    .type = "busnr",
     .get = prop_pci_busnr_get,
 };
 
@@ -435,6 +435,84 @@ static void pci_msi_trigger(PCIDevice *dev, MSIMessage msg)
                          attrs, NULL);
 }
 
+/*
+ * Register and track a PM capability.  If wmask is also enabled for the power
+ * state field of the pmcsr register, guest writes may change the device PM
+ * state.  BAR access is only enabled while the device is in the D0 state.
+ * Return the capability offset or negative error code.
+ */
+int pci_pm_init(PCIDevice *d, uint8_t offset, Error **errp)
+{
+    int cap = pci_add_capability(d, PCI_CAP_ID_PM, offset, PCI_PM_SIZEOF, errp);
+
+    if (cap < 0) {
+        return cap;
+    }
+
+    d->pm_cap = cap;
+    d->cap_present |= QEMU_PCI_CAP_PM;
+
+    return cap;
+}
+
+static uint8_t pci_pm_state(PCIDevice *d)
+{
+    uint16_t pmcsr;
+
+    if (!(d->cap_present & QEMU_PCI_CAP_PM)) {
+        return 0;
+    }
+
+    pmcsr = pci_get_word(d->config + d->pm_cap + PCI_PM_CTRL);
+
+    return pmcsr & PCI_PM_CTRL_STATE_MASK;
+}
+
+/*
+ * Update the PM capability state based on the new value stored in config
+ * space respective to the old, pre-write state provided.  If the new value
+ * is rejected (unsupported or invalid transition) restore the old value.
+ * Return the resulting PM state.
+ */
+static uint8_t pci_pm_update(PCIDevice *d, uint32_t addr, int l, uint8_t old)
+{
+    uint16_t pmc;
+    uint8_t new;
+
+    if (!(d->cap_present & QEMU_PCI_CAP_PM) ||
+        !range_covers_byte(addr, l, d->pm_cap + PCI_PM_CTRL)) {
+        return old;
+    }
+
+    new = pci_pm_state(d);
+    if (new == old) {
+        return old;
+    }
+
+    pmc = pci_get_word(d->config + d->pm_cap + PCI_PM_PMC);
+
+    /*
+     * Transitions to D1 & D2 are only allowed if supported.  Devices may
+     * only transition to higher D-states or to D0.
+     */
+    if ((!(pmc & PCI_PM_CAP_D1) && new == 1) ||
+        (!(pmc & PCI_PM_CAP_D2) && new == 2) ||
+        (old && new && new < old)) {
+        pci_word_test_and_clear_mask(d->config + d->pm_cap + PCI_PM_CTRL,
+                                     PCI_PM_CTRL_STATE_MASK);
+        pci_word_test_and_set_mask(d->config + d->pm_cap + PCI_PM_CTRL,
+                                   old);
+        trace_pci_pm_bad_transition(d->name, pci_dev_bus_num(d),
+                                    PCI_SLOT(d->devfn), PCI_FUNC(d->devfn),
+                                    old, new);
+        return old;
+    }
+
+    trace_pci_pm_transition(d->name, pci_dev_bus_num(d), PCI_SLOT(d->devfn),
+                            PCI_FUNC(d->devfn), old, new);
+    return new;
+}
+
 static void pci_reset_regions(PCIDevice *dev)
 {
     int r;
@@ -474,6 +552,11 @@ static void pci_do_device_reset(PCIDevice *dev)
                               pci_get_word(dev->wmask + PCI_INTERRUPT_LINE) |
                               pci_get_word(dev->w1cmask + PCI_INTERRUPT_LINE));
     dev->config[PCI_CACHE_LINE_SIZE] = 0x0;
+    /* Default PM state is D0 */
+    if (dev->cap_present & QEMU_PCI_CAP_PM) {
+        pci_word_test_and_clear_mask(dev->config + dev->pm_cap + PCI_PM_CTRL,
+                                     PCI_PM_CTRL_STATE_MASK);
+    }
     pci_reset_regions(dev);
     pci_update_mappings(dev);
 
@@ -803,10 +886,17 @@ static bool migrate_is_not_pcie(void *opaque, int version_id)
     return !pci_is_express((PCIDevice *)opaque);
 }
 
+static int pci_post_load(void *opaque, int version_id)
+{
+    pcie_sriov_pf_post_load(opaque);
+    return 0;
+}
+
 const VMStateDescription vmstate_pci_device = {
     .name = "PCIDevice",
     .version_id = 2,
     .minimum_version_id = 1,
+    .post_load = pci_post_load,
     .fields = (const VMStateField[]) {
         VMSTATE_INT32_POSITIVE_LE(version_id, PCIDevice),
         VMSTATE_BUFFER_UNSAFE_INFO_TEST(config, PCIDevice,
@@ -1391,6 +1481,7 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
     assert(hdr_type != PCI_HEADER_TYPE_BRIDGE || region_num < 2);
 
     r = &pci_dev->io_regions[region_num];
+    assert(!r->size);
     r->addr = PCI_BAR_UNMAPPED;
     r->size = size;
     r->type = type;
@@ -1598,7 +1689,7 @@ static void pci_update_mappings(PCIDevice *d)
             continue;
 
         new_addr = pci_bar_address(d, i, r->type, r->size);
-        if (!d->enabled) {
+        if (!d->enabled || pci_pm_state(d)) {
             new_addr = PCI_BAR_UNMAPPED;
         }
 
@@ -1664,6 +1755,7 @@ uint32_t pci_default_read_config(PCIDevice *d,
 
 void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int l)
 {
+    uint8_t new_pm_state, old_pm_state = pci_pm_state(d);
     int i, was_irq_disabled = pci_irq_disabled(d);
     uint32_t val = val_in;
 
@@ -1676,11 +1768,16 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int
         d->config[addr + i] = (d->config[addr + i] & ~wmask) | (val & wmask);
         d->config[addr + i] &= ~(val & w1cmask); /* W1C: Write 1 to Clear */
     }
+
+    new_pm_state = pci_pm_update(d, addr, l, old_pm_state);
+
     if (ranges_overlap(addr, l, PCI_BASE_ADDRESS_0, 24) ||
         ranges_overlap(addr, l, PCI_ROM_ADDRESS, 4) ||
         ranges_overlap(addr, l, PCI_ROM_ADDRESS1, 4) ||
-        range_covers_byte(addr, l, PCI_COMMAND))
+        range_covers_byte(addr, l, PCI_COMMAND) ||
+        !!new_pm_state != !!old_pm_state) {
         pci_update_mappings(d);
+    }
 
     if (ranges_overlap(addr, l, PCI_COMMAND, 2)) {
         pci_update_irq_disabled(d, was_irq_disabled);
@@ -2963,7 +3060,17 @@ MSIMessage pci_get_msi_message(PCIDevice *dev, int vector)
 
 void pci_set_power(PCIDevice *d, bool state)
 {
-    pci_set_enabled(d, state);
+    /*
+     * Don't change the enabled state of VFs when powering on/off the device.
+     *
+     * When powering on, VFs must not be enabled immediately but they must
+     * wait until the guest configures SR-IOV.
+     * When powering off, their corresponding PFs will be reset and disable
+     * VFs.
+     */
+    if (!pci_is_vf(d)) {
+        pci_set_enabled(d, state);
+    }
 }
 
 void pci_set_enabled(PCIDevice *d, bool state)
@@ -2977,7 +3084,7 @@ void pci_set_enabled(PCIDevice *d, bool state)
     memory_region_set_enabled(&d->bus_master_enable_region,
                               (pci_get_word(d->config + PCI_COMMAND)
                                & PCI_COMMAND_MASTER) && d->enabled);
-    if (!d->enabled) {
+    if (qdev_is_realized(&d->qdev)) {
         pci_device_reset(d);
     }
 }
diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
index e9b23221d713c..1eb4358256de1 100644
--- a/hw/pci/pcie_sriov.c
+++ b/hw/pci/pcie_sriov.c
@@ -20,23 +20,37 @@
 #include "qapi/error.h"
 #include "trace.h"
 
-static PCIDevice *register_vf(PCIDevice *pf, int devfn,
-                              const char *name, uint16_t vf_num);
-static void unregister_vfs(PCIDevice *dev);
+static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs)
+{
+    for (uint16_t i = 0; i < total_vfs; i++) {
+        PCIDevice *vf = dev->exp.sriov_pf.vf[i];
+        object_unparent(OBJECT(vf));
+        object_unref(OBJECT(vf));
+    }
+    g_free(dev->exp.sriov_pf.vf);
+    dev->exp.sriov_pf.vf = NULL;
+}
 
-void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
+bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
                         const char *vfname, uint16_t vf_dev_id,
                         uint16_t init_vfs, uint16_t total_vfs,
-                        uint16_t vf_offset, uint16_t vf_stride)
+                        uint16_t vf_offset, uint16_t vf_stride,
+                        Error **errp)
 {
+    BusState *bus = qdev_get_parent_bus(&dev->qdev);
+    int32_t devfn = dev->devfn + vf_offset;
     uint8_t *cfg = dev->config + offset;
     uint8_t *wmask;
 
+    if (total_vfs &&
+        (uint32_t)devfn + (uint32_t)(total_vfs - 1) * vf_stride >= PCI_DEVFN_MAX) {
+        error_setg(errp, "VF addr overflows");
+        return false;
+    }
+
     pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1,
                         offset, PCI_EXT_CAP_SRIOV_SIZEOF);
     dev->exp.sriov_cap = offset;
-    dev->exp.sriov_pf.num_vfs = 0;
-    dev->exp.sriov_pf.vfname = g_strdup(vfname);
     dev->exp.sriov_pf.vf = NULL;
 
     pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset);
@@ -69,13 +83,37 @@ void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
     pci_set_word(wmask + PCI_SRIOV_SYS_PGSIZE, 0x553);
 
     qdev_prop_set_bit(&dev->qdev, "multifunction", true);
+
+    dev->exp.sriov_pf.vf = g_new(PCIDevice *, total_vfs);
+
+    for (uint16_t i = 0; i < total_vfs; i++) {
+        PCIDevice *vf = pci_new(devfn, vfname);
+        vf->exp.sriov_vf.pf = dev;
+        vf->exp.sriov_vf.vf_number = i;
+
+        if (!qdev_realize(&vf->qdev, bus, errp)) {
+            object_unparent(OBJECT(vf));
+            object_unref(vf);
+            unparent_vfs(dev, i);
+            return false;
+        }
+
+        /* set vid/did according to sr/iov spec - they are not used */
+        pci_config_set_vendor_id(vf->config, 0xffff);
+        pci_config_set_device_id(vf->config, 0xffff);
+
+        dev->exp.sriov_pf.vf[i] = vf;
+        devfn += vf_stride;
+    }
+
+    return true;
 }
 
 void pcie_sriov_pf_exit(PCIDevice *dev)
 {
-    unregister_vfs(dev);
-    g_free((char *)dev->exp.sriov_pf.vfname);
-    dev->exp.sriov_pf.vfname = NULL;
+    uint8_t *cfg = dev->config + dev->exp.sriov_cap;
+
+    unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF));
 }
 
 void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
@@ -141,80 +179,36 @@ void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
     }
 }
 
-static PCIDevice *register_vf(PCIDevice *pf, int devfn, const char *name,
-                              uint16_t vf_num)
-{
-    PCIDevice *dev = pci_new(devfn, name);
-    dev->exp.sriov_vf.pf = pf;
-    dev->exp.sriov_vf.vf_number = vf_num;
-    PCIBus *bus = pci_get_bus(pf);
-    Error *local_err = NULL;
-
-    qdev_realize(&dev->qdev, &bus->qbus, &local_err);
-    if (local_err) {
-        error_report_err(local_err);
-        return NULL;
-    }
-
-    /* set vid/did according to sr/iov spec - they are not used */
-    pci_config_set_vendor_id(dev->config, 0xffff);
-    pci_config_set_device_id(dev->config, 0xffff);
-
-    return dev;
-}
-
 static void register_vfs(PCIDevice *dev)
 {
     uint16_t num_vfs;
     uint16_t i;
     uint16_t sriov_cap = dev->exp.sriov_cap;
-    uint16_t vf_offset =
-        pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_OFFSET);
-    uint16_t vf_stride =
-        pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_STRIDE);
-    int32_t devfn = dev->devfn + vf_offset;
 
     assert(sriov_cap > 0);
     num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
-    if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) {
-        return;
-    }
-
-    dev->exp.sriov_pf.vf = g_new(PCIDevice *, num_vfs);
 
     trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn),
                              PCI_FUNC(dev->devfn), num_vfs);
     for (i = 0; i < num_vfs; i++) {
-        dev->exp.sriov_pf.vf[i] = register_vf(dev, devfn,
-                                              dev->exp.sriov_pf.vfname, i);
-        if (!dev->exp.sriov_pf.vf[i]) {
-            num_vfs = i;
-            break;
-        }
-        devfn += vf_stride;
+        pci_set_enabled(dev->exp.sriov_pf.vf[i], true);
     }
-    dev->exp.sriov_pf.num_vfs = num_vfs;
+
+    pci_set_word(dev->wmask + sriov_cap + PCI_SRIOV_NUM_VF, 0);
 }
 
 static void unregister_vfs(PCIDevice *dev)
 {
-    uint16_t num_vfs = dev->exp.sriov_pf.num_vfs;
+    uint8_t *cfg = dev->config + dev->exp.sriov_cap;
     uint16_t i;
 
     trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
-                               PCI_FUNC(dev->devfn), num_vfs);
-    for (i = 0; i < num_vfs; i++) {
-        Error *err = NULL;
-        PCIDevice *vf = dev->exp.sriov_pf.vf[i];
-        if (!object_property_set_bool(OBJECT(vf), "realized", false, &err)) {
-            error_reportf_err(err, "Failed to unplug: ");
-        }
-        object_unparent(OBJECT(vf));
-        object_unref(OBJECT(vf));
+                               PCI_FUNC(dev->devfn));
+    for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) {
+        pci_set_enabled(dev->exp.sriov_pf.vf[i], false);
     }
-    g_free(dev->exp.sriov_pf.vf);
-    dev->exp.sriov_pf.vf = NULL;
-    dev->exp.sriov_pf.num_vfs = 0;
+
+    pci_set_word(dev->wmask + dev->exp.sriov_cap + PCI_SRIOV_NUM_VF, 0xffff);
 }
 
 void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
@@ -235,15 +229,29 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
                              PCI_FUNC(dev->devfn), off, val, len);
 
     if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) {
-        if (dev->exp.sriov_pf.num_vfs) {
-            if (!(val & PCI_SRIOV_CTRL_VFE)) {
-                unregister_vfs(dev);
-            }
+        if (val & PCI_SRIOV_CTRL_VFE) {
+            register_vfs(dev);
         } else {
-            if (val & PCI_SRIOV_CTRL_VFE) {
-                register_vfs(dev);
-            }
+            unregister_vfs(dev);
         }
+    } else if (range_covers_byte(off, len, PCI_SRIOV_NUM_VF)) {
+        uint8_t *cfg = dev->config + sriov_cap;
+        uint8_t *wmask = dev->wmask + sriov_cap;
+        uint16_t num_vfs = pci_get_word(cfg + PCI_SRIOV_NUM_VF);
+        uint16_t wmask_val = PCI_SRIOV_CTRL_MSE | PCI_SRIOV_CTRL_ARI;
+
+        if (num_vfs <= pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)) {
+            wmask_val |= PCI_SRIOV_CTRL_VFE;
+        }
+
+        pci_set_word(wmask + PCI_SRIOV_CTRL, wmask_val);
+    }
+}
+
+void pcie_sriov_pf_post_load(PCIDevice *dev)
+{
+    if (dev->exp.sriov_cap) {
+        register_vfs(dev);
     }
 }
 
@@ -260,6 +268,8 @@ void pcie_sriov_pf_reset(PCIDevice *dev)
     unregister_vfs(dev);
 
     pci_set_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF, 0);
+    pci_set_word(dev->wmask + sriov_cap + PCI_SRIOV_CTRL,
+                 PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE | PCI_SRIOV_CTRL_ARI);
 
     /*
      * Default is to use 4K pages, software can modify it
@@ -306,7 +316,7 @@ PCIDevice *pcie_sriov_get_pf(PCIDevice *dev)
 PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n)
 {
     assert(!pci_is_vf(dev));
-    if (n < dev->exp.sriov_pf.num_vfs) {
+    if (n < pcie_sriov_num_vfs(dev)) {
         return dev->exp.sriov_pf.vf[n];
     }
     return NULL;
@@ -314,5 +324,10 @@ PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n)
 
 uint16_t pcie_sriov_num_vfs(PCIDevice *dev)
 {
-    return dev->exp.sriov_pf.num_vfs;
+    uint16_t sriov_cap = dev->exp.sriov_cap;
+    uint8_t *cfg = dev->config + sriov_cap;
+
+    return sriov_cap &&
+           (pci_get_word(cfg + PCI_SRIOV_CTRL) & PCI_SRIOV_CTRL_VFE) ?
+           pci_get_word(cfg + PCI_SRIOV_NUM_VF) : 0;
 }
diff --git a/hw/pci/trace-events b/hw/pci/trace-events
index 19643aa8c6b06..6a9968962f4af 100644
--- a/hw/pci/trace-events
+++ b/hw/pci/trace-events
@@ -1,6 +1,8 @@
 # See docs/devel/tracing.rst for syntax documentation.
 
 # pci.c
+pci_pm_bad_transition(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, uint8_t old, uint8_t new) "%s %02x:%02x.%x REJECTED PM transition D%d->D%d"
+pci_pm_transition(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, uint8_t old, uint8_t new) "%s %02x:%02x.%x PM transition D%d->D%d"
 pci_update_mappings_del(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64
 pci_update_mappings_add(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64
 pci_route_irq(int dev_irq, const char *dev_path, int parent_irq, const char *parent_path) "IRQ %d @%s -> IRQ %d @%s"
@@ -14,7 +16,7 @@ msix_write_config(char *name, bool enabled, bool masked) "dev %s enabled %d mask
 
 # hw/pci/pcie_sriov.c
 sriov_register_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: creating %d vf devs"
-sriov_unregister_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: Unregistering %d vf devs"
+sriov_unregister_vfs(const char *name, int slot, int function) "%s %02x:%x: Unregistering vf devs"
 sriov_config_write(const char *name, int slot, int fun, uint32_t offset, uint32_t val, uint32_t len) "%s %02x:%x: sriov offset 0x%x val 0x%x len %d"
 
 # pcie.c
diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig
index b44d91bebb2a5..ced6bbc7404e3 100644
--- a/hw/ppc/Kconfig
+++ b/hw/ppc/Kconfig
@@ -44,15 +44,6 @@ config POWERNV
     select SSI_M25P80
     select PNV_SPI
 
-config PPC405
-    bool
-    default y
-    depends on PPC
-    select M48T59
-    select PFLASH_CFI02
-    select PPC4XX
-    select SERIAL_MM
-
 config PPC440
     bool
     default y
diff --git a/hw/ppc/amigaone.c b/hw/ppc/amigaone.c
index b02792221cc54..483512125fcff 100644
--- a/hw/ppc/amigaone.c
+++ b/hw/ppc/amigaone.c
@@ -21,12 +21,26 @@
 #include "hw/ide/pci.h"
 #include "hw/i2c/smbus_eeprom.h"
 #include "hw/ppc/ppc.h"
+#include "system/block-backend.h"
 #include "system/qtest.h"
 #include "system/reset.h"
 #include "kvm_ppc.h"
+#include "elf.h"
+
+#include <zlib.h> /* for crc32 */
 
 #define BUS_FREQ_HZ 100000000
 
+#define INITRD_MIN_ADDR 0x600000
+#define INIT_RAM_ADDR 0x40000000
+
+#define PCI_HIGH_ADDR 0x80000000
+#define PCI_HIGH_SIZE 0x7d000000
+#define PCI_LOW_ADDR  0xfd000000
+#define PCI_LOW_SIZE  0xe0000
+
+#define ARTICIA_ADDR 0xfe000000
+
 /*
  * Firmware binary available at
  * https://www.hyperion-entertainment.com/index.php/downloads?view=files&parent=28
@@ -41,20 +55,202 @@
 
 /* AmigaOS calls this routine from ROM, use this if no firmware loaded */
 static const char dummy_fw[] = {
-    0x38, 0x00, 0x00, 0x08, /* li      r0,8 */
-    0x7c, 0x09, 0x03, 0xa6, /* mtctr   r0 */
-    0x54, 0x63, 0xf8, 0x7e, /* srwi    r3,r3,1 */
-    0x42, 0x00, 0xff, 0xfc, /* bdnz    0x8 */
+    0x54, 0x63, 0xc2, 0x3e, /* srwi    r3,r3,8 */
     0x7c, 0x63, 0x18, 0xf8, /* not     r3,r3 */
     0x4e, 0x80, 0x00, 0x20, /* blr */
 };
 
+#define NVRAM_ADDR 0xfd0e0000
+#define NVRAM_SIZE (4 * KiB)
+
+static char default_env[] =
+    "baudrate=115200\0"
+    "stdout=vga\0"
+    "stdin=ps2kbd\0"
+    "bootcmd=boota; menu; run menuboot_cmd\0"
+    "boot1=ide\0"
+    "boot2=cdrom\0"
+    "boota_timeout=3\0"
+    "ide_doreset=on\0"
+    "pci_irqa=9\0"
+    "pci_irqa_select=level\0"
+    "pci_irqb=10\0"
+    "pci_irqb_select=level\0"
+    "pci_irqc=11\0"
+    "pci_irqc_select=level\0"
+    "pci_irqd=7\0"
+    "pci_irqd_select=level\0"
+    "a1ide_irq=1111\0"
+    "a1ide_xfer=FFFF\0";
+#define CRC32_DEFAULT_ENV 0xb5548481
+#define CRC32_ALL_ZEROS   0x603b0489
+
+#define TYPE_A1_NVRAM "a1-nvram"
+OBJECT_DECLARE_SIMPLE_TYPE(A1NVRAMState, A1_NVRAM)
+
+struct A1NVRAMState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion mr;
+    BlockBackend *blk;
+};
+
+static uint64_t nvram_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    /* read callback not used because of romd mode */
+    g_assert_not_reached();
+}
+
+static void nvram_write(void *opaque, hwaddr addr, uint64_t val,
+                        unsigned int size)
+{
+    A1NVRAMState *s = opaque;
+    uint8_t *p = memory_region_get_ram_ptr(&s->mr);
+
+    p[addr] = val;
+    if (s->blk) {
+        blk_pwrite(s->blk, addr, 1, &val, 0);
+    }
+}
+
+static const MemoryRegionOps nvram_ops = {
+    .read = nvram_read,
+    .write = nvram_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .impl = {
+        .min_access_size = 1,
+        .max_access_size = 1,
+    },
+};
+
+static void nvram_realize(DeviceState *dev, Error **errp)
+{
+    A1NVRAMState *s = A1_NVRAM(dev);
+    void *p;
+    uint32_t crc, *c;
+
+    memory_region_init_rom_device(&s->mr, NULL, &nvram_ops, s, "nvram",
+                                  NVRAM_SIZE, &error_fatal);
+    sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->mr);
+    c = p = memory_region_get_ram_ptr(&s->mr);
+    if (s->blk) {
+        if (blk_getlength(s->blk) != NVRAM_SIZE) {
+            error_setg(errp, "NVRAM backing file size must be %" PRId64 "bytes",
+                       NVRAM_SIZE);
+            return;
+        }
+        blk_set_perm(s->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
+                     BLK_PERM_ALL, &error_fatal);
+        if (blk_pread(s->blk, 0, NVRAM_SIZE, p, 0) < 0) {
+            error_setg(errp, "Cannot read NVRAM contents from backing file");
+            return;
+        }
+    }
+    crc = crc32(0, p + 4, NVRAM_SIZE - 4);
+    if (crc == CRC32_ALL_ZEROS) { /* If env is uninitialized set default */
+        *c = cpu_to_be32(CRC32_DEFAULT_ENV);
+        /* Also copies terminating \0 as env is terminated by \0\0 */
+        memcpy(p + 4, default_env, sizeof(default_env));
+        if (s->blk) {
+            blk_pwrite(s->blk, 0, sizeof(crc) + sizeof(default_env), p, 0);
+        }
+        return;
+    }
+    if (*c == 0) {
+        *c = cpu_to_be32(crc32(0, p + 4, NVRAM_SIZE - 4));
+        if (s->blk) {
+            blk_pwrite(s->blk, 0, 4, p, 0);
+        }
+    }
+    if (be32_to_cpu(*c) != crc) {
+        warn_report("NVRAM checksum mismatch");
+    }
+}
+
+static const Property nvram_properties[] = {
+    DEFINE_PROP_DRIVE("drive", A1NVRAMState, blk),
+};
+
+static void nvram_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+
+    dc->realize = nvram_realize;
+    device_class_set_props(dc, nvram_properties);
+}
+
+static const TypeInfo nvram_types[] = {
+    {
+        .name = TYPE_A1_NVRAM,
+        .parent = TYPE_SYS_BUS_DEVICE,
+        .instance_size = sizeof(A1NVRAMState),
+        .class_init = nvram_class_init,
+    },
+};
+DEFINE_TYPES(nvram_types)
+
+struct boot_info {
+    hwaddr entry;
+    hwaddr stack;
+    hwaddr bd_info;
+    hwaddr initrd_start;
+    hwaddr initrd_end;
+    hwaddr cmdline_start;
+    hwaddr cmdline_end;
+};
+
+/* Board info struct from U-Boot */
+struct bd_info {
+    uint32_t bi_memstart;
+    uint32_t bi_memsize;
+    uint32_t bi_flashstart;
+    uint32_t bi_flashsize;
+    uint32_t bi_flashoffset;
+    uint32_t bi_sramstart;
+    uint32_t bi_sramsize;
+    uint32_t bi_bootflags;
+    uint32_t bi_ip_addr;
+    uint8_t  bi_enetaddr[6];
+    uint16_t bi_ethspeed;
+    uint32_t bi_intfreq;
+    uint32_t bi_busfreq;
+    uint32_t bi_baudrate;
+} QEMU_PACKED;
+
+static void create_bd_info(hwaddr addr, ram_addr_t ram_size)
+{
+    struct bd_info *bd = g_new0(struct bd_info, 1);
+
+    bd->bi_memsize =    cpu_to_be32(ram_size);
+    bd->bi_flashstart = cpu_to_be32(PROM_ADDR);
+    bd->bi_flashsize =  cpu_to_be32(1); /* match what U-Boot detects */
+    bd->bi_bootflags =  cpu_to_be32(1);
+    bd->bi_intfreq =    cpu_to_be32(11.5 * BUS_FREQ_HZ);
+    bd->bi_busfreq =    cpu_to_be32(BUS_FREQ_HZ);
+    bd->bi_baudrate =   cpu_to_be32(115200);
+
+    cpu_physical_memory_write(addr, bd, sizeof(*bd));
+}
+
 static void amigaone_cpu_reset(void *opaque)
 {
     PowerPCCPU *cpu = opaque;
+    CPUPPCState *env = &cpu->env;
 
     cpu_reset(CPU(cpu));
-    cpu_ppc_tb_reset(&cpu->env);
+    if (env->load_info) {
+        struct boot_info *bi = env->load_info;
+
+        env->gpr[1] = bi->stack;
+        env->gpr[2] = 1024;
+        env->gpr[3] = bi->bd_info;
+        env->gpr[4] = bi->initrd_start;
+        env->gpr[5] = bi->initrd_end;
+        env->gpr[6] = bi->cmdline_start;
+        env->gpr[7] = bi->cmdline_end;
+        env->nip = bi->entry;
+    }
+    cpu_ppc_tb_reset(env);
 }
 
 static void fix_spd_data(uint8_t *spd)
@@ -75,7 +271,9 @@ static void amigaone_init(MachineState *machine)
     DeviceState *dev;
     I2CBus *i2c_bus;
     uint8_t *spd_data;
-    int i;
+    DriveInfo *di;
+    hwaddr loadaddr;
+    struct boot_info *bi = NULL;
 
     /* init CPU */
     cpu = POWERPC_CPU(cpu_create(machine->cpu_type));
@@ -97,9 +295,19 @@ static void amigaone_init(MachineState *machine)
         /* Firmware uses this area for startup */
         mr = g_new(MemoryRegion, 1);
         memory_region_init_ram(mr, NULL, "init-cache", 32 * KiB, &error_fatal);
-        memory_region_add_subregion(get_system_memory(), 0x40000000, mr);
+        memory_region_add_subregion(get_system_memory(), INIT_RAM_ADDR, mr);
     }
 
+    /* nvram */
+    dev = qdev_new(TYPE_A1_NVRAM);
+    di = drive_get(IF_MTD, 0, 0);
+    if (di) {
+        qdev_prop_set_drive(dev, "drive", blk_by_legacy_dinfo(di));
+    }
+    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+    memory_region_add_subregion(get_system_memory(), NVRAM_ADDR,
+                                sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0));
+
     /* allocate and load firmware */
     rom = g_new(MemoryRegion, 1);
     memory_region_init_rom(rom, NULL, "rom", PROM_SIZE, &error_fatal);
@@ -122,7 +330,7 @@ static void amigaone_init(MachineState *machine)
     }
 
     /* Articia S */
-    dev = sysbus_create_simple(TYPE_ARTICIA, 0xfe000000, NULL);
+    dev = sysbus_create_simple(TYPE_ARTICIA, ARTICIA_ADDR, NULL);
 
     i2c_bus = I2C_BUS(qdev_get_child_bus(dev, "smbus"));
     if (machine->ram_size > 512 * MiB) {
@@ -139,12 +347,12 @@ static void amigaone_init(MachineState *machine)
     pci_mem = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 1);
     mr = g_new(MemoryRegion, 1);
     memory_region_init_alias(mr, OBJECT(dev), "pci-mem-low", pci_mem,
-                             0, 0x1000000);
-    memory_region_add_subregion(get_system_memory(), 0xfd000000, mr);
+                             0, PCI_LOW_SIZE);
+    memory_region_add_subregion(get_system_memory(), PCI_LOW_ADDR, mr);
     mr = g_new(MemoryRegion, 1);
     memory_region_init_alias(mr, OBJECT(dev), "pci-mem-high", pci_mem,
-                             0x80000000, 0x7d000000);
-    memory_region_add_subregion(get_system_memory(), 0x80000000, mr);
+                             PCI_HIGH_ADDR, PCI_HIGH_SIZE);
+    memory_region_add_subregion(get_system_memory(), PCI_HIGH_ADDR, mr);
     pci_bus = PCI_BUS(qdev_get_child_bus(dev, "pci.0"));
 
     /* VIA VT82c686B South Bridge (multifunction PCI device) */
@@ -156,12 +364,62 @@ static void amigaone_init(MachineState *machine)
     qdev_connect_gpio_out_named(DEVICE(via), "intr", 0,
                                 qdev_get_gpio_in(DEVICE(cpu),
                                 PPC6xx_INPUT_INT));
-    for (i = 0; i < PCI_NUM_PINS; i++) {
+    for (int i = 0; i < PCI_NUM_PINS; i++) {
         qdev_connect_gpio_out(dev, i, qdev_get_gpio_in_named(DEVICE(via),
                                                              "pirq", i));
     }
     pci_ide_create_devs(PCI_DEVICE(object_resolve_path_component(via, "ide")));
     pci_vga_init(pci_bus);
+
+    if (!machine->kernel_filename) {
+        return;
+    }
+
+    /* handle -kernel, -initrd, -append options and emulate U-Boot */
+    bi = g_new0(struct boot_info, 1);
+    cpu->env.load_info = bi;
+
+    loadaddr = MIN(machine->ram_size, 256 * MiB);
+    bi->bd_info = loadaddr - 8 * MiB;
+    create_bd_info(bi->bd_info, machine->ram_size);
+    bi->stack = bi->bd_info - 64 * KiB - 8;
+
+    if (machine->kernel_cmdline && machine->kernel_cmdline[0]) {
+        size_t len = strlen(machine->kernel_cmdline);
+
+        loadaddr = bi->bd_info + 1 * MiB;
+        cpu_physical_memory_write(loadaddr, machine->kernel_cmdline, len + 1);
+        bi->cmdline_start = loadaddr;
+        bi->cmdline_end = loadaddr + len + 1; /* including terminating '\0' */
+    }
+
+    sz = load_elf(machine->kernel_filename, NULL, NULL, NULL,
+                  &bi->entry, &loadaddr, NULL, NULL,
+                  ELFDATA2MSB, PPC_ELF_MACHINE, 0, 0);
+    if (sz <= 0) {
+        sz = load_uimage(machine->kernel_filename, &bi->entry, &loadaddr,
+                         NULL, NULL, NULL);
+    }
+    if (sz <= 0) {
+        error_report("Could not load kernel '%s'",
+                     machine->kernel_filename);
+        exit(1);
+    }
+    loadaddr += sz;
+
+    if (machine->initrd_filename) {
+        loadaddr = ROUND_UP(loadaddr + 4 * MiB, 4 * KiB);
+        loadaddr = MAX(loadaddr, INITRD_MIN_ADDR);
+        sz = load_image_targphys(machine->initrd_filename, loadaddr,
+                                 bi->bd_info - loadaddr);
+        if (sz <= 0) {
+            error_report("Could not load initrd '%s'",
+                         machine->initrd_filename);
+            exit(1);
+        }
+        bi->initrd_start = loadaddr;
+        bi->initrd_end = loadaddr + sz;
+    }
 }
 
 static void amigaone_machine_init(MachineClass *mc)
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index 26933e0457e6b..69269aa24c4f4 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -658,7 +658,6 @@ static int ppce500_load_device_tree(PPCE500MachineState *pms,
 
 done:
     if (!dry_run) {
-        qemu_fdt_dumpdtb(fdt, fdt_size);
         cpu_physical_memory_write(addr, fdt, fdt_size);
 
         /* Set machine->fdt for 'dumpdtb' QMP/HMP command */
@@ -1044,6 +1043,7 @@ void ppce500_init(MachineState *machine)
         dev = qdev_new(TYPE_SYSBUS_SDHCI);
         qdev_prop_set_uint8(dev, "sd-spec-version", 2);
         qdev_prop_set_uint8(dev, "endianness", DEVICE_BIG_ENDIAN);
+        qdev_prop_set_uint8(dev, "vendor", SDHCI_VENDOR_FSL);
         s = SYS_BUS_DEVICE(dev);
         sysbus_realize_and_unref(s, &error_fatal);
         sysbus_connect_irq(s, 0, qdev_get_gpio_in(mpicdev, MPC85XX_ESDHC_IRQ));
diff --git a/hw/ppc/meson.build b/hw/ppc/meson.build
index 7cd91898699e6..9893f8adebb0e 100644
--- a/hw/ppc/meson.build
+++ b/hw/ppc/meson.build
@@ -57,9 +57,6 @@ ppc_ss.add(when: 'CONFIG_POWERNV', if_true: files(
   'pnv_n1_chiplet.c',
 ))
 # PowerPC 4xx boards
-ppc_ss.add(when: 'CONFIG_PPC405', if_true: files(
-  'ppc405_boards.c',
-  'ppc405_uc.c'))
 ppc_ss.add(when: 'CONFIG_PPC440', if_true: files(
   'ppc440_bamboo.c',
   'ppc440_uc.c'))
diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c
index b057672e829d3..246d6d633b56a 100644
--- a/hw/ppc/pegasos2.c
+++ b/hw/ppc/pegasos2.c
@@ -417,7 +417,6 @@ static void pegasos2_machine_reset(MachineState *machine, ResetType type)
     d[1] = cpu_to_be64(pm->kernel_size - (pm->kernel_entry - pm->kernel_addr));
     qemu_fdt_setprop(fdt, "/chosen", "qemu,boot-kernel", d, sizeof(d));
 
-    qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt));
     g_free(pm->fdt_blob);
     pm->fdt_blob = fdt;
 
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 11fd477b71bef..59365370c378a 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -1,7 +1,9 @@
 /*
  * QEMU PowerPC PowerNV machine model
  *
- * Copyright (c) 2016, IBM Corporation.
+ * Copyright (c) 2016-2024, IBM Corporation.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -64,6 +66,8 @@
 #define FW_LOAD_ADDR            0x0
 #define FW_MAX_SIZE             (16 * MiB)
 
+#define PNOR_FILE_NAME          "pnv-pnor.bin"
+
 #define KERNEL_LOAD_ADDR        0x20000000
 #define KERNEL_MAX_SIZE         (128 * MiB)
 #define INITRD_LOAD_ADDR        0x28000000
@@ -744,7 +748,6 @@ static void pnv_reset(MachineState *machine, ResetType type)
         _FDT((fdt_pack(fdt)));
     }
 
-    qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt));
     cpu_physical_memory_write(PNV_FDT_ADDR, fdt, fdt_totalsize(fdt));
 
     /* Update machine->fdt with latest fdt */
@@ -942,7 +945,7 @@ static void pnv_init(MachineState *machine)
     uint64_t chip_ram_start = 0;
     int i;
     char *chip_typename;
-    DriveInfo *pnor = drive_get(IF_MTD, 0, 0);
+    DriveInfo *pnor;
     DeviceState *dev;
 
     if (kvm_enabled()) {
@@ -972,6 +975,18 @@ static void pnv_init(MachineState *machine)
      * Create our simple PNOR device
      */
     dev = qdev_new(TYPE_PNV_PNOR);
+    pnor = drive_get(IF_MTD, 0, 0);
+    if (!pnor && defaults_enabled()) {
+        fw_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, PNOR_FILE_NAME);
+        if (!fw_filename) {
+            warn_report("Could not find PNOR '%s'", PNOR_FILE_NAME);
+        } else {
+            QemuOpts *opts;
+            opts = drive_add(IF_MTD, -1, fw_filename, "format=raw,readonly=on");
+            pnor = drive_new(opts, IF_MTD, &error_fatal);
+            g_free(fw_filename);
+        }
+    }
     if (pnor) {
         qdev_prop_set_drive(dev, "drive", blk_by_legacy_dinfo(pnor));
     }
@@ -1556,7 +1571,21 @@ static void pnv_chip_power8_realize(DeviceState *dev, Error **errp)
         return;
     }
 
+    /* HOMER (must be created before OCC) */
+    object_property_set_link(OBJECT(&chip8->homer), "chip", OBJECT(chip),
+                             &error_abort);
+    if (!qdev_realize(DEVICE(&chip8->homer), NULL, errp)) {
+        return;
+    }
+    /* Homer Xscom region */
+    pnv_xscom_add_subregion(chip, PNV_XSCOM_PBA_BASE, &chip8->homer.pba_regs);
+    /* Homer RAM region */
+    memory_region_add_subregion(get_system_memory(), chip8->homer.base,
+                                &chip8->homer.mem);
+
     /* Create the simplified OCC model */
+    object_property_set_link(OBJECT(&chip8->occ), "homer",
+                             OBJECT(&chip8->homer), &error_abort);
     if (!qdev_realize(DEVICE(&chip8->occ), NULL, errp)) {
         return;
     }
@@ -1568,19 +1597,6 @@ static void pnv_chip_power8_realize(DeviceState *dev, Error **errp)
     memory_region_add_subregion(get_system_memory(), PNV_OCC_SENSOR_BASE(chip),
                                 &chip8->occ.sram_regs);
 
-    /* HOMER */
-    object_property_set_link(OBJECT(&chip8->homer), "chip", OBJECT(chip),
-                             &error_abort);
-    if (!qdev_realize(DEVICE(&chip8->homer), NULL, errp)) {
-        return;
-    }
-    /* Homer Xscom region */
-    pnv_xscom_add_subregion(chip, PNV_XSCOM_PBA_BASE, &chip8->homer.pba_regs);
-
-    /* Homer mmio region */
-    memory_region_add_subregion(get_system_memory(), PNV_HOMER_BASE(chip),
-                                &chip8->homer.regs);
-
     /* PHB controllers */
     for (i = 0; i < chip8->num_phbs; i++) {
         PnvPHB *phb = chip8->phbs[i];
@@ -1754,6 +1770,7 @@ static void pnv_chip_power9_pec_realize(PnvChip *chip, Error **errp)
     for (i = 0; i < chip->num_pecs; i++) {
         PnvPhb4PecState *pec = &chip9->pecs[i];
         PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(pec);
+        uint32_t pec_cplt_base;
         uint32_t pec_nest_base;
         uint32_t pec_pci_base;
 
@@ -1766,9 +1783,12 @@ static void pnv_chip_power9_pec_realize(PnvChip *chip, Error **errp)
             return;
         }
 
+        pec_cplt_base = pecc->xscom_cplt_base(pec);
         pec_nest_base = pecc->xscom_nest_base(pec);
         pec_pci_base = pecc->xscom_pci_base(pec);
 
+        pnv_xscom_add_subregion(chip, pec_cplt_base,
+                 &pec->nest_pervasive.xscom_ctrl_regs_mr);
         pnv_xscom_add_subregion(chip, pec_nest_base, &pec->nest_regs_mr);
         pnv_xscom_add_subregion(chip, pec_pci_base, &pec->pci_regs_mr);
     }
@@ -1860,18 +1880,6 @@ static void pnv_chip_power9_realize(DeviceState *dev, Error **errp)
     pnv_xscom_add_subregion(chip, PNV9_XSCOM_CHIPTOD_BASE,
                             &chip9->chiptod.xscom_regs);
 
-    /* Create the simplified OCC model */
-    if (!qdev_realize(DEVICE(&chip9->occ), NULL, errp)) {
-        return;
-    }
-    pnv_xscom_add_subregion(chip, PNV9_XSCOM_OCC_BASE, &chip9->occ.xscom_regs);
-    qdev_connect_gpio_out(DEVICE(&chip9->occ), 0, qdev_get_gpio_in(
-                              DEVICE(psi9), PSIHB9_IRQ_OCC));
-
-    /* OCC SRAM model */
-    memory_region_add_subregion(get_system_memory(), PNV9_OCC_SENSOR_BASE(chip),
-                                &chip9->occ.sram_regs);
-
     /* SBE */
     if (!qdev_realize(DEVICE(&chip9->sbe), NULL, errp)) {
         return;
@@ -1883,7 +1891,7 @@ static void pnv_chip_power9_realize(DeviceState *dev, Error **errp)
     qdev_connect_gpio_out(DEVICE(&chip9->sbe), 0, qdev_get_gpio_in(
                               DEVICE(psi9), PSIHB9_IRQ_PSU));
 
-    /* HOMER */
+    /* HOMER (must be created before OCC) */
     object_property_set_link(OBJECT(&chip9->homer), "chip", OBJECT(chip),
                              &error_abort);
     if (!qdev_realize(DEVICE(&chip9->homer), NULL, errp)) {
@@ -1891,10 +1899,23 @@ static void pnv_chip_power9_realize(DeviceState *dev, Error **errp)
     }
     /* Homer Xscom region */
     pnv_xscom_add_subregion(chip, PNV9_XSCOM_PBA_BASE, &chip9->homer.pba_regs);
+    /* Homer RAM region */
+    memory_region_add_subregion(get_system_memory(), chip9->homer.base,
+                                &chip9->homer.mem);
+
+    /* Create the simplified OCC model */
+    object_property_set_link(OBJECT(&chip9->occ), "homer",
+                             OBJECT(&chip9->homer), &error_abort);
+    if (!qdev_realize(DEVICE(&chip9->occ), NULL, errp)) {
+        return;
+    }
+    pnv_xscom_add_subregion(chip, PNV9_XSCOM_OCC_BASE, &chip9->occ.xscom_regs);
+    qdev_connect_gpio_out(DEVICE(&chip9->occ), 0, qdev_get_gpio_in(
+                              DEVICE(psi9), PSIHB9_IRQ_OCC));
 
-    /* Homer mmio region */
-    memory_region_add_subregion(get_system_memory(), PNV9_HOMER_BASE(chip),
-                                &chip9->homer.regs);
+    /* OCC SRAM model */
+    memory_region_add_subregion(get_system_memory(), PNV9_OCC_SENSOR_BASE(chip),
+                                &chip9->occ.sram_regs);
 
     /* PEC PHBs */
     pnv_chip_power9_pec_realize(chip, &local_err);
@@ -2028,6 +2049,7 @@ static void pnv_chip_power10_phb_realize(PnvChip *chip, Error **errp)
     for (i = 0; i < chip->num_pecs; i++) {
         PnvPhb4PecState *pec = &chip10->pecs[i];
         PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(pec);
+        uint32_t pec_cplt_base;
         uint32_t pec_nest_base;
         uint32_t pec_pci_base;
 
@@ -2040,9 +2062,12 @@ static void pnv_chip_power10_phb_realize(PnvChip *chip, Error **errp)
             return;
         }
 
+        pec_cplt_base = pecc->xscom_cplt_base(pec);
         pec_nest_base = pecc->xscom_nest_base(pec);
         pec_pci_base = pecc->xscom_pci_base(pec);
 
+        pnv_xscom_add_subregion(chip, pec_cplt_base,
+                 &pec->nest_pervasive.xscom_ctrl_regs_mr);
         pnv_xscom_add_subregion(chip, pec_nest_base, &pec->nest_regs_mr);
         pnv_xscom_add_subregion(chip, pec_pci_base, &pec->pci_regs_mr);
     }
@@ -2137,7 +2162,22 @@ static void pnv_chip_power10_realize(DeviceState *dev, Error **errp)
     pnv_xscom_add_subregion(chip, PNV10_XSCOM_CHIPTOD_BASE,
                             &chip10->chiptod.xscom_regs);
 
+    /* HOMER (must be created before OCC) */
+    object_property_set_link(OBJECT(&chip10->homer), "chip", OBJECT(chip),
+                             &error_abort);
+    if (!qdev_realize(DEVICE(&chip10->homer), NULL, errp)) {
+        return;
+    }
+    /* Homer Xscom region */
+    pnv_xscom_add_subregion(chip, PNV10_XSCOM_PBA_BASE,
+                            &chip10->homer.pba_regs);
+    /* Homer RAM region */
+    memory_region_add_subregion(get_system_memory(), chip10->homer.base,
+                                &chip10->homer.mem);
+
     /* Create the simplified OCC model */
+    object_property_set_link(OBJECT(&chip10->occ), "homer",
+                             OBJECT(&chip10->homer), &error_abort);
     if (!qdev_realize(DEVICE(&chip10->occ), NULL, errp)) {
         return;
     }
@@ -2162,20 +2202,6 @@ static void pnv_chip_power10_realize(DeviceState *dev, Error **errp)
     qdev_connect_gpio_out(DEVICE(&chip10->sbe), 0, qdev_get_gpio_in(
                               DEVICE(&chip10->psi), PSIHB9_IRQ_PSU));
 
-    /* HOMER */
-    object_property_set_link(OBJECT(&chip10->homer), "chip", OBJECT(chip),
-                             &error_abort);
-    if (!qdev_realize(DEVICE(&chip10->homer), NULL, errp)) {
-        return;
-    }
-    /* Homer Xscom region */
-    pnv_xscom_add_subregion(chip, PNV10_XSCOM_PBA_BASE,
-                            &chip10->homer.pba_regs);
-
-    /* Homer mmio region */
-    memory_region_add_subregion(get_system_memory(), PNV10_HOMER_BASE(chip),
-                                &chip10->homer.regs);
-
     /* N1 chiplet */
     if (!qdev_realize(DEVICE(&chip10->n1_chiplet), NULL, errp)) {
         return;
@@ -2226,6 +2252,8 @@ static void pnv_chip_power10_realize(DeviceState *dev, Error **errp)
         /* pib_spic[2] connected to 25csm04 which implements 1 byte transfer */
         object_property_set_int(OBJECT(&chip10->pib_spic[i]), "transfer_len",
                                 (i == 2) ? 1 : 4, &error_fatal);
+        object_property_set_int(OBJECT(&chip10->pib_spic[i]), "chip-id",
+                                chip->chip_id, &error_fatal);
         if (!sysbus_realize(SYS_BUS_DEVICE(OBJECT
                                         (&chip10->pib_spic[i])), errp)) {
             return;
@@ -2582,7 +2610,7 @@ static void pnv_pic_print_info(InterruptStatsProvider *obj, GString *buf)
 
 static int pnv_match_nvt(XiveFabric *xfb, uint8_t format,
                          uint8_t nvt_blk, uint32_t nvt_idx,
-                         bool cam_ignore, uint8_t priority,
+                         bool crowd, bool cam_ignore, uint8_t priority,
                          uint32_t logic_serv,
                          XiveTCTXMatch *match)
 {
@@ -2596,8 +2624,8 @@ static int pnv_match_nvt(XiveFabric *xfb, uint8_t format,
         XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr);
         int count;
 
-        count = xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, cam_ignore,
-                               priority, logic_serv, match);
+        count = xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, crowd,
+                               cam_ignore, priority, logic_serv, match);
 
         if (count < 0) {
             return count;
@@ -2611,7 +2639,7 @@ static int pnv_match_nvt(XiveFabric *xfb, uint8_t format,
 
 static int pnv10_xive_match_nvt(XiveFabric *xfb, uint8_t format,
                                 uint8_t nvt_blk, uint32_t nvt_idx,
-                                bool cam_ignore, uint8_t priority,
+                                bool crowd, bool cam_ignore, uint8_t priority,
                                 uint32_t logic_serv,
                                 XiveTCTXMatch *match)
 {
@@ -2625,8 +2653,8 @@ static int pnv10_xive_match_nvt(XiveFabric *xfb, uint8_t format,
         XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr);
         int count;
 
-        count = xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, cam_ignore,
-                               priority, logic_serv, match);
+        count = xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, crowd,
+                               cam_ignore, priority, logic_serv, match);
 
         if (count < 0) {
             return count;
@@ -2638,6 +2666,24 @@ static int pnv10_xive_match_nvt(XiveFabric *xfb, uint8_t format,
     return total_count;
 }
 
+static int pnv10_xive_broadcast(XiveFabric *xfb,
+                                uint8_t nvt_blk, uint32_t nvt_idx,
+                                bool crowd, bool cam_ignore,
+                                uint8_t priority)
+{
+    PnvMachineState *pnv = PNV_MACHINE(xfb);
+    int i;
+
+    for (i = 0; i < pnv->num_chips; i++) {
+        Pnv10Chip *chip10 = PNV10_CHIP(pnv->chips[i]);
+        XivePresenter *xptr = XIVE_PRESENTER(&chip10->xive);
+        XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr);
+
+        xpc->broadcast(xptr, nvt_blk, nvt_idx, crowd, cam_ignore, priority);
+    }
+    return 0;
+}
+
 static bool pnv_machine_get_big_core(Object *obj, Error **errp)
 {
     PnvMachineState *pnv = PNV_MACHINE(obj);
@@ -2771,6 +2817,7 @@ static void pnv_machine_p10_common_class_init(ObjectClass *oc, void *data)
     pmc->dt_power_mgt = pnv_dt_power_mgt;
 
     xfc->match_nvt = pnv10_xive_match_nvt;
+    xfc->broadcast = pnv10_xive_broadcast;
 
     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_PNV_PHB);
 }
diff --git a/hw/ppc/pnv_bmc.c b/hw/ppc/pnv_bmc.c
index 0c1274df21a94..811ba3d7a499a 100644
--- a/hw/ppc/pnv_bmc.c
+++ b/hw/ppc/pnv_bmc.c
@@ -251,10 +251,38 @@ static const IPMINetfn hiomap_netfn = {
 
 void pnv_bmc_set_pnor(IPMIBmc *bmc, PnvPnor *pnor)
 {
+    uint32_t pnor_size = pnor->size;
+    uint32_t pnor_addr = PNOR_SPI_OFFSET;
+
     if (!pnv_bmc_is_simulator(bmc)) {
         return;
     }
 
+    /*
+     * The HIOMAP protocol uses block units and 16-bit addressing.
+     * Prevent overflow or misalign.
+     */
+    if (pnor_addr >= 1U << (BLOCK_SHIFT + 16)) {
+        warn_report("PNOR address is larger than 2^%d, disabling PNOR",
+                    BLOCK_SHIFT + 16);
+        return;
+    }
+    if (pnor_addr & ((1U << BLOCK_SHIFT) - 1)) {
+        warn_report("PNOR address is not aligned to 2^%d, disabling PNOR",
+                    BLOCK_SHIFT);
+        return;
+    }
+    if (pnor_size > 1U << (BLOCK_SHIFT + 16)) {
+        warn_report("PNOR size is larger than 2^%d, disabling PNOR",
+                    BLOCK_SHIFT + 16);
+        return;
+    }
+    if (pnor_size & ((1U << BLOCK_SHIFT) - 1)) {
+        warn_report("PNOR size is not aligned to 2^%d, disabling PNOR",
+                    BLOCK_SHIFT);
+        return;
+    }
+
     object_ref(OBJECT(pnor));
     object_property_add_const_link(OBJECT(bmc), "pnor", OBJECT(pnor));
 
diff --git a/hw/ppc/pnv_homer.c b/hw/ppc/pnv_homer.c
index a1d83c8149d37..18a53a80c183a 100644
--- a/hw/ppc/pnv_homer.c
+++ b/hw/ppc/pnv_homer.c
@@ -29,94 +29,6 @@
 #include "hw/ppc/pnv_homer.h"
 #include "hw/ppc/pnv_xscom.h"
 
-
-static bool core_max_array(PnvHomer *homer, hwaddr addr)
-{
-    int i;
-    PnvHomerClass *hmrc = PNV_HOMER_GET_CLASS(homer);
-
-    for (i = 0; i <= homer->chip->nr_cores; i++) {
-        if (addr == (hmrc->core_max_base + i)) {
-            return true;
-       }
-    }
-    return false;
-}
-
-/* P8 Pstate table */
-
-#define PNV8_OCC_PSTATE_VERSION          0x1f8001
-#define PNV8_OCC_PSTATE_MIN              0x1f8003
-#define PNV8_OCC_PSTATE_VALID            0x1f8000
-#define PNV8_OCC_PSTATE_THROTTLE         0x1f8002
-#define PNV8_OCC_PSTATE_NOM              0x1f8004
-#define PNV8_OCC_PSTATE_TURBO            0x1f8005
-#define PNV8_OCC_PSTATE_ULTRA_TURBO      0x1f8006
-#define PNV8_OCC_PSTATE_DATA             0x1f8008
-#define PNV8_OCC_PSTATE_ID_ZERO          0x1f8010
-#define PNV8_OCC_PSTATE_ID_ONE           0x1f8018
-#define PNV8_OCC_PSTATE_ID_TWO           0x1f8020
-#define PNV8_OCC_VDD_VOLTAGE_IDENTIFIER  0x1f8012
-#define PNV8_OCC_VCS_VOLTAGE_IDENTIFIER  0x1f8013
-#define PNV8_OCC_PSTATE_ZERO_FREQUENCY   0x1f8014
-#define PNV8_OCC_PSTATE_ONE_FREQUENCY    0x1f801c
-#define PNV8_OCC_PSTATE_TWO_FREQUENCY    0x1f8024
-#define PNV8_CORE_MAX_BASE               0x1f8810
-
-
-static uint64_t pnv_power8_homer_read(void *opaque, hwaddr addr,
-                                      unsigned size)
-{
-    PnvHomer *homer = PNV_HOMER(opaque);
-
-    switch (addr) {
-    case PNV8_OCC_PSTATE_VERSION:
-    case PNV8_OCC_PSTATE_MIN:
-    case PNV8_OCC_PSTATE_ID_ZERO:
-        return 0;
-    case PNV8_OCC_PSTATE_VALID:
-    case PNV8_OCC_PSTATE_THROTTLE:
-    case PNV8_OCC_PSTATE_NOM:
-    case PNV8_OCC_PSTATE_TURBO:
-    case PNV8_OCC_PSTATE_ID_ONE:
-    case PNV8_OCC_VDD_VOLTAGE_IDENTIFIER:
-    case PNV8_OCC_VCS_VOLTAGE_IDENTIFIER:
-        return 1;
-    case PNV8_OCC_PSTATE_ULTRA_TURBO:
-    case PNV8_OCC_PSTATE_ID_TWO:
-        return 2;
-    case PNV8_OCC_PSTATE_DATA:
-        return 0x1000000000000000;
-    /* P8 frequency for 0, 1, and 2 pstates */
-    case PNV8_OCC_PSTATE_ZERO_FREQUENCY:
-    case PNV8_OCC_PSTATE_ONE_FREQUENCY:
-    case PNV8_OCC_PSTATE_TWO_FREQUENCY:
-        return 3000;
-    }
-    /* pstate table core max array */
-    if (core_max_array(homer, addr)) {
-        return 1;
-    }
-    return 0;
-}
-
-static void pnv_power8_homer_write(void *opaque, hwaddr addr,
-                                   uint64_t val, unsigned size)
-{
-    /* callback function defined to homer write */
-    return;
-}
-
-static const MemoryRegionOps pnv_power8_homer_ops = {
-    .read = pnv_power8_homer_read,
-    .write = pnv_power8_homer_write,
-    .valid.min_access_size = 1,
-    .valid.max_access_size = 8,
-    .impl.min_access_size = 1,
-    .impl.max_access_size = 8,
-    .endianness = DEVICE_BIG_ENDIAN,
-};
-
 /* P8 PBA BARs */
 #define PBA_BAR0                     0x00
 #define PBA_BAR1                     0x01
@@ -131,16 +43,16 @@ static uint64_t pnv_homer_power8_pba_read(void *opaque, hwaddr addr,
                                           unsigned size)
 {
     PnvHomer *homer = PNV_HOMER(opaque);
-    PnvChip *chip = homer->chip;
+    PnvHomerClass *hmrc = PNV_HOMER_GET_CLASS(homer);
     uint32_t reg = addr >> 3;
     uint64_t val = 0;
 
     switch (reg) {
     case PBA_BAR0:
-        val = PNV_HOMER_BASE(chip);
+        val = homer->base;
         break;
     case PBA_BARMASK0: /* P8 homer region mask */
-        val = (PNV_HOMER_SIZE - 1) & 0x300000;
+        val = (hmrc->size - 1) & 0x300000;
         break;
     case PBA_BAR3: /* P8 occ common area */
         val = PNV_OCC_COMMON_AREA_BASE;
@@ -172,15 +84,19 @@ static const MemoryRegionOps pnv_homer_power8_pba_ops = {
     .endianness = DEVICE_BIG_ENDIAN,
 };
 
+static hwaddr pnv_homer_power8_get_base(PnvChip *chip)
+{
+    return PNV_HOMER_BASE(chip);
+}
+
 static void pnv_homer_power8_class_init(ObjectClass *klass, void *data)
 {
     PnvHomerClass *homer = PNV_HOMER_CLASS(klass);
 
+    homer->get_base = pnv_homer_power8_get_base;
+    homer->size = PNV_HOMER_SIZE;
     homer->pba_size = PNV_XSCOM_PBA_SIZE;
     homer->pba_ops = &pnv_homer_power8_pba_ops;
-    homer->homer_size = PNV_HOMER_SIZE;
-    homer->homer_ops = &pnv_power8_homer_ops;
-    homer->core_max_base = PNV8_CORE_MAX_BASE;
 }
 
 static const TypeInfo pnv_homer_power8_type_info = {
@@ -190,100 +106,20 @@ static const TypeInfo pnv_homer_power8_type_info = {
     .class_init    = pnv_homer_power8_class_init,
 };
 
-/* P9 Pstate table */
-
-#define PNV9_OCC_PSTATE_ID_ZERO          0xe2018
-#define PNV9_OCC_PSTATE_ID_ONE           0xe2020
-#define PNV9_OCC_PSTATE_ID_TWO           0xe2028
-#define PNV9_OCC_PSTATE_DATA             0xe2000
-#define PNV9_OCC_PSTATE_DATA_AREA        0xe2008
-#define PNV9_OCC_PSTATE_MIN              0xe2003
-#define PNV9_OCC_PSTATE_NOM              0xe2004
-#define PNV9_OCC_PSTATE_TURBO            0xe2005
-#define PNV9_OCC_PSTATE_ULTRA_TURBO      0xe2818
-#define PNV9_OCC_MAX_PSTATE_ULTRA_TURBO  0xe2006
-#define PNV9_OCC_PSTATE_MAJOR_VERSION    0xe2001
-#define PNV9_OCC_OPAL_RUNTIME_DATA       0xe2b85
-#define PNV9_CHIP_HOMER_IMAGE_POINTER    0x200008
-#define PNV9_CHIP_HOMER_BASE             0x0
-#define PNV9_OCC_PSTATE_ZERO_FREQUENCY   0xe201c
-#define PNV9_OCC_PSTATE_ONE_FREQUENCY    0xe2024
-#define PNV9_OCC_PSTATE_TWO_FREQUENCY    0xe202c
-#define PNV9_OCC_ROLE_MASTER_OR_SLAVE    0xe2002
-#define PNV9_CORE_MAX_BASE               0xe2819
-
-
-static uint64_t pnv_power9_homer_read(void *opaque, hwaddr addr,
-                                      unsigned size)
-{
-    PnvHomer *homer = PNV_HOMER(opaque);
-
-    switch (addr) {
-    case PNV9_OCC_MAX_PSTATE_ULTRA_TURBO:
-    case PNV9_OCC_PSTATE_ID_ZERO:
-        return 0;
-    case PNV9_OCC_PSTATE_DATA:
-    case PNV9_OCC_ROLE_MASTER_OR_SLAVE:
-    case PNV9_OCC_PSTATE_NOM:
-    case PNV9_OCC_PSTATE_TURBO:
-    case PNV9_OCC_PSTATE_ID_ONE:
-    case PNV9_OCC_PSTATE_ULTRA_TURBO:
-    case PNV9_OCC_OPAL_RUNTIME_DATA:
-        return 1;
-    case PNV9_OCC_PSTATE_MIN:
-    case PNV9_OCC_PSTATE_ID_TWO:
-        return 2;
-
-    /* 3000 khz frequency for 0, 1, and 2 pstates */
-    case PNV9_OCC_PSTATE_ZERO_FREQUENCY:
-    case PNV9_OCC_PSTATE_ONE_FREQUENCY:
-    case PNV9_OCC_PSTATE_TWO_FREQUENCY:
-        return 3000;
-    case PNV9_OCC_PSTATE_MAJOR_VERSION:
-        return 0x90;
-    case PNV9_CHIP_HOMER_BASE:
-    case PNV9_OCC_PSTATE_DATA_AREA:
-    case PNV9_CHIP_HOMER_IMAGE_POINTER:
-        return 0x1000000000000000;
-    }
-    /* pstate table core max array */
-    if (core_max_array(homer, addr)) {
-        return 1;
-    }
-    return 0;
-}
-
-static void pnv_power9_homer_write(void *opaque, hwaddr addr,
-                                   uint64_t val, unsigned size)
-{
-    /* callback function defined to homer write */
-    return;
-}
-
-static const MemoryRegionOps pnv_power9_homer_ops = {
-    .read = pnv_power9_homer_read,
-    .write = pnv_power9_homer_write,
-    .valid.min_access_size = 1,
-    .valid.max_access_size = 8,
-    .impl.min_access_size = 1,
-    .impl.max_access_size = 8,
-    .endianness = DEVICE_BIG_ENDIAN,
-};
-
 static uint64_t pnv_homer_power9_pba_read(void *opaque, hwaddr addr,
                                           unsigned size)
 {
     PnvHomer *homer = PNV_HOMER(opaque);
-    PnvChip *chip = homer->chip;
+    PnvHomerClass *hmrc = PNV_HOMER_GET_CLASS(homer);
     uint32_t reg = addr >> 3;
     uint64_t val = 0;
 
     switch (reg) {
     case PBA_BAR0:
-        val = PNV9_HOMER_BASE(chip);
+        val = homer->base;
         break;
     case PBA_BARMASK0: /* P9 homer region mask */
-        val = (PNV9_HOMER_SIZE - 1) & 0x300000;
+        val = (hmrc->size - 1) & 0x300000;
         break;
     case PBA_BAR2: /* P9 occ common area */
         val = PNV9_OCC_COMMON_AREA_BASE;
@@ -315,15 +151,19 @@ static const MemoryRegionOps pnv_homer_power9_pba_ops = {
     .endianness = DEVICE_BIG_ENDIAN,
 };
 
+static hwaddr pnv_homer_power9_get_base(PnvChip *chip)
+{
+    return PNV9_HOMER_BASE(chip);
+}
+
 static void pnv_homer_power9_class_init(ObjectClass *klass, void *data)
 {
     PnvHomerClass *homer = PNV_HOMER_CLASS(klass);
 
+    homer->get_base = pnv_homer_power9_get_base;
+    homer->size = PNV_HOMER_SIZE;
     homer->pba_size = PNV9_XSCOM_PBA_SIZE;
     homer->pba_ops = &pnv_homer_power9_pba_ops;
-    homer->homer_size = PNV9_HOMER_SIZE;
-    homer->homer_ops = &pnv_power9_homer_ops;
-    homer->core_max_base = PNV9_CORE_MAX_BASE;
 }
 
 static const TypeInfo pnv_homer_power9_type_info = {
@@ -337,16 +177,16 @@ static uint64_t pnv_homer_power10_pba_read(void *opaque, hwaddr addr,
                                           unsigned size)
 {
     PnvHomer *homer = PNV_HOMER(opaque);
-    PnvChip *chip = homer->chip;
+    PnvHomerClass *hmrc = PNV_HOMER_GET_CLASS(homer);
     uint32_t reg = addr >> 3;
     uint64_t val = 0;
 
     switch (reg) {
     case PBA_BAR0:
-        val = PNV10_HOMER_BASE(chip);
+        val = homer->base;
         break;
     case PBA_BARMASK0: /* P10 homer region mask */
-        val = (PNV10_HOMER_SIZE - 1) & 0x300000;
+        val = (hmrc->size - 1) & 0x300000;
         break;
     case PBA_BAR2: /* P10 occ common area */
         val = PNV10_OCC_COMMON_AREA_BASE;
@@ -378,15 +218,19 @@ static const MemoryRegionOps pnv_homer_power10_pba_ops = {
     .endianness = DEVICE_BIG_ENDIAN,
 };
 
+static hwaddr pnv_homer_power10_get_base(PnvChip *chip)
+{
+    return PNV10_HOMER_BASE(chip);
+}
+
 static void pnv_homer_power10_class_init(ObjectClass *klass, void *data)
 {
     PnvHomerClass *homer = PNV_HOMER_CLASS(klass);
 
+    homer->get_base = pnv_homer_power10_get_base;
+    homer->size = PNV_HOMER_SIZE;
     homer->pba_size = PNV10_XSCOM_PBA_SIZE;
     homer->pba_ops = &pnv_homer_power10_pba_ops;
-    homer->homer_size = PNV10_HOMER_SIZE;
-    homer->homer_ops = &pnv_power9_homer_ops; /* TODO */
-    homer->core_max_base = PNV9_CORE_MAX_BASE;
 }
 
 static const TypeInfo pnv_homer_power10_type_info = {
@@ -400,16 +244,22 @@ static void pnv_homer_realize(DeviceState *dev, Error **errp)
 {
     PnvHomer *homer = PNV_HOMER(dev);
     PnvHomerClass *hmrc = PNV_HOMER_GET_CLASS(homer);
+    char homer_str[32];
 
     assert(homer->chip);
 
     pnv_xscom_region_init(&homer->pba_regs, OBJECT(dev), hmrc->pba_ops,
                           homer, "xscom-pba", hmrc->pba_size);
 
-    /* homer region */
-    memory_region_init_io(&homer->regs, OBJECT(dev),
-                          hmrc->homer_ops, homer, "homer-main-memory",
-                          hmrc->homer_size);
+    /* Homer RAM region */
+    homer->base = hmrc->get_base(homer->chip);
+
+    snprintf(homer_str, sizeof(homer_str), "homer-chip%d-memory",
+             homer->chip->chip_id);
+    if (!memory_region_init_ram(&homer->mem, OBJECT(homer),
+                                homer_str, hmrc->size, errp)) {
+        return;
+    }
 }
 
 static const Property pnv_homer_properties[] = {
diff --git a/hw/ppc/pnv_lpc.c b/hw/ppc/pnv_lpc.c
index 0480a60f3f79c..d812dc826810a 100644
--- a/hw/ppc/pnv_lpc.c
+++ b/hw/ppc/pnv_lpc.c
@@ -85,7 +85,7 @@ enum {
 
 #define ISA_IO_SIZE             0x00010000
 #define ISA_MEM_SIZE            0x10000000
-#define ISA_FW_SIZE             0x10000000
+#define ISA_FW_SIZE             0x100000000
 #define LPC_IO_OPB_ADDR         0xd0010000
 #define LPC_IO_OPB_SIZE         0x00010000
 #define LPC_MEM_OPB_ADDR        0xe0000000
@@ -353,6 +353,8 @@ static const MemoryRegionOps pnv_lpc_xscom_ops = {
     .endianness = DEVICE_BIG_ENDIAN,
 };
 
+static void pnv_lpc_opb_noresponse(PnvLpcController *lpc);
+
 static uint64_t pnv_lpc_mmio_read(void *opaque, hwaddr addr, unsigned size)
 {
     PnvLpcController *lpc = PNV_LPC(opaque);
@@ -376,6 +378,7 @@ static uint64_t pnv_lpc_mmio_read(void *opaque, hwaddr addr, unsigned size)
     }
 
     if (result != MEMTX_OK) {
+        pnv_lpc_opb_noresponse(lpc);
         qemu_log_mask(LOG_GUEST_ERROR, "OPB read failed at @0x%"
                       HWADDR_PRIx "\n", addr);
     }
@@ -406,6 +409,7 @@ static void pnv_lpc_mmio_write(void *opaque, hwaddr addr,
     }
 
     if (result != MEMTX_OK) {
+        pnv_lpc_opb_noresponse(lpc);
         qemu_log_mask(LOG_GUEST_ERROR, "OPB write failed at @0x%"
                       HWADDR_PRIx "\n", addr);
     }
@@ -456,46 +460,18 @@ static void pnv_lpc_eval_irqs(PnvLpcController *lpc)
 {
     uint32_t active_irqs = 0;
 
-    if (lpc->lpc_hc_irqstat & PPC_BITMASK32(16, 31)) {
-        qemu_log_mask(LOG_UNIMP, "LPC HC Unimplemented irqs in IRQSTAT: "
-                                 "0x%08"PRIx32"\n", lpc->lpc_hc_irqstat);
-    }
-
-    if (lpc->lpc_hc_irqser_ctrl & LPC_HC_IRQSER_EN) {
-        active_irqs = lpc->lpc_hc_irqstat & lpc->lpc_hc_irqmask;
+    active_irqs = lpc->lpc_hc_irqstat & lpc->lpc_hc_irqmask;
+    if (!(lpc->lpc_hc_irqser_ctrl & LPC_HC_IRQSER_EN)) {
+        active_irqs &= ~LPC_HC_IRQ_SERIRQ_ALL;
     }
 
     /* Reflect the interrupt */
-    if (!lpc->psi_has_serirq) {
+    if (lpc->psi_has_serirq) {
         /*
-         * POWER8 ORs all irqs together (also with LPCHC internal interrupt
-         * sources) and outputs a single line that raises the PSI LPCHC irq
-         * which then latches an OPB IRQ status register that sends the irq
-         * to PSI.
-         *
-         * We don't honor the polarity register, it's pointless and unused
-         * anyway
-         */
-        if (active_irqs) {
-            lpc->opb_irq_input |= OPB_MASTER_IRQ_LPC;
-        } else {
-            lpc->opb_irq_input &= ~OPB_MASTER_IRQ_LPC;
-        }
-
-        /* Update OPB internal latch */
-        lpc->opb_irq_stat |= lpc->opb_irq_input & lpc->opb_irq_mask;
-
-        qemu_set_irq(lpc->psi_irq_lpchc, lpc->opb_irq_stat != 0);
-    } else {
-        /*
-         * POWER9 and POWER10 have routing fields in OPB master registers that
+         * POWER9 and later have routing fields in OPB master registers that
          * send LPC irqs to 4 output lines that raise the PSI SERIRQ irqs.
          * These don't appear to get latched into an OPB register like the
          * LPCHC irqs.
-         *
-         * POWER9 LPC controller internal irqs still go via the OPB
-         * and LPCHC PSI irqs like P8, but we have no such internal sources
-         * modelled yet.
          */
         bool serirq_out[4] = { false, false, false, false };
         int irq;
@@ -510,7 +486,39 @@ static void pnv_lpc_eval_irqs(PnvLpcController *lpc)
         qemu_set_irq(lpc->psi_irq_serirq[1], serirq_out[1]);
         qemu_set_irq(lpc->psi_irq_serirq[2], serirq_out[2]);
         qemu_set_irq(lpc->psi_irq_serirq[3], serirq_out[3]);
+
+        /*
+         * POWER9 and later LPC controller internal irqs still go via the OPB
+         * and LPCHC PSI irqs like P8, so take the SERIRQs out and continue.
+         */
+        active_irqs &= ~LPC_HC_IRQ_SERIRQ_ALL;
     }
+
+    /*
+     * POWER8 ORs all irqs together (also with LPCHC internal interrupt
+     * sources) and outputs a single line that raises the PSI LPCHC irq
+     * which then latches an OPB IRQ status register that sends the irq
+     * to PSI.
+     *
+     * We don't honor the polarity register, it's pointless and unused
+     * anyway
+     */
+    if (active_irqs) {
+        lpc->opb_irq_input |= OPB_MASTER_IRQ_LPC;
+    } else {
+        lpc->opb_irq_input &= ~OPB_MASTER_IRQ_LPC;
+    }
+
+    /* Update OPB internal latch */
+    lpc->opb_irq_stat |= lpc->opb_irq_input & lpc->opb_irq_mask;
+
+    qemu_set_irq(lpc->psi_irq_lpchc, lpc->opb_irq_stat != 0);
+}
+
+static void pnv_lpc_opb_noresponse(PnvLpcController *lpc)
+{
+    lpc->lpc_hc_irqstat |= LPC_HC_IRQ_SYNC_NORESP_ERR;
+    pnv_lpc_eval_irqs(lpc);
 }
 
 static uint64_t lpc_hc_read(void *opaque, hwaddr addr, unsigned size)
@@ -553,10 +561,13 @@ static void lpc_hc_write(void *opaque, hwaddr addr, uint64_t val,
 
     switch (addr) {
     case LPC_HC_FW_SEG_IDSEL:
-        /* XXX Actually figure out how that works as this impact
-         * memory regions/aliases
+        /*
+         * ISA FW "devices" are modeled as 16x256MB windows into a
+         * 4GB LPC FW address space.
          */
+        val &= 0xf; /* Selects device 0-15 */
         lpc->lpc_hc_fw_seg_idsel = val;
+        memory_region_set_alias_offset(&lpc->opb_isa_fw, val * LPC_FW_OPB_SIZE);
         break;
     case LPC_HC_FW_RD_ACC_SIZE:
         lpc->lpc_hc_fw_rd_acc_size = val;
@@ -790,9 +801,9 @@ static void pnv_lpc_realize(DeviceState *dev, Error **errp)
     memory_region_init(&lpc->opb_mr, OBJECT(dev), "lpc-opb", 0x100000000ull);
     address_space_init(&lpc->opb_as, &lpc->opb_mr, "lpc-opb");
 
-    /* Create ISA IO and Mem space regions which are the root of
-     * the ISA bus (ie, ISA address spaces). We don't create a
-     * separate one for FW which we alias to memory.
+    /*
+     * Create ISA IO, Mem, and FW space regions which are the root of
+     * the ISA bus (ie, ISA address spaces).
      */
     memory_region_init(&lpc->isa_io, OBJECT(dev), "isa-io", ISA_IO_SIZE);
     memory_region_init(&lpc->isa_mem, OBJECT(dev), "isa-mem", ISA_MEM_SIZE);
diff --git a/hw/ppc/pnv_occ.c b/hw/ppc/pnv_occ.c
index 48123ceae1766..bda6b23ad3c1c 100644
--- a/hw/ppc/pnv_occ.c
+++ b/hw/ppc/pnv_occ.c
@@ -24,40 +24,53 @@
 #include "hw/irq.h"
 #include "hw/qdev-properties.h"
 #include "hw/ppc/pnv.h"
+#include "hw/ppc/pnv_chip.h"
 #include "hw/ppc/pnv_xscom.h"
 #include "hw/ppc/pnv_occ.h"
 
+#define P8_HOMER_OPAL_DATA_OFFSET    0x1F8000
+#define P9_HOMER_OPAL_DATA_OFFSET    0x0E2000
+
 #define OCB_OCI_OCCMISC         0x4020
 #define OCB_OCI_OCCMISC_AND     0x4021
 #define OCB_OCI_OCCMISC_OR      0x4022
+#define   OCCMISC_PSI_IRQ       PPC_BIT(0)
+#define   OCCMISC_IRQ_SHMEM     PPC_BIT(3)
 
 /* OCC sensors */
-#define OCC_SENSOR_DATA_BLOCK_OFFSET          0x580000
-#define OCC_SENSOR_DATA_VALID                 0x580001
-#define OCC_SENSOR_DATA_VERSION               0x580002
-#define OCC_SENSOR_DATA_READING_VERSION       0x580004
-#define OCC_SENSOR_DATA_NR_SENSORS            0x580008
-#define OCC_SENSOR_DATA_NAMES_OFFSET          0x580010
-#define OCC_SENSOR_DATA_READING_PING_OFFSET   0x580014
-#define OCC_SENSOR_DATA_READING_PONG_OFFSET   0x58000c
-#define OCC_SENSOR_DATA_NAME_LENGTH           0x58000d
-#define OCC_SENSOR_NAME_STRUCTURE_TYPE        0x580023
-#define OCC_SENSOR_LOC_CORE                   0x580022
-#define OCC_SENSOR_LOC_GPU                    0x580020
-#define OCC_SENSOR_TYPE_POWER                 0x580003
-#define OCC_SENSOR_NAME                       0x580005
-#define HWMON_SENSORS_MASK                    0x58001e
-#define SLW_IMAGE_BASE                        0x0
+#define OCC_SENSOR_DATA_BLOCK_OFFSET          0x0000
+#define OCC_SENSOR_DATA_VALID                 0x0001
+#define OCC_SENSOR_DATA_VERSION               0x0002
+#define OCC_SENSOR_DATA_READING_VERSION       0x0004
+#define OCC_SENSOR_DATA_NR_SENSORS            0x0008
+#define OCC_SENSOR_DATA_NAMES_OFFSET          0x0010
+#define OCC_SENSOR_DATA_READING_PING_OFFSET   0x0014
+#define OCC_SENSOR_DATA_READING_PONG_OFFSET   0x000c
+#define OCC_SENSOR_DATA_NAME_LENGTH           0x000d
+#define OCC_SENSOR_NAME_STRUCTURE_TYPE        0x0023
+#define OCC_SENSOR_LOC_CORE                   0x0022
+#define OCC_SENSOR_LOC_GPU                    0x0020
+#define OCC_SENSOR_TYPE_POWER                 0x0003
+#define OCC_SENSOR_NAME                       0x0005
+#define HWMON_SENSORS_MASK                    0x001e
 
 static void pnv_occ_set_misc(PnvOCC *occ, uint64_t val)
 {
-    bool irq_state;
-
-    val &= 0xffff000000000000ull;
+    val &= PPC_BITMASK(0, 18); /* Mask out unimplemented bits */
 
     occ->occmisc = val;
-    irq_state = !!(val >> 63);
-    qemu_set_irq(occ->psi_irq, irq_state);
+
+    /*
+     * OCCMISC IRQ bit triggers the interrupt on a 0->1 edge, but not clear
+     * how that is handled in PSI so it is level-triggered here, which is not
+     * really correct (but skiboot is okay with it).
+     */
+    qemu_set_irq(occ->psi_irq, !!(val & OCCMISC_PSI_IRQ));
+}
+
+static void pnv_occ_raise_msg_irq(PnvOCC *occ)
+{
+    pnv_occ_set_misc(occ, occ->occmisc | OCCMISC_PSI_IRQ | OCCMISC_IRQ_SHMEM);
 }
 
 static uint64_t pnv_occ_power8_xscom_read(void *opaque, hwaddr addr,
@@ -129,8 +142,6 @@ static uint64_t pnv_occ_common_area_read(void *opaque, hwaddr addr,
     case HWMON_SENSORS_MASK:
     case OCC_SENSOR_LOC_GPU:
         return 0x8e00;
-    case SLW_IMAGE_BASE:
-        return 0x1000000000000000;
     }
     return 0;
 }
@@ -165,7 +176,11 @@ const MemoryRegionOps pnv_occ_sram_ops = {
 static void pnv_occ_power8_class_init(ObjectClass *klass, void *data)
 {
     PnvOCCClass *poc = PNV_OCC_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
+    dc->desc = "PowerNV OCC Controller (POWER8)";
+    poc->opal_shared_memory_offset = P8_HOMER_OPAL_DATA_OFFSET;
+    poc->opal_shared_memory_version = 0x02;
     poc->xscom_size = PNV_XSCOM_OCC_SIZE;
     poc->xscom_ops = &pnv_occ_power8_xscom_ops;
 }
@@ -238,8 +253,11 @@ static void pnv_occ_power9_class_init(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->desc = "PowerNV OCC Controller (POWER9)";
+    poc->opal_shared_memory_offset = P9_HOMER_OPAL_DATA_OFFSET;
+    poc->opal_shared_memory_version = 0x90;
     poc->xscom_size = PNV9_XSCOM_OCC_SIZE;
     poc->xscom_ops = &pnv_occ_power9_xscom_ops;
+    assert(!dc->user_creatable);
 }
 
 static const TypeInfo pnv_occ_power9_type_info = {
@@ -251,21 +269,50 @@ static const TypeInfo pnv_occ_power9_type_info = {
 
 static void pnv_occ_power10_class_init(ObjectClass *klass, void *data)
 {
+    PnvOCCClass *poc = PNV_OCC_CLASS(klass);
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->desc = "PowerNV OCC Controller (POWER10)";
+    poc->opal_shared_memory_offset = P9_HOMER_OPAL_DATA_OFFSET;
+    poc->opal_shared_memory_version = 0xA0;
+    poc->xscom_size = PNV9_XSCOM_OCC_SIZE;
+    poc->xscom_ops = &pnv_occ_power9_xscom_ops;
+    assert(!dc->user_creatable);
 }
 
 static const TypeInfo pnv_occ_power10_type_info = {
     .name          = TYPE_PNV10_OCC,
-    .parent        = TYPE_PNV9_OCC,
+    .parent        = TYPE_PNV_OCC,
     .class_init    = pnv_occ_power10_class_init,
 };
 
+static bool occ_init_homer_memory(PnvOCC *occ, Error **errp);
+static bool occ_model_tick(PnvOCC *occ);
+
+/* Relatively arbitrary */
+#define OCC_POLL_MS 100
+
+static void occ_state_machine_timer(void *opaque)
+{
+    PnvOCC *occ = opaque;
+    uint64_t next = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + OCC_POLL_MS;
+
+    if (occ_model_tick(occ)) {
+        timer_mod(&occ->state_machine_timer, next);
+    }
+}
+
 static void pnv_occ_realize(DeviceState *dev, Error **errp)
 {
     PnvOCC *occ = PNV_OCC(dev);
     PnvOCCClass *poc = PNV_OCC_GET_CLASS(occ);
+    PnvHomer *homer = occ->homer;
+
+    assert(homer);
+
+    if (!occ_init_homer_memory(occ, errp)) {
+        return;
+    }
 
     occ->occmisc = 0;
 
@@ -279,14 +326,22 @@ static void pnv_occ_realize(DeviceState *dev, Error **errp)
                           PNV_OCC_SENSOR_DATA_BLOCK_SIZE);
 
     qdev_init_gpio_out(dev, &occ->psi_irq, 1);
+
+    timer_init_ms(&occ->state_machine_timer, QEMU_CLOCK_VIRTUAL,
+                  occ_state_machine_timer, occ);
+    timer_mod(&occ->state_machine_timer, OCC_POLL_MS);
 }
 
+static const Property pnv_occ_properties[] = {
+    DEFINE_PROP_LINK("homer", PnvOCC, homer, TYPE_PNV_HOMER, PnvHomer *),
+};
+
 static void pnv_occ_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->realize = pnv_occ_realize;
-    dc->desc = "PowerNV OCC Controller";
+    device_class_set_props(dc, pnv_occ_properties);
     dc->user_creatable = false;
 }
 
@@ -308,3 +363,570 @@ static void pnv_occ_register_types(void)
 }
 
 type_init(pnv_occ_register_types);
+
+/* From skiboot/hw/occ.c with tab to space conversion */
+/* OCC Communication Area for PStates */
+
+#define OPAL_DYNAMIC_DATA_OFFSET        0x0B80
+/* relative to HOMER_OPAL_DATA_OFFSET */
+
+#define MAX_PSTATES                     256
+#define MAX_P8_CORES                    12
+#define MAX_P9_CORES                    24
+#define MAX_P10_CORES                   32
+
+#define MAX_OPAL_CMD_DATA_LENGTH        4090
+#define MAX_OCC_RSP_DATA_LENGTH         8698
+
+#define P8_PIR_CORE_MASK                0xFFF8
+#define P9_PIR_QUAD_MASK                0xFFF0
+#define P10_PIR_CHIP_MASK               0x0000
+#define FREQ_MAX_IN_DOMAIN              0
+#define FREQ_MOST_RECENTLY_SET          1
+
+#define u8 uint8_t
+#define s8 int8_t
+#define u16 uint16_t
+#define s16 int16_t
+#define u32 uint32_t
+#define s32 int32_t
+#define u64 uint64_t
+#define s64 int64_t
+#define __be16 uint16_t
+#define __be32 uint32_t
+#ifndef __packed
+#define __packed QEMU_PACKED
+#endif /* !__packed */
+
+/**
+ * OCC-OPAL Shared Memory Region
+ *
+ * Reference document :
+ * https://github.com/open-power/docs/blob/master/occ/OCC_OpenPwr_FW_Interfaces.pdf
+ *
+ * Supported layout versions:
+ * - 0x01, 0x02 : P8
+ * https://github.com/open-power/occ/blob/master_p8/src/occ/proc/proc_pstate.h
+ *
+ * - 0x90 : P9
+ * https://github.com/open-power/occ/blob/master/src/occ_405/proc/proc_pstate.h
+ *   In 0x90 the data is separated into :-
+ *   -- Static Data (struct occ_pstate_table): Data is written once by OCC
+ *   -- Dynamic Data (struct occ_dynamic_data): Data is updated at runtime
+ *
+ * struct occ_pstate_table -    Pstate table layout
+ * @valid:                      Indicates if data is valid
+ * @version:                    Layout version [Major/Minor]
+ * @v2.throttle:                Reason for limiting the max pstate
+ * @v9.occ_role:                OCC role (Master/Slave)
+ * @v#.pstate_min:              Minimum pstate ever allowed
+ * @v#.pstate_nom:              Nominal pstate
+ * @v#.pstate_turbo:            Maximum turbo pstate
+ * @v#.pstate_ultra_turbo:      Maximum ultra turbo pstate and the maximum
+ *                              pstate ever allowed
+ * @v#.pstates:                 Pstate-id and frequency list from Pmax to Pmin
+ * @v#.pstates.id:              Pstate-id
+ * @v#.pstates.flags:           Pstate-flag(reserved)
+ * @v2.pstates.vdd:             Voltage Identifier
+ * @v2.pstates.vcs:             Voltage Identifier
+ * @v#.pstates.freq_khz:        Frequency in KHz
+ * @v#.core_max[1..N]:          Max pstate with N active cores
+ * @spare/reserved/pad:         Unused data
+ */
+struct occ_pstate_table {
+    u8 valid;
+    u8 version;
+    union __packed {
+        struct __packed { /* Version 0x01 and 0x02 */
+            u8 throttle;
+            s8 pstate_min;
+            s8 pstate_nom;
+            s8 pstate_turbo;
+            s8 pstate_ultra_turbo;
+            u8 spare;
+            u64 reserved;
+            struct __packed {
+                s8 id;
+                u8 flags;
+                u8 vdd;
+                u8 vcs;
+                __be32 freq_khz;
+            } pstates[MAX_PSTATES];
+            s8 core_max[MAX_P8_CORES];
+            u8 pad[100];
+        } v2;
+        struct __packed { /* Version 0x90 */
+            u8 occ_role;
+            u8 pstate_min;
+            u8 pstate_nom;
+            u8 pstate_turbo;
+            u8 pstate_ultra_turbo;
+            u8 spare;
+            u64 reserved1;
+            u64 reserved2;
+            struct __packed {
+                u8 id;
+                u8 flags;
+                u16 reserved;
+                __be32 freq_khz;
+            } pstates[MAX_PSTATES];
+            u8 core_max[MAX_P9_CORES];
+            u8 pad[56];
+        } v9;
+        struct __packed { /* Version 0xA0 */
+            u8 occ_role;
+            u8 pstate_min;
+            u8 pstate_fixed_freq;
+            u8 pstate_base;
+            u8 pstate_ultra_turbo;
+            u8 pstate_fmax;
+            u8 minor;
+            u8 pstate_bottom_throttle;
+            u8 spare;
+            u8 spare1;
+            u32 reserved_32;
+            u64 reserved_64;
+            struct __packed {
+                u8 id;
+                u8 valid;
+                u16 reserved;
+                __be32 freq_khz;
+            } pstates[MAX_PSTATES];
+            u8 core_max[MAX_P10_CORES];
+            u8 pad[48];
+        } v10;
+    };
+} __packed;
+
+/**
+ * OPAL-OCC Command Response Interface
+ *
+ * OPAL-OCC Command Buffer
+ *
+ * ---------------------------------------------------------------------
+ * | OPAL  |  Cmd    | OPAL |          | Cmd Data | Cmd Data | OPAL    |
+ * | Cmd   | Request | OCC  | Reserved | Length   | Length   | Cmd     |
+ * | Flags |   ID    | Cmd  |          | (MSB)    | (LSB)    | Data... |
+ * ---------------------------------------------------------------------
+ * |  ….OPAL Command Data up to max of Cmd Data Length 4090 bytes      |
+ * |                                                                   |
+ * ---------------------------------------------------------------------
+ *
+ * OPAL Command Flag
+ *
+ * -----------------------------------------------------------------
+ * | Bit 7 | Bit 6 | Bit 5 | Bit 4 | Bit 3 | Bit 2 | Bit 1 | Bit 0 |
+ * | (msb) |       |       |       |       |       |       | (lsb) |
+ * -----------------------------------------------------------------
+ * |Cmd    |       |       |       |       |       |       |       |
+ * |Ready  |       |       |       |       |       |       |       |
+ * -----------------------------------------------------------------
+ *
+ * struct opal_command_buffer - Defines the layout of OPAL command buffer
+ * @flag:                       Provides general status of the command
+ * @request_id:                 Token to identify request
+ * @cmd:                        Command sent
+ * @data_size:                  Command data length
+ * @data:                       Command specific data
+ * @spare:                      Unused byte
+ */
+struct opal_command_buffer {
+    u8 flag;
+    u8 request_id;
+    u8 cmd;
+    u8 spare;
+    __be16 data_size;
+    u8 data[MAX_OPAL_CMD_DATA_LENGTH];
+} __packed;
+
+/**
+ * OPAL-OCC Response Buffer
+ *
+ * ---------------------------------------------------------------------
+ * | OCC   |  Cmd    | OPAL | Response | Rsp Data | Rsp Data | OPAL    |
+ * | Rsp   | Request | OCC  |  Status  | Length   | Length   | Rsp     |
+ * | Flags |   ID    | Cmd  |          | (MSB)    | (LSB)    | Data... |
+ * ---------------------------------------------------------------------
+ * |  ….OPAL Response Data up to max of Rsp Data Length 8698 bytes     |
+ * |                                                                   |
+ * ---------------------------------------------------------------------
+ *
+ * OCC Response Flag
+ *
+ * -----------------------------------------------------------------
+ * | Bit 7 | Bit 6 | Bit 5 | Bit 4 | Bit 3 | Bit 2 | Bit 1 | Bit 0 |
+ * | (msb) |       |       |       |       |       |       | (lsb) |
+ * -----------------------------------------------------------------
+ * |       |       |       |       |       |       |OCC in  | Rsp  |
+ * |       |       |       |       |       |       |progress|Ready |
+ * -----------------------------------------------------------------
+ *
+ * struct occ_response_buffer - Defines the layout of OCC response buffer
+ * @flag:                       Provides general status of the response
+ * @request_id:                 Token to identify request
+ * @cmd:                        Command requested
+ * @status:                     Indicates success/failure status of
+ *                              the command
+ * @data_size:                  Response data length
+ * @data:                       Response specific data
+ */
+struct occ_response_buffer {
+    u8 flag;
+    u8 request_id;
+    u8 cmd;
+    u8 status;
+    __be16 data_size;
+    u8 data[MAX_OCC_RSP_DATA_LENGTH];
+} __packed;
+
+/**
+ * OCC-OPAL Shared Memory Interface Dynamic Data Vx90
+ *
+ * struct occ_dynamic_data -    Contains runtime attributes
+ * @occ_state:                  Current state of OCC
+ * @major_version:              Major version number
+ * @minor_version:              Minor version number (backwards compatible)
+ *                              Version 1 indicates GPU presence populated
+ * @gpus_present:               Bitmask of GPUs present (on systems where GPU
+ *                              presence is detected through APSS)
+ * @cpu_throttle:               Reason for limiting the max pstate
+ * @mem_throttle:               Reason for throttling memory
+ * @quick_pwr_drop:             Indicates if QPD is asserted
+ * @pwr_shifting_ratio:         Indicates the current percentage of power to
+ *                              take away from the CPU vs GPU when shifting
+ *                              power to maintain a power cap. Value of 100
+ *                              means take all power from CPU.
+ * @pwr_cap_type:               Indicates type of power cap in effect
+ * @hard_min_pwr_cap:           Hard minimum system power cap in Watts.
+ *                              Guaranteed unless hardware failure
+ * @max_pwr_cap:                Maximum allowed system power cap in Watts
+ * @cur_pwr_cap:                Current system power cap
+ * @soft_min_pwr_cap:           Soft powercap minimum. OCC may or may not be
+ *                              able to maintain this
+ * @spare/reserved:             Unused data
+ * @cmd:                        Opal Command Buffer
+ * @rsp:                        OCC Response Buffer
+ */
+struct occ_dynamic_data {
+    u8 occ_state;
+    u8 major_version;
+    u8 minor_version;
+    u8 gpus_present;
+    union __packed {
+        struct __packed { /* Version 0x90 */
+            u8 spare1;
+        } v9;
+        struct __packed { /* Version 0xA0 */
+            u8 wof_enabled;
+        } v10;
+    };
+    u8 cpu_throttle;
+    u8 mem_throttle;
+    u8 quick_pwr_drop;
+    u8 pwr_shifting_ratio;
+    u8 pwr_cap_type;
+    __be16 hard_min_pwr_cap;
+    __be16 max_pwr_cap;
+    __be16 cur_pwr_cap;
+    __be16 soft_min_pwr_cap;
+    u8 pad[110];
+    struct opal_command_buffer cmd;
+    struct occ_response_buffer rsp;
+} __packed;
+
+enum occ_response_status {
+    OCC_RSP_SUCCESS                 = 0x00,
+    OCC_RSP_INVALID_COMMAND         = 0x11,
+    OCC_RSP_INVALID_CMD_DATA_LENGTH = 0x12,
+    OCC_RSP_INVALID_DATA            = 0x13,
+    OCC_RSP_INTERNAL_ERROR          = 0x15,
+};
+
+#define OCC_ROLE_SLAVE                  0x00
+#define OCC_ROLE_MASTER                 0x01
+
+#define OCC_FLAG_RSP_READY              0x01
+#define OCC_FLAG_CMD_IN_PROGRESS        0x02
+#define OPAL_FLAG_CMD_READY             0x80
+
+#define PCAP_MAX_POWER_W                100
+#define PCAP_SOFT_MIN_POWER_W            20
+#define PCAP_HARD_MIN_POWER_W            10
+
+static bool occ_write_static_data(PnvOCC *occ,
+                                 struct occ_pstate_table *static_data,
+                                 Error **errp)
+{
+    PnvOCCClass *poc = PNV_OCC_GET_CLASS(occ);
+    PnvHomer *homer = occ->homer;
+    hwaddr static_addr = homer->base + poc->opal_shared_memory_offset;
+    MemTxResult ret;
+
+    ret = address_space_write(&address_space_memory, static_addr,
+                             MEMTXATTRS_UNSPECIFIED, static_data,
+                             sizeof(*static_data));
+    if (ret != MEMTX_OK) {
+        error_setg(errp, "OCC: cannot write OCC-OPAL static data");
+        return false;
+    }
+
+    return true;
+}
+
+static bool occ_read_dynamic_data(PnvOCC *occ,
+                                  struct occ_dynamic_data *dynamic_data,
+                                  Error **errp)
+{
+    PnvOCCClass *poc = PNV_OCC_GET_CLASS(occ);
+    PnvHomer *homer = occ->homer;
+    hwaddr static_addr = homer->base + poc->opal_shared_memory_offset;
+    hwaddr dynamic_addr = static_addr + OPAL_DYNAMIC_DATA_OFFSET;
+    MemTxResult ret;
+
+    ret = address_space_read(&address_space_memory, dynamic_addr,
+                             MEMTXATTRS_UNSPECIFIED, dynamic_data,
+                             sizeof(*dynamic_data));
+    if (ret != MEMTX_OK) {
+        error_setg(errp, "OCC: cannot read OCC-OPAL dynamic data");
+        return false;
+    }
+
+    return true;
+}
+
+static bool occ_write_dynamic_data(PnvOCC *occ,
+                                  struct occ_dynamic_data *dynamic_data,
+                                  Error **errp)
+{
+    PnvOCCClass *poc = PNV_OCC_GET_CLASS(occ);
+    PnvHomer *homer = occ->homer;
+    hwaddr static_addr = homer->base + poc->opal_shared_memory_offset;
+    hwaddr dynamic_addr = static_addr + OPAL_DYNAMIC_DATA_OFFSET;
+    MemTxResult ret;
+
+    ret = address_space_write(&address_space_memory, dynamic_addr,
+                             MEMTXATTRS_UNSPECIFIED, dynamic_data,
+                             sizeof(*dynamic_data));
+    if (ret != MEMTX_OK) {
+        error_setg(errp, "OCC: cannot write OCC-OPAL dynamic data");
+        return false;
+    }
+
+    return true;
+}
+
+static bool occ_opal_send_response(PnvOCC *occ,
+                                   struct occ_dynamic_data *dynamic_data,
+                                   enum occ_response_status status,
+                                   uint8_t *data, uint16_t datalen)
+{
+    struct opal_command_buffer *cmd = &dynamic_data->cmd;
+    struct occ_response_buffer *rsp = &dynamic_data->rsp;
+
+    rsp->request_id = cmd->request_id;
+    rsp->cmd = cmd->cmd;
+    rsp->status = status;
+    rsp->data_size = cpu_to_be16(datalen);
+    if (datalen) {
+        memcpy(rsp->data, data, datalen);
+    }
+    if (!occ_write_dynamic_data(occ, dynamic_data, NULL)) {
+        return false;
+    }
+    /* Would be a memory barrier here */
+    rsp->flag = OCC_FLAG_RSP_READY;
+    cmd->flag = 0;
+    if (!occ_write_dynamic_data(occ, dynamic_data, NULL)) {
+        return false;
+    }
+
+    pnv_occ_raise_msg_irq(occ);
+
+    return true;
+}
+
+/* Returns error status */
+static bool occ_opal_process_command(PnvOCC *occ,
+                                     struct occ_dynamic_data *dynamic_data)
+{
+    struct opal_command_buffer *cmd = &dynamic_data->cmd;
+    struct occ_response_buffer *rsp = &dynamic_data->rsp;
+
+    if (rsp->flag == 0) {
+        /* Spend one "tick" in the in-progress state */
+        rsp->flag = OCC_FLAG_CMD_IN_PROGRESS;
+        return occ_write_dynamic_data(occ, dynamic_data, NULL);
+    } else if (rsp->flag != OCC_FLAG_CMD_IN_PROGRESS) {
+        return occ_opal_send_response(occ, dynamic_data,
+                                      OCC_RSP_INTERNAL_ERROR,
+                                      NULL, 0);
+    }
+
+    switch (cmd->cmd) {
+    case 0xD1: { /* SET_POWER_CAP */
+        uint16_t data;
+        if (be16_to_cpu(cmd->data_size) != 2) {
+            return occ_opal_send_response(occ, dynamic_data,
+                                          OCC_RSP_INVALID_CMD_DATA_LENGTH,
+                                          (uint8_t *)&dynamic_data->cur_pwr_cap,
+                                          2);
+        }
+        data = be16_to_cpu(*(uint16_t *)cmd->data);
+        if (data == 0) { /* clear power cap */
+            dynamic_data->pwr_cap_type = 0x00; /* none */
+            data = PCAP_MAX_POWER_W;
+        } else {
+            dynamic_data->pwr_cap_type = 0x02; /* user set in-band */
+            if (data < PCAP_HARD_MIN_POWER_W) {
+                data = PCAP_HARD_MIN_POWER_W;
+            } else if (data > PCAP_MAX_POWER_W) {
+                data = PCAP_MAX_POWER_W;
+            }
+        }
+        dynamic_data->cur_pwr_cap = cpu_to_be16(data);
+        return occ_opal_send_response(occ, dynamic_data,
+                                      OCC_RSP_SUCCESS,
+                                      (uint8_t *)&dynamic_data->cur_pwr_cap, 2);
+    }
+
+    default:
+        return occ_opal_send_response(occ, dynamic_data,
+                                      OCC_RSP_INVALID_COMMAND,
+                                      NULL, 0);
+    }
+    g_assert_not_reached();
+}
+
+static bool occ_model_tick(PnvOCC *occ)
+{
+    struct occ_dynamic_data dynamic_data;
+
+    if (!occ_read_dynamic_data(occ, &dynamic_data, NULL)) {
+        /* Can't move OCC state field to safe because we can't map it! */
+        qemu_log("OCC: failed to read HOMER data, shutting down OCC\n");
+        return false;
+    }
+    if (dynamic_data.cmd.flag == OPAL_FLAG_CMD_READY) {
+        if (!occ_opal_process_command(occ, &dynamic_data)) {
+            qemu_log("OCC: failed to write HOMER data, shutting down OCC\n");
+            return false;
+        }
+    }
+
+    return true;
+}
+
+static bool occ_init_homer_memory(PnvOCC *occ, Error **errp)
+{
+    PnvOCCClass *poc = PNV_OCC_GET_CLASS(occ);
+    PnvHomer *homer = occ->homer;
+    PnvChip *chip = homer->chip;
+    struct occ_pstate_table static_data;
+    struct occ_dynamic_data dynamic_data;
+    int i;
+
+    memset(&static_data, 0, sizeof(static_data));
+    static_data.valid = 1;
+    static_data.version = poc->opal_shared_memory_version;
+    switch (poc->opal_shared_memory_version) {
+    case 0x02:
+        static_data.v2.throttle = 0;
+        static_data.v2.pstate_min = -2;
+        static_data.v2.pstate_nom = -1;
+        static_data.v2.pstate_turbo = -1;
+        static_data.v2.pstate_ultra_turbo = 0;
+        static_data.v2.pstates[0].id = 0;
+        static_data.v2.pstates[1].freq_khz = cpu_to_be32(4000000);
+        static_data.v2.pstates[1].id = -1;
+        static_data.v2.pstates[1].freq_khz = cpu_to_be32(3000000);
+        static_data.v2.pstates[2].id = -2;
+        static_data.v2.pstates[2].freq_khz = cpu_to_be32(2000000);
+        for (i = 0; i < chip->nr_cores; i++) {
+            static_data.v2.core_max[i] = 1;
+        }
+        break;
+    case 0x90:
+        if (chip->chip_id == 0) {
+            static_data.v9.occ_role = OCC_ROLE_MASTER;
+        } else {
+            static_data.v9.occ_role = OCC_ROLE_SLAVE;
+        }
+        static_data.v9.pstate_min = 2;
+        static_data.v9.pstate_nom = 1;
+        static_data.v9.pstate_turbo = 1;
+        static_data.v9.pstate_ultra_turbo = 0;
+        static_data.v9.pstates[0].id = 0;
+        static_data.v9.pstates[0].freq_khz = cpu_to_be32(4000000);
+        static_data.v9.pstates[1].id = 1;
+        static_data.v9.pstates[1].freq_khz = cpu_to_be32(3000000);
+        static_data.v9.pstates[2].id = 2;
+        static_data.v9.pstates[2].freq_khz = cpu_to_be32(2000000);
+        for (i = 0; i < chip->nr_cores; i++) {
+            static_data.v9.core_max[i] = 1;
+        }
+        break;
+    case 0xA0:
+        if (chip->chip_id == 0) {
+            static_data.v10.occ_role = OCC_ROLE_MASTER;
+        } else {
+            static_data.v10.occ_role = OCC_ROLE_SLAVE;
+        }
+        static_data.v10.pstate_min = 4;
+        static_data.v10.pstate_fixed_freq = 3;
+        static_data.v10.pstate_base = 2;
+        static_data.v10.pstate_ultra_turbo = 0;
+        static_data.v10.pstate_fmax = 1;
+        static_data.v10.minor = 0x01;
+        static_data.v10.pstates[0].valid = 1;
+        static_data.v10.pstates[0].id = 0;
+        static_data.v10.pstates[0].freq_khz = cpu_to_be32(4200000);
+        static_data.v10.pstates[1].valid = 1;
+        static_data.v10.pstates[1].id = 1;
+        static_data.v10.pstates[1].freq_khz = cpu_to_be32(4000000);
+        static_data.v10.pstates[2].valid = 1;
+        static_data.v10.pstates[2].id = 2;
+        static_data.v10.pstates[2].freq_khz = cpu_to_be32(3800000);
+        static_data.v10.pstates[3].valid = 1;
+        static_data.v10.pstates[3].id = 3;
+        static_data.v10.pstates[3].freq_khz = cpu_to_be32(3000000);
+        static_data.v10.pstates[4].valid = 1;
+        static_data.v10.pstates[4].id = 4;
+        static_data.v10.pstates[4].freq_khz = cpu_to_be32(2000000);
+        for (i = 0; i < chip->nr_cores; i++) {
+            static_data.v10.core_max[i] = 1;
+        }
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    if (!occ_write_static_data(occ, &static_data, errp)) {
+        return false;
+    }
+
+    memset(&dynamic_data, 0, sizeof(dynamic_data));
+    dynamic_data.occ_state = 0x3; /* active */
+    dynamic_data.major_version = 0x0;
+    dynamic_data.hard_min_pwr_cap = cpu_to_be16(PCAP_HARD_MIN_POWER_W);
+    dynamic_data.max_pwr_cap = cpu_to_be16(PCAP_MAX_POWER_W);
+    dynamic_data.cur_pwr_cap = cpu_to_be16(PCAP_MAX_POWER_W);
+    dynamic_data.soft_min_pwr_cap = cpu_to_be16(PCAP_SOFT_MIN_POWER_W);
+    switch (poc->opal_shared_memory_version) {
+    case 0xA0:
+        dynamic_data.minor_version = 0x1;
+        dynamic_data.v10.wof_enabled = 0x1;
+        break;
+    case 0x90:
+        dynamic_data.minor_version = 0x1;
+        break;
+    case 0x02:
+        dynamic_data.minor_version = 0x0;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    if (!occ_write_dynamic_data(occ, &dynamic_data, errp)) {
+        return false;
+    }
+
+    return true;
+}
diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index 90e3db5cfebab..3a80931538f13 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -1123,16 +1123,21 @@ void cpu_ppc_tb_reset(CPUPPCState *env)
         timer_del(tb_env->hdecr_timer);
         ppc_set_irq(cpu, PPC_INTERRUPT_HDECR, 0);
         tb_env->hdecr_next = 0;
+        _cpu_ppc_store_hdecr(cpu, 0, 0, 0, 64);
     }
 
     /*
      * There is a bug in Linux 2.4 kernels:
      * if a decrementer exception is pending when it enables msr_ee at startup,
      * it's not ready to handle it...
+     *
+     * On machine reset, this is called before icount is reset, so for
+     * icount-mode, setting TB registers using now == qemu_clock_get_ns()
+     * results in them being garbage after icount is reset. Use an
+     * explicit now == 0 to get a consistent reset state.
      */
-    cpu_ppc_store_decr(env, -1);
-    cpu_ppc_store_hdecr(env, -1);
-    cpu_ppc_store_purr(env, 0x0000000000000000ULL);
+    _cpu_ppc_store_decr(cpu, 0, 0, -1, 64);
+    _cpu_ppc_store_purr(env, 0, 0);
 }
 
 void cpu_ppc_tb_free(CPUPPCState *env)
diff --git a/hw/ppc/ppc405.h b/hw/ppc/ppc405.h
deleted file mode 100644
index 9a4312691e15a..0000000000000
--- a/hw/ppc/ppc405.h
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * QEMU PowerPC 405 shared definitions
- *
- * Copyright (c) 2007 Jocelyn Mayer
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#ifndef PPC405_H
-#define PPC405_H
-
-#include "qom/object.h"
-#include "hw/ppc/ppc4xx.h"
-#include "hw/intc/ppc-uic.h"
-#include "hw/i2c/ppc4xx_i2c.h"
-
-/* PLB to OPB bridge */
-#define TYPE_PPC405_POB "ppc405-pob"
-OBJECT_DECLARE_SIMPLE_TYPE(Ppc405PobState, PPC405_POB);
-struct Ppc405PobState {
-    Ppc4xxDcrDeviceState parent_obj;
-
-    uint32_t bear;
-    uint32_t besr0;
-    uint32_t besr1;
-};
-
-/* OPB arbitrer */
-#define TYPE_PPC405_OPBA "ppc405-opba"
-OBJECT_DECLARE_SIMPLE_TYPE(Ppc405OpbaState, PPC405_OPBA);
-struct Ppc405OpbaState {
-    SysBusDevice parent_obj;
-
-    MemoryRegion io;
-    uint8_t cr;
-    uint8_t pr;
-};
-
-/* DMA controller */
-#define TYPE_PPC405_DMA "ppc405-dma"
-OBJECT_DECLARE_SIMPLE_TYPE(Ppc405DmaState, PPC405_DMA);
-struct Ppc405DmaState {
-    Ppc4xxDcrDeviceState parent_obj;
-
-    qemu_irq irqs[4];
-    uint32_t cr[4];
-    uint32_t ct[4];
-    uint32_t da[4];
-    uint32_t sa[4];
-    uint32_t sg[4];
-    uint32_t sr;
-    uint32_t sgc;
-    uint32_t slp;
-    uint32_t pol;
-};
-
-/* GPIO */
-#define TYPE_PPC405_GPIO "ppc405-gpio"
-OBJECT_DECLARE_SIMPLE_TYPE(Ppc405GpioState, PPC405_GPIO);
-struct Ppc405GpioState {
-    SysBusDevice parent_obj;
-
-    MemoryRegion io;
-    uint32_t or;
-    uint32_t tcr;
-    uint32_t osrh;
-    uint32_t osrl;
-    uint32_t tsrh;
-    uint32_t tsrl;
-    uint32_t odr;
-    uint32_t ir;
-    uint32_t rr1;
-    uint32_t isr1h;
-    uint32_t isr1l;
-};
-
-/* On Chip Memory */
-#define TYPE_PPC405_OCM "ppc405-ocm"
-OBJECT_DECLARE_SIMPLE_TYPE(Ppc405OcmState, PPC405_OCM);
-struct Ppc405OcmState {
-    Ppc4xxDcrDeviceState parent_obj;
-
-    MemoryRegion ram;
-    MemoryRegion isarc_ram;
-    MemoryRegion dsarc_ram;
-    uint32_t isarc;
-    uint32_t isacntl;
-    uint32_t dsarc;
-    uint32_t dsacntl;
-};
-
-/* General purpose timers */
-#define TYPE_PPC405_GPT "ppc405-gpt"
-OBJECT_DECLARE_SIMPLE_TYPE(Ppc405GptState, PPC405_GPT);
-struct Ppc405GptState {
-    SysBusDevice parent_obj;
-
-    MemoryRegion iomem;
-
-    int64_t tb_offset;
-    uint32_t tb_freq;
-    QEMUTimer *timer;
-    qemu_irq irqs[5];
-    uint32_t oe;
-    uint32_t ol;
-    uint32_t im;
-    uint32_t is;
-    uint32_t ie;
-    uint32_t comp[5];
-    uint32_t mask[5];
-};
-
-#define TYPE_PPC405_CPC "ppc405-cpc"
-OBJECT_DECLARE_SIMPLE_TYPE(Ppc405CpcState, PPC405_CPC);
-
-enum {
-    PPC405EP_CPU_CLK   = 0,
-    PPC405EP_PLB_CLK   = 1,
-    PPC405EP_OPB_CLK   = 2,
-    PPC405EP_EBC_CLK   = 3,
-    PPC405EP_MAL_CLK   = 4,
-    PPC405EP_PCI_CLK   = 5,
-    PPC405EP_UART0_CLK = 6,
-    PPC405EP_UART1_CLK = 7,
-    PPC405EP_CLK_NB    = 8,
-};
-
-struct Ppc405CpcState {
-    Ppc4xxDcrDeviceState parent_obj;
-
-    uint32_t sysclk;
-    clk_setup_t clk_setup[PPC405EP_CLK_NB];
-    uint32_t boot;
-    uint32_t epctl;
-    uint32_t pllmr[2];
-    uint32_t ucr;
-    uint32_t srr;
-    uint32_t jtagid;
-    uint32_t pci;
-    /* Clock and power management */
-    uint32_t er;
-    uint32_t fr;
-    uint32_t sr;
-};
-
-#define TYPE_PPC405_SOC "ppc405-soc"
-OBJECT_DECLARE_SIMPLE_TYPE(Ppc405SoCState, PPC405_SOC);
-
-struct Ppc405SoCState {
-    /* Private */
-    DeviceState parent_obj;
-
-    /* Public */
-    PowerPCCPU cpu;
-    PPCUIC uic;
-    Ppc405CpcState cpc;
-    Ppc405GptState gpt;
-    Ppc405OcmState ocm;
-    Ppc405GpioState gpio;
-    Ppc405DmaState dma;
-    PPC4xxI2CState i2c;
-    Ppc4xxEbcState ebc;
-    Ppc405OpbaState opba;
-    Ppc405PobState pob;
-    Ppc4xxPlbState plb;
-    Ppc4xxMalState mal;
-    Ppc4xxSdramDdrState sdram;
-};
-
-#endif /* PPC405_H */
diff --git a/hw/ppc/ppc405_boards.c b/hw/ppc/ppc405_boards.c
deleted file mode 100644
index 969cac345ac86..0000000000000
--- a/hw/ppc/ppc405_boards.c
+++ /dev/null
@@ -1,520 +0,0 @@
-/*
- * QEMU PowerPC 405 evaluation boards emulation
- *
- * Copyright (c) 2007 Jocelyn Mayer
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-#include "qemu/units.h"
-#include "qapi/error.h"
-#include "qemu/datadir.h"
-#include "cpu.h"
-#include "hw/ppc/ppc.h"
-#include "hw/qdev-properties.h"
-#include "hw/sysbus.h"
-#include "ppc405.h"
-#include "hw/rtc/m48t59.h"
-#include "hw/block/flash.h"
-#include "system/qtest.h"
-#include "system/reset.h"
-#include "system/block-backend.h"
-#include "hw/boards.h"
-#include "qemu/error-report.h"
-#include "hw/loader.h"
-#include "qemu/cutils.h"
-#include "elf.h"
-
-#define BIOS_FILENAME "ppc405_rom.bin"
-#define BIOS_SIZE (2 * MiB)
-
-#define KERNEL_LOAD_ADDR 0x01000000
-#define INITRD_LOAD_ADDR 0x01800000
-
-#define PPC405EP_SDRAM_BASE 0x00000000
-#define PPC405EP_SRAM_BASE  0xFFF00000
-#define PPC405EP_SRAM_SIZE  (512 * KiB)
-
-#define USE_FLASH_BIOS
-
-#define TYPE_PPC405_MACHINE MACHINE_TYPE_NAME("ppc405")
-OBJECT_DECLARE_SIMPLE_TYPE(Ppc405MachineState, PPC405_MACHINE);
-
-struct Ppc405MachineState {
-    /* Private */
-    MachineState parent_obj;
-    /* Public */
-
-    Ppc405SoCState soc;
-};
-
-/* CPU reset handler when booting directly from a loaded kernel */
-static struct boot_info {
-    uint32_t entry;
-    uint32_t bdloc;
-    uint32_t initrd_base;
-    uint32_t initrd_size;
-    uint32_t cmdline_base;
-    uint32_t cmdline_size;
-} boot_info;
-
-static void main_cpu_reset(void *opaque)
-{
-    PowerPCCPU *cpu = opaque;
-    CPUPPCState *env = &cpu->env;
-    struct boot_info *bi = env->load_info;
-
-    cpu_reset(CPU(cpu));
-
-    /* stack: top of sram */
-    env->gpr[1] = PPC405EP_SRAM_BASE + PPC405EP_SRAM_SIZE - 8;
-
-    /* Tune our boot state */
-    env->gpr[3] = bi->bdloc;
-    env->gpr[4] = bi->initrd_base;
-    env->gpr[5] = bi->initrd_base + bi->initrd_size;
-    env->gpr[6] = bi->cmdline_base;
-    env->gpr[7] = bi->cmdline_size;
-
-    env->nip = bi->entry;
-}
-
-/* Bootinfo as set-up by u-boot */
-typedef struct {
-    uint32_t bi_memstart;
-    uint32_t bi_memsize;
-    uint32_t bi_flashstart;
-    uint32_t bi_flashsize;
-    uint32_t bi_flashoffset; /* 0x10 */
-    uint32_t bi_sramstart;
-    uint32_t bi_sramsize;
-    uint32_t bi_bootflags;
-    uint32_t bi_ipaddr; /* 0x20 */
-    uint8_t  bi_enetaddr[6];
-    uint16_t bi_ethspeed;
-    uint32_t bi_intfreq;
-    uint32_t bi_busfreq; /* 0x30 */
-    uint32_t bi_baudrate;
-    uint8_t  bi_s_version[4];
-    uint8_t  bi_r_version[32];
-    uint32_t bi_procfreq;
-    uint32_t bi_plb_busfreq;
-    uint32_t bi_pci_busfreq;
-    uint8_t  bi_pci_enetaddr[6];
-    uint8_t  bi_pci_enetaddr2[6]; /* PPC405EP specific */
-    uint32_t bi_opbfreq;
-    uint32_t bi_iic_fast[2];
-} ppc4xx_bd_info_t;
-
-static void ppc405_set_default_bootinfo(ppc4xx_bd_info_t *bd,
-                                        ram_addr_t ram_size)
-{
-        memset(bd, 0, sizeof(*bd));
-
-        bd->bi_memstart = PPC405EP_SDRAM_BASE;
-        bd->bi_memsize = ram_size;
-        bd->bi_sramstart = PPC405EP_SRAM_BASE;
-        bd->bi_sramsize = PPC405EP_SRAM_SIZE;
-        bd->bi_bootflags = 0;
-        bd->bi_intfreq = 133333333;
-        bd->bi_busfreq = 33333333;
-        bd->bi_baudrate = 115200;
-        bd->bi_s_version[0] = 'Q';
-        bd->bi_s_version[1] = 'M';
-        bd->bi_s_version[2] = 'U';
-        bd->bi_s_version[3] = '\0';
-        bd->bi_r_version[0] = 'Q';
-        bd->bi_r_version[1] = 'E';
-        bd->bi_r_version[2] = 'M';
-        bd->bi_r_version[3] = 'U';
-        bd->bi_r_version[4] = '\0';
-        bd->bi_procfreq = 133333333;
-        bd->bi_plb_busfreq = 33333333;
-        bd->bi_pci_busfreq = 33333333;
-        bd->bi_opbfreq = 33333333;
-}
-
-static ram_addr_t __ppc405_set_bootinfo(CPUPPCState *env, ppc4xx_bd_info_t *bd)
-{
-    CPUState *cs = env_cpu(env);
-    ram_addr_t bdloc;
-    int i, n;
-
-    /* We put the bd structure at the top of memory */
-    if (bd->bi_memsize >= 0x01000000UL) {
-        bdloc = 0x01000000UL - sizeof(ppc4xx_bd_info_t);
-    } else {
-        bdloc = bd->bi_memsize - sizeof(ppc4xx_bd_info_t);
-    }
-    stl_be_phys(cs->as, bdloc + 0x00, bd->bi_memstart);
-    stl_be_phys(cs->as, bdloc + 0x04, bd->bi_memsize);
-    stl_be_phys(cs->as, bdloc + 0x08, bd->bi_flashstart);
-    stl_be_phys(cs->as, bdloc + 0x0C, bd->bi_flashsize);
-    stl_be_phys(cs->as, bdloc + 0x10, bd->bi_flashoffset);
-    stl_be_phys(cs->as, bdloc + 0x14, bd->bi_sramstart);
-    stl_be_phys(cs->as, bdloc + 0x18, bd->bi_sramsize);
-    stl_be_phys(cs->as, bdloc + 0x1C, bd->bi_bootflags);
-    stl_be_phys(cs->as, bdloc + 0x20, bd->bi_ipaddr);
-    for (i = 0; i < 6; i++) {
-        stb_phys(cs->as, bdloc + 0x24 + i, bd->bi_enetaddr[i]);
-    }
-    stw_be_phys(cs->as, bdloc + 0x2A, bd->bi_ethspeed);
-    stl_be_phys(cs->as, bdloc + 0x2C, bd->bi_intfreq);
-    stl_be_phys(cs->as, bdloc + 0x30, bd->bi_busfreq);
-    stl_be_phys(cs->as, bdloc + 0x34, bd->bi_baudrate);
-    for (i = 0; i < 4; i++) {
-        stb_phys(cs->as, bdloc + 0x38 + i, bd->bi_s_version[i]);
-    }
-    for (i = 0; i < 32; i++) {
-        stb_phys(cs->as, bdloc + 0x3C + i, bd->bi_r_version[i]);
-    }
-    stl_be_phys(cs->as, bdloc + 0x5C, bd->bi_procfreq);
-    stl_be_phys(cs->as, bdloc + 0x60, bd->bi_plb_busfreq);
-    stl_be_phys(cs->as, bdloc + 0x64, bd->bi_pci_busfreq);
-    for (i = 0; i < 6; i++) {
-        stb_phys(cs->as, bdloc + 0x68 + i, bd->bi_pci_enetaddr[i]);
-    }
-    n = 0x70; /* includes 2 bytes hole */
-    for (i = 0; i < 6; i++) {
-        stb_phys(cs->as, bdloc + n++, bd->bi_pci_enetaddr2[i]);
-    }
-    stl_be_phys(cs->as, bdloc + n, bd->bi_opbfreq);
-    n += 4;
-    for (i = 0; i < 2; i++) {
-        stl_be_phys(cs->as, bdloc + n, bd->bi_iic_fast[i]);
-        n += 4;
-    }
-
-    return bdloc;
-}
-
-static ram_addr_t ppc405_set_bootinfo(CPUPPCState *env, ram_addr_t ram_size)
-{
-    ppc4xx_bd_info_t bd;
-
-    memset(&bd, 0, sizeof(bd));
-
-    ppc405_set_default_bootinfo(&bd, ram_size);
-
-    return __ppc405_set_bootinfo(env, &bd);
-}
-
-static void boot_from_kernel(MachineState *machine, PowerPCCPU *cpu)
-{
-    CPUPPCState *env = &cpu->env;
-    hwaddr boot_entry;
-    hwaddr kernel_base;
-    int kernel_size;
-    hwaddr initrd_base;
-    int initrd_size;
-    ram_addr_t bdloc;
-    int len;
-
-    bdloc = ppc405_set_bootinfo(env, machine->ram_size);
-    boot_info.bdloc = bdloc;
-
-    kernel_size = load_elf(machine->kernel_filename, NULL, NULL, NULL,
-                           &boot_entry, &kernel_base, NULL, NULL,
-                           ELFDATA2MSB, PPC_ELF_MACHINE, 0, 0);
-    if (kernel_size < 0) {
-        error_report("Could not load kernel '%s' : %s",
-                     machine->kernel_filename, load_elf_strerror(kernel_size));
-        exit(1);
-    }
-    boot_info.entry = boot_entry;
-
-    /* load initrd */
-    if (machine->initrd_filename) {
-        initrd_base = INITRD_LOAD_ADDR;
-        initrd_size = load_image_targphys(machine->initrd_filename, initrd_base,
-                                          machine->ram_size - initrd_base);
-        if (initrd_size < 0) {
-            error_report("could not load initial ram disk '%s'",
-                         machine->initrd_filename);
-            exit(1);
-        }
-
-        boot_info.initrd_base = initrd_base;
-        boot_info.initrd_size = initrd_size;
-    }
-
-    if (machine->kernel_cmdline) {
-        len = strlen(machine->kernel_cmdline);
-        bdloc -= ((len + 255) & ~255);
-        cpu_physical_memory_write(bdloc, machine->kernel_cmdline, len + 1);
-        boot_info.cmdline_base = bdloc;
-        boot_info.cmdline_size = bdloc + len;
-    }
-
-    /* Install our custom reset handler to start from Linux */
-    qemu_register_reset(main_cpu_reset, cpu);
-    env->load_info = &boot_info;
-}
-
-static void ppc405_init(MachineState *machine)
-{
-    Ppc405MachineState *ppc405 = PPC405_MACHINE(machine);
-    const char *kernel_filename = machine->kernel_filename;
-    MemoryRegion *sysmem = get_system_memory();
-
-    object_initialize_child(OBJECT(machine), "soc", &ppc405->soc,
-                            TYPE_PPC405_SOC);
-    object_property_set_link(OBJECT(&ppc405->soc), "dram",
-                             OBJECT(machine->ram), &error_abort);
-    object_property_set_uint(OBJECT(&ppc405->soc), "sys-clk", 33333333,
-                             &error_abort);
-    qdev_realize(DEVICE(&ppc405->soc), NULL, &error_fatal);
-
-    /* allocate and load BIOS */
-    if (machine->firmware) {
-        MemoryRegion *bios = g_new(MemoryRegion, 1);
-        g_autofree char *filename = qemu_find_file(QEMU_FILE_TYPE_BIOS,
-                                                   machine->firmware);
-        long bios_size;
-
-        memory_region_init_rom(bios, NULL, "ef405ep.bios", BIOS_SIZE,
-                               &error_fatal);
-
-        if (!filename) {
-            error_report("Could not find firmware '%s'", machine->firmware);
-            exit(1);
-        }
-
-        bios_size = load_image_size(filename,
-                                    memory_region_get_ram_ptr(bios),
-                                    BIOS_SIZE);
-        if (bios_size < 0) {
-            error_report("Could not load PowerPC BIOS '%s'", machine->firmware);
-            exit(1);
-        }
-
-        bios_size = (bios_size + 0xfff) & ~0xfff;
-        memory_region_add_subregion(sysmem, (uint32_t)(-bios_size), bios);
-    }
-
-    /* Load kernel and initrd using U-Boot images */
-    if (kernel_filename && machine->firmware) {
-        target_ulong kernel_base, initrd_base;
-        long kernel_size, initrd_size;
-
-        kernel_base = KERNEL_LOAD_ADDR;
-        kernel_size = load_image_targphys(kernel_filename, kernel_base,
-                                          machine->ram_size - kernel_base);
-        if (kernel_size < 0) {
-            error_report("could not load kernel '%s'", kernel_filename);
-            exit(1);
-        }
-
-        /* load initrd */
-        if (machine->initrd_filename) {
-            initrd_base = INITRD_LOAD_ADDR;
-            initrd_size = load_image_targphys(machine->initrd_filename,
-                                              initrd_base,
-                                              machine->ram_size - initrd_base);
-            if (initrd_size < 0) {
-                error_report("could not load initial ram disk '%s'",
-                             machine->initrd_filename);
-                exit(1);
-            }
-        }
-
-    /* Load ELF kernel and rootfs.cpio */
-    } else if (kernel_filename && !machine->firmware) {
-        ppc4xx_sdram_ddr_enable(&ppc405->soc.sdram);
-        boot_from_kernel(machine, &ppc405->soc.cpu);
-    }
-}
-
-static void ppc405_machine_class_init(ObjectClass *oc, void *data)
-{
-    MachineClass *mc = MACHINE_CLASS(oc);
-
-    mc->desc = "PPC405 generic machine";
-    mc->init = ppc405_init;
-    mc->default_ram_size = 128 * MiB;
-    mc->default_ram_id = "ppc405.ram";
-    mc->deprecation_reason = "machine is old and unmaintained";
-}
-
-static const TypeInfo ppc405_machine_type = {
-    .name = TYPE_PPC405_MACHINE,
-    .parent = TYPE_MACHINE,
-    .instance_size = sizeof(Ppc405MachineState),
-    .class_init = ppc405_machine_class_init,
-    .abstract = true,
-};
-
-/*****************************************************************************/
-/* PPC405EP reference board (IBM) */
-/*
- * Standalone board with:
- * - PowerPC 405EP CPU
- * - SDRAM (0x00000000)
- * - Flash (0xFFF80000)
- * - SRAM  (0xFFF00000)
- * - NVRAM (0xF0000000)
- * - FPGA  (0xF0300000)
- */
-
-#define PPC405EP_NVRAM_BASE 0xF0000000
-#define PPC405EP_FPGA_BASE  0xF0300000
-#define PPC405EP_FLASH_BASE 0xFFF80000
-
-#define TYPE_REF405EP_FPGA "ref405ep-fpga"
-OBJECT_DECLARE_SIMPLE_TYPE(Ref405epFpgaState, REF405EP_FPGA);
-struct Ref405epFpgaState {
-    SysBusDevice parent_obj;
-
-    MemoryRegion iomem;
-
-    uint8_t reg0;
-    uint8_t reg1;
-};
-
-static uint64_t ref405ep_fpga_readb(void *opaque, hwaddr addr, unsigned size)
-{
-    Ref405epFpgaState *fpga = opaque;
-    uint32_t ret;
-
-    switch (addr) {
-    case 0x0:
-        ret = fpga->reg0;
-        break;
-    case 0x1:
-        ret = fpga->reg1;
-        break;
-    default:
-        ret = 0;
-        break;
-    }
-
-    return ret;
-}
-
-static void ref405ep_fpga_writeb(void *opaque, hwaddr addr, uint64_t value,
-                                 unsigned size)
-{
-    Ref405epFpgaState *fpga = opaque;
-
-    switch (addr) {
-    case 0x0:
-        /* Read only */
-        break;
-    case 0x1:
-        fpga->reg1 = value;
-        break;
-    default:
-        break;
-    }
-}
-
-static const MemoryRegionOps ref405ep_fpga_ops = {
-    .read = ref405ep_fpga_readb,
-    .write = ref405ep_fpga_writeb,
-    .impl.min_access_size = 1,
-    .impl.max_access_size = 1,
-    .valid.min_access_size = 1,
-    .valid.max_access_size = 4,
-    .endianness = DEVICE_BIG_ENDIAN,
-};
-
-static void ref405ep_fpga_reset(DeviceState *dev)
-{
-    Ref405epFpgaState *fpga = REF405EP_FPGA(dev);
-
-    fpga->reg0 = 0x00;
-    fpga->reg1 = 0x0F;
-}
-
-static void ref405ep_fpga_realize(DeviceState *dev, Error **errp)
-{
-    Ref405epFpgaState *s = REF405EP_FPGA(dev);
-
-    memory_region_init_io(&s->iomem, OBJECT(s), &ref405ep_fpga_ops, s,
-                          "fpga", 0x00000100);
-    sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem);
-}
-
-static void ref405ep_fpga_class_init(ObjectClass *oc, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(oc);
-
-    dc->realize = ref405ep_fpga_realize;
-    device_class_set_legacy_reset(dc, ref405ep_fpga_reset);
-    /* Reason: only works as part of a ppc405 board */
-    dc->user_creatable = false;
-}
-
-static const TypeInfo ref405ep_fpga_type = {
-    .name = TYPE_REF405EP_FPGA,
-    .parent = TYPE_SYS_BUS_DEVICE,
-    .instance_size = sizeof(Ref405epFpgaState),
-    .class_init = ref405ep_fpga_class_init,
-};
-
-static void ref405ep_init(MachineState *machine)
-{
-    DeviceState *dev;
-    SysBusDevice *s;
-    MemoryRegion *sram = g_new(MemoryRegion, 1);
-
-    ppc405_init(machine);
-
-    /* allocate SRAM */
-    memory_region_init_ram(sram, NULL, "ref405ep.sram", PPC405EP_SRAM_SIZE,
-                           &error_fatal);
-    memory_region_add_subregion(get_system_memory(), PPC405EP_SRAM_BASE, sram);
-
-    /* Register FPGA */
-    dev = qdev_new(TYPE_REF405EP_FPGA);
-    object_property_add_child(OBJECT(machine), "fpga", OBJECT(dev));
-    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
-    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, PPC405EP_FPGA_BASE);
-
-    /* Register NVRAM */
-    dev = qdev_new("sysbus-m48t08");
-    qdev_prop_set_int32(dev, "base-year", 1968);
-    s = SYS_BUS_DEVICE(dev);
-    sysbus_realize_and_unref(s, &error_fatal);
-    sysbus_mmio_map(s, 0, PPC405EP_NVRAM_BASE);
-}
-
-static void ref405ep_class_init(ObjectClass *oc, void *data)
-{
-    MachineClass *mc = MACHINE_CLASS(oc);
-
-    mc->desc = "ref405ep";
-    mc->init = ref405ep_init;
-}
-
-static const TypeInfo ref405ep_type = {
-    .name = MACHINE_TYPE_NAME("ref405ep"),
-    .parent = TYPE_PPC405_MACHINE,
-    .class_init = ref405ep_class_init,
-};
-
-static void ppc405_machine_init(void)
-{
-    type_register_static(&ppc405_machine_type);
-    type_register_static(&ref405ep_type);
-    type_register_static(&ref405ep_fpga_type);
-}
-
-type_init(ppc405_machine_init)
diff --git a/hw/ppc/ppc405_uc.c b/hw/ppc/ppc405_uc.c
deleted file mode 100644
index 8250824a1a248..0000000000000
--- a/hw/ppc/ppc405_uc.c
+++ /dev/null
@@ -1,1216 +0,0 @@
-/*
- * QEMU PowerPC 405 embedded processors emulation
- *
- * Copyright (c) 2007 Jocelyn Mayer
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-#include "qemu/units.h"
-#include "qapi/error.h"
-#include "qemu/log.h"
-#include "cpu.h"
-#include "hw/ppc/ppc.h"
-#include "hw/i2c/ppc4xx_i2c.h"
-#include "hw/irq.h"
-#include "hw/qdev-properties.h"
-#include "ppc405.h"
-#include "hw/char/serial-mm.h"
-#include "qemu/timer.h"
-#include "system/reset.h"
-#include "system/system.h"
-#include "exec/address-spaces.h"
-#include "hw/intc/ppc-uic.h"
-#include "trace.h"
-
-/*****************************************************************************/
-/* Shared peripherals */
-
-/*****************************************************************************/
-/* PLB to OPB bridge */
-enum {
-    POB0_BESR0 = 0x0A0,
-    POB0_BESR1 = 0x0A2,
-    POB0_BEAR  = 0x0A4,
-};
-
-static uint32_t dcr_read_pob(void *opaque, int dcrn)
-{
-    Ppc405PobState *pob = opaque;
-    uint32_t ret;
-
-    switch (dcrn) {
-    case POB0_BEAR:
-        ret = pob->bear;
-        break;
-    case POB0_BESR0:
-        ret = pob->besr0;
-        break;
-    case POB0_BESR1:
-        ret = pob->besr1;
-        break;
-    default:
-        /* Avoid gcc warning */
-        ret = 0;
-        break;
-    }
-
-    return ret;
-}
-
-static void dcr_write_pob(void *opaque, int dcrn, uint32_t val)
-{
-    Ppc405PobState *pob = opaque;
-
-    switch (dcrn) {
-    case POB0_BEAR:
-        /* Read only */
-        break;
-    case POB0_BESR0:
-        /* Write-clear */
-        pob->besr0 &= ~val;
-        break;
-    case POB0_BESR1:
-        /* Write-clear */
-        pob->besr1 &= ~val;
-        break;
-    }
-}
-
-static void ppc405_pob_reset(DeviceState *dev)
-{
-    Ppc405PobState *pob = PPC405_POB(dev);
-
-    /* No error */
-    pob->bear = 0x00000000;
-    pob->besr0 = 0x0000000;
-    pob->besr1 = 0x0000000;
-}
-
-static void ppc405_pob_realize(DeviceState *dev, Error **errp)
-{
-    Ppc405PobState *pob = PPC405_POB(dev);
-    Ppc4xxDcrDeviceState *dcr = PPC4xx_DCR_DEVICE(dev);
-
-    ppc4xx_dcr_register(dcr, POB0_BEAR, pob, &dcr_read_pob, &dcr_write_pob);
-    ppc4xx_dcr_register(dcr, POB0_BESR0, pob, &dcr_read_pob, &dcr_write_pob);
-    ppc4xx_dcr_register(dcr, POB0_BESR1, pob, &dcr_read_pob, &dcr_write_pob);
-}
-
-static void ppc405_pob_class_init(ObjectClass *oc, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(oc);
-
-    dc->realize = ppc405_pob_realize;
-    device_class_set_legacy_reset(dc, ppc405_pob_reset);
-    /* Reason: only works as function of a ppc4xx SoC */
-    dc->user_creatable = false;
-}
-
-/*****************************************************************************/
-/* OPB arbitrer */
-static uint64_t opba_readb(void *opaque, hwaddr addr, unsigned size)
-{
-    Ppc405OpbaState *opba = opaque;
-    uint32_t ret;
-
-    switch (addr) {
-    case 0x00:
-        ret = opba->cr;
-        break;
-    case 0x01:
-        ret = opba->pr;
-        break;
-    default:
-        ret = 0x00;
-        break;
-    }
-
-    trace_opba_readb(addr, ret);
-    return ret;
-}
-
-static void opba_writeb(void *opaque, hwaddr addr, uint64_t value,
-                        unsigned size)
-{
-    Ppc405OpbaState *opba = opaque;
-
-    trace_opba_writeb(addr, value);
-
-    switch (addr) {
-    case 0x00:
-        opba->cr = value & 0xF8;
-        break;
-    case 0x01:
-        opba->pr = value & 0xFF;
-        break;
-    default:
-        break;
-    }
-}
-static const MemoryRegionOps opba_ops = {
-    .read = opba_readb,
-    .write = opba_writeb,
-    .impl.min_access_size = 1,
-    .impl.max_access_size = 1,
-    .valid.min_access_size = 1,
-    .valid.max_access_size = 4,
-    .endianness = DEVICE_BIG_ENDIAN,
-};
-
-static void ppc405_opba_reset(DeviceState *dev)
-{
-    Ppc405OpbaState *opba = PPC405_OPBA(dev);
-
-    opba->cr = 0x00; /* No dynamic priorities - park disabled */
-    opba->pr = 0x11;
-}
-
-static void ppc405_opba_realize(DeviceState *dev, Error **errp)
-{
-    Ppc405OpbaState *s = PPC405_OPBA(dev);
-
-    memory_region_init_io(&s->io, OBJECT(s), &opba_ops, s, "opba", 2);
-    sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->io);
-}
-
-static void ppc405_opba_class_init(ObjectClass *oc, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(oc);
-
-    dc->realize = ppc405_opba_realize;
-    device_class_set_legacy_reset(dc, ppc405_opba_reset);
-    /* Reason: only works as function of a ppc4xx SoC */
-    dc->user_creatable = false;
-}
-
-/*****************************************************************************/
-/* Code decompression controller */
-/* XXX: TODO */
-
-/*****************************************************************************/
-/* DMA controller */
-enum {
-    DMA0_CR0 = 0x100,
-    DMA0_CT0 = 0x101,
-    DMA0_DA0 = 0x102,
-    DMA0_SA0 = 0x103,
-    DMA0_SG0 = 0x104,
-    DMA0_CR1 = 0x108,
-    DMA0_CT1 = 0x109,
-    DMA0_DA1 = 0x10A,
-    DMA0_SA1 = 0x10B,
-    DMA0_SG1 = 0x10C,
-    DMA0_CR2 = 0x110,
-    DMA0_CT2 = 0x111,
-    DMA0_DA2 = 0x112,
-    DMA0_SA2 = 0x113,
-    DMA0_SG2 = 0x114,
-    DMA0_CR3 = 0x118,
-    DMA0_CT3 = 0x119,
-    DMA0_DA3 = 0x11A,
-    DMA0_SA3 = 0x11B,
-    DMA0_SG3 = 0x11C,
-    DMA0_SR  = 0x120,
-    DMA0_SGC = 0x123,
-    DMA0_SLP = 0x125,
-    DMA0_POL = 0x126,
-};
-
-static uint32_t dcr_read_dma(void *opaque, int dcrn)
-{
-    return 0;
-}
-
-static void dcr_write_dma(void *opaque, int dcrn, uint32_t val)
-{
-}
-
-static void ppc405_dma_reset(DeviceState *dev)
-{
-    Ppc405DmaState *dma = PPC405_DMA(dev);
-    int i;
-
-    for (i = 0; i < 4; i++) {
-        dma->cr[i] = 0x00000000;
-        dma->ct[i] = 0x00000000;
-        dma->da[i] = 0x00000000;
-        dma->sa[i] = 0x00000000;
-        dma->sg[i] = 0x00000000;
-    }
-    dma->sr = 0x00000000;
-    dma->sgc = 0x00000000;
-    dma->slp = 0x7C000000;
-    dma->pol = 0x00000000;
-}
-
-static void ppc405_dma_realize(DeviceState *dev, Error **errp)
-{
-    Ppc405DmaState *dma = PPC405_DMA(dev);
-    Ppc4xxDcrDeviceState *dcr = PPC4xx_DCR_DEVICE(dev);
-    int i;
-
-    for (i = 0; i < ARRAY_SIZE(dma->irqs); i++) {
-        sysbus_init_irq(SYS_BUS_DEVICE(dma), &dma->irqs[i]);
-    }
-
-    ppc4xx_dcr_register(dcr, DMA0_CR0, dma, &dcr_read_dma, &dcr_write_dma);
-    ppc4xx_dcr_register(dcr, DMA0_CT0, dma, &dcr_read_dma, &dcr_write_dma);
-    ppc4xx_dcr_register(dcr, DMA0_DA0, dma, &dcr_read_dma, &dcr_write_dma);
-    ppc4xx_dcr_register(dcr, DMA0_SA0, dma, &dcr_read_dma, &dcr_write_dma);
-    ppc4xx_dcr_register(dcr, DMA0_SG0, dma, &dcr_read_dma, &dcr_write_dma);
-    ppc4xx_dcr_register(dcr, DMA0_CR1, dma, &dcr_read_dma, &dcr_write_dma);
-    ppc4xx_dcr_register(dcr, DMA0_CT1, dma, &dcr_read_dma, &dcr_write_dma);
-    ppc4xx_dcr_register(dcr, DMA0_DA1, dma, &dcr_read_dma, &dcr_write_dma);
-    ppc4xx_dcr_register(dcr, DMA0_SA1, dma, &dcr_read_dma, &dcr_write_dma);
-    ppc4xx_dcr_register(dcr, DMA0_SG1, dma, &dcr_read_dma, &dcr_write_dma);
-    ppc4xx_dcr_register(dcr, DMA0_CR2, dma, &dcr_read_dma, &dcr_write_dma);
-    ppc4xx_dcr_register(dcr, DMA0_CT2, dma, &dcr_read_dma, &dcr_write_dma);
-    ppc4xx_dcr_register(dcr, DMA0_DA2, dma, &dcr_read_dma, &dcr_write_dma);
-    ppc4xx_dcr_register(dcr, DMA0_SA2, dma, &dcr_read_dma, &dcr_write_dma);
-    ppc4xx_dcr_register(dcr, DMA0_SG2, dma, &dcr_read_dma, &dcr_write_dma);
-    ppc4xx_dcr_register(dcr, DMA0_CR3, dma, &dcr_read_dma, &dcr_write_dma);
-    ppc4xx_dcr_register(dcr, DMA0_CT3, dma, &dcr_read_dma, &dcr_write_dma);
-    ppc4xx_dcr_register(dcr, DMA0_DA3, dma, &dcr_read_dma, &dcr_write_dma);
-    ppc4xx_dcr_register(dcr, DMA0_SA3, dma, &dcr_read_dma, &dcr_write_dma);
-    ppc4xx_dcr_register(dcr, DMA0_SG3, dma, &dcr_read_dma, &dcr_write_dma);
-    ppc4xx_dcr_register(dcr, DMA0_SR,  dma, &dcr_read_dma, &dcr_write_dma);
-    ppc4xx_dcr_register(dcr, DMA0_SGC, dma, &dcr_read_dma, &dcr_write_dma);
-    ppc4xx_dcr_register(dcr, DMA0_SLP, dma, &dcr_read_dma, &dcr_write_dma);
-    ppc4xx_dcr_register(dcr, DMA0_POL, dma, &dcr_read_dma, &dcr_write_dma);
-}
-
-static void ppc405_dma_class_init(ObjectClass *oc, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(oc);
-
-    dc->realize = ppc405_dma_realize;
-    device_class_set_legacy_reset(dc, ppc405_dma_reset);
-    /* Reason: only works as function of a ppc4xx SoC */
-    dc->user_creatable = false;
-}
-
-/*****************************************************************************/
-/* GPIO */
-static uint64_t ppc405_gpio_read(void *opaque, hwaddr addr, unsigned size)
-{
-    trace_ppc405_gpio_read(addr, size);
-    return 0;
-}
-
-static void ppc405_gpio_write(void *opaque, hwaddr addr, uint64_t value,
-                              unsigned size)
-{
-    trace_ppc405_gpio_write(addr, size, value);
-}
-
-static const MemoryRegionOps ppc405_gpio_ops = {
-    .read = ppc405_gpio_read,
-    .write = ppc405_gpio_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
-};
-
-static void ppc405_gpio_realize(DeviceState *dev, Error **errp)
-{
-    Ppc405GpioState *s = PPC405_GPIO(dev);
-
-    memory_region_init_io(&s->io, OBJECT(s), &ppc405_gpio_ops, s, "gpio",
-                          0x38);
-    sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->io);
-}
-
-static void ppc405_gpio_class_init(ObjectClass *oc, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(oc);
-
-    dc->realize = ppc405_gpio_realize;
-    /* Reason: only works as function of a ppc4xx SoC */
-    dc->user_creatable = false;
-}
-
-/*****************************************************************************/
-/* On Chip Memory */
-enum {
-    OCM0_ISARC   = 0x018,
-    OCM0_ISACNTL = 0x019,
-    OCM0_DSARC   = 0x01A,
-    OCM0_DSACNTL = 0x01B,
-};
-
-static void ocm_update_mappings(Ppc405OcmState *ocm,
-                                uint32_t isarc, uint32_t isacntl,
-                                uint32_t dsarc, uint32_t dsacntl)
-{
-    trace_ocm_update_mappings(isarc, isacntl, dsarc, dsacntl, ocm->isarc,
-                              ocm->isacntl, ocm->dsarc, ocm->dsacntl);
-
-    if (ocm->isarc != isarc ||
-        (ocm->isacntl & 0x80000000) != (isacntl & 0x80000000)) {
-        if (ocm->isacntl & 0x80000000) {
-            /* Unmap previously assigned memory region */
-            trace_ocm_unmap("ISA", ocm->isarc);
-            memory_region_del_subregion(get_system_memory(), &ocm->isarc_ram);
-        }
-        if (isacntl & 0x80000000) {
-            /* Map new instruction memory region */
-            trace_ocm_map("ISA", isarc);
-            memory_region_add_subregion(get_system_memory(), isarc,
-                                        &ocm->isarc_ram);
-        }
-    }
-    if (ocm->dsarc != dsarc ||
-        (ocm->dsacntl & 0x80000000) != (dsacntl & 0x80000000)) {
-        if (ocm->dsacntl & 0x80000000) {
-            /* Beware not to unmap the region we just mapped */
-            if (!(isacntl & 0x80000000) || ocm->dsarc != isarc) {
-                /* Unmap previously assigned memory region */
-                trace_ocm_unmap("DSA", ocm->dsarc);
-                memory_region_del_subregion(get_system_memory(),
-                                            &ocm->dsarc_ram);
-            }
-        }
-        if (dsacntl & 0x80000000) {
-            /* Beware not to remap the region we just mapped */
-            if (!(isacntl & 0x80000000) || dsarc != isarc) {
-                /* Map new data memory region */
-                trace_ocm_map("DSA", dsarc);
-                memory_region_add_subregion(get_system_memory(), dsarc,
-                                            &ocm->dsarc_ram);
-            }
-        }
-    }
-}
-
-static uint32_t dcr_read_ocm(void *opaque, int dcrn)
-{
-    Ppc405OcmState *ocm = opaque;
-    uint32_t ret;
-
-    switch (dcrn) {
-    case OCM0_ISARC:
-        ret = ocm->isarc;
-        break;
-    case OCM0_ISACNTL:
-        ret = ocm->isacntl;
-        break;
-    case OCM0_DSARC:
-        ret = ocm->dsarc;
-        break;
-    case OCM0_DSACNTL:
-        ret = ocm->dsacntl;
-        break;
-    default:
-        ret = 0;
-        break;
-    }
-
-    return ret;
-}
-
-static void dcr_write_ocm(void *opaque, int dcrn, uint32_t val)
-{
-    Ppc405OcmState *ocm = opaque;
-    uint32_t isarc, dsarc, isacntl, dsacntl;
-
-    isarc = ocm->isarc;
-    dsarc = ocm->dsarc;
-    isacntl = ocm->isacntl;
-    dsacntl = ocm->dsacntl;
-    switch (dcrn) {
-    case OCM0_ISARC:
-        isarc = val & 0xFC000000;
-        break;
-    case OCM0_ISACNTL:
-        isacntl = val & 0xC0000000;
-        break;
-    case OCM0_DSARC:
-        isarc = val & 0xFC000000;
-        break;
-    case OCM0_DSACNTL:
-        isacntl = val & 0xC0000000;
-        break;
-    }
-    ocm_update_mappings(ocm, isarc, isacntl, dsarc, dsacntl);
-    ocm->isarc = isarc;
-    ocm->dsarc = dsarc;
-    ocm->isacntl = isacntl;
-    ocm->dsacntl = dsacntl;
-}
-
-static void ppc405_ocm_reset(DeviceState *dev)
-{
-    Ppc405OcmState *ocm = PPC405_OCM(dev);
-    uint32_t isarc, dsarc, isacntl, dsacntl;
-
-    isarc = 0x00000000;
-    isacntl = 0x00000000;
-    dsarc = 0x00000000;
-    dsacntl = 0x00000000;
-    ocm_update_mappings(ocm, isarc, isacntl, dsarc, dsacntl);
-    ocm->isarc = isarc;
-    ocm->dsarc = dsarc;
-    ocm->isacntl = isacntl;
-    ocm->dsacntl = dsacntl;
-}
-
-static void ppc405_ocm_realize(DeviceState *dev, Error **errp)
-{
-    Ppc405OcmState *ocm = PPC405_OCM(dev);
-    Ppc4xxDcrDeviceState *dcr = PPC4xx_DCR_DEVICE(dev);
-
-    /* XXX: Size is 4096 or 0x04000000 */
-    memory_region_init_ram(&ocm->isarc_ram, OBJECT(ocm), "ppc405.ocm", 4 * KiB,
-                           &error_fatal);
-    memory_region_init_alias(&ocm->dsarc_ram, OBJECT(ocm), "ppc405.dsarc",
-                             &ocm->isarc_ram, 0, 4 * KiB);
-
-    ppc4xx_dcr_register(dcr, OCM0_ISARC, ocm, &dcr_read_ocm, &dcr_write_ocm);
-    ppc4xx_dcr_register(dcr, OCM0_ISACNTL, ocm, &dcr_read_ocm, &dcr_write_ocm);
-    ppc4xx_dcr_register(dcr, OCM0_DSARC, ocm, &dcr_read_ocm, &dcr_write_ocm);
-    ppc4xx_dcr_register(dcr, OCM0_DSACNTL, ocm, &dcr_read_ocm, &dcr_write_ocm);
-}
-
-static void ppc405_ocm_class_init(ObjectClass *oc, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(oc);
-
-    dc->realize = ppc405_ocm_realize;
-    device_class_set_legacy_reset(dc, ppc405_ocm_reset);
-    /* Reason: only works as function of a ppc4xx SoC */
-    dc->user_creatable = false;
-}
-
-/*****************************************************************************/
-/* General purpose timers */
-static int ppc4xx_gpt_compare(Ppc405GptState *gpt, int n)
-{
-    /* XXX: TODO */
-    return 0;
-}
-
-static void ppc4xx_gpt_set_output(Ppc405GptState *gpt, int n, int level)
-{
-    /* XXX: TODO */
-}
-
-static void ppc4xx_gpt_set_outputs(Ppc405GptState *gpt)
-{
-    uint32_t mask;
-    int i;
-
-    mask = 0x80000000;
-    for (i = 0; i < 5; i++) {
-        if (gpt->oe & mask) {
-            /* Output is enabled */
-            if (ppc4xx_gpt_compare(gpt, i)) {
-                /* Comparison is OK */
-                ppc4xx_gpt_set_output(gpt, i, gpt->ol & mask);
-            } else {
-                /* Comparison is KO */
-                ppc4xx_gpt_set_output(gpt, i, gpt->ol & mask ? 0 : 1);
-            }
-        }
-        mask = mask >> 1;
-    }
-}
-
-static void ppc4xx_gpt_set_irqs(Ppc405GptState *gpt)
-{
-    uint32_t mask;
-    int i;
-
-    mask = 0x00008000;
-    for (i = 0; i < 5; i++) {
-        if (gpt->is & gpt->im & mask) {
-            qemu_irq_raise(gpt->irqs[i]);
-        } else {
-            qemu_irq_lower(gpt->irqs[i]);
-        }
-        mask = mask >> 1;
-    }
-}
-
-static void ppc4xx_gpt_compute_timer(Ppc405GptState *gpt)
-{
-    /* XXX: TODO */
-}
-
-static uint64_t ppc4xx_gpt_read(void *opaque, hwaddr addr, unsigned size)
-{
-    Ppc405GptState *gpt = opaque;
-    uint32_t ret;
-    int idx;
-
-    trace_ppc4xx_gpt_read(addr, size);
-
-    switch (addr) {
-    case 0x00:
-        /* Time base counter */
-        ret = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + gpt->tb_offset,
-                       gpt->tb_freq, NANOSECONDS_PER_SECOND);
-        break;
-    case 0x10:
-        /* Output enable */
-        ret = gpt->oe;
-        break;
-    case 0x14:
-        /* Output level */
-        ret = gpt->ol;
-        break;
-    case 0x18:
-        /* Interrupt mask */
-        ret = gpt->im;
-        break;
-    case 0x1C:
-    case 0x20:
-        /* Interrupt status */
-        ret = gpt->is;
-        break;
-    case 0x24:
-        /* Interrupt enable */
-        ret = gpt->ie;
-        break;
-    case 0x80 ... 0x90:
-        /* Compare timer */
-        idx = (addr - 0x80) >> 2;
-        ret = gpt->comp[idx];
-        break;
-    case 0xC0 ... 0xD0:
-        /* Compare mask */
-        idx = (addr - 0xC0) >> 2;
-        ret = gpt->mask[idx];
-        break;
-    default:
-        ret = -1;
-        break;
-    }
-
-    return ret;
-}
-
-static void ppc4xx_gpt_write(void *opaque, hwaddr addr, uint64_t value,
-                             unsigned size)
-{
-    Ppc405GptState *gpt = opaque;
-    int idx;
-
-    trace_ppc4xx_gpt_write(addr, size, value);
-
-    switch (addr) {
-    case 0x00:
-        /* Time base counter */
-        gpt->tb_offset = muldiv64(value, NANOSECONDS_PER_SECOND, gpt->tb_freq)
-            - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
-        ppc4xx_gpt_compute_timer(gpt);
-        break;
-    case 0x10:
-        /* Output enable */
-        gpt->oe = value & 0xF8000000;
-        ppc4xx_gpt_set_outputs(gpt);
-        break;
-    case 0x14:
-        /* Output level */
-        gpt->ol = value & 0xF8000000;
-        ppc4xx_gpt_set_outputs(gpt);
-        break;
-    case 0x18:
-        /* Interrupt mask */
-        gpt->im = value & 0x0000F800;
-        break;
-    case 0x1C:
-        /* Interrupt status set */
-        gpt->is |= value & 0x0000F800;
-        ppc4xx_gpt_set_irqs(gpt);
-        break;
-    case 0x20:
-        /* Interrupt status clear */
-        gpt->is &= ~(value & 0x0000F800);
-        ppc4xx_gpt_set_irqs(gpt);
-        break;
-    case 0x24:
-        /* Interrupt enable */
-        gpt->ie = value & 0x0000F800;
-        ppc4xx_gpt_set_irqs(gpt);
-        break;
-    case 0x80 ... 0x90:
-        /* Compare timer */
-        idx = (addr - 0x80) >> 2;
-        gpt->comp[idx] = value & 0xF8000000;
-        ppc4xx_gpt_compute_timer(gpt);
-        break;
-    case 0xC0 ... 0xD0:
-        /* Compare mask */
-        idx = (addr - 0xC0) >> 2;
-        gpt->mask[idx] = value & 0xF8000000;
-        ppc4xx_gpt_compute_timer(gpt);
-        break;
-    }
-}
-
-static const MemoryRegionOps gpt_ops = {
-    .read = ppc4xx_gpt_read,
-    .write = ppc4xx_gpt_write,
-    .valid.min_access_size = 4,
-    .valid.max_access_size = 4,
-    .endianness = DEVICE_NATIVE_ENDIAN,
-};
-
-static void ppc4xx_gpt_cb(void *opaque)
-{
-    Ppc405GptState *gpt = opaque;
-
-    ppc4xx_gpt_set_irqs(gpt);
-    ppc4xx_gpt_set_outputs(gpt);
-    ppc4xx_gpt_compute_timer(gpt);
-}
-
-static void ppc405_gpt_reset(DeviceState *dev)
-{
-    Ppc405GptState *gpt = PPC405_GPT(dev);
-    int i;
-
-    timer_del(gpt->timer);
-    gpt->oe = 0x00000000;
-    gpt->ol = 0x00000000;
-    gpt->im = 0x00000000;
-    gpt->is = 0x00000000;
-    gpt->ie = 0x00000000;
-    for (i = 0; i < 5; i++) {
-        gpt->comp[i] = 0x00000000;
-        gpt->mask[i] = 0x00000000;
-    }
-}
-
-static void ppc405_gpt_realize(DeviceState *dev, Error **errp)
-{
-    Ppc405GptState *s = PPC405_GPT(dev);
-    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
-    int i;
-
-    s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &ppc4xx_gpt_cb, s);
-    memory_region_init_io(&s->iomem, OBJECT(s), &gpt_ops, s, "gpt", 0xd4);
-    sysbus_init_mmio(sbd, &s->iomem);
-
-    for (i = 0; i < ARRAY_SIZE(s->irqs); i++) {
-        sysbus_init_irq(sbd, &s->irqs[i]);
-    }
-}
-
-static void ppc405_gpt_finalize(Object *obj)
-{
-    /* timer will be NULL if the GPT wasn't realized */
-    if (PPC405_GPT(obj)->timer) {
-        timer_del(PPC405_GPT(obj)->timer);
-    }
-}
-
-static void ppc405_gpt_class_init(ObjectClass *oc, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(oc);
-
-    dc->realize = ppc405_gpt_realize;
-    device_class_set_legacy_reset(dc, ppc405_gpt_reset);
-    /* Reason: only works as function of a ppc4xx SoC */
-    dc->user_creatable = false;
-}
-
-/*****************************************************************************/
-/* PowerPC 405EP */
-/* CPU control */
-enum {
-    PPC405EP_CPC0_PLLMR0 = 0x0F0,
-    PPC405EP_CPC0_BOOT   = 0x0F1,
-    PPC405EP_CPC0_EPCTL  = 0x0F3,
-    PPC405EP_CPC0_PLLMR1 = 0x0F4,
-    PPC405EP_CPC0_UCR    = 0x0F5,
-    PPC405EP_CPC0_SRR    = 0x0F6,
-    PPC405EP_CPC0_JTAGID = 0x0F7,
-    PPC405EP_CPC0_PCI    = 0x0F9,
-#if 0
-    PPC405EP_CPC0_ER     = xxx,
-    PPC405EP_CPC0_FR     = xxx,
-    PPC405EP_CPC0_SR     = xxx,
-#endif
-};
-
-static void ppc405ep_compute_clocks(Ppc405CpcState *cpc)
-{
-    uint32_t CPU_clk, PLB_clk, OPB_clk, EBC_clk, MAL_clk, PCI_clk;
-    uint32_t UART0_clk, UART1_clk;
-    uint64_t VCO_out, PLL_out;
-    int M, D;
-
-    VCO_out = 0;
-    if ((cpc->pllmr[1] & 0x80000000) && !(cpc->pllmr[1] & 0x40000000)) {
-        M = (((cpc->pllmr[1] >> 20) - 1) & 0xF) + 1; /* FBMUL */
-        trace_ppc405ep_clocks_compute("FBMUL", (cpc->pllmr[1] >> 20) & 0xF, M);
-        D = 8 - ((cpc->pllmr[1] >> 16) & 0x7); /* FWDA */
-        trace_ppc405ep_clocks_compute("FWDA", (cpc->pllmr[1] >> 16) & 0x7, D);
-        VCO_out = (uint64_t)cpc->sysclk * M * D;
-        if (VCO_out < 500000000UL || VCO_out > 1000000000UL) {
-            /* Error - unlock the PLL */
-            qemu_log_mask(LOG_GUEST_ERROR, "VCO out of range %" PRIu64 "\n",
-                          VCO_out);
-#if 0
-            cpc->pllmr[1] &= ~0x80000000;
-            goto pll_bypass;
-#endif
-        }
-        PLL_out = VCO_out / D;
-        /* Pretend the PLL is locked */
-        cpc->boot |= 0x00000001;
-    } else {
-#if 0
-    pll_bypass:
-#endif
-        PLL_out = cpc->sysclk;
-        if (cpc->pllmr[1] & 0x40000000) {
-            /* Pretend the PLL is not locked */
-            cpc->boot &= ~0x00000001;
-        }
-    }
-    /* Now, compute all other clocks */
-    D = ((cpc->pllmr[0] >> 20) & 0x3) + 1; /* CCDV */
-     trace_ppc405ep_clocks_compute("CCDV", (cpc->pllmr[0] >> 20) & 0x3, D);
-    CPU_clk = PLL_out / D;
-    D = ((cpc->pllmr[0] >> 16) & 0x3) + 1; /* CBDV */
-    trace_ppc405ep_clocks_compute("CBDV", (cpc->pllmr[0] >> 16) & 0x3, D);
-    PLB_clk = CPU_clk / D;
-    D = ((cpc->pllmr[0] >> 12) & 0x3) + 1; /* OPDV */
-    trace_ppc405ep_clocks_compute("OPDV", (cpc->pllmr[0] >> 12) & 0x3, D);
-    OPB_clk = PLB_clk / D;
-    D = ((cpc->pllmr[0] >> 8) & 0x3) + 2; /* EPDV */
-    trace_ppc405ep_clocks_compute("EPDV", (cpc->pllmr[0] >> 8) & 0x3, D);
-    EBC_clk = PLB_clk / D;
-    D = ((cpc->pllmr[0] >> 4) & 0x3) + 1; /* MPDV */
-    trace_ppc405ep_clocks_compute("MPDV", (cpc->pllmr[0] >> 4) & 0x3, D);
-    MAL_clk = PLB_clk / D;
-    D = (cpc->pllmr[0] & 0x3) + 1; /* PPDV */
-    trace_ppc405ep_clocks_compute("PPDV", cpc->pllmr[0] & 0x3, D);
-    PCI_clk = PLB_clk / D;
-    D = ((cpc->ucr - 1) & 0x7F) + 1; /* U0DIV */
-    trace_ppc405ep_clocks_compute("U0DIV", cpc->ucr & 0x7F, D);
-    UART0_clk = PLL_out / D;
-    D = (((cpc->ucr >> 8) - 1) & 0x7F) + 1; /* U1DIV */
-    trace_ppc405ep_clocks_compute("U1DIV", (cpc->ucr >> 8) & 0x7F, D);
-    UART1_clk = PLL_out / D;
-
-    if (trace_event_get_state_backends(TRACE_PPC405EP_CLOCKS_SETUP)) {
-        g_autofree char *trace = g_strdup_printf(
-            "Setup PPC405EP clocks - sysclk %" PRIu32 " VCO %" PRIu64
-            " PLL out %" PRIu64 " Hz\n"
-            "CPU %" PRIu32 " PLB %" PRIu32 " OPB %" PRIu32 " EBC %" PRIu32
-            " MAL %" PRIu32 " PCI %" PRIu32 " UART0 %" PRIu32
-            " UART1 %" PRIu32 "\n",
-            cpc->sysclk, VCO_out, PLL_out,
-            CPU_clk, PLB_clk, OPB_clk, EBC_clk, MAL_clk, PCI_clk,
-            UART0_clk, UART1_clk);
-        trace_ppc405ep_clocks_setup(trace);
-    }
-
-    /* Setup CPU clocks */
-    clk_setup(&cpc->clk_setup[PPC405EP_CPU_CLK], CPU_clk);
-    /* Setup PLB clock */
-    clk_setup(&cpc->clk_setup[PPC405EP_PLB_CLK], PLB_clk);
-    /* Setup OPB clock */
-    clk_setup(&cpc->clk_setup[PPC405EP_OPB_CLK], OPB_clk);
-    /* Setup external clock */
-    clk_setup(&cpc->clk_setup[PPC405EP_EBC_CLK], EBC_clk);
-    /* Setup MAL clock */
-    clk_setup(&cpc->clk_setup[PPC405EP_MAL_CLK], MAL_clk);
-    /* Setup PCI clock */
-    clk_setup(&cpc->clk_setup[PPC405EP_PCI_CLK], PCI_clk);
-    /* Setup UART0 clock */
-    clk_setup(&cpc->clk_setup[PPC405EP_UART0_CLK], UART0_clk);
-    /* Setup UART1 clock */
-    clk_setup(&cpc->clk_setup[PPC405EP_UART1_CLK], UART1_clk);
-}
-
-static uint32_t dcr_read_epcpc(void *opaque, int dcrn)
-{
-    Ppc405CpcState *cpc = opaque;
-    uint32_t ret;
-
-    switch (dcrn) {
-    case PPC405EP_CPC0_BOOT:
-        ret = cpc->boot;
-        break;
-    case PPC405EP_CPC0_EPCTL:
-        ret = cpc->epctl;
-        break;
-    case PPC405EP_CPC0_PLLMR0:
-        ret = cpc->pllmr[0];
-        break;
-    case PPC405EP_CPC0_PLLMR1:
-        ret = cpc->pllmr[1];
-        break;
-    case PPC405EP_CPC0_UCR:
-        ret = cpc->ucr;
-        break;
-    case PPC405EP_CPC0_SRR:
-        ret = cpc->srr;
-        break;
-    case PPC405EP_CPC0_JTAGID:
-        ret = cpc->jtagid;
-        break;
-    case PPC405EP_CPC0_PCI:
-        ret = cpc->pci;
-        break;
-    default:
-        /* Avoid gcc warning */
-        ret = 0;
-        break;
-    }
-
-    return ret;
-}
-
-static void dcr_write_epcpc(void *opaque, int dcrn, uint32_t val)
-{
-    Ppc405CpcState *cpc = opaque;
-
-    switch (dcrn) {
-    case PPC405EP_CPC0_BOOT:
-        /* Read-only register */
-        break;
-    case PPC405EP_CPC0_EPCTL:
-        /* Don't care for now */
-        cpc->epctl = val & 0xC00000F3;
-        break;
-    case PPC405EP_CPC0_PLLMR0:
-        cpc->pllmr[0] = val & 0x00633333;
-        ppc405ep_compute_clocks(cpc);
-        break;
-    case PPC405EP_CPC0_PLLMR1:
-        cpc->pllmr[1] = val & 0xC0F73FFF;
-        ppc405ep_compute_clocks(cpc);
-        break;
-    case PPC405EP_CPC0_UCR:
-        /* UART control - don't care for now */
-        cpc->ucr = val & 0x003F7F7F;
-        break;
-    case PPC405EP_CPC0_SRR:
-        cpc->srr = val;
-        break;
-    case PPC405EP_CPC0_JTAGID:
-        /* Read-only */
-        break;
-    case PPC405EP_CPC0_PCI:
-        cpc->pci = val;
-        break;
-    }
-}
-
-static void ppc405_cpc_reset(DeviceState *dev)
-{
-    Ppc405CpcState *cpc = PPC405_CPC(dev);
-
-    cpc->boot = 0x00000010;     /* Boot from PCI - IIC EEPROM disabled */
-    cpc->epctl = 0x00000000;
-    cpc->pllmr[0] = 0x00021002;
-    cpc->pllmr[1] = 0x80a552be;
-    cpc->ucr = 0x00004646;
-    cpc->srr = 0x00040000;
-    cpc->pci = 0x00000000;
-    cpc->er = 0x00000000;
-    cpc->fr = 0x00000000;
-    cpc->sr = 0x00000000;
-    cpc->jtagid = 0x20267049;
-    ppc405ep_compute_clocks(cpc);
-}
-
-/* XXX: sysclk should be between 25 and 100 MHz */
-static void ppc405_cpc_realize(DeviceState *dev, Error **errp)
-{
-    Ppc405CpcState *cpc = PPC405_CPC(dev);
-    Ppc4xxDcrDeviceState *dcr = PPC4xx_DCR_DEVICE(dev);
-
-    assert(dcr->cpu);
-    cpc->clk_setup[PPC405EP_CPU_CLK].cb =
-        ppc_40x_timers_init(&dcr->cpu->env, cpc->sysclk, PPC_INTERRUPT_PIT);
-    cpc->clk_setup[PPC405EP_CPU_CLK].opaque = &dcr->cpu->env;
-
-    ppc4xx_dcr_register(dcr, PPC405EP_CPC0_BOOT, cpc,
-                        &dcr_read_epcpc, &dcr_write_epcpc);
-    ppc4xx_dcr_register(dcr, PPC405EP_CPC0_EPCTL, cpc,
-                        &dcr_read_epcpc, &dcr_write_epcpc);
-    ppc4xx_dcr_register(dcr, PPC405EP_CPC0_PLLMR0, cpc,
-                        &dcr_read_epcpc, &dcr_write_epcpc);
-    ppc4xx_dcr_register(dcr, PPC405EP_CPC0_PLLMR1, cpc,
-                        &dcr_read_epcpc, &dcr_write_epcpc);
-    ppc4xx_dcr_register(dcr, PPC405EP_CPC0_UCR, cpc,
-                        &dcr_read_epcpc, &dcr_write_epcpc);
-    ppc4xx_dcr_register(dcr, PPC405EP_CPC0_SRR, cpc,
-                        &dcr_read_epcpc, &dcr_write_epcpc);
-    ppc4xx_dcr_register(dcr, PPC405EP_CPC0_JTAGID, cpc,
-                        &dcr_read_epcpc, &dcr_write_epcpc);
-    ppc4xx_dcr_register(dcr, PPC405EP_CPC0_PCI, cpc,
-                        &dcr_read_epcpc, &dcr_write_epcpc);
-}
-
-static const Property ppc405_cpc_properties[] = {
-    DEFINE_PROP_UINT32("sys-clk", Ppc405CpcState, sysclk, 0),
-};
-
-static void ppc405_cpc_class_init(ObjectClass *oc, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(oc);
-
-    dc->realize = ppc405_cpc_realize;
-    device_class_set_legacy_reset(dc, ppc405_cpc_reset);
-    /* Reason: only works as function of a ppc4xx SoC */
-    dc->user_creatable = false;
-    device_class_set_props(dc, ppc405_cpc_properties);
-}
-
-/* PPC405_SOC */
-
-static void ppc405_soc_instance_init(Object *obj)
-{
-    Ppc405SoCState *s = PPC405_SOC(obj);
-
-    object_initialize_child(obj, "cpu", &s->cpu,
-                            POWERPC_CPU_TYPE_NAME("405ep"));
-
-    object_initialize_child(obj, "uic", &s->uic, TYPE_PPC_UIC);
-
-    object_initialize_child(obj, "cpc", &s->cpc, TYPE_PPC405_CPC);
-    object_property_add_alias(obj, "sys-clk", OBJECT(&s->cpc), "sys-clk");
-
-    object_initialize_child(obj, "gpt", &s->gpt, TYPE_PPC405_GPT);
-
-    object_initialize_child(obj, "ocm", &s->ocm, TYPE_PPC405_OCM);
-
-    object_initialize_child(obj, "gpio", &s->gpio, TYPE_PPC405_GPIO);
-
-    object_initialize_child(obj, "dma", &s->dma, TYPE_PPC405_DMA);
-
-    object_initialize_child(obj, "i2c", &s->i2c, TYPE_PPC4xx_I2C);
-
-    object_initialize_child(obj, "ebc", &s->ebc, TYPE_PPC4xx_EBC);
-
-    object_initialize_child(obj, "opba", &s->opba, TYPE_PPC405_OPBA);
-
-    object_initialize_child(obj, "pob", &s->pob, TYPE_PPC405_POB);
-
-    object_initialize_child(obj, "plb", &s->plb, TYPE_PPC4xx_PLB);
-
-    object_initialize_child(obj, "mal", &s->mal, TYPE_PPC4xx_MAL);
-
-    object_initialize_child(obj, "sdram", &s->sdram, TYPE_PPC4xx_SDRAM_DDR);
-    object_property_add_alias(obj, "dram", OBJECT(&s->sdram), "dram");
-}
-
-static void ppc405_reset(void *opaque)
-{
-    cpu_reset(CPU(opaque));
-}
-
-static void ppc405_soc_realize(DeviceState *dev, Error **errp)
-{
-    Ppc405SoCState *s = PPC405_SOC(dev);
-    CPUPPCState *env;
-    SysBusDevice *sbd;
-    int i;
-
-    /* init CPUs */
-    if (!qdev_realize(DEVICE(&s->cpu), NULL, errp)) {
-        return;
-    }
-    qemu_register_reset(ppc405_reset, &s->cpu);
-
-    env = &s->cpu.env;
-
-    ppc_dcr_init(env, NULL, NULL);
-
-    /* CPU control */
-    if (!ppc4xx_dcr_realize(PPC4xx_DCR_DEVICE(&s->cpc), &s->cpu, errp)) {
-        return;
-    }
-
-    /* PLB arbitrer */
-    if (!ppc4xx_dcr_realize(PPC4xx_DCR_DEVICE(&s->plb), &s->cpu, errp)) {
-        return;
-    }
-
-    /* PLB to OPB bridge */
-    if (!ppc4xx_dcr_realize(PPC4xx_DCR_DEVICE(&s->pob), &s->cpu, errp)) {
-        return;
-    }
-
-    /* OBP arbitrer */
-    sbd = SYS_BUS_DEVICE(&s->opba);
-    if (!sysbus_realize(sbd, errp)) {
-        return;
-    }
-    sysbus_mmio_map(sbd, 0, 0xef600600);
-
-    /* Universal interrupt controller */
-    if (!ppc4xx_dcr_realize(PPC4xx_DCR_DEVICE(&s->uic), &s->cpu, errp)) {
-        return;
-    }
-    sbd = SYS_BUS_DEVICE(&s->uic);
-    sysbus_connect_irq(sbd, PPCUIC_OUTPUT_INT,
-                       qdev_get_gpio_in(DEVICE(&s->cpu), PPC40x_INPUT_INT));
-    sysbus_connect_irq(sbd, PPCUIC_OUTPUT_CINT,
-                       qdev_get_gpio_in(DEVICE(&s->cpu), PPC40x_INPUT_CINT));
-
-    /* SDRAM controller */
-    /*
-     * We use the 440 DDR SDRAM controller which has more regs and features
-     * but it's compatible enough for now
-     */
-    object_property_set_int(OBJECT(&s->sdram), "nbanks", 2, &error_abort);
-    if (!ppc4xx_dcr_realize(PPC4xx_DCR_DEVICE(&s->sdram), &s->cpu, errp)) {
-        return;
-    }
-    /* XXX 405EP has no ECC interrupt */
-    sysbus_connect_irq(SYS_BUS_DEVICE(&s->sdram), 0,
-                       qdev_get_gpio_in(DEVICE(&s->uic), 17));
-
-    /* External bus controller */
-    if (!ppc4xx_dcr_realize(PPC4xx_DCR_DEVICE(&s->ebc), &s->cpu, errp)) {
-        return;
-    }
-
-    /* DMA controller */
-    if (!ppc4xx_dcr_realize(PPC4xx_DCR_DEVICE(&s->dma), &s->cpu, errp)) {
-        return;
-    }
-    sbd = SYS_BUS_DEVICE(&s->dma);
-    for (i = 0; i < ARRAY_SIZE(s->dma.irqs); i++) {
-        sysbus_connect_irq(sbd, i, qdev_get_gpio_in(DEVICE(&s->uic), 5 + i));
-    }
-
-    /* I2C controller */
-    sbd = SYS_BUS_DEVICE(&s->i2c);
-    if (!sysbus_realize(sbd, errp)) {
-        return;
-    }
-    sysbus_mmio_map(sbd, 0, 0xef600500);
-    sysbus_connect_irq(sbd, 0, qdev_get_gpio_in(DEVICE(&s->uic), 2));
-
-    /* GPIO */
-    sbd = SYS_BUS_DEVICE(&s->gpio);
-    if (!sysbus_realize(sbd, errp)) {
-        return;
-    }
-    sysbus_mmio_map(sbd, 0, 0xef600700);
-
-    /* Serial ports */
-    if (serial_hd(0) != NULL) {
-        serial_mm_init(get_system_memory(), 0xef600300, 0,
-                       qdev_get_gpio_in(DEVICE(&s->uic), 0),
-                       PPC_SERIAL_MM_BAUDBASE, serial_hd(0),
-                       DEVICE_BIG_ENDIAN);
-    }
-    if (serial_hd(1) != NULL) {
-        serial_mm_init(get_system_memory(), 0xef600400, 0,
-                       qdev_get_gpio_in(DEVICE(&s->uic), 1),
-                       PPC_SERIAL_MM_BAUDBASE, serial_hd(1),
-                       DEVICE_BIG_ENDIAN);
-    }
-
-    /* OCM */
-    if (!ppc4xx_dcr_realize(PPC4xx_DCR_DEVICE(&s->ocm), &s->cpu, errp)) {
-        return;
-    }
-
-    /* GPT */
-    sbd = SYS_BUS_DEVICE(&s->gpt);
-    if (!sysbus_realize(sbd, errp)) {
-        return;
-    }
-    sysbus_mmio_map(sbd, 0, 0xef600000);
-    for (i = 0; i < ARRAY_SIZE(s->gpt.irqs); i++) {
-        sysbus_connect_irq(sbd, i, qdev_get_gpio_in(DEVICE(&s->uic), 19 + i));
-    }
-
-    /* MAL */
-    object_property_set_int(OBJECT(&s->mal), "txc-num", 4, &error_abort);
-    object_property_set_int(OBJECT(&s->mal), "rxc-num", 2, &error_abort);
-    if (!ppc4xx_dcr_realize(PPC4xx_DCR_DEVICE(&s->mal), &s->cpu, errp)) {
-        return;
-    }
-    sbd = SYS_BUS_DEVICE(&s->mal);
-    for (i = 0; i < ARRAY_SIZE(s->mal.irqs); i++) {
-        sysbus_connect_irq(sbd, i, qdev_get_gpio_in(DEVICE(&s->uic), 11 + i));
-    }
-
-    /* Ethernet */
-    /* Uses UIC IRQs 9, 15, 17 */
-}
-
-static void ppc405_soc_class_init(ObjectClass *oc, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(oc);
-
-    dc->realize = ppc405_soc_realize;
-    /* Reason: only works as part of a ppc405 board/machine */
-    dc->user_creatable = false;
-}
-
-static const TypeInfo ppc405_types[] = {
-    {
-        .name           = TYPE_PPC405_POB,
-        .parent         = TYPE_PPC4xx_DCR_DEVICE,
-        .instance_size  = sizeof(Ppc405PobState),
-        .class_init     = ppc405_pob_class_init,
-    }, {
-        .name           = TYPE_PPC405_OPBA,
-        .parent         = TYPE_SYS_BUS_DEVICE,
-        .instance_size  = sizeof(Ppc405OpbaState),
-        .class_init     = ppc405_opba_class_init,
-    }, {
-        .name           = TYPE_PPC405_DMA,
-        .parent         = TYPE_PPC4xx_DCR_DEVICE,
-        .instance_size  = sizeof(Ppc405DmaState),
-        .class_init     = ppc405_dma_class_init,
-    }, {
-        .name           = TYPE_PPC405_GPIO,
-        .parent         = TYPE_SYS_BUS_DEVICE,
-        .instance_size  = sizeof(Ppc405GpioState),
-        .class_init     = ppc405_gpio_class_init,
-    }, {
-        .name           = TYPE_PPC405_OCM,
-        .parent         = TYPE_PPC4xx_DCR_DEVICE,
-        .instance_size  = sizeof(Ppc405OcmState),
-        .class_init     = ppc405_ocm_class_init,
-    }, {
-        .name           = TYPE_PPC405_GPT,
-        .parent         = TYPE_SYS_BUS_DEVICE,
-        .instance_size  = sizeof(Ppc405GptState),
-        .instance_finalize = ppc405_gpt_finalize,
-        .class_init     = ppc405_gpt_class_init,
-    }, {
-        .name           = TYPE_PPC405_CPC,
-        .parent         = TYPE_PPC4xx_DCR_DEVICE,
-        .instance_size  = sizeof(Ppc405CpcState),
-        .class_init     = ppc405_cpc_class_init,
-    }, {
-        .name           = TYPE_PPC405_SOC,
-        .parent         = TYPE_DEVICE,
-        .instance_size  = sizeof(Ppc405SoCState),
-        .instance_init  = ppc405_soc_instance_init,
-        .class_init     = ppc405_soc_class_init,
-    }
-};
-
-DEFINE_TYPES(ppc405_types)
diff --git a/hw/ppc/sam460ex.c b/hw/ppc/sam460ex.c
index 3ecae6a95048f..7dc3b309c89f5 100644
--- a/hw/ppc/sam460ex.c
+++ b/hw/ppc/sam460ex.c
@@ -234,7 +234,7 @@ static void main_cpu_reset(void *opaque)
 
         /* Create a mapping for the kernel.  */
         booke_set_tlb(&env->tlb.tlbe[0], 0, 0, 1 << 31);
-        env->gpr[6] = tswap32(EPAPR_MAGIC);
+        env->gpr[6] = EPAPR_MAGIC;
         env->gpr[7] = (16 * MiB) - 8; /* bi->ima_size; */
 
     } else {
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index f3a4b4235d438..a415e51d077af 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -4,6 +4,9 @@
  * Copyright (c) 2004-2007 Fabrice Bellard
  * Copyright (c) 2007 Jocelyn Mayer
  * Copyright (c) 2010 David Gibson, IBM Corporation.
+ * Copyright (c) 2010-2024, IBM Corporation..
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -243,7 +246,7 @@ static void spapr_dt_pa_features(SpaprMachineState *spapr,
         0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 48 - 53 */
         /* 54: DecFP, 56: DecI, 58: SHA */
         0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 54 - 59 */
-        /* 60: NM atomic, 62: RNG */
+        /* 60: NM atomic, 62: RNG, 64: DAWR1 (ISA 3.1) */
         0x80, 0x00, 0x80, 0x00, 0x00, 0x00, /* 60 - 65 */
         /* 68: DEXCR[SBHE|IBRTPDUS|SRAPD|NPHIE|PHIE] */
         0x00, 0x00, 0xce, 0x00, 0x00, 0x00, /* 66 - 71 */
@@ -292,6 +295,9 @@ static void spapr_dt_pa_features(SpaprMachineState *spapr,
          * in pa-features. So hide it from them. */
         pa_features[40 + 2] &= ~0x80; /* Radix MMU */
     }
+    if (spapr_get_cap(spapr, SPAPR_CAP_DAWR1)) {
+        pa_features[66] |= 0x80;
+    }
 
     _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", pa_features, pa_size)));
 }
@@ -1399,11 +1405,34 @@ static bool spapr_get_pate(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu,
     }
 }
 
-#define HPTE(_table, _i)   (void *)(((uint64_t *)(_table)) + ((_i) * 2))
-#define HPTE_VALID(_hpte)  (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_VALID)
-#define HPTE_DIRTY(_hpte)  (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_HPTE_DIRTY)
-#define CLEAN_HPTE(_hpte)  ((*(uint64_t *)(_hpte)) &= tswap64(~HPTE64_V_HPTE_DIRTY))
-#define DIRTY_HPTE(_hpte)  ((*(uint64_t *)(_hpte)) |= tswap64(HPTE64_V_HPTE_DIRTY))
+static uint64_t *hpte_get_ptr(SpaprMachineState *s, unsigned index)
+{
+    uint64_t *table = s->htab;
+
+    return &table[2 * index];
+}
+
+static bool hpte_is_valid(SpaprMachineState *s, unsigned index)
+{
+    return ldq_be_p(hpte_get_ptr(s, index)) & HPTE64_V_VALID;
+}
+
+static bool hpte_is_dirty(SpaprMachineState *s, unsigned index)
+{
+    return ldq_be_p(hpte_get_ptr(s, index)) & HPTE64_V_HPTE_DIRTY;
+}
+
+static void hpte_set_clean(SpaprMachineState *s, unsigned index)
+{
+    stq_be_p(hpte_get_ptr(s, index),
+             ldq_be_p(hpte_get_ptr(s, index)) & ~HPTE64_V_HPTE_DIRTY);
+}
+
+static void hpte_set_dirty(SpaprMachineState *s, unsigned index)
+{
+    stq_be_p(hpte_get_ptr(s, index),
+             ldq_be_p(hpte_get_ptr(s, index)) | HPTE64_V_HPTE_DIRTY);
+}
 
 /*
  * Get the fd to access the kernel htab, re-opening it if necessary
@@ -1614,7 +1643,7 @@ int spapr_reallocate_hpt(SpaprMachineState *spapr, int shift, Error **errp)
         spapr->htab_shift = shift;
 
         for (i = 0; i < size / HASH_PTE_SIZE_64; i++) {
-            DIRTY_HPTE(HPTE(spapr->htab, i));
+            hpte_set_dirty(spapr, i);
         }
     }
     /* We're setting up a hash table, so that means we're not radix */
@@ -1760,7 +1789,6 @@ static void spapr_machine_reset(MachineState *machine, ResetType type)
                                   0, fdt_addr, 0);
         cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));
     }
-    qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt));
 
     g_free(spapr->fdt_blob);
     spapr->fdt_size = fdt_totalsize(fdt);
@@ -2138,6 +2166,7 @@ static const VMStateDescription vmstate_spapr = {
         &vmstate_spapr_cap_rpt_invalidate,
         &vmstate_spapr_cap_ail_mode_3,
         &vmstate_spapr_cap_nested_papr,
+        &vmstate_spapr_cap_dawr1,
         NULL
     }
 };
@@ -2172,7 +2201,7 @@ static void htab_save_chunk(QEMUFile *f, SpaprMachineState *spapr,
     qemu_put_be32(f, chunkstart);
     qemu_put_be16(f, n_valid);
     qemu_put_be16(f, n_invalid);
-    qemu_put_buffer(f, HPTE(spapr->htab, chunkstart),
+    qemu_put_buffer(f, (void *)hpte_get_ptr(spapr, chunkstart),
                     HASH_PTE_SIZE_64 * n_valid);
 }
 
@@ -2198,16 +2227,16 @@ static void htab_save_first_pass(QEMUFile *f, SpaprMachineState *spapr,
 
         /* Consume invalid HPTEs */
         while ((index < htabslots)
-               && !HPTE_VALID(HPTE(spapr->htab, index))) {
-            CLEAN_HPTE(HPTE(spapr->htab, index));
+               && !hpte_is_valid(spapr, index)) {
+            hpte_set_clean(spapr, index);
             index++;
         }
 
         /* Consume valid HPTEs */
         chunkstart = index;
         while ((index < htabslots) && (index - chunkstart < USHRT_MAX)
-               && HPTE_VALID(HPTE(spapr->htab, index))) {
-            CLEAN_HPTE(HPTE(spapr->htab, index));
+               && hpte_is_valid(spapr, index)) {
+            hpte_set_clean(spapr, index);
             index++;
         }
 
@@ -2247,7 +2276,7 @@ static int htab_save_later_pass(QEMUFile *f, SpaprMachineState *spapr,
 
         /* Consume non-dirty HPTEs */
         while ((index < htabslots)
-               && !HPTE_DIRTY(HPTE(spapr->htab, index))) {
+               && !hpte_is_dirty(spapr, index)) {
             index++;
             examined++;
         }
@@ -2255,9 +2284,9 @@ static int htab_save_later_pass(QEMUFile *f, SpaprMachineState *spapr,
         chunkstart = index;
         /* Consume valid dirty HPTEs */
         while ((index < htabslots) && (index - chunkstart < USHRT_MAX)
-               && HPTE_DIRTY(HPTE(spapr->htab, index))
-               && HPTE_VALID(HPTE(spapr->htab, index))) {
-            CLEAN_HPTE(HPTE(spapr->htab, index));
+               && hpte_is_dirty(spapr, index)
+               && hpte_is_valid(spapr, index)) {
+            hpte_set_clean(spapr, index);
             index++;
             examined++;
         }
@@ -2265,9 +2294,9 @@ static int htab_save_later_pass(QEMUFile *f, SpaprMachineState *spapr,
         invalidstart = index;
         /* Consume invalid dirty HPTEs */
         while ((index < htabslots) && (index - invalidstart < USHRT_MAX)
-               && HPTE_DIRTY(HPTE(spapr->htab, index))
-               && !HPTE_VALID(HPTE(spapr->htab, index))) {
-            CLEAN_HPTE(HPTE(spapr->htab, index));
+               && hpte_is_dirty(spapr, index)
+               && !hpte_is_valid(spapr, index)) {
+            hpte_set_clean(spapr, index);
             index++;
             examined++;
         }
@@ -2449,11 +2478,11 @@ static int htab_load(QEMUFile *f, void *opaque, int version_id)
 
         if (spapr->htab) {
             if (n_valid) {
-                qemu_get_buffer(f, HPTE(spapr->htab, index),
+                qemu_get_buffer(f, (void *)hpte_get_ptr(spapr, index),
                                 HASH_PTE_SIZE_64 * n_valid);
             }
             if (n_invalid) {
-                memset(HPTE(spapr->htab, index + n_valid), 0,
+                memset(hpte_get_ptr(spapr, index + n_valid), 0,
                        HASH_PTE_SIZE_64 * n_invalid);
             }
         } else {
@@ -2888,6 +2917,9 @@ static void spapr_machine_init(MachineState *machine)
         spapr_ovec_set(spapr->ov5, OV5_XIVE_EXPLOIT);
     }
 
+    qemu_guest_getrandom_nofail(&spapr->hashpkey_val,
+                                sizeof(spapr->hashpkey_val));
+
     /* init CPUs */
     spapr_init_cpus(spapr);
 
@@ -4437,7 +4469,7 @@ static void spapr_pic_print_info(InterruptStatsProvider *obj, GString *buf)
  */
 static int spapr_match_nvt(XiveFabric *xfb, uint8_t format,
                            uint8_t nvt_blk, uint32_t nvt_idx,
-                           bool cam_ignore, uint8_t priority,
+                           bool crowd, bool cam_ignore, uint8_t priority,
                            uint32_t logic_serv, XiveTCTXMatch *match)
 {
     SpaprMachineState *spapr = SPAPR_MACHINE(xfb);
@@ -4445,7 +4477,7 @@ static int spapr_match_nvt(XiveFabric *xfb, uint8_t format,
     XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr);
     int count;
 
-    count = xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, cam_ignore,
+    count = xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, crowd, cam_ignore,
                            priority, logic_serv, match);
     if (count < 0) {
         return count;
@@ -4655,6 +4687,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
     smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON;
     smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_ON;
     smc->default_caps.caps[SPAPR_CAP_RPT_INVALIDATE] = SPAPR_CAP_OFF;
+    smc->default_caps.caps[SPAPR_CAP_DAWR1] = SPAPR_CAP_ON;
 
     /*
      * This cap specifies whether the AIL 3 mode for
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 7edd13836015c..815c94ed2ff03 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -34,6 +34,7 @@
 #include "kvm_ppc.h"
 #include "migration/vmstate.h"
 #include "system/tcg.h"
+#include "system/hostmem.h"
 
 #include "hw/ppc/spapr.h"
 
@@ -696,6 +697,34 @@ static void cap_ail_mode_3_apply(SpaprMachineState *spapr,
     }
 }
 
+static void cap_dawr1_apply(SpaprMachineState *spapr, uint8_t val,
+                               Error **errp)
+{
+    ERRP_GUARD();
+
+    if (!val) {
+        return; /* Disable by default */
+    }
+
+    if (!ppc_type_check_compat(MACHINE(spapr)->cpu_type,
+                               CPU_POWERPC_LOGICAL_3_10, 0,
+                               spapr->max_compat_pvr)) {
+        error_setg(errp, "DAWR1 supported only on POWER10 and later CPUs");
+        error_append_hint(errp, "Try appending -machine cap-dawr1=off\n");
+        return;
+    }
+
+    if (kvm_enabled()) {
+        if (!kvmppc_has_cap_dawr1()) {
+            error_setg(errp, "DAWR1 not supported by KVM.");
+            error_append_hint(errp, "Try appending -machine cap-dawr1=off");
+        } else if (kvmppc_set_cap_dawr1(val) < 0) {
+            error_setg(errp, "Error enabling cap-dawr1 with KVM.");
+            error_append_hint(errp, "Try appending -machine cap-dawr1=off");
+        }
+    }
+}
+
 SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
     [SPAPR_CAP_HTM] = {
         .name = "htm",
@@ -831,6 +860,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
         .type = "bool",
         .apply = cap_ail_mode_3_apply,
     },
+    [SPAPR_CAP_DAWR1] = {
+        .name = "dawr1",
+        .description = "Allow 2nd Data Address Watchpoint Register (DAWR1)",
+        .index = SPAPR_CAP_DAWR1,
+        .get = spapr_cap_get_bool,
+        .set = spapr_cap_set_bool,
+        .type = "bool",
+        .apply = cap_dawr1_apply,
+    },
 };
 
 static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
@@ -841,6 +879,11 @@ static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
 
     caps = smc->default_caps;
 
+    if (!ppc_type_check_compat(cputype, CPU_POWERPC_LOGICAL_3_10,
+                               0, spapr->max_compat_pvr)) {
+        caps.caps[SPAPR_CAP_DAWR1] = SPAPR_CAP_OFF;
+    }
+
     if (!ppc_type_check_compat(cputype, CPU_POWERPC_LOGICAL_3_00,
                                0, spapr->max_compat_pvr)) {
         caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF;
@@ -975,6 +1018,7 @@ SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
 SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI);
 SPAPR_CAP_MIG_STATE(rpt_invalidate, SPAPR_CAP_RPT_INVALIDATE);
 SPAPR_CAP_MIG_STATE(ail_mode_3, SPAPR_CAP_AIL_MODE_3);
+SPAPR_CAP_MIG_STATE(dawr1, SPAPR_CAP_DAWR1);
 
 void spapr_caps_init(SpaprMachineState *spapr)
 {
@@ -1034,7 +1078,7 @@ void spapr_caps_add_properties(SpaprMachineClass *smc)
     for (i = 0; i < ARRAY_SIZE(capability_table); i++) {
         SpaprCapabilityInfo *cap = &capability_table[i];
         g_autofree char *name = g_strdup_printf("cap-%s", cap->name);
-        g_autofree char *desc = g_strdup_printf("%s", cap->description);
+        g_autofree char *desc = g_strdup(cap->description);
 
         object_class_property_add(klass, name, cap->type,
                                   cap->get, cap->set,
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index 9e0e0648a7231..0671d9e44b48f 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -273,6 +273,8 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr,
     env->spr_cb[SPR_PIR].default_value = cs->cpu_index;
     env->spr_cb[SPR_TIR].default_value = thread_index;
 
+    env->spr_cb[SPR_HASHPKEYR].default_value = spapr->hashpkey_val;
+
     cpu_ppc_set_1lpar(cpu);
 
     /* Set time-base frequency to 512 MHz. vhyp must be set first. */
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index f8ab767063067..406aea4ecbeb2 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -299,8 +299,10 @@ static target_ulong h_page_init(PowerPCCPU *cpu, SpaprMachineState *spapr,
     if (flags & (H_ICACHE_SYNCHRONIZE | H_ICACHE_INVALIDATE)) {
         if (kvm_enabled()) {
             kvmppc_icbi_range(cpu, pdst, len);
-        } else {
+        } else if (tcg_enabled()) {
             tb_flush(CPU(cpu));
+        } else {
+            g_assert_not_reached();
         }
     }
 
@@ -578,6 +580,8 @@ static target_ulong h_confer(PowerPCCPU *cpu, SpaprMachineState *spapr,
     CPUState *cs = CPU(cpu);
     SpaprCpuState *spapr_cpu;
 
+    assert(tcg_enabled()); /* KVM will have handled this */
+
     /*
      * -1 means confer to all other CPUs without dispatch counter check,
      *  otherwise it's a targeted confer.
@@ -818,11 +822,12 @@ static target_ulong h_set_mode_resource_set_ciabr(PowerPCCPU *cpu,
     return H_SUCCESS;
 }
 
-static target_ulong h_set_mode_resource_set_dawr0(PowerPCCPU *cpu,
-                                                  SpaprMachineState *spapr,
-                                                  target_ulong mflags,
-                                                  target_ulong value1,
-                                                  target_ulong value2)
+static target_ulong h_set_mode_resource_set_dawr(PowerPCCPU *cpu,
+                                                 SpaprMachineState *spapr,
+                                                 target_ulong mflags,
+                                                 target_ulong resource,
+                                                 target_ulong value1,
+                                                 target_ulong value2)
 {
     CPUPPCState *env = &cpu->env;
 
@@ -835,8 +840,15 @@ static target_ulong h_set_mode_resource_set_dawr0(PowerPCCPU *cpu,
         return H_P4;
     }
 
-    ppc_store_dawr0(env, value1);
-    ppc_store_dawrx0(env, value2);
+    if (resource == H_SET_MODE_RESOURCE_SET_DAWR0) {
+        ppc_store_dawr0(env, value1);
+        ppc_store_dawrx0(env, value2);
+    } else if (resource == H_SET_MODE_RESOURCE_SET_DAWR1) {
+        ppc_store_dawr1(env, value1);
+        ppc_store_dawrx1(env, value2);
+    } else {
+        g_assert_not_reached();
+    }
 
     return H_SUCCESS;
 }
@@ -915,8 +927,9 @@ static target_ulong h_set_mode(PowerPCCPU *cpu, SpaprMachineState *spapr,
                                             args[3]);
         break;
     case H_SET_MODE_RESOURCE_SET_DAWR0:
-        ret = h_set_mode_resource_set_dawr0(cpu, spapr, args[0], args[2],
-                                            args[3]);
+    case H_SET_MODE_RESOURCE_SET_DAWR1:
+        ret = h_set_mode_resource_set_dawr(cpu, spapr, args[0], args[1],
+                                           args[2], args[3]);
         break;
     case H_SET_MODE_RESOURCE_LE:
         ret = h_set_mode_resource_le(cpu, spapr, args[0], args[2], args[3]);
diff --git a/hw/ppc/spapr_nested.c b/hw/ppc/spapr_nested.c
index 7def8eb73b83c..201f62920332d 100644
--- a/hw/ppc/spapr_nested.c
+++ b/hw/ppc/spapr_nested.c
@@ -1,6 +1,7 @@
 #include "qemu/osdep.h"
 #include "qemu/cutils.h"
 #include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "helper_regs.h"
 #include "hw/ppc/ppc.h"
 #include "hw/ppc/spapr.h"
@@ -64,10 +65,9 @@ static
 SpaprMachineStateNestedGuest *spapr_get_nested_guest(SpaprMachineState *spapr,
                                                      target_ulong guestid)
 {
-    SpaprMachineStateNestedGuest *guest;
-
-    guest = g_hash_table_lookup(spapr->nested.guests, GINT_TO_POINTER(guestid));
-    return guest;
+    return spapr->nested.guests ?
+        g_hash_table_lookup(spapr->nested.guests,
+                            GINT_TO_POINTER(guestid)) : NULL;
 }
 
 bool spapr_get_pate_nested_papr(SpaprMachineState *spapr, PowerPCCPU *cpu,
@@ -593,26 +593,37 @@ static bool spapr_nested_vcpu_check(SpaprMachineStateNestedGuest *guest,
     return false;
 }
 
-static void *get_vcpu_state_ptr(SpaprMachineStateNestedGuest *guest,
-                              target_ulong vcpuid)
+static void *get_vcpu_state_ptr(SpaprMachineState *spapr,
+                                SpaprMachineStateNestedGuest *guest,
+                                target_ulong vcpuid)
 {
     assert(spapr_nested_vcpu_check(guest, vcpuid, false));
     return &guest->vcpus[vcpuid].state;
 }
 
-static void *get_vcpu_ptr(SpaprMachineStateNestedGuest *guest,
-                                   target_ulong vcpuid)
+static void *get_vcpu_ptr(SpaprMachineState *spapr,
+                          SpaprMachineStateNestedGuest *guest,
+                          target_ulong vcpuid)
 {
     assert(spapr_nested_vcpu_check(guest, vcpuid, false));
     return &guest->vcpus[vcpuid];
 }
 
-static void *get_guest_ptr(SpaprMachineStateNestedGuest *guest,
+static void *get_guest_ptr(SpaprMachineState *spapr,
+                           SpaprMachineStateNestedGuest *guest,
                            target_ulong vcpuid)
 {
     return guest; /* for GSBE_NESTED */
 }
 
+static void *get_machine_ptr(SpaprMachineState *spapr,
+                             SpaprMachineStateNestedGuest *guest,
+                             target_ulong vcpuid)
+{
+    /* ignore guest and vcpuid for this */
+    return &spapr->nested;
+}
+
 /*
  * set=1 means the L1 is trying to set some state
  * set=0 means the L1 is trying to get some state
@@ -1012,7 +1023,15 @@ struct guest_state_element_type guest_state_element_types[] = {
     GSBE_NESTED_VCPU(GSB_VCPU_OUT_BUFFER, 0x10, runbufout,   copy_state_runbuf),
     GSBE_NESTED_VCPU(GSB_VCPU_OUT_BUF_MIN_SZ, 0x8, runbufout, out_buf_min_size),
     GSBE_NESTED_VCPU(GSB_VCPU_HDEC_EXPIRY_TB, 0x8, hdecr_expiry_tb,
-                     copy_state_hdecr)
+                     copy_state_hdecr),
+    GSBE_NESTED_MACHINE_DW(GSB_L0_GUEST_HEAP_INUSE, l0_guest_heap_inuse),
+    GSBE_NESTED_MACHINE_DW(GSB_L0_GUEST_HEAP_MAX, l0_guest_heap_max),
+    GSBE_NESTED_MACHINE_DW(GSB_L0_GUEST_PGTABLE_SIZE_INUSE,
+                           l0_guest_pgtable_size_inuse),
+    GSBE_NESTED_MACHINE_DW(GSB_L0_GUEST_PGTABLE_SIZE_MAX,
+                           l0_guest_pgtable_size_max),
+    GSBE_NESTED_MACHINE_DW(GSB_L0_GUEST_PGTABLE_RECLAIMED,
+                           l0_guest_pgtable_reclaimed),
 };
 
 void spapr_nested_gsb_init(void)
@@ -1030,8 +1049,13 @@ void spapr_nested_gsb_init(void)
         else if (type->id >= GSB_VCPU_IN_BUFFER)
             /* 0x0c00 - 0xf000 Thread + RW */
             type->flags = 0;
+        else if (type->id >= GSB_L0_GUEST_HEAP_INUSE)
+
+            /*0x0800 - 0x0804 Hostwide Counters + RO */
+            type->flags = GUEST_STATE_ELEMENT_TYPE_FLAG_HOST_WIDE |
+                          GUEST_STATE_ELEMENT_TYPE_FLAG_READ_ONLY;
         else if (type->id >= GSB_VCPU_LPVR)
-            /* 0x0003 - 0x0bff Guest + RW */
+            /* 0x0003 - 0x07ff Guest + RW */
             type->flags = GUEST_STATE_ELEMENT_TYPE_FLAG_GUEST_WIDE;
         else if (type->id >= GSB_HV_VCPU_STATE_SIZE)
             /* 0x0001 - 0x0002 Guest + RO */
@@ -1138,18 +1162,26 @@ static bool guest_state_request_check(struct guest_state_request *gsr)
             return false;
         }
 
-        if (type->flags & GUEST_STATE_ELEMENT_TYPE_FLAG_GUEST_WIDE) {
+        if (type->flags & GUEST_STATE_ELEMENT_TYPE_FLAG_HOST_WIDE) {
+            /* Hostwide elements cant be clubbed with other types */
+            if (!(gsr->flags & GUEST_STATE_REQUEST_HOST_WIDE)) {
+                qemu_log_mask(LOG_GUEST_ERROR, "trying to get/set a host wide "
+                              "Element ID:%04x.\n", id);
+                return false;
+            }
+        } else  if (type->flags & GUEST_STATE_ELEMENT_TYPE_FLAG_GUEST_WIDE) {
             /* guest wide element type */
             if (!(gsr->flags & GUEST_STATE_REQUEST_GUEST_WIDE)) {
-                qemu_log_mask(LOG_GUEST_ERROR, "trying to set a guest wide "
+                qemu_log_mask(LOG_GUEST_ERROR, "trying to get/set a guest wide "
                               "Element ID:%04x.\n", id);
                 return false;
             }
         } else {
             /* thread wide element type */
-            if (gsr->flags & GUEST_STATE_REQUEST_GUEST_WIDE) {
-                qemu_log_mask(LOG_GUEST_ERROR, "trying to set a thread wide "
-                              "Element ID:%04x.\n", id);
+            if (gsr->flags & (GUEST_STATE_REQUEST_GUEST_WIDE |
+                              GUEST_STATE_REQUEST_HOST_WIDE)) {
+                qemu_log_mask(LOG_GUEST_ERROR, "trying to get/set a thread wide"
+                            " Element ID:%04x.\n", id);
                 return false;
             }
         }
@@ -1418,7 +1450,8 @@ static target_ulong h_guest_create_vcpu(PowerPCCPU *cpu,
     return H_SUCCESS;
 }
 
-static target_ulong getset_state(SpaprMachineStateNestedGuest *guest,
+static target_ulong getset_state(SpaprMachineState *spapr,
+                                 SpaprMachineStateNestedGuest *guest,
                                  uint64_t vcpuid,
                                  struct guest_state_request *gsr)
 {
@@ -1451,7 +1484,7 @@ static target_ulong getset_state(SpaprMachineStateNestedGuest *guest,
 
         /* Get pointer to guest data to get/set */
         if (type->location && type->copy) {
-            ptr = type->location(guest, vcpuid);
+            ptr = type->location(spapr, guest, vcpuid);
             assert(ptr);
             if (!~(type->mask) && is_gsr_invalid(gsr, element, type)) {
                 return H_INVALID_ELEMENT_VALUE;
@@ -1468,6 +1501,7 @@ static target_ulong getset_state(SpaprMachineStateNestedGuest *guest,
 }
 
 static target_ulong map_and_getset_state(PowerPCCPU *cpu,
+                                         SpaprMachineState *spapr,
                                          SpaprMachineStateNestedGuest *guest,
                                          uint64_t vcpuid,
                                          struct guest_state_request *gsr)
@@ -1491,7 +1525,7 @@ static target_ulong map_and_getset_state(PowerPCCPU *cpu,
         goto out1;
     }
 
-    rc = getset_state(guest, vcpuid, gsr);
+    rc = getset_state(spapr, guest, vcpuid, gsr);
 
 out1:
     address_space_unmap(CPU(cpu)->as, gsr->gsb, len, is_write, len);
@@ -1509,27 +1543,46 @@ static target_ulong h_guest_getset_state(PowerPCCPU *cpu,
     target_ulong buf = args[3];
     target_ulong buflen = args[4];
     struct guest_state_request gsr;
-    SpaprMachineStateNestedGuest *guest;
+    SpaprMachineStateNestedGuest *guest = NULL;
 
-    guest = spapr_get_nested_guest(spapr, lpid);
-    if (!guest) {
-        return H_P2;
-    }
     gsr.buf = buf;
     assert(buflen <= GSB_MAX_BUF_SIZE);
     gsr.len = buflen;
     gsr.flags = 0;
-    if (flags & H_GUEST_GETSET_STATE_FLAG_GUEST_WIDE) {
+
+    /* Works for both get/set state */
+    if ((flags & H_GUEST_GET_STATE_FLAGS_GUEST_WIDE) ||
+        (flags & H_GUEST_SET_STATE_FLAGS_GUEST_WIDE)) {
         gsr.flags |= GUEST_STATE_REQUEST_GUEST_WIDE;
     }
-    if (flags & ~H_GUEST_GETSET_STATE_FLAG_GUEST_WIDE) {
-        return H_PARAMETER; /* flag not supported yet */
-    }
 
     if (set) {
+        if (flags & ~H_GUEST_SET_STATE_FLAGS_MASK) {
+            return H_PARAMETER;
+        }
         gsr.flags |= GUEST_STATE_REQUEST_SET;
+    } else {
+        /*
+         * No reserved fields to be set in flags nor both
+         * GUEST/HOST wide bits
+         */
+        if ((flags & ~H_GUEST_GET_STATE_FLAGS_MASK) ||
+            (flags == H_GUEST_GET_STATE_FLAGS_MASK)) {
+            return H_PARAMETER;
+        }
+
+        if (flags & H_GUEST_GET_STATE_FLAGS_HOST_WIDE) {
+            gsr.flags |= GUEST_STATE_REQUEST_HOST_WIDE;
+        }
+    }
+
+    if (!(gsr.flags & GUEST_STATE_REQUEST_HOST_WIDE)) {
+        guest = spapr_get_nested_guest(spapr, lpid);
+        if (!guest) {
+            return H_P2;
+        }
     }
-    return map_and_getset_state(cpu, guest, vcpuid, &gsr);
+    return map_and_getset_state(cpu, spapr, guest, vcpuid, &gsr);
 }
 
 static target_ulong h_guest_set_state(PowerPCCPU *cpu,
@@ -1640,7 +1693,8 @@ static int get_exit_ids(uint64_t srr0, uint16_t ids[16])
     return nr;
 }
 
-static void exit_process_output_buffer(PowerPCCPU *cpu,
+static void exit_process_output_buffer(SpaprMachineState *spapr,
+                                       PowerPCCPU *cpu,
                                        SpaprMachineStateNestedGuest *guest,
                                        target_ulong vcpuid,
                                        target_ulong *r3)
@@ -1678,7 +1732,7 @@ static void exit_process_output_buffer(PowerPCCPU *cpu,
     gsr.gsb = gsb;
     gsr.len = VCPU_OUT_BUF_MIN_SZ;
     gsr.flags = 0; /* get + never guest wide */
-    getset_state(guest, vcpuid, &gsr);
+    getset_state(spapr, guest, vcpuid, &gsr);
 
     address_space_unmap(CPU(cpu)->as, gsb, len, true, len);
     return;
@@ -1704,7 +1758,7 @@ void spapr_exit_nested_papr(SpaprMachineState *spapr, PowerPCCPU *cpu, int excp)
 
     exit_nested_store_l2(cpu, excp, vcpu);
     /* do the output buffer for run_vcpu*/
-    exit_process_output_buffer(cpu, guest, vcpuid, &r3_return);
+    exit_process_output_buffer(spapr, cpu, guest, vcpuid, &r3_return);
 
     assert(env->spr[SPR_LPIDR] != 0);
     nested_load_state(cpu, spapr_cpu->nested_host_state);
@@ -1819,7 +1873,7 @@ static target_ulong h_guest_run_vcpu(PowerPCCPU *cpu,
     gsr.buf = vcpu->runbufin.addr;
     gsr.len = vcpu->runbufin.size;
     gsr.flags = GUEST_STATE_REQUEST_SET; /* Thread wide + writing */
-    rc = map_and_getset_state(cpu, guest, vcpuid, &gsr);
+    rc = map_and_getset_state(cpu, spapr,  guest, vcpuid, &gsr);
     if (rc == H_SUCCESS) {
         nested_papr_run_vcpu(cpu, lpid, vcpu);
     } else {
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 904227d9aa1f1..e0a9d50edc3df 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1283,8 +1283,7 @@ static void spapr_dt_pci_device_cb(PCIBus *bus, PCIDevice *pdev,
     PciWalkFdt *p = opaque;
     int err;
 
-    if (p->err) {
-        /* Something's already broken, don't keep going */
+    if (p->err || !pdev->enabled) {
         return;
     }
 
@@ -1550,7 +1549,9 @@ static void spapr_pci_pre_plug(HotplugHandler *plug_handler,
      * hotplug, we do not allow functions to be hotplugged to a
      * slot that already has function 0 present
      */
-    if (plugged_dev->hotplugged && bus->devices[PCI_DEVFN(slotnr, 0)] &&
+    if (plugged_dev->hotplugged &&
+        !pci_is_vf(pdev) &&
+        bus->devices[PCI_DEVFN(slotnr, 0)] &&
         PCI_FUNC(pdev->devfn) != 0) {
         error_setg(errp, "PCI: slot %d function 0 already occupied by %s,"
                    " additional functions can no longer be exposed to guest.",
@@ -1572,6 +1573,14 @@ static void spapr_pci_plug(HotplugHandler *plug_handler,
     SpaprDrc *drc = drc_from_dev(phb, pdev);
     uint32_t slotnr = PCI_SLOT(pdev->devfn);
 
+    /*
+     * If DR or the PCI device is disabled we don't need to do anything
+     * in the case of hotplug or coldplug callbacks.
+     */
+    if (!pdev->enabled) {
+        return;
+    }
+
     g_assert(drc);
 
     if (IS_PCI_BRIDGE(plugged_dev)) {
@@ -1647,6 +1656,11 @@ static void spapr_pci_unplug_request(HotplugHandler *plug_handler,
     SpaprDrc *drc = drc_from_dev(phb, pdev);
 
     g_assert(drc);
+
+    if (!drc->dev) {
+        return;
+    }
+
     g_assert(drc->dev == plugged_dev);
 
     if (!spapr_drc_unplug_requested(drc)) {
diff --git a/hw/ppc/virtex_ml507.c b/hw/ppc/virtex_ml507.c
index 2323811927311..17115be74d528 100644
--- a/hw/ppc/virtex_ml507.c
+++ b/hw/ppc/virtex_ml507.c
@@ -119,7 +119,7 @@ static void main_cpu_reset(void *opaque)
     /* Create a mapping spanning the 32bit addr space. */
     booke_set_tlb(&env->tlb.tlbe[0], 0, 0, 1U << 31);
     booke_set_tlb(&env->tlb.tlbe[1], 0x80000000, 0x80000000, 1U << 31);
-    env->gpr[6] = tswap32(EPAPR_MAGIC);
+    env->gpr[6] = EPAPR_MAGIC;
     env->gpr[7] = bi->ima_size;
 }
 
@@ -217,6 +217,7 @@ static void virtex_init(MachineState *machine)
 
     cpu_irq = qdev_get_gpio_in(DEVICE(cpu), PPC40x_INPUT_INT);
     dev = qdev_new("xlnx.xps-intc");
+    qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_BIG);
     qdev_prop_set_uint32(dev, "kind-of-intr", 0);
     sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
     sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, INTC_BASEADDR);
@@ -230,6 +231,7 @@ static void virtex_init(MachineState *machine)
 
     /* 2 timers at irq 2 @ 62 Mhz.  */
     dev = qdev_new("xlnx.xps-timer");
+    qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_BIG);
     qdev_prop_set_uint32(dev, "one-timer-only", 0);
     qdev_prop_set_uint32(dev, "clock-frequency", 62 * 1000000);
     sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
diff --git a/hw/remote/mpqemu-link.c b/hw/remote/mpqemu-link.c
index e25f97680d2d0..49885a1db6e26 100644
--- a/hw/remote/mpqemu-link.c
+++ b/hw/remote/mpqemu-link.c
@@ -110,7 +110,7 @@ static ssize_t mpqemu_read(QIOChannel *ioc, void *buf, size_t len, int **fds,
         bql_unlock();
     }
 
-    ret = qio_channel_readv_full_all_eof(ioc, &iov, 1, fds, nfds, errp);
+    ret = qio_channel_readv_full_all_eof(ioc, &iov, 1, fds, nfds, 0, errp);
 
     if (drop_bql && !iothread && !qemu_in_coroutine()) {
         bql_lock();
diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
index 9e5ff6d87a9dc..6e51a92856fb6 100644
--- a/hw/remote/vfio-user-obj.c
+++ b/hw/remote/vfio-user-obj.c
@@ -358,7 +358,7 @@ static int vfu_object_mr_rw(MemoryRegion *mr, uint8_t *buf, hwaddr offset,
     int access_size;
     uint64_t val;
 
-    if (memory_access_is_direct(mr, is_write)) {
+    if (memory_access_is_direct(mr, is_write, MEMTXATTRS_UNSPECIFIED)) {
         /**
          * Some devices expose a PCI expansion ROM, which could be buffer
          * based as compared to other regions which are primarily based on
diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c
index c309441b7d88e..765b9e2b1ab3d 100644
--- a/hw/riscv/boot.c
+++ b/hw/riscv/boot.c
@@ -374,8 +374,6 @@ void riscv_load_fdt(hwaddr fdt_addr, void *fdt)
     uint32_t fdtsize = fdt_totalsize(fdt);
 
     /* copy in the device tree */
-    qemu_fdt_dumpdtb(fdt, fdtsize);
-
     rom_add_blob_fixed_as("fdt", fdt, fdtsize, fdt_addr,
                           &address_space_memory);
     qemu_register_reset_nosnapshotload(qemu_fdt_randomize_seeds,
diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build
index 3c7e083aca1db..c22f3a721629a 100644
--- a/hw/riscv/meson.build
+++ b/hw/riscv/meson.build
@@ -10,7 +10,8 @@ riscv_ss.add(when: 'CONFIG_SIFIVE_U', if_true: files('sifive_u.c'))
 riscv_ss.add(when: 'CONFIG_SPIKE', if_true: files('spike.c'))
 riscv_ss.add(when: 'CONFIG_MICROCHIP_PFSOC', if_true: files('microchip_pfsoc.c'))
 riscv_ss.add(when: 'CONFIG_ACPI', if_true: files('virt-acpi-build.c'))
-riscv_ss.add(when: 'CONFIG_RISCV_IOMMU', if_true: files('riscv-iommu.c', 'riscv-iommu-pci.c', 'riscv-iommu-sys.c'))
+riscv_ss.add(when: 'CONFIG_RISCV_IOMMU', if_true: files(
+	'riscv-iommu.c', 'riscv-iommu-pci.c', 'riscv-iommu-sys.c', 'riscv-iommu-hpm.c'))
 riscv_ss.add(when: 'CONFIG_MICROBLAZE_V', if_true: files('microblaze-v-generic.c'))
 
 hw_arch += {'riscv': riscv_ss}
diff --git a/hw/riscv/microblaze-v-generic.c b/hw/riscv/microblaze-v-generic.c
index 26788a1824aa9..d8e67906d2673 100644
--- a/hw/riscv/microblaze-v-generic.c
+++ b/hw/riscv/microblaze-v-generic.c
@@ -79,6 +79,7 @@ static void mb_v_generic_init(MachineState *machine)
     memory_region_add_subregion(sysmem, ddr_base, phys_ram);
 
     dev = qdev_new("xlnx.xps-intc");
+    qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE);
     qdev_prop_set_uint32(dev, "kind-of-intr",
                          1 << UARTLITE_IRQ);
     sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
@@ -91,6 +92,7 @@ static void mb_v_generic_init(MachineState *machine)
 
     /* Uartlite */
     dev = qdev_new(TYPE_XILINX_UARTLITE);
+    qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE);
     qdev_prop_set_chr(dev, "chardev", serial_hd(0));
     sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
     sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, UARTLITE_BASEADDR);
@@ -103,6 +105,7 @@ static void mb_v_generic_init(MachineState *machine)
 
     /* 2 timers at irq 0 @ 100 Mhz.  */
     dev = qdev_new("xlnx.xps-timer");
+    qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE);
     qdev_prop_set_uint32(dev, "one-timer-only", 0);
     qdev_prop_set_uint32(dev, "clock-frequency", 100000000);
     sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
@@ -111,6 +114,7 @@ static void mb_v_generic_init(MachineState *machine)
 
     /* 2 timers at irq 3 @ 100 Mhz.  */
     dev = qdev_new("xlnx.xps-timer");
+    qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE);
     qdev_prop_set_uint32(dev, "one-timer-only", 0);
     qdev_prop_set_uint32(dev, "clock-frequency", 100000000);
     sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
@@ -119,6 +123,7 @@ static void mb_v_generic_init(MachineState *machine)
 
     /* Emaclite */
     dev = qdev_new("xlnx.xps-ethernetlite");
+    qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE);
     qemu_configure_nic_device(dev, true, NULL);
     qdev_prop_set_uint32(dev, "tx-ping-pong", 0);
     qdev_prop_set_uint32(dev, "rx-ping-pong", 0);
diff --git a/hw/riscv/microchip_pfsoc.c b/hw/riscv/microchip_pfsoc.c
index ec7e2e42264ea..9c846f9b5baf7 100644
--- a/hw/riscv/microchip_pfsoc.c
+++ b/hw/riscv/microchip_pfsoc.c
@@ -650,6 +650,7 @@ static void microchip_icicle_kit_machine_class_init(ObjectClass *oc, void *data)
     mc->min_cpus = MICROCHIP_PFSOC_MANAGEMENT_CPU_COUNT + 1;
     mc->default_cpus = mc->min_cpus;
     mc->default_ram_id = "microchip.icicle.kit.ram";
+    mc->auto_create_sdcard = true;
 
     /*
      * Map 513 MiB high memory, the minimum required high memory size, because
diff --git a/hw/riscv/opentitan.c b/hw/riscv/opentitan.c
index b9e56235d87ba..98a67fe52a88f 100644
--- a/hw/riscv/opentitan.c
+++ b/hw/riscv/opentitan.c
@@ -28,6 +28,7 @@
 #include "hw/riscv/boot.h"
 #include "qemu/units.h"
 #include "system/system.h"
+#include "exec/address-spaces.h"
 
 /*
  * This version of the OpenTitan machine currently supports
diff --git a/hw/riscv/riscv-iommu-bits.h b/hw/riscv/riscv-iommu-bits.h
index 485f36b9c921e..b7cb1bc7367f1 100644
--- a/hw/riscv/riscv-iommu-bits.h
+++ b/hw/riscv/riscv-iommu-bits.h
@@ -50,8 +50,14 @@ struct riscv_iommu_pq_record {
 #define RISCV_IOMMU_PREQ_HDR_PRIV       BIT_ULL(33)
 #define RISCV_IOMMU_PREQ_HDR_EXEC       BIT_ULL(34)
 #define RISCV_IOMMU_PREQ_HDR_DID        GENMASK_ULL(63, 40)
+
 /* Payload fields */
+#define RISCV_IOMMU_PREQ_PAYLOAD_R      BIT_ULL(0)
+#define RISCV_IOMMU_PREQ_PAYLOAD_W      BIT_ULL(1)
+#define RISCV_IOMMU_PREQ_PAYLOAD_L      BIT_ULL(2)
 #define RISCV_IOMMU_PREQ_PAYLOAD_M      GENMASK_ULL(2, 0)
+#define RISCV_IOMMU_PREQ_PRG_INDEX      GENMASK_ULL(11, 3)
+#define RISCV_IOMMU_PREQ_UADDR          GENMASK_ULL(63, 12)
 
 /* Common field positions */
 #define RISCV_IOMMU_PPN_FIELD           GENMASK_ULL(53, 10)
@@ -82,6 +88,7 @@ struct riscv_iommu_pq_record {
 #define RISCV_IOMMU_CAP_ATS             BIT_ULL(25)
 #define RISCV_IOMMU_CAP_T2GPA           BIT_ULL(26)
 #define RISCV_IOMMU_CAP_IGS             GENMASK_ULL(29, 28)
+#define RISCV_IOMMU_CAP_HPM             BIT_ULL(30)
 #define RISCV_IOMMU_CAP_DBG             BIT_ULL(31)
 #define RISCV_IOMMU_CAP_PAS             GENMASK_ULL(37, 32)
 #define RISCV_IOMMU_CAP_PD8             BIT_ULL(38)
@@ -191,6 +198,52 @@ enum {
     RISCV_IOMMU_INTR_COUNT
 };
 
+#define RISCV_IOMMU_IOCOUNT_NUM         31
+
+/* 5.19 Performance monitoring counter overflow status (32bits) */
+#define RISCV_IOMMU_REG_IOCOUNTOVF      0x0058
+#define RISCV_IOMMU_IOCOUNTOVF_CY       BIT(0)
+
+/* 5.20 Performance monitoring counter inhibits (32bits) */
+#define RISCV_IOMMU_REG_IOCOUNTINH      0x005C
+#define RISCV_IOMMU_IOCOUNTINH_CY       BIT(0)
+
+/* 5.21 Performance monitoring cycles counter (64bits) */
+#define RISCV_IOMMU_REG_IOHPMCYCLES     0x0060
+#define RISCV_IOMMU_IOHPMCYCLES_COUNTER GENMASK_ULL(62, 0)
+#define RISCV_IOMMU_IOHPMCYCLES_OVF     BIT_ULL(63)
+
+/* 5.22 Performance monitoring event counters (31 * 64bits) */
+#define RISCV_IOMMU_REG_IOHPMCTR_BASE   0x0068
+#define RISCV_IOMMU_REG_IOHPMCTR(_n)    \
+    (RISCV_IOMMU_REG_IOHPMCTR_BASE + (_n * 0x8))
+
+/* 5.23 Performance monitoring event selectors (31 * 64bits) */
+#define RISCV_IOMMU_REG_IOHPMEVT_BASE   0x0160
+#define RISCV_IOMMU_REG_IOHPMEVT(_n)    \
+    (RISCV_IOMMU_REG_IOHPMEVT_BASE + (_n * 0x8))
+#define RISCV_IOMMU_IOHPMEVT_EVENT_ID   GENMASK_ULL(14, 0)
+#define RISCV_IOMMU_IOHPMEVT_DMASK      BIT_ULL(15)
+#define RISCV_IOMMU_IOHPMEVT_PID_PSCID  GENMASK_ULL(35, 16)
+#define RISCV_IOMMU_IOHPMEVT_DID_GSCID  GENMASK_ULL(59, 36)
+#define RISCV_IOMMU_IOHPMEVT_PV_PSCV    BIT_ULL(60)
+#define RISCV_IOMMU_IOHPMEVT_DV_GSCV    BIT_ULL(61)
+#define RISCV_IOMMU_IOHPMEVT_IDT        BIT_ULL(62)
+#define RISCV_IOMMU_IOHPMEVT_OF         BIT_ULL(63)
+
+enum RISCV_IOMMU_HPMEVENT_id {
+    RISCV_IOMMU_HPMEVENT_INVALID    = 0,
+    RISCV_IOMMU_HPMEVENT_URQ        = 1,
+    RISCV_IOMMU_HPMEVENT_TRQ        = 2,
+    RISCV_IOMMU_HPMEVENT_ATS_RQ     = 3,
+    RISCV_IOMMU_HPMEVENT_TLB_MISS   = 4,
+    RISCV_IOMMU_HPMEVENT_DD_WALK    = 5,
+    RISCV_IOMMU_HPMEVENT_PD_WALK    = 6,
+    RISCV_IOMMU_HPMEVENT_S_VS_WALKS = 7,
+    RISCV_IOMMU_HPMEVENT_G_WALKS    = 8,
+    RISCV_IOMMU_HPMEVENT_MAX        = 9
+};
+
 /* 5.24 Translation request IOVA (64bits) */
 #define RISCV_IOMMU_REG_TR_REQ_IOVA     0x0258
 
@@ -382,22 +435,6 @@ enum riscv_iommu_fq_ttypes {
     RISCV_IOMMU_FW_TTYPE_PCIE_MSG_REQ = 9,
 };
 
-/* Header fields */
-#define RISCV_IOMMU_PREQ_HDR_PID        GENMASK_ULL(31, 12)
-#define RISCV_IOMMU_PREQ_HDR_PV         BIT_ULL(32)
-#define RISCV_IOMMU_PREQ_HDR_PRIV       BIT_ULL(33)
-#define RISCV_IOMMU_PREQ_HDR_EXEC       BIT_ULL(34)
-#define RISCV_IOMMU_PREQ_HDR_DID        GENMASK_ULL(63, 40)
-
-/* Payload fields */
-#define RISCV_IOMMU_PREQ_PAYLOAD_R      BIT_ULL(0)
-#define RISCV_IOMMU_PREQ_PAYLOAD_W      BIT_ULL(1)
-#define RISCV_IOMMU_PREQ_PAYLOAD_L      BIT_ULL(2)
-#define RISCV_IOMMU_PREQ_PAYLOAD_M      GENMASK_ULL(2, 0)
-#define RISCV_IOMMU_PREQ_PRG_INDEX      GENMASK_ULL(11, 3)
-#define RISCV_IOMMU_PREQ_UADDR          GENMASK_ULL(63, 12)
-
-
 /*
  * struct riscv_iommu_msi_pte - MSI Page Table Entry
  */
diff --git a/hw/riscv/riscv-iommu-hpm.c b/hw/riscv/riscv-iommu-hpm.c
new file mode 100644
index 0000000000000..c5034bff7953e
--- /dev/null
+++ b/hw/riscv/riscv-iommu-hpm.c
@@ -0,0 +1,381 @@
+/*
+ * RISC-V IOMMU - Hardware Performance Monitor (HPM) helpers
+ *
+ * Copyright (C) 2022-2023 Rivos Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/timer.h"
+#include "cpu_bits.h"
+#include "riscv-iommu-hpm.h"
+#include "riscv-iommu.h"
+#include "riscv-iommu-bits.h"
+#include "trace.h"
+
+/* For now we assume IOMMU HPM frequency to be 1GHz so 1-cycle is of 1-ns. */
+static inline uint64_t get_cycles(void)
+{
+    return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+}
+
+uint64_t riscv_iommu_hpmcycle_read(RISCVIOMMUState *s)
+{
+    const uint64_t cycle = riscv_iommu_reg_get64(
+        s, RISCV_IOMMU_REG_IOHPMCYCLES);
+    const uint32_t inhibit = riscv_iommu_reg_get32(
+        s, RISCV_IOMMU_REG_IOCOUNTINH);
+    const uint64_t ctr_prev = s->hpmcycle_prev;
+    const uint64_t ctr_val = s->hpmcycle_val;
+
+    trace_riscv_iommu_hpm_read(cycle, inhibit, ctr_prev, ctr_val);
+
+    if (get_field(inhibit, RISCV_IOMMU_IOCOUNTINH_CY)) {
+        /*
+         * Counter should not increment if inhibit bit is set. We can't really
+         * stop the QEMU_CLOCK_VIRTUAL, so we just return the last updated
+         * counter value to indicate that counter was not incremented.
+         */
+        return (ctr_val & RISCV_IOMMU_IOHPMCYCLES_COUNTER) |
+               (cycle & RISCV_IOMMU_IOHPMCYCLES_OVF);
+    }
+
+    return (ctr_val + get_cycles() - ctr_prev) |
+        (cycle & RISCV_IOMMU_IOHPMCYCLES_OVF);
+}
+
+static void hpm_incr_ctr(RISCVIOMMUState *s, uint32_t ctr_idx)
+{
+    const uint32_t off = ctr_idx << 3;
+    uint64_t cntr_val;
+
+    cntr_val = ldq_le_p(&s->regs_rw[RISCV_IOMMU_REG_IOHPMCTR_BASE + off]);
+    stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_IOHPMCTR_BASE + off], cntr_val + 1);
+
+    trace_riscv_iommu_hpm_incr_ctr(cntr_val);
+
+    /* Handle the overflow scenario. */
+    if (cntr_val == UINT64_MAX) {
+        /*
+         * Generate interrupt only if OF bit is clear. +1 to offset the cycle
+         * register OF bit.
+         */
+        const uint32_t ovf =
+            riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IOCOUNTOVF,
+                                  BIT(ctr_idx + 1), 0);
+        if (!get_field(ovf, BIT(ctr_idx + 1))) {
+            riscv_iommu_reg_mod64(s,
+                                  RISCV_IOMMU_REG_IOHPMEVT_BASE + off,
+                                  RISCV_IOMMU_IOHPMEVT_OF,
+                                  0);
+            riscv_iommu_notify(s, RISCV_IOMMU_INTR_PM);
+        }
+    }
+}
+
+void riscv_iommu_hpm_incr_ctr(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
+                              unsigned event_id)
+{
+    const uint32_t inhibit = riscv_iommu_reg_get32(
+        s, RISCV_IOMMU_REG_IOCOUNTINH);
+    uint32_t did_gscid;
+    uint32_t pid_pscid;
+    uint32_t ctr_idx;
+    gpointer value;
+    uint32_t ctrs;
+    uint64_t evt;
+
+    if (!(s->cap & RISCV_IOMMU_CAP_HPM)) {
+        return;
+    }
+
+    value = g_hash_table_lookup(s->hpm_event_ctr_map,
+                                GUINT_TO_POINTER(event_id));
+    if (value == NULL) {
+        return;
+    }
+
+    for (ctrs = GPOINTER_TO_UINT(value); ctrs != 0; ctrs &= ctrs - 1) {
+        ctr_idx = ctz32(ctrs);
+        if (get_field(inhibit, BIT(ctr_idx + 1))) {
+            continue;
+        }
+
+        evt = riscv_iommu_reg_get64(s,
+            RISCV_IOMMU_REG_IOHPMEVT_BASE + (ctr_idx << 3));
+
+        /*
+         * It's quite possible that event ID has been changed in counter
+         * but hashtable hasn't been updated yet. We don't want to increment
+         * counter for the old event ID.
+         */
+        if (event_id != get_field(evt, RISCV_IOMMU_IOHPMEVT_EVENT_ID)) {
+            continue;
+        }
+
+        if (get_field(evt, RISCV_IOMMU_IOHPMEVT_IDT)) {
+            did_gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID);
+            pid_pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID);
+        } else {
+            did_gscid = ctx->devid;
+            pid_pscid = ctx->process_id;
+        }
+
+        if (get_field(evt, RISCV_IOMMU_IOHPMEVT_PV_PSCV)) {
+            /*
+             * If the transaction does not have a valid process_id, counter
+             * increments if device_id matches DID_GSCID. If the transaction
+             * has a valid process_id, counter increments if device_id
+             * matches DID_GSCID and process_id matches PID_PSCID. See
+             * IOMMU Specification, Chapter 5.23. Performance-monitoring
+             * event selector.
+             */
+            if (ctx->process_id &&
+                get_field(evt, RISCV_IOMMU_IOHPMEVT_PID_PSCID) != pid_pscid) {
+                continue;
+            }
+        }
+
+        if (get_field(evt, RISCV_IOMMU_IOHPMEVT_DV_GSCV)) {
+            uint32_t mask = ~0;
+
+            if (get_field(evt, RISCV_IOMMU_IOHPMEVT_DMASK)) {
+                /*
+                 * 1001 1011   mask = GSCID
+                 * 0000 0111   mask = mask ^ (mask + 1)
+                 * 1111 1000   mask = ~mask;
+                 */
+                mask = get_field(evt, RISCV_IOMMU_IOHPMEVT_DID_GSCID);
+                mask = mask ^ (mask + 1);
+                mask = ~mask;
+            }
+
+            if ((get_field(evt, RISCV_IOMMU_IOHPMEVT_DID_GSCID) & mask) !=
+                (did_gscid & mask)) {
+                continue;
+            }
+        }
+
+        hpm_incr_ctr(s, ctr_idx);
+    }
+}
+
+/* Timer callback for cycle counter overflow. */
+void riscv_iommu_hpm_timer_cb(void *priv)
+{
+    RISCVIOMMUState *s = priv;
+    const uint32_t inhibit = riscv_iommu_reg_get32(
+        s, RISCV_IOMMU_REG_IOCOUNTINH);
+    uint32_t ovf;
+
+    if (get_field(inhibit, RISCV_IOMMU_IOCOUNTINH_CY)) {
+        return;
+    }
+
+    if (s->irq_overflow_left > 0) {
+        uint64_t irq_trigger_at =
+            qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + s->irq_overflow_left;
+        timer_mod_anticipate_ns(s->hpm_timer, irq_trigger_at);
+        s->irq_overflow_left = 0;
+        return;
+    }
+
+    ovf = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_IOCOUNTOVF);
+    if (!get_field(ovf, RISCV_IOMMU_IOCOUNTOVF_CY)) {
+        /*
+         * We don't need to set hpmcycle_val to zero and update hpmcycle_prev to
+         * current clock value. The way we calculate iohpmcycs will overflow
+         * and return the correct value. This avoids the need to synchronize
+         * timer callback and write callback.
+         */
+        riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IOCOUNTOVF,
+            RISCV_IOMMU_IOCOUNTOVF_CY, 0);
+        riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_IOHPMCYCLES,
+            RISCV_IOMMU_IOHPMCYCLES_OVF, 0);
+        riscv_iommu_notify(s, RISCV_IOMMU_INTR_PM);
+    }
+}
+
+static void hpm_setup_timer(RISCVIOMMUState *s, uint64_t value)
+{
+    const uint32_t inhibit = riscv_iommu_reg_get32(
+        s, RISCV_IOMMU_REG_IOCOUNTINH);
+    uint64_t overflow_at, overflow_ns;
+
+    if (get_field(inhibit, RISCV_IOMMU_IOCOUNTINH_CY)) {
+        return;
+    }
+
+    /*
+     * We are using INT64_MAX here instead to UINT64_MAX because cycle counter
+     * has 63-bit precision and INT64_MAX is the maximum it can store.
+     */
+    if (value) {
+        overflow_ns = INT64_MAX - value + 1;
+    } else {
+        overflow_ns = INT64_MAX;
+    }
+
+    overflow_at = (uint64_t)qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + overflow_ns;
+
+    if (overflow_at > INT64_MAX) {
+        s->irq_overflow_left = overflow_at - INT64_MAX;
+        overflow_at = INT64_MAX;
+    }
+
+    timer_mod_anticipate_ns(s->hpm_timer, overflow_at);
+}
+
+/* Updates the internal cycle counter state when iocntinh:CY is changed. */
+void riscv_iommu_process_iocntinh_cy(RISCVIOMMUState *s, bool prev_cy_inh)
+{
+    const uint32_t inhibit = riscv_iommu_reg_get32(
+        s, RISCV_IOMMU_REG_IOCOUNTINH);
+
+    /* We only need to process CY bit toggle. */
+    if (!(inhibit ^ prev_cy_inh)) {
+        return;
+    }
+
+    trace_riscv_iommu_hpm_iocntinh_cy(prev_cy_inh);
+
+    if (!(inhibit & RISCV_IOMMU_IOCOUNTINH_CY)) {
+        /*
+         * Cycle counter is enabled. Just start the timer again and update
+         * the clock snapshot value to point to the current time to make
+         * sure iohpmcycles read is correct.
+         */
+        s->hpmcycle_prev = get_cycles();
+        hpm_setup_timer(s, s->hpmcycle_val);
+    } else {
+        /*
+         * Cycle counter is disabled. Stop the timer and update the cycle
+         * counter to record the current value which is last programmed
+         * value + the cycles passed so far.
+         */
+        s->hpmcycle_val = s->hpmcycle_val + (get_cycles() - s->hpmcycle_prev);
+        timer_del(s->hpm_timer);
+    }
+}
+
+void riscv_iommu_process_hpmcycle_write(RISCVIOMMUState *s)
+{
+    const uint64_t val = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_IOHPMCYCLES);
+    const uint32_t ovf = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_IOCOUNTOVF);
+
+    trace_riscv_iommu_hpm_cycle_write(ovf, val);
+
+    /*
+     * Clear OF bit in IOCNTOVF if it's being cleared in IOHPMCYCLES register.
+     */
+    if (get_field(ovf, RISCV_IOMMU_IOCOUNTOVF_CY) &&
+        !get_field(val, RISCV_IOMMU_IOHPMCYCLES_OVF)) {
+        riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IOCOUNTOVF, 0,
+            RISCV_IOMMU_IOCOUNTOVF_CY);
+    }
+
+    s->hpmcycle_val = val & ~RISCV_IOMMU_IOHPMCYCLES_OVF;
+    s->hpmcycle_prev = get_cycles();
+    hpm_setup_timer(s, s->hpmcycle_val);
+}
+
+static inline bool check_valid_event_id(unsigned event_id)
+{
+    return event_id > RISCV_IOMMU_HPMEVENT_INVALID &&
+           event_id < RISCV_IOMMU_HPMEVENT_MAX;
+}
+
+static gboolean hpm_event_equal(gpointer key, gpointer value, gpointer udata)
+{
+    uint32_t *pair = udata;
+
+    if (GPOINTER_TO_UINT(value) & (1 << pair[0])) {
+        pair[1] = GPOINTER_TO_UINT(key);
+        return true;
+    }
+
+    return false;
+}
+
+/* Caller must check ctr_idx against hpm_ctrs to see if its supported or not. */
+static void update_event_map(RISCVIOMMUState *s, uint64_t value,
+                             uint32_t ctr_idx)
+{
+    unsigned event_id = get_field(value, RISCV_IOMMU_IOHPMEVT_EVENT_ID);
+    uint32_t pair[2] = { ctr_idx, RISCV_IOMMU_HPMEVENT_INVALID };
+    uint32_t new_value = 1 << ctr_idx;
+    gpointer data;
+
+    /*
+     * If EventID field is RISCV_IOMMU_HPMEVENT_INVALID
+     * remove the current mapping.
+     */
+    if (event_id == RISCV_IOMMU_HPMEVENT_INVALID) {
+        data = g_hash_table_find(s->hpm_event_ctr_map, hpm_event_equal, pair);
+
+        new_value = GPOINTER_TO_UINT(data) & ~(new_value);
+        if (new_value != 0) {
+            g_hash_table_replace(s->hpm_event_ctr_map,
+                                 GUINT_TO_POINTER(pair[1]),
+                                 GUINT_TO_POINTER(new_value));
+        } else {
+            g_hash_table_remove(s->hpm_event_ctr_map,
+                                GUINT_TO_POINTER(pair[1]));
+        }
+
+        return;
+    }
+
+    /* Update the counter mask if the event is already enabled. */
+    if (g_hash_table_lookup_extended(s->hpm_event_ctr_map,
+                                     GUINT_TO_POINTER(event_id),
+                                     NULL,
+                                     &data)) {
+        new_value |= GPOINTER_TO_UINT(data);
+    }
+
+    g_hash_table_insert(s->hpm_event_ctr_map,
+                        GUINT_TO_POINTER(event_id),
+                        GUINT_TO_POINTER(new_value));
+}
+
+void riscv_iommu_process_hpmevt_write(RISCVIOMMUState *s, uint32_t evt_reg)
+{
+    const uint32_t ctr_idx = (evt_reg - RISCV_IOMMU_REG_IOHPMEVT_BASE) >> 3;
+    const uint32_t ovf = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_IOCOUNTOVF);
+    uint64_t val = riscv_iommu_reg_get64(s, evt_reg);
+
+    if (ctr_idx >= s->hpm_cntrs) {
+        return;
+    }
+
+    trace_riscv_iommu_hpm_evt_write(ctr_idx, ovf, val);
+
+    /* Clear OF bit in IOCNTOVF if it's being cleared in IOHPMEVT register. */
+    if (get_field(ovf, BIT(ctr_idx + 1)) &&
+        !get_field(val, RISCV_IOMMU_IOHPMEVT_OF)) {
+        /* +1 to offset CYCLE register OF bit. */
+        riscv_iommu_reg_mod32(
+            s, RISCV_IOMMU_REG_IOCOUNTOVF, 0, BIT(ctr_idx + 1));
+    }
+
+    if (!check_valid_event_id(get_field(val, RISCV_IOMMU_IOHPMEVT_EVENT_ID))) {
+        /* Reset EventID (WARL) field to invalid. */
+        val = set_field(val, RISCV_IOMMU_IOHPMEVT_EVENT_ID,
+            RISCV_IOMMU_HPMEVENT_INVALID);
+        riscv_iommu_reg_set64(s, evt_reg, val);
+    }
+
+    update_event_map(s, val, ctr_idx);
+}
diff --git a/hw/riscv/riscv-iommu-hpm.h b/hw/riscv/riscv-iommu-hpm.h
new file mode 100644
index 0000000000000..5fc4ef2e8bd5f
--- /dev/null
+++ b/hw/riscv/riscv-iommu-hpm.h
@@ -0,0 +1,33 @@
+/*
+ * RISC-V IOMMU - Hardware Performance Monitor (HPM) helpers
+ *
+ * Copyright (C) 2022-2023 Rivos Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_RISCV_IOMMU_HPM_H
+#define HW_RISCV_IOMMU_HPM_H
+
+#include "qom/object.h"
+#include "hw/riscv/riscv-iommu.h"
+
+uint64_t riscv_iommu_hpmcycle_read(RISCVIOMMUState *s);
+void riscv_iommu_hpm_incr_ctr(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
+                              unsigned event_id);
+void riscv_iommu_hpm_timer_cb(void *priv);
+void riscv_iommu_process_iocntinh_cy(RISCVIOMMUState *s, bool prev_cy_inh);
+void riscv_iommu_process_hpmcycle_write(RISCVIOMMUState *s);
+void riscv_iommu_process_hpmevt_write(RISCVIOMMUState *s, uint32_t evt_reg);
+
+#endif
diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c
index e7568ca227a87..d46beb2d64ce3 100644
--- a/hw/riscv/riscv-iommu.c
+++ b/hw/riscv/riscv-iommu.c
@@ -29,6 +29,7 @@
 #include "cpu_bits.h"
 #include "riscv-iommu.h"
 #include "riscv-iommu-bits.h"
+#include "riscv-iommu-hpm.h"
 #include "trace.h"
 
 #define LIMIT_CACHE_CTX               (1U << 7)
@@ -38,7 +39,6 @@
 #define PPN_PHYS(ppn)                 ((ppn) << TARGET_PAGE_BITS)
 #define PPN_DOWN(phy)                 ((phy) >> TARGET_PAGE_BITS)
 
-typedef struct RISCVIOMMUContext RISCVIOMMUContext;
 typedef struct RISCVIOMMUEntry RISCVIOMMUEntry;
 
 /* Device assigned I/O address space */
@@ -51,19 +51,6 @@ struct RISCVIOMMUSpace {
     QLIST_ENTRY(RISCVIOMMUSpace) list;
 };
 
-/* Device translation context state. */
-struct RISCVIOMMUContext {
-    uint64_t devid:24;          /* Requester Id, AKA device_id */
-    uint64_t process_id:20;     /* Process ID. PASID for PCIe */
-    uint64_t tc;                /* Translation Control */
-    uint64_t ta;                /* Translation Attributes */
-    uint64_t satp;              /* S-Stage address translation and protection */
-    uint64_t gatp;              /* G-Stage address translation and protection */
-    uint64_t msi_addr_mask;     /* MSI filtering - address mask */
-    uint64_t msi_addr_pattern;  /* MSI filtering - address pattern */
-    uint64_t msiptp;            /* MSI redirection page table pointer */
-};
-
 typedef enum RISCVIOMMUTransTag {
     RISCV_IOMMU_TRANS_TAG_BY,  /* Bypass */
     RISCV_IOMMU_TRANS_TAG_SS,  /* Single Stage */
@@ -100,7 +87,7 @@ static uint8_t riscv_iommu_get_icvec_vector(uint32_t icvec, uint32_t vec_type)
     }
 }
 
-static void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type)
+void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type)
 {
     uint32_t ipsr, icvec, vector;
 
@@ -422,6 +409,13 @@ static int riscv_iommu_spa_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
             }
         }
 
+
+        if (pass == S_STAGE) {
+            riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_S_VS_WALKS);
+        } else {
+            riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_G_WALKS);
+        }
+
         /* Read page table entry */
         if (sc[pass].ptesize == 4) {
             uint32_t pte32 = 0;
@@ -940,6 +934,7 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx)
 
     /* Device directory tree walk */
     for (; depth-- > 0; ) {
+        riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_DD_WALK);
         /*
          * Select device id index bits based on device directory tree level
          * and device context format.
@@ -967,6 +962,8 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx)
         addr = PPN_PHYS(get_field(de, RISCV_IOMMU_DDTE_PPN));
     }
 
+    riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_DD_WALK);
+
     /* index into device context entry page */
     addr |= (ctx->devid * dc_len) & ~TARGET_PAGE_MASK;
 
@@ -1032,6 +1029,8 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx)
     }
 
     for (depth = mode - RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8; depth-- > 0; ) {
+        riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_PD_WALK);
+
         /*
          * Select process id index bits based on process directory tree
          * level. See IOMMU Specification, 2.2. Process-Directory-Table.
@@ -1049,6 +1048,8 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx)
         addr = PPN_PHYS(get_field(de, RISCV_IOMMU_PC_FSC_PPN));
     }
 
+    riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_PD_WALK);
+
     /* Leaf entry in PDT */
     addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK;
     if (dma_memory_read(s->target_as, addr, &dc.ta, sizeof(uint64_t) * 2,
@@ -1418,6 +1419,8 @@ static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
     GHashTable *iot_cache;
     int fault;
 
+    riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_URQ);
+
     iot_cache = g_hash_table_ref(s->iot_cache);
     /*
      * TC[32] is reserved for custom extensions, used here to temporarily
@@ -1428,6 +1431,7 @@ static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
 
     /* Check for ATS request. */
     if (iotlb->perm == IOMMU_NONE) {
+        riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_ATS_RQ);
         /* Check if ATS is disabled. */
         if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS)) {
             enable_pri = false;
@@ -1446,6 +1450,8 @@ static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
         goto done;
     }
 
+    riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_TLB_MISS);
+
     /* Translate using device directory / page table information. */
     fault = riscv_iommu_spa_fetch(s, ctx, iotlb);
 
@@ -2018,6 +2024,27 @@ static void riscv_iommu_update_ipsr(RISCVIOMMUState *s, uint64_t data)
     riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, ipsr_set, ipsr_clr);
 }
 
+static void riscv_iommu_process_hpm_writes(RISCVIOMMUState *s,
+                                           uint32_t regb,
+                                           bool prev_cy_inh)
+{
+    switch (regb) {
+    case RISCV_IOMMU_REG_IOCOUNTINH:
+        riscv_iommu_process_iocntinh_cy(s, prev_cy_inh);
+        break;
+
+    case RISCV_IOMMU_REG_IOHPMCYCLES:
+    case RISCV_IOMMU_REG_IOHPMCYCLES + 4:
+        riscv_iommu_process_hpmcycle_write(s);
+        break;
+
+    case RISCV_IOMMU_REG_IOHPMEVT_BASE ...
+        RISCV_IOMMU_REG_IOHPMEVT(RISCV_IOMMU_IOCOUNT_NUM) + 4:
+        riscv_iommu_process_hpmevt_write(s, regb & ~7);
+        break;
+    }
+}
+
 /*
  * Write the resulting value of 'data' for the reg specified
  * by 'reg_addr', after considering read-only/read-write/write-clear
@@ -2045,6 +2072,7 @@ static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr,
     uint32_t regb = addr & ~3;
     uint32_t busy = 0;
     uint64_t val = 0;
+    bool cy_inh = false;
 
     if ((addr & (size - 1)) != 0) {
         /* Unsupported MMIO alignment or access size */
@@ -2112,6 +2140,16 @@ static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr,
         busy = RISCV_IOMMU_TR_REQ_CTL_GO_BUSY;
         break;
 
+    case RISCV_IOMMU_REG_IOCOUNTINH:
+        if (addr != RISCV_IOMMU_REG_IOCOUNTINH) {
+            break;
+        }
+        /* Store previous value of CY bit. */
+        cy_inh = !!(riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_IOCOUNTINH) &
+            RISCV_IOMMU_IOCOUNTINH_CY);
+        break;
+
+
     default:
         break;
     }
@@ -2130,6 +2168,12 @@ static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr,
         stl_le_p(&s->regs_rw[regb], rw | busy);
     }
 
+    /* Process HPM writes and update any internal state if needed. */
+    if (regb >= RISCV_IOMMU_REG_IOCOUNTOVF &&
+        regb <= (RISCV_IOMMU_REG_IOHPMEVT(RISCV_IOMMU_IOCOUNT_NUM) + 4)) {
+        riscv_iommu_process_hpm_writes(s, regb, cy_inh);
+    }
+
     if (process_fn) {
         process_fn(s);
     }
@@ -2153,7 +2197,28 @@ static MemTxResult riscv_iommu_mmio_read(void *opaque, hwaddr addr,
         return MEMTX_ACCESS_ERROR;
     }
 
-    ptr = &s->regs_rw[addr];
+    /* Compute cycle register value. */
+    if ((addr & ~7) == RISCV_IOMMU_REG_IOHPMCYCLES) {
+        val = riscv_iommu_hpmcycle_read(s);
+        ptr = (uint8_t *)&val + (addr & 7);
+    } else if ((addr & ~3) == RISCV_IOMMU_REG_IOCOUNTOVF) {
+        /*
+         * Software can read RISCV_IOMMU_REG_IOCOUNTOVF before timer
+         * callback completes. In which case CY_OF bit in
+         * RISCV_IOMMU_IOHPMCYCLES_OVF would be 0. Here we take the
+         * CY_OF bit state from RISCV_IOMMU_REG_IOHPMCYCLES register as
+         * it's not dependent over the timer callback and is computed
+         * from cycle overflow.
+         */
+        val = ldq_le_p(&s->regs_rw[addr]);
+        val |= (riscv_iommu_hpmcycle_read(s) & RISCV_IOMMU_IOHPMCYCLES_OVF)
+                   ? RISCV_IOMMU_IOCOUNTOVF_CY
+                   : 0;
+        ptr = (uint8_t *)&val + (addr & 3);
+    } else {
+        ptr = &s->regs_rw[addr];
+    }
+
     val = ldn_le_p(ptr, size);
 
     *data = val;
@@ -2292,6 +2357,15 @@ static void riscv_iommu_realize(DeviceState *dev, Error **errp)
                   RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4;
     }
 
+    if (s->hpm_cntrs > 0) {
+        /* Clip number of HPM counters to maximum supported (31). */
+        if (s->hpm_cntrs > RISCV_IOMMU_IOCOUNT_NUM) {
+            s->hpm_cntrs = RISCV_IOMMU_IOCOUNT_NUM;
+        }
+        /* Enable hardware performance monitor interface */
+        s->cap |= RISCV_IOMMU_CAP_HPM;
+    }
+
     /* Out-of-reset translation mode: OFF (DMA disabled) BARE (passthrough) */
     s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, s->enable_off ?
                         RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE);
@@ -2339,6 +2413,18 @@ static void riscv_iommu_realize(DeviceState *dev, Error **errp)
             RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
     }
 
+    /* If HPM registers are enabled. */
+    if (s->cap & RISCV_IOMMU_CAP_HPM) {
+        /* +1 for cycle counter bit. */
+        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_IOCOUNTINH],
+                 ~((2 << s->hpm_cntrs) - 1));
+        stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_IOHPMCYCLES], 0);
+        memset(&s->regs_ro[RISCV_IOMMU_REG_IOHPMCTR_BASE],
+               0x00, s->hpm_cntrs * 8);
+        memset(&s->regs_ro[RISCV_IOMMU_REG_IOHPMEVT_BASE],
+               0x00, s->hpm_cntrs * 8);
+    }
+
     /* Memory region for downstream access, if specified. */
     if (s->target_mr) {
         s->target_as = g_new0(AddressSpace, 1);
@@ -2353,6 +2439,12 @@ static void riscv_iommu_realize(DeviceState *dev, Error **errp)
     memory_region_init_io(&s->trap_mr, OBJECT(dev), &riscv_iommu_trap_ops, s,
             "riscv-iommu-trap", ~0ULL);
     address_space_init(&s->trap_as, &s->trap_mr, "riscv-iommu-trap-as");
+
+    if (s->cap & RISCV_IOMMU_CAP_HPM) {
+        s->hpm_timer =
+            timer_new_ns(QEMU_CLOCK_VIRTUAL, riscv_iommu_hpm_timer_cb, s);
+        s->hpm_event_ctr_map = g_hash_table_new(g_direct_hash, g_direct_equal);
+    }
 }
 
 static void riscv_iommu_unrealize(DeviceState *dev)
@@ -2361,6 +2453,11 @@ static void riscv_iommu_unrealize(DeviceState *dev)
 
     g_hash_table_unref(s->iot_cache);
     g_hash_table_unref(s->ctx_cache);
+
+    if (s->cap & RISCV_IOMMU_CAP_HPM) {
+        g_hash_table_unref(s->hpm_event_ctr_map);
+        timer_free(s->hpm_timer);
+    }
 }
 
 void riscv_iommu_reset(RISCVIOMMUState *s)
@@ -2411,6 +2508,8 @@ static const Property riscv_iommu_properties[] = {
     DEFINE_PROP_BOOL("g-stage", RISCVIOMMUState, enable_g_stage, TRUE),
     DEFINE_PROP_LINK("downstream-mr", RISCVIOMMUState, target_mr,
         TYPE_MEMORY_REGION, MemoryRegion *),
+    DEFINE_PROP_UINT8("hpm-counters", RISCVIOMMUState, hpm_cntrs,
+                      RISCV_IOMMU_IOCOUNT_NUM),
 };
 
 static void riscv_iommu_class_init(ObjectClass *klass, void* data)
diff --git a/hw/riscv/riscv-iommu.h b/hw/riscv/riscv-iommu.h
index 9424989df41a4..a31aa62144f4b 100644
--- a/hw/riscv/riscv-iommu.h
+++ b/hw/riscv/riscv-iommu.h
@@ -20,6 +20,8 @@
 #define HW_RISCV_IOMMU_STATE_H
 
 #include "qom/object.h"
+#include "hw/qdev-properties.h"
+#include "system/dma.h"
 #include "hw/riscv/iommu.h"
 #include "hw/riscv/riscv-iommu-bits.h"
 
@@ -58,11 +60,6 @@ struct RISCVIOMMUState {
     /* interrupt notifier */
     void (*notify)(RISCVIOMMUState *iommu, unsigned vector);
 
-    /* IOMMU State Machine */
-    QemuThread core_proc; /* Background processing thread */
-    QemuCond core_cond;   /* Background processing wake up signal */
-    unsigned core_exec;   /* Processing thread execution actions */
-
     /* IOMMU target address space */
     AddressSpace *target_as;
     MemoryRegion *target_mr;
@@ -84,12 +81,37 @@ struct RISCVIOMMUState {
 
     QLIST_ENTRY(RISCVIOMMUState) iommus;
     QLIST_HEAD(, RISCVIOMMUSpace) spaces;
+
+    /* HPM cycle counter */
+    QEMUTimer *hpm_timer;
+    uint64_t hpmcycle_val;      /* Current value of cycle register */
+    uint64_t hpmcycle_prev;     /* Saved value of QEMU_CLOCK_VIRTUAL clock */
+    uint64_t irq_overflow_left; /* Value beyond INT64_MAX after overflow */
+
+    /* HPM event counters */
+    GHashTable *hpm_event_ctr_map; /* Mapping of events to counters */
+    uint8_t hpm_cntrs;
 };
 
 void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus,
          Error **errp);
 void riscv_iommu_set_cap_igs(RISCVIOMMUState *s, riscv_iommu_igs_mode mode);
 void riscv_iommu_reset(RISCVIOMMUState *s);
+void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type);
+
+typedef struct RISCVIOMMUContext RISCVIOMMUContext;
+/* Device translation context state. */
+struct RISCVIOMMUContext {
+    uint64_t devid:24;          /* Requester Id, AKA device_id */
+    uint64_t process_id:20;     /* Process ID. PASID for PCIe */
+    uint64_t tc;                /* Translation Control */
+    uint64_t ta;                /* Translation Attributes */
+    uint64_t satp;              /* S-Stage address translation and protection */
+    uint64_t gatp;              /* G-Stage address translation and protection */
+    uint64_t msi_addr_mask;     /* MSI filtering - address mask */
+    uint64_t msi_addr_pattern;  /* MSI filtering - address pattern */
+    uint64_t msiptp;            /* MSI redirection page table pointer */
+};
 
 /* private helpers */
 
diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c
index 9a20bcbf7fbdf..679f2024bc6aa 100644
--- a/hw/riscv/sifive_u.c
+++ b/hw/riscv/sifive_u.c
@@ -724,6 +724,7 @@ static void sifive_u_machine_class_init(ObjectClass *oc, void *data)
     mc->default_cpu_type = SIFIVE_U_CPU;
     mc->default_cpus = mc->min_cpus;
     mc->default_ram_id = "riscv.sifive.u.ram";
+    mc->auto_create_sdcard = true;
 
     object_class_property_add_bool(oc, "start-in-flash",
                                    sifive_u_machine_get_start_in_flash,
diff --git a/hw/riscv/trace-events b/hw/riscv/trace-events
index 7bcbb03d08360..b50b14a65422c 100644
--- a/hw/riscv/trace-events
+++ b/hw/riscv/trace-events
@@ -19,3 +19,8 @@ riscv_iommu_sys_irq_sent(uint32_t vector) "IRQ sent to vector %u"
 riscv_iommu_sys_msi_sent(uint32_t vector, uint64_t msi_addr, uint32_t msi_data, uint32_t result) "MSI sent to vector %u msi_addr 0x%"PRIx64" msi_data 0x%x result %u"
 riscv_iommu_sys_reset_hold(int reset_type) "reset type %d"
 riscv_iommu_pci_reset_hold(int reset_type) "reset type %d"
+riscv_iommu_hpm_read(uint64_t cycle, uint32_t inhibit, uint64_t ctr_prev, uint64_t ctr_val) "cycle 0x%"PRIx64" inhibit 0x%x ctr_prev 0x%"PRIx64" ctr_val 0x%"PRIx64
+riscv_iommu_hpm_incr_ctr(uint64_t cntr_val) "cntr_val 0x%"PRIx64
+riscv_iommu_hpm_iocntinh_cy(bool prev_cy_inh) "prev_cy_inh %d"
+riscv_iommu_hpm_cycle_write(uint32_t ovf, uint64_t val) "ovf 0x%x val 0x%"PRIx64
+riscv_iommu_hpm_evt_write(uint32_t ctr_idx, uint32_t ovf, uint64_t val) "ctr_idx 0x%x ovf 0x%x val 0x%"PRIx64
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 241389d72f8a2..dae46f4733cd5 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -971,6 +971,7 @@ static void create_fdt_uart(RISCVVirtState *s, const MemMapEntry *memmap,
     }
 
     qemu_fdt_setprop_string(ms->fdt, "/chosen", "stdout-path", name);
+    qemu_fdt_setprop_string(ms->fdt, "/aliases", "serial0", name);
 }
 
 static void create_fdt_rtc(RISCVVirtState *s, const MemMapEntry *memmap,
@@ -1180,6 +1181,8 @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap)
     qemu_fdt_setprop(ms->fdt, "/chosen", "rng-seed",
                      rng_seed, sizeof(rng_seed));
 
+    qemu_fdt_add_subnode(ms->fdt, "/aliases");
+
     create_fdt_flash(s, memmap);
     create_fdt_fw_cfg(s, memmap);
     create_fdt_pmu(s);
diff --git a/hw/rtc/Kconfig b/hw/rtc/Kconfig
index 2fe04ec1d0490..315b0e4eccdd8 100644
--- a/hw/rtc/Kconfig
+++ b/hw/rtc/Kconfig
@@ -26,3 +26,8 @@ config GOLDFISH_RTC
 
 config LS7A_RTC
     bool
+
+config RS5C372_RTC
+    bool
+    depends on I2C
+    default y if I2C_DEVICES
diff --git a/hw/rtc/allwinner-rtc.c b/hw/rtc/allwinner-rtc.c
index a19e4310bb1be..fd8355a86763b 100644
--- a/hw/rtc/allwinner-rtc.c
+++ b/hw/rtc/allwinner-rtc.c
@@ -259,7 +259,7 @@ static void allwinner_rtc_write(void *opaque, hwaddr offset,
 static const MemoryRegionOps allwinner_rtc_ops = {
     .read = allwinner_rtc_read,
     .write = allwinner_rtc_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
     .valid = {
         .min_access_size = 4,
         .max_access_size = 4,
diff --git a/hw/rtc/goldfish_rtc.c b/hw/rtc/goldfish_rtc.c
index fa1d9051f4a53..0f1b53e0e4d35 100644
--- a/hw/rtc/goldfish_rtc.c
+++ b/hw/rtc/goldfish_rtc.c
@@ -178,38 +178,21 @@ static void goldfish_rtc_write(void *opaque, hwaddr offset,
     trace_goldfish_rtc_write(offset, value);
 }
 
-static int goldfish_rtc_pre_save(void *opaque)
-{
-    uint64_t delta;
-    GoldfishRTCState *s = opaque;
-
-    /*
-     * We want to migrate this offset, which sounds straightforward.
-     * Unfortunately, we cannot directly pass tick_offset because
-     * rtc_clock on destination Host might not be same source Host.
-     *
-     * To tackle, this we pass tick_offset relative to vm_clock from
-     * source Host and make it relative to rtc_clock at destination Host.
-     */
-    delta = qemu_clock_get_ns(rtc_clock) -
-            qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
-    s->tick_offset_vmstate = s->tick_offset + delta;
-
-    return 0;
-}
-
 static int goldfish_rtc_post_load(void *opaque, int version_id)
 {
-    uint64_t delta;
     GoldfishRTCState *s = opaque;
 
-    /*
-     * We extract tick_offset from tick_offset_vmstate by doing
-     * reverse math compared to pre_save() function.
-     */
-    delta = qemu_clock_get_ns(rtc_clock) -
-            qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
-    s->tick_offset = s->tick_offset_vmstate - delta;
+    if (version_id < 3) {
+        /*
+         * Previous versions didn't migrate tick_offset directly. Instead, they
+         * migrated tick_offset_vmstate, which is a recalculation based on
+         * QEMU_CLOCK_VIRTUAL. We use tick_offset_vmstate when migrating from
+         * older versions.
+         */
+        uint64_t delta = qemu_clock_get_ns(rtc_clock) -
+                 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+        s->tick_offset = s->tick_offset_vmstate - delta;
+    }
 
     goldfish_rtc_set_alarm(s);
 
@@ -239,8 +222,7 @@ static const MemoryRegionOps goldfish_rtc_ops[2] = {
 
 static const VMStateDescription goldfish_rtc_vmstate = {
     .name = TYPE_GOLDFISH_RTC,
-    .version_id = 2,
-    .pre_save = goldfish_rtc_pre_save,
+    .version_id = 3,
     .post_load = goldfish_rtc_post_load,
     .fields = (const VMStateField[]) {
         VMSTATE_UINT64(tick_offset_vmstate, GoldfishRTCState),
@@ -249,6 +231,7 @@ static const VMStateDescription goldfish_rtc_vmstate = {
         VMSTATE_UINT32(irq_pending, GoldfishRTCState),
         VMSTATE_UINT32(irq_enabled, GoldfishRTCState),
         VMSTATE_UINT32(time_high, GoldfishRTCState),
+        VMSTATE_UINT64_V(tick_offset, GoldfishRTCState, 3),
         VMSTATE_END_OF_LIST()
     }
 };
diff --git a/hw/rtc/m48t59-isa.c b/hw/rtc/m48t59-isa.c
index 38bc8dcf100fe..9c3855a3ef164 100644
--- a/hw/rtc/m48t59-isa.c
+++ b/hw/rtc/m48t59-isa.c
@@ -129,7 +129,7 @@ static void m48txx_isa_class_init(ObjectClass *klass, void *data)
 static void m48txx_isa_concrete_class_init(ObjectClass *klass, void *data)
 {
     M48txxISADeviceClass *u = M48TXX_ISA_CLASS(klass);
-    M48txxInfo *info = data;
+    const M48txxInfo *info = data;
 
     u->info = *info;
 }
diff --git a/hw/rtc/m48t59.c b/hw/rtc/m48t59.c
index c9bd6f878fe55..3fb2f27d9d16d 100644
--- a/hw/rtc/m48t59.c
+++ b/hw/rtc/m48t59.c
@@ -639,7 +639,7 @@ static void m48txx_sysbus_class_init(ObjectClass *klass, void *data)
 static void m48txx_sysbus_concrete_class_init(ObjectClass *klass, void *data)
 {
     M48txxSysBusDeviceClass *u = M48TXX_SYS_BUS_CLASS(klass);
-    M48txxInfo *info = data;
+    const M48txxInfo *info = data;
 
     u->info = *info;
 }
diff --git a/hw/rtc/meson.build b/hw/rtc/meson.build
index 8ecc2d792c1fb..6c87864dc07cf 100644
--- a/hw/rtc/meson.build
+++ b/hw/rtc/meson.build
@@ -13,3 +13,4 @@ system_ss.add(when: 'CONFIG_GOLDFISH_RTC', if_true: files('goldfish_rtc.c'))
 system_ss.add(when: 'CONFIG_LS7A_RTC', if_true: files('ls7a_rtc.c'))
 system_ss.add(when: 'CONFIG_ALLWINNER_H3', if_true: files('allwinner-rtc.c'))
 system_ss.add(when: 'CONFIG_MC146818RTC', if_true: files('mc146818rtc.c'))
+system_ss.add(when: 'CONFIG_RS5C372_RTC', if_true: files('rs5c372.c'))
diff --git a/hw/rtc/rs5c372.c b/hw/rtc/rs5c372.c
new file mode 100644
index 0000000000000..5542f74085a24
--- /dev/null
+++ b/hw/rtc/rs5c372.c
@@ -0,0 +1,236 @@
+/*
+ * Ricoh RS5C372, R222x I2C RTC
+ *
+ * Copyright (c) 2025 Bernhard Beschow <shentey@gmail.com>
+ *
+ * Based on hw/rtc/ds1338.c
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "hw/i2c/i2c.h"
+#include "hw/qdev-properties.h"
+#include "hw/resettable.h"
+#include "migration/vmstate.h"
+#include "qemu/bcd.h"
+#include "qom/object.h"
+#include "system/rtc.h"
+#include "trace.h"
+
+#define NVRAM_SIZE 0x10
+
+/* Flags definitions */
+#define SECONDS_CH 0x80
+#define HOURS_PM   0x20
+#define CTRL2_24   0x20
+
+#define TYPE_RS5C372 "rs5c372"
+OBJECT_DECLARE_SIMPLE_TYPE(RS5C372State, RS5C372)
+
+struct RS5C372State {
+    I2CSlave parent_obj;
+
+    int64_t offset;
+    uint8_t wday_offset;
+    uint8_t nvram[NVRAM_SIZE];
+    uint8_t ptr;
+    uint8_t tx_format;
+    bool addr_byte;
+};
+
+static void capture_current_time(RS5C372State *s)
+{
+    /*
+     * Capture the current time into the secondary registers which will be
+     * actually read by the data transfer operation.
+     */
+    struct tm now;
+    qemu_get_timedate(&now, s->offset);
+    s->nvram[0] = to_bcd(now.tm_sec);
+    s->nvram[1] = to_bcd(now.tm_min);
+    if (s->nvram[0xf] & CTRL2_24) {
+        s->nvram[2] = to_bcd(now.tm_hour);
+    } else {
+        int tmp = now.tm_hour;
+        if (tmp % 12 == 0) {
+            tmp += 12;
+        }
+        if (tmp <= 12) {
+            s->nvram[2] = to_bcd(tmp);
+        } else {
+            s->nvram[2] = HOURS_PM | to_bcd(tmp - 12);
+        }
+    }
+    s->nvram[3] = (now.tm_wday + s->wday_offset) % 7 + 1;
+    s->nvram[4] = to_bcd(now.tm_mday);
+    s->nvram[5] = to_bcd(now.tm_mon + 1);
+    s->nvram[6] = to_bcd(now.tm_year - 100);
+}
+
+static void inc_regptr(RS5C372State *s)
+{
+    s->ptr = (s->ptr + 1) & (NVRAM_SIZE - 1);
+}
+
+static int rs5c372_event(I2CSlave *i2c, enum i2c_event event)
+{
+    RS5C372State *s = RS5C372(i2c);
+
+    switch (event) {
+    case I2C_START_RECV:
+        /*
+         * In h/w, capture happens on any START condition, not just a
+         * START_RECV, but there is no need to actually capture on
+         * START_SEND, because the guest can't get at that data
+         * without going through a START_RECV which would overwrite it.
+         */
+        capture_current_time(s);
+        s->ptr = 0xf;
+        break;
+    case I2C_START_SEND:
+        s->addr_byte = true;
+        break;
+    default:
+        break;
+    }
+
+    return 0;
+}
+
+static uint8_t rs5c372_recv(I2CSlave *i2c)
+{
+    RS5C372State *s = RS5C372(i2c);
+    uint8_t res;
+
+    res  = s->nvram[s->ptr];
+
+    trace_rs5c372_recv(s->ptr, res);
+
+    inc_regptr(s);
+    return res;
+}
+
+static int rs5c372_send(I2CSlave *i2c, uint8_t data)
+{
+    RS5C372State *s = RS5C372(i2c);
+
+    if (s->addr_byte) {
+        s->ptr = data >> 4;
+        s->tx_format = data & 0xf;
+        s->addr_byte = false;
+        return 0;
+    }
+
+    trace_rs5c372_send(s->ptr, data);
+
+    if (s->ptr < 7) {
+        /* Time register. */
+        struct tm now;
+        qemu_get_timedate(&now, s->offset);
+        switch (s->ptr) {
+        case 0:
+            now.tm_sec = from_bcd(data & 0x7f);
+            break;
+        case 1:
+            now.tm_min = from_bcd(data & 0x7f);
+            break;
+        case 2:
+            if (s->nvram[0xf] & CTRL2_24) {
+                now.tm_hour = from_bcd(data & 0x3f);
+            } else {
+                int tmp = from_bcd(data & (HOURS_PM - 1));
+                if (data & HOURS_PM) {
+                    tmp += 12;
+                }
+                if (tmp % 12 == 0) {
+                    tmp -= 12;
+                }
+                now.tm_hour = tmp;
+            }
+            break;
+        case 3:
+            {
+                /*
+                 * The day field is supposed to contain a value in the range
+                 * 1-7. Otherwise behavior is undefined.
+                 */
+                int user_wday = (data & 7) - 1;
+                s->wday_offset = (user_wday - now.tm_wday + 7) % 7;
+            }
+            break;
+        case 4:
+            now.tm_mday = from_bcd(data & 0x3f);
+            break;
+        case 5:
+            now.tm_mon = from_bcd(data & 0x1f) - 1;
+            break;
+        case 6:
+            now.tm_year = from_bcd(data) + 100;
+            break;
+        }
+        s->offset = qemu_timedate_diff(&now);
+    } else {
+        s->nvram[s->ptr] = data;
+    }
+    inc_regptr(s);
+    return 0;
+}
+
+static void rs5c372_reset_hold(Object *obj, ResetType type)
+{
+    RS5C372State *s = RS5C372(obj);
+
+    /* The clock is running and synchronized with the host */
+    s->offset = 0;
+    s->wday_offset = 0;
+    memset(s->nvram, 0, NVRAM_SIZE);
+    s->ptr = 0;
+    s->addr_byte = false;
+}
+
+static const VMStateDescription rs5c372_vmstate = {
+    .name = "rs5c372",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (const VMStateField[]) {
+        VMSTATE_I2C_SLAVE(parent_obj, RS5C372State),
+        VMSTATE_INT64(offset, RS5C372State),
+        VMSTATE_UINT8_V(wday_offset, RS5C372State, 2),
+        VMSTATE_UINT8_ARRAY(nvram, RS5C372State, NVRAM_SIZE),
+        VMSTATE_UINT8(ptr, RS5C372State),
+        VMSTATE_UINT8(tx_format, RS5C372State),
+        VMSTATE_BOOL(addr_byte, RS5C372State),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void rs5c372_init(Object *obj)
+{
+    qdev_prop_set_uint8(DEVICE(obj), "address", 0x32);
+}
+
+static void rs5c372_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    I2CSlaveClass *k = I2C_SLAVE_CLASS(klass);
+    ResettableClass *rc = RESETTABLE_CLASS(klass);
+
+    k->event = rs5c372_event;
+    k->recv = rs5c372_recv;
+    k->send = rs5c372_send;
+    dc->vmsd = &rs5c372_vmstate;
+    rc->phases.hold = rs5c372_reset_hold;
+}
+
+static const TypeInfo rs5c372_types[] = {
+    {
+        .name          = TYPE_RS5C372,
+        .parent        = TYPE_I2C_SLAVE,
+        .instance_size = sizeof(RS5C372State),
+        .instance_init = rs5c372_init,
+        .class_init    = rs5c372_class_init,
+    },
+};
+
+DEFINE_TYPES(rs5c372_types)
diff --git a/hw/rtc/trace-events b/hw/rtc/trace-events
index 8012afe1021d9..b9f2852d35fca 100644
--- a/hw/rtc/trace-events
+++ b/hw/rtc/trace-events
@@ -35,3 +35,7 @@ m48txx_nvram_mem_write(uint32_t addr, uint32_t value) "mem write addr:0x%04x val
 # goldfish_rtc.c
 goldfish_rtc_read(uint64_t addr, uint64_t value) "addr 0x%02" PRIx64 " value 0x%08" PRIx64
 goldfish_rtc_write(uint64_t addr, uint64_t value) "addr 0x%02" PRIx64 " value 0x%08" PRIx64
+
+# rs5c372.c
+rs5c372_recv(uint32_t addr, uint8_t value) "[0x%" PRIx32 "] -> 0x%02" PRIx8
+rs5c372_send(uint32_t addr, uint8_t value) "[0x%" PRIx32 "] <- 0x%02" PRIx8
diff --git a/hw/rx/rx-gdbsim.c b/hw/rx/rx-gdbsim.c
index 88c8f12c1019c..4afd77efd569f 100644
--- a/hw/rx/rx-gdbsim.c
+++ b/hw/rx/rx-gdbsim.c
@@ -110,9 +110,6 @@ static void rx_gdbsim_init(MachineState *machine)
     if (!kernel_filename) {
         if (machine->firmware) {
             rom_add_file_fixed(machine->firmware, RX62N_CFLASH_BASE, 0);
-        } else if (!qtest_enabled()) {
-            error_report("No bios or kernel specified");
-            exit(1);
         }
     }
 
diff --git a/hw/s390x/ccw-device.c b/hw/s390x/ccw-device.c
index 494faebb5a8ad..1ea9934f6cee9 100644
--- a/hw/s390x/ccw-device.c
+++ b/hw/s390x/ccw-device.c
@@ -74,9 +74,9 @@ static void ccw_device_set_loadparm(Object *obj, Visitor *v,
 }
 
 const PropertyInfo ccw_loadparm = {
-    .name  = "ccw_loadparm",
-    .description = "Up to 8 chars in set of [A-Za-z0-9. ] to pass"
-            " to the guest loader/kernel",
+    .type  = "str",
+    .description = "Up to 8 chars in set of [A-Za-z0-9. ] to select"
+            " a guest kernel",
     .get = ccw_device_get_loadparm,
     .set = ccw_device_set_loadparm,
 };
diff --git a/hw/s390x/css.c b/hw/s390x/css.c
index 4e27b2961b8c4..738800c98df5e 100644
--- a/hw/s390x/css.c
+++ b/hw/s390x/css.c
@@ -2523,7 +2523,7 @@ static void set_css_devid(Object *obj, Visitor *v, const char *name,
 }
 
 const PropertyInfo css_devid_propinfo = {
-    .name = "str",
+    .type = "str",
     .description = "Identifier of an I/O device in the channel "
                    "subsystem, example: fe.1.23ab",
     .get = get_css_devid,
@@ -2531,7 +2531,7 @@ const PropertyInfo css_devid_propinfo = {
 };
 
 const PropertyInfo css_devid_ro_propinfo = {
-    .name = "str",
+    .type = "str",
     .description = "Read-only identifier of an I/O device in the channel "
                    "subsystem, example: fe.1.23ab",
     .get = get_css_devid,
diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index eead269cc285f..2591ee49c11f8 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -18,6 +18,8 @@
 #include "hw/s390x/s390-pci-inst.h"
 #include "hw/s390x/s390-pci-kvm.h"
 #include "hw/s390x/s390-pci-vfio.h"
+#include "hw/s390x/s390-virtio-ccw.h"
+#include "hw/boards.h"
 #include "hw/pci/pci_bus.h"
 #include "hw/qdev-properties.h"
 #include "hw/pci/pci_bridge.h"
@@ -724,12 +726,42 @@ void s390_pci_iommu_enable(S390PCIIOMMU *iommu)
     g_free(name);
 }
 
+void s390_pci_iommu_direct_map_enable(S390PCIIOMMU *iommu)
+{
+    MachineState *ms = MACHINE(qdev_get_machine());
+    S390CcwMachineState *s390ms = S390_CCW_MACHINE(ms);
+
+    /*
+     * For direct-mapping we must map the entire guest address space.  Rather
+     * than using an iommu, create a memory region alias that maps GPA X to
+     * IOVA X + SDMA.  VFIO will handle pinning via its memory listener.
+     */
+    g_autofree char *name = g_strdup_printf("iommu-dm-s390-%04x",
+                                            iommu->pbdev->uid);
+
+    iommu->dm_mr = g_malloc0(sizeof(*iommu->dm_mr));
+    memory_region_init_alias(iommu->dm_mr, OBJECT(&iommu->mr), name,
+                             get_system_memory(), 0,
+                             s390_get_memory_limit(s390ms));
+    iommu->enabled = true;
+    memory_region_add_subregion(&iommu->mr, iommu->pbdev->zpci_fn.sdma,
+                                iommu->dm_mr);
+}
+
 void s390_pci_iommu_disable(S390PCIIOMMU *iommu)
 {
     iommu->enabled = false;
     g_hash_table_remove_all(iommu->iotlb);
-    memory_region_del_subregion(&iommu->mr, MEMORY_REGION(&iommu->iommu_mr));
-    object_unparent(OBJECT(&iommu->iommu_mr));
+    if (iommu->dm_mr) {
+        memory_region_del_subregion(&iommu->mr, iommu->dm_mr);
+        object_unparent(OBJECT(iommu->dm_mr));
+        g_free(iommu->dm_mr);
+        iommu->dm_mr = NULL;
+    } else {
+        memory_region_del_subregion(&iommu->mr,
+                                    MEMORY_REGION(&iommu->iommu_mr));
+        object_unparent(OBJECT(&iommu->iommu_mr));
+    }
 }
 
 static void s390_pci_iommu_free(S390pciState *s, PCIBus *bus, int32_t devfn)
@@ -971,14 +1003,7 @@ static void s390_pcihost_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
                     "this device");
     }
 
-    if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
-        PCIDevice *pdev = PCI_DEVICE(dev);
-
-        if (pdev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
-            error_setg(errp, "multifunction not supported in s390");
-            return;
-        }
-    } else if (object_dynamic_cast(OBJECT(dev), TYPE_S390_PCI_DEVICE)) {
+    if (object_dynamic_cast(OBJECT(dev), TYPE_S390_PCI_DEVICE)) {
         S390PCIBusDevice *pbdev = S390_PCI_DEVICE(dev);
 
         if (!s390_pci_alloc_idx(s, pbdev)) {
@@ -1069,6 +1094,18 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
     } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
         pdev = PCI_DEVICE(dev);
 
+        /*
+         * Multifunction is not supported due to the lack of CLP. However,
+         * do not check for multifunction capability for SR-IOV devices because
+         * SR-IOV devices automatically add the multifunction capability whether
+         * the user intends to use the functions other than the PF.
+         */
+        if (pdev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION &&
+            !pdev->exp.sriov_cap) {
+            error_setg(errp, "multifunction not supported in s390");
+            return;
+        }
+
         if (!dev->id) {
             /* In the case the PCI device does not define an id */
             /* we generate one based on the PCI address         */
@@ -1080,6 +1117,16 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
 
         pbdev = s390_pci_find_dev_by_target(s, dev->id);
         if (!pbdev) {
+            /*
+             * VFs are automatically created by PF, and creating zpci for them
+             * will result in unexpected usage of fids. Currently QEMU does not
+             * support multifunction for s390x so we don't need zpci for VFs
+             * anyway.
+             */
+            if (pci_is_vf(pdev)) {
+                return;
+            }
+
             pbdev = s390_pci_device_new(s, dev->id, errp);
             if (!pbdev) {
                 return;
@@ -1130,6 +1177,7 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
             /* Always intercept emulated devices */
             pbdev->interp = false;
             pbdev->forwarding_assist = false;
+            pbdev->rtr_avail = false;
         }
 
         if (s390_pci_msix_init(pbdev) && !pbdev->interp) {
@@ -1167,7 +1215,10 @@ static void s390_pcihost_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
         int32_t devfn;
 
         pbdev = s390_pci_find_dev_by_pci(s, PCI_DEVICE(dev));
-        g_assert(pbdev);
+        if (!pbdev) {
+            g_assert(pci_is_vf(pci_dev));
+            return;
+        }
 
         s390_pci_generate_plug_event(HP_EVENT_STANDBY_TO_RESERVED,
                                      pbdev->fh, pbdev->fid);
@@ -1206,7 +1257,11 @@ static void s390_pcihost_unplug_request(HotplugHandler *hotplug_dev,
          * we've checked the PCI device already (to prevent endless recursion).
          */
         pbdev = s390_pci_find_dev_by_pci(s, PCI_DEVICE(dev));
-        g_assert(pbdev);
+        if (!pbdev) {
+            g_assert(pci_is_vf(PCI_DEVICE(dev)));
+            return;
+        }
+
         pbdev->pci_unplug_request_processed = true;
         qdev_unplug(DEVICE(pbdev), errp);
     } else if (object_dynamic_cast(OBJECT(dev), TYPE_S390_PCI_DEVICE)) {
@@ -1473,7 +1528,8 @@ static void s390_pci_set_fid(Object *obj, Visitor *v, const char *name,
 }
 
 static const PropertyInfo s390_pci_fid_propinfo = {
-    .name = "zpci_fid",
+    .type = "uint32",
+    .description = "zpci_fid",
     .get = s390_pci_get_fid,
     .set = s390_pci_set_fid,
 };
@@ -1488,6 +1544,8 @@ static const Property s390_pci_device_properties[] = {
     DEFINE_PROP_BOOL("interpret", S390PCIBusDevice, interp, true),
     DEFINE_PROP_BOOL("forwarding-assist", S390PCIBusDevice, forwarding_assist,
                      true),
+    DEFINE_PROP_BOOL("relaxed-translation", S390PCIBusDevice, rtr_avail,
+                     true),
 };
 
 static const VMStateDescription s390_pci_device_vmstate = {
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index e386d75d58c3b..8cdeb6cb7f717 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -16,6 +16,7 @@
 #include "exec/memory.h"
 #include "qemu/error-report.h"
 #include "system/hw_accel.h"
+#include "hw/boards.h"
 #include "hw/pci/pci_device.h"
 #include "hw/s390x/s390-pci-inst.h"
 #include "hw/s390x/s390-pci-bus.h"
@@ -1008,17 +1009,25 @@ static int reg_ioat(CPUS390XState *env, S390PCIBusDevice *pbdev, ZpciFib fib,
     }
 
     /* currently we only support designation type 1 with translation */
-    if (!(dt == ZPCI_IOTA_RTTO && t)) {
+    if (t && dt != ZPCI_IOTA_RTTO) {
         error_report("unsupported ioat dt %d t %d", dt, t);
         s390_program_interrupt(env, PGM_OPERAND, ra);
         return -EINVAL;
+    } else if (!t && !pbdev->rtr_avail) {
+        error_report("relaxed translation not allowed");
+        s390_program_interrupt(env, PGM_OPERAND, ra);
+        return -EINVAL;
     }
 
     iommu->pba = pba;
     iommu->pal = pal;
     iommu->g_iota = g_iota;
 
-    s390_pci_iommu_enable(iommu);
+    if (t) {
+        s390_pci_iommu_enable(iommu);
+    } else {
+        s390_pci_iommu_direct_map_enable(iommu);
+    }
 
     return 0;
 }
diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c
index 7dbbc76823a52..6236ac7f1e686 100644
--- a/hw/s390x/s390-pci-vfio.c
+++ b/hw/s390x/s390-pci-vfio.c
@@ -131,13 +131,28 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev,
     /* Store function type separately for type-specific behavior */
     pbdev->pft = cap->pft;
 
+    /*
+     * If the device is a passthrough ISM device, disallow relaxed
+     * translation.
+     */
+    if (pbdev->pft == ZPCI_PFT_ISM) {
+        pbdev->rtr_avail = false;
+    }
+
     /*
      * If appropriate, reduce the size of the supported DMA aperture reported
-     * to the guest based upon the vfio DMA limit.
+     * to the guest based upon the vfio DMA limit.  This is applicable for
+     * devices that are guaranteed to not use relaxed translation.  If the
+     * device is capable of relaxed translation then we must advertise the
+     * full aperture.  In this case, if translation is used then we will
+     * rely on the vfio DMA limit counting and use RPCIT CC1 / status 16
+     * to request that the guest free DMA mappings as necessary.
      */
-    vfio_size = pbdev->iommu->max_dma_limit << TARGET_PAGE_BITS;
-    if (vfio_size > 0 && vfio_size < cap->end_dma - cap->start_dma + 1) {
-        pbdev->zpci_fn.edma = cap->start_dma + vfio_size - 1;
+    if (!pbdev->rtr_avail) {
+        vfio_size = pbdev->iommu->max_dma_limit << TARGET_PAGE_BITS;
+        if (vfio_size > 0 && vfio_size < cap->end_dma - cap->start_dma + 1) {
+            pbdev->zpci_fn.edma = cap->start_dma + vfio_size - 1;
+        }
     }
 }
 
@@ -223,8 +238,11 @@ static void s390_pci_read_group(S390PCIBusDevice *pbdev,
         pbdev->pci_group = s390_group_create(pbdev->zpci_fn.pfgid, start_gid);
 
         resgrp = &pbdev->pci_group->zpci_group;
+        if (pbdev->rtr_avail) {
+            resgrp->fr |= CLP_RSP_QPCIG_MASK_RTR;
+        }
         if (cap->flags & VFIO_DEVICE_INFO_ZPCI_FLAG_REFRESH) {
-            resgrp->fr = 1;
+            resgrp->fr |= CLP_RSP_QPCIG_MASK_REFRESH;
         }
         resgrp->dasm = cap->dasm;
         resgrp->msia = cap->msi_addr;
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index d9e683c5b4970..75b32182eb019 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -41,6 +41,7 @@
 #include "hw/s390x/tod.h"
 #include "system/system.h"
 #include "system/cpus.h"
+#include "system/hostmem.h"
 #include "target/s390x/kvm/pv.h"
 #include "migration/blocker.h"
 #include "qapi/visitor.h"
@@ -817,7 +818,6 @@ static void ccw_machine_class_init(ObjectClass *oc, void *data)
     mc->no_cdrom = 1;
     mc->no_floppy = 1;
     mc->no_parallel = 1;
-    mc->no_sdcard = 1;
     mc->max_cpus = S390_MAX_CPUS;
     mc->has_hotpluggable_cpus = true;
     mc->smp_props.books_supported = true;
@@ -937,8 +937,13 @@ static void ccw_machine_9_2_instance_options(MachineState *machine)
 
 static void ccw_machine_9_2_class_options(MachineClass *mc)
 {
+    static GlobalProperty compat[] = {
+        { TYPE_S390_PCI_DEVICE, "relaxed-translation", "off", },
+    };
+
     ccw_machine_10_0_class_options(mc);
     compat_props_add(mc->compat_props, hw_compat_9_2, hw_compat_9_2_len);
+    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
 }
 DEFINE_CCW_MACHINE(9, 2);
 
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
index 7d4546800fa6d..ece1107ee8717 100644
--- a/hw/scsi/scsi-bus.c
+++ b/hw/scsi/scsi-bus.c
@@ -100,8 +100,15 @@ static void scsi_device_for_each_req_sync(SCSIDevice *s,
     assert(!runstate_is_running());
     assert(qemu_in_main_thread());
 
-    QTAILQ_FOREACH_SAFE(req, &s->requests, next, next_req) {
-        fn(req, opaque);
+    /*
+     * Locking is not necessary because the guest is stopped and no other
+     * threads can be accessing the requests list, but take the lock for
+     * consistency.
+     */
+    WITH_QEMU_LOCK_GUARD(&s->requests_lock) {
+        QTAILQ_FOREACH_SAFE(req, &s->requests, next, next_req) {
+            fn(req, opaque);
+        }
     }
 }
 
@@ -115,21 +122,29 @@ static void scsi_device_for_each_req_async_bh(void *opaque)
 {
     g_autofree SCSIDeviceForEachReqAsyncData *data = opaque;
     SCSIDevice *s = data->s;
-    AioContext *ctx;
-    SCSIRequest *req;
-    SCSIRequest *next;
+    g_autoptr(GList) reqs = NULL;
 
     /*
-     * The BB cannot have changed contexts between this BH being scheduled and
-     * now: BBs' AioContexts, when they have a node attached, can only be
-     * changed via bdrv_try_change_aio_context(), in a drained section.  While
-     * we have the in-flight counter incremented, that drain must block.
+     * Build a list of requests in this AioContext so fn() can be invoked later
+     * outside requests_lock.
      */
-    ctx = blk_get_aio_context(s->conf.blk);
-    assert(ctx == qemu_get_current_aio_context());
+    WITH_QEMU_LOCK_GUARD(&s->requests_lock) {
+        AioContext *ctx = qemu_get_current_aio_context();
+        SCSIRequest *req;
+        SCSIRequest *next;
+
+        QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
+            if (req->ctx == ctx) {
+                scsi_req_ref(req); /* dropped after calling fn() */
+                reqs = g_list_prepend(reqs, req);
+            }
+        }
+    }
 
-    QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
-        data->fn(req, data->fn_opaque);
+    /* Call fn() on each request */
+    for (GList *elem = g_list_first(reqs); elem; elem = g_list_next(elem)) {
+        data->fn(elem->data, data->fn_opaque);
+        scsi_req_unref(elem->data);
     }
 
     /* Drop the reference taken by scsi_device_for_each_req_async() */
@@ -139,9 +154,35 @@ static void scsi_device_for_each_req_async_bh(void *opaque)
     blk_dec_in_flight(s->conf.blk);
 }
 
+static void scsi_device_for_each_req_async_do_ctx(gpointer key, gpointer value,
+                                                  gpointer user_data)
+{
+    AioContext *ctx = key;
+    SCSIDeviceForEachReqAsyncData *params = user_data;
+    SCSIDeviceForEachReqAsyncData *data;
+
+    data = g_new(SCSIDeviceForEachReqAsyncData, 1);
+    data->s = params->s;
+    data->fn = params->fn;
+    data->fn_opaque = params->fn_opaque;
+
+    /*
+     * Hold a reference to the SCSIDevice until
+     * scsi_device_for_each_req_async_bh() finishes.
+     */
+    object_ref(OBJECT(data->s));
+
+    /* Paired with scsi_device_for_each_req_async_bh() */
+    blk_inc_in_flight(data->s->conf.blk);
+
+    aio_bh_schedule_oneshot(ctx, scsi_device_for_each_req_async_bh, data);
+}
+
 /*
  * Schedule @fn() to be invoked for each enqueued request in device @s. @fn()
- * runs in the AioContext that is executing the request.
+ * must be thread-safe because it runs concurrently in each AioContext that is
+ * executing a request.
+ *
  * Keeps the BlockBackend's in-flight counter incremented until everything is
  * done, so draining it will settle all scheduled @fn() calls.
  */
@@ -151,24 +192,26 @@ static void scsi_device_for_each_req_async(SCSIDevice *s,
 {
     assert(qemu_in_main_thread());
 
-    SCSIDeviceForEachReqAsyncData *data =
-        g_new(SCSIDeviceForEachReqAsyncData, 1);
-
-    data->s = s;
-    data->fn = fn;
-    data->fn_opaque = opaque;
-
-    /*
-     * Hold a reference to the SCSIDevice until
-     * scsi_device_for_each_req_async_bh() finishes.
-     */
-    object_ref(OBJECT(s));
+    /* The set of AioContexts where the requests are being processed */
+    g_autoptr(GHashTable) aio_contexts = g_hash_table_new(NULL, NULL);
+    WITH_QEMU_LOCK_GUARD(&s->requests_lock) {
+        SCSIRequest *req;
+        QTAILQ_FOREACH(req, &s->requests, next) {
+            g_hash_table_add(aio_contexts, req->ctx);
+        }
+    }
 
-    /* Paired with blk_dec_in_flight() in scsi_device_for_each_req_async_bh() */
-    blk_inc_in_flight(s->conf.blk);
-    aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.blk),
-                            scsi_device_for_each_req_async_bh,
-                            data);
+    /* Schedule a BH for each AioContext */
+    SCSIDeviceForEachReqAsyncData params = {
+        .s = s,
+        .fn = fn,
+        .fn_opaque = opaque,
+    };
+    g_hash_table_foreach(
+            aio_contexts,
+            scsi_device_for_each_req_async_do_ctx,
+            &params
+    );
 }
 
 static void scsi_device_realize(SCSIDevice *s, Error **errp)
@@ -349,6 +392,7 @@ static void scsi_qdev_realize(DeviceState *qdev, Error **errp)
         dev->lun = lun;
     }
 
+    qemu_mutex_init(&dev->requests_lock);
     QTAILQ_INIT(&dev->requests);
     scsi_device_realize(dev, &local_err);
     if (local_err) {
@@ -369,6 +413,8 @@ static void scsi_qdev_unrealize(DeviceState *qdev)
 
     scsi_device_purge_requests(dev, SENSE_CODE(NO_SENSE));
 
+    qemu_mutex_destroy(&dev->requests_lock);
+
     scsi_device_unrealize(dev);
 
     blockdev_mark_auto_del(dev->conf.blk);
@@ -868,6 +914,7 @@ SCSIRequest *scsi_req_new(SCSIDevice *d, uint32_t tag, uint32_t lun,
         }
     }
 
+    req->ctx = qemu_get_current_aio_context();
     req->cmd = cmd;
     req->residual = req->cmd.xfer;
 
@@ -964,7 +1011,10 @@ static void scsi_req_enqueue_internal(SCSIRequest *req)
         req->sg = NULL;
     }
     req->enqueued = true;
-    QTAILQ_INSERT_TAIL(&req->dev->requests, req, next);
+
+    WITH_QEMU_LOCK_GUARD(&req->dev->requests_lock) {
+        QTAILQ_INSERT_TAIL(&req->dev->requests, req, next);
+    }
 }
 
 int32_t scsi_req_enqueue(SCSIRequest *req)
@@ -984,7 +1034,9 @@ static void scsi_req_dequeue(SCSIRequest *req)
     trace_scsi_req_dequeue(req->dev->id, req->lun, req->tag);
     req->retry = false;
     if (req->enqueued) {
-        QTAILQ_REMOVE(&req->dev->requests, req, next);
+        WITH_QEMU_LOCK_GUARD(&req->dev->requests_lock) {
+            QTAILQ_REMOVE(&req->dev->requests, req, next);
+        }
         req->enqueued = false;
         scsi_req_unref(req);
     }
@@ -1961,8 +2013,7 @@ static void scsi_device_class_init(ObjectClass *klass, void *data)
 
 static void scsi_dev_instance_init(Object *obj)
 {
-    DeviceState *dev = DEVICE(obj);
-    SCSIDevice *s = SCSI_DEVICE(dev);
+    SCSIDevice *s = SCSI_DEVICE(obj);
 
     device_add_bootindex_property(obj, &s->conf.bootindex,
                                   "bootindex", NULL,
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index e7f738b4841cb..8da1d5a77cdbf 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -106,7 +106,6 @@ struct SCSIDiskState {
     uint64_t max_unmap_size;
     uint64_t max_io_size;
     uint32_t quirks;
-    QEMUBH *bh;
     char *version;
     char *serial;
     char *vendor;
@@ -329,9 +328,8 @@ static void scsi_aio_complete(void *opaque, int ret)
     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
 
-    /* The request must only run in the BlockBackend's AioContext */
-    assert(blk_get_aio_context(s->qdev.conf.blk) ==
-           qemu_get_current_aio_context());
+    /* The request must run in its AioContext */
+    assert(r->req.ctx == qemu_get_current_aio_context());
 
     assert(r->req.aiocb != NULL);
     r->req.aiocb = NULL;
@@ -431,12 +429,10 @@ static void scsi_dma_complete(void *opaque, int ret)
 
 static void scsi_read_complete_noio(SCSIDiskReq *r, int ret)
 {
-    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
     uint32_t n;
 
-    /* The request must only run in the BlockBackend's AioContext */
-    assert(blk_get_aio_context(s->qdev.conf.blk) ==
-           qemu_get_current_aio_context());
+    /* The request must run in its AioContext */
+    assert(r->req.ctx == qemu_get_current_aio_context());
 
     assert(r->req.aiocb == NULL);
     if (scsi_disk_req_check_error(r, ret, ret > 0)) {
@@ -488,8 +484,7 @@ static void scsi_do_read(SCSIDiskReq *r, int ret)
     if (r->req.sg) {
         dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_READ);
         r->req.residual -= r->req.sg->size;
-        r->req.aiocb = dma_blk_io(blk_get_aio_context(s->qdev.conf.blk),
-                                  r->req.sg, r->sector << BDRV_SECTOR_BITS,
+        r->req.aiocb = dma_blk_io(r->req.sg, r->sector << BDRV_SECTOR_BITS,
                                   BDRV_SECTOR_SIZE,
                                   sdc->dma_readv, r, scsi_dma_complete, r,
                                   DMA_DIRECTION_FROM_DEVICE);
@@ -564,12 +559,10 @@ static void scsi_read_data(SCSIRequest *req)
 
 static void scsi_write_complete_noio(SCSIDiskReq *r, int ret)
 {
-    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
     uint32_t n;
 
-    /* The request must only run in the BlockBackend's AioContext */
-    assert(blk_get_aio_context(s->qdev.conf.blk) ==
-           qemu_get_current_aio_context());
+    /* The request must run in its AioContext */
+    assert(r->req.ctx == qemu_get_current_aio_context());
 
     assert (r->req.aiocb == NULL);
     if (scsi_disk_req_check_error(r, ret, ret > 0)) {
@@ -651,8 +644,7 @@ static void scsi_write_data(SCSIRequest *req)
     if (r->req.sg) {
         dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_WRITE);
         r->req.residual -= r->req.sg->size;
-        r->req.aiocb = dma_blk_io(blk_get_aio_context(s->qdev.conf.blk),
-                                  r->req.sg, r->sector << BDRV_SECTOR_BITS,
+        r->req.aiocb = dma_blk_io(r->req.sg, r->sector << BDRV_SECTOR_BITS,
                                   BDRV_SECTOR_SIZE,
                                   sdc->dma_writev, r, scsi_dma_complete, r,
                                   DMA_DIRECTION_TO_DEVICE);
@@ -3165,7 +3157,7 @@ static void scsi_property_add_specifics(DeviceClass *dc)
     ObjectClass *oc = OBJECT_CLASS(dc);
 
     /* The loadparm property is only supported on s390x */
-    if (arch_type & QEMU_ARCH_S390X) {
+    if (qemu_arch_available(QEMU_ARCH_S390X)) {
         object_class_property_add_str(oc, "loadparm",
                                       scsi_property_get_loadparm,
                                       scsi_property_set_loadparm);
diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index f49ab98eccfd2..95f13fb7c2853 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -18,6 +18,7 @@
 #include "system/block-backend.h"
 #include "hw/scsi/scsi.h"
 #include "scsi/constants.h"
+#include "hw/virtio/iothread-vq-mapping.h"
 #include "hw/virtio/virtio-bus.h"
 
 /* Context: BQL held */
@@ -28,7 +29,14 @@ void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp)
     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
 
-    if (vs->conf.iothread) {
+    if (vs->conf.iothread && vs->conf.iothread_vq_mapping_list) {
+        error_setg(errp,
+                   "iothread and iothread-vq-mapping properties cannot be set "
+                   "at the same time");
+        return;
+    }
+
+    if (vs->conf.iothread || vs->conf.iothread_vq_mapping_list) {
         if (!k->set_guest_notifiers || !k->ioeventfd_assign) {
             error_setg(errp,
                        "device is incompatible with iothread "
@@ -39,15 +47,64 @@ void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp)
             error_setg(errp, "ioeventfd is required for iothread");
             return;
         }
-        s->ctx = iothread_get_aio_context(vs->conf.iothread);
-    } else {
-        if (!virtio_device_ioeventfd_enabled(vdev)) {
+    }
+
+    s->vq_aio_context = g_new(AioContext *, vs->conf.num_queues +
+                                            VIRTIO_SCSI_VQ_NUM_FIXED);
+
+    /*
+     * Handle the ctrl virtqueue in the main loop thread where device resets
+     * can be performed.
+     */
+    s->vq_aio_context[0] = qemu_get_aio_context();
+
+    /*
+     * Handle the event virtqueue in the main loop thread where its no_poll
+     * behavior won't stop IOThread polling.
+     */
+    s->vq_aio_context[1] = qemu_get_aio_context();
+
+    if (vs->conf.iothread_vq_mapping_list) {
+        if (!iothread_vq_mapping_apply(vs->conf.iothread_vq_mapping_list,
+                    &s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED],
+                    vs->conf.num_queues, errp)) {
+            g_free(s->vq_aio_context);
+            s->vq_aio_context = NULL;
             return;
         }
-        s->ctx = qemu_get_aio_context();
+    } else if (vs->conf.iothread) {
+        AioContext *ctx = iothread_get_aio_context(vs->conf.iothread);
+        for (uint16_t i = 0; i < vs->conf.num_queues; i++) {
+            s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED + i] = ctx;
+        }
+
+        /* Released in virtio_scsi_dataplane_cleanup() */
+        object_ref(OBJECT(vs->conf.iothread));
+    } else {
+        AioContext *ctx = qemu_get_aio_context();
+        for (unsigned i = 0; i < vs->conf.num_queues; i++) {
+            s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED + i] = ctx;
+        }
     }
 }
 
+/* Context: BQL held */
+void virtio_scsi_dataplane_cleanup(VirtIOSCSI *s)
+{
+    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
+
+    if (vs->conf.iothread_vq_mapping_list) {
+        iothread_vq_mapping_cleanup(vs->conf.iothread_vq_mapping_list);
+    }
+
+    if (vs->conf.iothread) {
+        object_unref(OBJECT(vs->conf.iothread));
+    }
+
+    g_free(s->vq_aio_context);
+    s->vq_aio_context = NULL;
+}
+
 static int virtio_scsi_set_host_notifier(VirtIOSCSI *s, VirtQueue *vq, int n)
 {
     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s)));
@@ -66,31 +123,20 @@ static int virtio_scsi_set_host_notifier(VirtIOSCSI *s, VirtQueue *vq, int n)
 }
 
 /* Context: BH in IOThread */
-static void virtio_scsi_dataplane_stop_bh(void *opaque)
+static void virtio_scsi_dataplane_stop_vq_bh(void *opaque)
 {
-    VirtIOSCSI *s = opaque;
-    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
+    AioContext *ctx = qemu_get_current_aio_context();
+    VirtQueue *vq = opaque;
     EventNotifier *host_notifier;
-    int i;
 
-    virtio_queue_aio_detach_host_notifier(vs->ctrl_vq, s->ctx);
-    host_notifier = virtio_queue_get_host_notifier(vs->ctrl_vq);
+    virtio_queue_aio_detach_host_notifier(vq, ctx);
+    host_notifier = virtio_queue_get_host_notifier(vq);
 
     /*
      * Test and clear notifier after disabling event, in case poll callback
      * didn't have time to run.
      */
     virtio_queue_host_notifier_read(host_notifier);
-
-    virtio_queue_aio_detach_host_notifier(vs->event_vq, s->ctx);
-    host_notifier = virtio_queue_get_host_notifier(vs->event_vq);
-    virtio_queue_host_notifier_read(host_notifier);
-
-    for (i = 0; i < vs->conf.num_queues; i++) {
-        virtio_queue_aio_detach_host_notifier(vs->cmd_vqs[i], s->ctx);
-        host_notifier = virtio_queue_get_host_notifier(vs->cmd_vqs[i]);
-        virtio_queue_host_notifier_read(host_notifier);
-    }
 }
 
 /* Context: BQL held */
@@ -154,11 +200,14 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev)
     smp_wmb(); /* paired with aio_notify_accept() */
 
     if (s->bus.drain_count == 0) {
-        virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx);
-        virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx);
+        virtio_queue_aio_attach_host_notifier(vs->ctrl_vq,
+                                              s->vq_aio_context[0]);
+        virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq,
+                                                      s->vq_aio_context[1]);
 
         for (i = 0; i < vs->conf.num_queues; i++) {
-            virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx);
+            AioContext *ctx = s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED + i];
+            virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], ctx);
         }
     }
     return 0;
@@ -207,7 +256,11 @@ void virtio_scsi_dataplane_stop(VirtIODevice *vdev)
     s->dataplane_stopping = true;
 
     if (s->bus.drain_count == 0) {
-        aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s);
+        for (i = 0; i < vs->conf.num_queues + VIRTIO_SCSI_VQ_NUM_FIXED; i++) {
+            VirtQueue *vq = virtio_get_queue(&vs->parent_obj, i);
+            AioContext *ctx = s->vq_aio_context[i];
+            aio_wait_bh_oneshot(ctx, virtio_scsi_dataplane_stop_vq_bh, vq);
+        }
     }
 
     blk_drain_all(); /* ensure there are no in-flight requests */
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 7d094e188103c..f5a3aa2366f76 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -27,6 +27,7 @@
 #include "hw/qdev-properties.h"
 #include "hw/scsi/scsi.h"
 #include "scsi/constants.h"
+#include "hw/virtio/iothread-vq-mapping.h"
 #include "hw/virtio/virtio-bus.h"
 #include "hw/virtio/virtio-access.h"
 #include "trace.h"
@@ -47,7 +48,7 @@ typedef struct VirtIOSCSIReq {
     /* Used for two-stage request submission and TMFs deferred to BH */
     QTAILQ_ENTRY(VirtIOSCSIReq) next;
 
-    /* Used for cancellation of request during TMFs */
+    /* Used for cancellation of request during TMFs. Atomic. */
     int remaining;
 
     SCSIRequest *sreq;
@@ -102,13 +103,18 @@ static void virtio_scsi_free_req(VirtIOSCSIReq *req)
     g_free(req);
 }
 
-static void virtio_scsi_complete_req(VirtIOSCSIReq *req)
+static void virtio_scsi_complete_req(VirtIOSCSIReq *req, QemuMutex *vq_lock)
 {
     VirtIOSCSI *s = req->dev;
     VirtQueue *vq = req->vq;
     VirtIODevice *vdev = VIRTIO_DEVICE(s);
 
     qemu_iovec_from_buf(&req->resp_iov, 0, &req->resp, req->resp_size);
+
+    if (vq_lock) {
+        qemu_mutex_lock(vq_lock);
+    }
+
     virtqueue_push(vq, &req->elem, req->qsgl.size + req->resp_iov.size);
     if (s->dataplane_started && !s->dataplane_fenced) {
         virtio_notify_irqfd(vdev, vq);
@@ -116,6 +122,10 @@ static void virtio_scsi_complete_req(VirtIOSCSIReq *req)
         virtio_notify(vdev, vq);
     }
 
+    if (vq_lock) {
+        qemu_mutex_unlock(vq_lock);
+    }
+
     if (req->sreq) {
         req->sreq->hba_private = NULL;
         scsi_req_unref(req->sreq);
@@ -123,34 +133,20 @@ static void virtio_scsi_complete_req(VirtIOSCSIReq *req)
     virtio_scsi_free_req(req);
 }
 
-static void virtio_scsi_complete_req_bh(void *opaque)
+static void virtio_scsi_bad_req(VirtIOSCSIReq *req, QemuMutex *vq_lock)
 {
-    VirtIOSCSIReq *req = opaque;
+    virtio_error(VIRTIO_DEVICE(req->dev), "wrong size for virtio-scsi headers");
 
-    virtio_scsi_complete_req(req);
-}
+    if (vq_lock) {
+        qemu_mutex_lock(vq_lock);
+    }
 
-/*
- * Called from virtio_scsi_do_one_tmf_bh() in main loop thread. The main loop
- * thread cannot touch the virtqueue since that could race with an IOThread.
- */
-static void virtio_scsi_complete_req_from_main_loop(VirtIOSCSIReq *req)
-{
-    VirtIOSCSI *s = req->dev;
+    virtqueue_detach_element(req->vq, &req->elem, 0);
 
-    if (!s->ctx || s->ctx == qemu_get_aio_context()) {
-        /* No need to schedule a BH when there is no IOThread */
-        virtio_scsi_complete_req(req);
-    } else {
-        /* Run request completion in the IOThread */
-        aio_wait_bh_oneshot(s->ctx, virtio_scsi_complete_req_bh, req);
+    if (vq_lock) {
+        qemu_mutex_unlock(vq_lock);
     }
-}
 
-static void virtio_scsi_bad_req(VirtIOSCSIReq *req)
-{
-    virtio_error(VIRTIO_DEVICE(req->dev), "wrong size for virtio-scsi headers");
-    virtqueue_detach_element(req->vq, &req->elem, 0);
     virtio_scsi_free_req(req);
 }
 
@@ -235,12 +231,21 @@ static int virtio_scsi_parse_req(VirtIOSCSIReq *req,
     return 0;
 }
 
-static VirtIOSCSIReq *virtio_scsi_pop_req(VirtIOSCSI *s, VirtQueue *vq)
+static VirtIOSCSIReq *virtio_scsi_pop_req(VirtIOSCSI *s, VirtQueue *vq, QemuMutex *vq_lock)
 {
     VirtIOSCSICommon *vs = (VirtIOSCSICommon *)s;
     VirtIOSCSIReq *req;
 
+    if (vq_lock) {
+        qemu_mutex_lock(vq_lock);
+    }
+
     req = virtqueue_pop(vq, sizeof(VirtIOSCSIReq) + vs->cdb_size);
+
+    if (vq_lock) {
+        qemu_mutex_unlock(vq_lock);
+    }
+
     if (!req) {
         return NULL;
     }
@@ -294,136 +299,157 @@ typedef struct {
     VirtIOSCSIReq  *tmf_req;
 } VirtIOSCSICancelNotifier;
 
+static void virtio_scsi_tmf_dec_remaining(VirtIOSCSIReq *tmf)
+{
+    if (qatomic_fetch_dec(&tmf->remaining) == 1) {
+        trace_virtio_scsi_tmf_resp(virtio_scsi_get_lun(tmf->req.tmf.lun),
+                                   tmf->req.tmf.tag, tmf->resp.tmf.response);
+
+        virtio_scsi_complete_req(tmf, &tmf->dev->ctrl_lock);
+    }
+}
+
 static void virtio_scsi_cancel_notify(Notifier *notifier, void *data)
 {
     VirtIOSCSICancelNotifier *n = container_of(notifier,
                                                VirtIOSCSICancelNotifier,
                                                notifier);
 
-    if (--n->tmf_req->remaining == 0) {
-        VirtIOSCSIReq *req = n->tmf_req;
-
-        trace_virtio_scsi_tmf_resp(virtio_scsi_get_lun(req->req.tmf.lun),
-                                   req->req.tmf.tag, req->resp.tmf.response);
-        virtio_scsi_complete_req(req);
-    }
+    virtio_scsi_tmf_dec_remaining(n->tmf_req);
     g_free(n);
 }
 
-static inline void virtio_scsi_ctx_check(VirtIOSCSI *s, SCSIDevice *d)
+static void virtio_scsi_tmf_cancel_req(VirtIOSCSIReq *tmf, SCSIRequest *r)
 {
-    if (s->dataplane_started && d && blk_is_available(d->conf.blk)) {
-        assert(blk_get_aio_context(d->conf.blk) == s->ctx);
-    }
+    VirtIOSCSICancelNotifier *notifier;
+
+    assert(r->ctx == qemu_get_current_aio_context());
+
+    /* Decremented in virtio_scsi_cancel_notify() */
+    qatomic_inc(&tmf->remaining);
+
+    notifier = g_new(VirtIOSCSICancelNotifier, 1);
+    notifier->notifier.notify = virtio_scsi_cancel_notify;
+    notifier->tmf_req = tmf;
+    scsi_req_cancel_async(r, &notifier->notifier);
 }
 
-static void virtio_scsi_do_one_tmf_bh(VirtIOSCSIReq *req)
+/* Execute a TMF on the requests in the current AioContext */
+static void virtio_scsi_do_tmf_aio_context(void *opaque)
 {
-    VirtIOSCSI *s = req->dev;
-    SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun);
-    BusChild *kid;
-    int target;
+    AioContext *ctx = qemu_get_current_aio_context();
+    VirtIOSCSIReq *tmf = opaque;
+    VirtIOSCSI *s = tmf->dev;
+    SCSIDevice *d = virtio_scsi_device_get(s, tmf->req.tmf.lun);
+    SCSIRequest *r;
+    bool match_tag;
 
-    switch (req->req.tmf.subtype) {
-    case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
-        if (!d) {
-            req->resp.tmf.response = VIRTIO_SCSI_S_BAD_TARGET;
-            goto out;
-        }
-        if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
-            req->resp.tmf.response = VIRTIO_SCSI_S_INCORRECT_LUN;
-            goto out;
-        }
-        qatomic_inc(&s->resetting);
-        device_cold_reset(&d->qdev);
-        qatomic_dec(&s->resetting);
+    if (!d) {
+        tmf->resp.tmf.response = VIRTIO_SCSI_S_BAD_TARGET;
+        virtio_scsi_tmf_dec_remaining(tmf);
+        return;
+    }
+
+    /*
+     * This function could handle other subtypes that need to be processed in
+     * the request's AioContext in the future, but for now only request
+     * cancelation subtypes are performed here.
+     */
+    switch (tmf->req.tmf.subtype) {
+    case VIRTIO_SCSI_T_TMF_ABORT_TASK:
+        match_tag = true;
         break;
+    case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET:
+    case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET:
+        match_tag = false;
+        break;
+    default:
+        g_assert_not_reached();
+    }
 
-    case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
-        target = req->req.tmf.lun[1];
-        qatomic_inc(&s->resetting);
+    WITH_QEMU_LOCK_GUARD(&d->requests_lock) {
+        QTAILQ_FOREACH(r, &d->requests, next) {
+            VirtIOSCSIReq *cmd_req = r->hba_private;
+            assert(cmd_req); /* request has hba_private while enqueued */
 
-        rcu_read_lock();
-        QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) {
-            SCSIDevice *d1 = SCSI_DEVICE(kid->child);
-            if (d1->channel == 0 && d1->id == target) {
-                device_cold_reset(&d1->qdev);
+            if (r->ctx != ctx) {
+                continue;
+            }
+            if (match_tag && cmd_req->req.cmd.tag != tmf->req.tmf.tag) {
+                continue;
             }
+            virtio_scsi_tmf_cancel_req(tmf, r);
         }
-        rcu_read_unlock();
-
-        qatomic_dec(&s->resetting);
-        break;
-
-    default:
-        g_assert_not_reached();
     }
 
-out:
-    object_unref(OBJECT(d));
-    virtio_scsi_complete_req_from_main_loop(req);
+    /* Incremented by virtio_scsi_do_tmf() */
+    virtio_scsi_tmf_dec_remaining(tmf);
+
+    object_unref(d);
 }
 
-/* Some TMFs must be processed from the main loop thread */
-static void virtio_scsi_do_tmf_bh(void *opaque)
+static void dummy_bh(void *opaque)
 {
-    VirtIOSCSI *s = opaque;
-    QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs);
-    VirtIOSCSIReq *req;
-    VirtIOSCSIReq *tmp;
+    /* Do nothing */
+}
 
+/*
+ * Wait for pending virtio_scsi_defer_tmf_to_aio_context() BHs.
+ */
+static void virtio_scsi_flush_defer_tmf_to_aio_context(VirtIOSCSI *s)
+{
     GLOBAL_STATE_CODE();
 
-    WITH_QEMU_LOCK_GUARD(&s->tmf_bh_lock) {
-        QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) {
-            QTAILQ_REMOVE(&s->tmf_bh_list, req, next);
-            QTAILQ_INSERT_TAIL(&reqs, req, next);
-        }
+    assert(!s->dataplane_started);
 
-        qemu_bh_delete(s->tmf_bh);
-        s->tmf_bh = NULL;
-    }
+    for (uint32_t i = 0; i < s->parent_obj.conf.num_queues; i++) {
+        AioContext *ctx = s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED + i];
 
-    QTAILQ_FOREACH_SAFE(req, &reqs, next, tmp) {
-        QTAILQ_REMOVE(&reqs, req, next);
-        virtio_scsi_do_one_tmf_bh(req);
+        /* Our BH only runs after previously scheduled BHs */
+        aio_wait_bh_oneshot(ctx, dummy_bh, NULL);
     }
 }
 
-static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s)
+/*
+ * Run the TMF in a specific AioContext, handling only requests in that
+ * AioContext. This is necessary because requests can run in different
+ * AioContext and it is only possible to cancel them from the AioContext where
+ * they are running.
+ */
+static void virtio_scsi_defer_tmf_to_aio_context(VirtIOSCSIReq *tmf,
+                                                 AioContext *ctx)
 {
-    VirtIOSCSIReq *req;
-    VirtIOSCSIReq *tmp;
+    /* Decremented in virtio_scsi_do_tmf_aio_context() */
+    qatomic_inc(&tmf->remaining);
 
-    GLOBAL_STATE_CODE();
-
-    /* Called after ioeventfd has been stopped, so tmf_bh_lock is not needed */
-    if (s->tmf_bh) {
-        qemu_bh_delete(s->tmf_bh);
-        s->tmf_bh = NULL;
-    }
-
-    QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) {
-        QTAILQ_REMOVE(&s->tmf_bh_list, req, next);
-
-        /* SAM-6 6.3.2 Hard reset */
-        req->resp.tmf.response = VIRTIO_SCSI_S_TARGET_FAILURE;
-        virtio_scsi_complete_req(req);
-    }
+    /* See virtio_scsi_flush_defer_tmf_to_aio_context() cleanup during reset */
+    aio_bh_schedule_oneshot(ctx, virtio_scsi_do_tmf_aio_context, tmf);
 }
 
-static void virtio_scsi_defer_tmf_to_bh(VirtIOSCSIReq *req)
+/*
+ * Returns the AioContext for a given TMF's tag field or NULL. Note that the
+ * request identified by the tag may have completed by the time you can execute
+ * a BH in the AioContext, so don't assume the request still exists in your BH.
+ */
+static AioContext *find_aio_context_for_tmf_tag(SCSIDevice *d,
+                                                VirtIOSCSIReq *tmf)
 {
-    VirtIOSCSI *s = req->dev;
+    WITH_QEMU_LOCK_GUARD(&d->requests_lock) {
+        SCSIRequest *r;
+        SCSIRequest *next;
+
+        QTAILQ_FOREACH_SAFE(r, &d->requests, next, next) {
+            VirtIOSCSIReq *cmd_req = r->hba_private;
 
-    WITH_QEMU_LOCK_GUARD(&s->tmf_bh_lock) {
-        QTAILQ_INSERT_TAIL(&s->tmf_bh_list, req, next);
+            /* hba_private is non-NULL while the request is enqueued */
+            assert(cmd_req);
 
-        if (!s->tmf_bh) {
-            s->tmf_bh = qemu_bh_new(virtio_scsi_do_tmf_bh, s);
-            qemu_bh_schedule(s->tmf_bh);
+            if (cmd_req->req.cmd.tag == tmf->req.tmf.tag) {
+                return r->ctx;
+            }
         }
     }
+    return NULL;
 }
 
 /* Return 0 if the request is ready to be completed and return to guest;
@@ -433,9 +459,9 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
 {
     SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun);
     SCSIRequest *r, *next;
+    AioContext *ctx;
     int ret = 0;
 
-    virtio_scsi_ctx_check(s, d);
     /* Here VIRTIO_SCSI_S_OK means "FUNCTION COMPLETE".  */
     req->resp.tmf.response = VIRTIO_SCSI_S_OK;
 
@@ -450,7 +476,22 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
                               req->req.tmf.tag, req->req.tmf.subtype);
 
     switch (req->req.tmf.subtype) {
-    case VIRTIO_SCSI_T_TMF_ABORT_TASK:
+    case VIRTIO_SCSI_T_TMF_ABORT_TASK: {
+        if (!d) {
+            goto fail;
+        }
+        if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
+            goto incorrect_lun;
+        }
+
+        ctx = find_aio_context_for_tmf_tag(d, req);
+        if (ctx) {
+            virtio_scsi_defer_tmf_to_aio_context(req, ctx);
+            ret = -EINPROGRESS;
+        }
+        break;
+    }
+
     case VIRTIO_SCSI_T_TMF_QUERY_TASK:
         if (!d) {
             goto fail;
@@ -458,44 +499,82 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
         if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
             goto incorrect_lun;
         }
-        QTAILQ_FOREACH_SAFE(r, &d->requests, next, next) {
-            VirtIOSCSIReq *cmd_req = r->hba_private;
-            if (cmd_req && cmd_req->req.cmd.tag == req->req.tmf.tag) {
-                break;
+
+        WITH_QEMU_LOCK_GUARD(&d->requests_lock) {
+            QTAILQ_FOREACH(r, &d->requests, next) {
+                VirtIOSCSIReq *cmd_req = r->hba_private;
+                assert(cmd_req); /* request has hba_private while enqueued */
+
+                if (cmd_req->req.cmd.tag == req->req.tmf.tag) {
+                    /*
+                     * "If the specified command is present in the task set,
+                     * then return a service response set to FUNCTION
+                     * SUCCEEDED".
+                     */
+                    req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED;
+                }
             }
         }
-        if (r) {
-            /*
-             * Assert that the request has not been completed yet, we
-             * check for it in the loop above.
-             */
-            assert(r->hba_private);
-            if (req->req.tmf.subtype == VIRTIO_SCSI_T_TMF_QUERY_TASK) {
-                /* "If the specified command is present in the task set, then
-                 * return a service response set to FUNCTION SUCCEEDED".
-                 */
-                req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED;
-            } else {
-                VirtIOSCSICancelNotifier *notifier;
-
-                req->remaining = 1;
-                notifier = g_new(VirtIOSCSICancelNotifier, 1);
-                notifier->tmf_req = req;
-                notifier->notifier.notify = virtio_scsi_cancel_notify;
-                scsi_req_cancel_async(r, &notifier->notifier);
-                ret = -EINPROGRESS;
+        break;
+
+    case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
+        if (!d) {
+            goto fail;
+        }
+        if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
+            goto incorrect_lun;
+        }
+        qatomic_inc(&s->resetting);
+        device_cold_reset(&d->qdev);
+        qatomic_dec(&s->resetting);
+        break;
+
+    case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: {
+        BusChild *kid;
+        int target = req->req.tmf.lun[1];
+        qatomic_inc(&s->resetting);
+
+        rcu_read_lock();
+        QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) {
+            SCSIDevice *d1 = SCSI_DEVICE(kid->child);
+            if (d1->channel == 0 && d1->id == target) {
+                device_cold_reset(&d1->qdev);
             }
         }
+        rcu_read_unlock();
+
+        qatomic_dec(&s->resetting);
         break;
+    }
 
-    case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
-    case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
-        virtio_scsi_defer_tmf_to_bh(req);
+    case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET:
+    case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET: {
+        g_autoptr(GHashTable) aio_contexts = g_hash_table_new(NULL, NULL);
+
+        if (!d) {
+            goto fail;
+        }
+        if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
+            goto incorrect_lun;
+        }
+
+        qatomic_inc(&req->remaining);
+
+        for (uint32_t i = 0; i < s->parent_obj.conf.num_queues; i++) {
+            ctx = s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED + i];
+
+            if (!g_hash_table_add(aio_contexts, ctx)) {
+                continue; /* skip previously added AioContext */
+            }
+
+            virtio_scsi_defer_tmf_to_aio_context(req, ctx);
+        }
+
+        virtio_scsi_tmf_dec_remaining(req);
         ret = -EINPROGRESS;
         break;
+    }
 
-    case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET:
-    case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET:
     case VIRTIO_SCSI_T_TMF_QUERY_TASK_SET:
         if (!d) {
             goto fail;
@@ -504,34 +583,19 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
             goto incorrect_lun;
         }
 
-        /* Add 1 to "remaining" until virtio_scsi_do_tmf returns.
-         * This way, if the bus starts calling back to the notifiers
-         * even before we finish the loop, virtio_scsi_cancel_notify
-         * will not complete the TMF too early.
-         */
-        req->remaining = 1;
-        QTAILQ_FOREACH_SAFE(r, &d->requests, next, next) {
-            if (r->hba_private) {
-                if (req->req.tmf.subtype == VIRTIO_SCSI_T_TMF_QUERY_TASK_SET) {
-                    /* "If there is any command present in the task set, then
-                     * return a service response set to FUNCTION SUCCEEDED".
-                     */
-                    req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED;
-                    break;
-                } else {
-                    VirtIOSCSICancelNotifier *notifier;
-
-                    req->remaining++;
-                    notifier = g_new(VirtIOSCSICancelNotifier, 1);
-                    notifier->notifier.notify = virtio_scsi_cancel_notify;
-                    notifier->tmf_req = req;
-                    scsi_req_cancel_async(r, &notifier->notifier);
-                }
+        WITH_QEMU_LOCK_GUARD(&d->requests_lock) {
+            QTAILQ_FOREACH_SAFE(r, &d->requests, next, next) {
+                /* Request has hba_private while enqueued */
+                assert(r->hba_private);
+
+                /*
+                 * "If there is any command present in the task set, then
+                 * return a service response set to FUNCTION SUCCEEDED".
+                 */
+                req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED;
+                break;
             }
         }
-        if (--req->remaining > 0) {
-            ret = -EINPROGRESS;
-        }
         break;
 
     case VIRTIO_SCSI_T_TMF_CLEAR_ACA:
@@ -562,7 +626,7 @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req)
 
     if (iov_to_buf(req->elem.out_sg, req->elem.out_num, 0,
                 &type, sizeof(type)) < sizeof(type)) {
-        virtio_scsi_bad_req(req);
+        virtio_scsi_bad_req(req, &s->ctrl_lock);
         return;
     }
 
@@ -570,7 +634,7 @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req)
     if (type == VIRTIO_SCSI_T_TMF) {
         if (virtio_scsi_parse_req(req, sizeof(VirtIOSCSICtrlTMFReq),
                     sizeof(VirtIOSCSICtrlTMFResp)) < 0) {
-            virtio_scsi_bad_req(req);
+            virtio_scsi_bad_req(req, &s->ctrl_lock);
             return;
         } else {
             r = virtio_scsi_do_tmf(s, req);
@@ -580,7 +644,7 @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req)
                type == VIRTIO_SCSI_T_AN_SUBSCRIBE) {
         if (virtio_scsi_parse_req(req, sizeof(VirtIOSCSICtrlANReq),
                     sizeof(VirtIOSCSICtrlANResp)) < 0) {
-            virtio_scsi_bad_req(req);
+            virtio_scsi_bad_req(req, &s->ctrl_lock);
             return;
         } else {
             req->req.an.event_requested =
@@ -600,7 +664,7 @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req)
                  type == VIRTIO_SCSI_T_AN_SUBSCRIBE)
             trace_virtio_scsi_an_resp(virtio_scsi_get_lun(req->req.an.lun),
                                       req->resp.an.response);
-        virtio_scsi_complete_req(req);
+        virtio_scsi_complete_req(req, &s->ctrl_lock);
     } else {
         assert(r == -EINPROGRESS);
     }
@@ -610,7 +674,7 @@ static void virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq)
 {
     VirtIOSCSIReq *req;
 
-    while ((req = virtio_scsi_pop_req(s, vq))) {
+    while ((req = virtio_scsi_pop_req(s, vq, &s->ctrl_lock))) {
         virtio_scsi_handle_ctrl_req(s, req);
     }
 }
@@ -625,9 +689,12 @@ static void virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq)
  */
 static bool virtio_scsi_defer_to_dataplane(VirtIOSCSI *s)
 {
-    if (!s->ctx || s->dataplane_started) {
+    if (s->dataplane_started) {
         return false;
     }
+    if (s->vq_aio_context[0] == qemu_get_aio_context()) {
+        return false; /* not using IOThreads */
+    }
 
     virtio_device_start_ioeventfd(&s->parent_obj.parent_obj);
     return !s->dataplane_fenced;
@@ -654,7 +721,7 @@ static void virtio_scsi_complete_cmd_req(VirtIOSCSIReq *req)
      * in virtio_scsi_command_complete.
      */
     req->resp_size = sizeof(VirtIOSCSICmdResp);
-    virtio_scsi_complete_req(req);
+    virtio_scsi_complete_req(req, NULL);
 }
 
 static void virtio_scsi_command_failed(SCSIRequest *r)
@@ -788,7 +855,7 @@ static int virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, VirtIOSCSIReq *req)
             virtio_scsi_fail_cmd_req(req);
             return -ENOTSUP;
         } else {
-            virtio_scsi_bad_req(req);
+            virtio_scsi_bad_req(req, NULL);
             return -EINVAL;
         }
     }
@@ -801,7 +868,6 @@ static int virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, VirtIOSCSIReq *req)
         virtio_scsi_complete_cmd_req(req);
         return -ENOENT;
     }
-    virtio_scsi_ctx_check(s, d);
     req->sreq = scsi_req_new(d, req->req.cmd.tag,
                              virtio_scsi_get_lun(req->req.cmd.lun),
                              req->req.cmd.cdb, vs->cdb_size, req);
@@ -843,7 +909,7 @@ static void virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
             virtio_queue_set_notification(vq, 0);
         }
 
-        while ((req = virtio_scsi_pop_req(s, vq))) {
+        while ((req = virtio_scsi_pop_req(s, vq, NULL))) {
             ret = virtio_scsi_handle_cmd_req_prepare(s, req);
             if (!ret) {
                 QTAILQ_INSERT_TAIL(&reqs, req, next);
@@ -936,7 +1002,7 @@ static void virtio_scsi_reset(VirtIODevice *vdev)
 
     assert(!s->dataplane_started);
 
-    virtio_scsi_reset_tmf_bh(s);
+    virtio_scsi_flush_defer_tmf_to_aio_context(s);
 
     qatomic_inc(&s->resetting);
     bus_cold_reset(BUS(&s->bus));
@@ -944,7 +1010,10 @@ static void virtio_scsi_reset(VirtIODevice *vdev)
 
     vs->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE;
     vs->cdb_size = VIRTIO_SCSI_CDB_DEFAULT_SIZE;
-    s->events_dropped = false;
+
+    WITH_QEMU_LOCK_GUARD(&s->event_lock) {
+        s->events_dropped = false;
+    }
 }
 
 typedef struct {
@@ -973,19 +1042,21 @@ static void virtio_scsi_push_event(VirtIOSCSI *s,
         return;
     }
 
-    req = virtio_scsi_pop_req(s, vs->event_vq);
-    if (!req) {
-        s->events_dropped = true;
-        return;
-    }
+    req = virtio_scsi_pop_req(s, vs->event_vq, &s->event_lock);
+    WITH_QEMU_LOCK_GUARD(&s->event_lock) {
+        if (!req) {
+            s->events_dropped = true;
+            return;
+        }
 
-    if (s->events_dropped) {
-        event |= VIRTIO_SCSI_T_EVENTS_MISSED;
-        s->events_dropped = false;
+        if (s->events_dropped) {
+            event |= VIRTIO_SCSI_T_EVENTS_MISSED;
+            s->events_dropped = false;
+        }
     }
 
     if (virtio_scsi_parse_req(req, 0, sizeof(VirtIOSCSIEvent))) {
-        virtio_scsi_bad_req(req);
+        virtio_scsi_bad_req(req, &s->event_lock);
         return;
     }
 
@@ -1005,12 +1076,18 @@ static void virtio_scsi_push_event(VirtIOSCSI *s,
     }
     trace_virtio_scsi_event(virtio_scsi_get_lun(evt->lun), event, reason);
 
-    virtio_scsi_complete_req(req);
+    virtio_scsi_complete_req(req, &s->event_lock);
 }
 
 static void virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq)
 {
-    if (s->events_dropped) {
+    bool events_dropped;
+
+    WITH_QEMU_LOCK_GUARD(&s->event_lock) {
+        events_dropped = s->events_dropped;
+    }
+
+    if (events_dropped) {
         VirtIOSCSIEventInfo info = {
             .event = VIRTIO_SCSI_T_NO_EVENT,
         };
@@ -1061,14 +1138,16 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev,
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(hotplug_dev);
     VirtIOSCSI *s = VIRTIO_SCSI(vdev);
+    AioContext *ctx = s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED];
     SCSIDevice *sd = SCSI_DEVICE(dev);
-    int ret;
 
-    if (s->ctx && !s->dataplane_fenced) {
-        ret = blk_set_aio_context(sd->conf.blk, s->ctx, errp);
-        if (ret < 0) {
-            return;
-        }
+    if (ctx != qemu_get_aio_context() && !s->dataplane_fenced) {
+        /*
+         * Try to make the BlockBackend's AioContext match ours. Ignore failure
+         * because I/O will still work although block jobs and other users
+         * might be slower when multiple AioContexts use a BlockBackend.
+         */
+        blk_set_aio_context(sd->conf.blk, ctx, NULL);
     }
 
     if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) {
@@ -1103,7 +1182,7 @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev,
 
     qdev_simple_device_unplug_cb(hotplug_dev, dev, errp);
 
-    if (s->ctx) {
+    if (s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED] != qemu_get_aio_context()) {
         /* If other users keep the BlockBackend in the iothread, that's ok */
         blk_set_aio_context(sd->conf.blk, qemu_get_aio_context(), NULL);
     }
@@ -1137,7 +1216,7 @@ static void virtio_scsi_drained_begin(SCSIBus *bus)
 
     for (uint32_t i = 0; i < total_queues; i++) {
         VirtQueue *vq = virtio_get_queue(vdev, i);
-        virtio_queue_aio_detach_host_notifier(vq, s->ctx);
+        virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]);
     }
 }
 
@@ -1163,10 +1242,12 @@ static void virtio_scsi_drained_end(SCSIBus *bus)
 
     for (uint32_t i = 0; i < total_queues; i++) {
         VirtQueue *vq = virtio_get_queue(vdev, i);
+        AioContext *ctx = s->vq_aio_context[i];
+
         if (vq == vs->event_vq) {
-            virtio_queue_aio_attach_host_notifier_no_poll(vq, s->ctx);
+            virtio_queue_aio_attach_host_notifier_no_poll(vq, ctx);
         } else {
-            virtio_queue_aio_attach_host_notifier(vq, s->ctx);
+            virtio_queue_aio_attach_host_notifier(vq, ctx);
         }
     }
 }
@@ -1235,8 +1316,8 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp)
     VirtIOSCSI *s = VIRTIO_SCSI(dev);
     Error *err = NULL;
 
-    QTAILQ_INIT(&s->tmf_bh_list);
-    qemu_mutex_init(&s->tmf_bh_lock);
+    qemu_mutex_init(&s->ctrl_lock);
+    qemu_mutex_init(&s->event_lock);
 
     virtio_scsi_common_realize(dev,
                                virtio_scsi_handle_ctrl,
@@ -1271,15 +1352,16 @@ void virtio_scsi_common_unrealize(DeviceState *dev)
     virtio_cleanup(vdev);
 }
 
+/* main loop */
 static void virtio_scsi_device_unrealize(DeviceState *dev)
 {
     VirtIOSCSI *s = VIRTIO_SCSI(dev);
 
-    virtio_scsi_reset_tmf_bh(s);
-
+    virtio_scsi_dataplane_cleanup(s);
     qbus_set_hotplug_handler(BUS(&s->bus), NULL);
     virtio_scsi_common_unrealize(dev);
-    qemu_mutex_destroy(&s->tmf_bh_lock);
+    qemu_mutex_destroy(&s->event_lock);
+    qemu_mutex_destroy(&s->ctrl_lock);
 }
 
 static const Property virtio_scsi_properties[] = {
@@ -1299,6 +1381,8 @@ static const Property virtio_scsi_properties[] = {
                                                 VIRTIO_SCSI_F_CHANGE, true),
     DEFINE_PROP_LINK("iothread", VirtIOSCSI, parent_obj.conf.iothread,
                      TYPE_IOTHREAD, IOThread *),
+    DEFINE_PROP_IOTHREAD_VQ_MAPPING_LIST("iothread-vq-mapping", VirtIOSCSI,
+            parent_obj.conf.iothread_vq_mapping_list),
 };
 
 static const VMStateDescription vmstate_virtio_scsi = {
diff --git a/hw/sd/allwinner-sdhost.c b/hw/sd/allwinner-sdhost.c
index ee5c5c78a81e7..03980d2716829 100644
--- a/hw/sd/allwinner-sdhost.c
+++ b/hw/sd/allwinner-sdhost.c
@@ -761,7 +761,7 @@ static void allwinner_sdhost_write(void *opaque, hwaddr offset,
 static const MemoryRegionOps allwinner_sdhost_ops = {
     .read = allwinner_sdhost_read,
     .write = allwinner_sdhost_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
     .valid = {
         .min_access_size = 4,
         .max_access_size = 4,
diff --git a/hw/sd/sdhci-internal.h b/hw/sd/sdhci-internal.h
index 5f3765f12d2d8..9f768c418e08e 100644
--- a/hw/sd/sdhci-internal.h
+++ b/hw/sd/sdhci-internal.h
@@ -322,6 +322,6 @@ void sdhci_initfn(SDHCIState *s);
 void sdhci_uninitfn(SDHCIState *s);
 void sdhci_common_realize(SDHCIState *s, Error **errp);
 void sdhci_common_unrealize(SDHCIState *s);
-void sdhci_common_class_init(ObjectClass *klass, void *data);
+void sdhci_common_class_init(ObjectClass *klass, const void *data);
 
 #endif
diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c
index 99dd4a4e95289..69baf73ae9b18 100644
--- a/hw/sd/sdhci.c
+++ b/hw/sd/sdhci.c
@@ -307,6 +307,10 @@ static void sdhci_reset(SDHCIState *s)
     s->data_count = 0;
     s->stopped_state = sdhc_not_stopped;
     s->pending_insert_state = false;
+    if (s->vendor == SDHCI_VENDOR_FSL) {
+        s->norintstsen = 0x013f;
+        s->errintstsen = 0x117f;
+    }
 }
 
 static void sdhci_poweron_reset(DeviceState *dev)
@@ -1542,7 +1546,7 @@ const VMStateDescription sdhci_vmstate = {
     },
 };
 
-void sdhci_common_class_init(ObjectClass *klass, void *data)
+void sdhci_common_class_init(ObjectClass *klass, const void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
 
@@ -1731,16 +1735,10 @@ usdhc_write(void *opaque, hwaddr offset, uint64_t val, unsigned size)
 
     case USDHC_VENDOR_SPEC:
         s->vendor_spec = value;
-        switch (s->vendor) {
-        case SDHCI_VENDOR_IMX:
-            if (value & USDHC_IMX_FRC_SDCLK_ON) {
-                s->prnsts &= ~SDHC_IMX_CLOCK_GATE_OFF;
-            } else {
-                s->prnsts |= SDHC_IMX_CLOCK_GATE_OFF;
-            }
-            break;
-        default:
-            break;
+        if (value & USDHC_IMX_FRC_SDCLK_ON) {
+            s->prnsts &= ~SDHC_IMX_CLOCK_GATE_OFF;
+        } else {
+            s->prnsts |= SDHC_IMX_CLOCK_GATE_OFF;
         }
         break;
 
diff --git a/hw/sensor/emc141x.c b/hw/sensor/emc141x.c
index aeccd2a3c945e..33c1bd330fd02 100644
--- a/hw/sensor/emc141x.c
+++ b/hw/sensor/emc141x.c
@@ -265,7 +265,7 @@ static void emc141x_initfn(Object *obj)
                         emc141x_set_temperature, NULL, NULL);
 }
 
-static void emc141x_class_init(ObjectClass *klass, void *data)
+static void emc141x_class_init(ObjectClass *klass, const void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     I2CSlaveClass *k = I2C_SLAVE_CLASS(klass);
diff --git a/hw/sensor/isl_pmbus_vr.c b/hw/sensor/isl_pmbus_vr.c
index 304a66ea8b04d..c60282cfe7710 100644
--- a/hw/sensor/isl_pmbus_vr.c
+++ b/hw/sensor/isl_pmbus_vr.c
@@ -233,7 +233,7 @@ static void raa228000_init(Object *obj)
     isl_pmbus_vr_add_props(obj, flags, 1);
 }
 
-static void isl_pmbus_vr_class_init(ObjectClass *klass, void *data,
+static void isl_pmbus_vr_class_init(ObjectClass *klass, const void *data,
                                     uint8_t pages)
 {
     PMBusDeviceClass *k = PMBUS_DEVICE_CLASS(klass);
diff --git a/hw/sensor/tmp421.c b/hw/sensor/tmp421.c
index 82e604279c5af..007f7cd018b76 100644
--- a/hw/sensor/tmp421.c
+++ b/hw/sensor/tmp421.c
@@ -68,7 +68,7 @@ struct TMP421State {
 
 struct TMP421Class {
     I2CSlaveClass parent_class;
-    DeviceInfo *dev;
+    const DeviceInfo *dev;
 };
 
 #define TYPE_TMP421 "tmp421-generic"
diff --git a/hw/sh4/sh7750.c b/hw/sh4/sh7750.c
index 8892eaddcbc6f..6faf0e3ca8bb2 100644
--- a/hw/sh4/sh7750.c
+++ b/hw/sh4/sh7750.c
@@ -36,6 +36,7 @@
 #include "hw/sh4/sh_intc.h"
 #include "hw/timer/tmu012.h"
 #include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "trace.h"
 
 typedef struct SH7750State {
diff --git a/hw/ssi/allwinner-a10-spi.c b/hw/ssi/allwinner-a10-spi.c
index 3eb50b44ac55e..d2f6bb9cdc7ef 100644
--- a/hw/ssi/allwinner-a10-spi.c
+++ b/hw/ssi/allwinner-a10-spi.c
@@ -502,7 +502,7 @@ static const MemoryRegionOps allwinner_a10_spi_ops = {
     .write = allwinner_a10_spi_write,
     .valid.min_access_size = 1,
     .valid.max_access_size = 4,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
 static const VMStateDescription allwinner_a10_spi_vmstate = {
diff --git a/hw/ssi/npcm7xx_fiu.c b/hw/ssi/npcm7xx_fiu.c
index 21fc489038399..8df4bec3f1721 100644
--- a/hw/ssi/npcm7xx_fiu.c
+++ b/hw/ssi/npcm7xx_fiu.c
@@ -29,7 +29,7 @@
 #include "trace.h"
 
 /* Up to 128 MiB of flash may be accessed directly as memory. */
-#define NPCM7XX_FIU_FLASH_WINDOW_SIZE (128 * MiB)
+#define NPCM7XX_FIU_MAX_FLASH_WINDOW_SIZE (128 * MiB)
 
 /* Each module has 4 KiB of register space. Only a fraction of it is used. */
 #define NPCM7XX_FIU_CTRL_REGS_SIZE (4 * KiB)
@@ -507,6 +507,17 @@ static void npcm7xx_fiu_realize(DeviceState *dev, Error **errp)
         return;
     }
 
+    if (s->flash_size == 0) {
+        error_setg(errp, "%s: flash size must be set", dev->canonical_path);
+        return;
+    }
+
+    if (s->flash_size > NPCM7XX_FIU_MAX_FLASH_WINDOW_SIZE) {
+        error_setg(errp, "%s: flash size should not exceed 128 MiB",
+                   dev->canonical_path);
+        return;
+    }
+
     s->spi = ssi_create_bus(dev, "spi");
     s->cs_lines = g_new0(qemu_irq, s->cs_count);
     qdev_init_gpio_out_named(DEVICE(s), s->cs_lines, "cs", s->cs_count);
@@ -525,7 +536,7 @@ static void npcm7xx_fiu_realize(DeviceState *dev, Error **errp)
         flash->fiu = s;
         memory_region_init_io(&flash->direct_access, OBJECT(s),
                               &npcm7xx_fiu_flash_ops, &s->flash[i], "flash",
-                              NPCM7XX_FIU_FLASH_WINDOW_SIZE);
+                              s->flash_size);
         sysbus_init_mmio(sbd, &flash->direct_access);
     }
 }
@@ -543,6 +554,7 @@ static const VMStateDescription vmstate_npcm7xx_fiu = {
 
 static const Property npcm7xx_fiu_properties[] = {
     DEFINE_PROP_INT32("cs-count", NPCM7xxFIUState, cs_count, 0),
+    DEFINE_PROP_SIZE("flash-size", NPCM7xxFIUState, flash_size, 0),
 };
 
 static void npcm7xx_fiu_class_init(ObjectClass *klass, void *data)
diff --git a/hw/ssi/pnv_spi.c b/hw/ssi/pnv_spi.c
index 15e25bd1be35b..126070393eec5 100644
--- a/hw/ssi/pnv_spi.c
+++ b/hw/ssi/pnv_spi.c
@@ -19,6 +19,8 @@
 
 #define PNV_SPI_OPCODE_LO_NIBBLE(x) (x & 0x0F)
 #define PNV_SPI_MASKED_OPCODE(x) (x & 0xF0)
+#define PNV_SPI_FIFO_SIZE 16
+#define RDR_MATCH_FAILURE_LIMIT 16
 
 /*
  * Macro from include/hw/ppc/fdt.h
@@ -35,48 +37,14 @@
         }                                                          \
     } while (0)
 
-/* PnvXferBuffer */
-typedef struct PnvXferBuffer {
-
-    uint32_t    len;
-    uint8_t    *data;
-
-} PnvXferBuffer;
-
-/* pnv_spi_xfer_buffer_methods */
-static PnvXferBuffer *pnv_spi_xfer_buffer_new(void)
-{
-    PnvXferBuffer *payload = g_malloc0(sizeof(*payload));
-
-    return payload;
-}
-
-static void pnv_spi_xfer_buffer_free(PnvXferBuffer *payload)
-{
-    g_free(payload->data);
-    g_free(payload);
-}
-
-static uint8_t *pnv_spi_xfer_buffer_write_ptr(PnvXferBuffer *payload,
-                uint32_t offset, uint32_t length)
-{
-    if (payload->len < (offset + length)) {
-        payload->len = offset + length;
-        payload->data = g_realloc(payload->data, payload->len);
-    }
-    return &payload->data[offset];
-}
-
 static bool does_rdr_match(PnvSpi *s)
 {
     /*
      * According to spec, the mask bits that are 0 are compared and the
      * bits that are 1 are ignored.
      */
-    uint16_t rdr_match_mask = GETFIELD(SPI_MM_RDR_MATCH_MASK,
-                                        s->regs[SPI_MM_REG]);
-    uint16_t rdr_match_val = GETFIELD(SPI_MM_RDR_MATCH_VAL,
-                                        s->regs[SPI_MM_REG]);
+    uint16_t rdr_match_mask = GETFIELD(SPI_MM_RDR_MATCH_MASK, s->regs[SPI_MM_REG]);
+    uint16_t rdr_match_val = GETFIELD(SPI_MM_RDR_MATCH_VAL, s->regs[SPI_MM_REG]);
 
     if ((~rdr_match_mask & rdr_match_val) == ((~rdr_match_mask) &
             GETFIELD(PPC_BITMASK(48, 63), s->regs[SPI_RCV_DATA_REG]))) {
@@ -107,8 +75,8 @@ static uint8_t get_from_offset(PnvSpi *s, uint8_t offset)
     return byte;
 }
 
-static uint8_t read_from_frame(PnvSpi *s, uint8_t *read_buf, uint8_t nr_bytes,
-                uint8_t ecc_count, uint8_t shift_in_count)
+static uint8_t read_from_frame(PnvSpi *s, uint8_t nr_bytes, uint8_t ecc_count,
+                uint8_t shift_in_count)
 {
     uint8_t byte;
     int count = 0;
@@ -118,20 +86,24 @@ static uint8_t read_from_frame(PnvSpi *s, uint8_t *read_buf, uint8_t nr_bytes,
         if ((ecc_count != 0) &&
             (shift_in_count == (PNV_SPI_REG_SIZE + ecc_count))) {
             shift_in_count = 0;
-        } else {
-            byte = read_buf[count];
+        } else if (!fifo8_is_empty(&s->rx_fifo)) {
+            byte = fifo8_pop(&s->rx_fifo);
             trace_pnv_spi_shift_rx(byte, count);
             s->regs[SPI_RCV_DATA_REG] = (s->regs[SPI_RCV_DATA_REG] << 8) | byte;
+        } else {
+            qemu_log_mask(LOG_GUEST_ERROR, "pnv_spi: Reading empty RX_FIFO\n");
         }
         count++;
     } /* end of while */
     return shift_in_count;
 }
 
-static void spi_response(PnvSpi *s, int bits, PnvXferBuffer *rsp_payload)
+static void spi_response(PnvSpi *s)
 {
     uint8_t ecc_count;
     uint8_t shift_in_count;
+    uint32_t rx_len;
+    int i;
 
     /*
      * Processing here must handle:
@@ -144,13 +116,14 @@ static void spi_response(PnvSpi *s, int bits, PnvXferBuffer *rsp_payload)
      * First check that the response payload is the exact same
      * number of bytes as the request payload was
      */
-    if (rsp_payload->len != (s->N1_bytes + s->N2_bytes)) {
+    rx_len = fifo8_num_used(&s->rx_fifo);
+    if (rx_len != (s->N1_bytes + s->N2_bytes)) {
         qemu_log_mask(LOG_GUEST_ERROR, "Invalid response payload size in "
                        "bytes, expected %d, got %d\n",
-                       (s->N1_bytes + s->N2_bytes), rsp_payload->len);
+                       (s->N1_bytes + s->N2_bytes), rx_len);
     } else {
         uint8_t ecc_control;
-        trace_pnv_spi_rx_received(rsp_payload->len);
+        trace_pnv_spi_rx_received(rx_len);
         trace_pnv_spi_log_Ncounts(s->N1_bits, s->N1_bytes, s->N1_tx,
                         s->N1_rx, s->N2_bits, s->N2_bytes, s->N2_tx, s->N2_rx);
         /*
@@ -175,15 +148,23 @@ static void spi_response(PnvSpi *s, int bits, PnvXferBuffer *rsp_payload)
         /* Handle the N1 portion of the frame first */
         if (s->N1_rx != 0) {
             trace_pnv_spi_rx_read_N1frame();
-            shift_in_count = read_from_frame(s, &rsp_payload->data[0],
-                            s->N1_bytes, ecc_count, shift_in_count);
+            shift_in_count = read_from_frame(s, s->N1_bytes, ecc_count, shift_in_count);
         }
         /* Handle the N2 portion of the frame */
         if (s->N2_rx != 0) {
+            /* pop out N1_bytes from rx_fifo if not already */
+            if (s->N1_rx == 0) {
+                for (i = 0; i < s->N1_bytes; i++) {
+                    if (!fifo8_is_empty(&s->rx_fifo)) {
+                        fifo8_pop(&s->rx_fifo);
+                    } else {
+                        qemu_log_mask(LOG_GUEST_ERROR, "pnv_spi: Reading empty"
+                                                       " RX_FIFO\n");
+                    }
+                }
+            }
             trace_pnv_spi_rx_read_N2frame();
-            shift_in_count = read_from_frame(s,
-                            &rsp_payload->data[s->N1_bytes], s->N2_bytes,
-                            ecc_count, shift_in_count);
+            shift_in_count = read_from_frame(s, s->N2_bytes, ecc_count, shift_in_count);
         }
         if ((s->N1_rx + s->N2_rx) > 0) {
             /*
@@ -210,48 +191,41 @@ static void spi_response(PnvSpi *s, int bits, PnvXferBuffer *rsp_payload)
     } /* end of else */
 } /* end of spi_response() */
 
-static void transfer(PnvSpi *s, PnvXferBuffer *payload)
+static void transfer(PnvSpi *s)
 {
-    uint32_t tx;
-    uint32_t rx;
-    PnvXferBuffer *rsp_payload = NULL;
+    uint32_t tx, rx, payload_len;
+    uint8_t rx_byte;
 
-    rsp_payload = pnv_spi_xfer_buffer_new();
-    if (!rsp_payload) {
-        return;
-    }
-    for (int offset = 0; offset < payload->len; offset += s->transfer_len) {
+    payload_len = fifo8_num_used(&s->tx_fifo);
+    for (int offset = 0; offset < payload_len; offset += s->transfer_len) {
         tx = 0;
         for (int i = 0; i < s->transfer_len; i++) {
-            if ((offset + i) >= payload->len) {
+            if ((offset + i) >= payload_len) {
                 tx <<= 8;
+            } else if (!fifo8_is_empty(&s->tx_fifo)) {
+                tx = (tx << 8) | fifo8_pop(&s->tx_fifo);
             } else {
-                tx = (tx << 8) | payload->data[offset + i];
+                qemu_log_mask(LOG_GUEST_ERROR, "pnv_spi: TX_FIFO underflow\n");
             }
         }
         rx = ssi_transfer(s->ssi_bus, tx);
         for (int i = 0; i < s->transfer_len; i++) {
-            if ((offset + i) >= payload->len) {
+            if ((offset + i) >= payload_len) {
+                break;
+            }
+            rx_byte = (rx >> (8 * (s->transfer_len - 1) - i * 8)) & 0xFF;
+            if (!fifo8_is_full(&s->rx_fifo)) {
+                fifo8_push(&s->rx_fifo, rx_byte);
+            } else {
+                qemu_log_mask(LOG_GUEST_ERROR, "pnv_spi: RX_FIFO is full\n");
                 break;
             }
-            *(pnv_spi_xfer_buffer_write_ptr(rsp_payload, rsp_payload->len, 1)) =
-                    (rx >> (8 * (s->transfer_len - 1) - i * 8)) & 0xFF;
         }
     }
-    spi_response(s, s->N1_bits, rsp_payload);
-    pnv_spi_xfer_buffer_free(rsp_payload);
-}
-
-static inline uint8_t get_seq_index(PnvSpi *s)
-{
-    return GETFIELD(SPI_STS_SEQ_INDEX, s->status);
-}
-
-static inline void next_sequencer_fsm(PnvSpi *s)
-{
-    uint8_t seq_index = get_seq_index(s);
-    s->status = SETFIELD(SPI_STS_SEQ_INDEX, s->status, (seq_index + 1));
-    s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_INDEX_INCREMENT);
+    spi_response(s);
+    /* Reset fifo for next frame */
+    fifo8_reset(&s->tx_fifo);
+    fifo8_reset(&s->rx_fifo);
 }
 
 /*
@@ -310,13 +284,11 @@ static void calculate_N1(PnvSpi *s, uint8_t opcode)
              * If Forced Implicit mode and count control doesn't
              * indicate transmit then reset the tx count to 0
              */
-            if (GETFIELD(SPI_CTR_CFG_N1_CTRL_B2,
-                                    s->regs[SPI_CTR_CFG_REG]) == 0) {
+            if (GETFIELD(SPI_CTR_CFG_N1_CTRL_B2, s->regs[SPI_CTR_CFG_REG]) == 0) {
                 s->N1_tx = 0;
             }
             /* If rx count control for N1 is set, load the rx value */
-            if (GETFIELD(SPI_CTR_CFG_N1_CTRL_B3,
-                                    s->regs[SPI_CTR_CFG_REG]) == 1) {
+            if (GETFIELD(SPI_CTR_CFG_N1_CTRL_B3, s->regs[SPI_CTR_CFG_REG]) == 1) {
                 s->N1_rx = s->N1_bytes;
             }
         }
@@ -328,8 +300,7 @@ static void calculate_N1(PnvSpi *s, uint8_t opcode)
      * cap the size at a max of 64 bits or 72 bits and set the sequencer FSM
      * error bit.
      */
-    uint8_t ecc_control = GETFIELD(SPI_CLK_CFG_ECC_CTRL,
-                                   s->regs[SPI_CLK_CFG_REG]);
+    uint8_t ecc_control = GETFIELD(SPI_CLK_CFG_ECC_CTRL, s->regs[SPI_CLK_CFG_REG]);
     if (ecc_control == 0 || ecc_control == 2) {
         if (s->N1_bytes > (PNV_SPI_REG_SIZE + 1)) {
             qemu_log_mask(LOG_GUEST_ERROR, "Unsupported N1 shift size when "
@@ -340,8 +311,7 @@ static void calculate_N1(PnvSpi *s, uint8_t opcode)
         }
     } else if (s->N1_bytes > PNV_SPI_REG_SIZE) {
         qemu_log_mask(LOG_GUEST_ERROR, "Unsupported N1 shift size, "
-                      "bytes = 0x%x, bits = 0x%x\n",
-                      s->N1_bytes, s->N1_bits);
+                      "bytes = 0x%x, bits = 0x%x\n", s->N1_bytes, s->N1_bits);
         s->N1_bytes = PNV_SPI_REG_SIZE;
         s->N1_bits = s->N1_bytes * 8;
     }
@@ -350,19 +320,10 @@ static void calculate_N1(PnvSpi *s, uint8_t opcode)
 /*
  * Shift_N1 operation handler method
  */
-static bool operation_shiftn1(PnvSpi *s, uint8_t opcode,
-                       PnvXferBuffer **payload, bool send_n1_alone)
+static bool operation_shiftn1(PnvSpi *s, uint8_t opcode, bool send_n1_alone)
 {
     uint8_t n1_count;
     bool stop = false;
-
-    /*
-     * If there isn't a current payload left over from a stopped sequence
-     * create a new one.
-     */
-    if (*payload == NULL) {
-        *payload = pnv_spi_xfer_buffer_new();
-    }
     /*
      * Use a combination of N1 counters to build the N1 portion of the
      * transmit payload.
@@ -413,9 +374,13 @@ static bool operation_shiftn1(PnvSpi *s, uint8_t opcode,
                  */
                 uint8_t n1_byte = 0x00;
                 n1_byte = get_from_offset(s, n1_count);
-                trace_pnv_spi_tx_append("n1_byte", n1_byte, n1_count);
-                *(pnv_spi_xfer_buffer_write_ptr(*payload, (*payload)->len, 1)) =
-                        n1_byte;
+                if (!fifo8_is_full(&s->tx_fifo)) {
+                    trace_pnv_spi_tx_append("n1_byte", n1_byte, n1_count);
+                    fifo8_push(&s->tx_fifo, n1_byte);
+                } else {
+                    qemu_log_mask(LOG_GUEST_ERROR, "pnv_spi: TX_FIFO is full\n");
+                    break;
+                }
             } else {
                 /*
                  * We hit a shift_n1 opcode TX but the TDR is empty, tell the
@@ -436,16 +401,17 @@ static bool operation_shiftn1(PnvSpi *s, uint8_t opcode,
              * - we are receiving and the RDR is empty so we allow the operation
              *   to proceed.
              */
-            if ((s->N1_rx != 0) && (GETFIELD(SPI_STS_RDR_FULL,
-                                           s->status) == 1)) {
+            if ((s->N1_rx != 0) && (GETFIELD(SPI_STS_RDR_FULL, s->status) == 1)) {
                 trace_pnv_spi_sequencer_stop_requested("shift N1"
                                 "set for receive but RDR is full");
                 stop = true;
                 break;
-            } else {
+            } else if (!fifo8_is_full(&s->tx_fifo)) {
                 trace_pnv_spi_tx_append_FF("n1_byte");
-                *(pnv_spi_xfer_buffer_write_ptr(*payload, (*payload)->len, 1))
-                        = 0xff;
+                fifo8_push(&s->tx_fifo, 0xff);
+            } else {
+                qemu_log_mask(LOG_GUEST_ERROR, "pnv_spi: TX_FIFO is full\n");
+                break;
             }
         }
         n1_count++;
@@ -486,15 +452,13 @@ static bool operation_shiftn1(PnvSpi *s, uint8_t opcode,
      */
     if (send_n1_alone && !stop) {
         /* We have a TX and a full TDR or an RX and an empty RDR */
-        trace_pnv_spi_tx_request("Shifting N1 frame", (*payload)->len);
-        transfer(s, *payload);
+        trace_pnv_spi_tx_request("Shifting N1 frame", fifo8_num_used(&s->tx_fifo));
+        transfer(s);
         /* The N1 frame shift is complete so reset the N1 counters */
         s->N2_bits = 0;
         s->N2_bytes = 0;
         s->N2_tx = 0;
         s->N2_rx = 0;
-        pnv_spi_xfer_buffer_free(*payload);
-        *payload = NULL;
     }
     return stop;
 } /* end of operation_shiftn1() */
@@ -552,13 +516,11 @@ static void calculate_N2(PnvSpi *s, uint8_t opcode)
              * If Forced Implicit mode and count control doesn't
              * indicate a receive then reset the rx count to 0
              */
-            if (GETFIELD(SPI_CTR_CFG_N2_CTRL_B3,
-                                    s->regs[SPI_CTR_CFG_REG]) == 0) {
+            if (GETFIELD(SPI_CTR_CFG_N2_CTRL_B3, s->regs[SPI_CTR_CFG_REG]) == 0) {
                 s->N2_rx = 0;
             }
             /* If tx count control for N2 is set, load the tx value */
-            if (GETFIELD(SPI_CTR_CFG_N2_CTRL_B2,
-                                    s->regs[SPI_CTR_CFG_REG]) == 1) {
+            if (GETFIELD(SPI_CTR_CFG_N2_CTRL_B2, s->regs[SPI_CTR_CFG_REG]) == 1) {
                 s->N2_tx = s->N2_bytes;
             }
         }
@@ -571,8 +533,7 @@ static void calculate_N2(PnvSpi *s, uint8_t opcode)
      * cap the size at a max of 64 bits or 72 bits and set the sequencer FSM
      * error bit.
      */
-    uint8_t ecc_control = GETFIELD(SPI_CLK_CFG_ECC_CTRL,
-                    s->regs[SPI_CLK_CFG_REG]);
+    uint8_t ecc_control = GETFIELD(SPI_CLK_CFG_ECC_CTRL, s->regs[SPI_CLK_CFG_REG]);
     if (ecc_control == 0 || ecc_control == 2) {
         if (s->N2_bytes > (PNV_SPI_REG_SIZE + 1)) {
             /* Unsupported N2 shift size when ECC enabled */
@@ -590,19 +551,10 @@ static void calculate_N2(PnvSpi *s, uint8_t opcode)
  * Shift_N2 operation handler method
  */
 
-static bool operation_shiftn2(PnvSpi *s, uint8_t opcode,
-                       PnvXferBuffer **payload)
+static bool operation_shiftn2(PnvSpi *s, uint8_t opcode)
 {
     uint8_t n2_count;
     bool stop = false;
-
-    /*
-     * If there isn't a current payload left over from a stopped sequence
-     * create a new one.
-     */
-    if (*payload == NULL) {
-        *payload = pnv_spi_xfer_buffer_new();
-    }
     /*
      * Use a combination of N2 counters to build the N2 portion of the
      * transmit payload.
@@ -629,44 +581,47 @@ static bool operation_shiftn2(PnvSpi *s, uint8_t opcode,
          * code continue will end up building the payload twice in the same
          * buffer since RDR full causes a sequence stop and restart.
          */
-        if ((s->N2_rx != 0) &&
-            (GETFIELD(SPI_STS_RDR_FULL, s->status) == 1)) {
+        if ((s->N2_rx != 0) && (GETFIELD(SPI_STS_RDR_FULL, s->status) == 1)) {
             trace_pnv_spi_sequencer_stop_requested("shift N2 set"
                             "for receive but RDR is full");
             stop = true;
             break;
         }
-        if ((s->N2_tx != 0) && ((s->N1_tx + n2_count) <
-                                PNV_SPI_REG_SIZE)) {
+        if ((s->N2_tx != 0) && ((s->N1_tx + n2_count) < PNV_SPI_REG_SIZE)) {
             /* Always append data for the N2 segment if it is set for TX */
             uint8_t n2_byte = 0x00;
             n2_byte = get_from_offset(s, (s->N1_tx + n2_count));
-            trace_pnv_spi_tx_append("n2_byte", n2_byte, (s->N1_tx + n2_count));
-            *(pnv_spi_xfer_buffer_write_ptr(*payload, (*payload)->len, 1))
-                    = n2_byte;
-        } else {
+            if (!fifo8_is_full(&s->tx_fifo)) {
+                trace_pnv_spi_tx_append("n2_byte", n2_byte, (s->N1_tx + n2_count));
+                fifo8_push(&s->tx_fifo, n2_byte);
+            } else {
+                qemu_log_mask(LOG_GUEST_ERROR, "pnv_spi: TX_FIFO is full\n");
+                break;
+            }
+        } else if (!fifo8_is_full(&s->tx_fifo)) {
             /*
              * Regardless of whether or not N2 is set for TX or RX, we need
              * the number of bytes in the payload to match the overall length
              * of the operation.
              */
             trace_pnv_spi_tx_append_FF("n2_byte");
-            *(pnv_spi_xfer_buffer_write_ptr(*payload, (*payload)->len, 1))
-                    = 0xff;
+            fifo8_push(&s->tx_fifo, 0xff);
+        } else {
+            qemu_log_mask(LOG_GUEST_ERROR, "pnv_spi: TX_FIFO is full\n");
+            break;
         }
         n2_count++;
     } /* end of while */
     if (!stop) {
         /* We have a TX and a full TDR or an RX and an empty RDR */
-        trace_pnv_spi_tx_request("Shifting N2 frame", (*payload)->len);
-        transfer(s, *payload);
+        trace_pnv_spi_tx_request("Shifting N2 frame", fifo8_num_used(&s->tx_fifo));
+        transfer(s);
         /*
          * If we are doing an N2 TX and the TDR is full we need to clear the
          * TDR_full status. Do this here instead of up in the loop above so we
          * don't log the message in every loop iteration.
          */
-        if ((s->N2_tx != 0) &&
-            (GETFIELD(SPI_STS_TDR_FULL, s->status) == 1)) {
+        if ((s->N2_tx != 0) && (GETFIELD(SPI_STS_TDR_FULL, s->status) == 1)) {
             s->status = SETFIELD(SPI_STS_TDR_FULL, s->status, 0);
         }
         /*
@@ -682,8 +637,6 @@ static bool operation_shiftn2(PnvSpi *s, uint8_t opcode,
         s->N1_bytes = 0;
         s->N1_tx = 0;
         s->N1_rx = 0;
-        pnv_spi_xfer_buffer_free(*payload);
-        *payload = NULL;
     }
     return stop;
 } /*  end of operation_shiftn2()*/
@@ -700,20 +653,8 @@ static void operation_sequencer(PnvSpi *s)
     bool stop = false; /* Flag to stop the sequencer */
     uint8_t opcode = 0;
     uint8_t masked_opcode = 0;
+    uint8_t seq_index;
 
-    /*
-     * PnvXferBuffer for containing the payload of the SPI frame.
-     * This is a static because there are cases where a sequence has to stop
-     * and wait for the target application to unload the RDR.  If this occurs
-     * during a sequence where N1 is not sent alone and instead combined with
-     * N2 since the N1 tx length + the N2 tx length is less than the size of
-     * the TDR.
-     */
-    static PnvXferBuffer *payload;
-
-    if (payload == NULL) {
-        payload = pnv_spi_xfer_buffer_new();
-    }
     /*
      * Clear the sequencer FSM error bit - general_SPI_status[3]
      * before starting a sequence.
@@ -726,12 +667,17 @@ static void operation_sequencer(PnvSpi *s)
     if (GETFIELD(SPI_STS_SEQ_FSM, s->status) == SEQ_STATE_IDLE) {
         s->status = SETFIELD(SPI_STS_SEQ_INDEX, s->status, 0);
     }
+    /*
+     * SPI_STS_SEQ_INDEX of status register is kept in seq_index variable and
+     * updated back to status register at the end of operation_sequencer().
+     */
+    seq_index = GETFIELD(SPI_STS_SEQ_INDEX, s->status);
     /*
      * There are only 8 possible operation IDs to iterate through though
      * some operations may cause more than one frame to be sequenced.
      */
-    while (get_seq_index(s) < NUM_SEQ_OPS) {
-        opcode = s->seq_op[get_seq_index(s)];
+    while (seq_index < NUM_SEQ_OPS) {
+        opcode = s->seq_op[seq_index];
         /* Set sequencer state to decode */
         s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_DECODE);
         /*
@@ -748,7 +694,7 @@ static void operation_sequencer(PnvSpi *s)
         case SEQ_OP_STOP:
             s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_EXECUTE);
             /* A stop operation in any position stops the sequencer */
-            trace_pnv_spi_sequencer_op("STOP", get_seq_index(s));
+            trace_pnv_spi_sequencer_op("STOP", seq_index);
 
             stop = true;
             s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status, FSM_IDLE);
@@ -759,7 +705,7 @@ static void operation_sequencer(PnvSpi *s)
 
         case SEQ_OP_SELECT_SLAVE:
             s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_EXECUTE);
-            trace_pnv_spi_sequencer_op("SELECT_SLAVE", get_seq_index(s));
+            trace_pnv_spi_sequencer_op("SELECT_SLAVE", seq_index);
             /*
              * This device currently only supports a single responder
              * connection at position 0.  De-selecting a responder is fine
@@ -770,15 +716,12 @@ static void operation_sequencer(PnvSpi *s)
             if (s->responder_select == 0) {
                 trace_pnv_spi_shifter_done();
                 qemu_set_irq(s->cs_line[0], 1);
-                s->status = SETFIELD(SPI_STS_SEQ_INDEX, s->status,
-                                (get_seq_index(s) + 1));
+                seq_index++;
                 s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status, FSM_DONE);
             } else if (s->responder_select != 1) {
                 qemu_log_mask(LOG_GUEST_ERROR, "Slave selection other than 1 "
-                              "not supported, select = 0x%x\n",
-                               s->responder_select);
-                trace_pnv_spi_sequencer_stop_requested("invalid "
-                                "responder select");
+                              "not supported, select = 0x%x\n", s->responder_select);
+                trace_pnv_spi_sequencer_stop_requested("invalid responder select");
                 s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status, FSM_IDLE);
                 stop = true;
             } else {
@@ -798,13 +741,15 @@ static void operation_sequencer(PnvSpi *s)
                  * applies once a valid responder select has occurred.
                  */
                 s->shift_n1_done = false;
-                next_sequencer_fsm(s);
+                seq_index++;
+                s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status,
+                                SEQ_STATE_INDEX_INCREMENT);
             }
             break;
 
         case SEQ_OP_SHIFT_N1:
             s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_EXECUTE);
-            trace_pnv_spi_sequencer_op("SHIFT_N1", get_seq_index(s));
+            trace_pnv_spi_sequencer_op("SHIFT_N1", seq_index);
             /*
              * Only allow a shift_n1 when the state is not IDLE or DONE.
              * In either of those two cases the sequencer is not in a proper
@@ -836,13 +781,13 @@ static void operation_sequencer(PnvSpi *s)
                  * transmission to the responder without requiring a refill of
                  * the TDR between the two operations.
                  */
-                if (PNV_SPI_MASKED_OPCODE(s->seq_op[get_seq_index(s) + 1])
-                                == SEQ_OP_SHIFT_N2) {
+                if ((seq_index != 7) &&
+                    PNV_SPI_MASKED_OPCODE(s->seq_op[(seq_index + 1)]) ==
+                    SEQ_OP_SHIFT_N2) {
                     send_n1_alone = false;
                 }
-                s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status,
-                                FSM_SHIFT_N1);
-                stop = operation_shiftn1(s, opcode, &payload, send_n1_alone);
+                s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status, FSM_SHIFT_N1);
+                stop = operation_shiftn1(s, opcode, send_n1_alone);
                 if (stop) {
                     /*
                      *  The operation code says to stop, this can occur if:
@@ -859,27 +804,27 @@ static void operation_sequencer(PnvSpi *s)
                         s->shift_n1_done = true;
                         s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status,
                                                   FSM_SHIFT_N2);
-                        s->status = SETFIELD(SPI_STS_SEQ_INDEX, s->status,
-                                        (get_seq_index(s) + 1));
+                        seq_index++;
                     } else {
                         /*
                          * This is case (1) or (2) so the sequencer needs to
                          * wait and NOT go to the next sequence yet.
                          */
-                        s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status,
-                                        FSM_WAIT);
+                        s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status, FSM_WAIT);
                     }
                 } else {
                     /* Ok to move on to the next index */
                     s->shift_n1_done = true;
-                    next_sequencer_fsm(s);
+                    seq_index++;
+                    s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status,
+                                    SEQ_STATE_INDEX_INCREMENT);
                 }
             }
             break;
 
         case SEQ_OP_SHIFT_N2:
             s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_EXECUTE);
-            trace_pnv_spi_sequencer_op("SHIFT_N2", get_seq_index(s));
+            trace_pnv_spi_sequencer_op("SHIFT_N2", seq_index);
             if (!s->shift_n1_done) {
                 qemu_log_mask(LOG_GUEST_ERROR, "Shift_N2 is not allowed if a "
                               "Shift_N1 is not done, shifter state = 0x%llx",
@@ -890,31 +835,30 @@ static void operation_sequencer(PnvSpi *s)
                  * error bit 3 (general_SPI_status[3]) in status reg.
                  */
                 s->status = SETFIELD(SPI_STS_GEN_STATUS_B3, s->status, 1);
-                trace_pnv_spi_sequencer_stop_requested("shift_n2 "
-                                    "w/no shift_n1 done");
+                trace_pnv_spi_sequencer_stop_requested("shift_n2 w/no shift_n1 done");
                 stop = true;
             } else {
                 /* Ok to do a Shift_N2 */
-                s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status,
-                                FSM_SHIFT_N2);
-                stop = operation_shiftn2(s, opcode, &payload);
+                s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status, FSM_SHIFT_N2);
+                stop = operation_shiftn2(s, opcode);
                 /*
                  * If the operation code says to stop set the shifter state to
                  * wait and stop
                  */
                 if (stop) {
-                    s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status,
-                                    FSM_WAIT);
+                    s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status, FSM_WAIT);
                 } else {
                     /* Ok to move on to the next index */
-                    next_sequencer_fsm(s);
+                    seq_index++;
+                    s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status,
+                                    SEQ_STATE_INDEX_INCREMENT);
                 }
             }
             break;
 
         case SEQ_OP_BRANCH_IFNEQ_RDR:
             s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_EXECUTE);
-            trace_pnv_spi_sequencer_op("BRANCH_IFNEQ_RDR", get_seq_index(s));
+            trace_pnv_spi_sequencer_op("BRANCH_IFNEQ_RDR", seq_index);
             /*
              * The memory mapping register RDR match value is compared against
              * the 16 rightmost bytes of the RDR (potentially with masking).
@@ -929,16 +873,26 @@ static void operation_sequencer(PnvSpi *s)
                 rdr_matched = does_rdr_match(s);
                 if (rdr_matched) {
                     trace_pnv_spi_RDR_match("success");
+                    s->fail_count = 0;
                     /* A match occurred, increment the sequencer index. */
-                    next_sequencer_fsm(s);
+                    seq_index++;
+                    s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status,
+                                    SEQ_STATE_INDEX_INCREMENT);
                 } else {
                     trace_pnv_spi_RDR_match("failed");
+                    s->fail_count++;
                     /*
                      * Branch the sequencer to the index coded into the op
                      * code.
                      */
-                    s->status = SETFIELD(SPI_STS_SEQ_INDEX, s->status,
-                                    PNV_SPI_OPCODE_LO_NIBBLE(opcode));
+                    seq_index = PNV_SPI_OPCODE_LO_NIBBLE(opcode);
+                }
+                if (s->fail_count >= RDR_MATCH_FAILURE_LIMIT) {
+                    qemu_log_mask(LOG_GUEST_ERROR, "pnv_spi: RDR match failure"
+                                  " limit crossed %d times hence requesting "
+                                  "sequencer to stop.\n",
+                                  RDR_MATCH_FAILURE_LIMIT);
+                    stop = true;
                 }
                 /*
                  * Regardless of where the branch ended up we want the
@@ -957,12 +911,13 @@ static void operation_sequencer(PnvSpi *s)
         case SEQ_OP_TRANSFER_TDR:
             s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_EXECUTE);
             qemu_log_mask(LOG_GUEST_ERROR, "Transfer TDR is not supported\n");
-            next_sequencer_fsm(s);
+            seq_index++;
+            s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_INDEX_INCREMENT);
             break;
 
         case SEQ_OP_BRANCH_IFNEQ_INC_1:
             s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_EXECUTE);
-            trace_pnv_spi_sequencer_op("BRANCH_IFNEQ_INC_1", get_seq_index(s));
+            trace_pnv_spi_sequencer_op("BRANCH_IFNEQ_INC_1", seq_index);
             /*
              * The spec says the loop should execute count compare + 1 times.
              * However we learned from engineering that we really only loop
@@ -976,18 +931,19 @@ static void operation_sequencer(PnvSpi *s)
                  * mask off all but the first three bits so we don't try to
                  * access beyond the sequencer_operation_reg boundary.
                  */
-                s->status = SETFIELD(SPI_STS_SEQ_INDEX, s->status,
-                                PNV_SPI_OPCODE_LO_NIBBLE(opcode));
+                seq_index = PNV_SPI_OPCODE_LO_NIBBLE(opcode);
                 s->loop_counter_1++;
             } else {
                 /* Continue to next index if loop counter is reached */
-                next_sequencer_fsm(s);
+                seq_index++;
+                s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status,
+                                SEQ_STATE_INDEX_INCREMENT);
             }
             break;
 
         case SEQ_OP_BRANCH_IFNEQ_INC_2:
             s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_EXECUTE);
-            trace_pnv_spi_sequencer_op("BRANCH_IFNEQ_INC_2", get_seq_index(s));
+            trace_pnv_spi_sequencer_op("BRANCH_IFNEQ_INC_2", seq_index);
             uint8_t condition2 = GETFIELD(SPI_CTR_CFG_CMP2,
                               s->regs[SPI_CTR_CFG_REG]);
             /*
@@ -1002,19 +958,21 @@ static void operation_sequencer(PnvSpi *s)
                  * mask off all but the first three bits so we don't try to
                  * access beyond the sequencer_operation_reg boundary.
                  */
-                s->status = SETFIELD(SPI_STS_SEQ_INDEX,
-                                s->status, PNV_SPI_OPCODE_LO_NIBBLE(opcode));
+                seq_index = PNV_SPI_OPCODE_LO_NIBBLE(opcode);
                 s->loop_counter_2++;
             } else {
                 /* Continue to next index if loop counter is reached */
-                next_sequencer_fsm(s);
+                seq_index++;
+                s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status,
+                                SEQ_STATE_INDEX_INCREMENT);
             }
             break;
 
         default:
             s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_EXECUTE);
             /* Ignore unsupported operations. */
-            next_sequencer_fsm(s);
+            seq_index++;
+            s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_INDEX_INCREMENT);
             break;
         } /* end of switch */
         /*
@@ -1022,10 +980,10 @@ static void operation_sequencer(PnvSpi *s)
          * we need to go ahead and end things as if there was a STOP at the
          * end.
          */
-        if (get_seq_index(s) == NUM_SEQ_OPS) {
+        if (seq_index == NUM_SEQ_OPS) {
             /* All 8 opcodes completed, sequencer idling */
             s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status, FSM_IDLE);
-            s->status = SETFIELD(SPI_STS_SEQ_INDEX, s->status, 0);
+            seq_index = 0;
             s->loop_counter_1 = 0;
             s->loop_counter_2 = 0;
             s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_IDLE);
@@ -1036,6 +994,8 @@ static void operation_sequencer(PnvSpi *s)
             break;
         }
     } /* end of while */
+    /* Update sequencer index field in status.*/
+    s->status = SETFIELD(SPI_STS_SEQ_INDEX, s->status, seq_index);
     return;
 } /* end of operation_sequencer() */
 
@@ -1197,18 +1157,22 @@ static const MemoryRegionOps pnv_spi_xscom_ops = {
 
 static const Property pnv_spi_properties[] = {
     DEFINE_PROP_UINT32("spic_num", PnvSpi, spic_num, 0),
+    DEFINE_PROP_UINT32("chip-id", PnvSpi, chip_id, 0),
     DEFINE_PROP_UINT8("transfer_len", PnvSpi, transfer_len, 4),
 };
 
 static void pnv_spi_realize(DeviceState *dev, Error **errp)
 {
     PnvSpi *s = PNV_SPI(dev);
-    g_autofree char *name = g_strdup_printf(TYPE_PNV_SPI_BUS ".%d",
-                    s->spic_num);
+    g_autofree char *name = g_strdup_printf("chip%d." TYPE_PNV_SPI_BUS ".%d",
+                    s->chip_id, s->spic_num);
     s->ssi_bus = ssi_create_bus(dev, name);
     s->cs_line = g_new0(qemu_irq, 1);
     qdev_init_gpio_out_named(DEVICE(s), s->cs_line, "cs", 1);
 
+    fifo8_create(&s->tx_fifo, PNV_SPI_FIFO_SIZE);
+    fifo8_create(&s->rx_fifo, PNV_SPI_FIFO_SIZE);
+
     /* spi scoms */
     pnv_xscom_region_init(&s->xscom_spic_regs, OBJECT(s), &pnv_spi_xscom_ops,
                           s, "xscom-spi", PNV10_XSCOM_PIB_SPIC_SIZE);
diff --git a/hw/ssi/xilinx_spi.c b/hw/ssi/xilinx_spi.c
index fd1ff12eb1dc6..be5baa6b35042 100644
--- a/hw/ssi/xilinx_spi.c
+++ b/hw/ssi/xilinx_spi.c
@@ -25,6 +25,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "hw/sysbus.h"
 #include "migration/vmstate.h"
 #include "qemu/module.h"
@@ -32,6 +33,7 @@
 
 #include "hw/irq.h"
 #include "hw/qdev-properties.h"
+#include "hw/qdev-properties-system.h"
 #include "hw/ssi/ssi.h"
 #include "qom/object.h"
 
@@ -83,6 +85,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(XilinxSPI, XILINX_SPI)
 struct XilinxSPI {
     SysBusDevice parent_obj;
 
+    EndianMode model_endianness;
     MemoryRegion mmio;
 
     qemu_irq irq;
@@ -313,14 +316,17 @@ spi_write(void *opaque, hwaddr addr,
     xlx_spi_update_irq(s);
 }
 
-static const MemoryRegionOps spi_ops = {
-    .read = spi_read,
-    .write = spi_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
-    .valid = {
-        .min_access_size = 4,
-        .max_access_size = 4
-    }
+static const MemoryRegionOps spi_ops[2] = {
+    [0 ... 1] = {
+        .read = spi_read,
+        .write = spi_write,
+        .valid = {
+            .min_access_size = 4,
+            .max_access_size = 4,
+        },
+    },
+    [0].endianness = DEVICE_LITTLE_ENDIAN,
+    [1].endianness = DEVICE_BIG_ENDIAN,
 };
 
 static void xilinx_spi_realize(DeviceState *dev, Error **errp)
@@ -329,6 +335,12 @@ static void xilinx_spi_realize(DeviceState *dev, Error **errp)
     XilinxSPI *s = XILINX_SPI(dev);
     int i;
 
+    if (s->model_endianness == ENDIAN_MODE_UNSPECIFIED) {
+        error_setg(errp, TYPE_XILINX_SPI " property 'endianness'"
+                         " must be set to 'big' or 'little'");
+        return;
+    }
+
     DB_PRINT("\n");
 
     s->spi = ssi_create_bus(dev, "spi");
@@ -339,7 +351,8 @@ static void xilinx_spi_realize(DeviceState *dev, Error **errp)
         sysbus_init_irq(sbd, &s->cs_lines[i]);
     }
 
-    memory_region_init_io(&s->mmio, OBJECT(s), &spi_ops, s,
+    memory_region_init_io(&s->mmio, OBJECT(s),
+                          &spi_ops[s->model_endianness == ENDIAN_MODE_BIG], s,
                           "xilinx-spi", R_MAX * 4);
     sysbus_init_mmio(sbd, &s->mmio);
 
@@ -362,6 +375,7 @@ static const VMStateDescription vmstate_xilinx_spi = {
 };
 
 static const Property xilinx_spi_properties[] = {
+    DEFINE_PROP_ENDIAN_NODEFAULT("endianness", XilinxSPI, model_endianness),
     DEFINE_PROP_UINT8("num-ss-bits", XilinxSPI, num_cs, 1),
 };
 
diff --git a/hw/timer/Kconfig b/hw/timer/Kconfig
index c96fd5d97ae82..9ac0084534084 100644
--- a/hw/timer/Kconfig
+++ b/hw/timer/Kconfig
@@ -11,7 +11,7 @@ config A9_GTIMER
 
 config HPET
     bool
-    default y if PC
+    default y if PC && !HAVE_RUST
 
 config I8254
     bool
diff --git a/hw/timer/allwinner-a10-pit.c b/hw/timer/allwinner-a10-pit.c
index ddaf2128c2d9f..da3d7173ef560 100644
--- a/hw/timer/allwinner-a10-pit.c
+++ b/hw/timer/allwinner-a10-pit.c
@@ -185,7 +185,7 @@ static void a10_pit_write(void *opaque, hwaddr offset, uint64_t value,
 static const MemoryRegionOps a10_pit_ops = {
     .read = a10_pit_read,
     .write = a10_pit_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
 static const Property a10_pit_properties[] = {
diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index 1c8c6c69ef5db..ccb97b680665b 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -40,6 +40,8 @@
 #include "qom/object.h"
 #include "trace.h"
 
+struct hpet_fw_config hpet_fw_cfg = {.count = UINT8_MAX};
+
 #define HPET_MSI_SUPPORT        0
 
 OBJECT_DECLARE_SIMPLE_TYPE(HPETState, HPET)
@@ -75,6 +77,7 @@ struct HPETState {
     uint8_t rtc_irq_level;
     qemu_irq pit_enabled;
     uint8_t num_timers;
+    uint8_t num_timers_save;
     uint32_t intcap;
     HPETTimer timer[HPET_MAX_TIMERS];
 
@@ -235,15 +238,12 @@ static int hpet_pre_save(void *opaque)
         s->hpet_counter = hpet_get_ticks(s);
     }
 
-    return 0;
-}
-
-static int hpet_pre_load(void *opaque)
-{
-    HPETState *s = opaque;
-
-    /* version 1 only supports 3, later versions will load the actual value */
-    s->num_timers = HPET_MIN_TIMERS;
+    /*
+     * The number of timers must match on source and destination, but it was
+     * also added to the migration stream.  Check that it matches the value
+     * that was configured.
+     */
+    s->num_timers_save = s->num_timers;
     return 0;
 }
 
@@ -251,12 +251,7 @@ static bool hpet_validate_num_timers(void *opaque, int version_id)
 {
     HPETState *s = opaque;
 
-    if (s->num_timers < HPET_MIN_TIMERS) {
-        return false;
-    } else if (s->num_timers > HPET_MAX_TIMERS) {
-        return false;
-    }
-    return true;
+    return s->num_timers == s->num_timers_save;
 }
 
 static int hpet_post_load(void *opaque, int version_id)
@@ -275,16 +270,6 @@ static int hpet_post_load(void *opaque, int version_id)
                         - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
     }
 
-    /* Push number of timers into capability returned via HPET_ID */
-    s->capability &= ~HPET_ID_NUM_TIM_MASK;
-    s->capability |= (s->num_timers - 1) << HPET_ID_NUM_TIM_SHIFT;
-    hpet_cfg.hpet[s->hpet_id].event_timer_block_id = (uint32_t)s->capability;
-
-    /* Derive HPET_MSI_SUPPORT from the capability of the first timer. */
-    s->flags &= ~(1 << HPET_MSI_SUPPORT);
-    if (s->timer[0].config & HPET_TN_FSB_CAP) {
-        s->flags |= 1 << HPET_MSI_SUPPORT;
-    }
     return 0;
 }
 
@@ -345,14 +330,13 @@ static const VMStateDescription vmstate_hpet = {
     .version_id = 2,
     .minimum_version_id = 1,
     .pre_save = hpet_pre_save,
-    .pre_load = hpet_pre_load,
     .post_load = hpet_post_load,
     .fields = (const VMStateField[]) {
         VMSTATE_UINT64(config, HPETState),
         VMSTATE_UINT64(isr, HPETState),
         VMSTATE_UINT64(hpet_counter, HPETState),
-        VMSTATE_UINT8_V(num_timers, HPETState, 2),
-        VMSTATE_VALIDATE("num_timers in range", hpet_validate_num_timers),
+        VMSTATE_UINT8_V(num_timers_save, HPETState, 2),
+        VMSTATE_VALIDATE("num_timers must match", hpet_validate_num_timers),
         VMSTATE_STRUCT_VARRAY_UINT8(timer, HPETState, num_timers, 0,
                                     vmstate_hpet_timer, HPETTimer),
         VMSTATE_END_OF_LIST()
@@ -665,8 +649,8 @@ static void hpet_reset(DeviceState *d)
     s->hpet_counter = 0ULL;
     s->hpet_offset = 0ULL;
     s->config = 0ULL;
-    hpet_cfg.hpet[s->hpet_id].event_timer_block_id = (uint32_t)s->capability;
-    hpet_cfg.hpet[s->hpet_id].address = sbd->mmio[0].addr;
+    hpet_fw_cfg.hpet[s->hpet_id].event_timer_block_id = (uint32_t)s->capability;
+    hpet_fw_cfg.hpet[s->hpet_id].address = sbd->mmio[0].addr;
 
     /* to document that the RTC lowers its output on reset as well */
     s->rtc_irq_level = 0;
@@ -708,17 +692,17 @@ static void hpet_realize(DeviceState *dev, Error **errp)
     if (!s->intcap) {
         warn_report("Hpet's intcap not initialized");
     }
-    if (hpet_cfg.count == UINT8_MAX) {
+    if (hpet_fw_cfg.count == UINT8_MAX) {
         /* first instance */
-        hpet_cfg.count = 0;
+        hpet_fw_cfg.count = 0;
     }
 
-    if (hpet_cfg.count == 8) {
+    if (hpet_fw_cfg.count == 8) {
         error_setg(errp, "Only 8 instances of HPET is allowed");
         return;
     }
 
-    s->hpet_id = hpet_cfg.count++;
+    s->hpet_id = hpet_fw_cfg.count++;
 
     for (i = 0; i < HPET_NUM_IRQ_ROUTES; i++) {
         sysbus_init_irq(sbd, &s->irqs[i]);
diff --git a/hw/timer/imx_gpt.c b/hw/timer/imx_gpt.c
index 11eca9fa4df36..200a89225bba0 100644
--- a/hw/timer/imx_gpt.c
+++ b/hw/timer/imx_gpt.c
@@ -126,6 +126,17 @@ static const IMXClk imx7_gpt_clocks[] = {
     CLK_NONE,      /* 111 not defined */
 };
 
+static const IMXClk imx8mp_gpt_clocks[] = {
+    CLK_NONE,      /* 000 No clock source */
+    CLK_IPG,       /* 001 ipg_clk, 532MHz */
+    CLK_IPG_HIGH,  /* 010 ipg_clk_highfreq */
+    CLK_EXT,       /* 011 External clock */
+    CLK_32k,       /* 100 ipg_clk_32k */
+    CLK_HIGH,      /* 101 ipg_clk_16M */
+    CLK_NONE,      /* 110 not defined */
+    CLK_NONE,      /* 111 not defined */
+};
+
 /* Must be called from within ptimer_transaction_begin/commit block */
 static void imx_gpt_set_freq(IMXGPTState *s)
 {
@@ -552,6 +563,13 @@ static void imx7_gpt_init(Object *obj)
     s->clocks = imx7_gpt_clocks;
 }
 
+static void imx8mp_gpt_init(Object *obj)
+{
+    IMXGPTState *s = IMX_GPT(obj);
+
+    s->clocks = imx8mp_gpt_clocks;
+}
+
 static const TypeInfo imx25_gpt_info = {
     .name = TYPE_IMX25_GPT,
     .parent = TYPE_SYS_BUS_DEVICE,
@@ -584,6 +602,12 @@ static const TypeInfo imx7_gpt_info = {
     .instance_init = imx7_gpt_init,
 };
 
+static const TypeInfo imx8mp_gpt_info = {
+    .name = TYPE_IMX8MP_GPT,
+    .parent = TYPE_IMX25_GPT,
+    .instance_init = imx8mp_gpt_init,
+};
+
 static void imx_gpt_register_types(void)
 {
     type_register_static(&imx25_gpt_info);
@@ -591,6 +615,7 @@ static void imx_gpt_register_types(void)
     type_register_static(&imx6_gpt_info);
     type_register_static(&imx6ul_gpt_info);
     type_register_static(&imx7_gpt_info);
+    type_register_static(&imx8mp_gpt_info);
 }
 
 type_init(imx_gpt_register_types)
diff --git a/hw/timer/meson.build b/hw/timer/meson.build
index f5f9eed2d0a9f..6c30bf602226f 100644
--- a/hw/timer/meson.build
+++ b/hw/timer/meson.build
@@ -34,3 +34,5 @@ specific_ss.add(when: 'CONFIG_IBEX', if_true: files('ibex_timer.c'))
 system_ss.add(when: 'CONFIG_SIFIVE_PWM', if_true: files('sifive_pwm.c'))
 
 specific_ss.add(when: 'CONFIG_AVR_TIMER16', if_true: files('avr_timer16.c'))
+
+specific_ss.add(when: 'CONFIG_HEX_DSP', if_true: files('qct-qtimer.c'))
diff --git a/hw/timer/qct-qtimer.c b/hw/timer/qct-qtimer.c
new file mode 100644
index 0000000000000..413f7249eef00
--- /dev/null
+++ b/hw/timer/qct-qtimer.c
@@ -0,0 +1,519 @@
+/*
+ * Qualcomm QCT QTimer
+ *
+ * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+
+#include "qemu/osdep.h"
+#include "hw/irq.h"
+#include "hw/qdev-properties.h"
+#include "hw/timer/qct-qtimer.h"
+#include "migration/vmstate.h"
+#include "qapi/error.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "qemu/timer.h"
+
+/* Common timer implementation.  */
+
+#define QTIMER_MEM_SIZE_BYTES 0x1000
+#define QTIMER_MEM_REGION_SIZE_BYTES 0x1000
+#define QTIMER_DEFAULT_FREQ_HZ 19200000ULL
+#define QTMR_TIMER_INDEX_MASK (0xf000)
+#define HIGH_32(val) (0x0ffffffffULL & (val >> 32))
+#define LOW_32(val) (0x0ffffffffULL & val)
+
+/*
+ * QTimer version reg:
+ *
+ *    3                   2                   1
+ *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Major |         Minor         |           Step                |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+static unsigned int TIMER_VERSION = 0x20020000;
+
+/*
+ * qct_qtimer_read/write:
+ * if offset < 0x1000 read restricted registers:
+ * QCT_QTIMER_AC_CNTFREQ/CNTSR/CNTTID/CNTACR/CNTOFF_(LO/HI)/QCT_QTIMER_VERSION
+ */
+static uint64_t qct_qtimer_read(void *opaque, hwaddr offset, unsigned size)
+{
+    QCTQtimerState *s = (QCTQtimerState *)opaque;
+    uint32_t frame = 0;
+
+    switch (offset) {
+    case QCT_QTIMER_AC_CNTFRQ:
+        return s->freq;
+    case QCT_QTIMER_AC_CNTSR:
+        return s->secure;
+    case QCT_QTIMER_AC_CNTTID:
+        return s->cnttid;
+    case QCT_QTIMER_AC_CNTACR_START ... QCT_QTIMER_AC_CNTACR_END:
+        frame = (offset - 0x40) / 0x4;
+        if (frame >= s->nr_frames) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: QCT_QTIMER_AC_CNT: Bad offset %x\n", __func__,
+                          (int)offset);
+            return 0x0;
+        }
+        return s->timer[frame].cnt_ctrl;
+    case QCT_QTIMER_VERSION:
+        return TIMER_VERSION;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: QCT_QTIMER_AC_CNT: Bad offset %x\n",
+                      __func__, (int)offset);
+        return 0x0;
+    }
+
+    qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%x\n", __func__,
+                  (int)offset);
+    return 0;
+}
+
+static void qct_qtimer_write(void *opaque, hwaddr offset, uint64_t value,
+                             unsigned size)
+{
+    QCTQtimerState *s = (QCTQtimerState *)opaque;
+    uint32_t frame = 0;
+
+    if (offset < 0x1000) {
+        switch (offset) {
+        case QCT_QTIMER_AC_CNTFRQ:
+            s->freq = value;
+            return;
+        case QCT_QTIMER_AC_CNTSR:
+            if (value > 0xFF)
+                qemu_log_mask(LOG_GUEST_ERROR,
+                              "%s: QCT_QTIMER_AC_CNTSR: Bad value %x\n",
+                              __func__, (int)value);
+            else
+                s->secure = value;
+            return;
+        case QCT_QTIMER_AC_CNTACR_START ... QCT_QTIMER_AC_CNTACR_END:
+            frame = (offset - QCT_QTIMER_AC_CNTACR_START) / 0x4;
+            if (frame >= s->nr_frames) {
+                qemu_log_mask(LOG_GUEST_ERROR,
+                              "%s: QCT_QTIMER_AC_CNT: Bad offset %x\n",
+                              __func__, (int)offset);
+                return;
+            }
+            s->timer[frame].cnt_ctrl = value;
+            return;
+        default:
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: QCT_QTIMER_AC_CNT: Bad offset %x\n", __func__,
+                          (int)offset);
+            return;
+        }
+    } else
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %x\n", __func__,
+                      (int)offset);
+}
+
+static const MemoryRegionOps qct_qtimer_ops = {
+    .read = qct_qtimer_read,
+    .write = qct_qtimer_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+static const VMStateDescription vmstate_qct_qtimer = {
+    .name = "qct-qtimer",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]){ VMSTATE_END_OF_LIST() }
+};
+
+static void qct_qtimer_init(Object *obj)
+{
+    QCTQtimerState *s = QCT_QTIMER(obj);
+
+    object_property_add_uint32_ptr(obj, "secure", &s->secure,
+                                   OBJ_PROP_FLAG_READ);
+    object_property_add_uint32_ptr(obj, "frame_id", &s->frame_id,
+                                   OBJ_PROP_FLAG_READ);
+}
+
+static void hex_timer_update(QCTHextimerState *s)
+{
+    /* Update interrupts.  */
+    int level = s->int_level && (s->control & QCT_QTIMER_CNTP_CTL_ENABLE);
+    qemu_set_irq(s->irq, level);
+}
+
+static MemTxResult hex_timer_read(void *opaque, hwaddr offset, uint64_t *data,
+                                  unsigned size, MemTxAttrs attrs)
+{
+    QCTQtimerState *qct_s = (QCTQtimerState *)opaque;
+    uint32_t slot_nr = (offset & 0xF000) >> 12;
+    uint32_t reg_offset = offset & 0xFFF;
+    uint32_t view = slot_nr % qct_s->nr_views;
+    uint32_t frame = slot_nr / qct_s->nr_views;
+
+    if (frame >= qct_s->nr_frames) {
+        *data = 0;
+        return MEMTX_ACCESS_ERROR;
+    }
+    QCTHextimerState *s = &qct_s->timer[frame];
+
+
+    /*
+     * This is the case where we have 2 views, but the second one is not
+     * implemented.
+     */
+    if (view && !(qct_s->cnttid & (0x4 << (frame * 4)))) {
+        *data = 0;
+        return MEMTX_OK;
+    }
+
+    switch (reg_offset) {
+    case (QCT_QTIMER_CNT_FREQ): /* Ticks/Second */
+        if (!(s->cnt_ctrl & QCT_QTIMER_AC_CNTACR_RFRQ)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+
+        if (view && !((s->cntpl0acr & QCT_QTIMER_CNTPL0ACR_PL0PCTEN) ||
+                      (s->cntpl0acr & QCT_QTIMER_CNTPL0ACR_PL0VCTEN))) {
+            return MEMTX_ACCESS_ERROR;
+        }
+
+        *data = s->freq;
+        return MEMTX_OK;
+    case (QCT_QTIMER_CNTP_CVAL_LO): /* TimerLoad */
+        if (!(s->cnt_ctrl & QCT_QTIMER_AC_CNTACR_RWPT)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+
+        if (view && !(s->cntpl0acr & QCT_QTIMER_CNTPL0ACR_PL0CTEN)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+
+        *data = LOW_32((s->cntval));
+        return MEMTX_OK;
+    case (QCT_QTIMER_CNTP_CVAL_HI): /* TimerLoad */
+        if (!(s->cnt_ctrl & QCT_QTIMER_AC_CNTACR_RWPT)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+
+        if (view && !(s->cntpl0acr & QCT_QTIMER_CNTPL0ACR_PL0CTEN)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+
+        *data = HIGH_32((s->cntval));
+        return MEMTX_OK;
+    case QCT_QTIMER_CNTPCT_LO:
+        if (!(s->cnt_ctrl & QCT_QTIMER_AC_CNTACR_RPCT)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+
+        if (view && !(s->cntpl0acr & QCT_QTIMER_CNTPL0ACR_PL0PCTEN)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+
+        *data = LOW_32((s->cntpct + (ptimer_get_count(s->timer))));
+        return MEMTX_OK;
+    case QCT_QTIMER_CNTPCT_HI:
+        if (!(s->cnt_ctrl & QCT_QTIMER_AC_CNTACR_RPCT)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+
+        if (view && !(s->cntpl0acr & QCT_QTIMER_CNTPL0ACR_PL0PCTEN)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+
+        *data = HIGH_32((s->cntpct + (ptimer_get_count(s->timer))));
+        return MEMTX_OK;
+    case (QCT_QTIMER_CNTP_TVAL): /* CVAL - CNTP */
+        if (!(s->cnt_ctrl & QCT_QTIMER_AC_CNTACR_RWPT)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+
+        if (view && !(s->cntpl0acr & QCT_QTIMER_CNTPL0ACR_PL0CTEN)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+
+        *data =
+            (s->cntval - (HIGH_32((s->cntpct + (ptimer_get_count(s->timer)))) +
+                          LOW_32((s->cntpct + (ptimer_get_count(s->timer))))));
+        return MEMTX_OK;
+    case (QCT_QTIMER_CNTP_CTL): /* TimerMIS */
+        if (!(s->cnt_ctrl & QCT_QTIMER_AC_CNTACR_RWPT)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+
+        if (view && !(s->cntpl0acr & QCT_QTIMER_CNTPL0ACR_PL0CTEN)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+
+        *data = s->int_level;
+        return MEMTX_OK;
+    case QCT_QTIMER_CNTPL0ACR:
+        if (view) {
+            *data = 0;
+        } else {
+            *data = s->cntpl0acr;
+        }
+        return MEMTX_OK;
+
+    case QCT_QTIMER_VERSION:
+        *data = TIMER_VERSION;
+        return MEMTX_OK;
+
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %x\n", __func__,
+                      (int)offset);
+        *data = 0;
+        return MEMTX_ACCESS_ERROR;
+    }
+}
+
+/*
+ * Reset the timer limit after settings have changed.
+ * May only be called from inside a ptimer transaction block.
+ */
+static void hex_timer_recalibrate(QCTHextimerState *s, int reload)
+{
+    uint64_t limit;
+    /* Periodic.  */
+    limit = s->limit;
+    ptimer_set_limit(s->timer, limit, reload);
+}
+
+static MemTxResult hex_timer_write(void *opaque, hwaddr offset, uint64_t value,
+                                   unsigned size, MemTxAttrs attrs)
+{
+    QCTQtimerState *qct_s = (QCTQtimerState *)opaque;
+    uint32_t slot_nr = (offset & 0xF000) >> 12;
+    uint32_t reg_offset = offset & 0xFFF;
+    uint32_t view = slot_nr % qct_s->nr_views;
+    uint32_t frame = slot_nr / qct_s->nr_views;
+
+    if (frame >= qct_s->nr_frames) {
+        return MEMTX_ACCESS_ERROR;
+    }
+    QCTHextimerState *s = &qct_s->timer[frame];
+
+    /*
+     * This is the case where we have 2 views, but the second one is not
+     * implemented.
+     */
+    if (view && !(qct_s->cnttid & (0x4 << (frame * 4)))) {
+        return MEMTX_OK;
+    }
+
+    switch (reg_offset) {
+    case (QCT_QTIMER_CNTP_CVAL_LO): /* TimerLoad */
+        if (!(s->cnt_ctrl & QCT_QTIMER_AC_CNTACR_RWPT)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+
+        if (view && !(s->cntpl0acr & QCT_QTIMER_CNTPL0ACR_PL0CTEN)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+
+
+        s->int_level = 0;
+        s->cntval = value;
+        ptimer_transaction_begin(s->timer);
+        if (s->control & QCT_QTIMER_CNTP_CTL_ENABLE) {
+            /*
+             * Pause the timer if it is running.  This may cause some
+             * inaccuracy due to rounding, but avoids other issues.
+             */
+            ptimer_stop(s->timer);
+        }
+        hex_timer_recalibrate(s, 1);
+        if (s->control & QCT_QTIMER_CNTP_CTL_ENABLE) {
+            ptimer_run(s->timer, 0);
+        }
+        ptimer_transaction_commit(s->timer);
+        break;
+    case (QCT_QTIMER_CNTP_CVAL_HI):
+        if (!(s->cnt_ctrl & QCT_QTIMER_AC_CNTACR_RWPT)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+
+        if (view && !(s->cntpl0acr & QCT_QTIMER_CNTPL0ACR_PL0CTEN)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+
+        break;
+    case (QCT_QTIMER_CNTP_CTL): /* Timer control register */
+        if (!(s->cnt_ctrl & QCT_QTIMER_AC_CNTACR_RWPT)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+
+        if (view && !(s->cntpl0acr & QCT_QTIMER_CNTPL0ACR_PL0CTEN)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+
+        ptimer_transaction_begin(s->timer);
+        if (s->control & QCT_QTIMER_CNTP_CTL_ENABLE) {
+            /*
+             * Pause the timer if it is running.  This may cause some
+             * inaccuracy due to rounding, but avoids other issues.
+             */
+            ptimer_stop(s->timer);
+        }
+        s->control = value;
+        hex_timer_recalibrate(s, s->control & QCT_QTIMER_CNTP_CTL_ENABLE);
+        ptimer_set_freq(s->timer, s->freq);
+        ptimer_set_period(s->timer, 1);
+        if (s->control & QCT_QTIMER_CNTP_CTL_ENABLE) {
+            ptimer_run(s->timer, 0);
+        }
+        ptimer_transaction_commit(s->timer);
+        break;
+    case (QCT_QTIMER_CNTP_TVAL): /* CVAL - CNTP */
+        if (!(s->cnt_ctrl & QCT_QTIMER_AC_CNTACR_RWPT)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+
+        if (view && !(s->cntpl0acr & QCT_QTIMER_CNTPL0ACR_PL0CTEN)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+
+        ptimer_transaction_begin(s->timer);
+        if (s->control & QCT_QTIMER_CNTP_CTL_ENABLE) {
+            /*
+             * Pause the timer if it is running.  This may cause some
+             * inaccuracy due to rounding, but avoids other issues.
+             */
+            ptimer_stop(s->timer);
+        }
+        s->cntval = s->cntpct + value;
+        ptimer_set_freq(s->timer, s->freq);
+        ptimer_set_period(s->timer, 1);
+        if (s->control & QCT_QTIMER_CNTP_CTL_ENABLE) {
+            ptimer_run(s->timer, 0);
+        }
+        ptimer_transaction_commit(s->timer);
+        break;
+    case QCT_QTIMER_CNTPL0ACR:
+        if (view) {
+            break;
+        }
+
+        s->cntpl0acr = value;
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %x\n", __func__,
+                      (int)offset);
+        return MEMTX_ACCESS_ERROR;
+    }
+    hex_timer_update(s);
+    return MEMTX_OK;
+}
+
+static void hex_timer_tick(void *opaque)
+{
+    QCTHextimerState *s = (QCTHextimerState *)opaque;
+    if ((s->cntpct >= s->cntval) && (s->int_level != 1)) {
+        s->int_level = 1;
+        hex_timer_update(s);
+        return;
+    }
+    s->cntpct += s->limit;
+}
+
+static const MemoryRegionOps hex_timer_ops = {
+    .read_with_attrs = hex_timer_read,
+    .write_with_attrs = hex_timer_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+static const VMStateDescription vmstate_hex_timer = {
+    .name = "hex_timer",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]){ VMSTATE_UINT32(control, QCTHextimerState),
+                                VMSTATE_UINT32(cnt_ctrl, QCTHextimerState),
+                                VMSTATE_UINT64(cntpct, QCTHextimerState),
+                                VMSTATE_UINT64(cntval, QCTHextimerState),
+                                VMSTATE_UINT64(limit, QCTHextimerState),
+                                VMSTATE_UINT32(int_level, QCTHextimerState),
+                                VMSTATE_PTIMER(timer, QCTHextimerState),
+                                VMSTATE_END_OF_LIST() }
+};
+
+static void qct_qtimer_realize(DeviceState *dev, Error **errp)
+{
+    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+    QCTQtimerState *s = QCT_QTIMER(dev);
+    unsigned int i;
+
+    if (s->nr_frames > QCT_QTIMER_TIMER_FRAME_ELTS) {
+        error_setg(errp, "nr_frames too high");
+        return;
+    }
+
+    if (s->nr_views > QCT_QTIMER_TIMER_VIEW_ELTS) {
+        error_setg(errp, "nr_views too high");
+        return;
+    }
+
+    memory_region_init_io(&s->iomem, OBJECT(sbd), &qct_qtimer_ops, s, "qutimer",
+                          QTIMER_MEM_SIZE_BYTES);
+    sysbus_init_mmio(sbd, &s->iomem);
+
+    memory_region_init_io(&s->view_iomem, OBJECT(sbd), &hex_timer_ops, s,
+                          "qutimer_views",
+                          QTIMER_MEM_SIZE_BYTES * s->nr_frames * s->nr_views);
+    sysbus_init_mmio(sbd, &s->view_iomem);
+
+    for (i = 0; i < s->nr_frames; i++) {
+        s->timer[i].limit = 1;
+        s->timer[i].control = QCT_QTIMER_CNTP_CTL_ENABLE;
+        s->timer[i].cnt_ctrl =
+            (QCT_QTIMER_AC_CNTACR_RWPT | QCT_QTIMER_AC_CNTACR_RWVT |
+             QCT_QTIMER_AC_CNTACR_RVOFF | QCT_QTIMER_AC_CNTACR_RFRQ |
+             QCT_QTIMER_AC_CNTACR_RPVCT | QCT_QTIMER_AC_CNTACR_RPCT);
+        s->timer[i].qtimer = s;
+        s->timer[i].freq = QTIMER_DEFAULT_FREQ_HZ;
+
+        s->secure |= (1 << i);
+
+        sysbus_init_irq(sbd, &(s->timer[i].irq));
+
+        (s->timer[i]).timer =
+            ptimer_init(hex_timer_tick, &s->timer[i], PTIMER_POLICY_LEGACY);
+        vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_hex_timer,
+                         &s->timer[i]);
+    }
+}
+
+static const Property qct_qtimer_properties[] = {
+    DEFINE_PROP_UINT32("freq", QCTQtimerState, freq, QTIMER_DEFAULT_FREQ_HZ),
+    DEFINE_PROP_UINT32("nr_frames", QCTQtimerState, nr_frames, 2),
+    DEFINE_PROP_UINT32("nr_views", QCTQtimerState, nr_views, 1),
+    DEFINE_PROP_UINT32("cnttid", QCTQtimerState, cnttid, 0x11),
+};
+
+static void qct_qtimer_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *k = DEVICE_CLASS(klass);
+
+    device_class_set_props(k, qct_qtimer_properties);
+    k->realize = qct_qtimer_realize;
+    k->vmsd = &vmstate_qct_qtimer;
+}
+
+static const TypeInfo qct_qtimer_info = {
+    .name = TYPE_QCT_QTIMER,
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(QCTQtimerState),
+    .instance_init = qct_qtimer_init,
+    .class_init = qct_qtimer_class_init,
+};
+
+static void qct_qtimer_register_types(void)
+{
+    type_register_static(&qct_qtimer_info);
+}
+
+type_init(qct_qtimer_register_types)
diff --git a/hw/timer/xilinx_timer.c b/hw/timer/xilinx_timer.c
index 6595cf5f51742..4620528f98570 100644
--- a/hw/timer/xilinx_timer.c
+++ b/hw/timer/xilinx_timer.c
@@ -3,6 +3,9 @@
  *
  * Copyright (c) 2009 Edgar E. Iglesias.
  *
+ * DS573: https://docs.amd.com/v/u/en-US/xps_timer
+ * LogiCORE IP XPS Timer/Counter (v1.02a)
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
  * in the Software without restriction, including without limitation the rights
@@ -23,10 +26,12 @@
  */
 
 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "hw/sysbus.h"
 #include "hw/irq.h"
 #include "hw/ptimer.h"
 #include "hw/qdev-properties.h"
+#include "hw/qdev-properties-system.h"
 #include "qemu/log.h"
 #include "qemu/module.h"
 #include "qom/object.h"
@@ -69,6 +74,7 @@ struct XpsTimerState
 {
     SysBusDevice parent_obj;
 
+    EndianMode model_endianness;
     MemoryRegion mmio;
     qemu_irq irq;
     uint8_t one_timer_only;
@@ -189,18 +195,21 @@ timer_write(void *opaque, hwaddr addr,
     timer_update_irq(t);
 }
 
-static const MemoryRegionOps timer_ops = {
-    .read = timer_read,
-    .write = timer_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
-    .impl = {
-        .min_access_size = 4,
-        .max_access_size = 4,
+static const MemoryRegionOps timer_ops[2] = {
+    [0 ... 1] = {
+        .read = timer_read,
+        .write = timer_write,
+        .impl = {
+            .min_access_size = 4,
+            .max_access_size = 4,
+        },
+        .valid = {
+            .min_access_size = 4,
+            .max_access_size = 4,
+        },
     },
-    .valid = {
-        .min_access_size = 4,
-        .max_access_size = 4
-    }
+    [0].endianness = DEVICE_LITTLE_ENDIAN,
+    [1].endianness = DEVICE_BIG_ENDIAN,
 };
 
 static void timer_hit(void *opaque)
@@ -220,6 +229,12 @@ static void xilinx_timer_realize(DeviceState *dev, Error **errp)
     XpsTimerState *t = XILINX_TIMER(dev);
     unsigned int i;
 
+    if (t->model_endianness == ENDIAN_MODE_UNSPECIFIED) {
+        error_setg(errp, TYPE_XILINX_TIMER " property 'endianness'"
+                         " must be set to 'big' or 'little'");
+        return;
+    }
+
     /* Init all the ptimers.  */
     t->timers = g_malloc0(sizeof t->timers[0] * num_timers(t));
     for (i = 0; i < num_timers(t); i++) {
@@ -233,8 +248,9 @@ static void xilinx_timer_realize(DeviceState *dev, Error **errp)
         ptimer_transaction_commit(xt->ptimer);
     }
 
-    memory_region_init_io(&t->mmio, OBJECT(t), &timer_ops, t, "xlnx.xps-timer",
-                          R_MAX * 4 * num_timers(t));
+    memory_region_init_io(&t->mmio, OBJECT(t),
+                          &timer_ops[t->model_endianness == ENDIAN_MODE_BIG],
+                          t, "xlnx.xps-timer", R_MAX * 4 * num_timers(t));
     sysbus_init_mmio(SYS_BUS_DEVICE(dev), &t->mmio);
 }
 
@@ -247,6 +263,7 @@ static void xilinx_timer_init(Object *obj)
 }
 
 static const Property xilinx_timer_properties[] = {
+    DEFINE_PROP_ENDIAN_NODEFAULT("endianness", XpsTimerState, model_endianness),
     DEFINE_PROP_UINT32("clock-frequency", XpsTimerState, freq_hz, 62 * 1000000),
     DEFINE_PROP_UINT8("one-timer-only", XpsTimerState, one_timer_only, 0),
 };
diff --git a/hw/tpm/tpm_tis_sysbus.c b/hw/tpm/tpm_tis_sysbus.c
index ee0bfe9538e24..4f187690a283d 100644
--- a/hw/tpm/tpm_tis_sysbus.c
+++ b/hw/tpm/tpm_tis_sysbus.c
@@ -133,7 +133,6 @@ static void tpm_tis_sysbus_class_init(ObjectClass *klass, void *data)
     dc->vmsd  = &vmstate_tpm_tis_sysbus;
     tc->model = TPM_MODEL_TPM_TIS;
     dc->realize = tpm_tis_sysbus_realizefn;
-    dc->user_creatable = true;
     device_class_set_legacy_reset(dc, tpm_tis_sysbus_reset);
     tc->request_completed = tpm_tis_sysbus_request_completed;
     tc->get_version = tpm_tis_sysbus_get_tpm_version;
@@ -142,7 +141,7 @@ static void tpm_tis_sysbus_class_init(ObjectClass *klass, void *data)
 
 static const TypeInfo tpm_tis_sysbus_info = {
     .name = TYPE_TPM_TIS_SYSBUS,
-    .parent = TYPE_SYS_BUS_DEVICE,
+    .parent = TYPE_DYNAMIC_SYS_BUS_DEVICE,
     .instance_size = sizeof(TPMStateSysBus),
     .instance_init = tpm_tis_sysbus_initfn,
     .class_init  = tpm_tis_sysbus_class_init,
diff --git a/hw/uefi/Kconfig b/hw/uefi/Kconfig
new file mode 100644
index 0000000000000..ca6c2bc46a962
--- /dev/null
+++ b/hw/uefi/Kconfig
@@ -0,0 +1,3 @@
+config UEFI_VARS
+	bool
+        default y if X86_64 || AARCH64
diff --git a/hw/uefi/LIMITATIONS.md b/hw/uefi/LIMITATIONS.md
new file mode 100644
index 0000000000000..29308bd587aa2
--- /dev/null
+++ b/hw/uefi/LIMITATIONS.md
@@ -0,0 +1,7 @@
+known issues and limitations
+----------------------------
+
+* works only on little endian hosts
+  - accessing structs in guest ram is done without endian conversion.
+* works only for 64-bit guests
+  - UINTN is mapped to uint64_t, for 32-bit guests that would be uint32_t
diff --git a/hw/uefi/hardware-info.c b/hw/uefi/hardware-info.c
new file mode 100644
index 0000000000000..930502a4df3af
--- /dev/null
+++ b/hw/uefi/hardware-info.c
@@ -0,0 +1,31 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * pass hardware information to uefi
+ *
+ * see OvmfPkg/Library/HardwareInfoLib/ in edk2
+ */
+
+#include "qemu/osdep.h"
+
+#include "hw/nvram/fw_cfg.h"
+#include "hw/uefi/hardware-info.h"
+
+static void      *blob;
+static uint64_t  blobsize;
+
+void hardware_info_register(HARDWARE_INFO_TYPE type, void *info, uint64_t infosize)
+{
+    HARDWARE_INFO_HEADER hdr = {
+        .type.value = cpu_to_le64(type),
+        .size       = cpu_to_le64(infosize),
+    };
+
+    blob = g_realloc(blob, blobsize + sizeof(hdr) + infosize);
+    memcpy(blob + blobsize, &hdr, sizeof(hdr));
+    blobsize += sizeof(hdr);
+    memcpy(blob + blobsize, info, infosize);
+    blobsize += infosize;
+
+    fw_cfg_modify_file(fw_cfg_find(), "etc/hardware-info", blob, blobsize);
+}
diff --git a/hw/uefi/meson.build b/hw/uefi/meson.build
new file mode 100644
index 0000000000000..91eb95f89e6dc
--- /dev/null
+++ b/hw/uefi/meson.build
@@ -0,0 +1,21 @@
+system_ss.add(files('hardware-info.c'))
+
+uefi_vars_ss = ss.source_set()
+if (config_all_devices.has_key('CONFIG_UEFI_VARS'))
+  uefi_vars_ss.add(files('var-service-core.c',
+                         'var-service-json.c',
+                         'var-service-vars.c',
+                         'var-service-auth.c',
+                         'var-service-guid.c',
+                         'var-service-utils.c',
+                         'var-service-policy.c',
+                         'var-service-sysbus.c'))
+  uefi_vars_ss.add(when: gnutls,
+                   if_true: files('var-service-pkcs7.c'),
+                   if_false: files('var-service-pkcs7-stub.c'))
+  uefi_vars_ss.add(files('var-service-siglist.c'))
+endif
+
+modules += { 'hw-uefi' : {
+    'vars'     : uefi_vars_ss,
+}}
diff --git a/hw/uefi/trace-events b/hw/uefi/trace-events
new file mode 100644
index 0000000000000..3694712a946d7
--- /dev/null
+++ b/hw/uefi/trace-events
@@ -0,0 +1,17 @@
+# device
+uefi_reg_read(uint64_t addr, unsigned size) "addr 0x%" PRIx64 ", size %u"
+uefi_reg_write(uint64_t addr, uint64_t val, unsigned size) "addr 0x%" PRIx64 ", val 0x%" PRIx64 ", size %d"
+uefi_hard_reset(void) ""
+
+# generic uefi
+uefi_variable(const char *context, const char *name, uint64_t size, const char *uuid) "context %s, name %s, size %" PRIu64 ", uuid %s"
+uefi_status(const char *context, const char *name) "context %s, status %s"
+uefi_event(const char *name) "event %s"
+
+# variable protocol
+uefi_vars_proto_cmd(const char *cmd) "cmd %s"
+uefi_vars_security_violation(const char *reason) "reason %s"
+
+# variable policy protocol
+uefi_vars_policy_cmd(const char *cmd) "cmd %s"
+uefi_vars_policy_deny(const char *reason) "reason %s"
diff --git a/hw/uefi/var-service-auth.c b/hw/uefi/var-service-auth.c
new file mode 100644
index 0000000000000..fba5a0956a574
--- /dev/null
+++ b/hw/uefi/var-service-auth.c
@@ -0,0 +1,361 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * uefi vars device - AuthVariableLib
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "system/dma.h"
+
+#include "hw/uefi/var-service.h"
+
+static const uint16_t name_pk[]           = u"PK";
+static const uint16_t name_kek[]          = u"KEK";
+static const uint16_t name_db[]           = u"db";
+static const uint16_t name_dbx[]          = u"dbx";
+static const uint16_t name_setup_mode[]   = u"SetupMode";
+static const uint16_t name_sigs_support[] = u"SignatureSupport";
+static const uint16_t name_sb[]           = u"SecureBoot";
+static const uint16_t name_sb_enable[]    = u"SecureBootEnable";
+static const uint16_t name_custom_mode[]  = u"CustomMode";
+static const uint16_t name_vk[]           = u"VendorKeys";
+static const uint16_t name_vk_nv[]        = u"VendorKeysNv";
+
+static const uint32_t sigdb_attrs =
+    EFI_VARIABLE_NON_VOLATILE |
+    EFI_VARIABLE_BOOTSERVICE_ACCESS |
+    EFI_VARIABLE_RUNTIME_ACCESS |
+    EFI_VARIABLE_TIME_BASED_AUTHENTICATED_WRITE_ACCESS;
+
+static void set_secure_boot(uefi_vars_state *uv, uint8_t sb)
+{
+    uefi_vars_set_variable(uv, EfiGlobalVariable,
+                           name_sb, sizeof(name_sb),
+                           EFI_VARIABLE_BOOTSERVICE_ACCESS |
+                           EFI_VARIABLE_RUNTIME_ACCESS,
+                           &sb, sizeof(sb));
+}
+
+static void set_secure_boot_enable(uefi_vars_state *uv, uint8_t sbe)
+{
+    uefi_vars_set_variable(uv, EfiSecureBootEnableDisable,
+                           name_sb_enable, sizeof(name_sb_enable),
+                           EFI_VARIABLE_NON_VOLATILE |
+                           EFI_VARIABLE_BOOTSERVICE_ACCESS,
+                           &sbe, sizeof(sbe));
+}
+
+static void set_setup_mode(uefi_vars_state *uv, uint8_t sm)
+{
+    uefi_vars_set_variable(uv, EfiGlobalVariable,
+                           name_setup_mode, sizeof(name_setup_mode),
+                           EFI_VARIABLE_BOOTSERVICE_ACCESS |
+                           EFI_VARIABLE_RUNTIME_ACCESS,
+                           &sm, sizeof(sm));
+}
+
+static void set_custom_mode(uefi_vars_state *uv, uint8_t cm)
+{
+    uefi_vars_set_variable(uv, EfiCustomModeEnable,
+                           name_custom_mode, sizeof(name_custom_mode),
+                           EFI_VARIABLE_NON_VOLATILE |
+                           EFI_VARIABLE_BOOTSERVICE_ACCESS,
+                           &cm, sizeof(cm));
+}
+
+static void set_signature_support(uefi_vars_state *uv)
+{
+    QemuUUID sigs_support[5];
+
+    sigs_support[0] = EfiCertSha256Guid;
+    sigs_support[1] = EfiCertSha384Guid;
+    sigs_support[2] = EfiCertSha512Guid;
+    sigs_support[3] = EfiCertRsa2048Guid;
+    sigs_support[4] = EfiCertX509Guid;
+
+    uefi_vars_set_variable(uv, EfiGlobalVariable,
+                           name_sigs_support, sizeof(name_sigs_support),
+                           EFI_VARIABLE_BOOTSERVICE_ACCESS |
+                           EFI_VARIABLE_RUNTIME_ACCESS,
+                           sigs_support, sizeof(sigs_support));
+}
+
+static bool setup_mode_is_active(uefi_vars_state *uv)
+{
+    uefi_variable *var;
+    uint8_t *value;
+
+    var = uefi_vars_find_variable(uv, EfiGlobalVariable,
+                                  name_setup_mode, sizeof(name_setup_mode));
+    if (var) {
+        value = var->data;
+        if (value[0] == SETUP_MODE) {
+            return true;
+        }
+    }
+    return false;
+}
+
+static bool custom_mode_is_active(uefi_vars_state *uv)
+{
+    uefi_variable *var;
+    uint8_t *value;
+
+    var = uefi_vars_find_variable(uv, EfiCustomModeEnable,
+                                  name_custom_mode, sizeof(name_custom_mode));
+    if (var) {
+        value = var->data;
+        if (value[0] == CUSTOM_SECURE_BOOT_MODE) {
+            return true;
+        }
+    }
+    return false;
+}
+
+bool uefi_vars_is_sb_pk(uefi_variable *var)
+{
+    if (qemu_uuid_is_equal(&var->guid, &EfiGlobalVariable) &&
+        uefi_str_equal(var->name, var->name_size, name_pk, sizeof(name_pk))) {
+        return true;
+    }
+    return false;
+}
+
+static bool uefi_vars_is_sb_kek(uefi_variable *var)
+{
+    if (qemu_uuid_is_equal(&var->guid, &EfiGlobalVariable) &&
+        uefi_str_equal(var->name, var->name_size, name_kek, sizeof(name_kek))) {
+        return true;
+    }
+    return false;
+}
+
+static bool uefi_vars_is_sb_db(uefi_variable *var)
+{
+    if (!qemu_uuid_is_equal(&var->guid, &EfiImageSecurityDatabase)) {
+        return false;
+    }
+    if (uefi_str_equal(var->name, var->name_size, name_db, sizeof(name_db))) {
+        return true;
+    }
+    if (uefi_str_equal(var->name, var->name_size, name_dbx, sizeof(name_dbx))) {
+        return true;
+    }
+    return false;
+}
+
+bool uefi_vars_is_sb_any(uefi_variable *var)
+{
+    if (uefi_vars_is_sb_pk(var) ||
+        uefi_vars_is_sb_kek(var) ||
+        uefi_vars_is_sb_db(var)) {
+        return true;
+    }
+    return false;
+}
+
+static uefi_variable *uefi_vars_find_siglist(uefi_vars_state *uv,
+                                             uefi_variable *var)
+{
+    if (uefi_vars_is_sb_pk(var)) {
+        return uefi_vars_find_variable(uv, EfiGlobalVariable,
+                                       name_pk, sizeof(name_pk));
+    }
+    if (uefi_vars_is_sb_kek(var)) {
+        return uefi_vars_find_variable(uv, EfiGlobalVariable,
+                                       name_pk, sizeof(name_pk));
+    }
+    if (uefi_vars_is_sb_db(var)) {
+        return uefi_vars_find_variable(uv, EfiGlobalVariable,
+                                       name_kek, sizeof(name_kek));
+    }
+
+    return NULL;
+}
+
+static efi_status uefi_vars_check_auth_2_sb(uefi_vars_state *uv,
+                                            uefi_variable *var,
+                                            mm_variable_access *va,
+                                            void *data,
+                                            uint64_t data_offset)
+{
+    variable_auth_2 *auth = data;
+    uefi_variable *siglist;
+
+    if (custom_mode_is_active(uv)) {
+        /* no authentication in custom mode */
+        return EFI_SUCCESS;
+    }
+
+    if (setup_mode_is_active(uv) && !uefi_vars_is_sb_pk(var)) {
+        /* no authentication in setup mode (except PK) */
+        return EFI_SUCCESS;
+    }
+
+    if (auth->hdr_length == 24) {
+        /* no signature (auth->cert_data is empty) */
+        return EFI_SECURITY_VIOLATION;
+    }
+
+    siglist = uefi_vars_find_siglist(uv, var);
+    if (!siglist && setup_mode_is_active(uv) && uefi_vars_is_sb_pk(var)) {
+        /* check PK is self-signed */
+        uefi_variable tmp = {
+            .guid       = EfiGlobalVariable,
+            .name       = (uint16_t *)name_pk,
+            .name_size  = sizeof(name_pk),
+            .attributes = sigdb_attrs,
+            .data       = data + data_offset,
+            .data_size  = va->data_size - data_offset,
+        };
+        return uefi_vars_check_pkcs7_2(&tmp, NULL, NULL, va, data);
+    }
+
+    return uefi_vars_check_pkcs7_2(siglist, NULL, NULL, va, data);
+}
+
+efi_status uefi_vars_check_auth_2(uefi_vars_state *uv, uefi_variable *var,
+                                  mm_variable_access *va, void *data)
+{
+    variable_auth_2 *auth = data;
+    uint64_t data_offset;
+    efi_status status;
+
+    if (va->data_size < sizeof(*auth)) {
+        return EFI_SECURITY_VIOLATION;
+    }
+    if (uadd64_overflow(sizeof(efi_time), auth->hdr_length, &data_offset)) {
+        return EFI_SECURITY_VIOLATION;
+    }
+    if (va->data_size < data_offset) {
+        return EFI_SECURITY_VIOLATION;
+    }
+
+    if (auth->hdr_revision != 0x0200 ||
+        auth->hdr_cert_type != WIN_CERT_TYPE_EFI_GUID ||
+        !qemu_uuid_is_equal(&auth->guid_cert_type, &EfiCertTypePkcs7Guid)) {
+        return EFI_UNSUPPORTED;
+    }
+
+    if (uefi_vars_is_sb_any(var)) {
+        /* secure boot variables */
+        status = uefi_vars_check_auth_2_sb(uv, var, va, data, data_offset);
+        if (status != EFI_SUCCESS) {
+            return status;
+        }
+    } else {
+        /* other authenticated variables */
+        status = uefi_vars_check_pkcs7_2(NULL,
+                                         &var->digest, &var->digest_size,
+                                         va, data);
+        if (status != EFI_SUCCESS) {
+            return status;
+        }
+    }
+
+    /* checks passed, set variable data */
+    var->time = auth->timestamp;
+    if (va->data_size - data_offset > 0) {
+        var->data = g_malloc(va->data_size - data_offset);
+        memcpy(var->data, data + data_offset, va->data_size - data_offset);
+        var->data_size = va->data_size - data_offset;
+    }
+
+    return EFI_SUCCESS;
+}
+
+efi_status uefi_vars_check_secure_boot(uefi_vars_state *uv, uefi_variable *var)
+{
+    uint8_t *value = var->data;
+
+    if (uefi_vars_is_sb_any(var)) {
+        if (var->attributes != sigdb_attrs) {
+            return EFI_INVALID_PARAMETER;
+        }
+    }
+
+    /* reject SecureBootEnable updates if force_secure_boot is set */
+    if (qemu_uuid_is_equal(&var->guid, &EfiSecureBootEnableDisable) &&
+        uefi_str_equal(var->name, var->name_size,
+                       name_sb_enable, sizeof(name_sb_enable)) &&
+        uv->force_secure_boot &&
+        value[0] != SECURE_BOOT_ENABLE) {
+        return EFI_WRITE_PROTECTED;
+    }
+
+    /* reject CustomMode updates if disable_custom_mode is set */
+    if (qemu_uuid_is_equal(&var->guid, &EfiCustomModeEnable) &&
+        uefi_str_equal(var->name, var->name_size,
+                       name_custom_mode, sizeof(name_custom_mode)) &&
+        uv->disable_custom_mode) {
+        return EFI_WRITE_PROTECTED;
+    }
+
+    return EFI_SUCCESS;
+}
+
+/* AuthVariableLibInitialize */
+void uefi_vars_auth_init(uefi_vars_state *uv)
+{
+    uefi_variable *pk_var, *sbe_var;
+    uint8_t platform_mode, sb, sbe, vk;
+
+    /* SetupMode */
+    pk_var = uefi_vars_find_variable(uv, EfiGlobalVariable,
+                                     name_pk, sizeof(name_pk));
+    if (!pk_var) {
+        platform_mode = SETUP_MODE;
+    } else {
+        platform_mode = USER_MODE;
+    }
+    set_setup_mode(uv, platform_mode);
+
+    /* SignatureSupport */
+    set_signature_support(uv);
+
+    /* SecureBootEnable */
+    sbe = SECURE_BOOT_DISABLE;
+    sbe_var = uefi_vars_find_variable(uv, EfiSecureBootEnableDisable,
+                                      name_sb_enable, sizeof(name_sb_enable));
+    if (sbe_var) {
+        if (platform_mode == USER_MODE) {
+            sbe = ((uint8_t *)sbe_var->data)[0];
+        }
+    } else if (platform_mode == USER_MODE) {
+        sbe = SECURE_BOOT_ENABLE;
+        set_secure_boot_enable(uv, sbe);
+    }
+
+    if (uv->force_secure_boot && sbe != SECURE_BOOT_ENABLE) {
+        sbe = SECURE_BOOT_ENABLE;
+        set_secure_boot_enable(uv, sbe);
+    }
+
+    /* SecureBoot */
+    if ((sbe == SECURE_BOOT_ENABLE) && (platform_mode == USER_MODE)) {
+        sb = SECURE_BOOT_MODE_ENABLE;
+    } else {
+        sb = SECURE_BOOT_MODE_DISABLE;
+    }
+    set_secure_boot(uv, sb);
+
+    /* CustomMode */
+    set_custom_mode(uv, STANDARD_SECURE_BOOT_MODE);
+
+    vk = 0;
+    uefi_vars_set_variable(uv, EfiGlobalVariable,
+                           name_vk_nv, sizeof(name_vk_nv),
+                           EFI_VARIABLE_NON_VOLATILE |
+                           EFI_VARIABLE_BOOTSERVICE_ACCESS |
+                           EFI_VARIABLE_TIME_BASED_AUTHENTICATED_WRITE_ACCESS,
+                           &vk, sizeof(vk));
+    uefi_vars_set_variable(uv, EfiGlobalVariable,
+                           name_vk, sizeof(name_vk),
+                           EFI_VARIABLE_BOOTSERVICE_ACCESS |
+                           EFI_VARIABLE_RUNTIME_ACCESS,
+                           &vk, sizeof(vk));
+
+    /* flush to disk */
+    uefi_vars_json_save(uv);
+}
diff --git a/hw/uefi/var-service-core.c b/hw/uefi/var-service-core.c
new file mode 100644
index 0000000000000..8ed8378ab991f
--- /dev/null
+++ b/hw/uefi/var-service-core.c
@@ -0,0 +1,321 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * uefi vars device
+ */
+#include "qemu/osdep.h"
+#include "qemu/crc32c.h"
+#include "system/dma.h"
+#include "migration/vmstate.h"
+
+#include "hw/uefi/var-service.h"
+#include "hw/uefi/var-service-api.h"
+#include "hw/uefi/var-service-edk2.h"
+
+#include "trace/trace-hw_uefi.h"
+
+static int uefi_vars_pre_load(void *opaque)
+{
+    uefi_vars_state *uv = opaque;
+
+    uefi_vars_clear_all(uv);
+    uefi_vars_policies_clear(uv);
+    g_free(uv->buffer);
+    return 0;
+}
+
+static int uefi_vars_post_load(void *opaque, int version_id)
+{
+    uefi_vars_state *uv = opaque;
+
+    uefi_vars_update_storage(uv);
+    uv->buffer = g_malloc(uv->buf_size);
+    return 0;
+}
+
+const VMStateDescription vmstate_uefi_vars = {
+    .name = "uefi-vars",
+    .pre_load = uefi_vars_pre_load,
+    .post_load = uefi_vars_post_load,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT16(sts, uefi_vars_state),
+        VMSTATE_UINT32(buf_size, uefi_vars_state),
+        VMSTATE_UINT32(buf_addr_lo, uefi_vars_state),
+        VMSTATE_UINT32(buf_addr_hi, uefi_vars_state),
+        VMSTATE_UINT32(pio_xfer_offset, uefi_vars_state),
+        VMSTATE_VBUFFER_ALLOC_UINT32(pio_xfer_buffer, uefi_vars_state,
+                                     0, NULL, buf_size),
+        VMSTATE_BOOL(end_of_dxe, uefi_vars_state),
+        VMSTATE_BOOL(ready_to_boot, uefi_vars_state),
+        VMSTATE_BOOL(exit_boot_service, uefi_vars_state),
+        VMSTATE_BOOL(policy_locked, uefi_vars_state),
+        VMSTATE_UINT64(used_storage, uefi_vars_state),
+        VMSTATE_QTAILQ_V(variables, uefi_vars_state, 0,
+                         vmstate_uefi_variable, uefi_variable, next),
+        VMSTATE_QTAILQ_V(var_policies, uefi_vars_state, 0,
+                         vmstate_uefi_var_policy, uefi_var_policy, next),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static uint32_t uefi_vars_cmd_mm(uefi_vars_state *uv, bool dma_mode)
+{
+    hwaddr    dma;
+    mm_header *mhdr;
+    uint64_t  size;
+    uint32_t  retval;
+
+    dma = uv->buf_addr_lo | ((hwaddr)uv->buf_addr_hi << 32);
+    mhdr = (mm_header *) uv->buffer;
+
+    if (!uv->buffer || uv->buf_size < sizeof(*mhdr)) {
+        return UEFI_VARS_STS_ERR_BAD_BUFFER_SIZE;
+    }
+
+    /* read header */
+    if (dma_mode) {
+        dma_memory_read(&address_space_memory, dma,
+                        uv->buffer, sizeof(*mhdr),
+                        MEMTXATTRS_UNSPECIFIED);
+    } else {
+        memcpy(uv->buffer, uv->pio_xfer_buffer, sizeof(*mhdr));
+    }
+
+    if (uadd64_overflow(sizeof(*mhdr), mhdr->length, &size)) {
+        return UEFI_VARS_STS_ERR_BAD_BUFFER_SIZE;
+    }
+    if (uv->buf_size < size) {
+        return UEFI_VARS_STS_ERR_BAD_BUFFER_SIZE;
+    }
+
+    /* read buffer (excl header) */
+    if (dma_mode) {
+        dma_memory_read(&address_space_memory, dma + sizeof(*mhdr),
+                        uv->buffer + sizeof(*mhdr), mhdr->length,
+                        MEMTXATTRS_UNSPECIFIED);
+    } else {
+        memcpy(uv->buffer + sizeof(*mhdr),
+               uv->pio_xfer_buffer + sizeof(*mhdr),
+               mhdr->length);
+    }
+    memset(uv->buffer + size, 0, uv->buf_size - size);
+
+    /* dispatch */
+    if (qemu_uuid_is_equal(&mhdr->guid, &EfiSmmVariableProtocolGuid)) {
+        retval = uefi_vars_mm_vars_proto(uv);
+
+    } else if (qemu_uuid_is_equal(&mhdr->guid, &VarCheckPolicyLibMmiHandlerGuid)) {
+        retval = uefi_vars_mm_check_policy_proto(uv);
+
+    } else if (qemu_uuid_is_equal(&mhdr->guid, &EfiEndOfDxeEventGroupGuid)) {
+        trace_uefi_event("end-of-dxe");
+        uv->end_of_dxe = true;
+        retval = UEFI_VARS_STS_SUCCESS;
+
+    } else if (qemu_uuid_is_equal(&mhdr->guid, &EfiEventReadyToBootGuid)) {
+        trace_uefi_event("ready-to-boot");
+        uv->ready_to_boot = true;
+        retval = UEFI_VARS_STS_SUCCESS;
+
+    } else if (qemu_uuid_is_equal(&mhdr->guid, &EfiEventExitBootServicesGuid)) {
+        trace_uefi_event("exit-boot-service");
+        uv->exit_boot_service = true;
+        retval = UEFI_VARS_STS_SUCCESS;
+
+    } else {
+        retval = UEFI_VARS_STS_ERR_NOT_SUPPORTED;
+    }
+
+    /* write buffer */
+    if (dma_mode) {
+        dma_memory_write(&address_space_memory, dma,
+                         uv->buffer, sizeof(*mhdr) + mhdr->length,
+                         MEMTXATTRS_UNSPECIFIED);
+    } else {
+        memcpy(uv->pio_xfer_buffer + sizeof(*mhdr),
+               uv->buffer + sizeof(*mhdr),
+               sizeof(*mhdr) + mhdr->length);
+    }
+
+    return retval;
+}
+
+static void uefi_vars_soft_reset(uefi_vars_state *uv)
+{
+    g_free(uv->buffer);
+    uv->buffer = NULL;
+    uv->buf_size = 0;
+    uv->buf_addr_lo = 0;
+    uv->buf_addr_hi = 0;
+}
+
+void uefi_vars_hard_reset(uefi_vars_state *uv)
+{
+    trace_uefi_hard_reset();
+    uefi_vars_soft_reset(uv);
+
+    uv->end_of_dxe        = false;
+    uv->ready_to_boot     = false;
+    uv->exit_boot_service = false;
+    uv->policy_locked     = false;
+
+    uefi_vars_clear_volatile(uv);
+    uefi_vars_policies_clear(uv);
+    uefi_vars_auth_init(uv);
+}
+
+static uint32_t uefi_vars_cmd(uefi_vars_state *uv, uint32_t cmd)
+{
+    switch (cmd) {
+    case UEFI_VARS_CMD_RESET:
+        uefi_vars_soft_reset(uv);
+        return UEFI_VARS_STS_SUCCESS;
+    case UEFI_VARS_CMD_DMA_MM:
+        return uefi_vars_cmd_mm(uv, true);
+    case UEFI_VARS_CMD_PIO_MM:
+        return uefi_vars_cmd_mm(uv, false);
+    case UEFI_VARS_CMD_PIO_ZERO_OFFSET:
+        uv->pio_xfer_offset = 0;
+        return UEFI_VARS_STS_SUCCESS;
+    default:
+        return UEFI_VARS_STS_ERR_NOT_SUPPORTED;
+    }
+}
+
+static uint64_t uefi_vars_read(void *opaque, hwaddr addr, unsigned size)
+{
+    uefi_vars_state *uv = opaque;
+    uint64_t retval = -1;
+    void *xfer_ptr;
+
+    trace_uefi_reg_read(addr, size);
+
+    switch (addr) {
+    case UEFI_VARS_REG_MAGIC:
+        retval = UEFI_VARS_MAGIC_VALUE;
+        break;
+    case UEFI_VARS_REG_CMD_STS:
+        retval = uv->sts;
+        break;
+    case UEFI_VARS_REG_BUFFER_SIZE:
+        retval = uv->buf_size;
+        break;
+    case UEFI_VARS_REG_DMA_BUFFER_ADDR_LO:
+        retval = uv->buf_addr_lo;
+        break;
+    case UEFI_VARS_REG_DMA_BUFFER_ADDR_HI:
+        retval = uv->buf_addr_hi;
+        break;
+    case UEFI_VARS_REG_PIO_BUFFER_TRANSFER:
+        if (uv->pio_xfer_offset + size > uv->buf_size) {
+            retval = 0;
+            break;
+        }
+        xfer_ptr = uv->pio_xfer_buffer + uv->pio_xfer_offset;
+        switch (size) {
+        case 1:
+            retval = *(uint8_t *)xfer_ptr;
+            break;
+        case 2:
+            retval = *(uint16_t *)xfer_ptr;
+            break;
+        case 4:
+            retval = *(uint32_t *)xfer_ptr;
+            break;
+        case 8:
+            retval = *(uint64_t *)xfer_ptr;
+            break;
+        }
+        uv->pio_xfer_offset += size;
+        break;
+    case UEFI_VARS_REG_PIO_BUFFER_CRC32C:
+        retval = crc32c(0xffffffff, uv->pio_xfer_buffer, uv->pio_xfer_offset);
+        break;
+    case UEFI_VARS_REG_FLAGS:
+        retval = 0;
+        if (uv->use_pio) {
+            retval |= UEFI_VARS_FLAG_USE_PIO;
+        }
+    }
+    return retval;
+}
+
+static void uefi_vars_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
+{
+    uefi_vars_state *uv = opaque;
+    void *xfer_ptr;
+
+    trace_uefi_reg_write(addr, val, size);
+
+    switch (addr) {
+    case UEFI_VARS_REG_CMD_STS:
+        uv->sts = uefi_vars_cmd(uv, val);
+        break;
+    case UEFI_VARS_REG_BUFFER_SIZE:
+        if (val > MAX_BUFFER_SIZE) {
+            val = MAX_BUFFER_SIZE;
+        }
+        uv->buf_size = val;
+        g_free(uv->buffer);
+        g_free(uv->pio_xfer_buffer);
+        uv->buffer = g_malloc(uv->buf_size);
+        uv->pio_xfer_buffer = g_malloc(uv->buf_size);
+        break;
+    case UEFI_VARS_REG_DMA_BUFFER_ADDR_LO:
+        uv->buf_addr_lo = val;
+        break;
+    case UEFI_VARS_REG_DMA_BUFFER_ADDR_HI:
+        uv->buf_addr_hi = val;
+        break;
+    case UEFI_VARS_REG_PIO_BUFFER_TRANSFER:
+        if (uv->pio_xfer_offset + size > uv->buf_size) {
+            break;
+        }
+        xfer_ptr = uv->pio_xfer_buffer + uv->pio_xfer_offset;
+        switch (size) {
+        case 1:
+            *(uint8_t *)xfer_ptr = val;
+            break;
+        case 2:
+            *(uint16_t *)xfer_ptr = val;
+            break;
+        case 4:
+            *(uint32_t *)xfer_ptr = val;
+            break;
+        case 8:
+            *(uint64_t *)xfer_ptr = val;
+            break;
+        }
+        uv->pio_xfer_offset += size;
+        break;
+    case UEFI_VARS_REG_PIO_BUFFER_CRC32C:
+    case UEFI_VARS_REG_FLAGS:
+    default:
+        break;
+    }
+}
+
+static const MemoryRegionOps uefi_vars_ops = {
+    .read = uefi_vars_read,
+    .write = uefi_vars_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .impl = {
+        .min_access_size = 2,
+        .max_access_size = 4,
+    },
+};
+
+void uefi_vars_init(Object *obj, uefi_vars_state *uv)
+{
+    QTAILQ_INIT(&uv->variables);
+    QTAILQ_INIT(&uv->var_policies);
+    uv->jsonfd = -1;
+    memory_region_init_io(&uv->mr, obj, &uefi_vars_ops, uv,
+                          "uefi-vars", UEFI_VARS_REGS_SIZE);
+}
+
+void uefi_vars_realize(uefi_vars_state *uv, Error **errp)
+{
+    uefi_vars_json_init(uv, errp);
+    uefi_vars_json_load(uv, errp);
+}
diff --git a/hw/uefi/var-service-guid.c b/hw/uefi/var-service-guid.c
new file mode 100644
index 0000000000000..eba3655c8d304
--- /dev/null
+++ b/hw/uefi/var-service-guid.c
@@ -0,0 +1,99 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * uefi vars device - GUIDs
+ */
+
+#include "qemu/osdep.h"
+#include "system/dma.h"
+
+#include "hw/uefi/var-service.h"
+
+/* variable namespaces */
+
+const QemuUUID EfiGlobalVariable = {
+    .data = UUID_LE(0x8be4df61, 0x93ca, 0x11d2, 0xaa, 0x0d,
+                    0x00, 0xe0, 0x98, 0x03, 0x2b, 0x8c)
+};
+
+const QemuUUID EfiImageSecurityDatabase = {
+    .data = UUID_LE(0xd719b2cb, 0x3d3a, 0x4596, 0xa3, 0xbc,
+                    0xda, 0xd0, 0x0e, 0x67, 0x65, 0x6f)
+};
+
+const QemuUUID EfiCustomModeEnable = {
+    .data = UUID_LE(0xc076ec0c, 0x7028, 0x4399, 0xa0, 0x72,
+                    0x71, 0xee, 0x5c, 0x44, 0x8b, 0x9f)
+};
+
+const QemuUUID EfiSecureBootEnableDisable = {
+    .data = UUID_LE(0xf0a30bc7, 0xaf08, 0x4556, 0x99, 0xc4,
+                    0x0, 0x10, 0x9, 0xc9, 0x3a, 0x44)
+};
+
+/* signatures */
+
+const QemuUUID EfiCertSha256Guid = {
+    .data = UUID_LE(0xc1c41626, 0x504c, 0x4092, 0xac, 0xa9,
+                    0x41, 0xf9, 0x36, 0x93, 0x43, 0x28)
+};
+
+const QemuUUID EfiCertSha384Guid = {
+    .data = UUID_LE(0xff3e5307, 0x9fd0, 0x48c9, 0x85, 0xf1,
+                    0x8a, 0xd5, 0x6c, 0x70, 0x1e, 0x1)
+};
+
+const QemuUUID EfiCertSha512Guid = {
+    .data = UUID_LE(0x93e0fae, 0xa6c4, 0x4f50, 0x9f, 0x1b,
+                    0xd4, 0x1e, 0x2b, 0x89, 0xc1, 0x9a)
+};
+
+const QemuUUID EfiCertRsa2048Guid = {
+    .data = UUID_LE(0x3c5766e8, 0x269c, 0x4e34, 0xaa, 0x14,
+                    0xed, 0x77, 0x6e, 0x85, 0xb3, 0xb6)
+};
+
+const QemuUUID EfiCertX509Guid = {
+    .data = UUID_LE(0xa5c059a1, 0x94e4, 0x4aa7, 0x87, 0xb5,
+                    0xab, 0x15, 0x5c, 0x2b, 0xf0, 0x72)
+};
+
+const QemuUUID EfiCertTypePkcs7Guid = {
+    .data = UUID_LE(0x4aafd29d, 0x68df, 0x49ee, 0x8a, 0xa9,
+                    0x34, 0x7d, 0x37, 0x56, 0x65, 0xa7)
+};
+
+/*
+ * mm_header.guid values that the guest DXE/BDS phases use for
+ * sending requests to management mode
+ */
+
+const QemuUUID EfiSmmVariableProtocolGuid = {
+    .data = UUID_LE(0xed32d533, 0x99e6, 0x4209, 0x9c, 0xc0,
+                    0x2d, 0x72, 0xcd, 0xd9, 0x98, 0xa7)
+};
+
+const QemuUUID VarCheckPolicyLibMmiHandlerGuid = {
+    .data = UUID_LE(0xda1b0d11, 0xd1a7, 0x46c4, 0x9d, 0xc9,
+                    0xf3, 0x71, 0x48, 0x75, 0xc6, 0xeb)
+};
+
+/*
+ * mm_header.guid values that the guest DXE/BDS phases use for
+ * reporting event groups being signaled to management mode
+ */
+
+const QemuUUID EfiEndOfDxeEventGroupGuid = {
+    .data = UUID_LE(0x02ce967a, 0xdd7e, 0x4FFc, 0x9e, 0xe7,
+                    0x81, 0x0c, 0xF0, 0x47, 0x08, 0x80)
+};
+
+const QemuUUID EfiEventReadyToBootGuid = {
+    .data = UUID_LE(0x7ce88Fb3, 0x4bd7, 0x4679, 0x87, 0xa8,
+                    0xa8, 0xd8, 0xde, 0xe5, 0x0d, 0x2b)
+};
+
+const QemuUUID EfiEventExitBootServicesGuid = {
+    .data = UUID_LE(0x27abF055, 0xb1b8, 0x4c26, 0x80, 0x48,
+                    0x74, 0x8F, 0x37, 0xba, 0xa2, 0xdF)
+};
diff --git a/hw/uefi/var-service-json.c b/hw/uefi/var-service-json.c
new file mode 100644
index 0000000000000..761082c11fc1a
--- /dev/null
+++ b/hw/uefi/var-service-json.c
@@ -0,0 +1,243 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * uefi vars device - serialize non-volatile varstore from/to json,
+ *                    using qapi
+ *
+ * tools which can read/write these json files:
+ *  - https://gitlab.com/kraxel/virt-firmware
+ *  - https://github.com/awslabs/python-uefivars
+ */
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "qemu/error-report.h"
+#include "system/dma.h"
+
+#include "hw/uefi/var-service.h"
+
+#include "qobject/qobject.h"
+#include "qobject/qjson.h"
+
+#include "qapi/dealloc-visitor.h"
+#include "qapi/qobject-input-visitor.h"
+#include "qapi/qobject-output-visitor.h"
+#include "qapi/qapi-types-uefi.h"
+#include "qapi/qapi-visit-uefi.h"
+
+static char *generate_hexstr(void *data, size_t len)
+{
+    static const char hex[] = {
+        '0', '1', '2', '3', '4', '5', '6', '7',
+        '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
+    };
+    uint8_t *src = data;
+    char *dest;
+    size_t i;
+
+    dest = g_malloc(len * 2 + 1);
+    for (i = 0; i < len * 2;) {
+        dest[i++] = hex[*src >> 4];
+        dest[i++] = hex[*src & 15];
+        src++;
+    }
+    dest[i++] = 0;
+
+    return dest;
+}
+
+static UefiVarStore *uefi_vars_to_qapi(uefi_vars_state *uv)
+{
+    UefiVarStore *vs;
+    UefiVariableList **tail;
+    UefiVariable *v;
+    QemuUUID be;
+    uefi_variable *var;
+
+    vs = g_new0(UefiVarStore, 1);
+    vs->version = 2;
+    tail = &vs->variables;
+
+    QTAILQ_FOREACH(var, &uv->variables, next) {
+        if (!(var->attributes & EFI_VARIABLE_NON_VOLATILE)) {
+            continue;
+        }
+
+        v = g_new0(UefiVariable, 1);
+        be = qemu_uuid_bswap(var->guid);
+        v->guid = qemu_uuid_unparse_strdup(&be);
+        v->name = uefi_ucs2_to_ascii(var->name, var->name_size);
+        v->attr = var->attributes;
+
+        v->data = generate_hexstr(var->data, var->data_size);
+
+        if (var->attributes &
+            EFI_VARIABLE_TIME_BASED_AUTHENTICATED_WRITE_ACCESS) {
+            v->time = generate_hexstr(&var->time, sizeof(var->time));
+            if (var->digest && var->digest_size) {
+                v->digest = generate_hexstr(var->digest, var->digest_size);
+            }
+        }
+
+        QAPI_LIST_APPEND(tail, v);
+    }
+    return vs;
+}
+
+static unsigned parse_hexchar(char c)
+{
+    switch (c) {
+    case '0' ... '9': return c - '0';
+    case 'a' ... 'f': return c - 'a' + 0xa;
+    case 'A' ... 'F': return c - 'A' + 0xA;
+    default: return 0;
+    }
+}
+
+static void parse_hexstr(void *dest, char *src, int len)
+{
+    uint8_t *data = dest;
+    size_t i;
+
+    for (i = 0; i < len; i += 2) {
+        *(data++) =
+            parse_hexchar(src[i]) << 4 |
+            parse_hexchar(src[i + 1]);
+    }
+}
+
+static void uefi_vars_from_qapi(uefi_vars_state *uv, UefiVarStore *vs)
+{
+    UefiVariableList *item;
+    UefiVariable *v;
+    QemuUUID be;
+    uefi_variable *var;
+    uint8_t *data;
+    size_t i, len;
+
+    for (item = vs->variables; item != NULL; item = item->next) {
+        v = item->value;
+
+        var = g_new0(uefi_variable, 1);
+        var->attributes = v->attr;
+        qemu_uuid_parse(v->guid, &be);
+        var->guid = qemu_uuid_bswap(be);
+
+        len = strlen(v->name);
+        var->name_size = len * 2 + 2;
+        var->name = g_malloc(var->name_size);
+        for (i = 0; i <= len; i++) {
+            var->name[i] = v->name[i];
+        }
+
+        len = strlen(v->data);
+        var->data_size = len / 2;
+        var->data = data = g_malloc(var->data_size);
+        parse_hexstr(var->data, v->data, len);
+
+        if (v->time && strlen(v->time) == 32) {
+            parse_hexstr(&var->time, v->time, 32);
+        }
+
+        if (v->digest) {
+            len = strlen(v->digest);
+            var->digest_size = len / 2;
+            var->digest = g_malloc(var->digest_size);
+            parse_hexstr(var->digest, v->digest, len);
+        }
+
+        QTAILQ_INSERT_TAIL(&uv->variables, var, next);
+    }
+}
+
+static GString *uefi_vars_to_json(uefi_vars_state *uv)
+{
+    UefiVarStore *vs = uefi_vars_to_qapi(uv);
+    QObject *qobj = NULL;
+    Visitor *v;
+    GString *gstr;
+
+    v = qobject_output_visitor_new(&qobj);
+    if (visit_type_UefiVarStore(v, NULL, &vs, NULL)) {
+        visit_complete(v, &qobj);
+    }
+    visit_free(v);
+    qapi_free_UefiVarStore(vs);
+
+    gstr = qobject_to_json_pretty(qobj, true);
+    qobject_unref(qobj);
+
+    return gstr;
+}
+
+void uefi_vars_json_init(uefi_vars_state *uv, Error **errp)
+{
+    if (uv->jsonfile) {
+        uv->jsonfd = qemu_create(uv->jsonfile, O_RDWR, 0666, errp);
+    }
+}
+
+void uefi_vars_json_save(uefi_vars_state *uv)
+{
+    GString *gstr;
+    int rc;
+
+    if (uv->jsonfd == -1) {
+        return;
+    }
+
+    gstr = uefi_vars_to_json(uv);
+
+    lseek(uv->jsonfd, 0, SEEK_SET);
+    rc = ftruncate(uv->jsonfd, 0);
+    if (rc != 0) {
+        warn_report("%s: ftruncate error", __func__);
+    }
+    rc = write(uv->jsonfd, gstr->str, gstr->len);
+    if (rc != gstr->len) {
+        warn_report("%s: write error", __func__);
+    }
+    fsync(uv->jsonfd);
+
+    g_string_free(gstr, true);
+}
+
+void uefi_vars_json_load(uefi_vars_state *uv, Error **errp)
+{
+    UefiVarStore *vs;
+    QObject *qobj;
+    Visitor *v;
+    char *str;
+    size_t len;
+    int rc;
+
+    if (uv->jsonfd == -1) {
+        return;
+    }
+
+    len = lseek(uv->jsonfd, 0, SEEK_END);
+    if (len == 0) {
+        return;
+    }
+
+    str = g_malloc(len + 1);
+    lseek(uv->jsonfd, 0, SEEK_SET);
+    rc = read(uv->jsonfd, str, len);
+    if (rc != len) {
+        warn_report("%s: read error", __func__);
+    }
+    str[len] = 0;
+
+    qobj = qobject_from_json(str, errp);
+    v = qobject_input_visitor_new(qobj);
+    visit_type_UefiVarStore(v, NULL, &vs, errp);
+    visit_free(v);
+
+    if (!(*errp)) {
+        uefi_vars_from_qapi(uv, vs);
+        uefi_vars_update_storage(uv);
+    }
+
+    qapi_free_UefiVarStore(vs);
+    qobject_unref(qobj);
+    g_free(str);
+}
diff --git a/hw/uefi/var-service-pkcs7-stub.c b/hw/uefi/var-service-pkcs7-stub.c
new file mode 100644
index 0000000000000..118cba446d4b7
--- /dev/null
+++ b/hw/uefi/var-service-pkcs7-stub.c
@@ -0,0 +1,16 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * uefi vars device - pkcs7 stubs
+ */
+#include "qemu/osdep.h"
+#include "system/dma.h"
+
+#include "hw/uefi/var-service.h"
+
+efi_status uefi_vars_check_pkcs7_2(uefi_variable *siglist,
+                                   void **digest, uint32_t *digest_size,
+                                   mm_variable_access *va, void *data)
+{
+    return EFI_WRITE_PROTECTED;
+}
diff --git a/hw/uefi/var-service-pkcs7.c b/hw/uefi/var-service-pkcs7.c
new file mode 100644
index 0000000000000..32accf4e44e0a
--- /dev/null
+++ b/hw/uefi/var-service-pkcs7.c
@@ -0,0 +1,436 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * uefi vars device - pkcs7 verification
+ */
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "system/dma.h"
+
+#include <gnutls/gnutls.h>
+#include <gnutls/pkcs7.h>
+#include <gnutls/crypto.h>
+
+#include "hw/uefi/var-service.h"
+
+#define AUTHVAR_DIGEST_ALGO GNUTLS_DIG_SHA256
+#define AUTHVAR_DIGEST_SIZE 32
+
+/*
+ * Replicate the signed data for signature verification.
+ */
+static gnutls_datum_t *build_signed_data(mm_variable_access *va, void *data)
+{
+    variable_auth_2 *auth = data;
+    uint64_t data_offset = sizeof(efi_time) + auth->hdr_length;
+    uint16_t *name = (void *)va + sizeof(mm_variable_access);
+    gnutls_datum_t *sdata;
+    uint64_t pos = 0;
+
+    sdata = g_new(gnutls_datum_t, 1);
+    sdata->size = (va->name_size - 2
+                   + sizeof(QemuUUID)
+                   + sizeof(va->attributes)
+                   + sizeof(auth->timestamp)
+                   + va->data_size - data_offset);
+    sdata->data = g_malloc(sdata->size);
+
+    /* Variable Name (without terminating \0) */
+    memcpy(sdata->data + pos, name, va->name_size - 2);
+    pos += va->name_size - 2;
+
+    /* Variable Namespace Guid */
+    memcpy(sdata->data + pos, &va->guid, sizeof(va->guid));
+    pos += sizeof(va->guid);
+
+    /* Attributes */
+    memcpy(sdata->data + pos, &va->attributes, sizeof(va->attributes));
+    pos += sizeof(va->attributes);
+
+    /* TimeStamp */
+    memcpy(sdata->data + pos, &auth->timestamp, sizeof(auth->timestamp));
+    pos += sizeof(auth->timestamp);
+
+    /* Variable Content */
+    memcpy(sdata->data + pos, data + data_offset, va->data_size - data_offset);
+    pos += va->data_size - data_offset;
+
+    assert(pos == sdata->size);
+    return sdata;
+}
+
+/*
+ * See WrapPkcs7Data() in edk2.
+ *
+ * UEFI spec allows pkcs7 signatures being used without the envelope which
+ * identifies them as pkcs7 signatures.  openssl and gnutls will not parse them
+ * without the envelope though.  So add it if needed.
+ */
+static void wrap_pkcs7(gnutls_datum_t *pkcs7)
+{
+    static uint8_t signed_data_oid[9] = {
+        0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x07, 0x02
+    };
+    gnutls_datum_t wrap;
+
+    if (pkcs7->data[4] == 0x06 &&
+        pkcs7->data[5] == 0x09 &&
+        memcmp(pkcs7->data + 6, signed_data_oid, sizeof(signed_data_oid)) == 0 &&
+        pkcs7->data[15] == 0x0a &&
+        pkcs7->data[16] == 0x82) {
+        return;
+    }
+
+    wrap.size = pkcs7->size + 19;
+    wrap.data = g_malloc(wrap.size);
+
+    wrap.data[0] = 0x30;
+    wrap.data[1] = 0x82;
+    wrap.data[2] = (wrap.size - 4) >> 8;
+    wrap.data[3] = (wrap.size - 4) & 0xff;
+    wrap.data[4] = 0x06;
+    wrap.data[5] = 0x09;
+    memcpy(wrap.data + 6, signed_data_oid, sizeof(signed_data_oid));
+
+    wrap.data[15] = 0xa0;
+    wrap.data[16] = 0x82;
+    wrap.data[17] = pkcs7->size >> 8;
+    wrap.data[18] = pkcs7->size & 0xff;
+    memcpy(wrap.data + 19, pkcs7->data, pkcs7->size);
+
+    g_free(pkcs7->data);
+    *pkcs7 = wrap;
+}
+
+static gnutls_datum_t *build_pkcs7(void *data)
+{
+    variable_auth_2 *auth = data;
+    gnutls_datum_t *pkcs7;
+
+    pkcs7 = g_new(gnutls_datum_t, 1);
+    pkcs7->size = auth->hdr_length - 24;
+    pkcs7->data = g_malloc(pkcs7->size);
+    memcpy(pkcs7->data, data + 16 + 24, pkcs7->size);
+
+    wrap_pkcs7(pkcs7);
+
+    return pkcs7;
+}
+
+/*
+ * Read UEFI signature database, store x509 all certificates found in
+ * gnutls_x509_trust_list_t.
+ */
+static gnutls_x509_trust_list_t build_trust_list_sb(uefi_variable *var)
+{
+    gnutls_x509_trust_list_t tlist;
+    gnutls_datum_t cert_data;
+    gnutls_x509_crt_t cert;
+    uefi_vars_siglist siglist;
+    uefi_vars_cert *c;
+    int rc;
+
+    rc = gnutls_x509_trust_list_init(&tlist, 0);
+    if (rc < 0) {
+        warn_report("gnutls_x509_trust_list_init error: %s",
+                     gnutls_strerror(rc));
+        return NULL;
+    }
+
+    uefi_vars_siglist_init(&siglist);
+    uefi_vars_siglist_parse(&siglist, var->data, var->data_size);
+
+    QTAILQ_FOREACH(c, &siglist.x509, next) {
+        cert_data.size = c->size;
+        cert_data.data = c->data;
+
+        rc = gnutls_x509_crt_init(&cert);
+        if (rc < 0) {
+            warn_report("gnutls_x509_crt_init error: %s", gnutls_strerror(rc));
+            break;
+        }
+        rc = gnutls_x509_crt_import(cert, &cert_data, GNUTLS_X509_FMT_DER);
+        if (rc < 0) {
+            warn_report("gnutls_x509_crt_import error: %s",
+                        gnutls_strerror(rc));
+            gnutls_x509_crt_deinit(cert);
+            break;
+        }
+        rc = gnutls_x509_trust_list_add_cas(tlist, &cert, 1, 0);
+        if (rc < 0) {
+            warn_report("gnutls_x509_crt_import error: %s",
+                        gnutls_strerror(rc));
+            gnutls_x509_crt_deinit(cert);
+            break;
+        }
+    }
+
+    uefi_vars_siglist_free(&siglist);
+
+    return tlist;
+}
+
+static int build_digest_authvar(gnutls_x509_crt_t signer,
+                                gnutls_x509_crt_t root,
+                                uint8_t *hash_digest)
+{
+    char *cn;
+    size_t cn_size = 0;
+    uint8_t fp[AUTHVAR_DIGEST_SIZE];
+    size_t fp_size = sizeof(fp);
+    gnutls_hash_hd_t hash;
+    int rc;
+
+    /* get signer CN */
+    rc = gnutls_x509_crt_get_dn_by_oid(signer, GNUTLS_OID_X520_COMMON_NAME,
+                                       0, 0, NULL, &cn_size);
+    if (rc != GNUTLS_E_SHORT_MEMORY_BUFFER) {
+        warn_report("gnutls_x509_crt_get_dn_by_oid error #1: %s",
+                    gnutls_strerror(rc));
+        return rc;
+    }
+
+    cn = g_malloc(cn_size);
+    rc = gnutls_x509_crt_get_dn_by_oid(signer, GNUTLS_OID_X520_COMMON_NAME,
+                                       0, 0, cn, &cn_size);
+    if (rc < 0) {
+        warn_report("gnutls_x509_crt_get_dn_by_oid error #2: %s",
+                    gnutls_strerror(rc));
+        goto err;
+    }
+
+    /* get root certificate fingerprint */
+    rc = gnutls_x509_crt_get_fingerprint(root, AUTHVAR_DIGEST_ALGO,
+                                         fp, &fp_size);
+    if (rc < 0) {
+        warn_report("gnutls_x509_crt_get_fingerprint error: %s",
+                    gnutls_strerror(rc));
+        goto err;
+    }
+
+    /* digest both items */
+    rc = gnutls_hash_init(&hash, AUTHVAR_DIGEST_ALGO);
+    if (rc < 0) {
+        warn_report("gnutls_hash_init error: %s",
+                    gnutls_strerror(rc));
+        goto err;
+    }
+    rc = gnutls_hash(hash, cn, cn_size);
+    if (rc < 0) {
+        warn_report("gnutls_hash error: %s",
+                    gnutls_strerror(rc));
+        goto err;
+    }
+    rc = gnutls_hash(hash, fp, fp_size);
+    if (rc < 0) {
+        warn_report("gnutls_hash error: %s",
+                    gnutls_strerror(rc));
+        goto err;
+    }
+    gnutls_hash_deinit(hash, hash_digest);
+
+    return 0;
+
+err:
+    g_free(cn);
+    return rc;
+}
+
+/*
+ * uefi spec 2.9, section 8.2.2
+ *
+ * For EFI_VARIABLE_TIME_BASED_AUTHENTICATED_WRITE_ACCESS variables which are
+ * NOT secure boot variables we should track the root certificate of the trust
+ * chain, and the subject CN of the signer certificate.
+ *
+ * So we'll go store a digest of these two items so we can verify this.  Also
+ * create a gnutls_x509_trust_list_t with the root certificate, so
+ * gnutls_pkcs7_verify() will pass (assuming the signature is otherwise
+ * correct).
+ */
+static gnutls_x509_trust_list_t build_trust_list_authvar(gnutls_pkcs7_t pkcs7,
+                                                         uint8_t *hash_digest)
+{
+    gnutls_datum_t signer_data = { 0 };
+    gnutls_datum_t root_data = { 0 };
+    gnutls_x509_crt_t signer = NULL;
+    gnutls_x509_crt_t root = NULL;
+    gnutls_x509_trust_list_t tlist = NULL;
+    int n, rc;
+
+    n = gnutls_pkcs7_get_crt_count(pkcs7);
+
+    /* first is signer certificate */
+    rc = gnutls_pkcs7_get_crt_raw2(pkcs7, 0, &signer_data);
+    if (rc < 0) {
+        warn_report("gnutls_pkcs7_get_crt_raw2(0) error: %s",
+                    gnutls_strerror(rc));
+        goto done;
+    }
+    rc = gnutls_x509_crt_init(&signer);
+    if (rc < 0) {
+        warn_report("gnutls_x509_crt_init error: %s", gnutls_strerror(rc));
+        goto done;
+    }
+    rc = gnutls_x509_crt_import(signer, &signer_data, GNUTLS_X509_FMT_DER);
+    if (rc < 0) {
+        warn_report("gnutls_x509_crt_import error: %s",
+                    gnutls_strerror(rc));
+        gnutls_x509_crt_deinit(signer);
+        goto done;
+    }
+
+    /* last is root-of-trust certificate (can be identical to signer) */
+    rc = gnutls_pkcs7_get_crt_raw2(pkcs7, n - 1, &root_data);
+    if (rc < 0) {
+        warn_report("gnutls_pkcs7_get_crt_raw2(%d) error: %s",
+                    n - 1, gnutls_strerror(rc));
+        goto done;
+    }
+    rc = gnutls_x509_crt_init(&root);
+    if (rc < 0) {
+        warn_report("gnutls_x509_crt_init error: %s", gnutls_strerror(rc));
+        goto done;
+    }
+    rc = gnutls_x509_crt_import(root, &root_data, GNUTLS_X509_FMT_DER);
+    if (rc < 0) {
+        warn_report("gnutls_x509_crt_import error: %s",
+                    gnutls_strerror(rc));
+        goto done;
+    }
+
+    /* calc digest for signer CN + root cert */
+    rc = build_digest_authvar(signer, root, hash_digest);
+    if (rc < 0) {
+        goto done;
+    }
+
+    /* add root to trust list */
+    rc = gnutls_x509_trust_list_init(&tlist, 0);
+    if (rc < 0) {
+        warn_report("gnutls_x509_trust_list_init error: %s",
+                    gnutls_strerror(rc));
+        goto done;
+    }
+    rc = gnutls_x509_trust_list_add_cas(tlist, &root, 1, 0);
+    if (rc < 0) {
+        warn_report("gnutls_x509_crt_import error: %s",
+                    gnutls_strerror(rc));
+        gnutls_x509_trust_list_deinit(tlist, 1);
+        tlist = NULL;
+        goto done;
+    } else {
+        /* ownership passed to tlist */
+        root = NULL;
+    }
+
+done:
+    if (signer_data.data) {
+        gnutls_free(signer_data.data);
+    }
+    if (root_data.data) {
+        gnutls_free(root_data.data);
+    }
+    if (signer) {
+        gnutls_x509_crt_deinit(signer);
+    }
+    if (root) {
+        gnutls_x509_crt_deinit(root);
+    }
+    return tlist;
+}
+
+static void free_datum(gnutls_datum_t *ptr)
+{
+    if (!ptr) {
+        return;
+    }
+    g_free(ptr->data);
+    g_free(ptr);
+}
+
+static void gnutls_log_stderr(int level, const char *msg)
+{
+    if (strncmp(msg, "ASSERT:", 7) == 0) {
+        return;
+    }
+    fprintf(stderr, "    %d: %s", level, msg);
+}
+
+/*
+ * pkcs7 signature verification (EFI_VARIABLE_AUTHENTICATION_2).
+ */
+efi_status uefi_vars_check_pkcs7_2(uefi_variable *siglist,
+                                   void **digest, uint32_t *digest_size,
+                                   mm_variable_access *va, void *data)
+{
+    gnutls_x509_trust_list_t tlist = NULL;
+    gnutls_datum_t *signed_data = NULL;
+    gnutls_datum_t *pkcs7_data = NULL;
+    gnutls_pkcs7_t pkcs7 = NULL;
+    efi_status status = EFI_SECURITY_VIOLATION;
+    int rc;
+
+    if (0) {
+        /* gnutls debug logging */
+        static bool first = true;
+
+        if (first) {
+            first = false;
+            gnutls_global_set_log_function(gnutls_log_stderr);
+            gnutls_global_set_log_level(99);
+        }
+    }
+
+    signed_data = build_signed_data(va, data);
+    pkcs7_data = build_pkcs7(data);
+
+    rc = gnutls_pkcs7_init(&pkcs7);
+    if (rc < 0) {
+        warn_report("gnutls_pkcs7_init error: %s", gnutls_strerror(rc));
+        goto out;
+    }
+
+    rc = gnutls_pkcs7_import(pkcs7, pkcs7_data, GNUTLS_X509_FMT_DER);
+    if (rc < 0) {
+        warn_report("gnutls_pkcs7_import error: %s", gnutls_strerror(rc));
+        goto out;
+    }
+
+    if (siglist) {
+        /* secure boot variables */
+        tlist = build_trust_list_sb(siglist);
+    } else if (digest && digest_size) {
+        /* other authenticated variables */
+        *digest_size = AUTHVAR_DIGEST_SIZE;
+        *digest = g_malloc(*digest_size);
+        tlist = build_trust_list_authvar(pkcs7, *digest);
+    } else {
+        /* should not happen */
+        goto out;
+    }
+
+    rc = gnutls_pkcs7_verify(pkcs7, tlist,
+                             NULL, 0,
+                             0, signed_data,
+                             GNUTLS_VERIFY_DISABLE_TIME_CHECKS |
+                             GNUTLS_VERIFY_DISABLE_TRUSTED_TIME_CHECKS);
+    if (rc < 0) {
+        warn_report("gnutls_pkcs7_verify error: %s", gnutls_strerror(rc));
+        goto out;
+    }
+
+    /* check passed */
+    status = EFI_SUCCESS;
+
+out:
+    free_datum(signed_data);
+    free_datum(pkcs7_data);
+    if (tlist) {
+        gnutls_x509_trust_list_deinit(tlist, 1);
+    }
+    if (pkcs7) {
+        gnutls_pkcs7_deinit(pkcs7);
+    }
+    return status;
+}
diff --git a/hw/uefi/var-service-policy.c b/hw/uefi/var-service-policy.c
new file mode 100644
index 0000000000000..3b1155fe4ea14
--- /dev/null
+++ b/hw/uefi/var-service-policy.c
@@ -0,0 +1,370 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * uefi vars device - VarCheckPolicyLibMmiHandler implementation
+ *
+ * variable policy specs:
+ * https://github.com/tianocore/edk2/blob/master/MdeModulePkg/Library/VariablePolicyLib/ReadMe.md
+ */
+#include "qemu/osdep.h"
+#include "system/dma.h"
+#include "migration/vmstate.h"
+
+#include "hw/uefi/var-service.h"
+#include "hw/uefi/var-service-api.h"
+#include "hw/uefi/var-service-edk2.h"
+
+#include "trace/trace-hw_uefi.h"
+
+static void calc_policy(uefi_var_policy *pol);
+
+static int uefi_var_policy_post_load(void *opaque, int version_id)
+{
+    uefi_var_policy *pol = opaque;
+
+    calc_policy(pol);
+    return 0;
+}
+
+const VMStateDescription vmstate_uefi_var_policy = {
+    .name = "uefi-var-policy",
+    .post_load = uefi_var_policy_post_load,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(entry_size, uefi_var_policy),
+        VMSTATE_VBUFFER_ALLOC_UINT32(entry, uefi_var_policy,
+                                     0, NULL, entry_size),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static void print_policy_entry(variable_policy_entry *pe)
+{
+    uint16_t *name = (void *)pe + pe->offset_to_name;
+
+    fprintf(stderr, "%s:\n", __func__);
+
+    fprintf(stderr, "    name ´");
+    while (*name) {
+        fprintf(stderr, "%c", *name);
+        name++;
+    }
+    fprintf(stderr, "', version=%d.%d, size=%d\n",
+            pe->version >> 16, pe->version & 0xffff, pe->size);
+
+    if (pe->min_size) {
+        fprintf(stderr, "    size min=%d\n", pe->min_size);
+    }
+    if (pe->max_size != UINT32_MAX) {
+        fprintf(stderr, "    size max=%u\n", pe->max_size);
+    }
+    if (pe->attributes_must_have) {
+        fprintf(stderr, "    attr must=0x%x\n", pe->attributes_must_have);
+    }
+    if (pe->attributes_cant_have) {
+        fprintf(stderr, "    attr cant=0x%x\n", pe->attributes_cant_have);
+    }
+    if (pe->lock_policy_type) {
+        fprintf(stderr, "    lock policy type %d\n", pe->lock_policy_type);
+    }
+}
+
+static gboolean wildcard_str_equal(uefi_var_policy *pol,
+                                   uefi_variable *var)
+{
+    return uefi_str_equal_ex(pol->name, pol->name_size,
+                             var->name, var->name_size,
+                             true);
+}
+
+static uefi_var_policy *find_policy(uefi_vars_state *uv, QemuUUID guid,
+                                    uint16_t *name, uint64_t name_size)
+{
+    uefi_var_policy *pol;
+
+    QTAILQ_FOREACH(pol, &uv->var_policies, next) {
+        if (!qemu_uuid_is_equal(&pol->entry->namespace, &guid)) {
+            continue;
+        }
+        if (!uefi_str_equal(pol->name, pol->name_size,
+                            name, name_size)) {
+            continue;
+        }
+        return pol;
+    }
+    return NULL;
+}
+
+static uefi_var_policy *wildcard_find_policy(uefi_vars_state *uv,
+                                             uefi_variable *var)
+{
+    uefi_var_policy *pol;
+
+    QTAILQ_FOREACH(pol, &uv->var_policies, next) {
+        if (!qemu_uuid_is_equal(&pol->entry->namespace, &var->guid)) {
+            continue;
+        }
+        if (!wildcard_str_equal(pol, var)) {
+            continue;
+        }
+        return pol;
+    }
+    return NULL;
+}
+
+static void calc_policy(uefi_var_policy *pol)
+{
+    variable_policy_entry *pe = pol->entry;
+    unsigned int i;
+
+    pol->name = (void *)pol->entry + pe->offset_to_name;
+    pol->name_size = pe->size - pe->offset_to_name;
+
+    for (i = 0; i < pol->name_size / 2; i++) {
+        if (pol->name[i] == '#') {
+            pol->hashmarks++;
+        }
+    }
+}
+
+uefi_var_policy *uefi_vars_add_policy(uefi_vars_state *uv,
+                                      variable_policy_entry *pe)
+{
+    uefi_var_policy *pol, *p;
+
+    pol = g_new0(uefi_var_policy, 1);
+    pol->entry = g_malloc(pe->size);
+    memcpy(pol->entry, pe, pe->size);
+    pol->entry_size = pe->size;
+
+    calc_policy(pol);
+
+    /* keep list sorted by priority, add to tail of priority group */
+    QTAILQ_FOREACH(p, &uv->var_policies, next) {
+        if ((p->hashmarks > pol->hashmarks) ||
+            (!p->name_size && pol->name_size)) {
+            QTAILQ_INSERT_BEFORE(p, pol, next);
+            return pol;
+        }
+    }
+
+    QTAILQ_INSERT_TAIL(&uv->var_policies, pol, next);
+    return pol;
+}
+
+efi_status uefi_vars_policy_check(uefi_vars_state *uv,
+                                  uefi_variable *var,
+                                  gboolean is_newvar)
+{
+    uefi_var_policy *pol;
+    variable_policy_entry *pe;
+    variable_lock_on_var_state *lvarstate;
+    uint16_t *lvarname;
+    size_t lvarnamesize;
+    uefi_variable *lvar;
+
+    if (!uv->end_of_dxe) {
+        return EFI_SUCCESS;
+    }
+
+    pol = wildcard_find_policy(uv, var);
+    if (!pol) {
+        return EFI_SUCCESS;
+    }
+    pe = pol->entry;
+
+    uefi_trace_variable(__func__, var->guid, var->name, var->name_size);
+    print_policy_entry(pe);
+
+    if ((var->attributes & pe->attributes_must_have) != pe->attributes_must_have) {
+        trace_uefi_vars_policy_deny("must-have-attr");
+        return EFI_INVALID_PARAMETER;
+    }
+    if ((var->attributes & pe->attributes_cant_have) != 0) {
+        trace_uefi_vars_policy_deny("cant-have-attr");
+        return EFI_INVALID_PARAMETER;
+    }
+
+    if (var->data_size < pe->min_size) {
+        trace_uefi_vars_policy_deny("min-size");
+        return EFI_INVALID_PARAMETER;
+    }
+    if (var->data_size > pe->max_size) {
+        trace_uefi_vars_policy_deny("max-size");
+        return EFI_INVALID_PARAMETER;
+    }
+
+    switch (pe->lock_policy_type) {
+    case VARIABLE_POLICY_TYPE_NO_LOCK:
+        break;
+
+    case VARIABLE_POLICY_TYPE_LOCK_NOW:
+        trace_uefi_vars_policy_deny("lock-now");
+        return EFI_WRITE_PROTECTED;
+
+    case VARIABLE_POLICY_TYPE_LOCK_ON_CREATE:
+        if (!is_newvar) {
+            trace_uefi_vars_policy_deny("lock-on-create");
+            return EFI_WRITE_PROTECTED;
+        }
+        break;
+
+    case VARIABLE_POLICY_TYPE_LOCK_ON_VAR_STATE:
+        lvarstate    = (void *)pol->entry + sizeof(*pe);
+        lvarname     = (void *)pol->entry + sizeof(*pe) + sizeof(*lvarstate);
+        lvarnamesize = pe->offset_to_name - sizeof(*pe) - sizeof(*lvarstate);
+
+        uefi_trace_variable(__func__, lvarstate->namespace,
+                            lvarname, lvarnamesize);
+        lvar = uefi_vars_find_variable(uv, lvarstate->namespace,
+                                          lvarname, lvarnamesize);
+        if (lvar && lvar->data_size == 1) {
+            uint8_t *value = lvar->data;
+            if (lvarstate->value == *value) {
+                return EFI_WRITE_PROTECTED;
+            }
+        }
+        break;
+    }
+
+    return EFI_SUCCESS;
+}
+
+void uefi_vars_policies_clear(uefi_vars_state *uv)
+{
+    uefi_var_policy *pol;
+
+    while (!QTAILQ_EMPTY(&uv->var_policies)) {
+        pol = QTAILQ_FIRST(&uv->var_policies);
+        QTAILQ_REMOVE(&uv->var_policies, pol, next);
+        g_free(pol->entry);
+        g_free(pol);
+    }
+}
+
+static size_t uefi_vars_mm_policy_error(mm_header *mhdr,
+                                        mm_check_policy *mchk,
+                                        uint64_t status)
+{
+    mchk->result = status;
+    return sizeof(*mchk);
+}
+
+static uint32_t uefi_vars_mm_check_policy_is_enabled(uefi_vars_state *uv,
+                                                     mm_header       *mhdr,
+                                                     mm_check_policy *mchk,
+                                                     void            *func)
+{
+    mm_check_policy_is_enabled *mpar = func;
+    size_t length;
+
+    length = sizeof(*mchk) + sizeof(*mpar);
+    if (mhdr->length < length) {
+        return uefi_vars_mm_policy_error(mhdr, mchk, EFI_BAD_BUFFER_SIZE);
+    }
+
+    mpar->state  = TRUE;
+    mchk->result = EFI_SUCCESS;
+    return sizeof(*mchk);
+}
+
+static uint32_t uefi_vars_mm_check_policy_register(uefi_vars_state *uv,
+                                                   mm_header       *mhdr,
+                                                   mm_check_policy *mchk,
+                                                   void            *func)
+{
+    variable_policy_entry *pe = func;
+    uefi_var_policy *pol;
+    uint64_t length;
+
+    if (uadd64_overflow(sizeof(*mchk), pe->size, &length)) {
+        return uefi_vars_mm_policy_error(mhdr, mchk, EFI_BAD_BUFFER_SIZE);
+    }
+    if (mhdr->length < length) {
+        return uefi_vars_mm_policy_error(mhdr, mchk, EFI_BAD_BUFFER_SIZE);
+    }
+    if (pe->size < sizeof(*pe)) {
+        return uefi_vars_mm_policy_error(mhdr, mchk, EFI_BAD_BUFFER_SIZE);
+    }
+    if (pe->offset_to_name < sizeof(*pe)) {
+        return uefi_vars_mm_policy_error(mhdr, mchk, EFI_BAD_BUFFER_SIZE);
+    }
+
+    if (pe->lock_policy_type == VARIABLE_POLICY_TYPE_LOCK_ON_VAR_STATE &&
+        pe->offset_to_name < sizeof(*pe) + sizeof(variable_lock_on_var_state)) {
+        return uefi_vars_mm_policy_error(mhdr, mchk, EFI_BAD_BUFFER_SIZE);
+    }
+
+    /* check space for minimum string length */
+    if (pe->size < (size_t)pe->offset_to_name) {
+        return uefi_vars_mm_policy_error(mhdr, mchk, EFI_BAD_BUFFER_SIZE);
+    }
+
+    if (!uefi_str_is_valid((void *)pe + pe->offset_to_name,
+                           pe->size - pe->offset_to_name,
+                           false)) {
+        return uefi_vars_mm_policy_error(mhdr, mchk, EFI_INVALID_PARAMETER);
+    }
+
+    pol = find_policy(uv, pe->namespace,
+                      (void *)pe + pe->offset_to_name,
+                      pe->size - pe->offset_to_name);
+    if (pol) {
+        return uefi_vars_mm_policy_error(mhdr, mchk, EFI_ALREADY_STARTED);
+    }
+
+    uefi_vars_add_policy(uv, pe);
+
+    mchk->result = EFI_SUCCESS;
+    return sizeof(*mchk);
+}
+
+uint32_t uefi_vars_mm_check_policy_proto(uefi_vars_state *uv)
+{
+    static const char *fnames[] = {
+        "zero",
+        "disable",
+        "is-enabled",
+        "register",
+        "dump",
+        "lock",
+    };
+    const char      *fname;
+    mm_header       *mhdr = (mm_header *) uv->buffer;
+    mm_check_policy *mchk = (mm_check_policy *) (uv->buffer + sizeof(*mhdr));
+    void            *func = (uv->buffer + sizeof(*mhdr) + sizeof(*mchk));
+
+    if (mhdr->length < sizeof(*mchk)) {
+        return UEFI_VARS_STS_ERR_BAD_BUFFER_SIZE;
+    }
+
+    fname = mchk->command < ARRAY_SIZE(fnames)
+        ? fnames[mchk->command]
+        : "unknown";
+    trace_uefi_vars_policy_cmd(fname);
+
+    switch (mchk->command) {
+    case VAR_CHECK_POLICY_COMMAND_DISABLE:
+        mchk->result = EFI_UNSUPPORTED;
+        break;
+    case VAR_CHECK_POLICY_COMMAND_IS_ENABLED:
+        uefi_vars_mm_check_policy_is_enabled(uv, mhdr, mchk, func);
+        break;
+    case VAR_CHECK_POLICY_COMMAND_REGISTER:
+        if (uv->policy_locked) {
+            mchk->result = EFI_WRITE_PROTECTED;
+        } else {
+            uefi_vars_mm_check_policy_register(uv, mhdr, mchk, func);
+        }
+        break;
+    case VAR_CHECK_POLICY_COMMAND_LOCK:
+        uv->policy_locked = true;
+        mchk->result = EFI_SUCCESS;
+        break;
+    default:
+        mchk->result = EFI_UNSUPPORTED;
+        break;
+    }
+
+    uefi_trace_status(__func__, mchk->result);
+    return UEFI_VARS_STS_SUCCESS;
+}
diff --git a/hw/uefi/var-service-siglist.c b/hw/uefi/var-service-siglist.c
new file mode 100644
index 0000000000000..8948f1b784719
--- /dev/null
+++ b/hw/uefi/var-service-siglist.c
@@ -0,0 +1,212 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * uefi vars device - parse and generate efi signature databases
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "system/dma.h"
+
+#include "hw/uefi/var-service.h"
+
+/*
+ * Add x509 certificate to list (with duplicate check).
+ */
+static void uefi_vars_siglist_add_x509(uefi_vars_siglist *siglist,
+                                       QemuUUID *owner,
+                                       void *data, uint64_t size)
+{
+    uefi_vars_cert *c;
+
+    QTAILQ_FOREACH(c, &siglist->x509, next) {
+        if (c->size != size) {
+            continue;
+        }
+        if (memcmp(c->data, data, size) != 0) {
+            continue;
+        }
+        return;
+    }
+
+    c = g_malloc(sizeof(*c) + size);
+    c->owner = *owner;
+    c->size = size;
+    memcpy(c->data, data, size);
+    QTAILQ_INSERT_TAIL(&siglist->x509, c, next);
+}
+
+/*
+ * Add sha256 hash to list (with duplicate check).
+ */
+static void uefi_vars_siglist_add_sha256(uefi_vars_siglist *siglist,
+                                         QemuUUID *owner,
+                                         void *data)
+{
+    uefi_vars_hash *h;
+
+    QTAILQ_FOREACH(h, &siglist->sha256, next) {
+        if (memcmp(h->data, data, 32) != 0) {
+            continue;
+        }
+        return;
+    }
+
+    h = g_malloc(sizeof(*h) + 32);
+    h->owner = *owner;
+    memcpy(h->data, data, 32);
+    QTAILQ_INSERT_TAIL(&siglist->sha256, h, next);
+}
+
+void uefi_vars_siglist_init(uefi_vars_siglist *siglist)
+{
+    memset(siglist, 0, sizeof(*siglist));
+    QTAILQ_INIT(&siglist->x509);
+    QTAILQ_INIT(&siglist->sha256);
+}
+
+void uefi_vars_siglist_free(uefi_vars_siglist *siglist)
+{
+    uefi_vars_cert *c, *cs;
+    uefi_vars_hash *h, *hs;
+
+    QTAILQ_FOREACH_SAFE(c, &siglist->x509, next, cs) {
+        QTAILQ_REMOVE(&siglist->x509, c, next);
+        g_free(c);
+    }
+    QTAILQ_FOREACH_SAFE(h, &siglist->sha256, next, hs) {
+        QTAILQ_REMOVE(&siglist->sha256, h, next);
+        g_free(h);
+    }
+}
+
+/*
+ * Parse UEFI signature list.
+ */
+void uefi_vars_siglist_parse(uefi_vars_siglist *siglist,
+                             void *data, uint64_t size)
+{
+    efi_siglist *efilist;
+    uint64_t start;
+
+    while (size) {
+        if (size < sizeof(*efilist)) {
+            break;
+        }
+        efilist = data;
+        if (size < efilist->siglist_size) {
+            break;
+        }
+
+        if (uadd64_overflow(sizeof(*efilist), efilist->header_size, &start)) {
+            break;
+        }
+        if (efilist->sig_size <= sizeof(QemuUUID)) {
+            break;
+        }
+
+        if (qemu_uuid_is_equal(&efilist->guid_type, &EfiCertX509Guid)) {
+            if (start + efilist->sig_size != efilist->siglist_size) {
+                break;
+            }
+            uefi_vars_siglist_add_x509(siglist,
+                                       (QemuUUID *)(data + start),
+                                       data + start + sizeof(QemuUUID),
+                                       efilist->sig_size - sizeof(QemuUUID));
+
+        } else if (qemu_uuid_is_equal(&efilist->guid_type, &EfiCertSha256Guid)) {
+            if (efilist->sig_size != sizeof(QemuUUID) + 32) {
+                break;
+            }
+            if (start + efilist->sig_size > efilist->siglist_size) {
+                break;
+            }
+            while (start <= efilist->siglist_size - efilist->sig_size) {
+                uefi_vars_siglist_add_sha256(siglist,
+                                             (QemuUUID *)(data + start),
+                                             data + start + sizeof(QemuUUID));
+                start += efilist->sig_size;
+            }
+
+        } else {
+            QemuUUID be = qemu_uuid_bswap(efilist->guid_type);
+            char *str_uuid = qemu_uuid_unparse_strdup(&be);
+            warn_report("%s: unknown type (%s)", __func__, str_uuid);
+            g_free(str_uuid);
+        }
+
+        data += efilist->siglist_size;
+        size -= efilist->siglist_size;
+    }
+}
+
+uint64_t uefi_vars_siglist_blob_size(uefi_vars_siglist *siglist)
+{
+    uefi_vars_cert *c;
+    uefi_vars_hash *h;
+    uint64_t size = 0;
+
+    QTAILQ_FOREACH(c, &siglist->x509, next) {
+        size += sizeof(efi_siglist) + sizeof(QemuUUID) + c->size;
+    }
+
+    if (!QTAILQ_EMPTY(&siglist->sha256)) {
+        size += sizeof(efi_siglist);
+        QTAILQ_FOREACH(h, &siglist->sha256, next) {
+            size += sizeof(QemuUUID) + 32;
+        }
+    }
+
+    return size;
+}
+
+/*
+ * Generate UEFI signature list.
+ */
+void uefi_vars_siglist_blob_generate(uefi_vars_siglist *siglist,
+                                     void *data, uint64_t size)
+{
+    uefi_vars_cert *c;
+    uefi_vars_hash *h;
+    efi_siglist *efilist;
+    uint64_t pos = 0, start;
+    uint32_t i;
+
+    QTAILQ_FOREACH(c, &siglist->x509, next) {
+        efilist = data + pos;
+        efilist->guid_type = EfiCertX509Guid;
+        efilist->sig_size = sizeof(QemuUUID) + c->size;
+        efilist->header_size = 0;
+
+        start = pos + sizeof(efi_siglist);
+        memcpy(data + start,
+               &c->owner, sizeof(QemuUUID));
+        memcpy(data + start + sizeof(QemuUUID),
+               c->data, c->size);
+
+        efilist->siglist_size = sizeof(efi_siglist) + efilist->sig_size;
+        pos += efilist->siglist_size;
+    }
+
+    if (!QTAILQ_EMPTY(&siglist->sha256)) {
+        efilist = data + pos;
+        efilist->guid_type = EfiCertSha256Guid;
+        efilist->sig_size = sizeof(QemuUUID) + 32;
+        efilist->header_size = 0;
+
+        i = 0;
+        start = pos + sizeof(efi_siglist);
+        QTAILQ_FOREACH(h, &siglist->sha256, next) {
+            memcpy(data + start + efilist->sig_size * i,
+                   &h->owner, sizeof(QemuUUID));
+            memcpy(data + start + efilist->sig_size * i + sizeof(QemuUUID),
+                   h->data, 32);
+            i++;
+        }
+
+        efilist->siglist_size = sizeof(efi_siglist) + efilist->sig_size * i;
+        pos += efilist->siglist_size;
+    }
+
+    assert(pos == size);
+}
diff --git a/hw/uefi/var-service-sysbus.c b/hw/uefi/var-service-sysbus.c
new file mode 100644
index 0000000000000..97da8672ee952
--- /dev/null
+++ b/hw/uefi/var-service-sysbus.c
@@ -0,0 +1,124 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * uefi vars device - sysbus variant.
+ */
+#include "qemu/osdep.h"
+#include "migration/vmstate.h"
+
+#include "hw/qdev-properties.h"
+#include "hw/sysbus.h"
+
+#include "hw/uefi/hardware-info.h"
+#include "hw/uefi/var-service.h"
+#include "hw/uefi/var-service-api.h"
+
+OBJECT_DECLARE_SIMPLE_TYPE(uefi_vars_sysbus_state, UEFI_VARS_SYSBUS)
+
+struct uefi_vars_sysbus_state {
+    SysBusDevice parent_obj;
+    struct uefi_vars_state state;
+};
+
+static const VMStateDescription vmstate_uefi_vars_sysbus = {
+    .name = TYPE_UEFI_VARS_SYSBUS,
+    .fields = (VMStateField[]) {
+        VMSTATE_STRUCT(state, uefi_vars_sysbus_state, 0,
+                       vmstate_uefi_vars, uefi_vars_state),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static const Property uefi_vars_sysbus_properties[] = {
+    DEFINE_PROP_SIZE("size", uefi_vars_sysbus_state, state.max_storage,
+                     256 * 1024),
+    DEFINE_PROP_STRING("jsonfile", uefi_vars_sysbus_state, state.jsonfile),
+    DEFINE_PROP_BOOL("force-secure-boot", uefi_vars_sysbus_state,
+                     state.force_secure_boot, false),
+    DEFINE_PROP_BOOL("disable-custom-mode", uefi_vars_sysbus_state,
+                     state.disable_custom_mode, false),
+    DEFINE_PROP_BOOL("use-pio", uefi_vars_sysbus_state,
+                     state.use_pio, false),
+};
+
+static void uefi_vars_sysbus_init(Object *obj)
+{
+    uefi_vars_sysbus_state *uv = UEFI_VARS_SYSBUS(obj);
+
+    uefi_vars_init(obj, &uv->state);
+}
+
+static void uefi_vars_sysbus_reset(DeviceState *dev)
+{
+    uefi_vars_sysbus_state *uv = UEFI_VARS_SYSBUS(dev);
+
+    uefi_vars_hard_reset(&uv->state);
+}
+
+static void uefi_vars_sysbus_realize(DeviceState *dev, Error **errp)
+{
+    uefi_vars_sysbus_state *uv = UEFI_VARS_SYSBUS(dev);
+    SysBusDevice *sysbus = SYS_BUS_DEVICE(dev);
+
+    sysbus_init_mmio(sysbus, &uv->state.mr);
+    uefi_vars_realize(&uv->state, errp);
+}
+
+static void uefi_vars_sysbus_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->realize = uefi_vars_sysbus_realize;
+    dc->vmsd = &vmstate_uefi_vars_sysbus;
+    dc->user_creatable = true;
+    device_class_set_legacy_reset(dc, uefi_vars_sysbus_reset);
+    device_class_set_props(dc, uefi_vars_sysbus_properties);
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+}
+
+/* generic: hardware discovery via FDT */
+static const TypeInfo uefi_vars_sysbus_info = {
+    .name          = TYPE_UEFI_VARS_SYSBUS,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(uefi_vars_sysbus_state),
+    .instance_init = uefi_vars_sysbus_init,
+    .class_init    = uefi_vars_sysbus_class_init,
+};
+module_obj(TYPE_UEFI_VARS_SYSBUS);
+
+static void uefi_vars_x64_realize(DeviceState *dev, Error **errp)
+{
+    HARDWARE_INFO_SIMPLE_DEVICE hwinfo = {
+        .mmio_address = cpu_to_le64(0xfef10000),
+    };
+    SysBusDevice *sysbus = SYS_BUS_DEVICE(dev);
+
+    uefi_vars_sysbus_realize(dev, errp);
+
+    hardware_info_register(HardwareInfoQemuUefiVars,
+                           &hwinfo, sizeof(hwinfo));
+    sysbus_mmio_map(sysbus, 0, hwinfo.mmio_address);
+}
+
+static void uefi_vars_x64_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->realize = uefi_vars_x64_realize;
+}
+
+/* x64: hardware discovery via etc/hardware-info fw_cfg */
+static const TypeInfo uefi_vars_x64_info = {
+    .name          = TYPE_UEFI_VARS_X64,
+    .parent        = TYPE_UEFI_VARS_SYSBUS,
+    .class_init    = uefi_vars_x64_class_init,
+};
+module_obj(TYPE_UEFI_VARS_X64);
+
+static void uefi_vars_sysbus_register_types(void)
+{
+    type_register_static(&uefi_vars_sysbus_info);
+    type_register_static(&uefi_vars_x64_info);
+}
+
+type_init(uefi_vars_sysbus_register_types)
diff --git a/hw/uefi/var-service-utils.c b/hw/uefi/var-service-utils.c
new file mode 100644
index 0000000000000..c9ef46570f489
--- /dev/null
+++ b/hw/uefi/var-service-utils.c
@@ -0,0 +1,241 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * uefi vars device - helper functions for ucs2 strings and tracing
+ */
+#include "qemu/osdep.h"
+#include "system/dma.h"
+
+#include "hw/uefi/var-service.h"
+
+#include "trace/trace-hw_uefi.h"
+
+/* ------------------------------------------------------------------ */
+
+/*
+ * string helper functions.
+ *
+ * Most of the time uefi ucs2 strings are NULL-terminated, except
+ * sometimes when they are not (for example in variable policies).
+ */
+
+gboolean uefi_str_is_valid(const uint16_t *str, size_t len,
+                           gboolean must_be_null_terminated)
+{
+    size_t pos = 0;
+
+    for (;;) {
+        if (pos == len) {
+            if (must_be_null_terminated) {
+                return false;
+            } else {
+                return true;
+            }
+        }
+        switch (str[pos]) {
+        case 0:
+            /* end of string */
+            return true;
+        case 0xd800 ... 0xdfff:
+            /* reject surrogates */
+            return false;
+        default:
+            /* char is good, check next */
+            break;
+        }
+        pos++;
+    }
+}
+
+size_t uefi_strlen(const uint16_t *str, size_t len)
+{
+    size_t pos = 0;
+
+    for (;;) {
+        if (pos == len) {
+            return pos;
+        }
+        if (str[pos] == 0) {
+            return pos;
+        }
+        pos++;
+    }
+}
+
+gboolean uefi_str_equal_ex(const uint16_t *a, size_t alen,
+                           const uint16_t *b, size_t blen,
+                           gboolean wildcards_in_a)
+{
+    size_t pos = 0;
+
+    alen = alen / 2;
+    blen = blen / 2;
+    for (;;) {
+        if (pos == alen && pos == blen) {
+            return true;
+        }
+        if (pos == alen && b[pos] == 0) {
+            return true;
+        }
+        if (pos == blen && a[pos] == 0) {
+            return true;
+        }
+        if (pos == alen || pos == blen) {
+            return false;
+        }
+        if (a[pos] == 0 && b[pos] == 0) {
+            return true;
+        }
+
+        if (wildcards_in_a && a[pos] == '#') {
+            if (!isxdigit(b[pos])) {
+                return false;
+            }
+        } else {
+            if (a[pos] != b[pos]) {
+                return false;
+            }
+        }
+        pos++;
+    }
+}
+
+gboolean uefi_str_equal(const uint16_t *a, size_t alen,
+                        const uint16_t *b, size_t blen)
+{
+    return uefi_str_equal_ex(a, alen, b, blen, false);
+}
+
+char *uefi_ucs2_to_ascii(const uint16_t *ucs2, uint64_t ucs2_size)
+{
+    char *str = g_malloc0(ucs2_size / 2 + 1);
+    int i;
+
+    for (i = 0; i * 2 < ucs2_size; i++) {
+        if (ucs2[i] == 0) {
+            break;
+        }
+        if (ucs2[i] < 128) {
+            str[i] = ucs2[i];
+        } else {
+            str[i] = '?';
+        }
+    }
+    str[i] = 0;
+    return str;
+}
+
+/* ------------------------------------------------------------------ */
+/* time helper functions                                              */
+
+int uefi_time_compare(efi_time *a, efi_time *b)
+{
+    if (a->year < b->year) {
+        return -1;
+    }
+    if (a->year > b->year) {
+        return 1;
+    }
+
+    if (a->month < b->month) {
+        return -1;
+    }
+    if (a->month > b->month) {
+        return 1;
+    }
+
+    if (a->day < b->day) {
+        return -1;
+    }
+    if (a->day > b->day) {
+        return 1;
+    }
+
+    if (a->hour < b->hour) {
+        return -1;
+    }
+    if (a->hour > b->hour) {
+        return 1;
+    }
+
+    if (a->minute < b->minute) {
+        return -1;
+    }
+    if (a->minute > b->minute) {
+        return 1;
+    }
+
+    if (a->second < b->second) {
+        return -1;
+    }
+    if (a->second > b->second) {
+        return 1;
+    }
+
+    if (a->nanosecond < b->nanosecond) {
+        return -1;
+    }
+    if (a->nanosecond > b->nanosecond) {
+        return 1;
+    }
+
+    return 0;
+}
+
+/* ------------------------------------------------------------------ */
+/* tracing helper functions                                           */
+
+void uefi_trace_variable(const char *action, QemuUUID guid,
+                         const uint16_t *name, uint64_t name_size)
+{
+    QemuUUID be = qemu_uuid_bswap(guid);
+    char *str_uuid = qemu_uuid_unparse_strdup(&be);
+    char *str_name = uefi_ucs2_to_ascii(name, name_size);
+
+    trace_uefi_variable(action, str_name, name_size, str_uuid);
+
+    g_free(str_name);
+    g_free(str_uuid);
+}
+
+void uefi_trace_status(const char *action, efi_status status)
+{
+    switch (status) {
+    case EFI_SUCCESS:
+        trace_uefi_status(action, "success");
+        break;
+    case EFI_INVALID_PARAMETER:
+        trace_uefi_status(action, "invalid parameter");
+        break;
+    case EFI_UNSUPPORTED:
+        trace_uefi_status(action, "unsupported");
+        break;
+    case EFI_BAD_BUFFER_SIZE:
+        trace_uefi_status(action, "bad buffer size");
+        break;
+    case EFI_BUFFER_TOO_SMALL:
+        trace_uefi_status(action, "buffer too small");
+        break;
+    case EFI_WRITE_PROTECTED:
+        trace_uefi_status(action, "write protected");
+        break;
+    case EFI_OUT_OF_RESOURCES:
+        trace_uefi_status(action, "out of resources");
+        break;
+    case EFI_NOT_FOUND:
+        trace_uefi_status(action, "not found");
+        break;
+    case EFI_ACCESS_DENIED:
+        trace_uefi_status(action, "access denied");
+        break;
+    case EFI_ALREADY_STARTED:
+        trace_uefi_status(action, "already started");
+        break;
+    case EFI_SECURITY_VIOLATION:
+        trace_uefi_status(action, "security violation");
+        break;
+    default:
+        trace_uefi_status(action, "unknown error");
+        break;
+    }
+}
diff --git a/hw/uefi/var-service-vars.c b/hw/uefi/var-service-vars.c
new file mode 100644
index 0000000000000..7f98d77a38d13
--- /dev/null
+++ b/hw/uefi/var-service-vars.c
@@ -0,0 +1,725 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * uefi vars device - EfiSmmVariableProtocol implementation
+ */
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "system/dma.h"
+#include "migration/vmstate.h"
+
+#include "hw/uefi/var-service.h"
+#include "hw/uefi/var-service-api.h"
+#include "hw/uefi/var-service-edk2.h"
+
+#include "trace/trace-hw_uefi.h"
+
+#define EFI_VARIABLE_ATTRIBUTE_SUPPORTED                                \
+    (EFI_VARIABLE_NON_VOLATILE |                                        \
+     EFI_VARIABLE_BOOTSERVICE_ACCESS |                                  \
+     EFI_VARIABLE_RUNTIME_ACCESS |                                      \
+     EFI_VARIABLE_HARDWARE_ERROR_RECORD |                               \
+     EFI_VARIABLE_TIME_BASED_AUTHENTICATED_WRITE_ACCESS |               \
+     EFI_VARIABLE_APPEND_WRITE)
+
+
+const VMStateDescription vmstate_uefi_time = {
+    .name = "uefi-time",
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT16(year, efi_time),
+        VMSTATE_UINT8(month, efi_time),
+        VMSTATE_UINT8(day, efi_time),
+        VMSTATE_UINT8(hour, efi_time),
+        VMSTATE_UINT8(minute, efi_time),
+        VMSTATE_UINT8(second, efi_time),
+        VMSTATE_UINT32(nanosecond, efi_time),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+const VMStateDescription vmstate_uefi_variable = {
+    .name = "uefi-variable",
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8_ARRAY_V(guid.data, uefi_variable, sizeof(QemuUUID), 0),
+        VMSTATE_UINT32(name_size, uefi_variable),
+        VMSTATE_UINT32(data_size, uefi_variable),
+        VMSTATE_UINT32(attributes, uefi_variable),
+        VMSTATE_VBUFFER_ALLOC_UINT32(name, uefi_variable, 0, NULL, name_size),
+        VMSTATE_VBUFFER_ALLOC_UINT32(data, uefi_variable, 0, NULL, data_size),
+        VMSTATE_STRUCT(time, uefi_variable, 0, vmstate_uefi_time, efi_time),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+uefi_variable *uefi_vars_find_variable(uefi_vars_state *uv, QemuUUID guid,
+                                       const uint16_t *name, uint64_t name_size)
+{
+    uefi_variable *var;
+
+    QTAILQ_FOREACH(var, &uv->variables, next) {
+        if (!uefi_str_equal(var->name, var->name_size,
+                            name, name_size)) {
+            continue;
+        }
+        if (!qemu_uuid_is_equal(&var->guid, &guid)) {
+            continue;
+        }
+        if (!var->data_size) {
+            /* in process of being created/updated */
+            continue;
+        }
+        return var;
+    }
+    return NULL;
+}
+
+static uefi_variable *add_variable(uefi_vars_state *uv, QemuUUID guid,
+                                   const uint16_t *name, uint64_t name_size,
+                                   uint32_t attributes)
+{
+    uefi_variable *var;
+
+    var = g_new0(uefi_variable, 1);
+    var->guid = guid;
+    var->name = g_malloc(name_size);
+    memcpy(var->name, name, name_size);
+    var->name_size = name_size;
+    var->attributes = attributes;
+
+    var->attributes &= ~EFI_VARIABLE_APPEND_WRITE;
+
+    QTAILQ_INSERT_TAIL(&uv->variables, var, next);
+    return var;
+}
+
+static void del_variable(uefi_vars_state *uv, uefi_variable *var)
+{
+    if (!var) {
+        return;
+    }
+
+    QTAILQ_REMOVE(&uv->variables, var, next);
+    g_free(var->data);
+    g_free(var->name);
+    g_free(var->digest);
+    g_free(var);
+}
+
+static size_t variable_size(uefi_variable *var)
+{
+    size_t size;
+
+    size  = sizeof(*var);
+    size += var->name_size;
+    size += var->data_size;
+    size += var->digest_size;
+    return size;
+}
+
+void uefi_vars_set_variable(uefi_vars_state *uv, QemuUUID guid,
+                            const uint16_t *name, uint64_t name_size,
+                            uint32_t attributes,
+                            void *data, uint64_t data_size)
+{
+    uefi_variable *old_var, *new_var;
+
+    uefi_trace_variable(__func__, guid, name, name_size);
+
+    old_var = uefi_vars_find_variable(uv, guid, name, name_size);
+    if (old_var) {
+        uv->used_storage -= variable_size(old_var);
+        del_variable(uv, old_var);
+    }
+
+    new_var = add_variable(uv, guid, name, name_size, attributes);
+    new_var->data = g_malloc(data_size);
+    new_var->data_size = data_size;
+    memcpy(new_var->data, data, data_size);
+    uv->used_storage += variable_size(new_var);
+}
+
+void uefi_vars_clear_volatile(uefi_vars_state *uv)
+{
+    uefi_variable *var, *n;
+
+    QTAILQ_FOREACH_SAFE(var, &uv->variables, next, n) {
+        if (var->attributes & EFI_VARIABLE_NON_VOLATILE) {
+            continue;
+        }
+        uv->used_storage -= variable_size(var);
+        del_variable(uv, var);
+    }
+}
+
+void uefi_vars_clear_all(uefi_vars_state *uv)
+{
+    uefi_variable *var, *n;
+
+    QTAILQ_FOREACH_SAFE(var, &uv->variables, next, n) {
+        del_variable(uv, var);
+    }
+    uv->used_storage = 0;
+}
+
+void uefi_vars_update_storage(uefi_vars_state *uv)
+{
+    uefi_variable *var;
+
+    uv->used_storage = 0;
+    QTAILQ_FOREACH(var, &uv->variables, next) {
+        uv->used_storage += variable_size(var);
+    }
+}
+
+static gboolean check_access(uefi_vars_state *uv, uefi_variable *var)
+{
+    if (!uv->exit_boot_service) {
+        if (!(var->attributes & EFI_VARIABLE_BOOTSERVICE_ACCESS)) {
+            return false;
+        }
+    } else {
+        if (!(var->attributes & EFI_VARIABLE_RUNTIME_ACCESS)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+static efi_status check_update(uefi_vars_state *uv, uefi_variable *old_var,
+                               uefi_variable *new_var)
+{
+    efi_status status;
+
+    if (old_var) {
+        if (!check_access(uv, old_var)) {
+            return EFI_ACCESS_DENIED;
+        }
+    }
+
+    if (new_var) {
+        if (new_var->attributes & ~EFI_VARIABLE_ATTRIBUTE_SUPPORTED) {
+            return EFI_UNSUPPORTED;
+        }
+        if (!check_access(uv, new_var)) {
+            return EFI_ACCESS_DENIED;
+        }
+    }
+
+    if (old_var && new_var) {
+        if (old_var->attributes != new_var->attributes) {
+            return EFI_INVALID_PARAMETER;
+        }
+    }
+
+    if (new_var) {
+        /* create + update */
+        status = uefi_vars_policy_check(uv, new_var, old_var == NULL);
+    } else {
+        /* delete */
+        g_assert(old_var);
+        status = uefi_vars_policy_check(uv, old_var, false);
+    }
+    if (status != EFI_SUCCESS) {
+        return status;
+    }
+
+    status = uefi_vars_check_secure_boot(uv, new_var ?: old_var);
+    if (status != EFI_SUCCESS) {
+        return status;
+    }
+
+    return EFI_SUCCESS;
+}
+
+static void append_write(uefi_variable *old_var,
+                         uefi_variable *new_var)
+{
+    uefi_vars_siglist siglist;
+    uint64_t size;
+    void *data;
+
+    uefi_vars_siglist_init(&siglist);
+    uefi_vars_siglist_parse(&siglist, old_var->data, old_var->data_size);
+    uefi_vars_siglist_parse(&siglist, new_var->data, new_var->data_size);
+
+    size = uefi_vars_siglist_blob_size(&siglist);
+    data = g_malloc(size);
+    uefi_vars_siglist_blob_generate(&siglist, data, size);
+
+    g_free(new_var->data);
+    new_var->data = data;
+    new_var->data_size = size;
+
+    uefi_vars_siglist_free(&siglist);
+}
+
+static size_t uefi_vars_mm_error(mm_header *mhdr, mm_variable *mvar,
+                                 uint64_t status)
+{
+    mvar->status = status;
+    return sizeof(*mvar);
+}
+
+static size_t uefi_vars_mm_get_variable(uefi_vars_state *uv, mm_header *mhdr,
+                                        mm_variable *mvar, void *func)
+{
+    mm_variable_access *va = func;
+    uint16_t *name;
+    void *data;
+    uefi_variable *var;
+    uint64_t length;
+
+    length = sizeof(*mvar) + sizeof(*va);
+    if (mhdr->length < length) {
+        return uefi_vars_mm_error(mhdr, mvar, EFI_BAD_BUFFER_SIZE);
+    }
+
+    if (va->name_size > uv->max_storage ||
+        va->data_size > uv->max_storage) {
+        return uefi_vars_mm_error(mhdr, mvar, EFI_OUT_OF_RESOURCES);
+    }
+
+    name = func + sizeof(*va);
+    if (uadd64_overflow(length, va->name_size, &length)) {
+        return uefi_vars_mm_error(mhdr, mvar, EFI_BAD_BUFFER_SIZE);
+    }
+    if (mhdr->length < length) {
+        return uefi_vars_mm_error(mhdr, mvar, EFI_BAD_BUFFER_SIZE);
+    }
+
+    if (!uefi_str_is_valid(name, va->name_size, true)) {
+        return uefi_vars_mm_error(mhdr, mvar, EFI_INVALID_PARAMETER);
+    }
+
+    uefi_trace_variable(__func__, va->guid, name, va->name_size);
+
+    var = uefi_vars_find_variable(uv, va->guid, name, va->name_size);
+    if (!var) {
+        return uefi_vars_mm_error(mhdr, mvar, EFI_NOT_FOUND);
+    }
+
+    /* check permissions etc. */
+    if (!check_access(uv, var)) {
+        return uefi_vars_mm_error(mhdr, mvar, EFI_ACCESS_DENIED);
+    }
+
+    data = func + sizeof(*va) + va->name_size;
+    if (uadd64_overflow(length, va->data_size, &length)) {
+        return uefi_vars_mm_error(mhdr, mvar, EFI_BAD_BUFFER_SIZE);
+    }
+    if (uv->buf_size < length) {
+        return uefi_vars_mm_error(mhdr, mvar, EFI_BAD_BUFFER_SIZE);
+    }
+
+    va->attributes = var->attributes;
+    if (va->data_size < var->data_size) {
+        va->data_size = var->data_size;
+        length -= va->data_size;
+        mvar->status = EFI_BUFFER_TOO_SMALL;
+    } else {
+        va->data_size = var->data_size;
+        memcpy(data, var->data, var->data_size);
+        mvar->status = EFI_SUCCESS;
+    }
+    return length;
+}
+
+static size_t
+uefi_vars_mm_get_next_variable(uefi_vars_state *uv, mm_header *mhdr,
+                               mm_variable *mvar, void *func)
+{
+    mm_next_variable *nv = func;
+    uefi_variable *var;
+    uint16_t *name;
+    uint64_t length;
+
+    length = sizeof(*mvar) + sizeof(*nv);
+    if (mhdr->length < length) {
+        return uefi_vars_mm_error(mhdr, mvar, EFI_BAD_BUFFER_SIZE);
+    }
+
+    if (nv->name_size > uv->max_storage) {
+        return uefi_vars_mm_error(mhdr, mvar, EFI_OUT_OF_RESOURCES);
+    }
+
+    name = func + sizeof(*nv);
+    if (uadd64_overflow(length, nv->name_size, &length)) {
+        return uefi_vars_mm_error(mhdr, mvar, EFI_BAD_BUFFER_SIZE);
+    }
+    if (mhdr->length < length) {
+        return uefi_vars_mm_error(mhdr, mvar, EFI_BAD_BUFFER_SIZE);
+    }
+
+    if (!uefi_str_is_valid(name, nv->name_size, true)) {
+        return uefi_vars_mm_error(mhdr, mvar, EFI_INVALID_PARAMETER);
+    }
+
+    if (uefi_strlen(name, nv->name_size) == 0) {
+        /* empty string -> first */
+        var = QTAILQ_FIRST(&uv->variables);
+        if (!var) {
+            return uefi_vars_mm_error(mhdr, mvar, EFI_NOT_FOUND);
+        }
+    } else {
+        var = uefi_vars_find_variable(uv, nv->guid, name, nv->name_size);
+        if (!var) {
+            return uefi_vars_mm_error(mhdr, mvar, EFI_INVALID_PARAMETER);
+        }
+        do {
+            var = QTAILQ_NEXT(var, next);
+        } while (var && !check_access(uv, var));
+        if (!var) {
+            return uefi_vars_mm_error(mhdr, mvar, EFI_NOT_FOUND);
+        }
+    }
+
+    length = sizeof(*mvar) + sizeof(*nv) + var->name_size;
+    if (uv->buf_size < length) {
+        return uefi_vars_mm_error(mhdr, mvar, EFI_BAD_BUFFER_SIZE);
+    }
+
+    nv->guid = var->guid;
+    nv->name_size = var->name_size;
+    memcpy(name, var->name, var->name_size);
+    mvar->status = EFI_SUCCESS;
+    return length;
+}
+
+static bool uefi_vars_mm_digest_compare(uefi_variable *old_var,
+                                        uefi_variable *new_var)
+{
+    if (!old_var->digest ||
+        !new_var->digest ||
+        !old_var->digest_size ||
+        !new_var->digest_size) {
+        /* should not happen */
+        trace_uefi_vars_security_violation("inconsistent authvar digest state");
+        return false;
+    }
+    if (old_var->digest_size != new_var->digest_size) {
+        trace_uefi_vars_security_violation("authvar digest size mismatch");
+        return false;
+    }
+    if (memcmp(old_var->digest, new_var->digest,
+               old_var->digest_size) != 0) {
+        trace_uefi_vars_security_violation("authvar digest data mismatch");
+        return false;
+    }
+    return true;
+}
+
+static size_t uefi_vars_mm_set_variable(uefi_vars_state *uv, mm_header *mhdr,
+                                        mm_variable *mvar, void *func)
+{
+    mm_variable_access *va = func;
+    uint32_t attributes = 0;
+    uint16_t *name;
+    void *data;
+    uefi_variable *old_var, *new_var;
+    uint64_t length;
+    size_t new_storage;
+    efi_status status;
+
+    length = sizeof(*mvar) + sizeof(*va);
+    if (mhdr->length < length) {
+        return uefi_vars_mm_error(mhdr, mvar, EFI_BAD_BUFFER_SIZE);
+    }
+
+    if (va->name_size > uv->max_storage ||
+        va->data_size > uv->max_storage) {
+        return uefi_vars_mm_error(mhdr, mvar, EFI_OUT_OF_RESOURCES);
+    }
+
+    name = func + sizeof(*va);
+    if (uadd64_overflow(length, va->name_size, &length)) {
+        return uefi_vars_mm_error(mhdr, mvar, EFI_BAD_BUFFER_SIZE);
+    }
+    if (mhdr->length < length) {
+        return uefi_vars_mm_error(mhdr, mvar, EFI_BAD_BUFFER_SIZE);
+    }
+
+    data = func + sizeof(*va) + va->name_size;
+    if (uadd64_overflow(length, va->data_size, &length)) {
+        return uefi_vars_mm_error(mhdr, mvar, EFI_BAD_BUFFER_SIZE);
+    }
+    if (mhdr->length < length) {
+        return uefi_vars_mm_error(mhdr, mvar, EFI_BAD_BUFFER_SIZE);
+    }
+
+    g_assert(va->name_size < G_MAXUINT32);
+    g_assert(va->data_size < G_MAXUINT32);
+
+    if (!uefi_str_is_valid(name, va->name_size, true)) {
+        return uefi_vars_mm_error(mhdr, mvar, EFI_INVALID_PARAMETER);
+    }
+
+    uefi_trace_variable(__func__, va->guid, name, va->name_size);
+
+    old_var = uefi_vars_find_variable(uv, va->guid, name, va->name_size);
+    if (va->data_size) {
+        new_var = add_variable(uv, va->guid, name, va->name_size,
+                               va->attributes);
+        if (va->attributes & EFI_VARIABLE_AUTHENTICATED_WRITE_ACCESS) {
+            /* not implemented (deprecated in uefi spec) */
+            warn_report("%s: AUTHENTICATED_WRITE_ACCESS", __func__);
+            mvar->status = EFI_UNSUPPORTED;
+            goto rollback;
+        } else if (va->attributes &
+                   EFI_VARIABLE_TIME_BASED_AUTHENTICATED_WRITE_ACCESS) {
+            status = uefi_vars_check_auth_2(uv, new_var, va, data);
+            if (status != EFI_SUCCESS) {
+                mvar->status = status;
+                goto rollback;
+            }
+            if (old_var && new_var) {
+                if (uefi_time_compare(&old_var->time, &new_var->time) > 0) {
+                    trace_uefi_vars_security_violation("time check failed");
+                    mvar->status = EFI_SECURITY_VIOLATION;
+                    goto rollback;
+                }
+                if (old_var->digest_size || new_var->digest_size) {
+                    if (!uefi_vars_mm_digest_compare(old_var, new_var)) {
+                        mvar->status = EFI_SECURITY_VIOLATION;
+                        goto rollback;
+                    }
+                }
+            }
+        } else {
+            new_var->data = g_malloc(va->data_size);
+            memcpy(new_var->data, data, va->data_size);
+            new_var->data_size = va->data_size;
+        }
+        if (!new_var->data) {
+            /* we land here when deleting authenticated variables */
+            del_variable(uv, new_var);
+            new_var = NULL;
+        }
+    } else {
+        new_var = NULL;
+    }
+
+    if (!old_var && !new_var) {
+        /* delete non-existing variable -> nothing to do */
+        mvar->status = EFI_SUCCESS;
+        return sizeof(*mvar);
+    }
+
+    /* check permissions etc. */
+    status = check_update(uv, old_var, new_var);
+    if (status != EFI_SUCCESS) {
+        mvar->status = status;
+        goto rollback;
+    }
+
+    if (va->attributes & EFI_VARIABLE_APPEND_WRITE && old_var && new_var) {
+        /* merge signature databases */
+        if (!uefi_vars_is_sb_any(new_var)) {
+            mvar->status = EFI_UNSUPPORTED;
+            goto rollback;
+        }
+        append_write(old_var, new_var);
+    }
+
+    /* check storage space */
+    new_storage = uv->used_storage;
+    if (old_var) {
+        new_storage -= variable_size(old_var);
+    }
+    if (new_var) {
+        new_storage += variable_size(new_var);
+    }
+    if (new_storage > uv->max_storage) {
+        mvar->status = EFI_OUT_OF_RESOURCES;
+        goto rollback;
+    }
+
+    attributes = new_var
+        ? new_var->attributes
+        : old_var->attributes;
+
+    /* all good, commit */
+    del_variable(uv, old_var);
+    uv->used_storage = new_storage;
+
+    if (attributes & EFI_VARIABLE_NON_VOLATILE) {
+        uefi_vars_json_save(uv);
+    }
+
+    if (new_var && uefi_vars_is_sb_pk(new_var)) {
+        uefi_vars_auth_init(uv);
+    }
+
+    mvar->status = EFI_SUCCESS;
+    return sizeof(*mvar);
+
+rollback:
+    del_variable(uv, new_var);
+    return sizeof(*mvar);
+}
+
+static size_t uefi_vars_mm_variable_info(uefi_vars_state *uv, mm_header *mhdr,
+                                         mm_variable *mvar, void *func)
+{
+    mm_variable_info *vi = func;
+    uint64_t length;
+
+    length = sizeof(*mvar) + sizeof(*vi);
+    if (uv->buf_size < length) {
+        return uefi_vars_mm_error(mhdr, mvar, EFI_BAD_BUFFER_SIZE);
+    }
+
+    vi->max_storage_size  = uv->max_storage;
+    vi->free_storage_size = uv->max_storage - uv->used_storage;
+    vi->max_variable_size = uv->max_storage >> 2;
+    vi->attributes        = 0;
+
+    mvar->status = EFI_SUCCESS;
+    return length;
+}
+
+static size_t
+uefi_vars_mm_get_payload_size(uefi_vars_state *uv, mm_header *mhdr,
+                              mm_variable *mvar, void *func)
+{
+    mm_get_payload_size *ps = func;
+    uint64_t length;
+
+    length = sizeof(*mvar) + sizeof(*ps);
+    if (uv->buf_size < length) {
+        return uefi_vars_mm_error(mhdr, mvar, EFI_BAD_BUFFER_SIZE);
+    }
+
+    ps->payload_size = uv->buf_size;
+    mvar->status = EFI_SUCCESS;
+    return length;
+}
+
+static size_t
+uefi_vars_mm_lock_variable(uefi_vars_state *uv, mm_header *mhdr,
+                           mm_variable *mvar, void *func)
+{
+    mm_lock_variable *lv = func;
+    variable_policy_entry *pe;
+    uint16_t *name, *dest;
+    uint64_t length;
+
+    length = sizeof(*mvar) + sizeof(*lv);
+    if (mhdr->length < length) {
+        return uefi_vars_mm_error(mhdr, mvar, EFI_BAD_BUFFER_SIZE);
+    }
+
+    name = func + sizeof(*lv);
+    if (uadd64_overflow(length, lv->name_size, &length)) {
+        return uefi_vars_mm_error(mhdr, mvar, EFI_BAD_BUFFER_SIZE);
+    }
+    if (mhdr->length < length) {
+        return uefi_vars_mm_error(mhdr, mvar, EFI_BAD_BUFFER_SIZE);
+    }
+
+    uefi_trace_variable(__func__, lv->guid, name, lv->name_size);
+
+    pe = g_malloc0(sizeof(*pe) + lv->name_size);
+    pe->version               = VARIABLE_POLICY_ENTRY_REVISION;
+    pe->size                  = sizeof(*pe) + lv->name_size;
+    pe->offset_to_name        = sizeof(*pe);
+    pe->namespace             = lv->guid;
+    pe->min_size              = 0;
+    pe->max_size              = UINT32_MAX;
+    pe->attributes_must_have  = 0;
+    pe->attributes_cant_have  = 0;
+    pe->lock_policy_type      = VARIABLE_POLICY_TYPE_LOCK_NOW;
+
+    dest = (void *)pe + pe->offset_to_name;
+    memcpy(dest, name, lv->name_size);
+
+    uefi_vars_add_policy(uv, pe);
+    g_free(pe);
+
+    mvar->status = EFI_SUCCESS;
+    return length;
+}
+
+uint32_t uefi_vars_mm_vars_proto(uefi_vars_state *uv)
+{
+    static const char *fnames[] = {
+        "zero",
+        "get-variable",
+        "get-next-variable-name",
+        "set-variable",
+        "query-variable-info",
+        "ready-to-boot",
+        "exit-boot-service",
+        "get-statistics",
+        "lock-variable",
+        "var-check-prop-set",
+        "var-check-prop-get",
+        "get-payload-size",
+        "init-runtime-cache-contect",
+        "sync-runtime-cache",
+        "get-runtime-cache-info",
+    };
+    const char  *fname;
+    uint64_t    length;
+
+    mm_header   *mhdr = (mm_header *) uv->buffer;
+    mm_variable *mvar = (mm_variable *) (uv->buffer + sizeof(*mhdr));
+    void        *func = (uv->buffer + sizeof(*mhdr) + sizeof(*mvar));
+
+    if (mhdr->length < sizeof(*mvar)) {
+        return UEFI_VARS_STS_ERR_BAD_BUFFER_SIZE;
+    }
+
+    fname = mvar->function < ARRAY_SIZE(fnames)
+        ? fnames[mvar->function]
+        : "unknown";
+    trace_uefi_vars_proto_cmd(fname);
+
+    switch (mvar->function) {
+    case SMM_VARIABLE_FUNCTION_GET_VARIABLE:
+        length = uefi_vars_mm_get_variable(uv, mhdr, mvar, func);
+        break;
+
+    case SMM_VARIABLE_FUNCTION_GET_NEXT_VARIABLE_NAME:
+        length = uefi_vars_mm_get_next_variable(uv, mhdr, mvar, func);
+        break;
+
+    case SMM_VARIABLE_FUNCTION_SET_VARIABLE:
+        length = uefi_vars_mm_set_variable(uv, mhdr, mvar, func);
+        break;
+
+    case SMM_VARIABLE_FUNCTION_QUERY_VARIABLE_INFO:
+        length = uefi_vars_mm_variable_info(uv, mhdr, mvar, func);
+        break;
+
+    case SMM_VARIABLE_FUNCTION_LOCK_VARIABLE:
+        length = uefi_vars_mm_lock_variable(uv, mhdr, mvar, func);
+        break;
+
+    case SMM_VARIABLE_FUNCTION_GET_PAYLOAD_SIZE:
+        length = uefi_vars_mm_get_payload_size(uv, mhdr, mvar, func);
+        break;
+
+    case SMM_VARIABLE_FUNCTION_READY_TO_BOOT:
+        trace_uefi_event("ready-to-boot");
+        uv->ready_to_boot = true;
+        length = 0;
+        break;
+
+    case SMM_VARIABLE_FUNCTION_EXIT_BOOT_SERVICE:
+        trace_uefi_event("exit-boot-service");
+        uv->exit_boot_service = true;
+        length = 0;
+        break;
+
+    default:
+        length = uefi_vars_mm_error(mhdr, mvar, EFI_UNSUPPORTED);
+        break;
+    }
+
+    if (mhdr->length < length) {
+        mvar->status = EFI_BUFFER_TOO_SMALL;
+    }
+
+    uefi_trace_status(__func__, mvar->status);
+    return UEFI_VARS_STS_SUCCESS;
+}
diff --git a/hw/ufs/ufs.c b/hw/ufs/ufs.c
index 428fe927ad083..857de6e9c2ca3 100644
--- a/hw/ufs/ufs.c
+++ b/hw/ufs/ufs.c
@@ -34,6 +34,11 @@
 #define UFS_MAX_NUTMRS 8
 #define UFS_MCQ_QCFGPTR 2
 
+/* Each value represents the temperature in celsius as (value - 80) */
+#define UFS_TEMPERATURE 120
+#define UFS_TOO_HIGH_TEMP_BOUNDARY 160
+#define UFS_TOO_LOW_TEMP_BOUNDARY 60
+
 static void ufs_exec_req(UfsRequest *req);
 static void ufs_clear_req(UfsRequest *req);
 
@@ -838,6 +843,42 @@ static const MemoryRegionOps ufs_mmio_ops = {
     },
 };
 
+static void ufs_update_ee_status(UfsHc *u)
+{
+    uint16_t ee_status = be16_to_cpu(u->attributes.exception_event_status);
+    uint8_t high_temp_thresh = u->attributes.device_too_high_temp_boundary;
+    uint8_t low_temp_thresh = u->attributes.device_too_low_temp_boundary;
+
+    if (u->temperature >= high_temp_thresh) {
+        ee_status |= MASK_EE_TOO_HIGH_TEMP;
+    } else {
+        ee_status &= ~MASK_EE_TOO_HIGH_TEMP;
+    }
+
+    if (u->temperature <= low_temp_thresh) {
+        ee_status |= MASK_EE_TOO_LOW_TEMP;
+    } else {
+        ee_status &= ~MASK_EE_TOO_LOW_TEMP;
+    }
+
+    u->attributes.exception_event_status = cpu_to_be16(ee_status);
+}
+
+static bool ufs_check_exception_event_alert(UfsHc *u, uint8_t trans_type)
+{
+    uint16_t ee_control = be16_to_cpu(u->attributes.exception_event_control);
+    uint16_t ee_status;
+
+    if (trans_type != UFS_UPIU_TRANSACTION_RESPONSE) {
+        return false;
+    }
+
+    ufs_update_ee_status(u);
+
+    ee_status = be16_to_cpu(u->attributes.exception_event_status);
+
+    return ee_control & ee_status;
+}
 
 void ufs_build_upiu_header(UfsRequest *req, uint8_t trans_type, uint8_t flags,
                            uint8_t response, uint8_t scsi_status,
@@ -848,6 +889,8 @@ void ufs_build_upiu_header(UfsRequest *req, uint8_t trans_type, uint8_t flags,
     req->rsp_upiu.header.flags = flags;
     req->rsp_upiu.header.response = response;
     req->rsp_upiu.header.scsi_status = scsi_status;
+    req->rsp_upiu.header.device_inf =
+        ufs_check_exception_event_alert(req->hc, trans_type);
     req->rsp_upiu.header.data_segment_length = cpu_to_be16(data_segment_length);
 }
 
@@ -1042,6 +1085,25 @@ static QueryRespCode ufs_exec_query_flag(UfsRequest *req, int op)
     return UFS_QUERY_RESULT_SUCCESS;
 }
 
+static inline uint8_t ufs_read_device_temp(UfsHc *u)
+{
+    uint8_t feat_sup = u->device_desc.ufs_features_support;
+    bool high_temp_sup, low_temp_sup, high_temp_en, low_temp_en;
+    uint16_t ee_control = be16_to_cpu(u->attributes.exception_event_control);
+
+    high_temp_sup = feat_sup & UFS_DEV_HIGH_TEMP_NOTIF;
+    low_temp_sup = feat_sup & UFS_DEV_LOW_TEMP_NOTIF;
+    high_temp_en = ee_control & MASK_EE_TOO_HIGH_TEMP;
+    low_temp_en = ee_control & MASK_EE_TOO_LOW_TEMP;
+
+    if ((high_temp_sup && high_temp_en) ||
+        (low_temp_sup && low_temp_en)) {
+        return u->temperature;
+    }
+
+    return 0;
+}
+
 static uint32_t ufs_read_attr_value(UfsHc *u, uint8_t idn)
 {
     switch (idn) {
@@ -1072,6 +1134,7 @@ static uint32_t ufs_read_attr_value(UfsHc *u, uint8_t idn)
     case UFS_QUERY_ATTR_IDN_EE_CONTROL:
         return be16_to_cpu(u->attributes.exception_event_control);
     case UFS_QUERY_ATTR_IDN_EE_STATUS:
+        ufs_update_ee_status(u);
         return be16_to_cpu(u->attributes.exception_event_status);
     case UFS_QUERY_ATTR_IDN_SECONDS_PASSED:
         return be32_to_cpu(u->attributes.seconds_passed);
@@ -1086,7 +1149,8 @@ static uint32_t ufs_read_attr_value(UfsHc *u, uint8_t idn)
     case UFS_QUERY_ATTR_IDN_REF_CLK_GATING_WAIT_TIME:
         return u->attributes.ref_clk_gating_wait_time;
     case UFS_QUERY_ATTR_IDN_CASE_ROUGH_TEMP:
-        return u->attributes.device_case_rough_temperaure;
+        u->attributes.device_case_rough_temperature = ufs_read_device_temp(u);
+        return u->attributes.device_case_rough_temperature;
     case UFS_QUERY_ATTR_IDN_HIGH_TEMP_BOUND:
         return u->attributes.device_too_high_temp_boundary;
     case UFS_QUERY_ATTR_IDN_LOW_TEMP_BOUND:
@@ -1635,7 +1699,7 @@ static void ufs_init_hc(UfsHc *u)
     cap = FIELD_DP32(cap, CAP, OODDS, 0);
     cap = FIELD_DP32(cap, CAP, UICDMETMS, 0);
     cap = FIELD_DP32(cap, CAP, CS, 0);
-    cap = FIELD_DP32(cap, CAP, LSDBS, 1);
+    cap = FIELD_DP32(cap, CAP, LSDBS, 0);
     cap = FIELD_DP32(cap, CAP, MCQS, u->params.mcq);
     u->reg.cap = cap;
 
@@ -1677,8 +1741,12 @@ static void ufs_init_hc(UfsHc *u)
     u->device_desc.ud_0_base_offset = 0x16;
     u->device_desc.ud_config_p_length = 0x1A;
     u->device_desc.device_rtt_cap = 0x02;
+    u->device_desc.ufs_features_support = UFS_DEV_HIGH_TEMP_NOTIF |
+        UFS_DEV_LOW_TEMP_NOTIF;
     u->device_desc.queue_depth = u->params.nutrs;
     u->device_desc.product_revision_level = 0x04;
+    u->device_desc.extended_ufs_features_support =
+        cpu_to_be32(UFS_DEV_HIGH_TEMP_NOTIF | UFS_DEV_LOW_TEMP_NOTIF);
 
     memset(&u->geometry_desc, 0, sizeof(GeometryDescriptor));
     u->geometry_desc.length = sizeof(GeometryDescriptor);
@@ -1702,9 +1770,17 @@ static void ufs_init_hc(UfsHc *u)
     /* configure descriptor is not supported */
     u->attributes.config_descr_lock = 0x01;
     u->attributes.max_num_of_rtt = 0x02;
+    u->attributes.device_too_high_temp_boundary = UFS_TOO_HIGH_TEMP_BOUNDARY;
+    u->attributes.device_too_low_temp_boundary = UFS_TOO_LOW_TEMP_BOUNDARY;
 
     memset(&u->flags, 0, sizeof(u->flags));
     u->flags.permanently_disable_fw_update = 1;
+
+    /*
+     * The temperature value is fixed to UFS_TEMPERATURE and does not change
+     * dynamically
+     */
+    u->temperature = UFS_TEMPERATURE;
 }
 
 static void ufs_realize(PCIDevice *pci_dev, Error **errp)
diff --git a/hw/ufs/ufs.h b/hw/ufs/ufs.h
index 4bcc41f53a0d6..3799d97f30d3c 100644
--- a/hw/ufs/ufs.h
+++ b/hw/ufs/ufs.h
@@ -146,6 +146,8 @@ typedef struct UfsHc {
     /* MCQ properties */
     UfsSq *sq[UFS_MAX_MCQ_QNUM];
     UfsCq *cq[UFS_MAX_MCQ_QNUM];
+
+    uint8_t temperature;
 } UfsHc;
 
 static inline uint32_t ufs_mcq_sq_tail(UfsHc *u, uint32_t qid)
diff --git a/hw/usb/Kconfig b/hw/usb/Kconfig
index 5fbecd2f43bf7..69c663be52fb5 100644
--- a/hw/usb/Kconfig
+++ b/hw/usb/Kconfig
@@ -143,3 +143,7 @@ config USB_DWC3
 config XLNX_USB_SUBSYS
     bool
     select USB_DWC3
+
+config USB_CHIPIDEA
+    bool
+    select USB_EHCI_SYSBUS
diff --git a/hw/usb/hcd-dwc3.c b/hw/usb/hcd-dwc3.c
index 9ce9ba0b04659..0bceee2712824 100644
--- a/hw/usb/hcd-dwc3.c
+++ b/hw/usb/hcd-dwc3.c
@@ -343,6 +343,8 @@ REG32(GFLADJ, 0x530)
     FIELD(GFLADJ, GFLADJ_REFCLK_FLADJ, 8, 14)
     FIELD(GFLADJ, GFLADJ_30MHZ_SDBND_SEL, 7, 1)
     FIELD(GFLADJ, GFLADJ_30MHZ, 0, 6)
+REG32(GUSB2RHBCTL, 0x540)
+    FIELD(GUSB2RHBCTL, OVRD_L1TIMEOUT, 0, 4)
 
 #define DWC3_GLOBAL_OFFSET 0xC100
 static void reset_csr(USBDWC3 * s)
@@ -560,6 +562,9 @@ static const RegisterAccessInfo usb_dwc3_regs_info[] = {
         .rsvd = 0x40,
         .ro = 0x400040,
         .unimp = 0xffffffff,
+    },{ .name = "GUSB2RHBCTL",  .addr = A_GUSB2RHBCTL,
+        .rsvd = 0xfffffff0,
+        .unimp = 0xffffffff,
     }
 };
 
diff --git a/hw/usb/hcd-ehci-pci.c b/hw/usb/hcd-ehci-pci.c
index d410c38a8a286..e00316721ac28 100644
--- a/hw/usb/hcd-ehci-pci.c
+++ b/hw/usb/hcd-ehci-pci.c
@@ -182,7 +182,7 @@ static void ehci_data_class_init(ObjectClass *klass, void *data)
 {
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
     DeviceClass *dc = DEVICE_CLASS(klass);
-    EHCIPCIInfo *i = data;
+    const EHCIPCIInfo *i = data;
 
     k->vendor_id = i->vendor_id;
     k->device_id = i->device_id;
diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index 8528d493d63cf..0561a6d801ace 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -1289,7 +1289,7 @@ void uhci_data_class_init(ObjectClass *klass, void *data)
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
     DeviceClass *dc = DEVICE_CLASS(klass);
     UHCIPCIDeviceClass *u = UHCI_CLASS(klass);
-    UHCIInfo *info = data;
+    const UHCIInfo *info = data;
 
     k->realize = info->realize ? info->realize : usb_uhci_common_realize;
     k->exit = info->unplug ? usb_uhci_exit : NULL;
diff --git a/hw/usb/hcd-xhci-pci.c b/hw/usb/hcd-xhci-pci.c
index 49642aab58eca..d908eb787d34a 100644
--- a/hw/usb/hcd-xhci-pci.c
+++ b/hw/usb/hcd-xhci-pci.c
@@ -82,6 +82,21 @@ static bool xhci_pci_intr_raise(XHCIState *xhci, int n, bool level)
     return false;
 }
 
+static bool xhci_pci_intr_mapping_conditional(XHCIState *xhci)
+{
+    XHCIPciState *s = container_of(xhci, XHCIPciState, xhci);
+    PCIDevice *pci_dev = PCI_DEVICE(s);
+
+    /*
+     * Implementation of the "conditional-intr-mapping" property, which only
+     * enables interrupter mapping if MSI or MSI-X is available and active.
+     * Forces all events onto interrupter/event ring 0 in pin-based IRQ mode.
+     * Provides compatibility with macOS guests on machine types where MSI(-X)
+     * is not available.
+     */
+    return msix_enabled(pci_dev) || msi_enabled(pci_dev);
+}
+
 static void xhci_pci_reset(DeviceState *dev)
 {
     XHCIPciState *s = XHCI_PCI(dev);
@@ -119,6 +134,9 @@ static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp)
     object_property_set_link(OBJECT(&s->xhci), "host", OBJECT(s), NULL);
     s->xhci.intr_update = xhci_pci_intr_update;
     s->xhci.intr_raise = xhci_pci_intr_raise;
+    if (s->conditional_intr_mapping) {
+        s->xhci.intr_mapping_supported = xhci_pci_intr_mapping_conditional;
+    }
     if (!qdev_realize(DEVICE(&s->xhci), NULL, errp)) {
         return;
     }
@@ -201,6 +219,8 @@ static void xhci_instance_init(Object *obj)
 static const Property xhci_pci_properties[] = {
     DEFINE_PROP_ON_OFF_AUTO("msi", XHCIPciState, msi, ON_OFF_AUTO_AUTO),
     DEFINE_PROP_ON_OFF_AUTO("msix", XHCIPciState, msix, ON_OFF_AUTO_AUTO),
+    DEFINE_PROP_BOOL("conditional-intr-mapping", XHCIPciState,
+                     conditional_intr_mapping, false),
 };
 
 static void xhci_class_init(ObjectClass *klass, void *data)
@@ -215,6 +235,10 @@ static void xhci_class_init(ObjectClass *klass, void *data)
     k->exit         = usb_xhci_pci_exit;
     k->class_id     = PCI_CLASS_SERIAL_USB;
     device_class_set_props(dc, xhci_pci_properties);
+    object_class_property_set_description(klass, "conditional-intr-mapping",
+        "When true, disables interrupter mapping for pin-based IRQ mode. "
+        "Intended to be used with guest drivers with questionable behaviour, "
+        "such as macOS's.");
 }
 
 static const TypeInfo xhci_pci_info = {
diff --git a/hw/usb/hcd-xhci-pci.h b/hw/usb/hcd-xhci-pci.h
index 08f70ce97cc64..5b61ae8455530 100644
--- a/hw/usb/hcd-xhci-pci.h
+++ b/hw/usb/hcd-xhci-pci.h
@@ -40,6 +40,7 @@ typedef struct XHCIPciState {
     XHCIState xhci;
     OnOffAuto msi;
     OnOffAuto msix;
+    bool conditional_intr_mapping;
 } XHCIPciState;
 
 #endif
diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index 00d5bc377927a..64c3a23b9b79c 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -644,7 +644,8 @@ static void xhci_event(XHCIState *xhci, XHCIEvent *event, int v)
     dma_addr_t erdp;
     unsigned int dp_idx;
 
-    if (xhci->numintrs == 1) {
+    if (xhci->numintrs == 1 ||
+        (xhci->intr_mapping_supported && !xhci->intr_mapping_supported(xhci))) {
         v = 0;
     }
 
diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h
index 9609b83514133..9c3974f1489c0 100644
--- a/hw/usb/hcd-xhci.h
+++ b/hw/usb/hcd-xhci.h
@@ -193,6 +193,11 @@ typedef struct XHCIState {
     uint32_t max_pstreams_mask;
     void (*intr_update)(XHCIState *s, int n, bool enable);
     bool (*intr_raise)(XHCIState *s, int n, bool level);
+    /*
+     * Callback for special-casing interrupter mapping support. NULL for most
+     * implementations, for defaulting to enabled mapping unless numintrs == 1.
+     */
+    bool (*intr_mapping_supported)(XHCIState *s);
     DeviceState *hostOpaque;
 
     /* Operational Registers */
diff --git a/hw/usb/meson.build b/hw/usb/meson.build
index 1b4d1507e4126..17360a5b5a497 100644
--- a/hw/usb/meson.build
+++ b/hw/usb/meson.build
@@ -25,8 +25,8 @@ system_ss.add(when: 'CONFIG_USB_XHCI_SYSBUS', if_true: files('hcd-xhci-sysbus.c'
 system_ss.add(when: 'CONFIG_USB_XHCI_NEC', if_true: files('hcd-xhci-nec.c'))
 system_ss.add(when: 'CONFIG_USB_DWC2', if_true: files('hcd-dwc2.c'))
 system_ss.add(when: 'CONFIG_USB_DWC3', if_true: files('hcd-dwc3.c'))
+system_ss.add(when: 'CONFIG_USB_CHIPIDEA', if_true: files('chipidea.c'))
 
-system_ss.add(when: 'CONFIG_IMX', if_true: files('chipidea.c'))
 system_ss.add(when: 'CONFIG_IMX_USBPHY', if_true: files('imx-usb-phy.c'))
 system_ss.add(when: 'CONFIG_VT82C686', if_true: files('vt82c686-uhci-pci.c'))
 system_ss.add(when: 'CONFIG_XLNX_VERSAL', if_true: files('xlnx-versal-usb2-ctrl-regs.c'))
diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c
index 13b065b0faa4e..fa46a7da01c1c 100644
--- a/hw/usb/xen-usb.c
+++ b/hw/usb/xen-usb.c
@@ -755,10 +755,10 @@ static void usbback_portid_add(struct usbback_info *usbif, unsigned port,
 
     qdict = qdict_new();
     qdict_put_str(qdict, "driver", "usb-host");
-    tmp = g_strdup_printf("%s.0", usbif->xendev.qdev.id);
+    tmp = g_strdup_printf("%s.0", DEVICE(&usbif->xendev)->id);
     qdict_put_str(qdict, "bus", tmp);
     g_free(tmp);
-    tmp = g_strdup_printf("%s-%u", usbif->xendev.qdev.id, port);
+    tmp = g_strdup_printf("%s-%u", DEVICE(&usbif->xendev)->id, port);
     qdict_put_str(qdict, "id", tmp);
     g_free(tmp);
     qdict_put_int(qdict, "port", port);
@@ -1022,7 +1022,7 @@ static void usbback_alloc(struct XenLegacyDevice *xendev)
     usbif = container_of(xendev, struct usbback_info, xendev);
 
     usb_bus_new(&usbif->bus, sizeof(usbif->bus), &xen_usb_bus_ops,
-                DEVICE(&xendev->qdev));
+                DEVICE(xendev));
     for (i = 0; i < USBBACK_MAXPORTS; i++) {
         p = &(usbif->ports[i].port);
         usb_register_port(&usbif->bus, p, usbif, i, &xen_usb_port_ops,
diff --git a/hw/vfio/amd-xgbe.c b/hw/vfio/amd-xgbe.c
index 96bd608b8dd0b..5927503b5c452 100644
--- a/hw/vfio/amd-xgbe.c
+++ b/hw/vfio/amd-xgbe.c
@@ -15,12 +15,14 @@
 #include "hw/vfio/vfio-amd-xgbe.h"
 #include "migration/vmstate.h"
 #include "qemu/module.h"
+#include "qemu/error-report.h"
 
 static void amd_xgbe_realize(DeviceState *dev, Error **errp)
 {
     VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(dev);
     VFIOAmdXgbeDeviceClass *k = VFIO_AMD_XGBE_DEVICE_GET_CLASS(dev);
 
+    warn_report("-device vfio-amd-xgbe is deprecated");
     vdev->compat = g_strdup("amd,xgbe-seattle-v1a");
     vdev->num_compat = 1;
 
@@ -41,8 +43,6 @@ static void vfio_amd_xgbe_class_init(ObjectClass *klass, void *data)
                                     &vcxc->parent_realize);
     dc->desc = "VFIO AMD XGBE";
     dc->vmsd = &vfio_platform_amd_xgbe_vmstate;
-    /* Supported by TYPE_VIRT_MACHINE */
-    dc->user_creatable = true;
 }
 
 static const TypeInfo vfio_amd_xgbe_dev_info = {
diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c
index 30b08ad375d5e..c7ab4ff57ada0 100644
--- a/hw/vfio/ap.c
+++ b/hw/vfio/ap.c
@@ -257,6 +257,15 @@ static void vfio_ap_class_init(ObjectClass *klass, void *data)
     dc->hotpluggable = true;
     device_class_set_legacy_reset(dc, vfio_ap_reset);
     dc->bus_type = TYPE_AP_BUS;
+
+    object_class_property_set_description(klass, /* 3.1 */
+                                          "sysfsdev",
+                                          "Host sysfs path of assigned device");
+#ifdef CONFIG_IOMMUFD
+    object_class_property_set_description(klass, /* 9.0 */
+                                          "iommufd",
+                                          "Set host IOMMUFD backend device");
+#endif
 }
 
 static const TypeInfo vfio_ap_info = {
diff --git a/hw/vfio/calxeda-xgmac.c b/hw/vfio/calxeda-xgmac.c
index 87c382e736104..a5ef262def950 100644
--- a/hw/vfio/calxeda-xgmac.c
+++ b/hw/vfio/calxeda-xgmac.c
@@ -15,12 +15,14 @@
 #include "hw/vfio/vfio-calxeda-xgmac.h"
 #include "migration/vmstate.h"
 #include "qemu/module.h"
+#include "qemu/error-report.h"
 
 static void calxeda_xgmac_realize(DeviceState *dev, Error **errp)
 {
     VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(dev);
     VFIOCalxedaXgmacDeviceClass *k = VFIO_CALXEDA_XGMAC_DEVICE_GET_CLASS(dev);
 
+    warn_report("-device vfio-calxeda-xgmac is deprecated");
     vdev->compat = g_strdup("calxeda,hb-xgmac");
     vdev->num_compat = 1;
 
@@ -41,8 +43,6 @@ static void vfio_calxeda_xgmac_class_init(ObjectClass *klass, void *data)
                                     &vcxc->parent_realize);
     dc->desc = "VFIO Calxeda XGMAC";
     dc->vmsd = &vfio_platform_calxeda_xgmac_vmstate;
-    /* Supported by TYPE_VIRT_MACHINE */
-    dc->user_creatable = true;
 }
 
 static const TypeInfo vfio_calxeda_xgmac_dev_info = {
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index 67bc137f9be6d..e5e0d9e3e7ed1 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -51,17 +51,8 @@ struct VFIOCCWDevice {
     EventNotifier crw_notifier;
     EventNotifier req_notifier;
     bool force_orb_pfch;
-    bool warned_orb_pfch;
 };
 
-static inline void warn_once_pfch(VFIOCCWDevice *vcdev, SubchDev *sch,
-                                  const char *msg)
-{
-    warn_report_once_cond(&vcdev->warned_orb_pfch,
-                          "vfio-ccw (devno %x.%x.%04x): %s",
-                          sch->cssid, sch->ssid, sch->devno, msg);
-}
-
 static void vfio_ccw_compute_needs_reset(VFIODevice *vdev)
 {
     vdev->needs_reset = false;
@@ -83,7 +74,8 @@ static IOInstEnding vfio_ccw_handle_request(SubchDev *sch)
 
     if (!(sch->orb.ctrl0 & ORB_CTRL0_MASK_PFCH) && vcdev->force_orb_pfch) {
         sch->orb.ctrl0 |= ORB_CTRL0_MASK_PFCH;
-        warn_once_pfch(vcdev, sch, "PFCH flag forced");
+        warn_report_once("vfio-ccw (devno %x.%x.%04x): PFCH flag forced",
+                         sch->cssid, sch->ssid, sch->devno);
     }
 
     QEMU_BUILD_BUG_ON(sizeof(region->orb_area) != sizeof(ORB));
@@ -717,6 +709,21 @@ static void vfio_ccw_class_init(ObjectClass *klass, void *data)
     cdc->handle_halt = vfio_ccw_handle_halt;
     cdc->handle_clear = vfio_ccw_handle_clear;
     cdc->handle_store = vfio_ccw_handle_store;
+
+    object_class_property_set_description(klass, /* 2.10 */
+                                          "sysfsdev",
+                                          "Host sysfs path of assigned device");
+    object_class_property_set_description(klass, /* 3.0 */
+                                          "force-orb-pfch",
+                                          "Force unlimited prefetch");
+#ifdef CONFIG_IOMMUFD
+    object_class_property_set_description(klass, /* 9.0 */
+                                          "iommufd",
+                                          "Set host IOMMUFD backend device");
+#endif
+    object_class_property_set_description(klass, /* 9.2 */
+                                          "loadparm",
+                                          "Define which devices that can be used for booting");
 }
 
 static const TypeInfo vfio_ccw_info = {
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index abbdc56b6dbb5..1a0d9290f88c9 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -30,6 +30,7 @@
 #include "exec/address-spaces.h"
 #include "exec/memory.h"
 #include "exec/ram_addr.h"
+#include "exec/target_page.h"
 #include "hw/hw.h"
 #include "qemu/error-report.h"
 #include "qemu/main-loop.h"
@@ -42,6 +43,7 @@
 #include "migration/misc.h"
 #include "migration/blocker.h"
 #include "migration/qemu-file.h"
+#include "system/tcg.h"
 #include "system/tpm.h"
 
 VFIODeviceList vfio_device_list =
@@ -392,13 +394,14 @@ static void vfio_register_ram_discard_listener(VFIOContainerBase *bcontainer,
                                                MemoryRegionSection *section)
 {
     RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
+    int target_page_size = qemu_target_page_size();
     VFIORamDiscardListener *vrdl;
 
     /* Ignore some corner cases not relevant in practice. */
-    g_assert(QEMU_IS_ALIGNED(section->offset_within_region, TARGET_PAGE_SIZE));
+    g_assert(QEMU_IS_ALIGNED(section->offset_within_region, target_page_size));
     g_assert(QEMU_IS_ALIGNED(section->offset_within_address_space,
-                             TARGET_PAGE_SIZE));
-    g_assert(QEMU_IS_ALIGNED(int128_get64(section->size), TARGET_PAGE_SIZE));
+                             target_page_size));
+    g_assert(QEMU_IS_ALIGNED(int128_get64(section->size), target_page_size));
 
     vrdl = g_new0(VFIORamDiscardListener, 1);
     vrdl->bcontainer = bcontainer;
@@ -1404,6 +1407,7 @@ void vfio_reset_handler(void *opaque)
 {
     VFIODevice *vbasedev;
 
+    trace_vfio_reset_handler();
     QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) {
         if (vbasedev->dev->realized) {
             vbasedev->ops->vfio_compute_needs_reset(vbasedev);
diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
index b1a237edd6608..265fffc2aa52d 100644
--- a/hw/vfio/igd.c
+++ b/hw/vfio/igd.c
@@ -15,6 +15,7 @@
 #include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
+#include "hw/boards.h"
 #include "hw/hw.h"
 #include "hw/nvram/fw_cfg.h"
 #include "pci.h"
@@ -106,40 +107,15 @@ static int igd_gen(VFIOPCIDevice *vdev)
     return -1;
 }
 
-typedef struct VFIOIGDQuirk {
-    struct VFIOPCIDevice *vdev;
-    uint32_t index;
-    uint64_t bdsm;
-} VFIOIGDQuirk;
-
+#define IGD_ASLS 0xfc /* ASL Storage Register */
 #define IGD_GMCH 0x50 /* Graphics Control Register */
 #define IGD_BDSM 0x5c /* Base Data of Stolen Memory */
 #define IGD_BDSM_GEN11 0xc0 /* Base Data of Stolen Memory of gen 11 and later */
 
 #define IGD_GMCH_GEN6_GMS_SHIFT     3       /* SNB_GMCH in i915 */
 #define IGD_GMCH_GEN6_GMS_MASK      0x1f
-#define IGD_GMCH_GEN6_GGMS_SHIFT    8
-#define IGD_GMCH_GEN6_GGMS_MASK     0x3
 #define IGD_GMCH_GEN8_GMS_SHIFT     8       /* BDW_GMCH in i915 */
 #define IGD_GMCH_GEN8_GMS_MASK      0xff
-#define IGD_GMCH_GEN8_GGMS_SHIFT    6
-#define IGD_GMCH_GEN8_GGMS_MASK     0x3
-
-static uint64_t igd_gtt_memory_size(int gen, uint16_t gmch)
-{
-    uint64_t ggms;
-
-    if (gen < 8) {
-        ggms = (gmch >> IGD_GMCH_GEN6_GGMS_SHIFT) & IGD_GMCH_GEN6_GGMS_MASK;
-    } else {
-        ggms = (gmch >> IGD_GMCH_GEN8_GGMS_SHIFT) & IGD_GMCH_GEN8_GGMS_MASK;
-        if (ggms != 0) {
-            ggms = 1ULL << ggms;
-        }
-    }
-
-    return ggms * MiB;
-}
 
 static uint64_t igd_stolen_memory_size(int gen, uint32_t gmch)
 {
@@ -164,6 +140,82 @@ static uint64_t igd_stolen_memory_size(int gen, uint32_t gmch)
     return 0;
 }
 
+/*
+ * The OpRegion includes the Video BIOS Table, which seems important for
+ * telling the driver what sort of outputs it has.  Without this, the device
+ * may work in the guest, but we may not get output.  This also requires BIOS
+ * support to reserve and populate a section of guest memory sufficient for
+ * the table and to write the base address of that memory to the ASLS register
+ * of the IGD device.
+ */
+static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
+                                       struct vfio_region_info *info,
+                                       Error **errp)
+{
+    int ret;
+
+    vdev->igd_opregion = g_malloc0(info->size);
+    ret = pread(vdev->vbasedev.fd, vdev->igd_opregion,
+                info->size, info->offset);
+    if (ret != info->size) {
+        error_setg(errp, "failed to read IGD OpRegion");
+        g_free(vdev->igd_opregion);
+        vdev->igd_opregion = NULL;
+        return false;
+    }
+
+    /*
+     * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to
+     * allocate 32bit reserved memory for, copy these contents into, and write
+     * the reserved memory base address to the device ASLS register at 0xFC.
+     * Alignment of this reserved region seems flexible, but using a 4k page
+     * alignment seems to work well.  This interface assumes a single IGD
+     * device, which may be at VM address 00:02.0 in legacy mode or another
+     * address in UPT mode.
+     *
+     * NB, there may be future use cases discovered where the VM should have
+     * direct interaction with the host OpRegion, in which case the write to
+     * the ASLS register would trigger MemoryRegion setup to enable that.
+     */
+    fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion",
+                    vdev->igd_opregion, info->size);
+
+    trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name);
+
+    pci_set_long(vdev->pdev.config + IGD_ASLS, 0);
+    pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0);
+    pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0);
+
+    return true;
+}
+
+static bool vfio_pci_igd_setup_opregion(VFIOPCIDevice *vdev, Error **errp)
+{
+    g_autofree struct vfio_region_info *opregion = NULL;
+    int ret;
+
+    /* Hotplugging is not supported for opregion access */
+    if (vdev->pdev.qdev.hotplugged) {
+        error_setg(errp, "IGD OpRegion is not supported on hotplugged device");
+        return false;
+    }
+
+    ret = vfio_get_dev_region_info(&vdev->vbasedev,
+                    VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
+                    VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion);
+    if (ret) {
+        error_setg_errno(errp, -ret,
+                         "Device does not supports IGD OpRegion feature");
+        return false;
+    }
+
+    if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) {
+        return false;
+    }
+
+    return true;
+}
+
 /*
  * The rather short list of registers that we copy from the host devices.
  * The LPC/ISA bridge values are definitely needed to support the vBIOS, the
@@ -300,129 +352,72 @@ static int vfio_pci_igd_lpc_init(VFIOPCIDevice *vdev,
     return ret;
 }
 
-/*
- * IGD Gen8 and newer support up to 8MB for the GTT and use a 64bit PTE
- * entry, older IGDs use 2MB and 32bit.  Each PTE maps a 4k page.  Therefore
- * we either have 2M/4k * 4 = 2k or 8M/4k * 8 = 16k as the maximum iobar index
- * for programming the GTT.
- *
- * See linux:include/drm/i915_drm.h for shift and mask values.
- */
-static int vfio_igd_gtt_max(VFIOPCIDevice *vdev)
-{
-    uint32_t gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, sizeof(gmch));
-    int gen = igd_gen(vdev);
-    uint64_t ggms_size = igd_gtt_memory_size(gen, gmch);
-
-    return (ggms_size / (4 * KiB)) * (gen < 8 ? 4 : 8);
-}
-
-/*
- * The IGD ROM will make use of stolen memory (GGMS) for support of VESA modes.
- * Somehow the host stolen memory range is used for this, but how the ROM gets
- * it is a mystery, perhaps it's hardcoded into the ROM.  Thankfully though, it
- * reprograms the GTT through the IOBAR where we can trap it and transpose the
- * programming to the VM allocated buffer.  That buffer gets reserved by the VM
- * firmware via the fw_cfg entry added below.  Here we're just monitoring the
- * IOBAR address and data registers to detect a write sequence targeting the
- * GTTADR.  This code is developed by observed behavior and doesn't have a
- * direct spec reference, unfortunately.
- */
-static uint64_t vfio_igd_quirk_data_read(void *opaque,
-                                         hwaddr addr, unsigned size)
-{
-    VFIOIGDQuirk *igd = opaque;
-    VFIOPCIDevice *vdev = igd->vdev;
-
-    igd->index = ~0;
-
-    return vfio_region_read(&vdev->bars[4].region, addr + 4, size);
-}
-
-static void vfio_igd_quirk_data_write(void *opaque, hwaddr addr,
-                                      uint64_t data, unsigned size)
+static bool vfio_pci_igd_setup_lpc_bridge(VFIOPCIDevice *vdev, Error **errp)
 {
-    VFIOIGDQuirk *igd = opaque;
-    VFIOPCIDevice *vdev = igd->vdev;
-    uint64_t val = data;
-    int gen = igd_gen(vdev);
+    g_autofree struct vfio_region_info *host = NULL;
+    g_autofree struct vfio_region_info *lpc = NULL;
+    PCIDevice *lpc_bridge;
+    int ret;
 
     /*
-     * Programming the GGMS starts at index 0x1 and uses every 4th index (ie.
-     * 0x1, 0x5, 0x9, 0xd,...).  For pre-Gen8 each 4-byte write is a whole PTE
-     * entry, with 0th bit enable set.  For Gen8 and up, PTEs are 64bit, so
-     * entries 0x5 & 0xd are the high dword, in our case zero.  Each PTE points
-     * to a 4k page, which we translate to a page from the VM allocated region,
-     * pointed to by the BDSM register.  If this is not set, we fail.
-     *
-     * We trap writes to the full configured GTT size, but we typically only
-     * see the vBIOS writing up to (nearly) the 1MB barrier.  In fact it often
-     * seems to miss the last entry for an even 1MB GTT.  Doing a gratuitous
-     * write of that last entry does work, but is hopefully unnecessary since
-     * we clear the previous GTT on initialization.
+     * Copying IDs or creating new devices are not supported on hotplug
      */
-    if ((igd->index % 4 == 1) && igd->index < vfio_igd_gtt_max(vdev)) {
-        if (gen < 8 || (igd->index % 8 == 1)) {
-            uint64_t base;
-
-            if (gen < 11) {
-                base = pci_get_long(vdev->pdev.config + IGD_BDSM);
-            } else {
-                base = pci_get_quad(vdev->pdev.config + IGD_BDSM_GEN11);
-            }
-            if (!base) {
-                hw_error("vfio-igd: Guest attempted to program IGD GTT before "
-                         "BIOS reserved stolen memory.  Unsupported BIOS?");
-            }
-
-            val = data - igd->bdsm + base;
-        } else {
-            val = 0; /* upper 32bits of pte, we only enable below 4G PTEs */
-        }
-
-        trace_vfio_pci_igd_bar4_write(vdev->vbasedev.name,
-                                      igd->index, data, val);
+    if (vdev->pdev.qdev.hotplugged) {
+        error_setg(errp, "IGD LPC is not supported on hotplugged device");
+        return false;
     }
 
-    vfio_region_write(&vdev->bars[4].region, addr + 4, val, size);
-
-    igd->index = ~0;
-}
-
-static const MemoryRegionOps vfio_igd_data_quirk = {
-    .read = vfio_igd_quirk_data_read,
-    .write = vfio_igd_quirk_data_write,
-    .endianness = DEVICE_LITTLE_ENDIAN,
-};
-
-static uint64_t vfio_igd_quirk_index_read(void *opaque,
-                                          hwaddr addr, unsigned size)
-{
-    VFIOIGDQuirk *igd = opaque;
-    VFIOPCIDevice *vdev = igd->vdev;
+    /*
+     * We need to create an LPC/ISA bridge at PCI bus address 00:1f.0 that we
+     * can stuff host values into, so if there's already one there and it's not
+     * one we can hack on, this quirk is no-go.  Sorry Q35.
+     */
+    lpc_bridge = pci_find_device(pci_device_root_bus(&vdev->pdev),
+                                 0, PCI_DEVFN(0x1f, 0));
+    if (lpc_bridge && !object_dynamic_cast(OBJECT(lpc_bridge),
+                                           "vfio-pci-igd-lpc-bridge")) {
+        error_setg(errp,
+                   "Cannot create LPC bridge due to existing device at 1f.0");
+        return false;
+    }
 
-    igd->index = ~0;
+    /*
+     * Check whether we have all the vfio device specific regions to
+     * support LPC quirk (added in Linux v4.6).
+     */
+    ret = vfio_get_dev_region_info(&vdev->vbasedev,
+                        VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
+                        VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG, &lpc);
+    if (ret) {
+        error_setg(errp, "IGD LPC bridge access is not supported by kernel");
+        return false;
+    }
 
-    return vfio_region_read(&vdev->bars[4].region, addr, size);
-}
+    ret = vfio_get_dev_region_info(&vdev->vbasedev,
+                        VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
+                        VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG, &host);
+    if (ret) {
+        error_setg(errp, "IGD host bridge access is not supported by kernel");
+        return false;
+    }
 
-static void vfio_igd_quirk_index_write(void *opaque, hwaddr addr,
-                                       uint64_t data, unsigned size)
-{
-    VFIOIGDQuirk *igd = opaque;
-    VFIOPCIDevice *vdev = igd->vdev;
+    /* Create/modify LPC bridge */
+    ret = vfio_pci_igd_lpc_init(vdev, lpc);
+    if (ret) {
+        error_setg(errp, "Failed to create/modify LPC bridge for IGD");
+        return false;
+    }
 
-    igd->index = data;
+    /* Stuff some host values into the VM PCI host bridge */
+    ret = vfio_pci_igd_host_init(vdev, host);
+    if (ret) {
+        error_setg(errp, "Failed to modify host bridge for IGD");
+        return false;
+    }
 
-    vfio_region_write(&vdev->bars[4].region, addr, data, size);
+    return true;
 }
 
-static const MemoryRegionOps vfio_igd_index_quirk = {
-    .read = vfio_igd_quirk_index_read,
-    .write = vfio_igd_quirk_index_write,
-    .endianness = DEVICE_LITTLE_ENDIAN,
-};
-
 #define IGD_GGC_MMIO_OFFSET     0x108040
 #define IGD_BDSM_MMIO_OFFSET    0x1080C0
 
@@ -438,9 +433,7 @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr)
      * bus address.
      */
     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) ||
-        !vfio_is_vga(vdev) || nr != 0 ||
-        &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev),
-                                       0, PCI_DEVFN(0x2, 0))) {
+        !vfio_is_vga(vdev) || nr != 0) {
         return;
     }
 
@@ -488,20 +481,13 @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr)
     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, bdsm_quirk, next);
 }
 
-void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
+static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
 {
-    g_autofree struct vfio_region_info *rom = NULL;
-    g_autofree struct vfio_region_info *opregion = NULL;
-    g_autofree struct vfio_region_info *host = NULL;
-    g_autofree struct vfio_region_info *lpc = NULL;
-    VFIOQuirk *quirk;
-    VFIOIGDQuirk *igd;
-    PCIDevice *lpc_bridge;
-    int i, ret, gen;
-    uint64_t ggms_size, gms_size;
+    int ret, gen;
+    uint64_t gms_size;
     uint64_t *bdsm_size;
     uint32_t gmch;
-    uint16_t cmd_orig, cmd;
+    bool legacy_mode_enabled = false;
     Error *err = NULL;
 
     /*
@@ -510,24 +496,8 @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
      * PCI bus address.
      */
     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) ||
-        !vfio_is_vga(vdev) || nr != 4 ||
-        &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev),
-                                       0, PCI_DEVFN(0x2, 0))) {
-        return;
-    }
-
-    /*
-     * We need to create an LPC/ISA bridge at PCI bus address 00:1f.0 that we
-     * can stuff host values into, so if there's already one there and it's not
-     * one we can hack on, legacy mode is no-go.  Sorry Q35.
-     */
-    lpc_bridge = pci_find_device(pci_device_root_bus(&vdev->pdev),
-                                 0, PCI_DEVFN(0x1f, 0));
-    if (lpc_bridge && !object_dynamic_cast(OBJECT(lpc_bridge),
-                                           "vfio-pci-igd-lpc-bridge")) {
-        error_report("IGD device %s cannot support legacy mode due to existing "
-                     "devices at address 1f.0", vdev->vbasedev.name);
-        return;
+        !vfio_is_vga(vdev)) {
+        return true;
     }
 
     /*
@@ -539,126 +509,77 @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
     if (gen == -1) {
         error_report("IGD device %s is unsupported in legacy mode, "
                      "try SandyBridge or newer", vdev->vbasedev.name);
-        return;
-    }
-
-    /*
-     * Most of what we're doing here is to enable the ROM to run, so if
-     * there's no ROM, there's no point in setting up this quirk.
-     * NB. We only seem to get BIOS ROMs, so a UEFI VM would need CSM support.
-     */
-    ret = vfio_get_region_info(&vdev->vbasedev,
-                               VFIO_PCI_ROM_REGION_INDEX, &rom);
-    if ((ret || !rom->size) && !vdev->pdev.romfile) {
-        error_report("IGD device %s has no ROM, legacy mode disabled",
-                     vdev->vbasedev.name);
-        return;
-    }
-
-    /*
-     * Ignore the hotplug corner case, mark the ROM failed, we can't
-     * create the devices we need for legacy mode in the hotplug scenario.
-     */
-    if (vdev->pdev.qdev.hotplugged) {
-        error_report("IGD device %s hotplugged, ROM disabled, "
-                     "legacy mode disabled", vdev->vbasedev.name);
-        vdev->rom_read_failed = true;
-        return;
-    }
-
-    /*
-     * Check whether we have all the vfio device specific regions to
-     * support legacy mode (added in Linux v4.6).  If not, bail.
-     */
-    ret = vfio_get_dev_region_info(&vdev->vbasedev,
-                        VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
-                        VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion);
-    if (ret) {
-        error_report("IGD device %s does not support OpRegion access,"
-                     "legacy mode disabled", vdev->vbasedev.name);
-        return;
-    }
-
-    ret = vfio_get_dev_region_info(&vdev->vbasedev,
-                        VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
-                        VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG, &host);
-    if (ret) {
-        error_report("IGD device %s does not support host bridge access,"
-                     "legacy mode disabled", vdev->vbasedev.name);
-        return;
-    }
-
-    ret = vfio_get_dev_region_info(&vdev->vbasedev,
-                        VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
-                        VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG, &lpc);
-    if (ret) {
-        error_report("IGD device %s does not support LPC bridge access,"
-                     "legacy mode disabled", vdev->vbasedev.name);
-        return;
+        return true;
     }
 
     gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4);
 
     /*
-     * If IGD VGA Disable is clear (expected) and VGA is not already enabled,
-     * try to enable it.  Probably shouldn't be using legacy mode without VGA,
-     * but also no point in us enabling VGA if disabled in hardware.
+     * For backward compatibility, enable legacy mode when
+     * - Machine type is i440fx (pc_piix)
+     * - IGD device is at guest BDF 00:02.0
+     * - Not manually disabled by x-igd-legacy-mode=off
      */
-    if (!(gmch & 0x2) && !vdev->vga && !vfio_populate_vga(vdev, &err)) {
-        error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
-        error_report("IGD device %s failed to enable VGA access, "
-                     "legacy mode disabled", vdev->vbasedev.name);
-        return;
-    }
+    if ((vdev->igd_legacy_mode != ON_OFF_AUTO_OFF) &&
+        !strcmp(MACHINE_GET_CLASS(qdev_get_machine())->family, "pc_piix") &&
+        (&vdev->pdev == pci_find_device(pci_device_root_bus(&vdev->pdev),
+        0, PCI_DEVFN(0x2, 0)))) {
+        /*
+         * IGD legacy mode requires:
+         * - VBIOS in ROM BAR or file
+         * - VGA IO/MMIO ranges are claimed by IGD
+         * - OpRegion
+         * - Same LPC bridge and Host bridge VID/DID/SVID/SSID as host
+         */
+        g_autofree struct vfio_region_info *rom = NULL;
+
+        legacy_mode_enabled = true;
+        info_report("IGD legacy mode enabled, "
+                    "use x-igd-legacy-mode=off to disable it if unwanted.");
+
+        /*
+         * Most of what we're doing here is to enable the ROM to run, so if
+         * there's no ROM, there's no point in setting up this quirk.
+         * NB. We only seem to get BIOS ROMs, so UEFI VM would need CSM support.
+         */
+        ret = vfio_get_region_info(&vdev->vbasedev,
+                                   VFIO_PCI_ROM_REGION_INDEX, &rom);
+        if ((ret || !rom->size) && !vdev->pdev.romfile) {
+            error_setg(&err, "Device has no ROM");
+            goto error;
+        }
 
-    /* Create our LPC/ISA bridge */
-    ret = vfio_pci_igd_lpc_init(vdev, lpc);
-    if (ret) {
-        error_report("IGD device %s failed to create LPC bridge, "
-                     "legacy mode disabled", vdev->vbasedev.name);
-        return;
-    }
+        /*
+         * If IGD VGA Disable is clear (expected) and VGA is not already
+         * enabled, try to enable it. Probably shouldn't be using legacy mode
+         * without VGA, but also no point in us enabling VGA if disabled in
+         * hardware.
+         */
+        if (!(gmch & 0x2) && !vdev->vga && !vfio_populate_vga(vdev, &err)) {
+            error_setg(&err, "Unable to enable VGA access");
+            goto error;
+        }
 
-    /* Stuff some host values into the VM PCI host bridge */
-    ret = vfio_pci_igd_host_init(vdev, host);
-    if (ret) {
-        error_report("IGD device %s failed to modify host bridge, "
-                     "legacy mode disabled", vdev->vbasedev.name);
-        return;
+        /* Enable OpRegion and LPC bridge quirk */
+        vdev->features |= VFIO_FEATURE_ENABLE_IGD_OPREGION;
+        vdev->features |= VFIO_FEATURE_ENABLE_IGD_LPC;
+    } else if (vdev->igd_legacy_mode == ON_OFF_AUTO_ON) {
+        error_setg(&err,
+                   "Machine is not i440fx or assigned BDF is not 00:02.0");
+        goto error;
     }
 
     /* Setup OpRegion access */
-    if (!vfio_pci_igd_opregion_init(vdev, opregion, &err)) {
-        error_append_hint(&err, "IGD legacy mode disabled\n");
-        error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
-        return;
+    if ((vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) &&
+        !vfio_pci_igd_setup_opregion(vdev, errp)) {
+        goto error;
     }
 
-    /* Setup our quirk to munge GTT addresses to the VM allocated buffer */
-    quirk = vfio_quirk_alloc(2);
-    igd = quirk->data = g_malloc0(sizeof(*igd));
-    igd->vdev = vdev;
-    igd->index = ~0;
-    if (gen < 11) {
-        igd->bdsm = vfio_pci_read_config(&vdev->pdev, IGD_BDSM, 4);
-    } else {
-        igd->bdsm = vfio_pci_read_config(&vdev->pdev, IGD_BDSM_GEN11, 4);
-        igd->bdsm |=
-            (uint64_t)vfio_pci_read_config(&vdev->pdev, IGD_BDSM_GEN11 + 4, 4) << 32;
-    }
-    igd->bdsm &= ~((1 * MiB) - 1); /* 1MB aligned */
-
-    memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_igd_index_quirk,
-                          igd, "vfio-igd-index-quirk", 4);
-    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
-                                        0, &quirk->mem[0], 1);
-
-    memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_igd_data_quirk,
-                          igd, "vfio-igd-data-quirk", 4);
-    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
-                                        4, &quirk->mem[1], 1);
-
-    QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
+    /* Setup LPC bridge / Host bridge PCI IDs */
+    if ((vdev->features & VFIO_FEATURE_ENABLE_IGD_LPC) &&
+        !vfio_pci_igd_setup_lpc_bridge(vdev, errp)) {
+        goto error;
+     }
 
     /*
      * Allow user to override dsm size using x-igd-gms option, in multiples of
@@ -685,7 +606,6 @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
         }
     }
 
-    ggms_size = igd_gtt_memory_size(gen, gmch);
     gms_size = igd_stolen_memory_size(gen, gmch);
 
     /*
@@ -697,7 +617,7 @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
      * config offset 0x5C.
      */
     bdsm_size = g_malloc(sizeof(*bdsm_size));
-    *bdsm_size = cpu_to_le64(ggms_size + gms_size);
+    *bdsm_size = cpu_to_le64(gms_size);
     fw_cfg_add_file(fw_cfg_find(), "etc/igd-bdsm-size",
                     bdsm_size, sizeof(*bdsm_size));
 
@@ -717,37 +637,46 @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
         pci_set_quad(vdev->emulated_config_bits + IGD_BDSM_GEN11, ~0);
     }
 
+    trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name, (gms_size / MiB));
+
+    return true;
+
+error:
     /*
-     * This IOBAR gives us access to GTTADR, which allows us to write to
-     * the GTT itself.  So let's go ahead and write zero to all the GTT
-     * entries to avoid spurious DMA faults.  Be sure I/O access is enabled
-     * before talking to the device.
+     * When legacy mode is implicity enabled, continue on error,
+     * to keep compatibility
      */
-    if (pread(vdev->vbasedev.fd, &cmd_orig, sizeof(cmd_orig),
-              vdev->config_offset + PCI_COMMAND) != sizeof(cmd_orig)) {
-        error_report("IGD device %s - failed to read PCI command register",
-                     vdev->vbasedev.name);
+    if (legacy_mode_enabled && (vdev->igd_legacy_mode == ON_OFF_AUTO_AUTO)) {
+        error_report_err(err);
+        error_report("IGD legacy mode disabled");
+        return true;
     }
 
-    cmd = cmd_orig | PCI_COMMAND_IO;
+    error_propagate(errp, err);
+    return false;
+}
 
-    if (pwrite(vdev->vbasedev.fd, &cmd, sizeof(cmd),
-               vdev->config_offset + PCI_COMMAND) != sizeof(cmd)) {
-        error_report("IGD device %s - failed to write PCI command register",
-                     vdev->vbasedev.name);
+/*
+ * KVMGT/GVT-g vGPU exposes an emulated OpRegion. So far, users have to specify
+ * x-igd-opregion=on to enable the access.
+ * TODO: Check VID/DID and enable opregion access automatically
+ */
+static bool vfio_pci_kvmgt_config_quirk(VFIOPCIDevice *vdev, Error **errp)
+{
+    if ((vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) &&
+        !vfio_pci_igd_setup_opregion(vdev, errp)) {
+        return false;
     }
 
-    for (i = 1; i < vfio_igd_gtt_max(vdev); i += 4) {
-        vfio_region_write(&vdev->bars[4].region, 0, i, 4);
-        vfio_region_write(&vdev->bars[4].region, 4, 0, 4);
-    }
+    return true;
+}
 
-    if (pwrite(vdev->vbasedev.fd, &cmd_orig, sizeof(cmd_orig),
-               vdev->config_offset + PCI_COMMAND) != sizeof(cmd_orig)) {
-        error_report("IGD device %s - failed to restore PCI command register",
-                     vdev->vbasedev.name);
+bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
+{
+    /* KVMGT/GVT-g vGPU is exposed as mdev */
+    if (vdev->vbasedev.mdev) {
+        return vfio_pci_kvmgt_config_quirk(vdev, errp);
     }
 
-    trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name,
-                                    (ggms_size + gms_size) / MiB);
+    return vfio_pci_igd_config_quirk(vdev, errp);
 }
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index df61edffc0843..42c8412bbf507 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -25,7 +25,6 @@
 #include "qemu/cutils.h"
 #include "qemu/chardev_open.h"
 #include "pci.h"
-#include "exec/ram_addr.h"
 
 static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova,
                             ram_addr_t size, void *vaddr, bool readonly)
diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build
index bba776f75cc71..a8939c838657b 100644
--- a/hw/vfio/meson.build
+++ b/hw/vfio/meson.build
@@ -1,26 +1,32 @@
 vfio_ss = ss.source_set()
 vfio_ss.add(files(
-  'helpers.c',
   'common.c',
-  'container-base.c',
   'container.c',
-  'migration.c',
-  'cpr.c',
 ))
 vfio_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr.c'))
-vfio_ss.add(when: 'CONFIG_IOMMUFD', if_true: files(
-  'iommufd.c',
-))
 vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files(
-  'display.c',
   'pci-quirks.c',
   'pci.c',
 ))
 vfio_ss.add(when: 'CONFIG_VFIO_CCW', if_true: files('ccw.c'))
 vfio_ss.add(when: 'CONFIG_VFIO_PLATFORM', if_true: files('platform.c'))
-vfio_ss.add(when: 'CONFIG_VFIO_XGMAC', if_true: files('calxeda-xgmac.c'))
-vfio_ss.add(when: 'CONFIG_VFIO_AMD_XGBE', if_true: files('amd-xgbe.c'))
 vfio_ss.add(when: 'CONFIG_VFIO_AP', if_true: files('ap.c'))
 vfio_ss.add(when: 'CONFIG_VFIO_IGD', if_true: files('igd.c'))
 
 specific_ss.add_all(when: 'CONFIG_VFIO', if_true: vfio_ss)
+
+system_ss.add(when: 'CONFIG_VFIO_XGMAC', if_true: files('calxeda-xgmac.c'))
+system_ss.add(when: 'CONFIG_VFIO_AMD_XGBE', if_true: files('amd-xgbe.c'))
+system_ss.add(when: 'CONFIG_VFIO', if_true: files(
+  'helpers.c',
+  'container-base.c',
+  'migration.c',
+  'migration-multifd.c',
+  'cpr.c',
+))
+system_ss.add(when: ['CONFIG_VFIO', 'CONFIG_IOMMUFD'], if_true: files(
+  'iommufd.c',
+))
+system_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files(
+  'display.c',
+))
diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
new file mode 100644
index 0000000000000..378f6f3bf01f6
--- /dev/null
+++ b/hw/vfio/migration-multifd.c
@@ -0,0 +1,684 @@
+/*
+ * Multifd VFIO migration
+ *
+ * Copyright (C) 2024,2025 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "hw/vfio/vfio-common.h"
+#include "migration/misc.h"
+#include "qapi/error.h"
+#include "qemu/bswap.h"
+#include "qemu/error-report.h"
+#include "qemu/lockable.h"
+#include "qemu/main-loop.h"
+#include "qemu/thread.h"
+#include "io/channel-buffer.h"
+#include "migration/qemu-file.h"
+#include "migration-multifd.h"
+#include "trace.h"
+
+#define VFIO_DEVICE_STATE_CONFIG_STATE (1)
+
+#define VFIO_DEVICE_STATE_PACKET_VER_CURRENT (0)
+
+typedef struct VFIODeviceStatePacket {
+    uint32_t version;
+    uint32_t idx;
+    uint32_t flags;
+    uint8_t data[0];
+} QEMU_PACKED VFIODeviceStatePacket;
+
+/* type safety */
+typedef struct VFIOStateBuffers {
+    GArray *array;
+} VFIOStateBuffers;
+
+typedef struct VFIOStateBuffer {
+    bool is_present;
+    char *data;
+    size_t len;
+} VFIOStateBuffer;
+
+typedef struct VFIOMultifd {
+    bool load_bufs_thread_running;
+    bool load_bufs_thread_want_exit;
+
+    VFIOStateBuffers load_bufs;
+    QemuCond load_bufs_buffer_ready_cond;
+    QemuCond load_bufs_thread_finished_cond;
+    QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */
+    uint32_t load_buf_idx;
+    uint32_t load_buf_idx_last;
+} VFIOMultifd;
+
+static void vfio_state_buffer_clear(gpointer data)
+{
+    VFIOStateBuffer *lb = data;
+
+    if (!lb->is_present) {
+        return;
+    }
+
+    g_clear_pointer(&lb->data, g_free);
+    lb->is_present = false;
+}
+
+static void vfio_state_buffers_init(VFIOStateBuffers *bufs)
+{
+    bufs->array = g_array_new(FALSE, TRUE, sizeof(VFIOStateBuffer));
+    g_array_set_clear_func(bufs->array, vfio_state_buffer_clear);
+}
+
+static void vfio_state_buffers_destroy(VFIOStateBuffers *bufs)
+{
+    g_clear_pointer(&bufs->array, g_array_unref);
+}
+
+static void vfio_state_buffers_assert_init(VFIOStateBuffers *bufs)
+{
+    assert(bufs->array);
+}
+
+static unsigned int vfio_state_buffers_size_get(VFIOStateBuffers *bufs)
+{
+    return bufs->array->len;
+}
+
+static void vfio_state_buffers_size_set(VFIOStateBuffers *bufs,
+                                        unsigned int size)
+{
+    g_array_set_size(bufs->array, size);
+}
+
+static VFIOStateBuffer *vfio_state_buffers_at(VFIOStateBuffers *bufs,
+                                              unsigned int idx)
+{
+    return &g_array_index(bufs->array, VFIOStateBuffer, idx);
+}
+
+/* called with load_bufs_mutex locked */
+static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev,
+                                          VFIODeviceStatePacket *packet,
+                                          size_t packet_total_size,
+                                          Error **errp)
+{
+    VFIOMigration *migration = vbasedev->migration;
+    VFIOMultifd *multifd = migration->multifd;
+    VFIOStateBuffer *lb;
+
+    vfio_state_buffers_assert_init(&multifd->load_bufs);
+    if (packet->idx >= vfio_state_buffers_size_get(&multifd->load_bufs)) {
+        vfio_state_buffers_size_set(&multifd->load_bufs, packet->idx + 1);
+    }
+
+    lb = vfio_state_buffers_at(&multifd->load_bufs, packet->idx);
+    if (lb->is_present) {
+        error_setg(errp, "%s: state buffer %" PRIu32 " already filled",
+                   vbasedev->name, packet->idx);
+        return false;
+    }
+
+    assert(packet->idx >= multifd->load_buf_idx);
+
+    lb->data = g_memdup2(&packet->data, packet_total_size - sizeof(*packet));
+    lb->len = packet_total_size - sizeof(*packet);
+    lb->is_present = true;
+
+    return true;
+}
+
+bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size,
+                                    Error **errp)
+{
+    VFIODevice *vbasedev = opaque;
+    VFIOMigration *migration = vbasedev->migration;
+    VFIOMultifd *multifd = migration->multifd;
+    VFIODeviceStatePacket *packet = (VFIODeviceStatePacket *)data;
+
+    if (!vfio_multifd_transfer_enabled(vbasedev)) {
+        error_setg(errp,
+                   "%s: got device state packet but not doing multifd transfer",
+                   vbasedev->name);
+        return false;
+    }
+
+    assert(multifd);
+
+    if (data_size < sizeof(*packet)) {
+        error_setg(errp, "%s: packet too short at %zu (min is %zu)",
+                   vbasedev->name, data_size, sizeof(*packet));
+        return false;
+    }
+
+    packet->version = be32_to_cpu(packet->version);
+    if (packet->version != VFIO_DEVICE_STATE_PACKET_VER_CURRENT) {
+        error_setg(errp, "%s: packet has unknown version %" PRIu32,
+                   vbasedev->name, packet->version);
+        return false;
+    }
+
+    packet->idx = be32_to_cpu(packet->idx);
+    packet->flags = be32_to_cpu(packet->flags);
+
+    if (packet->idx == UINT32_MAX) {
+        error_setg(errp, "%s: packet index is invalid", vbasedev->name);
+        return false;
+    }
+
+    trace_vfio_load_state_device_buffer_incoming(vbasedev->name, packet->idx);
+
+    /*
+     * Holding BQL here would violate the lock order and can cause
+     * a deadlock once we attempt to lock load_bufs_mutex below.
+     */
+    assert(!bql_locked());
+
+    WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) {
+        /* config state packet should be the last one in the stream */
+        if (packet->flags & VFIO_DEVICE_STATE_CONFIG_STATE) {
+            multifd->load_buf_idx_last = packet->idx;
+        }
+
+        if (!vfio_load_state_buffer_insert(vbasedev, packet, data_size,
+                                           errp)) {
+            return false;
+        }
+
+        qemu_cond_signal(&multifd->load_bufs_buffer_ready_cond);
+    }
+
+    return true;
+}
+
+static bool vfio_load_bufs_thread_load_config(VFIODevice *vbasedev,
+                                              Error **errp)
+{
+    VFIOMigration *migration = vbasedev->migration;
+    VFIOMultifd *multifd = migration->multifd;
+    VFIOStateBuffer *lb;
+    g_autoptr(QIOChannelBuffer) bioc = NULL;
+    g_autoptr(QEMUFile) f_out = NULL, f_in = NULL;
+    uint64_t mig_header;
+    int ret;
+
+    assert(multifd->load_buf_idx == multifd->load_buf_idx_last);
+    lb = vfio_state_buffers_at(&multifd->load_bufs, multifd->load_buf_idx);
+    assert(lb->is_present);
+
+    bioc = qio_channel_buffer_new(lb->len);
+    qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-load");
+
+    f_out = qemu_file_new_output(QIO_CHANNEL(bioc));
+    qemu_put_buffer(f_out, (uint8_t *)lb->data, lb->len);
+
+    ret = qemu_fflush(f_out);
+    if (ret) {
+        error_setg(errp, "%s: load config state flush failed: %d",
+                   vbasedev->name, ret);
+        return false;
+    }
+
+    qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
+    f_in = qemu_file_new_input(QIO_CHANNEL(bioc));
+
+    mig_header = qemu_get_be64(f_in);
+    if (mig_header != VFIO_MIG_FLAG_DEV_CONFIG_STATE) {
+        error_setg(errp, "%s: expected FLAG_DEV_CONFIG_STATE but got %" PRIx64,
+                   vbasedev->name, mig_header);
+        return false;
+    }
+
+    bql_lock();
+    ret = vfio_load_device_config_state(f_in, vbasedev);
+    bql_unlock();
+
+    if (ret < 0) {
+        error_setg(errp, "%s: vfio_load_device_config_state() failed: %d",
+                   vbasedev->name, ret);
+        return false;
+    }
+
+    return true;
+}
+
+static VFIOStateBuffer *vfio_load_state_buffer_get(VFIOMultifd *multifd)
+{
+    VFIOStateBuffer *lb;
+    unsigned int bufs_len;
+
+    bufs_len = vfio_state_buffers_size_get(&multifd->load_bufs);
+    if (multifd->load_buf_idx >= bufs_len) {
+        assert(multifd->load_buf_idx == bufs_len);
+        return NULL;
+    }
+
+    lb = vfio_state_buffers_at(&multifd->load_bufs,
+                               multifd->load_buf_idx);
+    if (!lb->is_present) {
+        return NULL;
+    }
+
+    return lb;
+}
+
+static bool vfio_load_state_buffer_write(VFIODevice *vbasedev,
+                                         VFIOStateBuffer *lb,
+                                         Error **errp)
+{
+    VFIOMigration *migration = vbasedev->migration;
+    VFIOMultifd *multifd = migration->multifd;
+    g_autofree char *buf = NULL;
+    char *buf_cur;
+    size_t buf_len;
+
+    if (!lb->len) {
+        return true;
+    }
+
+    trace_vfio_load_state_device_buffer_load_start(vbasedev->name,
+                                                   multifd->load_buf_idx);
+
+    /* lb might become re-allocated when we drop the lock */
+    buf = g_steal_pointer(&lb->data);
+    buf_cur = buf;
+    buf_len = lb->len;
+    while (buf_len > 0) {
+        ssize_t wr_ret;
+        int errno_save;
+
+        /*
+         * Loading data to the device takes a while,
+         * drop the lock during this process.
+         */
+        qemu_mutex_unlock(&multifd->load_bufs_mutex);
+        wr_ret = write(migration->data_fd, buf_cur, buf_len);
+        errno_save = errno;
+        qemu_mutex_lock(&multifd->load_bufs_mutex);
+
+        if (wr_ret < 0) {
+            error_setg(errp,
+                       "%s: writing state buffer %" PRIu32 " failed: %d",
+                       vbasedev->name, multifd->load_buf_idx, errno_save);
+            return false;
+        }
+
+        assert(wr_ret <= buf_len);
+        buf_len -= wr_ret;
+        buf_cur += wr_ret;
+    }
+
+    trace_vfio_load_state_device_buffer_load_end(vbasedev->name,
+                                                 multifd->load_buf_idx);
+
+    return true;
+}
+
+static bool vfio_load_bufs_thread_want_exit(VFIOMultifd *multifd,
+                                            bool *should_quit)
+{
+    return multifd->load_bufs_thread_want_exit || qatomic_read(should_quit);
+}
+
+/*
+ * This thread is spawned by vfio_multifd_switchover_start() which gets
+ * called upon encountering the switchover point marker in main migration
+ * stream.
+ *
+ * It exits after either:
+ * * completing loading the remaining device state and device config, OR:
+ * * encountering some error while doing the above, OR:
+ * * being forcefully aborted by the migration core by it setting should_quit
+ *   or by vfio_load_cleanup_load_bufs_thread() setting
+ *   multifd->load_bufs_thread_want_exit.
+ */
+static bool vfio_load_bufs_thread(void *opaque, bool *should_quit, Error **errp)
+{
+    VFIODevice *vbasedev = opaque;
+    VFIOMigration *migration = vbasedev->migration;
+    VFIOMultifd *multifd = migration->multifd;
+    bool ret = false;
+
+    trace_vfio_load_bufs_thread_start(vbasedev->name);
+
+    assert(multifd);
+    QEMU_LOCK_GUARD(&multifd->load_bufs_mutex);
+
+    assert(multifd->load_bufs_thread_running);
+
+    while (true) {
+        VFIOStateBuffer *lb;
+
+        /*
+         * Always check cancellation first after the buffer_ready wait below in
+         * case that cond was signalled by vfio_load_cleanup_load_bufs_thread().
+         */
+        if (vfio_load_bufs_thread_want_exit(multifd, should_quit)) {
+            error_setg(errp, "operation cancelled");
+            goto thread_exit;
+        }
+
+        assert(multifd->load_buf_idx <= multifd->load_buf_idx_last);
+
+        lb = vfio_load_state_buffer_get(multifd);
+        if (!lb) {
+            trace_vfio_load_state_device_buffer_starved(vbasedev->name,
+                                                        multifd->load_buf_idx);
+            qemu_cond_wait(&multifd->load_bufs_buffer_ready_cond,
+                           &multifd->load_bufs_mutex);
+            continue;
+        }
+
+        if (multifd->load_buf_idx == multifd->load_buf_idx_last) {
+            break;
+        }
+
+        if (multifd->load_buf_idx == 0) {
+            trace_vfio_load_state_device_buffer_start(vbasedev->name);
+        }
+
+        if (!vfio_load_state_buffer_write(vbasedev, lb, errp)) {
+            goto thread_exit;
+        }
+
+        if (multifd->load_buf_idx == multifd->load_buf_idx_last - 1) {
+            trace_vfio_load_state_device_buffer_end(vbasedev->name);
+        }
+
+        multifd->load_buf_idx++;
+    }
+
+    if (!vfio_load_bufs_thread_load_config(vbasedev, errp)) {
+        goto thread_exit;
+    }
+
+    ret = true;
+
+thread_exit:
+    /*
+     * Notify possibly waiting vfio_load_cleanup_load_bufs_thread() that
+     * this thread is exiting.
+     */
+    multifd->load_bufs_thread_running = false;
+    qemu_cond_signal(&multifd->load_bufs_thread_finished_cond);
+
+    trace_vfio_load_bufs_thread_end(vbasedev->name);
+
+    return ret;
+}
+
+static VFIOMultifd *vfio_multifd_new(void)
+{
+    VFIOMultifd *multifd = g_new(VFIOMultifd, 1);
+
+    vfio_state_buffers_init(&multifd->load_bufs);
+
+    qemu_mutex_init(&multifd->load_bufs_mutex);
+
+    multifd->load_buf_idx = 0;
+    multifd->load_buf_idx_last = UINT32_MAX;
+    qemu_cond_init(&multifd->load_bufs_buffer_ready_cond);
+
+    multifd->load_bufs_thread_running = false;
+    multifd->load_bufs_thread_want_exit = false;
+    qemu_cond_init(&multifd->load_bufs_thread_finished_cond);
+
+    return multifd;
+}
+
+/*
+ * Terminates vfio_load_bufs_thread by setting
+ * multifd->load_bufs_thread_want_exit and signalling all the conditions
+ * the thread could be blocked on.
+ *
+ * Waits for the thread to signal that it had finished.
+ */
+static void vfio_load_cleanup_load_bufs_thread(VFIOMultifd *multifd)
+{
+    /* The lock order is load_bufs_mutex -> BQL so unlock BQL here first */
+    bql_unlock();
+    WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) {
+        while (multifd->load_bufs_thread_running) {
+            multifd->load_bufs_thread_want_exit = true;
+
+            qemu_cond_signal(&multifd->load_bufs_buffer_ready_cond);
+            qemu_cond_wait(&multifd->load_bufs_thread_finished_cond,
+                           &multifd->load_bufs_mutex);
+        }
+    }
+    bql_lock();
+}
+
+static void vfio_multifd_free(VFIOMultifd *multifd)
+{
+    vfio_load_cleanup_load_bufs_thread(multifd);
+
+    qemu_cond_destroy(&multifd->load_bufs_thread_finished_cond);
+    vfio_state_buffers_destroy(&multifd->load_bufs);
+    qemu_cond_destroy(&multifd->load_bufs_buffer_ready_cond);
+    qemu_mutex_destroy(&multifd->load_bufs_mutex);
+
+    g_free(multifd);
+}
+
+void vfio_multifd_cleanup(VFIODevice *vbasedev)
+{
+    VFIOMigration *migration = vbasedev->migration;
+
+    g_clear_pointer(&migration->multifd, vfio_multifd_free);
+}
+
+bool vfio_multifd_transfer_supported(void)
+{
+    return multifd_device_state_supported() &&
+        migrate_send_switchover_start();
+}
+
+bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev)
+{
+    VFIOMigration *migration = vbasedev->migration;
+
+    return migration->multifd_transfer;
+}
+
+bool vfio_multifd_setup(VFIODevice *vbasedev, bool alloc_multifd, Error **errp)
+{
+    VFIOMigration *migration = vbasedev->migration;
+
+    /*
+     * Make a copy of this setting at the start in case it is changed
+     * mid-migration.
+     */
+    if (vbasedev->migration_multifd_transfer == ON_OFF_AUTO_AUTO) {
+        migration->multifd_transfer = vfio_multifd_transfer_supported();
+    } else {
+        migration->multifd_transfer =
+            vbasedev->migration_multifd_transfer == ON_OFF_AUTO_ON;
+    }
+
+    if (!vfio_multifd_transfer_enabled(vbasedev)) {
+        /* Nothing further to check or do */
+        return true;
+    }
+
+    if (!vfio_multifd_transfer_supported()) {
+        error_setg(errp,
+                   "%s: Multifd device transfer requested but unsupported in the current config",
+                   vbasedev->name);
+        return false;
+    }
+
+    if (alloc_multifd) {
+        assert(!migration->multifd);
+        migration->multifd = vfio_multifd_new();
+    }
+
+    return true;
+}
+
+void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f)
+{
+    assert(vfio_multifd_transfer_enabled(vbasedev));
+
+    /*
+     * Emit dummy NOP data on the main migration channel since the actual
+     * device state transfer is done via multifd channels.
+     */
+    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
+}
+
+static bool
+vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev,
+                                               char *idstr,
+                                               uint32_t instance_id,
+                                               uint32_t idx,
+                                               Error **errp)
+{
+    g_autoptr(QIOChannelBuffer) bioc = NULL;
+    g_autoptr(QEMUFile) f = NULL;
+    int ret;
+    g_autofree VFIODeviceStatePacket *packet = NULL;
+    size_t packet_len;
+
+    bioc = qio_channel_buffer_new(0);
+    qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-save");
+
+    f = qemu_file_new_output(QIO_CHANNEL(bioc));
+
+    if (vfio_save_device_config_state(f, vbasedev, errp)) {
+        return false;
+    }
+
+    ret = qemu_fflush(f);
+    if (ret) {
+        error_setg(errp, "%s: save config state flush failed: %d",
+                   vbasedev->name, ret);
+        return false;
+    }
+
+    packet_len = sizeof(*packet) + bioc->usage;
+    packet = g_malloc0(packet_len);
+    packet->version = cpu_to_be32(VFIO_DEVICE_STATE_PACKET_VER_CURRENT);
+    packet->idx = cpu_to_be32(idx);
+    packet->flags = cpu_to_be32(VFIO_DEVICE_STATE_CONFIG_STATE);
+    memcpy(&packet->data, bioc->data, bioc->usage);
+
+    if (!multifd_queue_device_state(idstr, instance_id,
+                                    (char *)packet, packet_len)) {
+        error_setg(errp, "%s: multifd config data queuing failed",
+                   vbasedev->name);
+        return false;
+    }
+
+    vfio_mig_add_bytes_transferred(packet_len);
+
+    return true;
+}
+
+/*
+ * This thread is spawned by the migration core directly via
+ * .save_live_complete_precopy_thread SaveVMHandler.
+ *
+ * It exits after either:
+ * * completing saving the remaining device state and device config, OR:
+ * * encountering some error while doing the above, OR:
+ * * being forcefully aborted by the migration core by
+ *   multifd_device_state_save_thread_should_exit() returning true.
+ */
+bool
+vfio_multifd_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
+                                          Error **errp)
+{
+    VFIODevice *vbasedev = d->handler_opaque;
+    VFIOMigration *migration = vbasedev->migration;
+    bool ret = false;
+    g_autofree VFIODeviceStatePacket *packet = NULL;
+    uint32_t idx;
+
+    if (!vfio_multifd_transfer_enabled(vbasedev)) {
+        /* Nothing to do, vfio_save_complete_precopy() does the transfer. */
+        return true;
+    }
+
+    trace_vfio_save_complete_precopy_thread_start(vbasedev->name,
+                                                  d->idstr, d->instance_id);
+
+    /* We reach here with device state STOP or STOP_COPY only */
+    if (vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
+                                 VFIO_DEVICE_STATE_STOP, errp)) {
+        goto thread_exit;
+    }
+
+    packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size);
+    packet->version = cpu_to_be32(VFIO_DEVICE_STATE_PACKET_VER_CURRENT);
+
+    for (idx = 0; ; idx++) {
+        ssize_t data_size;
+        size_t packet_size;
+
+        if (multifd_device_state_save_thread_should_exit()) {
+            error_setg(errp, "operation cancelled");
+            goto thread_exit;
+        }
+
+        data_size = read(migration->data_fd, &packet->data,
+                         migration->data_buffer_size);
+        if (data_size < 0) {
+            error_setg(errp, "%s: reading state buffer %" PRIu32 " failed: %d",
+                       vbasedev->name, idx, errno);
+            goto thread_exit;
+        } else if (data_size == 0) {
+            break;
+        }
+
+        packet->idx = cpu_to_be32(idx);
+        packet_size = sizeof(*packet) + data_size;
+
+        if (!multifd_queue_device_state(d->idstr, d->instance_id,
+                                        (char *)packet, packet_size)) {
+            error_setg(errp, "%s: multifd data queuing failed", vbasedev->name);
+            goto thread_exit;
+        }
+
+        vfio_mig_add_bytes_transferred(packet_size);
+    }
+
+    if (!vfio_save_complete_precopy_thread_config_state(vbasedev,
+                                                        d->idstr,
+                                                        d->instance_id,
+                                                        idx, errp)) {
+        goto thread_exit;
+   }
+
+    ret = true;
+
+thread_exit:
+    trace_vfio_save_complete_precopy_thread_end(vbasedev->name, ret);
+
+    return ret;
+}
+
+int vfio_multifd_switchover_start(VFIODevice *vbasedev)
+{
+    VFIOMigration *migration = vbasedev->migration;
+    VFIOMultifd *multifd = migration->multifd;
+
+    assert(multifd);
+
+    /* The lock order is load_bufs_mutex -> BQL so unlock BQL here first */
+    bql_unlock();
+    WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) {
+        assert(!multifd->load_bufs_thread_running);
+        multifd->load_bufs_thread_running = true;
+    }
+    bql_lock();
+
+    qemu_loadvm_start_load_thread(vfio_load_bufs_thread, vbasedev);
+
+    return 0;
+}
diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h
new file mode 100644
index 0000000000000..a664051eb8ae0
--- /dev/null
+++ b/hw/vfio/migration-multifd.h
@@ -0,0 +1,34 @@
+/*
+ * Multifd VFIO migration
+ *
+ * Copyright (C) 2024,2025 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_VFIO_MIGRATION_MULTIFD_H
+#define HW_VFIO_MIGRATION_MULTIFD_H
+
+#include "hw/vfio/vfio-common.h"
+
+bool vfio_multifd_setup(VFIODevice *vbasedev, bool alloc_multifd, Error **errp);
+void vfio_multifd_cleanup(VFIODevice *vbasedev);
+
+bool vfio_multifd_transfer_supported(void);
+bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev);
+
+bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size,
+                                    Error **errp);
+
+void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f);
+
+bool
+vfio_multifd_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
+                                          Error **errp);
+
+int vfio_multifd_switchover_start(VFIODevice *vbasedev);
+
+#endif
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index adfa752db5272..fbff46cfc35e0 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -23,31 +23,14 @@
 #include "migration/qemu-file.h"
 #include "migration/register.h"
 #include "migration/blocker.h"
+#include "migration-multifd.h"
 #include "qapi/error.h"
 #include "qapi/qapi-events-vfio.h"
 #include "exec/ramlist.h"
-#include "exec/ram_addr.h"
 #include "pci.h"
 #include "trace.h"
 #include "hw/hw.h"
 
-/*
- * Flags to be used as unique delimiters for VFIO devices in the migration
- * stream. These flags are composed as:
- * 0xffffffff => MSB 32-bit all 1s
- * 0xef10     => Magic ID, represents emulated (virtual) function IO
- * 0x0000     => 16-bits reserved for flags
- *
- * The beginning of state information is marked by _DEV_CONFIG_STATE,
- * _DEV_SETUP_STATE, or _DEV_DATA_STATE, respectively. The end of a
- * certain state information is marked by _END_OF_STATE.
- */
-#define VFIO_MIG_FLAG_END_OF_STATE      (0xffffffffef100001ULL)
-#define VFIO_MIG_FLAG_DEV_CONFIG_STATE  (0xffffffffef100002ULL)
-#define VFIO_MIG_FLAG_DEV_SETUP_STATE   (0xffffffffef100003ULL)
-#define VFIO_MIG_FLAG_DEV_DATA_STATE    (0xffffffffef100004ULL)
-#define VFIO_MIG_FLAG_DEV_INIT_DATA_SENT (0xffffffffef100005ULL)
-
 /*
  * This is an arbitrary size based on migration of mlx5 devices, where typically
  * total device migration size is on the order of 100s of MB. Testing with
@@ -55,7 +38,7 @@
  */
 #define VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE (1 * MiB)
 
-static int64_t bytes_transferred;
+static unsigned long bytes_transferred;
 
 static const char *mig_state_to_str(enum vfio_device_mig_state state)
 {
@@ -136,10 +119,10 @@ static void vfio_migration_set_device_state(VFIODevice *vbasedev,
     vfio_migration_send_event(vbasedev);
 }
 
-static int vfio_migration_set_state(VFIODevice *vbasedev,
-                                    enum vfio_device_mig_state new_state,
-                                    enum vfio_device_mig_state recover_state,
-                                    Error **errp)
+int vfio_migration_set_state(VFIODevice *vbasedev,
+                             enum vfio_device_mig_state new_state,
+                             enum vfio_device_mig_state recover_state,
+                             Error **errp)
 {
     VFIOMigration *migration = vbasedev->migration;
     uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
@@ -254,8 +237,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
     return ret;
 }
 
-static int vfio_save_device_config_state(QEMUFile *f, void *opaque,
-                                         Error **errp)
+int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp)
 {
     VFIODevice *vbasedev = opaque;
     int ret;
@@ -280,11 +262,13 @@ static int vfio_save_device_config_state(QEMUFile *f, void *opaque,
     return ret;
 }
 
-static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
+int vfio_load_device_config_state(QEMUFile *f, void *opaque)
 {
     VFIODevice *vbasedev = opaque;
     uint64_t data;
 
+    trace_vfio_load_device_config_state_start(vbasedev->name);
+
     if (vbasedev->ops && vbasedev->ops->vfio_load_config) {
         int ret;
 
@@ -303,7 +287,7 @@ static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
         return -EINVAL;
     }
 
-    trace_vfio_load_device_config_state(vbasedev->name);
+    trace_vfio_load_device_config_state_end(vbasedev->name);
     return qemu_file_get_error(f);
 }
 
@@ -389,7 +373,7 @@ static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration)
     qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
     qemu_put_be64(f, data_size);
     qemu_put_buffer(f, migration->data_buffer, data_size);
-    bytes_transferred += data_size;
+    vfio_mig_add_bytes_transferred(data_size);
 
     trace_vfio_save_block(migration->vbasedev->name, data_size);
 
@@ -467,6 +451,10 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
     uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE;
     int ret;
 
+    if (!vfio_multifd_setup(vbasedev, false, errp)) {
+        return -EINVAL;
+    }
+
     qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
 
     vfio_query_stop_copy_size(vbasedev, &stop_copy_size);
@@ -523,6 +511,9 @@ static void vfio_save_cleanup(void *opaque)
     Error *local_err = NULL;
     int ret;
 
+    /* Currently a NOP, done for symmetry with load_cleanup() */
+    vfio_multifd_cleanup(vbasedev);
+
     /*
      * Changing device state from STOP_COPY to STOP can take time. Do it here,
      * after migration has completed, so it won't increase downtime.
@@ -645,6 +636,11 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
     int ret;
     Error *local_err = NULL;
 
+    if (vfio_multifd_transfer_enabled(vbasedev)) {
+        vfio_multifd_emit_dummy_eos(vbasedev, f);
+        return 0;
+    }
+
     trace_vfio_save_complete_precopy_start(vbasedev->name);
 
     /* We reach here with device state STOP or STOP_COPY only */
@@ -676,6 +672,11 @@ static void vfio_save_state(QEMUFile *f, void *opaque)
     Error *local_err = NULL;
     int ret;
 
+    if (vfio_multifd_transfer_enabled(vbasedev)) {
+        vfio_multifd_emit_dummy_eos(vbasedev, f);
+        return;
+    }
+
     ret = vfio_save_device_config_state(f, opaque, &local_err);
     if (ret) {
         error_prepend(&local_err,
@@ -688,15 +689,28 @@ static void vfio_save_state(QEMUFile *f, void *opaque)
 static int vfio_load_setup(QEMUFile *f, void *opaque, Error **errp)
 {
     VFIODevice *vbasedev = opaque;
+    VFIOMigration *migration = vbasedev->migration;
+    int ret;
+
+    if (!vfio_multifd_setup(vbasedev, true, errp)) {
+        return -EINVAL;
+    }
+
+    ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING,
+                                   migration->device_state, errp);
+    if (ret) {
+        return ret;
+    }
 
-    return vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING,
-                                    vbasedev->migration->device_state, errp);
+    return 0;
 }
 
 static int vfio_load_cleanup(void *opaque)
 {
     VFIODevice *vbasedev = opaque;
 
+    vfio_multifd_cleanup(vbasedev);
+
     vfio_migration_cleanup(vbasedev);
     trace_vfio_load_cleanup(vbasedev->name);
 
@@ -717,6 +731,13 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
         switch (data) {
         case VFIO_MIG_FLAG_DEV_CONFIG_STATE:
         {
+            if (vfio_multifd_transfer_enabled(vbasedev)) {
+                error_report("%s: got DEV_CONFIG_STATE in main migration "
+                             "channel but doing multifd transfer",
+                             vbasedev->name);
+                return -EINVAL;
+            }
+
             return vfio_load_device_config_state(f, opaque);
         }
         case VFIO_MIG_FLAG_DEV_SETUP_STATE:
@@ -782,6 +803,17 @@ static bool vfio_switchover_ack_needed(void *opaque)
     return vfio_precopy_supported(vbasedev);
 }
 
+static int vfio_switchover_start(void *opaque)
+{
+    VFIODevice *vbasedev = opaque;
+
+    if (vfio_multifd_transfer_enabled(vbasedev)) {
+        return vfio_multifd_switchover_start(vbasedev);
+    }
+
+    return 0;
+}
+
 static const SaveVMHandlers savevm_vfio_handlers = {
     .save_prepare = vfio_save_prepare,
     .save_setup = vfio_save_setup,
@@ -796,6 +828,12 @@ static const SaveVMHandlers savevm_vfio_handlers = {
     .load_cleanup = vfio_load_cleanup,
     .load_state = vfio_load_state,
     .switchover_ack_needed = vfio_switchover_ack_needed,
+    /*
+     * Multifd support
+     */
+    .load_state_buffer = vfio_multifd_load_state_buffer,
+    .switchover_start = vfio_switchover_start,
+    .save_live_complete_precopy_thread = vfio_multifd_save_complete_precopy_thread,
 };
 
 /* ---------------------------------------------------------------------- */
@@ -1011,12 +1049,17 @@ static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp)
 
 int64_t vfio_mig_bytes_transferred(void)
 {
-    return bytes_transferred;
+    return MIN(qatomic_read(&bytes_transferred), INT64_MAX);
 }
 
 void vfio_reset_bytes_transferred(void)
 {
-    bytes_transferred = 0;
+    qatomic_set(&bytes_transferred, 0);
+}
+
+void vfio_mig_add_bytes_transferred(unsigned long val)
+{
+    qatomic_add(&bytes_transferred, val);
 }
 
 /*
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index fbe43b0a7903e..3f002252acfb7 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -403,7 +403,7 @@ static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr)
 
     /* This windows doesn't seem to be used except by legacy VGA code */
     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
-        !vdev->vga || nr != 4) {
+        !vdev->vga || nr != 4 || !vdev->bars[4].ioport) {
         return;
     }
 
@@ -1114,59 +1114,19 @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr)
     trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name);
 }
 
-#define IGD_ASLS 0xfc /* ASL Storage Register */
-
 /*
- * The OpRegion includes the Video BIOS Table, which seems important for
- * telling the driver what sort of outputs it has.  Without this, the device
- * may work in the guest, but we may not get output.  This also requires BIOS
- * support to reserve and populate a section of guest memory sufficient for
- * the table and to write the base address of that memory to the ASLS register
- * of the IGD device.
+ * Common quirk probe entry points.
  */
-bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
-                                struct vfio_region_info *info, Error **errp)
+bool vfio_config_quirk_setup(VFIOPCIDevice *vdev, Error **errp)
 {
-    int ret;
-
-    vdev->igd_opregion = g_malloc0(info->size);
-    ret = pread(vdev->vbasedev.fd, vdev->igd_opregion,
-                info->size, info->offset);
-    if (ret != info->size) {
-        error_setg(errp, "failed to read IGD OpRegion");
-        g_free(vdev->igd_opregion);
-        vdev->igd_opregion = NULL;
+#ifdef CONFIG_VFIO_IGD
+    if (!vfio_probe_igd_config_quirk(vdev, errp)) {
         return false;
     }
-
-    /*
-     * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to
-     * allocate 32bit reserved memory for, copy these contents into, and write
-     * the reserved memory base address to the device ASLS register at 0xFC.
-     * Alignment of this reserved region seems flexible, but using a 4k page
-     * alignment seems to work well.  This interface assumes a single IGD
-     * device, which may be at VM address 00:02.0 in legacy mode or another
-     * address in UPT mode.
-     *
-     * NB, there may be future use cases discovered where the VM should have
-     * direct interaction with the host OpRegion, in which case the write to
-     * the ASLS register would trigger MemoryRegion setup to enable that.
-     */
-    fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion",
-                    vdev->igd_opregion, info->size);
-
-    trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name);
-
-    pci_set_long(vdev->pdev.config + IGD_ASLS, 0);
-    pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0);
-    pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0);
-
+#endif
     return true;
 }
 
-/*
- * Common quirk probe entry points.
- */
 void vfio_vga_quirk_setup(VFIOPCIDevice *vdev)
 {
     vfio_vga_probe_ati_3c3_quirk(vdev);
@@ -1215,7 +1175,6 @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
     vfio_probe_rtl8168_bar2_quirk(vdev, nr);
 #ifdef CONFIG_VFIO_IGD
     vfio_probe_igd_bar0_quirk(vdev, nr);
-    vfio_probe_igd_bar4_quirk(vdev, nr);
 #endif
 }
 
@@ -1480,7 +1439,7 @@ static void set_nv_gpudirect_clique_id(Object *obj, Visitor *v,
 }
 
 const PropertyInfo qdev_prop_nv_gpudirect_clique = {
-    .name = "uint4",
+    .type = "uint8",
     .description = "NVIDIA GPUDirect Clique ID (0 - 15)",
     .get = get_nv_gpudirect_clique_id,
     .set = set_nv_gpudirect_clique_id,
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 89d900e9cf0ce..7f1532fbed9ae 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2215,8 +2215,12 @@ static bool vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp)
         break;
     case PCI_CAP_ID_PM:
         vfio_check_pm_reset(vdev, pos);
-        vdev->pm_cap = pos;
-        ret = pci_add_capability(pdev, cap_id, pos, size, errp) >= 0;
+        ret = pci_pm_init(pdev, pos, errp) >= 0;
+        /*
+         * PCI-core config space emulation needs write access to the power
+         * state enabled for tracking BAR mapping relative to PM state.
+         */
+        pci_set_word(pdev->wmask + pos + PCI_PM_CTRL, PCI_PM_CTRL_STATE_MASK);
         break;
     case PCI_CAP_ID_AF:
         vfio_check_af_flr(vdev, pos);
@@ -2406,18 +2410,27 @@ void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
 
     vfio_disable_interrupts(vdev);
 
+    /*
+     * Stop any ongoing DMA by disconnecting I/O, MMIO, and bus master.
+     * Also put INTx Disable in known state.
+     */
+    cmd = vfio_pci_read_config(pdev, PCI_COMMAND, 2);
+    cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER |
+             PCI_COMMAND_INTX_DISABLE);
+    vfio_pci_write_config(pdev, PCI_COMMAND, cmd, 2);
+
     /* Make sure the device is in D0 */
-    if (vdev->pm_cap) {
+    if (pdev->pm_cap) {
         uint16_t pmcsr;
         uint8_t state;
 
-        pmcsr = vfio_pci_read_config(pdev, vdev->pm_cap + PCI_PM_CTRL, 2);
+        pmcsr = vfio_pci_read_config(pdev, pdev->pm_cap + PCI_PM_CTRL, 2);
         state = pmcsr & PCI_PM_CTRL_STATE_MASK;
         if (state) {
             pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
-            vfio_pci_write_config(pdev, vdev->pm_cap + PCI_PM_CTRL, pmcsr, 2);
+            vfio_pci_write_config(pdev, pdev->pm_cap + PCI_PM_CTRL, pmcsr, 2);
             /* vfio handles the necessary delay here */
-            pmcsr = vfio_pci_read_config(pdev, vdev->pm_cap + PCI_PM_CTRL, 2);
+            pmcsr = vfio_pci_read_config(pdev, pdev->pm_cap + PCI_PM_CTRL, 2);
             state = pmcsr & PCI_PM_CTRL_STATE_MASK;
             if (state) {
                 error_report("vfio: Unable to power on device, stuck in D%d",
@@ -2425,15 +2438,6 @@ void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
             }
         }
     }
-
-    /*
-     * Stop any ongoing DMA by disconnecting I/O, MMIO, and bus master.
-     * Also put INTx Disable in known state.
-     */
-    cmd = vfio_pci_read_config(pdev, PCI_COMMAND, 2);
-    cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER |
-             PCI_COMMAND_INTX_DISABLE);
-    vfio_pci_write_config(pdev, PCI_COMMAND, cmd, 2);
 }
 
 void vfio_pci_post_reset(VFIOPCIDevice *vdev)
@@ -3124,6 +3128,10 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
         goto out_unset_idev;
     }
 
+    if (!vfio_config_quirk_setup(vdev, errp)) {
+        goto out_unset_idev;
+    }
+
     if (vdev->vga) {
         vfio_vga_quirk_setup(vdev);
     }
@@ -3132,31 +3140,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
         vfio_bar_quirk_setup(vdev, i);
     }
 
-    if (!vdev->igd_opregion &&
-        vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) {
-        g_autofree struct vfio_region_info *opregion = NULL;
-
-        if (vdev->pdev.qdev.hotplugged) {
-            error_setg(errp,
-                       "cannot support IGD OpRegion feature on hotplugged "
-                       "device");
-            goto out_unset_idev;
-        }
-
-        ret = vfio_get_dev_region_info(vbasedev,
-                        VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
-                        VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion);
-        if (ret) {
-            error_setg_errno(errp, -ret,
-                             "does not support requested IGD OpRegion feature");
-            goto out_unset_idev;
-        }
-
-        if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) {
-            goto out_unset_idev;
-        }
-    }
-
     /* QEMU emulates all of MSI & MSIX */
     if (pdev->cap_present & QEMU_PCI_CAP_MSIX) {
         memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff,
@@ -3353,6 +3336,8 @@ static void vfio_instance_init(Object *obj)
     pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
 }
 
+static PropertyInfo vfio_pci_migration_multifd_transfer_prop;
+
 static const Property vfio_pci_dev_properties[] = {
     DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host),
     DEFINE_PROP_UUID_NODEFAULT("vf-token", VFIOPCIDevice, vf_token),
@@ -3375,8 +3360,16 @@ static const Property vfio_pci_dev_properties[] = {
                     VFIO_FEATURE_ENABLE_REQ_BIT, true),
     DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features,
                     VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false),
+    DEFINE_PROP_BIT("x-igd-lpc", VFIOPCIDevice, features,
+                    VFIO_FEATURE_ENABLE_IGD_LPC_BIT, false),
+    DEFINE_PROP_ON_OFF_AUTO("x-igd-legacy-mode", VFIOPCIDevice,
+                            igd_legacy_mode, ON_OFF_AUTO_AUTO),
     DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice,
                             vbasedev.enable_migration, ON_OFF_AUTO_AUTO),
+    DEFINE_PROP("x-migration-multifd-transfer", VFIOPCIDevice,
+                vbasedev.migration_multifd_transfer,
+                vfio_pci_migration_multifd_transfer_prop, OnOffAuto,
+                .set_default = true, .defval.i = ON_OFF_AUTO_AUTO),
     DEFINE_PROP_BOOL("migration-events", VFIOPCIDevice,
                      vbasedev.migration_events, false),
     DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
@@ -3433,6 +3426,126 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
     pdc->exit = vfio_exitfn;
     pdc->config_read = vfio_pci_read_config;
     pdc->config_write = vfio_pci_write_config;
+
+    object_class_property_set_description(klass, /* 1.3 */
+                                          "host",
+                                          "Host PCI address [domain:]<bus:slot.function> of assigned device");
+    object_class_property_set_description(klass, /* 1.3 */
+                                          "x-intx-mmap-timeout-ms",
+                                          "When EOI is not provided by KVM/QEMU, wait time "
+                                          "(milliseconds) to re-enable device direct access "
+                                          "after INTx (DEBUG)");
+    object_class_property_set_description(klass, /* 1.5 */
+                                          "x-vga",
+                                          "Expose VGA address spaces for device");
+    object_class_property_set_description(klass, /* 2.3 */
+                                          "x-req",
+                                          "Disable device request notification support (DEBUG)");
+    object_class_property_set_description(klass, /* 2.4 and 2.5 */
+                                          "x-no-mmap",
+                                          "Disable MMAP for device. Allows to trace MMIO "
+                                          "accesses (DEBUG)");
+    object_class_property_set_description(klass, /* 2.5 */
+                                          "x-no-kvm-intx",
+                                          "Disable direct VFIO->KVM INTx injection. Allows to "
+                                          "trace INTx interrupts (DEBUG)");
+    object_class_property_set_description(klass, /* 2.5 */
+                                          "x-no-kvm-msi",
+                                          "Disable direct VFIO->KVM MSI injection. Allows to "
+                                          "trace MSI interrupts (DEBUG)");
+    object_class_property_set_description(klass, /* 2.5 */
+                                          "x-no-kvm-msix",
+                                          "Disable direct VFIO->KVM MSIx injection. Allows to "
+                                          "trace MSIx interrupts (DEBUG)");
+    object_class_property_set_description(klass, /* 2.5 */
+                                          "x-pci-vendor-id",
+                                          "Override PCI Vendor ID with provided value (DEBUG)");
+    object_class_property_set_description(klass, /* 2.5 */
+                                          "x-pci-device-id",
+                                          "Override PCI device ID with provided value (DEBUG)");
+    object_class_property_set_description(klass, /* 2.5 */
+                                          "x-pci-sub-vendor-id",
+                                          "Override PCI Subsystem Vendor ID with provided value "
+                                          "(DEBUG)");
+    object_class_property_set_description(klass, /* 2.5 */
+                                          "x-pci-sub-device-id",
+                                          "Override PCI Subsystem Device ID with provided value "
+                                          "(DEBUG)");
+    object_class_property_set_description(klass, /* 2.6 */
+                                          "sysfsdev",
+                                          "Host sysfs path of assigned device");
+    object_class_property_set_description(klass, /* 2.7 */
+                                          "x-igd-opregion",
+                                          "Expose host IGD OpRegion to guest");
+    object_class_property_set_description(klass, /* 2.7 (See c4c45e943e51) */
+                                          "x-igd-gms",
+                                          "Override IGD data stolen memory size (32MiB units)");
+    object_class_property_set_description(klass, /* 2.11 */
+                                          "x-nv-gpudirect-clique",
+                                          "Add NVIDIA GPUDirect capability indicating P2P DMA "
+                                          "clique for device [0-15]");
+    object_class_property_set_description(klass, /* 2.12 */
+                                          "x-no-geforce-quirks",
+                                          "Disable GeForce quirks (for NVIDIA Quadro/GRID/Tesla). "
+                                          "Improves performance");
+    object_class_property_set_description(klass, /* 2.12 */
+                                          "display",
+                                          "Enable display support for device, ex. vGPU");
+    object_class_property_set_description(klass, /* 2.12 */
+                                          "x-msix-relocation",
+                                          "Specify MSI-X MMIO relocation to the end of specified "
+                                          "existing BAR or new BAR to avoid virtualization overhead "
+                                          "due to adjacent device registers");
+    object_class_property_set_description(klass, /* 3.0 */
+                                          "x-no-kvm-ioeventfd",
+                                          "Disable registration of ioeventfds with KVM (DEBUG)");
+    object_class_property_set_description(klass, /* 3.0 */
+                                          "x-no-vfio-ioeventfd",
+                                          "Disable linking of KVM ioeventfds to VFIO ioeventfds "
+                                          "(DEBUG)");
+    object_class_property_set_description(klass, /* 3.1 */
+                                          "x-balloon-allowed",
+                                          "Override allowing ballooning with device (DEBUG, DANGER)");
+    object_class_property_set_description(klass, /* 3.2 */
+                                          "xres",
+                                          "Set X display resolution the vGPU should use");
+    object_class_property_set_description(klass, /* 3.2 */
+                                          "yres",
+                                          "Set Y display resolution the vGPU should use");
+    object_class_property_set_description(klass, /* 5.2 */
+                                          "x-pre-copy-dirty-page-tracking",
+                                          "Disable dirty pages tracking during iterative phase "
+                                          "(DEBUG)");
+    object_class_property_set_description(klass, /* 5.2, 8.0 non-experimetal */
+                                          "enable-migration",
+                                          "Enale device migration. Also requires a host VFIO PCI "
+                                          "variant or mdev driver with migration support enabled");
+    object_class_property_set_description(klass, /* 8.1 */
+                                          "vf-token",
+                                          "Specify UUID VF token. Required for VF when PF is owned "
+                                          "by another VFIO driver");
+#ifdef CONFIG_IOMMUFD
+    object_class_property_set_description(klass, /* 9.0 */
+                                          "iommufd",
+                                          "Set host IOMMUFD backend device");
+#endif
+    object_class_property_set_description(klass, /* 9.1 */
+                                          "x-device-dirty-page-tracking",
+                                          "Disable device dirty page tracking and use "
+                                          "container-based dirty page tracking");
+    object_class_property_set_description(klass, /* 9.1 */
+                                          "migration-events",
+                                          "Emit VFIO migration QAPI event when a VFIO device "
+                                          "changes its migration state. For management applications");
+    object_class_property_set_description(klass, /* 9.1 */
+                                          "skip-vsc-check",
+                                          "Skip config space check for Vendor Specific Capability. "
+                                          "Setting to false will enforce strict checking of VSC content "
+                                          "(DEBUG)");
+    object_class_property_set_description(klass, /* 10.0 */
+                                          "x-migration-multifd-transfer",
+                                          "Transfer this device state via "
+                                          "multifd channels when live migrating it");
 }
 
 static const TypeInfo vfio_pci_dev_info = {
@@ -3461,6 +3574,15 @@ static void vfio_pci_nohotplug_dev_class_init(ObjectClass *klass, void *data)
 
     device_class_set_props(dc, vfio_pci_dev_nohotplug_properties);
     dc->hotpluggable = false;
+
+    object_class_property_set_description(klass, /* 3.1 */
+                                          "ramfb",
+                                          "Enable ramfb to provide pre-boot graphics for devices "
+                                          "enabling display option");
+    object_class_property_set_description(klass, /* 8.2 */
+                                          "x-ramfb-migrate",
+                                          "Override default migration support for ramfb support "
+                                          "(DEBUG)");
 }
 
 static const TypeInfo vfio_pci_nohotplug_dev_info = {
@@ -3472,6 +3594,17 @@ static const TypeInfo vfio_pci_nohotplug_dev_info = {
 
 static void register_vfio_pci_dev_type(void)
 {
+    /*
+     * Ordinary ON_OFF_AUTO property isn't runtime-mutable, but source VM can
+     * run for a long time before being migrated so it is desirable to have a
+     * fallback mechanism to the old way of transferring VFIO device state if
+     * it turns to be necessary.
+     * The following makes this type of property have the same mutability level
+     * as ordinary migration parameters.
+     */
+    vfio_pci_migration_multifd_transfer_prop = qdev_prop_on_off_auto;
+    vfio_pci_migration_multifd_transfer_prop.realized_set_allowed = true;
+
     type_register_static(&vfio_pci_dev_info);
     type_register_static(&vfio_pci_nohotplug_dev_info);
 }
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 43c166680abbc..d94ecaba689c4 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -154,13 +154,16 @@ struct VFIOPCIDevice {
 #define VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT 2
 #define VFIO_FEATURE_ENABLE_IGD_OPREGION \
                                 (1 << VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT)
+#define VFIO_FEATURE_ENABLE_IGD_LPC_BIT 3
+#define VFIO_FEATURE_ENABLE_IGD_LPC \
+                                (1 << VFIO_FEATURE_ENABLE_IGD_LPC_BIT)
     OnOffAuto display;
     uint32_t display_xres;
     uint32_t display_yres;
     int32_t bootindex;
+    OnOffAuto igd_legacy_mode;
     uint32_t igd_gms;
     OffAutoPCIBAR msix_relo;
-    uint8_t pm_cap;
     uint8_t nv_gpudirect_clique;
     bool pci_aer;
     bool req_enabled;
@@ -205,6 +208,7 @@ uint64_t vfio_vga_read(void *opaque, hwaddr addr, unsigned size);
 void vfio_vga_write(void *opaque, hwaddr addr, uint64_t data, unsigned size);
 
 bool vfio_opt_rom_in_denylist(VFIOPCIDevice *vdev);
+bool vfio_config_quirk_setup(VFIOPCIDevice *vdev, Error **errp);
 void vfio_vga_quirk_setup(VFIOPCIDevice *vdev);
 void vfio_vga_quirk_exit(VFIOPCIDevice *vdev);
 void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev);
@@ -216,7 +220,7 @@ bool vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp);
 void vfio_quirk_reset(VFIOPCIDevice *vdev);
 VFIOQuirk *vfio_quirk_alloc(int nr_mem);
 void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr);
-void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr);
+bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp);
 
 extern const PropertyInfo qdev_prop_nv_gpudirect_clique;
 
@@ -228,10 +232,6 @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev,
 
 bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp);
 
-bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
-                                struct vfio_region_info *info,
-                                Error **errp);
-
 void vfio_display_reset(VFIOPCIDevice *vdev);
 bool vfio_display_probe(VFIOPCIDevice *vdev, Error **errp);
 void vfio_display_finalize(VFIOPCIDevice *vdev);
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
index 1070a2113a17e..67bc57409c1f5 100644
--- a/hw/vfio/platform.c
+++ b/hw/vfio/platform.c
@@ -575,6 +575,7 @@ static void vfio_platform_realize(DeviceState *dev, Error **errp)
     VFIODevice *vbasedev = &vdev->vbasedev;
     int i;
 
+    warn_report("-device vfio-platform is deprecated");
     qemu_mutex_init(&vdev->intp_mutex);
 
     trace_vfio_platform_realize(vbasedev->sysfsdev ?
@@ -672,13 +673,35 @@ static void vfio_platform_class_init(ObjectClass *klass, void *data)
     dc->desc = "VFIO-based platform device assignment";
     sbc->connect_irq_notifier = vfio_start_irqfd_injection;
     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
-    /* Supported by TYPE_VIRT_MACHINE */
-    dc->user_creatable = true;
+
+    object_class_property_set_description(klass, /* 2.4 */
+                                          "host",
+                                          "Host device name of assigned device");
+    object_class_property_set_description(klass, /* 2.4 and 2.5 */
+                                          "x-no-mmap",
+                                          "Disable MMAP for device. Allows to trace MMIO "
+                                          "accesses (DEBUG)");
+    object_class_property_set_description(klass, /* 2.4 */
+                                          "mmap-timeout-ms",
+                                          "When EOI is not provided by KVM/QEMU, wait time "
+                                          "(milliseconds) to re-enable device direct access "
+                                          "after level interrupt (DEBUG)");
+    object_class_property_set_description(klass, /* 2.4 */
+                                          "x-irqfd",
+                                          "Allow disabling irqfd support (DEBUG)");
+    object_class_property_set_description(klass, /* 2.6 */
+                                          "sysfsdev",
+                                          "Host sysfs path of assigned device");
+#ifdef CONFIG_IOMMUFD
+    object_class_property_set_description(klass, /* 9.0 */
+                                          "iommufd",
+                                          "Set host IOMMUFD backend device");
+#endif
 }
 
 static const TypeInfo vfio_platform_dev_info = {
     .name = TYPE_VFIO_PLATFORM,
-    .parent = TYPE_SYS_BUS_DEVICE,
+    .parent = TYPE_DYNAMIC_SYS_BUS_DEVICE,
     .instance_size = sizeof(VFIOPlatformDevice),
     .instance_init = vfio_platform_instance_init,
     .class_init = vfio_platform_class_init,
diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
index ad4c499eafea2..1a5d1611f2cdc 100644
--- a/hw/vfio/spapr.c
+++ b/hw/vfio/spapr.c
@@ -11,10 +11,8 @@
 #include "qemu/osdep.h"
 #include <sys/ioctl.h>
 #include <linux/vfio.h>
-#ifdef CONFIG_KVM
-#include <linux/kvm.h>
-#endif
 #include "system/kvm.h"
+#include "system/hostmem.h"
 #include "exec/address-spaces.h"
 
 #include "hw/vfio/vfio-common.h"
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index cab1cf1de0a29..9347e3a5f6607 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -120,6 +120,7 @@ vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype
 vfio_legacy_dma_unmap_overflow_workaround(void) ""
 vfio_get_dirty_bitmap(uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start, uint64_t dirty_pages) "iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64" dirty_pages=%"PRIu64
 vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64
+vfio_reset_handler(void) ""
 
 # platform.c
 vfio_platform_realize(char *name, char *compat) "vfio device %s, compat = %s"
@@ -148,10 +149,19 @@ vfio_display_edid_update(uint32_t prefx, uint32_t prefy) "%ux%u"
 vfio_display_edid_write_error(void) ""
 
 # migration.c
+vfio_load_bufs_thread_start(const char *name) " (%s)"
+vfio_load_bufs_thread_end(const char *name) " (%s)"
 vfio_load_cleanup(const char *name) " (%s)"
-vfio_load_device_config_state(const char *name) " (%s)"
+vfio_load_device_config_state_start(const char *name) " (%s)"
+vfio_load_device_config_state_end(const char *name) " (%s)"
 vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64
 vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size %"PRIu64" ret %d"
+vfio_load_state_device_buffer_incoming(const char *name, uint32_t idx) " (%s) idx %"PRIu32
+vfio_load_state_device_buffer_start(const char *name) " (%s)"
+vfio_load_state_device_buffer_starved(const char *name, uint32_t idx) " (%s) idx %"PRIu32
+vfio_load_state_device_buffer_load_start(const char *name, uint32_t idx) " (%s) idx %"PRIu32
+vfio_load_state_device_buffer_load_end(const char *name, uint32_t idx) " (%s) idx %"PRIu32
+vfio_load_state_device_buffer_end(const char *name) " (%s)"
 vfio_migration_realize(const char *name) " (%s)"
 vfio_migration_set_device_state(const char *name, const char *state) " (%s) state %s"
 vfio_migration_set_state(const char *name, const char *new_state, const char *recover_state) " (%s) new state %s, recover state %s"
@@ -161,6 +171,8 @@ vfio_save_block_precopy_empty_hit(const char *name) " (%s)"
 vfio_save_cleanup(const char *name) " (%s)"
 vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d"
 vfio_save_complete_precopy_start(const char *name) " (%s)"
+vfio_save_complete_precopy_thread_start(const char *name, const char *idstr, uint32_t instance_id) " (%s) idstr %s instance %"PRIu32
+vfio_save_complete_precopy_thread_end(const char *name, int ret) " (%s) ret %d"
 vfio_save_device_config_state(const char *name) " (%s)"
 vfio_save_iterate(const char *name, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy initial size %"PRIu64" precopy dirty size %"PRIu64
 vfio_save_iterate_start(const char *name) " (%s)"
diff --git a/hw/virtio/iothread-vq-mapping.c b/hw/virtio/iothread-vq-mapping.c
new file mode 100644
index 0000000000000..15909eb9332a5
--- /dev/null
+++ b/hw/virtio/iothread-vq-mapping.c
@@ -0,0 +1,131 @@
+/*
+ * IOThread Virtqueue Mapping
+ *
+ * Copyright Red Hat, Inc
+ *
+ * SPDX-License-Identifier: GPL-2.0-only
+ */
+
+#include "qemu/osdep.h"
+#include "system/iothread.h"
+#include "hw/virtio/iothread-vq-mapping.h"
+
+static bool
+iothread_vq_mapping_validate(IOThreadVirtQueueMappingList *list, uint16_t
+        num_queues, Error **errp)
+{
+    g_autofree unsigned long *vqs = bitmap_new(num_queues);
+    g_autoptr(GHashTable) iothreads =
+        g_hash_table_new(g_str_hash, g_str_equal);
+
+    for (IOThreadVirtQueueMappingList *node = list; node; node = node->next) {
+        const char *name = node->value->iothread;
+        uint16List *vq;
+
+        if (!iothread_by_id(name)) {
+            error_setg(errp, "IOThread \"%s\" object does not exist", name);
+            return false;
+        }
+
+        if (!g_hash_table_add(iothreads, (gpointer)name)) {
+            error_setg(errp,
+                    "duplicate IOThread name \"%s\" in iothread-vq-mapping",
+                    name);
+            return false;
+        }
+
+        if (node != list) {
+            if (!!node->value->vqs != !!list->value->vqs) {
+                error_setg(errp, "either all items in iothread-vq-mapping "
+                                 "must have vqs or none of them must have it");
+                return false;
+            }
+        }
+
+        for (vq = node->value->vqs; vq; vq = vq->next) {
+            if (vq->value >= num_queues) {
+                error_setg(errp, "vq index %u for IOThread \"%s\" must be "
+                        "less than num_queues %u in iothread-vq-mapping",
+                        vq->value, name, num_queues);
+                return false;
+            }
+
+            if (test_and_set_bit(vq->value, vqs)) {
+                error_setg(errp, "cannot assign vq %u to IOThread \"%s\" "
+                        "because it is already assigned", vq->value, name);
+                return false;
+            }
+        }
+    }
+
+    if (list->value->vqs) {
+        for (uint16_t i = 0; i < num_queues; i++) {
+            if (!test_bit(i, vqs)) {
+                error_setg(errp,
+                        "missing vq %u IOThread assignment in iothread-vq-mapping",
+                        i);
+                return false;
+            }
+        }
+    }
+
+    return true;
+}
+
+bool iothread_vq_mapping_apply(
+        IOThreadVirtQueueMappingList *list,
+        AioContext **vq_aio_context,
+        uint16_t num_queues,
+        Error **errp)
+{
+    IOThreadVirtQueueMappingList *node;
+    size_t num_iothreads = 0;
+    size_t cur_iothread = 0;
+
+    if (!iothread_vq_mapping_validate(list, num_queues, errp)) {
+        return false;
+    }
+
+    for (node = list; node; node = node->next) {
+        num_iothreads++;
+    }
+
+    for (node = list; node; node = node->next) {
+        IOThread *iothread = iothread_by_id(node->value->iothread);
+        AioContext *ctx = iothread_get_aio_context(iothread);
+
+        /* Released in virtio_blk_vq_aio_context_cleanup() */
+        object_ref(OBJECT(iothread));
+
+        if (node->value->vqs) {
+            uint16List *vq;
+
+            /* Explicit vq:IOThread assignment */
+            for (vq = node->value->vqs; vq; vq = vq->next) {
+                assert(vq->value < num_queues);
+                vq_aio_context[vq->value] = ctx;
+            }
+        } else {
+            /* Round-robin vq:IOThread assignment */
+            for (unsigned i = cur_iothread; i < num_queues;
+                 i += num_iothreads) {
+                vq_aio_context[i] = ctx;
+            }
+        }
+
+        cur_iothread++;
+    }
+
+    return true;
+}
+
+void iothread_vq_mapping_cleanup(IOThreadVirtQueueMappingList *list)
+{
+    IOThreadVirtQueueMappingList *node;
+
+    for (node = list; node; node = node->next) {
+        IOThread *iothread = iothread_by_id(node->value->iothread);
+        object_unref(OBJECT(iothread));
+    }
+}
+
diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
index a5f9f7999dde1..19b04c4d9ccbd 100644
--- a/hw/virtio/meson.build
+++ b/hw/virtio/meson.build
@@ -1,5 +1,6 @@
 system_virtio_ss = ss.source_set()
 system_virtio_ss.add(files('virtio-bus.c'))
+system_virtio_ss.add(files('iothread-vq-mapping.c'))
 system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('virtio-pci.c'))
 system_virtio_ss.add(when: 'CONFIG_VIRTIO_MMIO', if_true: files('virtio-mmio.c'))
 system_virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c'))
diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index 04e36ae047db3..76f0d458b29c0 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -108,7 +108,7 @@ virtio_pci_notify_write(uint64_t addr, uint64_t val, unsigned int size) "0x%" PR
 virtio_pci_notify_write_pio(uint64_t addr, uint64_t val, unsigned int size) "0x%" PRIx64" = 0x%" PRIx64 " (%d)"
 
 # hw/virtio/virtio-iommu.c
-virtio_iommu_device_reset(void) "reset!"
+virtio_iommu_device_reset_exit(void) "reset!"
 virtio_iommu_system_reset(void) "system reset!"
 virtio_iommu_get_features(uint64_t features) "device supports features=0x%"PRIx64
 virtio_iommu_device_status(uint8_t status) "driver status = %d"
diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c
index 3d03395a77b53..fa4147b773288 100644
--- a/hw/virtio/vhost-iova-tree.c
+++ b/hw/virtio/vhost-iova-tree.c
@@ -28,12 +28,18 @@ struct VhostIOVATree {
 
     /* IOVA address to qemu memory maps. */
     IOVATree *iova_taddr_map;
+
+    /* Allocated IOVA addresses */
+    IOVATree *iova_map;
+
+    /* GPA->IOVA address memory maps */
+    IOVATree *gpa_iova_map;
 };
 
 /**
- * Create a new IOVA tree
+ * Create a new VhostIOVATree
  *
- * Returns the new IOVA tree
+ * Returns the new VhostIOVATree.
  */
 VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last)
 {
@@ -44,25 +50,29 @@ VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last)
     tree->iova_last = iova_last;
 
     tree->iova_taddr_map = iova_tree_new();
+    tree->iova_map = iova_tree_new();
+    tree->gpa_iova_map = gpa_tree_new();
     return tree;
 }
 
 /**
- * Delete an iova tree
+ * Delete a VhostIOVATree
  */
 void vhost_iova_tree_delete(VhostIOVATree *iova_tree)
 {
     iova_tree_destroy(iova_tree->iova_taddr_map);
+    iova_tree_destroy(iova_tree->iova_map);
+    iova_tree_destroy(iova_tree->gpa_iova_map);
     g_free(iova_tree);
 }
 
 /**
  * Find the IOVA address stored from a memory address
  *
- * @tree: The iova tree
+ * @tree: The VhostIOVATree
  * @map: The map with the memory address
  *
- * Return the stored mapping, or NULL if not found.
+ * Returns the stored IOVA->HVA mapping, or NULL if not found.
  */
 const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *tree,
                                         const DMAMap *map)
@@ -71,40 +81,111 @@ const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *tree,
 }
 
 /**
- * Allocate a new mapping
+ * Allocate a new IOVA range and add the mapping to the IOVA->HVA tree
  *
- * @tree: The iova tree
- * @map: The iova map
+ * @tree: The VhostIOVATree
+ * @map: The IOVA mapping
+ * @taddr: The translated address (HVA)
  *
  * Returns:
  * - IOVA_OK if the map fits in the container
  * - IOVA_ERR_INVALID if the map does not make sense (like size overflow)
  * - IOVA_ERR_NOMEM if tree cannot allocate more space.
  *
- * It returns assignated iova in map->iova if return value is VHOST_DMA_MAP_OK.
+ * It returns an assigned IOVA in map->iova if the return value is IOVA_OK.
  */
-int vhost_iova_tree_map_alloc(VhostIOVATree *tree, DMAMap *map)
+int vhost_iova_tree_map_alloc(VhostIOVATree *tree, DMAMap *map, hwaddr taddr)
 {
+    int ret;
+
     /* Some vhost devices do not like addr 0. Skip first page */
     hwaddr iova_first = tree->iova_first ?: qemu_real_host_page_size();
 
-    if (map->translated_addr + map->size < map->translated_addr ||
-        map->perm == IOMMU_NONE) {
+    if (taddr + map->size < taddr || map->perm == IOMMU_NONE) {
         return IOVA_ERR_INVALID;
     }
 
-    /* Allocate a node in IOVA address */
-    return iova_tree_alloc_map(tree->iova_taddr_map, map, iova_first,
-                               tree->iova_last);
+    /* Allocate a node in the IOVA-only tree */
+    ret = iova_tree_alloc_map(tree->iova_map, map, iova_first, tree->iova_last);
+    if (unlikely(ret != IOVA_OK)) {
+        return ret;
+    }
+
+    /* Insert a node in the IOVA->HVA tree */
+    map->translated_addr = taddr;
+    return iova_tree_insert(tree->iova_taddr_map, map);
 }
 
 /**
- * Remove existing mappings from iova tree
+ * Remove existing mappings from the IOVA-only and IOVA->HVA trees
  *
- * @iova_tree: The vhost iova tree
+ * @iova_tree: The VhostIOVATree
  * @map: The map to remove
  */
 void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map)
 {
     iova_tree_remove(iova_tree->iova_taddr_map, map);
+    iova_tree_remove(iova_tree->iova_map, map);
+}
+
+/**
+ * Find the IOVA address stored from a guest memory address (GPA)
+ *
+ * @tree: The VhostIOVATree
+ * @map: The map with the guest memory address
+ *
+ * Returns the stored GPA->IOVA mapping, or NULL if not found.
+ */
+const DMAMap *vhost_iova_tree_find_gpa(const VhostIOVATree *tree,
+                                       const DMAMap *map)
+{
+    return iova_tree_find_iova(tree->gpa_iova_map, map);
+}
+
+/**
+ * Allocate a new IOVA range and add the mapping to the GPA->IOVA tree
+ *
+ * @tree: The VhostIOVATree
+ * @map: The IOVA mapping
+ * @taddr: The translated address (GPA)
+ *
+ * Returns:
+ * - IOVA_OK if the map fits both containers
+ * - IOVA_ERR_INVALID if the map does not make sense (like size overflow)
+ * - IOVA_ERR_NOMEM if the IOVA-only tree cannot allocate more space
+ *
+ * It returns an assigned IOVA in map->iova if the return value is IOVA_OK.
+ */
+int vhost_iova_tree_map_alloc_gpa(VhostIOVATree *tree, DMAMap *map, hwaddr taddr)
+{
+    int ret;
+
+    /* Some vhost devices don't like addr 0. Skip first page */
+    hwaddr iova_first = tree->iova_first ?: qemu_real_host_page_size();
+
+    if (taddr + map->size < taddr || map->perm == IOMMU_NONE) {
+        return IOVA_ERR_INVALID;
+    }
+
+    /* Allocate a node in the IOVA-only tree */
+    ret = iova_tree_alloc_map(tree->iova_map, map, iova_first, tree->iova_last);
+    if (unlikely(ret != IOVA_OK)) {
+        return ret;
+    }
+
+    /* Insert a node in the GPA->IOVA tree */
+    map->translated_addr = taddr;
+    return gpa_tree_insert(tree->gpa_iova_map, map);
+}
+
+/**
+ * Remove existing mappings from the IOVA-only and GPA->IOVA trees
+ *
+ * @tree: The VhostIOVATree
+ * @map: The map to remove
+ */
+void vhost_iova_tree_remove_gpa(VhostIOVATree *iova_tree, DMAMap map)
+{
+    iova_tree_remove(iova_tree->gpa_iova_map, map);
+    iova_tree_remove(iova_tree->iova_map, map);
 }
diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h
index 4adfd79ff0390..0c4ba5abd5201 100644
--- a/hw/virtio/vhost-iova-tree.h
+++ b/hw/virtio/vhost-iova-tree.h
@@ -21,7 +21,13 @@ G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostIOVATree, vhost_iova_tree_delete);
 
 const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *iova_tree,
                                         const DMAMap *map);
-int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map);
+int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map,
+                              hwaddr taddr);
 void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map);
+const DMAMap *vhost_iova_tree_find_gpa(const VhostIOVATree *iova_tree,
+                                       const DMAMap *map);
+int vhost_iova_tree_map_alloc_gpa(VhostIOVATree *iova_tree, DMAMap *map,
+                                  hwaddr taddr);
+void vhost_iova_tree_remove_gpa(VhostIOVATree *iova_tree, DMAMap map);
 
 #endif
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index 37aca8b43186d..2481d49345dd5 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -78,24 +78,39 @@ uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
  * @vaddr: Translated IOVA addresses
  * @iovec: Source qemu's VA addresses
  * @num: Length of iovec and minimum length of vaddr
+ * @gpas: Descriptors' GPAs, if backed by guest memory
  */
 static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
                                      hwaddr *addrs, const struct iovec *iovec,
-                                     size_t num)
+                                     size_t num, const hwaddr *gpas)
 {
     if (num == 0) {
         return true;
     }
 
     for (size_t i = 0; i < num; ++i) {
-        DMAMap needle = {
-            .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base,
-            .size = iovec[i].iov_len,
-        };
         Int128 needle_last, map_last;
         size_t off;
+        const DMAMap *map;
+        DMAMap needle;
+
+        /* Check if the descriptor is backed by guest memory  */
+        if (gpas) {
+            /* Search the GPA->IOVA tree */
+            needle = (DMAMap) {
+                .translated_addr = gpas[i],
+                .size = iovec[i].iov_len,
+            };
+            map = vhost_iova_tree_find_gpa(svq->iova_tree, &needle);
+        } else {
+            /* Search the IOVA->HVA tree */
+            needle = (DMAMap) {
+                .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base,
+                .size = iovec[i].iov_len,
+            };
+            map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
+        }
 
-        const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
         /*
          * Map cannot be NULL since iova map contains all guest space and
          * qemu already has a physical address mapped
@@ -130,6 +145,7 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
  * @sg: Cache for hwaddr
  * @iovec: The iovec from the guest
  * @num: iovec length
+ * @addr: Descriptors' GPAs, if backed by guest memory
  * @more_descs: True if more descriptors come in the chain
  * @write: True if they are writeable descriptors
  *
@@ -137,7 +153,8 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
  */
 static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
                                         const struct iovec *iovec, size_t num,
-                                        bool more_descs, bool write)
+                                        const hwaddr *addr, bool more_descs,
+                                        bool write)
 {
     uint16_t i = svq->free_head, last = svq->free_head;
     unsigned n;
@@ -149,7 +166,7 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
         return true;
     }
 
-    ok = vhost_svq_translate_addr(svq, sg, iovec, num);
+    ok = vhost_svq_translate_addr(svq, sg, iovec, num, addr);
     if (unlikely(!ok)) {
         return false;
     }
@@ -165,17 +182,18 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
         descs[i].len = cpu_to_le32(iovec[n].iov_len);
 
         last = i;
-        i = cpu_to_le16(svq->desc_next[i]);
+        i = svq->desc_next[i];
     }
 
-    svq->free_head = le16_to_cpu(svq->desc_next[last]);
+    svq->free_head = svq->desc_next[last];
     return true;
 }
 
 static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
                                 const struct iovec *out_sg, size_t out_num,
+                                const hwaddr *out_addr,
                                 const struct iovec *in_sg, size_t in_num,
-                                unsigned *head)
+                                const hwaddr *in_addr, unsigned *head)
 {
     unsigned avail_idx;
     vring_avail_t *avail = svq->vring.avail;
@@ -191,13 +209,14 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
         return false;
     }
 
-    ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0,
-                                     false);
+    ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, out_addr,
+                                     in_num > 0, false);
     if (unlikely(!ok)) {
         return false;
     }
 
-    ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true);
+    ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, in_addr, false,
+                                     true);
     if (unlikely(!ok)) {
         return false;
     }
@@ -228,10 +247,12 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
     smp_mb();
 
     if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
-        uint16_t avail_event = *(uint16_t *)(&svq->vring.used->ring[svq->vring.num]);
+        uint16_t avail_event = le16_to_cpu(
+                *(uint16_t *)(&svq->vring.used->ring[svq->vring.num]));
         needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx, svq->shadow_avail_idx - 1);
     } else {
-        needs_kick = !(svq->vring.used->flags & VRING_USED_F_NO_NOTIFY);
+        needs_kick =
+                !(svq->vring.used->flags & cpu_to_le16(VRING_USED_F_NO_NOTIFY));
     }
 
     if (!needs_kick) {
@@ -247,8 +268,9 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
  * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full
  */
 int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
-                  size_t out_num, const struct iovec *in_sg, size_t in_num,
-                  VirtQueueElement *elem)
+                  size_t out_num, const hwaddr *out_addr,
+                  const struct iovec *in_sg, size_t in_num,
+                  const hwaddr *in_addr, VirtQueueElement *elem)
 {
     unsigned qemu_head;
     unsigned ndescs = in_num + out_num;
@@ -258,7 +280,8 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
         return -ENOSPC;
     }
 
-    ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head);
+    ok = vhost_svq_add_split(svq, out_sg, out_num, out_addr, in_sg, in_num,
+                             in_addr, &qemu_head);
     if (unlikely(!ok)) {
         return -EINVAL;
     }
@@ -274,8 +297,8 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
 static int vhost_svq_add_element(VhostShadowVirtqueue *svq,
                                  VirtQueueElement *elem)
 {
-    return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->in_sg,
-                         elem->in_num, elem);
+    return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->out_addr,
+                         elem->in_sg, elem->in_num, elem->in_addr, elem);
 }
 
 /**
@@ -365,7 +388,7 @@ static bool vhost_svq_more_used(VhostShadowVirtqueue *svq)
         return true;
     }
 
-    svq->shadow_used_idx = cpu_to_le16(*(volatile uint16_t *)used_idx);
+    svq->shadow_used_idx = le16_to_cpu(*(volatile uint16_t *)used_idx);
 
     return svq->last_used_idx != svq->shadow_used_idx;
 }
@@ -383,7 +406,7 @@ static bool vhost_svq_enable_notification(VhostShadowVirtqueue *svq)
 {
     if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
         uint16_t *used_event = (uint16_t *)&svq->vring.avail->ring[svq->vring.num];
-        *used_event = svq->shadow_used_idx;
+        *used_event = cpu_to_le16(svq->shadow_used_idx);
     } else {
         svq->vring.avail->flags &= ~cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
     }
@@ -408,7 +431,7 @@ static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq,
                                              uint16_t num, uint16_t i)
 {
     for (uint16_t j = 0; j < (num - 1); ++j) {
-        i = le16_to_cpu(svq->desc_next[i]);
+        i = svq->desc_next[i];
     }
 
     return i;
@@ -683,7 +706,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
     svq->desc_state = g_new0(SVQDescState, svq->vring.num);
     svq->desc_next = g_new0(uint16_t, svq->vring.num);
     for (unsigned i = 0; i < svq->vring.num - 1; i++) {
-        svq->desc_next[i] = cpu_to_le16(i + 1);
+        svq->desc_next[i] = i + 1;
     }
 }
 
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index 19c842a15b72e..9c273739d6df9 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -118,8 +118,9 @@ uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq);
 void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
                          const VirtQueueElement *elem, uint32_t len);
 int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
-                  size_t out_num, const struct iovec *in_sg, size_t in_num,
-                  VirtQueueElement *elem);
+                  size_t out_num, const hwaddr *out_addr,
+                  const struct iovec *in_sg, size_t in_num,
+                  const hwaddr *in_addr, VirtQueueElement *elem);
 size_t vhost_svq_poll(VhostShadowVirtqueue *svq, size_t num);
 
 void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
diff --git a/hw/virtio/vhost-user-snd.c b/hw/virtio/vhost-user-snd.c
index 8610370af806a..b414c75c0619a 100644
--- a/hw/virtio/vhost-user-snd.c
+++ b/hw/virtio/vhost-user-snd.c
@@ -16,6 +16,18 @@
 #include "standard-headers/linux/virtio_ids.h"
 #include "standard-headers/linux/virtio_snd.h"
 
+static const VirtIOFeature feature_sizes[] = {
+    {.flags = 1ULL << VIRTIO_SND_F_CTLS,
+    .end = endof(struct virtio_snd_config, controls)},
+    {}
+};
+
+static const VirtIOConfigSizeParams cfg_size_params = {
+    .min_size = endof(struct virtio_snd_config, chmaps),
+    .max_size = sizeof(struct virtio_snd_config),
+    .feature_sizes = feature_sizes
+};
+
 static const VMStateDescription vu_snd_vmstate = {
     .name = "vhost-user-snd",
     .unmigratable = 1,
@@ -23,16 +35,20 @@ static const VMStateDescription vu_snd_vmstate = {
 
 static const Property vsnd_properties[] = {
     DEFINE_PROP_CHR("chardev", VHostUserBase, chardev),
+    DEFINE_PROP_BIT64("controls", VHostUserBase,
+                      parent_obj.host_features, VIRTIO_SND_F_CTLS, false),
 };
 
 static void vu_snd_base_realize(DeviceState *dev, Error **errp)
 {
     VHostUserBase *vub = VHOST_USER_BASE(dev);
     VHostUserBaseClass *vubs = VHOST_USER_BASE_GET_CLASS(dev);
+    VirtIODevice *vdev = &vub->parent_obj;
 
     vub->virtio_id = VIRTIO_ID_SOUND;
     vub->num_vqs = 4;
-    vub->config_size = sizeof(struct virtio_snd_config);
+    vub->config_size = virtio_get_config_size(&cfg_size_params,
+                                              vdev->host_features);
     vub->vq_size = 64;
 
     vubs->parent_realize(dev, errp);
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 3cdaa12ed5ec6..7efbde3d4c1c4 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -360,14 +360,20 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
     llsize = int128_sub(llend, int128_make64(iova));
     if (s->shadow_data) {
         int r;
+        hwaddr gpa = section->offset_within_address_space;
 
-        mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr,
         mem_region.size = int128_get64(llsize) - 1,
         mem_region.perm = IOMMU_ACCESS_FLAG(true, section->readonly),
 
-        r = vhost_iova_tree_map_alloc(s->iova_tree, &mem_region);
+        r = vhost_iova_tree_map_alloc_gpa(s->iova_tree, &mem_region, gpa);
         if (unlikely(r != IOVA_OK)) {
             error_report("Can't allocate a mapping (%d)", r);
+
+            if (mem_region.translated_addr == gpa) {
+                error_report("Insertion to GPA->IOVA tree failed");
+                /* Remove the mapping from the IOVA-only tree */
+                goto fail_map;
+            }
             goto fail;
         }
 
@@ -386,7 +392,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
 
 fail_map:
     if (s->shadow_data) {
-        vhost_iova_tree_remove(s->iova_tree, mem_region);
+        vhost_iova_tree_remove_gpa(s->iova_tree, mem_region);
     }
 
 fail:
@@ -440,21 +446,18 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
 
     if (s->shadow_data) {
         const DMAMap *result;
-        const void *vaddr = memory_region_get_ram_ptr(section->mr) +
-            section->offset_within_region +
-            (iova - section->offset_within_address_space);
         DMAMap mem_region = {
-            .translated_addr = (hwaddr)(uintptr_t)vaddr,
+            .translated_addr = section->offset_within_address_space,
             .size = int128_get64(llsize) - 1,
         };
 
-        result = vhost_iova_tree_find_iova(s->iova_tree, &mem_region);
+        result = vhost_iova_tree_find_gpa(s->iova_tree, &mem_region);
         if (!result) {
             /* The memory listener map wasn't mapped */
             return;
         }
         iova = result->iova;
-        vhost_iova_tree_remove(s->iova_tree, *result);
+        vhost_iova_tree_remove_gpa(s->iova_tree, *result);
     }
     vhost_vdpa_iotlb_batch_begin_once(s);
     /*
@@ -1142,16 +1145,23 @@ static void vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
  *
  * @v: Vhost-vdpa device
  * @needle: The area to search iova
+ * @taddr: The translated address (HVA)
  * @errorp: Error pointer
  */
 static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle,
-                                    Error **errp)
+                                    hwaddr taddr, Error **errp)
 {
     int r;
 
-    r = vhost_iova_tree_map_alloc(v->shared->iova_tree, needle);
+    r = vhost_iova_tree_map_alloc(v->shared->iova_tree, needle, taddr);
     if (unlikely(r != IOVA_OK)) {
         error_setg(errp, "Cannot allocate iova (%d)", r);
+
+        if (needle->translated_addr == taddr) {
+            error_append_hint(errp, "Insertion to IOVA->HVA tree failed");
+            /* Remove the mapping from the IOVA-only tree */
+            vhost_iova_tree_remove(v->shared->iova_tree, *needle);
+        }
         return false;
     }
 
@@ -1192,11 +1202,11 @@ static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
     vhost_svq_get_vring_addr(svq, &svq_addr);
 
     driver_region = (DMAMap) {
-        .translated_addr = svq_addr.desc_user_addr,
         .size = driver_size - 1,
         .perm = IOMMU_RO,
     };
-    ok = vhost_vdpa_svq_map_ring(v, &driver_region, errp);
+    ok = vhost_vdpa_svq_map_ring(v, &driver_region, svq_addr.desc_user_addr,
+                                 errp);
     if (unlikely(!ok)) {
         error_prepend(errp, "Cannot create vq driver region: ");
         return false;
@@ -1206,11 +1216,11 @@ static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
     addr->avail_user_addr = driver_region.iova + avail_offset;
 
     device_region = (DMAMap) {
-        .translated_addr = svq_addr.used_user_addr,
         .size = device_size - 1,
         .perm = IOMMU_RW,
     };
-    ok = vhost_vdpa_svq_map_ring(v, &device_region, errp);
+    ok = vhost_vdpa_svq_map_ring(v, &device_region, svq_addr.used_user_addr,
+                                 errp);
     if (unlikely(!ok)) {
         error_prepend(errp, "Cannot create vq device region: ");
         vhost_vdpa_svq_unmap_ring(v, driver_region.translated_addr);
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index ad05768dedc43..2eb5a14fa2ce6 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -31,7 +31,7 @@
 #include "trace.h"
 #include "qemu/error-report.h"
 #include "migration/misc.h"
-
+#include "system/reset.h"
 #include "hw/virtio/virtio-bus.h"
 #include "hw/virtio/virtio-access.h"
 
@@ -910,6 +910,8 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
     }
 
     reset_stats(s);
+    s->stats_last_update = 0;
+    qemu_register_resettable(OBJECT(dev));
 }
 
 static void virtio_balloon_device_unrealize(DeviceState *dev)
@@ -917,6 +919,7 @@ static void virtio_balloon_device_unrealize(DeviceState *dev)
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
     VirtIOBalloon *s = VIRTIO_BALLOON(dev);
 
+    qemu_unregister_resettable(OBJECT(dev));
     if (s->free_page_bh) {
         qemu_bh_delete(s->free_page_bh);
         object_unref(OBJECT(s->iothread));
@@ -987,6 +990,27 @@ static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status)
     }
 }
 
+static ResettableState *virtio_balloon_get_reset_state(Object *obj)
+{
+    VirtIOBalloon *s = VIRTIO_BALLOON(obj);
+    return &s->reset_state;
+}
+
+static void virtio_balloon_reset_enter(Object *obj, ResetType type)
+{
+    VirtIOBalloon *s = VIRTIO_BALLOON(obj);
+
+    /*
+     * When waking up from standby/suspend-to-ram, do not reset stats.
+     */
+    if (type == RESET_TYPE_WAKEUP) {
+        return;
+    }
+
+    reset_stats(s);
+    s->stats_last_update = 0;
+}
+
 static void virtio_balloon_instance_init(Object *obj)
 {
     VirtIOBalloon *s = VIRTIO_BALLOON(obj);
@@ -1038,6 +1062,7 @@ static void virtio_balloon_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+    ResettableClass *rc = RESETTABLE_CLASS(klass);
 
     device_class_set_props(dc, virtio_balloon_properties);
     dc->vmsd = &vmstate_virtio_balloon;
@@ -1050,6 +1075,9 @@ static void virtio_balloon_class_init(ObjectClass *klass, void *data)
     vdc->get_features = virtio_balloon_get_features;
     vdc->set_status = virtio_balloon_set_status;
     vdc->vmsd = &vmstate_virtio_balloon_device;
+
+    rc->get_state = virtio_balloon_get_reset_state;
+    rc->phases.enter = virtio_balloon_reset_enter;
 }
 
 static const TypeInfo virtio_balloon_info = {
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index f41104a952369..b6e7e01ef7425 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -1504,11 +1504,11 @@ static void virtio_iommu_device_unrealize(DeviceState *dev)
     virtio_cleanup(vdev);
 }
 
-static void virtio_iommu_device_reset(VirtIODevice *vdev)
+static void virtio_iommu_device_reset_exit(Object *obj, ResetType type)
 {
-    VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
+    VirtIOIOMMU *s = VIRTIO_IOMMU(obj);
 
-    trace_virtio_iommu_device_reset();
+    trace_virtio_iommu_device_reset_exit();
 
     if (s->domains) {
         g_tree_destroy(s->domains);
@@ -1668,6 +1668,7 @@ static void virtio_iommu_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+    ResettableClass *rc = RESETTABLE_CLASS(klass);
 
     device_class_set_props(dc, virtio_iommu_properties);
     dc->vmsd = &vmstate_virtio_iommu;
@@ -1675,7 +1676,12 @@ static void virtio_iommu_class_init(ObjectClass *klass, void *data)
     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
     vdc->realize = virtio_iommu_device_realize;
     vdc->unrealize = virtio_iommu_device_unrealize;
-    vdc->reset = virtio_iommu_device_reset;
+
+    /*
+     * Use 'exit' reset phase to make sure all DMA requests
+     * have been quiesced during 'enter' or 'hold' phase
+     */
+    rc->phases.exit = virtio_iommu_device_reset_exit;
     vdc->get_config = virtio_iommu_get_config;
     vdc->set_config = virtio_iommu_set_config;
     vdc->get_features = virtio_iommu_get_features;
diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
index b1a003736b0a6..5f57eccbb66e5 100644
--- a/hw/virtio/virtio-mem.c
+++ b/hw/virtio/virtio-mem.c
@@ -28,7 +28,7 @@
 #include "migration/misc.h"
 #include "hw/boards.h"
 #include "hw/qdev-properties.h"
-#include CONFIG_DEVICES
+#include "hw/acpi/acpi.h"
 #include "trace.h"
 
 static const VMStateDescription vmstate_virtio_mem_device_early;
@@ -883,10 +883,8 @@ static uint64_t virtio_mem_get_features(VirtIODevice *vdev, uint64_t features,
     MachineState *ms = MACHINE(qdev_get_machine());
     VirtIOMEM *vmem = VIRTIO_MEM(vdev);
 
-    if (ms->numa_state) {
-#if defined(CONFIG_ACPI)
+    if (ms->numa_state && acpi_builtin()) {
         virtio_add_feature(&features, VIRTIO_MEM_F_ACPI_PXM);
-#endif
     }
     assert(vmem->unplugged_inaccessible != ON_OFF_AUTO_AUTO);
     if (vmem->unplugged_inaccessible == ON_OFF_AUTO_ON) {
@@ -991,7 +989,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
         return;
     }
 
-    if (enable_mlock) {
+    if (should_mlock(mlock_state)) {
         error_setg(errp, "Incompatible with mlock");
         return;
     }
diff --git a/hw/virtio/virtio-nsm.c b/hw/virtio/virtio-nsm.c
index 098e1aeac6e24..b22aa74e34d22 100644
--- a/hw/virtio/virtio-nsm.c
+++ b/hw/virtio/virtio-nsm.c
@@ -1596,7 +1596,7 @@ static void handle_input(VirtIODevice *vdev, VirtQueue *vq)
     g_free(req.iov_base);
     g_free(res.iov_base);
     virtqueue_push(vq, out_elem, 0);
-    virtqueue_push(vq, in_elem, in_elem->in_sg->iov_len);
+    virtqueue_push(vq, in_elem, sz);
     virtio_notify(vdev, vq);
     return;
 
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index c773a9130c7e8..3ca3f849d3915 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -2204,14 +2204,11 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
         pos = pcie_endpoint_cap_init(pci_dev, 0);
         assert(pos > 0);
 
-        pos = pci_add_capability(pci_dev, PCI_CAP_ID_PM, 0,
-                                 PCI_PM_SIZEOF, errp);
+        pos = pci_pm_init(pci_dev, 0, errp);
         if (pos < 0) {
             return;
         }
 
-        pci_dev->exp.pm_cap = pos;
-
         /*
          * Indicates that this function complies with revision 1.2 of the
          * PCI Power Management Interface Specification.
@@ -2310,11 +2307,11 @@ static bool virtio_pci_no_soft_reset(PCIDevice *dev)
 {
     uint16_t pmcsr;
 
-    if (!pci_is_express(dev) || !dev->exp.pm_cap) {
+    if (!pci_is_express(dev) || !(dev->cap_present & QEMU_PCI_CAP_PM)) {
         return false;
     }
 
-    pmcsr = pci_get_word(dev->config + dev->exp.pm_cap + PCI_PM_CTRL);
+    pmcsr = pci_get_word(dev->config + dev->pm_cap + PCI_PM_CTRL);
 
     /*
      * When No_Soft_Reset bit is set and the device
@@ -2343,7 +2340,7 @@ static void virtio_pci_bus_reset_hold(Object *obj, ResetType type)
 
         if (proxy->flags & VIRTIO_PCI_FLAG_INIT_PM) {
             pci_word_test_and_clear_mask(
-                dev->config + dev->exp.pm_cap + PCI_PM_CTRL,
+                dev->config + dev->pm_cap + PCI_PM_CTRL,
                 PCI_PM_CTRL_STATE_MASK);
         }
     }
diff --git a/hw/vmapple/Kconfig b/hw/vmapple/Kconfig
new file mode 100644
index 0000000000000..2382b29767227
--- /dev/null
+++ b/hw/vmapple/Kconfig
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+config VMAPPLE_AES
+    bool
+
+config VMAPPLE_BDIF
+    bool
+
+config VMAPPLE_CFG
+    bool
+
+config VMAPPLE_VIRTIO_BLK
+    bool
+
+config VMAPPLE
+    bool
+    depends on ARM
+    depends on HVF
+    default y if ARM
+    imply PCI_DEVICES
+    select ARM_GICV3
+    select PLATFORM_BUS
+    select PCI_EXPRESS
+    select PCI_EXPRESS_GENERIC_BRIDGE
+    select PL011 # UART
+    select PL031 # RTC
+    select PL061 # GPIO
+    select GPIO_PWR
+    select PVPANIC_MMIO
+    select VMAPPLE_AES
+    select VMAPPLE_BDIF
+    select VMAPPLE_CFG
+    select MAC_PVG_MMIO
+    select VMAPPLE_VIRTIO_BLK
diff --git a/hw/vmapple/aes.c b/hw/vmapple/aes.c
new file mode 100644
index 0000000000000..3a7641ab4b00c
--- /dev/null
+++ b/hw/vmapple/aes.c
@@ -0,0 +1,581 @@
+/*
+ * QEMU Apple AES device emulation
+ *
+ * Copyright © 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "trace.h"
+#include "crypto/hash.h"
+#include "crypto/aes.h"
+#include "crypto/cipher.h"
+#include "hw/irq.h"
+#include "hw/sysbus.h"
+#include "hw/vmapple/vmapple.h"
+#include "migration/vmstate.h"
+#include "qemu/cutils.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "system/dma.h"
+
+OBJECT_DECLARE_SIMPLE_TYPE(AESState, APPLE_AES)
+
+#define MAX_FIFO_SIZE     9
+
+#define CMD_KEY           0x1
+#define CMD_KEY_CONTEXT_SHIFT    27
+#define CMD_KEY_CONTEXT_MASK     (0x1 << CMD_KEY_CONTEXT_SHIFT)
+#define CMD_KEY_SELECT_MAX_IDX   0x7
+#define CMD_KEY_SELECT_SHIFT     24
+#define CMD_KEY_SELECT_MASK      (CMD_KEY_SELECT_MAX_IDX << CMD_KEY_SELECT_SHIFT)
+#define CMD_KEY_KEY_LEN_NUM      4u
+#define CMD_KEY_KEY_LEN_SHIFT    22
+#define CMD_KEY_KEY_LEN_MASK     ((CMD_KEY_KEY_LEN_NUM - 1u) << CMD_KEY_KEY_LEN_SHIFT)
+#define CMD_KEY_ENCRYPT_SHIFT    20
+#define CMD_KEY_ENCRYPT_MASK     (0x1 << CMD_KEY_ENCRYPT_SHIFT)
+#define CMD_KEY_BLOCK_MODE_SHIFT 16
+#define CMD_KEY_BLOCK_MODE_MASK  (0x3 << CMD_KEY_BLOCK_MODE_SHIFT)
+#define CMD_IV            0x2
+#define CMD_IV_CONTEXT_SHIFT     26
+#define CMD_IV_CONTEXT_MASK      (0x3 << CMD_KEY_CONTEXT_SHIFT)
+#define CMD_DSB           0x3
+#define CMD_SKG           0x4
+#define CMD_DATA          0x5
+#define CMD_DATA_KEY_CTX_SHIFT   27
+#define CMD_DATA_KEY_CTX_MASK    (0x1 << CMD_DATA_KEY_CTX_SHIFT)
+#define CMD_DATA_IV_CTX_SHIFT    25
+#define CMD_DATA_IV_CTX_MASK     (0x3 << CMD_DATA_IV_CTX_SHIFT)
+#define CMD_DATA_LEN_MASK        0xffffff
+#define CMD_STORE_IV      0x6
+#define CMD_STORE_IV_ADDR_MASK   0xffffff
+#define CMD_WRITE_REG     0x7
+#define CMD_FLAG          0x8
+#define CMD_FLAG_STOP_MASK       BIT(26)
+#define CMD_FLAG_RAISE_IRQ_MASK  BIT(27)
+#define CMD_FLAG_INFO_MASK       0xff
+#define CMD_MAX           0x10
+
+#define CMD_SHIFT         28
+
+#define REG_STATUS            0xc
+#define REG_STATUS_DMA_READ_RUNNING     BIT(0)
+#define REG_STATUS_DMA_READ_PENDING     BIT(1)
+#define REG_STATUS_DMA_WRITE_RUNNING    BIT(2)
+#define REG_STATUS_DMA_WRITE_PENDING    BIT(3)
+#define REG_STATUS_BUSY                 BIT(4)
+#define REG_STATUS_EXECUTING            BIT(5)
+#define REG_STATUS_READY                BIT(6)
+#define REG_STATUS_TEXT_DPA_SEEDED      BIT(7)
+#define REG_STATUS_UNWRAP_DPA_SEEDED    BIT(8)
+
+#define REG_IRQ_STATUS        0x18
+#define REG_IRQ_STATUS_INVALID_CMD      BIT(2)
+#define REG_IRQ_STATUS_FLAG             BIT(5)
+#define REG_IRQ_ENABLE        0x1c
+#define REG_WATERMARK         0x20
+#define REG_Q_STATUS          0x24
+#define REG_FLAG_INFO         0x30
+#define REG_FIFO              0x200
+
+static const uint32_t key_lens[CMD_KEY_KEY_LEN_NUM] = {
+    [0] = 16,
+    [1] = 24,
+    [2] = 32,
+    [3] = 64,
+};
+
+typedef struct Key {
+    uint32_t key_len;
+    uint8_t key[32];
+} Key;
+
+typedef struct IV {
+    uint32_t iv[4];
+} IV;
+
+static Key builtin_keys[CMD_KEY_SELECT_MAX_IDX + 1] = {
+    [1] = {
+        .key_len = 32,
+        .key = { 0x1 },
+    },
+    [2] = {
+        .key_len = 32,
+        .key = { 0x2 },
+    },
+    [3] = {
+        .key_len = 32,
+        .key = { 0x3 },
+    }
+};
+
+struct AESState {
+    SysBusDevice parent_obj;
+
+    qemu_irq irq;
+    MemoryRegion iomem1;
+    MemoryRegion iomem2;
+    AddressSpace *as;
+
+    uint32_t status;
+    uint32_t q_status;
+    uint32_t irq_status;
+    uint32_t irq_enable;
+    uint32_t watermark;
+    uint32_t flag_info;
+    uint32_t fifo[MAX_FIFO_SIZE];
+    uint32_t fifo_idx;
+    Key key[2];
+    IV iv[4];
+    bool is_encrypt;
+    QCryptoCipherMode block_mode;
+};
+
+static void aes_update_irq(AESState *s)
+{
+    qemu_set_irq(s->irq, !!(s->irq_status & s->irq_enable));
+}
+
+static uint64_t aes1_read(void *opaque, hwaddr offset, unsigned size)
+{
+    AESState *s = opaque;
+    uint64_t res = 0;
+
+    switch (offset) {
+    case REG_STATUS:
+        res = s->status;
+        break;
+    case REG_IRQ_STATUS:
+        res = s->irq_status;
+        break;
+    case REG_IRQ_ENABLE:
+        res = s->irq_enable;
+        break;
+    case REG_WATERMARK:
+        res = s->watermark;
+        break;
+    case REG_Q_STATUS:
+        res = s->q_status;
+        break;
+    case REG_FLAG_INFO:
+        res = s->flag_info;
+        break;
+
+    default:
+        qemu_log_mask(LOG_UNIMP, "%s: Unknown AES MMIO offset %" PRIx64 "\n",
+                      __func__, offset);
+        break;
+    }
+
+    trace_aes_read(offset, res);
+
+    return res;
+}
+
+static void fifo_append(AESState *s, uint64_t val)
+{
+    if (s->fifo_idx == MAX_FIFO_SIZE) {
+        /* Exceeded the FIFO. Bail out */
+        return;
+    }
+
+    s->fifo[s->fifo_idx++] = val;
+}
+
+static bool has_payload(AESState *s, uint32_t elems)
+{
+    return s->fifo_idx >= elems + 1;
+}
+
+static bool cmd_key(AESState *s)
+{
+    uint32_t cmd = s->fifo[0];
+    uint32_t key_select = (cmd & CMD_KEY_SELECT_MASK) >> CMD_KEY_SELECT_SHIFT;
+    uint32_t ctxt = (cmd & CMD_KEY_CONTEXT_MASK) >> CMD_KEY_CONTEXT_SHIFT;
+    uint32_t key_len;
+
+    switch ((cmd & CMD_KEY_BLOCK_MODE_MASK) >> CMD_KEY_BLOCK_MODE_SHIFT) {
+    case 0:
+        s->block_mode = QCRYPTO_CIPHER_MODE_ECB;
+        break;
+    case 1:
+        s->block_mode = QCRYPTO_CIPHER_MODE_CBC;
+        break;
+    default:
+        return false;
+    }
+
+    s->is_encrypt = cmd & CMD_KEY_ENCRYPT_MASK;
+    key_len = key_lens[(cmd & CMD_KEY_KEY_LEN_MASK) >> CMD_KEY_KEY_LEN_SHIFT];
+
+    if (key_select) {
+        trace_aes_cmd_key_select_builtin(ctxt, key_select,
+                                         s->is_encrypt ? "en" : "de",
+                                         QCryptoCipherMode_str(s->block_mode));
+        s->key[ctxt] = builtin_keys[key_select];
+    } else {
+        trace_aes_cmd_key_select_new(ctxt, key_len,
+                                     s->is_encrypt ? "en" : "de",
+                                     QCryptoCipherMode_str(s->block_mode));
+        if (key_len > sizeof(s->key[ctxt].key)) {
+            return false;
+        }
+        if (!has_payload(s, key_len / sizeof(uint32_t))) {
+            /* wait for payload */
+            qemu_log_mask(LOG_GUEST_ERROR, "%s: No payload\n", __func__);
+            return false;
+        }
+        memcpy(&s->key[ctxt].key, &s->fifo[1], key_len);
+        s->key[ctxt].key_len = key_len;
+    }
+
+    return true;
+}
+
+static bool cmd_iv(AESState *s)
+{
+    uint32_t cmd = s->fifo[0];
+    uint32_t ctxt = (cmd & CMD_IV_CONTEXT_MASK) >> CMD_IV_CONTEXT_SHIFT;
+
+    if (!has_payload(s, 4)) {
+        /* wait for payload */
+        return false;
+    }
+    memcpy(&s->iv[ctxt].iv, &s->fifo[1], sizeof(s->iv[ctxt].iv));
+    trace_aes_cmd_iv(ctxt, s->fifo[1], s->fifo[2], s->fifo[3], s->fifo[4]);
+
+    return true;
+}
+
+static void dump_data(const char *desc, const void *p, size_t len)
+{
+    static const size_t MAX_LEN = 0x1000;
+    char hex[MAX_LEN * 2 + 1] = "";
+
+    if (len > MAX_LEN) {
+        return;
+    }
+
+    qemu_hexdump_to_buffer(hex, sizeof(hex), p, len);
+    trace_aes_dump_data(desc, hex);
+}
+
+static bool cmd_data(AESState *s)
+{
+    uint32_t cmd = s->fifo[0];
+    uint32_t ctxt_iv = 0;
+    uint32_t ctxt_key = (cmd & CMD_DATA_KEY_CTX_MASK) >> CMD_DATA_KEY_CTX_SHIFT;
+    uint32_t len = cmd & CMD_DATA_LEN_MASK;
+    uint64_t src_addr = s->fifo[2];
+    uint64_t dst_addr = s->fifo[3];
+    QCryptoCipherAlgo alg;
+    g_autoptr(QCryptoCipher) cipher = NULL;
+    g_autoptr(GByteArray) src = NULL;
+    g_autoptr(GByteArray) dst = NULL;
+    MemTxResult r;
+
+    src_addr |= ((uint64_t)s->fifo[1] << 16) & 0xffff00000000ULL;
+    dst_addr |= ((uint64_t)s->fifo[1] << 32) & 0xffff00000000ULL;
+
+    trace_aes_cmd_data(ctxt_key, ctxt_iv, src_addr, dst_addr, len);
+
+    if (!has_payload(s, 3)) {
+        /* wait for payload */
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: No payload\n", __func__);
+        return false;
+    }
+
+    if (ctxt_key >= ARRAY_SIZE(s->key) ||
+        ctxt_iv >= ARRAY_SIZE(s->iv)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Invalid key or iv\n", __func__);
+        return false;
+    }
+
+    src = g_byte_array_sized_new(len);
+    g_byte_array_set_size(src, len);
+    dst = g_byte_array_sized_new(len);
+    g_byte_array_set_size(dst, len);
+
+    r = dma_memory_read(s->as, src_addr, src->data, len, MEMTXATTRS_UNSPECIFIED);
+    if (r != MEMTX_OK) {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: DMA read of %"PRIu32" bytes "
+                      "from 0x%"PRIx64" failed. (r=%d)\n",
+                      __func__, len, src_addr, r);
+        return false;
+    }
+
+    dump_data("cmd_data(): src_data=", src->data, len);
+
+    switch (s->key[ctxt_key].key_len) {
+    case 128 / 8:
+        alg = QCRYPTO_CIPHER_ALGO_AES_128;
+        break;
+    case 192 / 8:
+        alg = QCRYPTO_CIPHER_ALGO_AES_192;
+        break;
+    case 256 / 8:
+        alg = QCRYPTO_CIPHER_ALGO_AES_256;
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Invalid key length\n", __func__);
+        return false;
+    }
+    cipher = qcrypto_cipher_new(alg, s->block_mode,
+                                s->key[ctxt_key].key,
+                                s->key[ctxt_key].key_len, NULL);
+    if (!cipher) {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Failed to create cipher object\n",
+                      __func__);
+        return false;
+    }
+    if (s->block_mode != QCRYPTO_CIPHER_MODE_ECB) {
+        if (qcrypto_cipher_setiv(cipher, (void *)s->iv[ctxt_iv].iv,
+                                 sizeof(s->iv[ctxt_iv].iv), NULL) != 0) {
+            qemu_log_mask(LOG_GUEST_ERROR, "%s: Failed to set IV\n", __func__);
+            return false;
+        }
+    }
+    if (s->is_encrypt) {
+        if (qcrypto_cipher_encrypt(cipher, src->data, dst->data, len, NULL) != 0) {
+            qemu_log_mask(LOG_GUEST_ERROR, "%s: Encryption failed\n", __func__);
+            return false;
+        }
+    } else {
+        if (qcrypto_cipher_decrypt(cipher, src->data, dst->data, len, NULL) != 0) {
+            qemu_log_mask(LOG_GUEST_ERROR, "%s: Decryption failed\n", __func__);
+            return false;
+        }
+    }
+
+    dump_data("cmd_data(): dst_data=", dst->data, len);
+    r = dma_memory_write(s->as, dst_addr, dst->data, len, MEMTXATTRS_UNSPECIFIED);
+    if (r != MEMTX_OK) {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: DMA write of %"PRIu32" bytes "
+                      "to 0x%"PRIx64" failed. (r=%d)\n",
+                      __func__, len, src_addr, r);
+        return false;
+    }
+
+    return true;
+}
+
+static bool cmd_store_iv(AESState *s)
+{
+    uint32_t cmd = s->fifo[0];
+    uint32_t ctxt = (cmd & CMD_IV_CONTEXT_MASK) >> CMD_IV_CONTEXT_SHIFT;
+    uint64_t addr = s->fifo[1];
+    MemTxResult dma_result;
+
+    if (!has_payload(s, 1)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: No payload\n", __func__);
+        return false;
+    }
+
+    if (ctxt >= ARRAY_SIZE(s->iv)) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: Invalid context. ctxt = %u, allowed: 0..%zu\n",
+                      __func__, ctxt, ARRAY_SIZE(s->iv) - 1);
+        return false;
+    }
+
+    addr |= ((uint64_t)cmd << 32) & 0xff00000000ULL;
+    dma_result = dma_memory_write(&address_space_memory, addr,
+                                  &s->iv[ctxt].iv, sizeof(s->iv[ctxt].iv),
+                                  MEMTXATTRS_UNSPECIFIED);
+
+    trace_aes_cmd_store_iv(ctxt, addr, s->iv[ctxt].iv[0], s->iv[ctxt].iv[1],
+                           s->iv[ctxt].iv[2], s->iv[ctxt].iv[3]);
+
+    return dma_result == MEMTX_OK;
+}
+
+static bool cmd_flag(AESState *s)
+{
+    uint32_t cmd = s->fifo[0];
+    uint32_t raise_irq = cmd & CMD_FLAG_RAISE_IRQ_MASK;
+
+    /* We always process data when it's coming in, so fire an IRQ immediately */
+    if (raise_irq) {
+        s->irq_status |= REG_IRQ_STATUS_FLAG;
+    }
+
+    s->flag_info = cmd & CMD_FLAG_INFO_MASK;
+
+    trace_aes_cmd_flag(!!raise_irq, s->flag_info);
+
+    return true;
+}
+
+static void fifo_process(AESState *s)
+{
+    uint32_t cmd = s->fifo[0] >> CMD_SHIFT;
+    bool success = false;
+
+    if (!s->fifo_idx) {
+        return;
+    }
+
+    switch (cmd) {
+    case CMD_KEY:
+        success = cmd_key(s);
+        break;
+    case CMD_IV:
+        success = cmd_iv(s);
+        break;
+    case CMD_DATA:
+        success = cmd_data(s);
+        break;
+    case CMD_STORE_IV:
+        success = cmd_store_iv(s);
+        break;
+    case CMD_FLAG:
+        success = cmd_flag(s);
+        break;
+    default:
+        s->irq_status |= REG_IRQ_STATUS_INVALID_CMD;
+        break;
+    }
+
+    if (success) {
+        s->fifo_idx = 0;
+    }
+
+    trace_aes_fifo_process(cmd, success);
+}
+
+static void aes1_write(void *opaque, hwaddr offset, uint64_t val, unsigned size)
+{
+    AESState *s = opaque;
+
+    trace_aes_write(offset, val);
+
+    switch (offset) {
+    case REG_IRQ_STATUS:
+        s->irq_status &= ~val;
+        break;
+    case REG_IRQ_ENABLE:
+        s->irq_enable = val;
+        break;
+    case REG_FIFO:
+        fifo_append(s, val);
+        fifo_process(s);
+        break;
+    default:
+        qemu_log_mask(LOG_UNIMP,
+                      "%s: Unknown AES MMIO offset %"PRIx64", data %"PRIx64"\n",
+                      __func__, offset, val);
+        return;
+    }
+
+    aes_update_irq(s);
+}
+
+static const MemoryRegionOps aes1_ops = {
+    .read = aes1_read,
+    .write = aes1_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 8,
+    },
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static uint64_t aes2_read(void *opaque, hwaddr offset, unsigned size)
+{
+    uint64_t res = 0;
+
+    switch (offset) {
+    case 0:
+        res = 0;
+        break;
+    default:
+        qemu_log_mask(LOG_UNIMP,
+                      "%s: Unknown AES MMIO 2 offset %"PRIx64"\n",
+                      __func__, offset);
+        break;
+    }
+
+    trace_aes_2_read(offset, res);
+
+    return res;
+}
+
+static void aes2_write(void *opaque, hwaddr offset, uint64_t val, unsigned size)
+{
+    trace_aes_2_write(offset, val);
+
+    switch (offset) {
+    default:
+        qemu_log_mask(LOG_UNIMP,
+                      "%s: Unknown AES MMIO 2 offset %"PRIx64", data %"PRIx64"\n",
+                      __func__, offset, val);
+        return;
+    }
+}
+
+static const MemoryRegionOps aes2_ops = {
+    .read = aes2_read,
+    .write = aes2_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 8,
+    },
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static void aes_reset(Object *obj, ResetType type)
+{
+    AESState *s = APPLE_AES(obj);
+
+    s->status = 0x3f80;
+    s->q_status = 2;
+    s->irq_status = 0;
+    s->irq_enable = 0;
+    s->watermark = 0;
+}
+
+static void aes_init(Object *obj)
+{
+    AESState *s = APPLE_AES(obj);
+
+    memory_region_init_io(&s->iomem1, obj, &aes1_ops, s, TYPE_APPLE_AES, 0x4000);
+    memory_region_init_io(&s->iomem2, obj, &aes2_ops, s, TYPE_APPLE_AES, 0x4000);
+    sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem1);
+    sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem2);
+    sysbus_init_irq(SYS_BUS_DEVICE(s), &s->irq);
+    s->as = &address_space_memory;
+}
+
+static void aes_class_init(ObjectClass *klass, void *data)
+{
+    ResettableClass *rc = RESETTABLE_CLASS(klass);
+
+    rc->phases.hold = aes_reset;
+}
+
+static const TypeInfo aes_info = {
+    .name          = TYPE_APPLE_AES,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(AESState),
+    .class_init    = aes_class_init,
+    .instance_init = aes_init,
+};
+
+static void aes_register_types(void)
+{
+    type_register_static(&aes_info);
+}
+
+type_init(aes_register_types)
diff --git a/hw/vmapple/bdif.c b/hw/vmapple/bdif.c
new file mode 100644
index 0000000000000..5827dd2aab8c4
--- /dev/null
+++ b/hw/vmapple/bdif.c
@@ -0,0 +1,274 @@
+/*
+ * VMApple Backdoor Interface
+ *
+ * Copyright © 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "trace.h"
+#include "hw/vmapple/vmapple.h"
+#include "hw/sysbus.h"
+#include "hw/block/block.h"
+#include "qapi/error.h"
+#include "system/block-backend.h"
+#include "system/dma.h"
+
+OBJECT_DECLARE_SIMPLE_TYPE(VMAppleBdifState, VMAPPLE_BDIF)
+
+struct VMAppleBdifState {
+    SysBusDevice parent_obj;
+
+    BlockBackend *aux;
+    BlockBackend *root;
+    MemoryRegion mmio;
+};
+
+#define VMAPPLE_BDIF_SIZE   0x00200000
+
+#define REG_DEVID_MASK      0xffff0000
+#define DEVID_ROOT          0x00000000
+#define DEVID_AUX           0x00010000
+#define DEVID_USB           0x00100000
+
+#define REG_STATUS          0x0
+#define REG_STATUS_ACTIVE     BIT(0)
+#define REG_CFG             0x4
+#define REG_CFG_ACTIVE        BIT(1)
+#define REG_UNK1            0x8
+#define REG_BUSY            0x10
+#define REG_BUSY_READY        BIT(0)
+#define REG_UNK2            0x400
+#define REG_CMD             0x408
+#define REG_NEXT_DEVICE     0x420
+#define REG_UNK3            0x434
+
+typedef struct VblkSector {
+    uint32_t pad;
+    uint32_t pad2;
+    uint32_t sector;
+    uint32_t pad3;
+} VblkSector;
+
+typedef struct VblkReqCmd {
+    uint64_t addr;
+    uint32_t len;
+    uint32_t flags;
+} VblkReqCmd;
+
+typedef struct VblkReq {
+    VblkReqCmd sector;
+    VblkReqCmd data;
+    VblkReqCmd retval;
+} VblkReq;
+
+#define VBLK_DATA_FLAGS_READ  0x00030001
+#define VBLK_DATA_FLAGS_WRITE 0x00010001
+
+#define VBLK_RET_SUCCESS  0
+#define VBLK_RET_FAILED   1
+
+static uint64_t bdif_read(void *opaque, hwaddr offset, unsigned size)
+{
+    uint64_t ret = -1;
+    uint64_t devid = offset & REG_DEVID_MASK;
+
+    switch (offset & ~REG_DEVID_MASK) {
+    case REG_STATUS:
+        ret = REG_STATUS_ACTIVE;
+        break;
+    case REG_CFG:
+        ret = REG_CFG_ACTIVE;
+        break;
+    case REG_UNK1:
+        ret = 0x420;
+        break;
+    case REG_BUSY:
+        ret = REG_BUSY_READY;
+        break;
+    case REG_UNK2:
+        ret = 0x1;
+        break;
+    case REG_UNK3:
+        ret = 0x0;
+        break;
+    case REG_NEXT_DEVICE:
+        switch (devid) {
+        case DEVID_ROOT:
+            ret = 0x8000000;
+            break;
+        case DEVID_AUX:
+            ret = 0x10000;
+            break;
+        }
+        break;
+    }
+
+    trace_bdif_read(offset, size, ret);
+    return ret;
+}
+
+static void le2cpu_sector(VblkSector *sector)
+{
+    sector->sector = le32_to_cpu(sector->sector);
+}
+
+static void le2cpu_reqcmd(VblkReqCmd *cmd)
+{
+    cmd->addr = le64_to_cpu(cmd->addr);
+    cmd->len = le32_to_cpu(cmd->len);
+    cmd->flags = le32_to_cpu(cmd->flags);
+}
+
+static void le2cpu_req(VblkReq *req)
+{
+    le2cpu_reqcmd(&req->sector);
+    le2cpu_reqcmd(&req->data);
+    le2cpu_reqcmd(&req->retval);
+}
+
+static void vblk_cmd(uint64_t devid, BlockBackend *blk, uint64_t gp_addr,
+                     uint64_t static_off)
+{
+    VblkReq req;
+    VblkSector sector;
+    uint64_t off = 0;
+    g_autofree char *buf = NULL;
+    uint8_t ret = VBLK_RET_FAILED;
+    int r;
+    MemTxResult dma_result;
+
+    dma_result = dma_memory_read(&address_space_memory, gp_addr,
+                                 &req, sizeof(req), MEMTXATTRS_UNSPECIFIED);
+    if (dma_result != MEMTX_OK) {
+        goto out;
+    }
+
+    le2cpu_req(&req);
+
+    if (req.sector.len != sizeof(sector)) {
+        goto out;
+    }
+
+    /* Read the vblk command */
+    dma_result = dma_memory_read(&address_space_memory, req.sector.addr,
+                                 &sector, sizeof(sector),
+                                 MEMTXATTRS_UNSPECIFIED);
+    if (dma_result != MEMTX_OK) {
+        goto out;
+    }
+    le2cpu_sector(&sector);
+
+    off = sector.sector * 512ULL + static_off;
+
+    /* Sanity check that we're not allocating bogus sizes */
+    if (req.data.len > 128 * MiB) {
+        goto out;
+    }
+
+    buf = g_malloc0(req.data.len);
+    switch (req.data.flags) {
+    case VBLK_DATA_FLAGS_READ:
+        r = blk_pread(blk, off, req.data.len, buf, 0);
+        trace_bdif_vblk_read(devid == DEVID_AUX ? "aux" : "root",
+                             req.data.addr, off, req.data.len, r);
+        if (r < 0) {
+            goto out;
+        }
+        dma_result = dma_memory_write(&address_space_memory, req.data.addr, buf,
+                                      req.data.len, MEMTXATTRS_UNSPECIFIED);
+        if (dma_result == MEMTX_OK) {
+            ret = VBLK_RET_SUCCESS;
+        }
+        break;
+    case VBLK_DATA_FLAGS_WRITE:
+        /* Not needed, iBoot only reads */
+        break;
+    default:
+        break;
+    }
+
+out:
+    dma_memory_write(&address_space_memory, req.retval.addr, &ret, 1,
+                     MEMTXATTRS_UNSPECIFIED);
+}
+
+static void bdif_write(void *opaque, hwaddr offset,
+                       uint64_t value, unsigned size)
+{
+    VMAppleBdifState *s = opaque;
+    uint64_t devid = (offset & REG_DEVID_MASK);
+
+    trace_bdif_write(offset, size, value);
+
+    switch (offset & ~REG_DEVID_MASK) {
+    case REG_CMD:
+        switch (devid) {
+        case DEVID_ROOT:
+            vblk_cmd(devid, s->root, value, 0x0);
+            break;
+        case DEVID_AUX:
+            vblk_cmd(devid, s->aux, value, 0x0);
+            break;
+        }
+        break;
+    }
+}
+
+static const MemoryRegionOps bdif_ops = {
+    .read = bdif_read,
+    .write = bdif_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 1,
+        .max_access_size = 8,
+    },
+    .impl = {
+        .min_access_size = 1,
+        .max_access_size = 8,
+    },
+};
+
+static void bdif_init(Object *obj)
+{
+    VMAppleBdifState *s = VMAPPLE_BDIF(obj);
+
+    memory_region_init_io(&s->mmio, obj, &bdif_ops, obj,
+                         "VMApple Backdoor Interface", VMAPPLE_BDIF_SIZE);
+    sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio);
+}
+
+static const Property bdif_properties[] = {
+    DEFINE_PROP_DRIVE("aux", VMAppleBdifState, aux),
+    DEFINE_PROP_DRIVE("root", VMAppleBdifState, root),
+};
+
+static void bdif_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->desc = "VMApple Backdoor Interface";
+    device_class_set_props(dc, bdif_properties);
+}
+
+static const TypeInfo bdif_info = {
+    .name          = TYPE_VMAPPLE_BDIF,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(VMAppleBdifState),
+    .instance_init = bdif_init,
+    .class_init    = bdif_class_init,
+};
+
+static void bdif_register_types(void)
+{
+    type_register_static(&bdif_info);
+}
+
+type_init(bdif_register_types)
diff --git a/hw/vmapple/cfg.c b/hw/vmapple/cfg.c
new file mode 100644
index 0000000000000..63414d801fe40
--- /dev/null
+++ b/hw/vmapple/cfg.c
@@ -0,0 +1,195 @@
+/*
+ * VMApple Configuration Region
+ *
+ * Copyright © 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/vmapple/vmapple.h"
+#include "hw/sysbus.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "qapi/error.h"
+#include "net/net.h"
+
+OBJECT_DECLARE_SIMPLE_TYPE(VMAppleCfgState, VMAPPLE_CFG)
+
+#define VMAPPLE_CFG_SIZE 0x00010000
+
+typedef struct VMAppleCfg {
+    uint32_t version;         /* 0x000 */
+    uint32_t nr_cpus;         /* 0x004 */
+    uint32_t unk1;            /* 0x008 */
+    uint32_t unk2;            /* 0x00c */
+    uint32_t unk3;            /* 0x010 */
+    uint32_t unk4;            /* 0x014 */
+    uint64_t ecid;            /* 0x018 */
+    uint64_t ram_size;        /* 0x020 */
+    uint32_t run_installer1;  /* 0x028 */
+    uint32_t unk5;            /* 0x02c */
+    uint32_t unk6;            /* 0x030 */
+    uint32_t run_installer2;  /* 0x034 */
+    uint32_t rnd;             /* 0x038 */
+    uint32_t unk7;            /* 0x03c */
+    MACAddr mac_en0;          /* 0x040 */
+    uint8_t pad1[2];
+    MACAddr mac_en1;          /* 0x048 */
+    uint8_t pad2[2];
+    MACAddr mac_wifi0;        /* 0x050 */
+    uint8_t pad3[2];
+    MACAddr mac_bt0;          /* 0x058 */
+    uint8_t pad4[2];
+    uint8_t reserved[0xa0];   /* 0x060 */
+    uint32_t cpu_ids[0x80];   /* 0x100 */
+    uint8_t scratch[0x200];   /* 0x180 */
+    char serial[32];          /* 0x380 */
+    char unk8[32];            /* 0x3a0 */
+    char model[32];           /* 0x3c0 */
+    uint8_t unk9[32];         /* 0x3e0 */
+    uint32_t unk10;           /* 0x400 */
+    char soc_name[32];        /* 0x404 */
+} VMAppleCfg;
+
+struct VMAppleCfgState {
+    SysBusDevice parent_obj;
+    VMAppleCfg cfg;
+
+    MemoryRegion mem;
+    char *serial;
+    char *model;
+    char *soc_name;
+};
+
+static void vmapple_cfg_reset(Object *obj, ResetType type)
+{
+    VMAppleCfgState *s = VMAPPLE_CFG(obj);
+    VMAppleCfg *cfg;
+
+    cfg = memory_region_get_ram_ptr(&s->mem);
+    memset(cfg, 0, VMAPPLE_CFG_SIZE);
+    *cfg = s->cfg;
+}
+
+static bool set_fixlen_property_or_error(char *restrict dst,
+                                         const char *restrict src,
+                                         size_t dst_size, Error **errp,
+                                         const char *property_name)
+{
+    ERRP_GUARD();
+    size_t len;
+
+    len = g_strlcpy(dst, src, dst_size);
+    if (len < dst_size) { /* len does not count nul terminator */
+        return true;
+    }
+
+    error_setg(errp, "Provided value too long for property '%s'", property_name);
+    error_append_hint(errp, "length (%zu) exceeds maximum of %zu\n",
+                      len, dst_size - 1);
+    return false;
+}
+
+#define set_fixlen_property_or_return(dst_array, src, errp, property_name) \
+    do { \
+        if (!set_fixlen_property_or_error((dst_array), (src), \
+                                          ARRAY_SIZE(dst_array), \
+                                          (errp), (property_name))) { \
+            return; \
+        } \
+    } while (0)
+
+static void vmapple_cfg_realize(DeviceState *dev, Error **errp)
+{
+    VMAppleCfgState *s = VMAPPLE_CFG(dev);
+    uint32_t i;
+
+    if (!s->serial) {
+        s->serial = g_strdup("1234");
+    }
+    if (!s->model) {
+        s->model = g_strdup("VM0001");
+    }
+    if (!s->soc_name) {
+        s->soc_name = g_strdup("Apple M1 (Virtual)");
+    }
+
+    set_fixlen_property_or_return(s->cfg.serial, s->serial, errp, "serial");
+    set_fixlen_property_or_return(s->cfg.model, s->model, errp, "model");
+    set_fixlen_property_or_return(s->cfg.soc_name, s->soc_name, errp, "soc_name");
+    set_fixlen_property_or_return(s->cfg.unk8, "D/A", errp, "unk8");
+    s->cfg.version = 2;
+    s->cfg.unk1 = 1;
+    s->cfg.unk2 = 1;
+    s->cfg.unk3 = 0x20;
+    s->cfg.unk4 = 0;
+    s->cfg.unk5 = 1;
+    s->cfg.unk6 = 1;
+    s->cfg.unk7 = 0;
+    s->cfg.unk10 = 1;
+
+    if (s->cfg.nr_cpus > ARRAY_SIZE(s->cfg.cpu_ids)) {
+        error_setg(errp,
+                   "Failed to create %u CPUs, vmapple machine supports %zu max",
+                   s->cfg.nr_cpus, ARRAY_SIZE(s->cfg.cpu_ids));
+        return;
+    }
+    for (i = 0; i < s->cfg.nr_cpus; i++) {
+        s->cfg.cpu_ids[i] = i;
+    }
+}
+
+static void vmapple_cfg_init(Object *obj)
+{
+    VMAppleCfgState *s = VMAPPLE_CFG(obj);
+
+    memory_region_init_ram(&s->mem, obj, "VMApple Config", VMAPPLE_CFG_SIZE,
+                           &error_fatal);
+    sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mem);
+}
+
+static const Property vmapple_cfg_properties[] = {
+    DEFINE_PROP_UINT32("nr-cpus", VMAppleCfgState, cfg.nr_cpus, 1),
+    DEFINE_PROP_UINT64("ecid", VMAppleCfgState, cfg.ecid, 0),
+    DEFINE_PROP_UINT64("ram-size", VMAppleCfgState, cfg.ram_size, 0),
+    DEFINE_PROP_UINT32("run_installer1", VMAppleCfgState, cfg.run_installer1, 0),
+    DEFINE_PROP_UINT32("run_installer2", VMAppleCfgState, cfg.run_installer2, 0),
+    DEFINE_PROP_UINT32("rnd", VMAppleCfgState, cfg.rnd, 0),
+    DEFINE_PROP_MACADDR("mac-en0", VMAppleCfgState, cfg.mac_en0),
+    DEFINE_PROP_MACADDR("mac-en1", VMAppleCfgState, cfg.mac_en1),
+    DEFINE_PROP_MACADDR("mac-wifi0", VMAppleCfgState, cfg.mac_wifi0),
+    DEFINE_PROP_MACADDR("mac-bt0", VMAppleCfgState, cfg.mac_bt0),
+    DEFINE_PROP_STRING("serial", VMAppleCfgState, serial),
+    DEFINE_PROP_STRING("model", VMAppleCfgState, model),
+    DEFINE_PROP_STRING("soc_name", VMAppleCfgState, soc_name),
+};
+
+static void vmapple_cfg_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    ResettableClass *rc = RESETTABLE_CLASS(klass);
+
+    dc->realize = vmapple_cfg_realize;
+    dc->desc = "VMApple Configuration Region";
+    device_class_set_props(dc, vmapple_cfg_properties);
+    rc->phases.hold = vmapple_cfg_reset;
+}
+
+static const TypeInfo vmapple_cfg_info = {
+    .name          = TYPE_VMAPPLE_CFG,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(VMAppleCfgState),
+    .instance_init = vmapple_cfg_init,
+    .class_init    = vmapple_cfg_class_init,
+};
+
+static void vmapple_cfg_register_types(void)
+{
+    type_register_static(&vmapple_cfg_info);
+}
+
+type_init(vmapple_cfg_register_types)
diff --git a/hw/vmapple/meson.build b/hw/vmapple/meson.build
new file mode 100644
index 0000000000000..23bc4c999e38a
--- /dev/null
+++ b/hw/vmapple/meson.build
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+system_ss.add(when: 'CONFIG_VMAPPLE_AES',  if_true: files('aes.c'))
+system_ss.add(when: 'CONFIG_VMAPPLE_BDIF', if_true: files('bdif.c'))
+system_ss.add(when: 'CONFIG_VMAPPLE_CFG',  if_true: files('cfg.c'))
+system_ss.add(when: 'CONFIG_VMAPPLE_VIRTIO_BLK',  if_true: files('virtio-blk.c'))
+specific_ss.add(when: 'CONFIG_VMAPPLE',     if_true: files('vmapple.c'))
diff --git a/hw/vmapple/trace-events b/hw/vmapple/trace-events
new file mode 100644
index 0000000000000..93380ede1451d
--- /dev/null
+++ b/hw/vmapple/trace-events
@@ -0,0 +1,21 @@
+# See docs/devel/tracing.rst for syntax documentation.
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+# aes.c
+aes_read(uint64_t offset, uint64_t res) "offset=0x%"PRIx64" res=0x%"PRIx64
+aes_cmd_key_select_builtin(uint32_t ctx, uint32_t key_id, const char *direction, const char *cipher) "[%d] Selecting builtin key %d to %scrypt with %s"
+aes_cmd_key_select_new(uint32_t ctx, uint32_t key_len, const char *direction, const char *cipher) "[%d] Selecting new key size=%d to %scrypt with %s"
+aes_cmd_iv(uint32_t ctx, uint32_t iv0, uint32_t iv1, uint32_t iv2, uint32_t iv3) "[%d] 0x%08x 0x%08x 0x%08x 0x%08x"
+aes_cmd_data(uint32_t key, uint32_t iv, uint64_t src, uint64_t dst, uint32_t len) "[key=%d iv=%d] src=0x%"PRIx64" dst=0x%"PRIx64" len=0x%x"
+aes_cmd_store_iv(uint32_t ctx, uint64_t addr, uint32_t iv0, uint32_t iv1, uint32_t iv2, uint32_t iv3) "[%d] addr=0x%"PRIx64"x -> 0x%08x 0x%08x 0x%08x 0x%08x"
+aes_cmd_flag(uint32_t raise, uint32_t flag_info) "raise=%d flag_info=0x%x"
+aes_fifo_process(uint32_t cmd, bool success) "cmd=%d success=%d"
+aes_write(uint64_t offset, uint64_t val) "offset=0x%"PRIx64" val=0x%"PRIx64
+aes_2_read(uint64_t offset, uint64_t res) "offset=0x%"PRIx64" res=0x%"PRIx64
+aes_2_write(uint64_t offset, uint64_t val) "offset=0x%"PRIx64" val=0x%"PRIx64
+aes_dump_data(const char *desc, const char *hex) "%s%s"
+
+# bdif.c
+bdif_read(uint64_t offset, uint32_t size, uint64_t value) "offset=0x%"PRIx64" size=0x%x value=0x%"PRIx64
+bdif_write(uint64_t offset, uint32_t size, uint64_t value) "offset=0x%"PRIx64" size=0x%x value=0x%"PRIx64
+bdif_vblk_read(const char *dev, uint64_t addr, uint64_t offset, uint32_t len, int r) "dev=%s addr=0x%"PRIx64" off=0x%"PRIx64" size=0x%x r=%d"
diff --git a/hw/vmapple/trace.h b/hw/vmapple/trace.h
new file mode 100644
index 0000000000000..d099d5ecd9e2f
--- /dev/null
+++ b/hw/vmapple/trace.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#include "trace/trace-hw_vmapple.h"
diff --git a/hw/vmapple/virtio-blk.c b/hw/vmapple/virtio-blk.c
new file mode 100644
index 0000000000000..aa3f18c47db8f
--- /dev/null
+++ b/hw/vmapple/virtio-blk.c
@@ -0,0 +1,204 @@
+/*
+ * VMApple specific VirtIO Block implementation
+ *
+ * Copyright © 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * VMApple uses almost standard VirtIO Block, but with a few key differences:
+ *
+ *  - Different PCI device/vendor ID
+ *  - An additional "type" identifier to differentiate AUX and Root volumes
+ *  - An additional BARRIER command
+ */
+
+#include "qemu/osdep.h"
+#include "hw/vmapple/vmapple.h"
+#include "hw/virtio/virtio-blk.h"
+#include "hw/virtio/virtio-pci.h"
+#include "qemu/bswap.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "qapi/error.h"
+
+#define TYPE_VMAPPLE_VIRTIO_BLK  "vmapple-virtio-blk"
+OBJECT_DECLARE_TYPE(VMAppleVirtIOBlk, VMAppleVirtIOBlkClass, VMAPPLE_VIRTIO_BLK)
+
+typedef struct VMAppleVirtIOBlkClass {
+    VirtIOBlkClass parent;
+
+    void (*get_config)(VirtIODevice *vdev, uint8_t *config);
+} VMAppleVirtIOBlkClass;
+
+typedef struct VMAppleVirtIOBlk {
+    VirtIOBlock parent_obj;
+
+    uint32_t apple_type;
+} VMAppleVirtIOBlk;
+
+/*
+ * vmapple-virtio-blk-pci: This extends VirtioPCIProxy.
+ */
+OBJECT_DECLARE_SIMPLE_TYPE(VMAppleVirtIOBlkPCI, VMAPPLE_VIRTIO_BLK_PCI)
+
+#define VIRTIO_BLK_T_APPLE_BARRIER     0x10000
+
+static bool vmapple_virtio_blk_handle_unknown_request(VirtIOBlockReq *req,
+                                                      MultiReqBuffer *mrb,
+                                                      uint32_t type)
+{
+    switch (type) {
+    case VIRTIO_BLK_T_APPLE_BARRIER:
+        qemu_log_mask(LOG_UNIMP, "%s: Barrier requests are currently no-ops\n",
+                      __func__);
+        virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
+        g_free(req);
+        return true;
+    default:
+        return false;
+    }
+}
+
+/*
+ * VMApple virtio-blk uses the same config format as normal virtio, with one
+ * exception: It adds an "apple type" specififer at the same location that
+ * the spec reserves for max_secure_erase_sectors. Let's hook into the
+ * get_config code path here, run it as usual and then patch in the apple type.
+ */
+static void vmapple_virtio_blk_get_config(VirtIODevice *vdev, uint8_t *config)
+{
+    VMAppleVirtIOBlk *dev = VMAPPLE_VIRTIO_BLK(vdev);
+    VMAppleVirtIOBlkClass *vvbk = VMAPPLE_VIRTIO_BLK_GET_CLASS(dev);
+    struct virtio_blk_config *blkcfg = (struct virtio_blk_config *)config;
+
+    vvbk->get_config(vdev, config);
+
+    g_assert(dev->parent_obj.config_size >= endof(struct virtio_blk_config, zoned));
+
+    /* Apple abuses the field for max_secure_erase_sectors as type id */
+    stl_he_p(&blkcfg->max_secure_erase_sectors, dev->apple_type);
+}
+
+static void vmapple_virtio_blk_class_init(ObjectClass *klass, void *data)
+{
+    VirtIOBlkClass *vbk = VIRTIO_BLK_CLASS(klass);
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+    VMAppleVirtIOBlkClass *vvbk = VMAPPLE_VIRTIO_BLK_CLASS(klass);
+
+    vbk->handle_unknown_request = vmapple_virtio_blk_handle_unknown_request;
+    vvbk->get_config = vdc->get_config;
+    vdc->get_config = vmapple_virtio_blk_get_config;
+}
+
+static const TypeInfo vmapple_virtio_blk_info = {
+    .name          = TYPE_VMAPPLE_VIRTIO_BLK,
+    .parent        = TYPE_VIRTIO_BLK,
+    .instance_size = sizeof(VMAppleVirtIOBlk),
+    .class_size    = sizeof(VMAppleVirtIOBlkClass),
+    .class_init    = vmapple_virtio_blk_class_init,
+};
+
+/* PCI Devices */
+
+struct VMAppleVirtIOBlkPCI {
+    VirtIOPCIProxy parent_obj;
+
+    VMAppleVirtIOBlk vdev;
+    VMAppleVirtioBlkVariant variant;
+};
+
+static const Property vmapple_virtio_blk_pci_properties[] = {
+    DEFINE_PROP_UINT32("class", VirtIOPCIProxy, class_code, 0),
+    DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags,
+                    VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
+    DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors,
+                       DEV_NVECTORS_UNSPECIFIED),
+    DEFINE_PROP_VMAPPLE_VIRTIO_BLK_VARIANT("variant", VMAppleVirtIOBlkPCI, variant,
+                                           VM_APPLE_VIRTIO_BLK_VARIANT_UNSPECIFIED),
+};
+
+static void vmapple_virtio_blk_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+{
+    ERRP_GUARD();
+    VMAppleVirtIOBlkPCI *dev = VMAPPLE_VIRTIO_BLK_PCI(vpci_dev);
+    DeviceState *vdev = DEVICE(&dev->vdev);
+    VirtIOBlkConf *conf = &dev->vdev.parent_obj.conf;
+
+    if (dev->variant == VM_APPLE_VIRTIO_BLK_VARIANT_UNSPECIFIED) {
+        error_setg(errp, "vmapple virtio block device variant unspecified");
+        error_append_hint(errp,
+                          "Variant property must be set to 'aux' or 'root'.\n"
+                          "Use a regular virtio-blk-pci device instead when "
+                          "neither is applicaple.\n");
+        return;
+    }
+
+    if (conf->num_queues == VIRTIO_BLK_AUTO_NUM_QUEUES) {
+        conf->num_queues = virtio_pci_optimal_num_queues(0);
+    }
+
+    if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) {
+        vpci_dev->nvectors = conf->num_queues + 1;
+    }
+
+    /*
+     * We don't support zones, but we need the additional config space size.
+     * Let's just expose the feature so the rest of the virtio-blk logic
+     * allocates enough space for us. The guest will ignore zones anyway.
+     */
+    virtio_add_feature(&dev->vdev.parent_obj.host_features, VIRTIO_BLK_F_ZONED);
+    /* Propagate the apple type down to the virtio-blk device */
+    dev->vdev.apple_type = dev->variant;
+    /* and spawn the virtio-blk device */
+    qdev_realize(vdev, BUS(&vpci_dev->bus), errp);
+
+    /*
+     * The virtio-pci machinery adjusts its vendor/device ID based on whether
+     * we support modern or legacy virtio. Let's patch it back to the Apple
+     * identifiers here.
+     */
+    pci_config_set_vendor_id(vpci_dev->pci_dev.config, PCI_VENDOR_ID_APPLE);
+    pci_config_set_device_id(vpci_dev->pci_dev.config,
+                             PCI_DEVICE_ID_APPLE_VIRTIO_BLK);
+}
+
+static void vmapple_virtio_blk_pci_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass);
+    PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass);
+
+    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
+    device_class_set_props(dc, vmapple_virtio_blk_pci_properties);
+    k->realize = vmapple_virtio_blk_pci_realize;
+    pcidev_k->vendor_id = PCI_VENDOR_ID_APPLE;
+    pcidev_k->device_id = PCI_DEVICE_ID_APPLE_VIRTIO_BLK;
+    pcidev_k->revision = VIRTIO_PCI_ABI_VERSION;
+    pcidev_k->class_id = PCI_CLASS_STORAGE_SCSI;
+}
+
+static void vmapple_virtio_blk_pci_instance_init(Object *obj)
+{
+    VMAppleVirtIOBlkPCI *dev = VMAPPLE_VIRTIO_BLK_PCI(obj);
+
+    virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
+                                TYPE_VMAPPLE_VIRTIO_BLK);
+}
+
+static const VirtioPCIDeviceTypeInfo vmapple_virtio_blk_pci_info = {
+    .generic_name  = TYPE_VMAPPLE_VIRTIO_BLK_PCI,
+    .instance_size = sizeof(VMAppleVirtIOBlkPCI),
+    .instance_init = vmapple_virtio_blk_pci_instance_init,
+    .class_init    = vmapple_virtio_blk_pci_class_init,
+};
+
+static void vmapple_virtio_blk_register_types(void)
+{
+    type_register_static(&vmapple_virtio_blk_info);
+    virtio_pci_types_register(&vmapple_virtio_blk_pci_info);
+}
+
+type_init(vmapple_virtio_blk_register_types)
diff --git a/hw/vmapple/vmapple.c b/hw/vmapple/vmapple.c
new file mode 100644
index 0000000000000..fa117bf15113a
--- /dev/null
+++ b/hw/vmapple/vmapple.c
@@ -0,0 +1,618 @@
+/*
+ * VMApple machine emulation
+ *
+ * Copyright © 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * VMApple is the device model that the macOS built-in hypervisor called
+ * "Virtualization.framework" exposes to Apple Silicon macOS guests. The
+ * machine model in this file implements the same device model in QEMU, but
+ * does not use any code from Virtualization.Framework.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/bitops.h"
+#include "qemu/datadir.h"
+#include "qemu/error-report.h"
+#include "qemu/guest-random.h"
+#include "qemu/help-texts.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "qemu/option.h"
+#include "qemu/units.h"
+#include "monitor/qdev.h"
+#include "hw/boards.h"
+#include "hw/irq.h"
+#include "hw/loader.h"
+#include "hw/qdev-properties.h"
+#include "hw/sysbus.h"
+#include "hw/usb.h"
+#include "hw/arm/boot.h"
+#include "hw/arm/primecell.h"
+#include "hw/char/pl011.h"
+#include "hw/intc/arm_gic.h"
+#include "hw/intc/arm_gicv3_common.h"
+#include "hw/misc/pvpanic.h"
+#include "hw/pci-host/gpex.h"
+#include "hw/usb/hcd-xhci-pci.h"
+#include "hw/virtio/virtio-pci.h"
+#include "hw/vmapple/vmapple.h"
+#include "net/net.h"
+#include "qapi/error.h"
+#include "qapi/visitor.h"
+#include "qapi/qapi-visit-common.h"
+#include "qobject/qlist.h"
+#include "standard-headers/linux/input.h"
+#include "system/hvf.h"
+#include "system/reset.h"
+#include "system/runstate.h"
+#include "system/system.h"
+
+struct VMAppleMachineState {
+    MachineState parent;
+
+    Notifier machine_done;
+    struct arm_boot_info bootinfo;
+    const MemMapEntry *memmap;
+    const int *irqmap;
+    DeviceState *gic;
+    DeviceState *cfg;
+    DeviceState *pvpanic;
+    Notifier powerdown_notifier;
+    PCIBus *bus;
+    MemoryRegion fw_mr;
+    MemoryRegion ecam_alias;
+    uint64_t uuid;
+};
+
+#define TYPE_VMAPPLE_MACHINE   MACHINE_TYPE_NAME("vmapple")
+OBJECT_DECLARE_SIMPLE_TYPE(VMAppleMachineState, VMAPPLE_MACHINE)
+
+/* Number of external interrupt lines to configure the GIC with */
+#define NUM_IRQS 256
+
+enum {
+    VMAPPLE_FIRMWARE,
+    VMAPPLE_CONFIG,
+    VMAPPLE_MEM,
+    VMAPPLE_GIC_DIST,
+    VMAPPLE_GIC_REDIST,
+    VMAPPLE_UART,
+    VMAPPLE_RTC,
+    VMAPPLE_PCIE,
+    VMAPPLE_PCIE_MMIO,
+    VMAPPLE_PCIE_ECAM,
+    VMAPPLE_GPIO,
+    VMAPPLE_PVPANIC,
+    VMAPPLE_APV_GFX,
+    VMAPPLE_APV_IOSFC,
+    VMAPPLE_AES_1,
+    VMAPPLE_AES_2,
+    VMAPPLE_BDOOR,
+    VMAPPLE_MEMMAP_LAST,
+};
+
+static const MemMapEntry memmap[] = {
+    [VMAPPLE_FIRMWARE] =           { 0x00100000, 0x00100000 },
+    [VMAPPLE_CONFIG] =             { 0x00400000, 0x00010000 },
+
+    [VMAPPLE_GIC_DIST] =           { 0x10000000, 0x00010000 },
+    [VMAPPLE_GIC_REDIST] =         { 0x10010000, 0x00400000 },
+
+    [VMAPPLE_UART] =               { 0x20010000, 0x00010000 },
+    [VMAPPLE_RTC] =                { 0x20050000, 0x00001000 },
+    [VMAPPLE_GPIO] =               { 0x20060000, 0x00001000 },
+    [VMAPPLE_PVPANIC] =            { 0x20070000, 0x00000002 },
+    [VMAPPLE_BDOOR] =              { 0x30000000, 0x00200000 },
+    [VMAPPLE_APV_GFX] =            { 0x30200000, 0x00010000 },
+    [VMAPPLE_APV_IOSFC] =          { 0x30210000, 0x00010000 },
+    [VMAPPLE_AES_1] =              { 0x30220000, 0x00004000 },
+    [VMAPPLE_AES_2] =              { 0x30230000, 0x00004000 },
+    [VMAPPLE_PCIE_ECAM] =          { 0x40000000, 0x10000000 },
+    [VMAPPLE_PCIE_MMIO] =          { 0x50000000, 0x1fff0000 },
+
+    /* Actual RAM size depends on configuration */
+    [VMAPPLE_MEM] =                { 0x70000000ULL, GiB},
+};
+
+static const int irqmap[] = {
+    [VMAPPLE_UART] = 1,
+    [VMAPPLE_RTC] = 2,
+    [VMAPPLE_GPIO] = 0x5,
+    [VMAPPLE_APV_IOSFC] = 0x10,
+    [VMAPPLE_APV_GFX] = 0x11,
+    [VMAPPLE_AES_1] = 0x12,
+    [VMAPPLE_PCIE] = 0x20,
+};
+
+#define GPEX_NUM_IRQS 16
+
+static void create_bdif(VMAppleMachineState *vms, MemoryRegion *mem)
+{
+    DeviceState *bdif;
+    SysBusDevice *bdif_sb;
+    DriveInfo *di_aux = drive_get(IF_PFLASH, 0, 0);
+    DriveInfo *di_root = drive_get(IF_PFLASH, 0, 1);
+
+    if (!di_aux) {
+        error_report("No AUX device. Please specify one as pflash drive.");
+        exit(1);
+    }
+
+    if (!di_root) {
+        /* Fall back to the first IF_VIRTIO device as root device */
+        di_root = drive_get(IF_VIRTIO, 0, 0);
+    }
+
+    if (!di_root) {
+        error_report("No root device. Please specify one as virtio drive.");
+        exit(1);
+    }
+
+    /* PV backdoor device */
+    bdif = qdev_new(TYPE_VMAPPLE_BDIF);
+    bdif_sb = SYS_BUS_DEVICE(bdif);
+    sysbus_mmio_map(bdif_sb, 0, vms->memmap[VMAPPLE_BDOOR].base);
+
+    qdev_prop_set_drive(DEVICE(bdif), "aux", blk_by_legacy_dinfo(di_aux));
+    qdev_prop_set_drive(DEVICE(bdif), "root", blk_by_legacy_dinfo(di_root));
+
+    sysbus_realize_and_unref(bdif_sb, &error_fatal);
+}
+
+static void create_pvpanic(VMAppleMachineState *vms, MemoryRegion *mem)
+{
+    SysBusDevice *pvpanic;
+
+    vms->pvpanic = qdev_new(TYPE_PVPANIC_MMIO_DEVICE);
+    pvpanic = SYS_BUS_DEVICE(vms->pvpanic);
+    sysbus_mmio_map(pvpanic, 0, vms->memmap[VMAPPLE_PVPANIC].base);
+
+    sysbus_realize_and_unref(pvpanic, &error_fatal);
+}
+
+static bool create_cfg(VMAppleMachineState *vms, MemoryRegion *mem,
+                       Error **errp)
+{
+    ERRP_GUARD();
+    SysBusDevice *cfg;
+    MachineState *machine = MACHINE(vms);
+    uint32_t rnd = 1;
+
+    vms->cfg = qdev_new(TYPE_VMAPPLE_CFG);
+    cfg = SYS_BUS_DEVICE(vms->cfg);
+    sysbus_mmio_map(cfg, 0, vms->memmap[VMAPPLE_CONFIG].base);
+
+    qemu_guest_getrandom_nofail(&rnd, sizeof(rnd));
+
+    qdev_prop_set_uint32(vms->cfg, "nr-cpus", machine->smp.cpus);
+    qdev_prop_set_uint64(vms->cfg, "ecid", vms->uuid);
+    qdev_prop_set_uint64(vms->cfg, "ram-size", machine->ram_size);
+    qdev_prop_set_uint32(vms->cfg, "rnd", rnd);
+
+    if (!sysbus_realize_and_unref(cfg, errp)) {
+        error_prepend(errp, "Error creating vmapple cfg device: ");
+        return false;
+    }
+
+    return true;
+}
+
+static void create_gfx(VMAppleMachineState *vms, MemoryRegion *mem)
+{
+    int irq_gfx = vms->irqmap[VMAPPLE_APV_GFX];
+    int irq_iosfc = vms->irqmap[VMAPPLE_APV_IOSFC];
+    SysBusDevice *gfx;
+
+    gfx = SYS_BUS_DEVICE(qdev_new("apple-gfx-mmio"));
+    sysbus_mmio_map(gfx, 0, vms->memmap[VMAPPLE_APV_GFX].base);
+    sysbus_mmio_map(gfx, 1, vms->memmap[VMAPPLE_APV_IOSFC].base);
+    sysbus_connect_irq(gfx, 0, qdev_get_gpio_in(vms->gic, irq_gfx));
+    sysbus_connect_irq(gfx, 1, qdev_get_gpio_in(vms->gic, irq_iosfc));
+    sysbus_realize_and_unref(gfx, &error_fatal);
+}
+
+static void create_aes(VMAppleMachineState *vms, MemoryRegion *mem)
+{
+    int irq = vms->irqmap[VMAPPLE_AES_1];
+    SysBusDevice *aes;
+
+    aes = SYS_BUS_DEVICE(qdev_new(TYPE_APPLE_AES));
+    sysbus_mmio_map(aes, 0, vms->memmap[VMAPPLE_AES_1].base);
+    sysbus_mmio_map(aes, 1, vms->memmap[VMAPPLE_AES_2].base);
+    sysbus_connect_irq(aes, 0, qdev_get_gpio_in(vms->gic, irq));
+    sysbus_realize_and_unref(aes, &error_fatal);
+}
+
+static int arm_gic_ppi_index(int cpu_nr, int ppi_index)
+{
+    return NUM_IRQS + cpu_nr * GIC_INTERNAL + ppi_index;
+}
+
+static void create_gic(VMAppleMachineState *vms, MemoryRegion *mem)
+{
+    MachineState *ms = MACHINE(vms);
+    /* We create a standalone GIC */
+    SysBusDevice *gicbusdev;
+    QList *redist_region_count;
+    int i;
+    unsigned int smp_cpus = ms->smp.cpus;
+
+    vms->gic = qdev_new(gicv3_class_name());
+    qdev_prop_set_uint32(vms->gic, "revision", 3);
+    qdev_prop_set_uint32(vms->gic, "num-cpu", smp_cpus);
+    /*
+     * Note that the num-irq property counts both internal and external
+     * interrupts; there are always 32 of the former (mandated by GIC spec).
+     */
+    qdev_prop_set_uint32(vms->gic, "num-irq", NUM_IRQS + 32);
+
+    uint32_t redist0_capacity =
+                vms->memmap[VMAPPLE_GIC_REDIST].size / GICV3_REDIST_SIZE;
+    uint32_t redist0_count = MIN(smp_cpus, redist0_capacity);
+
+    redist_region_count = qlist_new();
+    qlist_append_int(redist_region_count, redist0_count);
+    qdev_prop_set_array(vms->gic, "redist-region-count", redist_region_count);
+
+    gicbusdev = SYS_BUS_DEVICE(vms->gic);
+    sysbus_realize_and_unref(gicbusdev, &error_fatal);
+    sysbus_mmio_map(gicbusdev, 0, vms->memmap[VMAPPLE_GIC_DIST].base);
+    sysbus_mmio_map(gicbusdev, 1, vms->memmap[VMAPPLE_GIC_REDIST].base);
+
+    /*
+     * Wire the outputs from each CPU's generic timer and the GICv3
+     * maintenance interrupt signal to the appropriate GIC PPI inputs,
+     * and the GIC's IRQ/FIQ/VIRQ/VFIQ interrupt outputs to the CPU's inputs.
+     */
+    for (i = 0; i < smp_cpus; i++) {
+        DeviceState *cpudev = DEVICE(qemu_get_cpu(i));
+
+        /* Map the virt timer to PPI 27 */
+        qdev_connect_gpio_out(cpudev, GTIMER_VIRT,
+                              qdev_get_gpio_in(vms->gic,
+                                               arm_gic_ppi_index(i, 27)));
+
+        /* Map the GIC IRQ and FIQ lines to CPU */
+        sysbus_connect_irq(gicbusdev, i, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ));
+        sysbus_connect_irq(gicbusdev, i + smp_cpus,
+                           qdev_get_gpio_in(cpudev, ARM_CPU_FIQ));
+    }
+}
+
+static void create_uart(const VMAppleMachineState *vms, int uart,
+                        MemoryRegion *mem, Chardev *chr)
+{
+    hwaddr base = vms->memmap[uart].base;
+    int irq = vms->irqmap[uart];
+    DeviceState *dev = qdev_new(TYPE_PL011);
+    SysBusDevice *s = SYS_BUS_DEVICE(dev);
+
+    qdev_prop_set_chr(dev, "chardev", chr);
+    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+    memory_region_add_subregion(mem, base,
+                                sysbus_mmio_get_region(s, 0));
+    sysbus_connect_irq(s, 0, qdev_get_gpio_in(vms->gic, irq));
+}
+
+static void create_rtc(const VMAppleMachineState *vms)
+{
+    hwaddr base = vms->memmap[VMAPPLE_RTC].base;
+    int irq = vms->irqmap[VMAPPLE_RTC];
+
+    sysbus_create_simple("pl031", base, qdev_get_gpio_in(vms->gic, irq));
+}
+
+static DeviceState *gpio_key_dev;
+static void vmapple_powerdown_req(Notifier *n, void *opaque)
+{
+    /* use gpio Pin 3 for power button event */
+    qemu_set_irq(qdev_get_gpio_in(gpio_key_dev, 0), 1);
+}
+
+static void create_gpio_devices(const VMAppleMachineState *vms, int gpio,
+                                MemoryRegion *mem)
+{
+    DeviceState *pl061_dev;
+    hwaddr base = vms->memmap[gpio].base;
+    int irq = vms->irqmap[gpio];
+    SysBusDevice *s;
+
+    pl061_dev = qdev_new("pl061");
+    /* Pull lines down to 0 if not driven by the PL061 */
+    qdev_prop_set_uint32(pl061_dev, "pullups", 0);
+    qdev_prop_set_uint32(pl061_dev, "pulldowns", 0xff);
+    s = SYS_BUS_DEVICE(pl061_dev);
+    sysbus_realize_and_unref(s, &error_fatal);
+    memory_region_add_subregion(mem, base, sysbus_mmio_get_region(s, 0));
+    sysbus_connect_irq(s, 0, qdev_get_gpio_in(vms->gic, irq));
+    gpio_key_dev = sysbus_create_simple("gpio-key", -1,
+                                        qdev_get_gpio_in(pl061_dev, 3));
+}
+
+static void vmapple_firmware_init(VMAppleMachineState *vms,
+                                  MemoryRegion *sysmem)
+{
+    hwaddr size = vms->memmap[VMAPPLE_FIRMWARE].size;
+    hwaddr base = vms->memmap[VMAPPLE_FIRMWARE].base;
+    const char *bios_name;
+    int image_size;
+    char *fname;
+
+    bios_name = MACHINE(vms)->firmware;
+    if (!bios_name) {
+        error_report("No firmware specified");
+        exit(1);
+    }
+
+    fname = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
+    if (!fname) {
+        error_report("Could not find ROM image '%s'", bios_name);
+        exit(1);
+    }
+
+    memory_region_init_ram(&vms->fw_mr, NULL, "firmware", size, &error_fatal);
+    image_size = load_image_mr(fname, &vms->fw_mr);
+
+    g_free(fname);
+    if (image_size < 0) {
+        error_report("Could not load ROM image '%s'", bios_name);
+        exit(1);
+    }
+
+    memory_region_add_subregion(get_system_memory(), base, &vms->fw_mr);
+}
+
+static void create_pcie(VMAppleMachineState *vms)
+{
+    hwaddr base_mmio = vms->memmap[VMAPPLE_PCIE_MMIO].base;
+    hwaddr size_mmio = vms->memmap[VMAPPLE_PCIE_MMIO].size;
+    hwaddr base_ecam = vms->memmap[VMAPPLE_PCIE_ECAM].base;
+    hwaddr size_ecam = vms->memmap[VMAPPLE_PCIE_ECAM].size;
+    int irq = vms->irqmap[VMAPPLE_PCIE];
+    MemoryRegion *mmio_alias;
+    MemoryRegion *mmio_reg;
+    MemoryRegion *ecam_reg;
+    DeviceState *dev;
+    int i;
+    PCIHostState *pci;
+    DeviceState *usb_controller;
+    USBBus *usb_bus;
+
+    dev = qdev_new(TYPE_GPEX_HOST);
+    qdev_prop_set_uint32(dev, "num-irqs", GPEX_NUM_IRQS);
+    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+
+    /* Map only the first size_ecam bytes of ECAM space */
+    ecam_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
+    memory_region_init_alias(&vms->ecam_alias, OBJECT(dev), "pcie-ecam",
+                             ecam_reg, 0, size_ecam);
+    memory_region_add_subregion(get_system_memory(), base_ecam,
+                                &vms->ecam_alias);
+
+    /*
+     * Map the MMIO window from [0x50000000-0x7fff0000] in PCI space into
+     * system address space at [0x50000000-0x7fff0000].
+     */
+    mmio_alias = g_new0(MemoryRegion, 1);
+    mmio_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 1);
+    memory_region_init_alias(mmio_alias, OBJECT(dev), "pcie-mmio",
+                             mmio_reg, base_mmio, size_mmio);
+    memory_region_add_subregion(get_system_memory(), base_mmio, mmio_alias);
+
+    for (i = 0; i < GPEX_NUM_IRQS; i++) {
+        sysbus_connect_irq(SYS_BUS_DEVICE(dev), i,
+                           qdev_get_gpio_in(vms->gic, irq + i));
+        gpex_set_irq_num(GPEX_HOST(dev), i, irq + i);
+    }
+
+    pci = PCI_HOST_BRIDGE(dev);
+    vms->bus = pci->bus;
+    g_assert(vms->bus);
+
+    while ((dev = qemu_create_nic_device("virtio-net-pci", true, NULL))) {
+        qdev_realize_and_unref(dev, BUS(vms->bus), &error_fatal);
+    }
+
+    if (defaults_enabled()) {
+        usb_controller = qdev_new(TYPE_QEMU_XHCI);
+        qdev_realize_and_unref(usb_controller, BUS(pci->bus), &error_fatal);
+
+        usb_bus = USB_BUS(object_resolve_type_unambiguous(TYPE_USB_BUS,
+                                                          &error_fatal));
+        usb_create_simple(usb_bus, "usb-kbd");
+        usb_create_simple(usb_bus, "usb-tablet");
+    }
+}
+
+static void vmapple_reset(void *opaque)
+{
+    VMAppleMachineState *vms = opaque;
+    hwaddr base = vms->memmap[VMAPPLE_FIRMWARE].base;
+
+    cpu_set_pc(first_cpu, base);
+}
+
+static void mach_vmapple_init(MachineState *machine)
+{
+    VMAppleMachineState *vms = VMAPPLE_MACHINE(machine);
+    MachineClass *mc = MACHINE_GET_CLASS(machine);
+    const CPUArchIdList *possible_cpus;
+    MemoryRegion *sysmem = get_system_memory();
+    int n;
+    unsigned int smp_cpus = machine->smp.cpus;
+    unsigned int max_cpus = machine->smp.max_cpus;
+
+    vms->memmap = memmap;
+    machine->usb = true;
+
+    possible_cpus = mc->possible_cpu_arch_ids(machine);
+    assert(possible_cpus->len == max_cpus);
+    for (n = 0; n < possible_cpus->len; n++) {
+        Object *cpu;
+        CPUState *cs;
+
+        if (n >= smp_cpus) {
+            break;
+        }
+
+        cpu = object_new(possible_cpus->cpus[n].type);
+        object_property_set_int(cpu, "mp-affinity",
+                                possible_cpus->cpus[n].arch_id, &error_fatal);
+
+        cs = CPU(cpu);
+        cs->cpu_index = n;
+
+        numa_cpu_pre_plug(&possible_cpus->cpus[cs->cpu_index], DEVICE(cpu),
+                          &error_fatal);
+
+        if (object_property_find(cpu, "has_el3")) {
+            object_property_set_bool(cpu, "has_el3", false, &error_fatal);
+        }
+        if (object_property_find(cpu, "has_el2")) {
+            object_property_set_bool(cpu, "has_el2", false, &error_fatal);
+        }
+        object_property_set_int(cpu, "psci-conduit", QEMU_PSCI_CONDUIT_HVC,
+                                &error_fatal);
+
+        /* Secondary CPUs start in PSCI powered-down state */
+        if (n > 0) {
+            object_property_set_bool(cpu, "start-powered-off", true,
+                                     &error_fatal);
+        }
+
+        object_property_set_link(cpu, "memory", OBJECT(sysmem), &error_abort);
+        qdev_realize(DEVICE(cpu), NULL, &error_fatal);
+        object_unref(cpu);
+    }
+
+    memory_region_add_subregion(sysmem, vms->memmap[VMAPPLE_MEM].base,
+                                machine->ram);
+
+    create_gic(vms, sysmem);
+    create_bdif(vms, sysmem);
+    create_pvpanic(vms, sysmem);
+    create_aes(vms, sysmem);
+    create_gfx(vms, sysmem);
+    create_uart(vms, VMAPPLE_UART, sysmem, serial_hd(0));
+    create_rtc(vms);
+    create_pcie(vms);
+
+    create_gpio_devices(vms, VMAPPLE_GPIO, sysmem);
+
+    vmapple_firmware_init(vms, sysmem);
+    create_cfg(vms, sysmem, &error_fatal);
+
+    /* connect powerdown request */
+    vms->powerdown_notifier.notify = vmapple_powerdown_req;
+    qemu_register_powerdown_notifier(&vms->powerdown_notifier);
+
+    vms->bootinfo.ram_size = machine->ram_size;
+    vms->bootinfo.board_id = -1;
+    vms->bootinfo.loader_start = vms->memmap[VMAPPLE_MEM].base;
+    vms->bootinfo.skip_dtb_autoload = true;
+    vms->bootinfo.firmware_loaded = true;
+    arm_load_kernel(ARM_CPU(first_cpu), machine, &vms->bootinfo);
+
+    qemu_register_reset(vmapple_reset, vms);
+}
+
+static CpuInstanceProperties
+vmapple_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
+{
+    MachineClass *mc = MACHINE_GET_CLASS(ms);
+    const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms);
+
+    assert(cpu_index < possible_cpus->len);
+    return possible_cpus->cpus[cpu_index].props;
+}
+
+
+static int64_t vmapple_get_default_cpu_node_id(const MachineState *ms, int idx)
+{
+    return idx % ms->numa_state->num_nodes;
+}
+
+static const CPUArchIdList *vmapple_possible_cpu_arch_ids(MachineState *ms)
+{
+    int n;
+    unsigned int max_cpus = ms->smp.max_cpus;
+
+    if (ms->possible_cpus) {
+        assert(ms->possible_cpus->len == max_cpus);
+        return ms->possible_cpus;
+    }
+
+    ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) +
+                                  sizeof(CPUArchId) * max_cpus);
+    ms->possible_cpus->len = max_cpus;
+    for (n = 0; n < ms->possible_cpus->len; n++) {
+        ms->possible_cpus->cpus[n].type = ms->cpu_type;
+        ms->possible_cpus->cpus[n].arch_id =
+            arm_build_mp_affinity(n, GICV3_TARGETLIST_BITS);
+        ms->possible_cpus->cpus[n].props.has_thread_id = true;
+        ms->possible_cpus->cpus[n].props.thread_id = n;
+    }
+    return ms->possible_cpus;
+}
+
+static GlobalProperty vmapple_compat_defaults[] = {
+    { TYPE_VIRTIO_PCI, "disable-legacy", "on" },
+    /*
+     * macOS XHCI driver attempts to schedule events onto even rings 1 & 2
+     * even when (as here) there is no MSI(-X) support. Disabling interrupter
+     * mapping in the XHCI controller works around the problem.
+     */
+    { TYPE_XHCI_PCI, "conditional-intr-mapping", "on" },
+};
+
+static void vmapple_machine_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+
+    mc->init = mach_vmapple_init;
+    mc->max_cpus = 32;
+    mc->block_default_type = IF_VIRTIO;
+    mc->no_cdrom = 1;
+    mc->pci_allow_0_address = true;
+    mc->minimum_page_bits = 12;
+    mc->possible_cpu_arch_ids = vmapple_possible_cpu_arch_ids;
+    mc->cpu_index_to_instance_props = vmapple_cpu_index_to_props;
+    mc->default_cpu_type = ARM_CPU_TYPE_NAME("host");
+    mc->get_default_cpu_node_id = vmapple_get_default_cpu_node_id;
+    mc->default_ram_id = "mach-vmapple.ram";
+    mc->desc = "Apple aarch64 Virtual Machine";
+
+    compat_props_add(mc->compat_props, vmapple_compat_defaults,
+                     G_N_ELEMENTS(vmapple_compat_defaults));
+}
+
+static void vmapple_instance_init(Object *obj)
+{
+    VMAppleMachineState *vms = VMAPPLE_MACHINE(obj);
+
+    vms->irqmap = irqmap;
+
+    object_property_add_uint64_ptr(obj, "uuid", &vms->uuid,
+                                   OBJ_PROP_FLAG_READWRITE);
+    object_property_set_description(obj, "uuid", "Machine UUID (SDOM)");
+}
+
+static const TypeInfo vmapple_machine_info = {
+    .name          = TYPE_VMAPPLE_MACHINE,
+    .parent        = TYPE_MACHINE,
+    .instance_size = sizeof(VMAppleMachineState),
+    .class_init    = vmapple_machine_class_init,
+    .instance_init = vmapple_instance_init,
+};
+
+static void machvmapple_machine_init(void)
+{
+    type_register_static(&vmapple_machine_info);
+}
+type_init(machvmapple_machine_init);
+
diff --git a/hw/watchdog/allwinner-wdt.c b/hw/watchdog/allwinner-wdt.c
index 1bfec41ff8b48..78f4f9d6f6793 100644
--- a/hw/watchdog/allwinner-wdt.c
+++ b/hw/watchdog/allwinner-wdt.c
@@ -275,7 +275,7 @@ static void allwinner_wdt_write(void *opaque, hwaddr offset,
 static const MemoryRegionOps allwinner_wdt_ops = {
     .read = allwinner_wdt_read,
     .write = allwinner_wdt_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
     .valid = {
         .min_access_size = 4,
         .max_access_size = 4,
diff --git a/hw/xen/xen-hvm-common.c b/hw/xen/xen-hvm-common.c
index 7ffbbfea23b33..9a677e8ed7490 100644
--- a/hw/xen/xen-hvm-common.c
+++ b/hw/xen/xen-hvm-common.c
@@ -1,14 +1,21 @@
 #include "qemu/osdep.h"
 #include "qemu/units.h"
+#include "qemu/error-report.h"
 #include "qapi/error.h"
+#include "exec/target_long.h"
 #include "exec/target_page.h"
 #include "trace.h"
 
+#include "hw/hw.h"
 #include "hw/pci/pci_host.h"
 #include "hw/xen/xen-hvm-common.h"
 #include "hw/xen/xen-bus.h"
 #include "hw/boards.h"
 #include "hw/xen/arch_hvm.h"
+#include "system/runstate.h"
+#include "system/system.h"
+#include "system/xen.h"
+#include "system/xen-mapcache.h"
 
 MemoryRegion xen_memory, xen_grants;
 
diff --git a/hw/xen/xen-legacy-backend.c b/hw/xen/xen-legacy-backend.c
index 118c571b3a741..bf58db0ca68ef 100644
--- a/hw/xen/xen-legacy-backend.c
+++ b/hw/xen/xen-legacy-backend.c
@@ -163,7 +163,7 @@ static struct XenLegacyDevice *xen_be_get_xendev(const char *type, int dom,
 
     /* init new xendev */
     xendev = g_malloc0(ops->size);
-    object_initialize(&xendev->qdev, ops->size, TYPE_XENBACKEND);
+    object_initialize(xendev, ops->size, TYPE_XENBACKEND);
     OBJECT(xendev)->free = g_free;
     qdev_set_id(DEVICE(xendev), g_strdup_printf("xen-%s-%d", type, dev),
                 &error_fatal);
@@ -640,16 +640,14 @@ static void xendev_class_init(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
-    /* xen-backend devices can be plugged/unplugged dynamically */
-    dc->user_creatable = true;
     dc->bus_type = TYPE_XENSYSBUS;
 }
 
 static const TypeInfo xendev_type_info = {
     .name          = TYPE_XENBACKEND,
-    .parent        = TYPE_DEVICE,
+    .parent        = TYPE_DYNAMIC_SYS_BUS_DEVICE,
     .class_init    = xendev_class_init,
-    .instance_size = sizeof(struct XenLegacyDevice),
+    .instance_size = sizeof(XenLegacyDevice),
 };
 
 static void xen_sysbus_class_init(ObjectClass *klass, void *data)
@@ -672,7 +670,6 @@ static const TypeInfo xensysbus_info = {
 static const TypeInfo xensysdev_info = {
     .name          = TYPE_XENSYSDEV,
     .parent        = TYPE_SYS_BUS_DEVICE,
-    .instance_size = sizeof(SysBusDevice),
 };
 
 static void xenbe_register_types(void)
diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c
index 00bfbcc6fb047..698b5c53ed58f 100644
--- a/hw/xen/xen-mapcache.c
+++ b/hw/xen/xen-mapcache.c
@@ -700,7 +700,6 @@ void xen_invalidate_map_cache(void)
     bdrv_drain_all();
 
     xen_invalidate_map_cache_single(mapcache);
-    xen_invalidate_map_cache_single(mapcache_grants);
 }
 
 static uint8_t *xen_replace_cache_entry_unlocked(MapCache *mc,
diff --git a/hw/xen/xen-pvh-common.c b/hw/xen/xen-pvh-common.c
index 9c21fa858d343..d675f7a8aebcf 100644
--- a/hw/xen/xen-pvh-common.c
+++ b/hw/xen/xen-pvh-common.c
@@ -8,14 +8,13 @@
 
 #include "qemu/osdep.h"
 #include "qemu/error-report.h"
-#include "qapi/error.h"
+#include "qemu/units.h"
 #include "qapi/visitor.h"
 #include "hw/boards.h"
 #include "hw/irq.h"
-#include "hw/sysbus.h"
-#include "system/system.h"
 #include "system/tpm.h"
 #include "system/tpm_backend.h"
+#include "system/runstate.h"
 #include "hw/xen/xen-pvh-common.h"
 #include "trace.h"
 
diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
index e2bd4c7d41166..9487f68f2e98c 100644
--- a/hw/xen/xen_pt.c
+++ b/hw/xen/xen_pt.c
@@ -766,6 +766,57 @@ static void xen_pt_destroy(PCIDevice *d) {
 }
 /* init */
 
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 42000
+static bool xen_pt_need_gsi(void)
+{
+    FILE *fp;
+    int len;
+    /*
+     * The max length of guest_type is "PVH"+'\n'+'\0', it is 5,
+     * so here set the length of type to be twice.
+     */
+    char type[10];
+    const char *guest_type = "/sys/hypervisor/guest_type";
+
+    fp = fopen(guest_type, "r");
+    if (!fp) {
+        error_report("Cannot open %s: %s", guest_type, strerror(errno));
+        return false;
+    }
+
+    if (fgets(type, sizeof(type), fp)) {
+        len = strlen(type);
+        if (len) {
+            type[len - 1] = '\0';
+            if (!strcmp(type, "PVH")) {
+                fclose(fp);
+                return true;
+            }
+        }
+    }
+
+    fclose(fp);
+    return false;
+}
+
+static int xen_pt_map_pirq_for_gsi(PCIDevice *d, int *pirq)
+{
+    int gsi;
+    XenPCIPassthroughState *s = XEN_PT_DEVICE(d);
+
+    gsi = xc_pcidev_get_gsi(xen_xc,
+                            PCI_SBDF(s->real_device.domain,
+                                     s->real_device.bus,
+                                     s->real_device.dev,
+                                     s->real_device.func));
+    if (gsi >= 0) {
+        return xc_physdev_map_pirq_gsi(xen_xc, xen_domid, gsi, pirq);
+    }
+
+    return gsi;
+}
+#endif
+
 static void xen_pt_realize(PCIDevice *d, Error **errp)
 {
     ERRP_GUARD();
@@ -847,7 +898,16 @@ static void xen_pt_realize(PCIDevice *d, Error **errp)
         goto out;
     }
 
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 42000
+    if (xen_pt_need_gsi()) {
+        rc = xen_pt_map_pirq_for_gsi(d, &pirq);
+    } else {
+        rc = xc_physdev_map_pirq(xen_xc, xen_domid, machine_irq, &pirq);
+    }
+#else
     rc = xc_physdev_map_pirq(xen_xc, xen_domid, machine_irq, &pirq);
+#endif
+
     if (rc < 0) {
         XEN_PT_ERR(d, "Mapping machine irq %u to pirq %i failed, (err: %d)\n",
                    machine_irq, pirq, errno);
diff --git a/hw/xen/xen_pvdev.c b/hw/xen/xen_pvdev.c
index c9143ba259eb1..fe95b62d13315 100644
--- a/hw/xen/xen_pvdev.c
+++ b/hw/xen/xen_pvdev.c
@@ -273,7 +273,7 @@ void xen_pv_del_xendev(struct XenLegacyDevice *xendev)
 
     QTAILQ_REMOVE(&xendevs, xendev, next);
 
-    qdev_unplug(&xendev->qdev, NULL);
+    qdev_unplug(DEVICE(xendev), NULL);
 }
 
 void xen_pv_insert_xendev(struct XenLegacyDevice *xendev)
diff --git a/include/hw/core/accel-cpu.h b/include/accel/accel-cpu-target.h
similarity index 92%
rename from include/hw/core/accel-cpu.h
rename to include/accel/accel-cpu-target.h
index 24dad45ab9ebc..37dde7fae3e05 100644
--- a/include/hw/core/accel-cpu.h
+++ b/include/accel/accel-cpu-target.h
@@ -8,8 +8,8 @@
  * See the COPYING file in the top-level directory.
  */
 
-#ifndef ACCEL_CPU_H
-#define ACCEL_CPU_H
+#ifndef ACCEL_CPU_TARGET_H
+#define ACCEL_CPU_TARGET_H
 
 /*
  * This header is used to define new accelerator-specific target-specific
@@ -20,6 +20,9 @@
  * subclasses in target/, or the accel implementation itself in accel/
  */
 
+#include "qom/object.h"
+#include "cpu.h"
+
 #define TYPE_ACCEL_CPU "accel-" CPU_RESOLVING_TYPE
 #define ACCEL_CPU_NAME(name) (name "-" TYPE_ACCEL_CPU)
 typedef struct AccelCPUClass AccelCPUClass;
diff --git a/include/hw/core/tcg-cpu-ops.h b/include/accel/tcg/cpu-ops.h
similarity index 99%
rename from include/hw/core/tcg-cpu-ops.h
rename to include/accel/tcg/cpu-ops.h
index 2e3f1690f127e..f60e5303f2141 100644
--- a/include/hw/core/tcg-cpu-ops.h
+++ b/include/accel/tcg/cpu-ops.h
@@ -141,7 +141,7 @@ struct TCGCPUOps {
      *
      * This method must be provided. If the target does not need to
      * do anything special for halt, the same function used for its
-     * CPUClass::has_work method can be used here, as they have the
+     * SysemuCPUOps::has_work method can be used here, as they have the
      * same function signature.
      */
     bool (*cpu_exec_halt)(CPUState *cpu);
diff --git a/include/accel/tcg/getpc.h b/include/accel/tcg/getpc.h
new file mode 100644
index 0000000000000..8a97ce34e76fb
--- /dev/null
+++ b/include/accel/tcg/getpc.h
@@ -0,0 +1,24 @@
+/*
+ * Get host pc for helper unwinding.
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#ifndef ACCEL_TCG_GETPC_H
+#define ACCEL_TCG_GETPC_H
+
+#ifndef CONFIG_TCG
+#error Can only include this header with TCG
+#endif
+
+/* GETPC is the true target of the return instruction that we'll execute.  */
+#ifdef CONFIG_TCG_INTERPRETER
+extern __thread uintptr_t tci_tb_ptr;
+# define GETPC() tci_tb_ptr
+#else
+# define GETPC() \
+    ((uintptr_t)__builtin_extract_return_addr(__builtin_return_address(0)))
+#endif
+
+#endif /* ACCEL_TCG_GETPC_H */
diff --git a/include/block/aio.h b/include/block/aio.h
index 43883a8a33a8e..99ff48420b1f6 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -54,7 +54,7 @@ typedef void QEMUBHFunc(void *opaque);
 typedef bool AioPollFn(void *opaque);
 typedef void IOHandler(void *opaque);
 
-struct ThreadPool;
+struct ThreadPoolAio;
 struct LinuxAioState;
 typedef struct LuringState LuringState;
 
@@ -123,6 +123,10 @@ struct BHListSlice {
 
 typedef QSLIST_HEAD(, AioHandler) AioHandlerSList;
 
+typedef struct AioPolledEvent {
+    int64_t ns;        /* current polling time in nanoseconds */
+} AioPolledEvent;
+
 struct AioContext {
     GSource source;
 
@@ -207,7 +211,7 @@ struct AioContext {
     /* Thread pool for performing work and receiving completion callbacks.
      * Has its own locking.
      */
-    struct ThreadPool *thread_pool;
+    struct ThreadPoolAio *thread_pool;
 
 #ifdef CONFIG_LINUX_AIO
     struct LinuxAioState *linux_aio;
@@ -229,7 +233,6 @@ struct AioContext {
     int poll_disable_cnt;
 
     /* Polling mode parameters */
-    int64_t poll_ns;        /* current polling time in nanoseconds */
     int64_t poll_max_ns;    /* maximum polling time in nanoseconds */
     int64_t poll_grow;      /* polling time growth factor */
     int64_t poll_shrink;    /* polling time shrink factor */
@@ -500,8 +503,8 @@ void aio_set_event_notifier_poll(AioContext *ctx,
  */
 GSource *aio_get_g_source(AioContext *ctx);
 
-/* Return the ThreadPool bound to this AioContext */
-struct ThreadPool *aio_get_thread_pool(AioContext *ctx);
+/* Return the ThreadPoolAio bound to this AioContext */
+struct ThreadPoolAio *aio_get_thread_pool(AioContext *ctx);
 
 /* Setup the LinuxAioState bound to this AioContext */
 struct LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp);
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index bb91a0f62fa1d..ebb4e56a503b2 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -28,7 +28,6 @@
 #include "block/block-common.h"
 #include "block/block-global-state.h"
 #include "block/snapshot.h"
-#include "qemu/clang-tsa.h"
 #include "qemu/iov.h"
 #include "qemu/rcu.h"
 #include "qemu/stats64.h"
diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h
index dc8d949184360..2c26c721081e7 100644
--- a/include/block/graph-lock.h
+++ b/include/block/graph-lock.h
@@ -20,8 +20,6 @@
 #ifndef GRAPH_LOCK_H
 #define GRAPH_LOCK_H
 
-#include "qemu/clang-tsa.h"
-
 /**
  * Graph Lock API
  * This API provides a rwlock used to protect block layer
diff --git a/include/block/nvme.h b/include/block/nvme.h
index f4d108841bf59..358e516e38b0d 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -142,9 +142,9 @@ enum NvmeCapMask {
     ((cap) |= (uint64_t)((val) & CAP_CMBS_MASK)   << CAP_CMBS_SHIFT)
 
 enum NvmeCapCss {
-    NVME_CAP_CSS_NVM        = 1 << 0,
-    NVME_CAP_CSS_CSI_SUPP   = 1 << 6,
-    NVME_CAP_CSS_ADMIN_ONLY = 1 << 7,
+    NVME_CAP_CSS_NCSS    = 1 << 0,
+    NVME_CAP_CSS_IOCSS   = 1 << 6,
+    NVME_CAP_CSS_NOIOCSS = 1 << 7,
 };
 
 enum NvmeCcShift {
@@ -177,7 +177,7 @@ enum NvmeCcMask {
 
 enum NvmeCcCss {
     NVME_CC_CSS_NVM        = 0x0,
-    NVME_CC_CSS_CSI        = 0x6,
+    NVME_CC_CSS_ALL        = 0x6,
     NVME_CC_CSS_ADMIN_ONLY = 0x7,
 };
 
@@ -906,8 +906,7 @@ enum NvmeStatusCodes {
     NVME_SGL_DESCR_TYPE_INVALID = 0x0011,
     NVME_INVALID_USE_OF_CMB     = 0x0012,
     NVME_INVALID_PRP_OFFSET     = 0x0013,
-    NVME_CMD_SET_CMB_REJECTED   = 0x002b,
-    NVME_INVALID_CMD_SET        = 0x002c,
+    NVME_COMMAND_INTERRUPTED    = 0x0021,
     NVME_FDP_DISABLED           = 0x0029,
     NVME_INVALID_PHID_LIST      = 0x002a,
     NVME_LBA_RANGE              = 0x0080,
@@ -940,6 +939,10 @@ enum NvmeStatusCodes {
     NVME_INVALID_SEC_CTRL_STATE = 0x0120,
     NVME_INVALID_NUM_RESOURCES  = 0x0121,
     NVME_INVALID_RESOURCE_ID    = 0x0122,
+    NVME_IOCS_NOT_SUPPORTED     = 0x0129,
+    NVME_IOCS_NOT_ENABLED       = 0x012a,
+    NVME_IOCS_COMBINATION_REJECTED = 0x012b,
+    NVME_INVALID_IOCS           = 0x012c,
     NVME_CONFLICTING_ATTRS      = 0x0180,
     NVME_INVALID_PROT_INFO      = 0x0181,
     NVME_WRITE_TO_RO            = 0x0182,
@@ -1015,6 +1018,40 @@ typedef struct QEMU_PACKED NvmeSmartLog {
     uint8_t     reserved2[320];
 } NvmeSmartLog;
 
+typedef struct QEMU_PACKED NvmeSmartLogExtended {
+    uint64_t    physical_media_units_written[2];
+    uint64_t    physical_media_units_read[2];
+    uint64_t    bad_user_blocks;
+    uint64_t    bad_system_nand_blocks;
+    uint64_t    xor_recovery_count;
+    uint64_t    uncorrectable_read_error_count;
+    uint64_t    soft_ecc_error_count;
+    uint64_t    end2end_correction_counts;
+    uint8_t     system_data_percent_used;
+    uint8_t     refresh_counts[7];
+    uint64_t    user_data_erase_counts;
+    uint16_t    thermal_throttling_stat_and_count;
+    uint16_t    dssd_spec_version[3];
+    uint64_t    pcie_correctable_error_count;
+    uint32_t    incomplete_shutdowns;
+    uint32_t    rsvd116;
+    uint8_t     percent_free_blocks;
+    uint8_t     rsvd121[7];
+    uint16_t    capacity_health;
+    uint8_t     nvme_errata_ver;
+    uint8_t     rsvd131[5];
+    uint64_t    unaligned_io;
+    uint64_t    security_ver_num;
+    uint64_t    total_nuse;
+    uint64_t    plp_start_count[2];
+    uint64_t    endurance_estimate[2];
+    uint64_t    pcie_retraining_count;
+    uint64_t    power_state_change_count;
+    uint8_t     rsvd208[286];
+    uint16_t    log_page_version;
+    uint64_t    log_page_guid[2];
+} NvmeSmartLogExtended;
+
 #define NVME_SMART_WARN_MAX     6
 enum NvmeSmartWarn {
     NVME_SMART_SPARE                  = 1 << 0,
@@ -1052,6 +1089,12 @@ enum NvmeLogIdentifier {
     NVME_LOG_FDP_RUH_USAGE              = 0x21,
     NVME_LOG_FDP_STATS                  = 0x22,
     NVME_LOG_FDP_EVENTS                 = 0x23,
+    NVME_LOG_VENDOR_START               = 0xc0,
+    NVME_LOG_VENDOR_END                 = 0xff,
+};
+
+enum NvmeOcpLogIdentifier {
+    NVME_OCP_EXTENDED_SMART_INFO = 0xc0,
 };
 
 typedef struct QEMU_PACKED NvmePSD {
@@ -1167,6 +1210,8 @@ typedef struct NvmeIdCtrlZoned {
     uint8_t     rsvd1[4095];
 } NvmeIdCtrlZoned;
 
+#define NVME_ID_CTRL_NVM_DMRL_MAX 255
+
 typedef struct NvmeIdCtrlNvm {
     uint8_t     vsl;
     uint8_t     wzsl;
@@ -1192,9 +1237,10 @@ enum NvmeIdCtrlOacs {
     NVME_OACS_SECURITY      = 1 << 0,
     NVME_OACS_FORMAT        = 1 << 1,
     NVME_OACS_FW            = 1 << 2,
-    NVME_OACS_NS_MGMT       = 1 << 3,
+    NVME_OACS_NMS           = 1 << 3,
     NVME_OACS_DIRECTIVES    = 1 << 5,
-    NVME_OACS_DBBUF         = 1 << 8,
+    NVME_OACS_VMS           = 1 << 7,
+    NVME_OACS_DBCS          = 1 << 8,
 };
 
 enum NvmeIdCtrlOncs {
@@ -1899,6 +1945,7 @@ static inline void _nvme_check_size(void)
     QEMU_BUILD_BUG_ON(sizeof(NvmeErrorLog) != 64);
     QEMU_BUILD_BUG_ON(sizeof(NvmeFwSlotInfoLog) != 512);
     QEMU_BUILD_BUG_ON(sizeof(NvmeSmartLog) != 512);
+    QEMU_BUILD_BUG_ON(sizeof(NvmeSmartLogExtended) != 512);
     QEMU_BUILD_BUG_ON(sizeof(NvmeEffectsLog) != 4096);
     QEMU_BUILD_BUG_ON(sizeof(NvmeIdCtrl) != 4096);
     QEMU_BUILD_BUG_ON(sizeof(NvmeIdCtrlZoned) != 4096);
diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h
index 626706827f543..6570244496a52 100644
--- a/include/block/raw-aio.h
+++ b/include/block/raw-aio.h
@@ -17,6 +17,7 @@
 #define QEMU_RAW_AIO_H
 
 #include "block/aio.h"
+#include "block/block-common.h"
 #include "qemu/iov.h"
 
 /* AIO request types */
@@ -58,11 +59,18 @@ void laio_cleanup(LinuxAioState *s);
 
 /* laio_co_submit: submit I/O requests in the thread's current AioContext. */
 int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
-                                int type, uint64_t dev_max_batch);
+                                int type, BdrvRequestFlags flags,
+                                uint64_t dev_max_batch);
 
 bool laio_has_fdsync(int);
+bool laio_has_fua(void);
 void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context);
 void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context);
+#else
+static inline bool laio_has_fua(void)
+{
+    return false;
+}
 #endif
 /* io_uring.c - Linux io_uring implementation */
 #ifdef CONFIG_LINUX_IO_URING
@@ -71,9 +79,16 @@ void luring_cleanup(LuringState *s);
 
 /* luring_co_submit: submit I/O requests in the thread's current AioContext. */
 int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
-                                  QEMUIOVector *qiov, int type);
+                                  QEMUIOVector *qiov, int type,
+                                  BdrvRequestFlags flags);
 void luring_detach_aio_context(LuringState *s, AioContext *old_context);
 void luring_attach_aio_context(LuringState *s, AioContext *new_context);
+bool luring_has_fua(void);
+#else
+static inline bool luring_has_fua(void)
+{
+    return false;
+}
 #endif
 
 #ifdef _WIN32
diff --git a/include/block/thread-pool.h b/include/block/thread-pool.h
index 948ff5f30c31d..dd48cf07e85f8 100644
--- a/include/block/thread-pool.h
+++ b/include/block/thread-pool.h
@@ -24,20 +24,70 @@
 
 typedef int ThreadPoolFunc(void *opaque);
 
-typedef struct ThreadPool ThreadPool;
+typedef struct ThreadPoolAio ThreadPoolAio;
 
-ThreadPool *thread_pool_new(struct AioContext *ctx);
-void thread_pool_free(ThreadPool *pool);
+ThreadPoolAio *thread_pool_new_aio(struct AioContext *ctx);
+void thread_pool_free_aio(ThreadPoolAio *pool);
 
 /*
- * thread_pool_submit* API: submit I/O requests in the thread's
+ * thread_pool_submit_{aio,co} API: submit I/O requests in the thread's
  * current AioContext.
  */
 BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
                                    BlockCompletionFunc *cb, void *opaque);
 int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg);
-void thread_pool_submit(ThreadPoolFunc *func, void *arg);
+void thread_pool_update_params(ThreadPoolAio *pool, struct AioContext *ctx);
+
+/* ------------------------------------------- */
+/* Generic thread pool types and methods below */
+typedef struct ThreadPool ThreadPool;
+
+/* Create a new thread pool. Never returns NULL. */
+ThreadPool *thread_pool_new(void);
+
+/*
+ * Free the thread pool.
+ * Waits for all the previously submitted work to complete before performing
+ * the actual freeing operation.
+ */
+void thread_pool_free(ThreadPool *pool);
+
+/*
+ * Submit a new work (task) for the pool.
+ *
+ * @opaque_destroy is an optional GDestroyNotify for the @opaque argument
+ * to the work function at @func.
+ */
+void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func,
+                        void *opaque, GDestroyNotify opaque_destroy);
+
+/*
+ * Submit a new work (task) for the pool, making sure it starts getting
+ * processed immediately, launching a new thread for it if necessary.
+ *
+ * @opaque_destroy is an optional GDestroyNotify for the @opaque argument
+ * to the work function at @func.
+ */
+void thread_pool_submit_immediate(ThreadPool *pool, ThreadPoolFunc *func,
+                                  void *opaque, GDestroyNotify opaque_destroy);
+
+/*
+ * Wait for all previously submitted work to complete before returning.
+ *
+ * Can be used as a barrier between two sets of tasks executed on a thread
+ * pool without destroying it or in a performance sensitive path where the
+ * caller just wants to wait for all tasks to complete while deferring the
+ * pool free operation for later, less performance sensitive time.
+ */
+void thread_pool_wait(ThreadPool *pool);
 
-void thread_pool_update_params(ThreadPool *pool, struct AioContext *ctx);
+/* Set the maximum number of threads in the pool. */
+bool thread_pool_set_max_threads(ThreadPool *pool, int max_threads);
+
+/*
+ * Adjust the maximum number of threads in the pool to give each task its
+ * own thread (exactly one thread per task).
+ */
+bool thread_pool_adjust_max_threads_to_work(ThreadPool *pool);
 
 #endif
diff --git a/include/block/ufs.h b/include/block/ufs.h
index 57f5ea3500c3c..a3ee62b027a50 100644
--- a/include/block/ufs.h
+++ b/include/block/ufs.h
@@ -461,7 +461,7 @@ typedef struct Attributes {
     uint8_t psa_state;
     uint32_t psa_data_size;
     uint8_t ref_clk_gating_wait_time;
-    uint8_t device_case_rough_temperaure;
+    uint8_t device_case_rough_temperature;
     uint8_t device_too_high_temp_boundary;
     uint8_t device_too_low_temp_boundary;
     uint8_t throttling_status;
@@ -1073,6 +1073,11 @@ enum health_desc_param {
     UFS_HEALTH_DESC_PARAM_LIFE_TIME_EST_B = 0x4,
 };
 
+enum {
+    UFS_DEV_HIGH_TEMP_NOTIF = BIT(4),
+    UFS_DEV_LOW_TEMP_NOTIF = BIT(5),
+};
+
 /* WriteBooster buffer mode */
 enum {
     UFS_WB_BUF_MODE_LU_DEDICATED = 0x0,
@@ -1091,6 +1096,12 @@ enum ufs_lu_wp_type {
     UFS_LU_PERM_WP = 0x02,
 };
 
+/* Exception event mask values */
+enum {
+    MASK_EE_TOO_HIGH_TEMP = BIT(3),
+    MASK_EE_TOO_LOW_TEMP = BIT(4),
+};
+
 /* UTP QUERY Transaction Specific Fields OpCode */
 enum query_opcode {
     UFS_UPIU_QUERY_OPCODE_NOP = 0x0,
diff --git a/include/crypto/tlssession.h b/include/crypto/tlssession.h
index f694a5c3c5579..d77ae0d423709 100644
--- a/include/crypto/tlssession.h
+++ b/include/crypto/tlssession.h
@@ -75,12 +75,14 @@
  *                                      GINT_TO_POINTER(fd));
  *
  *    while (1) {
- *       if (qcrypto_tls_session_handshake(sess, errp) < 0) {
+ *       int ret = qcrypto_tls_session_handshake(sess, errp);
+ *
+ *       if (ret < 0) {
  *           qcrypto_tls_session_free(sess);
  *           return -1;
  *       }
  *
- *       switch(qcrypto_tls_session_get_handshake_status(sess)) {
+ *       switch(ret) {
  *       case QCRYPTO_TLS_HANDSHAKE_COMPLETE:
  *           if (qcrypto_tls_session_check_credentials(sess, errp) < )) {
  *               qcrypto_tls_session_free(sess);
@@ -170,7 +172,7 @@ G_DEFINE_AUTOPTR_CLEANUP_FUNC(QCryptoTLSSession, qcrypto_tls_session_free)
  *
  * Validate the peer's credentials after a successful
  * TLS handshake. It is an error to call this before
- * qcrypto_tls_session_get_handshake_status() returns
+ * qcrypto_tls_session_handshake() returns
  * QCRYPTO_TLS_HANDSHAKE_COMPLETE
  *
  * Returns 0 if the credentials validated, -1 on error
@@ -226,7 +228,7 @@ void qcrypto_tls_session_set_callbacks(QCryptoTLSSession *sess,
  * registered with qcrypto_tls_session_set_callbacks()
  *
  * It is an error to call this before
- * qcrypto_tls_session_get_handshake_status() returns
+ * qcrypto_tls_session_handshake() returns
  * QCRYPTO_TLS_HANDSHAKE_COMPLETE
  *
  * Returns: the number of bytes sent,
@@ -256,7 +258,7 @@ ssize_t qcrypto_tls_session_write(QCryptoTLSSession *sess,
  * opposed to an error.
  *
  * It is an error to call this before
- * qcrypto_tls_session_get_handshake_status() returns
+ * qcrypto_tls_session_handshake() returns
  * QCRYPTO_TLS_HANDSHAKE_COMPLETE
  *
  * Returns: the number of bytes received,
@@ -289,8 +291,7 @@ size_t qcrypto_tls_session_check_pending(QCryptoTLSSession *sess);
  * the underlying data channel is non-blocking, then
  * this method may return control before the handshake
  * is complete. On non-blocking channels the
- * qcrypto_tls_session_get_handshake_status() method
- * should be used to determine whether the handshake
+ * return value determines whether the handshake
  * has completed, or is waiting to send or receive
  * data. In the latter cases, the caller should setup
  * an event loop watch and call this method again
@@ -306,22 +307,27 @@ typedef enum {
     QCRYPTO_TLS_HANDSHAKE_RECVING,
 } QCryptoTLSSessionHandshakeStatus;
 
+typedef enum {
+    QCRYPTO_TLS_BYE_COMPLETE,
+    QCRYPTO_TLS_BYE_SENDING,
+    QCRYPTO_TLS_BYE_RECVING,
+} QCryptoTLSSessionByeStatus;
+
 /**
- * qcrypto_tls_session_get_handshake_status:
- * @sess: the TLS session object
- *
- * Check the status of the TLS handshake. This
- * is used with non-blocking data channels to
- * determine whether the handshake is waiting
- * to send or receive further data to/from the
- * remote peer.
+ * qcrypto_tls_session_bye:
+ * @session: the TLS session object
+ * @errp: pointer to a NULL-initialized error object
  *
- * Once this returns QCRYPTO_TLS_HANDSHAKE_COMPLETE
- * it is permitted to send/receive payload data on
- * the channel
+ * Start, or continue, a TLS termination sequence. If the underlying
+ * data channel is non-blocking, then this method may return control
+ * before the termination is complete. The return value will indicate
+ * whether the termination has completed, or is waiting to send or
+ * receive data. In the latter cases, the caller should setup an event
+ * loop watch and call this method again once the underlying data
+ * channel is ready to read or write again.
  */
-QCryptoTLSSessionHandshakeStatus
-qcrypto_tls_session_get_handshake_status(QCryptoTLSSession *sess);
+int
+qcrypto_tls_session_bye(QCryptoTLSSession *session, Error **errp);
 
 /**
  * qcrypto_tls_session_get_key_size:
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 09f537d06faf0..8cd6c00cf892e 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -21,6 +21,7 @@
 
 #include "exec/page-protection.h"
 #include "exec/cpu-common.h"
+#include "exec/cpu-interrupt.h"
 #include "exec/memory.h"
 #include "exec/tswap.h"
 #include "hw/core/cpu.h"
@@ -105,81 +106,10 @@ static inline void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val
 
 /* page related stuff */
 #include "exec/cpu-defs.h"
-#ifdef TARGET_PAGE_BITS_VARY
-# include "exec/page-vary.h"
-extern const TargetPageBits target_page;
-# ifdef CONFIG_DEBUG_TCG
-#  define TARGET_PAGE_BITS   ({ assert(target_page.decided); \
-                                target_page.bits; })
-#  define TARGET_PAGE_MASK   ({ assert(target_page.decided); \
-                                (target_long)target_page.mask; })
-# else
-#  define TARGET_PAGE_BITS   target_page.bits
-#  define TARGET_PAGE_MASK   ((target_long)target_page.mask)
-# endif
-# define TARGET_PAGE_SIZE    (-(int)TARGET_PAGE_MASK)
-#else
-# define TARGET_PAGE_BITS_MIN TARGET_PAGE_BITS
-# define TARGET_PAGE_SIZE    (1 << TARGET_PAGE_BITS)
-# define TARGET_PAGE_MASK    ((target_long)-1 << TARGET_PAGE_BITS)
-#endif
-
-#define TARGET_PAGE_ALIGN(addr) ROUND_UP((addr), TARGET_PAGE_SIZE)
+#include "exec/target_page.h"
 
 CPUArchState *cpu_copy(CPUArchState *env);
 
-/* Flags for use in ENV->INTERRUPT_PENDING.
-
-   The numbers assigned here are non-sequential in order to preserve
-   binary compatibility with the vmstate dump.  Bit 0 (0x0001) was
-   previously used for CPU_INTERRUPT_EXIT, and is cleared when loading
-   the vmstate dump.  */
-
-/* External hardware interrupt pending.  This is typically used for
-   interrupts from devices.  */
-#define CPU_INTERRUPT_HARD        0x0002
-
-/* Exit the current TB.  This is typically used when some system-level device
-   makes some change to the memory mapping.  E.g. the a20 line change.  */
-#define CPU_INTERRUPT_EXITTB      0x0004
-
-/* Halt the CPU.  */
-#define CPU_INTERRUPT_HALT        0x0020
-
-/* Debug event pending.  */
-#define CPU_INTERRUPT_DEBUG       0x0080
-
-/* Reset signal.  */
-#define CPU_INTERRUPT_RESET       0x0400
-
-/* Several target-specific external hardware interrupts.  Each target/cpu.h
-   should define proper names based on these defines.  */
-#define CPU_INTERRUPT_TGT_EXT_0   0x0008
-#define CPU_INTERRUPT_TGT_EXT_1   0x0010
-#define CPU_INTERRUPT_TGT_EXT_2   0x0040
-#define CPU_INTERRUPT_TGT_EXT_3   0x0200
-#define CPU_INTERRUPT_TGT_EXT_4   0x1000
-
-/* Several target-specific internal interrupts.  These differ from the
-   preceding target-specific interrupts in that they are intended to
-   originate from within the cpu itself, typically in response to some
-   instruction being executed.  These, therefore, are not masked while
-   single-stepping within the debugger.  */
-#define CPU_INTERRUPT_TGT_INT_0   0x0100
-#define CPU_INTERRUPT_TGT_INT_1   0x0800
-#define CPU_INTERRUPT_TGT_INT_2   0x2000
-
-/* First unused bit: 0x4000.  */
-
-/* The set of all bits that should be masked when single-stepping.  */
-#define CPU_INTERRUPT_SSTEP_MASK \
-    (CPU_INTERRUPT_HARD          \
-     | CPU_INTERRUPT_TGT_EXT_0   \
-     | CPU_INTERRUPT_TGT_EXT_1   \
-     | CPU_INTERRUPT_TGT_EXT_2   \
-     | CPU_INTERRUPT_TGT_EXT_3   \
-     | CPU_INTERRUPT_TGT_EXT_4)
-
 #include "cpu.h"
 
 #ifdef CONFIG_USER_ONLY
@@ -249,29 +179,6 @@ static inline int cpu_mmu_index(CPUState *cs, bool ifetch)
 /* The two sets of flags must not overlap. */
 QEMU_BUILD_BUG_ON(TLB_FLAGS_MASK & TLB_SLOW_FLAGS_MASK);
 
-/**
- * tlb_hit_page: return true if page aligned @addr is a hit against the
- * TLB entry @tlb_addr
- *
- * @addr: virtual address to test (must be page aligned)
- * @tlb_addr: TLB entry address (a CPUTLBEntry addr_read/write/code value)
- */
-static inline bool tlb_hit_page(uint64_t tlb_addr, vaddr addr)
-{
-    return addr == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK));
-}
-
-/**
- * tlb_hit: return true if @addr is a hit against the TLB entry @tlb_addr
- *
- * @addr: virtual address to test (need not be page aligned)
- * @tlb_addr: TLB entry address (a CPUTLBEntry addr_read/write/code value)
- */
-static inline bool tlb_hit(uint64_t tlb_addr, vaddr addr)
-{
-    return tlb_hit_page(tlb_addr, addr & TARGET_PAGE_MASK);
-}
-
 #endif /* !CONFIG_USER_ONLY */
 
 /* Validate correct placement of CPUArchState. */
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index b1d76d6985088..3771b2130c26b 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -67,7 +67,7 @@ typedef uintptr_t ram_addr_t;
 
 /* memory API */
 
-void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
+void qemu_ram_remap(ram_addr_t addr);
 /* This should not be used by devices.  */
 ram_addr_t qemu_ram_addr_from_host(void *ptr);
 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr);
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index ae18398fa9965..9f955f53fde27 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -46,30 +46,4 @@
 
 #include "exec/target_long.h"
 
-#if defined(CONFIG_SOFTMMU) && defined(CONFIG_TCG)
-#define CPU_TLB_DYN_MIN_BITS 6
-#define CPU_TLB_DYN_DEFAULT_BITS 8
-
-# if HOST_LONG_BITS == 32
-/* Make sure we do not require a double-word shift for the TLB load */
-#  define CPU_TLB_DYN_MAX_BITS (32 - TARGET_PAGE_BITS)
-# else /* HOST_LONG_BITS == 64 */
-/*
- * Assuming TARGET_PAGE_BITS==12, with 2**22 entries we can cover 2**(22+12) ==
- * 2**34 == 16G of address space. This is roughly what one would expect a
- * TLB to cover in a modern (as of 2018) x86_64 CPU. For instance, Intel
- * Skylake's Level-2 STLB has 16 1G entries.
- * Also, make sure we do not size the TLB past the guest's address space.
- */
-#  ifdef TARGET_PAGE_BITS_VARY
-#   define CPU_TLB_DYN_MAX_BITS                                  \
-    MIN(22, TARGET_VIRT_ADDR_SPACE_BITS - TARGET_PAGE_BITS)
-#  else
-#   define CPU_TLB_DYN_MAX_BITS                                  \
-    MIN_CONST(22, TARGET_VIRT_ADDR_SPACE_BITS - TARGET_PAGE_BITS)
-#  endif
-# endif
-
-#endif /* CONFIG_SOFTMMU && CONFIG_TCG */
-
 #endif
diff --git a/include/exec/cpu-interrupt.h b/include/exec/cpu-interrupt.h
new file mode 100644
index 0000000000000..40715193ca309
--- /dev/null
+++ b/include/exec/cpu-interrupt.h
@@ -0,0 +1,70 @@
+/*
+ * Flags for use with cpu_interrupt()
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#ifndef CPU_INTERRUPT_H
+#define CPU_INTERRUPT_H
+
+/*
+ * The numbers assigned here are non-sequential in order to preserve binary
+ * compatibility with the vmstate dump.  Bit 0 (0x0001) was previously used
+ * for CPU_INTERRUPT_EXIT, and is cleared when loading the vmstate dump.
+ */
+
+/*
+ * External hardware interrupt pending.
+ * This is typically used for interrupts from devices.
+ */
+#define CPU_INTERRUPT_HARD        0x0002
+
+/*
+ * Exit the current TB.  This is typically used when some system-level device
+ * makes some change to the memory mapping.  E.g. the a20 line change.
+ */
+#define CPU_INTERRUPT_EXITTB      0x0004
+
+/* Halt the CPU.  */
+#define CPU_INTERRUPT_HALT        0x0020
+
+/* Debug event pending.  */
+#define CPU_INTERRUPT_DEBUG       0x0080
+
+/* Reset signal.  */
+#define CPU_INTERRUPT_RESET       0x0400
+
+/*
+ * Several target-specific external hardware interrupts.  Each target/cpu.h
+ * should define proper names based on these defines.
+ */
+#define CPU_INTERRUPT_TGT_EXT_0   0x0008
+#define CPU_INTERRUPT_TGT_EXT_1   0x0010
+#define CPU_INTERRUPT_TGT_EXT_2   0x0040
+#define CPU_INTERRUPT_TGT_EXT_3   0x0200
+#define CPU_INTERRUPT_TGT_EXT_4   0x1000
+
+/*
+ * Several target-specific internal interrupts.  These differ from the
+ * preceding target-specific interrupts in that they are intended to
+ * originate from within the cpu itself, typically in response to some
+ * instruction being executed.  These, therefore, are not masked while
+ * single-stepping within the debugger.
+ */
+#define CPU_INTERRUPT_TGT_INT_0   0x0100
+#define CPU_INTERRUPT_TGT_INT_1   0x0800
+#define CPU_INTERRUPT_TGT_INT_2   0x2000
+
+/* First unused bit: 0x4000.  */
+
+/* The set of all bits that should be masked when single-stepping.  */
+#define CPU_INTERRUPT_SSTEP_MASK \
+    (CPU_INTERRUPT_HARD          \
+     | CPU_INTERRUPT_TGT_EXT_0   \
+     | CPU_INTERRUPT_TGT_EXT_1   \
+     | CPU_INTERRUPT_TGT_EXT_2   \
+     | CPU_INTERRUPT_TGT_EXT_3   \
+     | CPU_INTERRUPT_TGT_EXT_4)
+
+#endif /* CPU_INTERRUPT_H */
diff --git a/include/exec/cputlb.h b/include/exec/cputlb.h
index ef18642a32946..8125f6809c6f1 100644
--- a/include/exec/cputlb.h
+++ b/include/exec/cputlb.h
@@ -21,15 +21,266 @@
 #define CPUTLB_H
 
 #include "exec/cpu-common.h"
+#include "exec/hwaddr.h"
+#include "exec/memattrs.h"
+#include "exec/vaddr.h"
 
-#ifdef CONFIG_TCG
-
-#if !defined(CONFIG_USER_ONLY)
-/* cputlb.c */
+#if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY)
 void tlb_protect_code(ram_addr_t ram_addr);
 void tlb_unprotect_code(ram_addr_t ram_addr);
 #endif
 
-#endif /* CONFIG_TCG */
-
+#ifndef CONFIG_USER_ONLY
+void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length);
+void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length);
 #endif
+
+/**
+ * tlb_set_page_full:
+ * @cpu: CPU context
+ * @mmu_idx: mmu index of the tlb to modify
+ * @addr: virtual address of the entry to add
+ * @full: the details of the tlb entry
+ *
+ * Add an entry to @cpu tlb index @mmu_idx.  All of the fields of
+ * @full must be filled, except for xlat_section, and constitute
+ * the complete description of the translated page.
+ *
+ * This is generally called by the target tlb_fill function after
+ * having performed a successful page table walk to find the physical
+ * address and attributes for the translation.
+ *
+ * At most one entry for a given virtual address is permitted. Only a
+ * single TARGET_PAGE_SIZE region is mapped; @full->lg_page_size is only
+ * used by tlb_flush_page.
+ */
+void tlb_set_page_full(CPUState *cpu, int mmu_idx, vaddr addr,
+                       CPUTLBEntryFull *full);
+
+/**
+ * tlb_set_page_with_attrs:
+ * @cpu: CPU to add this TLB entry for
+ * @addr: virtual address of page to add entry for
+ * @paddr: physical address of the page
+ * @attrs: memory transaction attributes
+ * @prot: access permissions (PAGE_READ/PAGE_WRITE/PAGE_EXEC bits)
+ * @mmu_idx: MMU index to insert TLB entry for
+ * @size: size of the page in bytes
+ *
+ * Add an entry to this CPU's TLB (a mapping from virtual address
+ * @addr to physical address @paddr) with the specified memory
+ * transaction attributes. This is generally called by the target CPU
+ * specific code after it has been called through the tlb_fill()
+ * entry point and performed a successful page table walk to find
+ * the physical address and attributes for the virtual address
+ * which provoked the TLB miss.
+ *
+ * At most one entry for a given virtual address is permitted. Only a
+ * single TARGET_PAGE_SIZE region is mapped; the supplied @size is only
+ * used by tlb_flush_page.
+ */
+void tlb_set_page_with_attrs(CPUState *cpu, vaddr addr,
+                             hwaddr paddr, MemTxAttrs attrs,
+                             int prot, int mmu_idx, vaddr size);
+
+/**
+ * tlb_set_page:
+ *
+ * This function is equivalent to calling tlb_set_page_with_attrs()
+ * with an @attrs argument of MEMTXATTRS_UNSPECIFIED. It's provided
+ * as a convenience for CPUs which don't use memory transaction attributes.
+ */
+void tlb_set_page(CPUState *cpu, vaddr addr,
+                  hwaddr paddr, int prot,
+                  int mmu_idx, vaddr size);
+
+#if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY)
+/**
+ * tlb_flush_page:
+ * @cpu: CPU whose TLB should be flushed
+ * @addr: virtual address of page to be flushed
+ *
+ * Flush one page from the TLB of the specified CPU, for all
+ * MMU indexes.
+ */
+void tlb_flush_page(CPUState *cpu, vaddr addr);
+
+/**
+ * tlb_flush_page_all_cpus_synced:
+ * @cpu: src CPU of the flush
+ * @addr: virtual address of page to be flushed
+ *
+ * Flush one page from the TLB of all CPUs, for all
+ * MMU indexes.
+ *
+ * When this function returns, no CPUs will subsequently perform
+ * translations using the flushed TLBs.
+ */
+void tlb_flush_page_all_cpus_synced(CPUState *src, vaddr addr);
+
+/**
+ * tlb_flush:
+ * @cpu: CPU whose TLB should be flushed
+ *
+ * Flush the entire TLB for the specified CPU. Most CPU architectures
+ * allow the implementation to drop entries from the TLB at any time
+ * so this is generally safe. If more selective flushing is required
+ * use one of the other functions for efficiency.
+ */
+void tlb_flush(CPUState *cpu);
+
+/**
+ * tlb_flush_all_cpus_synced:
+ * @cpu: src CPU of the flush
+ *
+ * Flush the entire TLB for all CPUs, for all MMU indexes.
+ *
+ * When this function returns, no CPUs will subsequently perform
+ * translations using the flushed TLBs.
+ */
+void tlb_flush_all_cpus_synced(CPUState *src_cpu);
+
+/**
+ * tlb_flush_page_by_mmuidx:
+ * @cpu: CPU whose TLB should be flushed
+ * @addr: virtual address of page to be flushed
+ * @idxmap: bitmap of MMU indexes to flush
+ *
+ * Flush one page from the TLB of the specified CPU, for the specified
+ * MMU indexes.
+ */
+void tlb_flush_page_by_mmuidx(CPUState *cpu, vaddr addr,
+                              uint16_t idxmap);
+
+/**
+ * tlb_flush_page_by_mmuidx_all_cpus_synced:
+ * @cpu: Originating CPU of the flush
+ * @addr: virtual address of page to be flushed
+ * @idxmap: bitmap of MMU indexes to flush
+ *
+ * Flush one page from the TLB of all CPUs, for the specified
+ * MMU indexes.
+ *
+ * When this function returns, no CPUs will subsequently perform
+ * translations using the flushed TLBs.
+ */
+void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *cpu, vaddr addr,
+                                              uint16_t idxmap);
+
+/**
+ * tlb_flush_by_mmuidx:
+ * @cpu: CPU whose TLB should be flushed
+ * @wait: If true ensure synchronisation by exiting the cpu_loop
+ * @idxmap: bitmap of MMU indexes to flush
+ *
+ * Flush all entries from the TLB of the specified CPU, for the specified
+ * MMU indexes.
+ */
+void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap);
+
+/**
+ * tlb_flush_by_mmuidx_all_cpus_synced:
+ * @cpu: Originating CPU of the flush
+ * @idxmap: bitmap of MMU indexes to flush
+ *
+ * Flush all entries from the TLB of all CPUs, for the specified
+ * MMU indexes.
+ *
+ * When this function returns, no CPUs will subsequently perform
+ * translations using the flushed TLBs.
+ */
+void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu, uint16_t idxmap);
+
+/**
+ * tlb_flush_page_bits_by_mmuidx
+ * @cpu: CPU whose TLB should be flushed
+ * @addr: virtual address of page to be flushed
+ * @idxmap: bitmap of mmu indexes to flush
+ * @bits: number of significant bits in address
+ *
+ * Similar to tlb_flush_page_mask, but with a bitmap of indexes.
+ */
+void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, vaddr addr,
+                                   uint16_t idxmap, unsigned bits);
+
+/* Similarly, with broadcast and syncing. */
+void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *cpu, vaddr addr,
+                                                   uint16_t idxmap,
+                                                   unsigned bits);
+
+/**
+ * tlb_flush_range_by_mmuidx
+ * @cpu: CPU whose TLB should be flushed
+ * @addr: virtual address of the start of the range to be flushed
+ * @len: length of range to be flushed
+ * @idxmap: bitmap of mmu indexes to flush
+ * @bits: number of significant bits in address
+ *
+ * For each mmuidx in @idxmap, flush all pages within [@addr,@addr+@len),
+ * comparing only the low @bits worth of each virtual page.
+ */
+void tlb_flush_range_by_mmuidx(CPUState *cpu, vaddr addr,
+                               vaddr len, uint16_t idxmap,
+                               unsigned bits);
+
+/* Similarly, with broadcast and syncing. */
+void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu,
+                                               vaddr addr,
+                                               vaddr len,
+                                               uint16_t idxmap,
+                                               unsigned bits);
+#else
+static inline void tlb_flush_page(CPUState *cpu, vaddr addr)
+{
+}
+static inline void tlb_flush_page_all_cpus_synced(CPUState *src, vaddr addr)
+{
+}
+static inline void tlb_flush(CPUState *cpu)
+{
+}
+static inline void tlb_flush_all_cpus_synced(CPUState *src_cpu)
+{
+}
+static inline void tlb_flush_page_by_mmuidx(CPUState *cpu,
+                                            vaddr addr, uint16_t idxmap)
+{
+}
+
+static inline void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
+{
+}
+static inline void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *cpu,
+                                                            vaddr addr,
+                                                            uint16_t idxmap)
+{
+}
+static inline void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu,
+                                                       uint16_t idxmap)
+{
+}
+static inline void tlb_flush_page_bits_by_mmuidx(CPUState *cpu,
+                                                 vaddr addr,
+                                                 uint16_t idxmap,
+                                                 unsigned bits)
+{
+}
+static inline void
+tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *cpu, vaddr addr,
+                                              uint16_t idxmap, unsigned bits)
+{
+}
+static inline void tlb_flush_range_by_mmuidx(CPUState *cpu, vaddr addr,
+                                             vaddr len, uint16_t idxmap,
+                                             unsigned bits)
+{
+}
+static inline void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu,
+                                                             vaddr addr,
+                                                             vaddr len,
+                                                             uint16_t idxmap,
+                                                             unsigned bits)
+{
+}
+#endif /* CONFIG_TCG && !CONFIG_USER_ONLY */
+#endif /* CPUTLB_H */
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index d9045c9ac4c72..dd5c40f22331a 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -27,263 +27,8 @@
 #include "exec/mmu-access-type.h"
 #include "exec/translation-block.h"
 
-#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG)
-/* cputlb.c */
-/**
- * tlb_init - initialize a CPU's TLB
- * @cpu: CPU whose TLB should be initialized
- */
-void tlb_init(CPUState *cpu);
-/**
- * tlb_destroy - destroy a CPU's TLB
- * @cpu: CPU whose TLB should be destroyed
- */
-void tlb_destroy(CPUState *cpu);
-/**
- * tlb_flush_page:
- * @cpu: CPU whose TLB should be flushed
- * @addr: virtual address of page to be flushed
- *
- * Flush one page from the TLB of the specified CPU, for all
- * MMU indexes.
- */
-void tlb_flush_page(CPUState *cpu, vaddr addr);
-/**
- * tlb_flush_page_all_cpus_synced:
- * @cpu: src CPU of the flush
- * @addr: virtual address of page to be flushed
- *
- * Flush one page from the TLB of all CPUs, for all
- * MMU indexes.
- *
- * When this function returns, no CPUs will subsequently perform
- * translations using the flushed TLBs.
- */
-void tlb_flush_page_all_cpus_synced(CPUState *src, vaddr addr);
-/**
- * tlb_flush:
- * @cpu: CPU whose TLB should be flushed
- *
- * Flush the entire TLB for the specified CPU. Most CPU architectures
- * allow the implementation to drop entries from the TLB at any time
- * so this is generally safe. If more selective flushing is required
- * use one of the other functions for efficiency.
- */
-void tlb_flush(CPUState *cpu);
-/**
- * tlb_flush_all_cpus_synced:
- * @cpu: src CPU of the flush
- *
- * Flush the entire TLB for all CPUs, for all MMU indexes.
- *
- * When this function returns, no CPUs will subsequently perform
- * translations using the flushed TLBs.
- */
-void tlb_flush_all_cpus_synced(CPUState *src_cpu);
-/**
- * tlb_flush_page_by_mmuidx:
- * @cpu: CPU whose TLB should be flushed
- * @addr: virtual address of page to be flushed
- * @idxmap: bitmap of MMU indexes to flush
- *
- * Flush one page from the TLB of the specified CPU, for the specified
- * MMU indexes.
- */
-void tlb_flush_page_by_mmuidx(CPUState *cpu, vaddr addr,
-                              uint16_t idxmap);
-/**
- * tlb_flush_page_by_mmuidx_all_cpus_synced:
- * @cpu: Originating CPU of the flush
- * @addr: virtual address of page to be flushed
- * @idxmap: bitmap of MMU indexes to flush
- *
- * Flush one page from the TLB of all CPUs, for the specified
- * MMU indexes.
- *
- * When this function returns, no CPUs will subsequently perform
- * translations using the flushed TLBs.
- */
-void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *cpu, vaddr addr,
-                                              uint16_t idxmap);
-/**
- * tlb_flush_by_mmuidx:
- * @cpu: CPU whose TLB should be flushed
- * @wait: If true ensure synchronisation by exiting the cpu_loop
- * @idxmap: bitmap of MMU indexes to flush
- *
- * Flush all entries from the TLB of the specified CPU, for the specified
- * MMU indexes.
- */
-void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap);
-/**
- * tlb_flush_by_mmuidx_all_cpus_synced:
- * @cpu: Originating CPU of the flush
- * @idxmap: bitmap of MMU indexes to flush
- *
- * Flush all entries from the TLB of all CPUs, for the specified
- * MMU indexes.
- *
- * When this function returns, no CPUs will subsequently perform
- * translations using the flushed TLBs.
- */
-void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu, uint16_t idxmap);
-
-/**
- * tlb_flush_page_bits_by_mmuidx
- * @cpu: CPU whose TLB should be flushed
- * @addr: virtual address of page to be flushed
- * @idxmap: bitmap of mmu indexes to flush
- * @bits: number of significant bits in address
- *
- * Similar to tlb_flush_page_mask, but with a bitmap of indexes.
- */
-void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, vaddr addr,
-                                   uint16_t idxmap, unsigned bits);
-
-/* Similarly, with broadcast and syncing. */
-void tlb_flush_page_bits_by_mmuidx_all_cpus_synced
-    (CPUState *cpu, vaddr addr, uint16_t idxmap, unsigned bits);
-
-/**
- * tlb_flush_range_by_mmuidx
- * @cpu: CPU whose TLB should be flushed
- * @addr: virtual address of the start of the range to be flushed
- * @len: length of range to be flushed
- * @idxmap: bitmap of mmu indexes to flush
- * @bits: number of significant bits in address
- *
- * For each mmuidx in @idxmap, flush all pages within [@addr,@addr+@len),
- * comparing only the low @bits worth of each virtual page.
- */
-void tlb_flush_range_by_mmuidx(CPUState *cpu, vaddr addr,
-                               vaddr len, uint16_t idxmap,
-                               unsigned bits);
-
-/* Similarly, with broadcast and syncing. */
-void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu,
-                                               vaddr addr,
-                                               vaddr len,
-                                               uint16_t idxmap,
-                                               unsigned bits);
-
-/**
- * tlb_set_page_full:
- * @cpu: CPU context
- * @mmu_idx: mmu index of the tlb to modify
- * @addr: virtual address of the entry to add
- * @full: the details of the tlb entry
- *
- * Add an entry to @cpu tlb index @mmu_idx.  All of the fields of
- * @full must be filled, except for xlat_section, and constitute
- * the complete description of the translated page.
- *
- * This is generally called by the target tlb_fill function after
- * having performed a successful page table walk to find the physical
- * address and attributes for the translation.
- *
- * At most one entry for a given virtual address is permitted. Only a
- * single TARGET_PAGE_SIZE region is mapped; @full->lg_page_size is only
- * used by tlb_flush_page.
- */
-void tlb_set_page_full(CPUState *cpu, int mmu_idx, vaddr addr,
-                       CPUTLBEntryFull *full);
-
-/**
- * tlb_set_page_with_attrs:
- * @cpu: CPU to add this TLB entry for
- * @addr: virtual address of page to add entry for
- * @paddr: physical address of the page
- * @attrs: memory transaction attributes
- * @prot: access permissions (PAGE_READ/PAGE_WRITE/PAGE_EXEC bits)
- * @mmu_idx: MMU index to insert TLB entry for
- * @size: size of the page in bytes
- *
- * Add an entry to this CPU's TLB (a mapping from virtual address
- * @addr to physical address @paddr) with the specified memory
- * transaction attributes. This is generally called by the target CPU
- * specific code after it has been called through the tlb_fill()
- * entry point and performed a successful page table walk to find
- * the physical address and attributes for the virtual address
- * which provoked the TLB miss.
- *
- * At most one entry for a given virtual address is permitted. Only a
- * single TARGET_PAGE_SIZE region is mapped; the supplied @size is only
- * used by tlb_flush_page.
- */
-void tlb_set_page_with_attrs(CPUState *cpu, vaddr addr,
-                             hwaddr paddr, MemTxAttrs attrs,
-                             int prot, int mmu_idx, vaddr size);
-/* tlb_set_page:
- *
- * This function is equivalent to calling tlb_set_page_with_attrs()
- * with an @attrs argument of MEMTXATTRS_UNSPECIFIED. It's provided
- * as a convenience for CPUs which don't use memory transaction attributes.
- */
-void tlb_set_page(CPUState *cpu, vaddr addr,
-                  hwaddr paddr, int prot,
-                  int mmu_idx, vaddr size);
-#else
-static inline void tlb_init(CPUState *cpu)
-{
-}
-static inline void tlb_destroy(CPUState *cpu)
-{
-}
-static inline void tlb_flush_page(CPUState *cpu, vaddr addr)
-{
-}
-static inline void tlb_flush_page_all_cpus_synced(CPUState *src, vaddr addr)
-{
-}
-static inline void tlb_flush(CPUState *cpu)
-{
-}
-static inline void tlb_flush_all_cpus_synced(CPUState *src_cpu)
-{
-}
-static inline void tlb_flush_page_by_mmuidx(CPUState *cpu,
-                                            vaddr addr, uint16_t idxmap)
-{
-}
-
-static inline void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
-{
-}
-static inline void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *cpu,
-                                                            vaddr addr,
-                                                            uint16_t idxmap)
-{
-}
-static inline void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu,
-                                                       uint16_t idxmap)
-{
-}
-static inline void tlb_flush_page_bits_by_mmuidx(CPUState *cpu,
-                                                 vaddr addr,
-                                                 uint16_t idxmap,
-                                                 unsigned bits)
-{
-}
-static inline void
-tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *cpu, vaddr addr,
-                                              uint16_t idxmap, unsigned bits)
-{
-}
-static inline void tlb_flush_range_by_mmuidx(CPUState *cpu, vaddr addr,
-                                             vaddr len, uint16_t idxmap,
-                                             unsigned bits)
-{
-}
-static inline void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu,
-                                                             vaddr addr,
-                                                             vaddr len,
-                                                             uint16_t idxmap,
-                                                             unsigned bits)
-{
-}
-#endif
-
 #if defined(CONFIG_TCG)
+#include "accel/tcg/getpc.h"
 
 /**
  * probe_access:
@@ -433,24 +178,6 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t last);
 void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr);
 
-/* GETPC is the true target of the return instruction that we'll execute.  */
-#if defined(CONFIG_TCG_INTERPRETER)
-extern __thread uintptr_t tci_tb_ptr;
-# define GETPC() tci_tb_ptr
-#else
-# define GETPC() \
-    ((uintptr_t)__builtin_extract_return_addr(__builtin_return_address(0)))
-#endif
-
-/* The true return address will often point to a host insn that is part of
-   the next translated guest insn.  Adjust the address backward to point to
-   the middle of the call insn.  Subtracting one would do the job except for
-   several compressed mode architectures (arm, mips) which set the low bit
-   to indicate the compressed mode; subtracting two works around that.  It
-   is also the case that there are no host isas that contain a call insn
-   smaller than 4 bytes, so we don't worry about special-casing this.  */
-#define GETPC_ADJ   2
-
 #if !defined(CONFIG_USER_ONLY)
 
 /**
@@ -502,9 +229,6 @@ static inline tb_page_addr_t get_page_addr_code(CPUArchState *env,
 
 #if !defined(CONFIG_USER_ONLY)
 
-void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length);
-void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length);
-
 MemoryRegionSection *
 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
                                   hwaddr *xlat, hwaddr *plen,
diff --git a/include/exec/memattrs.h b/include/exec/memattrs.h
index 060b7e713149b..8db1d3046479c 100644
--- a/include/exec/memattrs.h
+++ b/include/exec/memattrs.h
@@ -44,6 +44,8 @@ typedef struct MemTxAttrs {
      * (see MEMTX_ACCESS_ERROR).
      */
     unsigned int memory:1;
+    /* Debug access that can even write to ROM. */
+    unsigned int debug:1;
     /* Requester ID (for MSI for example) */
     unsigned int requester_id:16;
 
@@ -56,7 +58,8 @@ typedef struct MemTxAttrs {
      * Bus masters which don't specify any attributes will get this
      * (via the MEMTXATTRS_UNSPECIFIED constant), so that we can
      * distinguish "all attributes deliberately clear" from
-     * "didn't specify" if necessary.
+     * "didn't specify" if necessary. "debug" can be set alongside
+     * "unspecified".
      */
     bool unspecified;
 
diff --git a/include/exec/memop.h b/include/exec/memop.h
index acdb40a9b3b39..407a47d82c7ef 100644
--- a/include/exec/memop.h
+++ b/include/exec/memop.h
@@ -91,8 +91,12 @@ typedef enum MemOp {
      *    Depending on alignment, one or both will be single-copy atomic.
      *    This is the atomicity e.g. of Arm FEAT_LSE2 LDP.
      * MO_ATOM_SUBALIGN: the operation is single-copy atomic by parts
-     *    by the alignment.  E.g. if the address is 0 mod 4, then each
-     *    4-byte subobject is single-copy atomic.
+     *    by the alignment.  E.g. if an 8-byte value is accessed at an
+     *    address which is 0 mod 8, then the whole 8-byte access is
+     *    single-copy atomic; otherwise, if it is accessed at 0 mod 4
+     *    then each 4-byte subobject is single-copy atomic; otherwise
+     *    if it is accessed at 0 mod 2 then the four 2-byte subobjects
+     *    are single-copy atomic.
      *    This is the atomicity e.g. of IBM Power.
      * MO_ATOM_NONE: the operation has no atomicity requirements.
      *
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 9f73b59867260..d09af58c971b5 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -2995,15 +2995,34 @@ MemTxResult address_space_write_cached_slow(MemoryRegionCache *cache,
 int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr);
 bool prepare_mmio_access(MemoryRegion *mr);
 
-static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
+static inline bool memory_region_supports_direct_access(MemoryRegion *mr)
 {
-    if (is_write) {
-        return memory_region_is_ram(mr) && !mr->readonly &&
-               !mr->rom_device && !memory_region_is_ram_device(mr);
-    } else {
-        return (memory_region_is_ram(mr) && !memory_region_is_ram_device(mr)) ||
-               memory_region_is_romd(mr);
+    /* ROM DEVICE regions only allow direct access if in ROMD mode. */
+    if (memory_region_is_romd(mr)) {
+        return true;
+    }
+    if (!memory_region_is_ram(mr)) {
+        return false;
+    }
+    /*
+     * RAM DEVICE regions can be accessed directly using memcpy, but it might
+     * be MMIO and access using mempy can be wrong (e.g., using instructions not
+     * intended for MMIO access). So we treat this as IO.
+     */
+    return !memory_region_is_ram_device(mr);
+}
+
+static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write,
+                                           MemTxAttrs attrs)
+{
+    if (!memory_region_supports_direct_access(mr)) {
+        return false;
     }
+    /* Debug access can write to ROM. */
+    if (is_write && !attrs.debug) {
+        return !mr->readonly && !mr->rom_device;
+    }
+    return true;
 }
 
 /**
@@ -3036,7 +3055,7 @@ MemTxResult address_space_read(AddressSpace *as, hwaddr addr,
             fv = address_space_to_flatview(as);
             l = len;
             mr = flatview_translate(fv, addr, &addr1, &l, false, attrs);
-            if (len == l && memory_access_is_direct(mr, false)) {
+            if (len == l && memory_access_is_direct(mr, false, attrs)) {
                 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
                 memcpy(buf, ptr, len);
             } else {
@@ -3184,6 +3203,9 @@ bool ram_block_discard_is_disabled(void);
  */
 bool ram_block_discard_is_required(void);
 
+void ram_block_add_cpr_blocker(RAMBlock *rb, Error **errp);
+void ram_block_del_cpr_blocker(RAMBlock *rb);
+
 #endif
 
 #endif
diff --git a/include/exec/page-protection.h b/include/exec/page-protection.h
index bae3355f62c81..3e0a8a033311f 100644
--- a/include/exec/page-protection.h
+++ b/include/exec/page-protection.h
@@ -40,8 +40,6 @@
 
 #ifdef CONFIG_USER_ONLY
 
-#include "qemu/clang-tsa.h"
-
 void TSA_NO_TSA mmap_lock(void);
 void TSA_NO_TSA mmap_unlock(void);
 bool have_mmap_lock(void);
diff --git a/include/exec/poison.h b/include/exec/poison.h
index f4283f693af25..8ed04b31083eb 100644
--- a/include/exec/poison.h
+++ b/include/exec/poison.h
@@ -44,23 +44,7 @@
 #pragma GCC poison TARGET_FMT_ld
 #pragma GCC poison TARGET_FMT_lu
 
-#pragma GCC poison TARGET_PAGE_SIZE
-#pragma GCC poison TARGET_PAGE_MASK
-#pragma GCC poison TARGET_PAGE_BITS
-#pragma GCC poison TARGET_PAGE_ALIGN
-
-#pragma GCC poison CPU_INTERRUPT_HARD
-#pragma GCC poison CPU_INTERRUPT_EXITTB
-#pragma GCC poison CPU_INTERRUPT_HALT
-#pragma GCC poison CPU_INTERRUPT_DEBUG
-#pragma GCC poison CPU_INTERRUPT_TGT_EXT_0
-#pragma GCC poison CPU_INTERRUPT_TGT_EXT_1
-#pragma GCC poison CPU_INTERRUPT_TGT_EXT_2
-#pragma GCC poison CPU_INTERRUPT_TGT_EXT_3
-#pragma GCC poison CPU_INTERRUPT_TGT_EXT_4
-#pragma GCC poison CPU_INTERRUPT_TGT_INT_0
-#pragma GCC poison CPU_INTERRUPT_TGT_INT_1
-#pragma GCC poison CPU_INTERRUPT_TGT_INT_2
+#pragma GCC poison TARGET_PHYS_ADDR_SPACE_BITS
 
 #pragma GCC poison CONFIG_ALPHA_DIS
 #pragma GCC poison CONFIG_HPPA_DIS
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 94bb3ccbe42b8..e4c28fbec9b47 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -23,6 +23,7 @@
 #include "cpu.h"
 #include "system/xen.h"
 #include "system/tcg.h"
+#include "exec/cputlb.h"
 #include "exec/ramlist.h"
 #include "exec/ramblock.h"
 #include "exec/exec-all.h"
@@ -101,9 +102,6 @@ static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
 
 bool ramblock_is_pmem(RAMBlock *rb);
 
-long qemu_minrampagesize(void);
-long qemu_maxrampagesize(void);
-
 /**
  * qemu_ram_alloc_from_file,
  * qemu_ram_alloc_from_fd:  Allocate a ram block from the specified backing
diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h
index 0babd105c020f..64484cd8217a4 100644
--- a/include/exec/ramblock.h
+++ b/include/exec/ramblock.h
@@ -39,6 +39,7 @@ struct RAMBlock {
     /* RCU-enabled, writes protected by the ramlist lock */
     QLIST_ENTRY(RAMBlock) next;
     QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
+    Error *cpr_blocker;
     int fd;
     uint64_t fd_offset;
     int guest_memfd;
diff --git a/include/exec/target_page.h b/include/exec/target_page.h
index 98ffbb5c2396f..8e89e5cbe6f96 100644
--- a/include/exec/target_page.h
+++ b/include/exec/target_page.h
@@ -14,10 +14,56 @@
 #ifndef EXEC_TARGET_PAGE_H
 #define EXEC_TARGET_PAGE_H
 
-size_t qemu_target_page_size(void);
-int qemu_target_page_mask(void);
-int qemu_target_page_bits(void);
-int qemu_target_page_bits_min(void);
+/*
+ * If compiling per-target, get the real values.
+ * For generic code, reuse the mechanism for variable page size.
+ */
+#ifdef COMPILING_PER_TARGET
+#include "cpu-param.h"
+#include "exec/target_long.h"
+#define TARGET_PAGE_TYPE  target_long
+#else
+#define TARGET_PAGE_BITS_VARY
+#define TARGET_PAGE_TYPE  int
+#endif
+
+#ifdef TARGET_PAGE_BITS_VARY
+# include "exec/page-vary.h"
+extern const TargetPageBits target_page;
+# ifdef CONFIG_DEBUG_TCG
+#  define TARGET_PAGE_BITS   ({ assert(target_page.decided); \
+                                target_page.bits; })
+#  define TARGET_PAGE_MASK   ({ assert(target_page.decided); \
+                                (TARGET_PAGE_TYPE)target_page.mask; })
+# else
+#  define TARGET_PAGE_BITS   target_page.bits
+#  define TARGET_PAGE_MASK   ((TARGET_PAGE_TYPE)target_page.mask)
+# endif
+# define TARGET_PAGE_SIZE    (-(int)TARGET_PAGE_MASK)
+#else
+# define TARGET_PAGE_BITS_MIN TARGET_PAGE_BITS
+# define TARGET_PAGE_SIZE    (1 << TARGET_PAGE_BITS)
+# define TARGET_PAGE_MASK    ((TARGET_PAGE_TYPE)-1 << TARGET_PAGE_BITS)
+#endif
 
+#define TARGET_PAGE_ALIGN(addr) ROUND_UP((addr), TARGET_PAGE_SIZE)
+
+static inline size_t qemu_target_page_size(void)
+{
+    return TARGET_PAGE_SIZE;
+}
+
+static inline int qemu_target_page_mask(void)
+{
+    return TARGET_PAGE_MASK;
+}
+
+static inline int qemu_target_page_bits(void)
+{
+    return TARGET_PAGE_BITS;
+}
+
+int qemu_target_page_bits_min(void);
 size_t qemu_target_pages_to_MiB(size_t pages);
+
 #endif
diff --git a/include/exec/tlb-common.h b/include/exec/tlb-common.h
index dc5a5faa0b31d..03b5a8ffc71f2 100644
--- a/include/exec/tlb-common.h
+++ b/include/exec/tlb-common.h
@@ -19,14 +19,14 @@
 #ifndef EXEC_TLB_COMMON_H
 #define EXEC_TLB_COMMON_H 1
 
-#define CPU_TLB_ENTRY_BITS 5
+#define CPU_TLB_ENTRY_BITS (HOST_LONG_BITS == 32 ? 4 : 5)
 
 /* Minimalized TLB entry for use by TCG fast path. */
 typedef union CPUTLBEntry {
     struct {
-        uint64_t addr_read;
-        uint64_t addr_write;
-        uint64_t addr_code;
+        uintptr_t addr_read;
+        uintptr_t addr_write;
+        uintptr_t addr_code;
         /*
          * Addend to virtual address to get host address.  IO accesses
          * use the corresponding iotlb value.
@@ -37,7 +37,7 @@ typedef union CPUTLBEntry {
      * Padding to get a power of two size, as well as index
      * access to addr_{read,write,code}.
      */
-    uint64_t addr_idx[(1 << CPU_TLB_ENTRY_BITS) / sizeof(uint64_t)];
+    uintptr_t addr_idx[(1 << CPU_TLB_ENTRY_BITS) / sizeof(uintptr_t)];
 } CPUTLBEntry;
 
 QEMU_BUILD_BUG_ON(sizeof(CPUTLBEntry) != (1 << CPU_TLB_ENTRY_BITS));
diff --git a/include/exec/vaddr.h b/include/exec/vaddr.h
index b9844afc77396..28bec632fbbf6 100644
--- a/include/exec/vaddr.h
+++ b/include/exec/vaddr.h
@@ -6,13 +6,15 @@
 /**
  * vaddr:
  * Type wide enough to contain any #target_ulong virtual address.
+ * We do not support 64-bit guest on 32-host and detect at configure time.
+ * Therefore, a host pointer width will always fit a guest pointer.
  */
-typedef uint64_t vaddr;
-#define VADDR_PRId PRId64
-#define VADDR_PRIu PRIu64
-#define VADDR_PRIo PRIo64
-#define VADDR_PRIx PRIx64
-#define VADDR_PRIX PRIX64
-#define VADDR_MAX UINT64_MAX
+typedef uintptr_t vaddr;
+#define VADDR_PRId PRIdPTR
+#define VADDR_PRIu PRIuPTR
+#define VADDR_PRIo PRIoPTR
+#define VADDR_PRIx PRIxPTR
+#define VADDR_PRIX PRIXPTR
+#define VADDR_MAX UINTPTR_MAX
 
 #endif
diff --git a/include/fpu/softfloat-helpers.h b/include/fpu/softfloat-helpers.h
index 8983c2748ecd8..90862f5cd2270 100644
--- a/include/fpu/softfloat-helpers.h
+++ b/include/fpu/softfloat-helpers.h
@@ -75,6 +75,12 @@ static inline void set_floatx80_rounding_precision(FloatX80RoundPrec val,
     status->floatx80_rounding_precision = val;
 }
 
+static inline void set_floatx80_behaviour(FloatX80Behaviour b,
+                                          float_status *status)
+{
+    status->floatx80_behaviour = b;
+}
+
 static inline void set_float_2nan_prop_rule(Float2NaNPropRule rule,
                                             float_status *status)
 {
@@ -151,6 +157,12 @@ get_floatx80_rounding_precision(const float_status *status)
     return status->floatx80_rounding_precision;
 }
 
+static inline FloatX80Behaviour
+get_floatx80_behaviour(const float_status *status)
+{
+    return status->floatx80_behaviour;
+}
+
 static inline Float2NaNPropRule
 get_float_2nan_prop_rule(const float_status *status)
 {
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
index 53d5eb85210e2..1af2a0cb14bc2 100644
--- a/include/fpu/softfloat-types.h
+++ b/include/fpu/softfloat-types.h
@@ -320,6 +320,56 @@ typedef enum __attribute__((__packed__)) {
     float_ftz_before_rounding = 1,
 } FloatFTZDetection;
 
+/*
+ * floatx80 is primarily used by x86 and m68k, and there are
+ * differences in the handling, largely related to the explicit
+ * Integer bit which floatx80 has and the other float formats do not.
+ * These flag values allow specification of the target's requirements
+ * and can be ORed together to set floatx80_behaviour.
+ */
+typedef enum __attribute__((__packed__)) {
+    /* In the default Infinity value, is the Integer bit 0 ? */
+    floatx80_default_inf_int_bit_is_zero = 1,
+    /*
+     * Are Pseudo-infinities (Inf with the Integer bit zero) valid?
+     * If so, floatx80_is_infinity() will return true for them.
+     * If not, floatx80_invalid_encoding will return false for them,
+     * and using them as inputs to a float op will raise Invalid.
+     */
+    floatx80_pseudo_inf_valid = 2,
+    /*
+     * Are Pseudo-NaNs (NaNs where the Integer bit is zero) valid?
+     * If not, floatx80_invalid_encoding() will return false for them,
+     * and using them as inputs to a float op will raise Invalid.
+     */
+    floatx80_pseudo_nan_valid = 4,
+    /*
+     * Are Unnormals (0 < exp < 0x7fff, Integer bit zero) valid?
+     * If not, floatx80_invalid_encoding() will return false for them,
+     * and using them as inputs to a float op will raise Invalid.
+     */
+    floatx80_unnormal_valid = 8,
+
+    /*
+     * If the exponent is 0 and the Integer bit is set, Intel call
+     * this a "pseudo-denormal"; x86 supports that only on input
+     * (treating them as denormals by ignoring the Integer bit).
+     * For m68k, the integer bit is considered validly part of the
+     * input value when the exponent is 0, and may be 0 or 1,
+     * giving extra range. They may also be generated as outputs.
+     * (The m68k manual actually calls these values part of the
+     * normalized number range, not the denormalized number range.)
+     *
+     * By default you get the Intel behaviour where the Integer
+     * bit is ignored; if this is set then the Integer bit value
+     * is honoured, m68k-style.
+     *
+     * Either way, floatx80_invalid_encoding() will always accept
+     * pseudo-denormals.
+     */
+    floatx80_pseudo_denormal_valid = 16,
+} FloatX80Behaviour;
+
 /*
  * Floating Point Status. Individual architectures may maintain
  * several versions of float_status for different functions. The
@@ -331,6 +381,7 @@ typedef struct float_status {
     uint16_t float_exception_flags;
     FloatRoundMode float_rounding_mode;
     FloatX80RoundPrec floatx80_rounding_precision;
+    FloatX80Behaviour floatx80_behaviour;
     Float2NaNPropRule float_2nan_prop_rule;
     Float3NaNPropRule float_3nan_prop_rule;
     FloatInfZeroNaNRule float_infzeronan_rule;
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index 09a40b4310636..c18ab2cb6097a 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -960,7 +960,7 @@ float128 floatx80_to_float128(floatx80, float_status *status);
 /*----------------------------------------------------------------------------
 | The pattern for an extended double-precision inf.
 *----------------------------------------------------------------------------*/
-extern const floatx80 floatx80_infinity;
+floatx80 floatx80_default_inf(bool zSign, float_status *status);
 
 /*----------------------------------------------------------------------------
 | Software IEC/IEEE extended double-precision operations.
@@ -995,14 +995,19 @@ static inline floatx80 floatx80_chs(floatx80 a)
     return a;
 }
 
-static inline bool floatx80_is_infinity(floatx80 a)
+static inline bool floatx80_is_infinity(floatx80 a, float_status *status)
 {
-#if defined(TARGET_M68K)
-    return (a.high & 0x7fff) == floatx80_infinity.high && !(a.low << 1);
-#else
-    return (a.high & 0x7fff) == floatx80_infinity.high &&
-                       a.low == floatx80_infinity.low;
-#endif
+    /*
+     * It's target-specific whether the Integer bit is permitted
+     * to be 0 in a valid Infinity value. (x86 says no, m68k says yes).
+     */
+    bool intbit = a.low >> 63;
+
+    if (!intbit &&
+        !(status->floatx80_behaviour & floatx80_pseudo_inf_valid)) {
+        return false;
+    }
+    return (a.high & 0x7fff) == 0x7fff && !(a.low << 1);
 }
 
 static inline bool floatx80_is_neg(floatx80 a)
@@ -1068,41 +1073,45 @@ static inline bool floatx80_unordered_quiet(floatx80 a, floatx80 b,
 
 /*----------------------------------------------------------------------------
 | Return whether the given value is an invalid floatx80 encoding.
-| Invalid floatx80 encodings arise when the integer bit is not set, but
-| the exponent is not zero. The only times the integer bit is permitted to
-| be zero is in subnormal numbers and the value zero.
-| This includes what the Intel software developer's manual calls pseudo-NaNs,
-| pseudo-infinities and un-normal numbers. It does not include
-| pseudo-denormals, which must still be correctly handled as inputs even
-| if they are never generated as outputs.
+| Invalid floatx80 encodings may arise when the integer bit is not set
+| correctly; this is target-specific. In Intel terminology the
+| categories are:
+|  exp == 0, int = 0, mantissa == 0 : zeroes
+|  exp == 0, int = 0, mantissa != 0 : denormals
+|  exp == 0, int = 1 : pseudo-denormals
+|  0 < exp < 0x7fff, int = 0 : unnormals
+|  0 < exp < 0x7fff, int = 1 : normals
+|  exp == 0x7fff, int = 0, mantissa == 0 : pseudo-infinities
+|  exp == 0x7fff, int = 1, mantissa == 0 : infinities
+|  exp == 0x7fff, int = 0, mantissa != 0 : pseudo-NaNs
+|  exp == 0x7fff, int = 1, mantissa == 0 : NaNs
+|
+| The usual IEEE cases of zero, denormal, normal, inf and NaN are always valid.
+| x87 permits as input also pseudo-denormals.
+| m68k permits all those and also pseudo-infinities, pseudo-NaNs and unnormals.
+|
+| Since we don't have a target that handles floatx80 but prohibits
+| pseudo-denormals in input, we don't currently have a floatx80_behaviour
+| flag for that case, but instead always accept it. Conveniently this
+| means that all cases with either exponent 0 or the integer bit set are
+| valid for all targets.
 *----------------------------------------------------------------------------*/
-static inline bool floatx80_invalid_encoding(floatx80 a)
-{
-#if defined(TARGET_M68K)
-    /*-------------------------------------------------------------------------
-    | With m68k, the explicit integer bit can be zero in the case of:
-    | - zeros                (exp == 0, mantissa == 0)
-    | - denormalized numbers (exp == 0, mantissa != 0)
-    | - unnormalized numbers (exp != 0, exp < 0x7FFF)
-    | - infinities           (exp == 0x7FFF, mantissa == 0)
-    | - not-a-numbers        (exp == 0x7FFF, mantissa != 0)
-    |
-    | For infinities and NaNs, the explicit integer bit can be either one or
-    | zero.
-    |
-    | The IEEE 754 standard does not define a zero integer bit. Such a number
-    | is an unnormalized number. Hardware does not directly support
-    | denormalized and unnormalized numbers, but implicitly supports them by
-    | trapping them as unimplemented data types, allowing efficient conversion
-    | in software.
-    |
-    | See "M68000 FAMILY PROGRAMMER’S REFERENCE MANUAL",
-    |     "1.6 FLOATING-POINT DATA TYPES"
-    *------------------------------------------------------------------------*/
-    return false;
-#else
-    return (a.low & (1ULL << 63)) == 0 && (a.high & 0x7FFF) != 0;
-#endif
+static inline bool floatx80_invalid_encoding(floatx80 a, float_status *s)
+{
+    if ((a.low >> 63) || (a.high & 0x7fff) == 0) {
+        /* Anything with the Integer bit set or the exponent 0 is valid */
+        return false;
+    }
+
+    if ((a.high & 0x7fff) == 0x7fff) {
+        if (a.low) {
+            return !(s->floatx80_behaviour & floatx80_pseudo_nan_valid);
+        } else {
+            return !(s->floatx80_behaviour & floatx80_pseudo_inf_valid);
+        }
+    } else {
+        return !(s->floatx80_behaviour & floatx80_unnormal_valid);
+    }
 }
 
 #define floatx80_zero make_floatx80(0x0000, 0x0000000000000000LL)
diff --git a/include/hw/acpi/acpi.h b/include/hw/acpi/acpi.h
index e0e51e85b418f..d1a4fa2af8480 100644
--- a/include/hw/acpi/acpi.h
+++ b/include/hw/acpi/acpi.h
@@ -150,6 +150,9 @@ struct ACPIREGS {
     Notifier wakeup;
 };
 
+/* Return whether ACPI subsystem is built in */
+bool acpi_builtin(void);
+
 /* PM_TMR */
 void acpi_pm_tmr_update(ACPIREGS *ar, bool enable);
 void acpi_pm_tmr_calc_overflow_time(ACPIREGS *ar);
diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
index 39619a2457cbb..578a582203cea 100644
--- a/include/hw/acpi/ghes.h
+++ b/include/hw/acpi/ghes.h
@@ -75,8 +75,6 @@ void acpi_build_hest(GArray *table_data, GArray *hardware_errors,
 void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s,
                           GArray *hardware_errors);
 int acpi_ghes_memory_errors(uint16_t source_id, uint64_t error_physical_addr);
-void ghes_record_cper_errors(const void *cper, size_t len,
-                             uint16_t source_id, Error **errp);
 
 /**
  * acpi_ghes_present: Report whether ACPI GHES table is present
diff --git a/include/hw/arm/aspeed_soc.h b/include/hw/arm/aspeed_soc.h
index 689f52dae8fbe..f899356ed94c5 100644
--- a/include/hw/arm/aspeed_soc.h
+++ b/include/hw/arm/aspeed_soc.h
@@ -128,7 +128,7 @@ struct Aspeed27x0SoCState {
     AspeedSoCState parent;
 
     ARMCPU cpu[ASPEED_CPUS_NUM];
-    AspeedINTCState intc;
+    AspeedINTCState intc[2];
     GICv3State gic;
     MemoryRegion dram_empty;
 };
@@ -148,7 +148,6 @@ OBJECT_DECLARE_SIMPLE_TYPE(Aspeed10x0SoCState, ASPEED10X0_SOC)
 struct AspeedSoCClass {
     DeviceClass parent_class;
 
-    const char *name;
     /** valid_cpu_types: NULL terminated array of a single CPU type. */
     const char * const *valid_cpu_types;
     uint32_t silicon_rev;
@@ -195,6 +194,7 @@ enum {
     ASPEED_DEV_EHCI2,
     ASPEED_DEV_VIC,
     ASPEED_DEV_INTC,
+    ASPEED_DEV_INTCIO,
     ASPEED_DEV_SDMC,
     ASPEED_DEV_SCU,
     ASPEED_DEV_ADC,
diff --git a/include/hw/arm/bsa.h b/include/hw/arm/bsa.h
index 8eaab603c03ac..13ed2d2ac1913 100644
--- a/include/hw/arm/bsa.h
+++ b/include/hw/arm/bsa.h
@@ -22,6 +22,8 @@
 #define QEMU_ARM_BSA_H
 
 /* These are architectural INTID values */
+#define ARCH_TIMER_S_EL2_VIRT_IRQ  19
+#define ARCH_TIMER_S_EL2_IRQ       20
 #define VIRTUAL_PMU_IRQ            23
 #define ARCH_GIC_MAINT_IRQ         25
 #define ARCH_TIMER_NS_EL2_IRQ      26
diff --git a/include/hw/arm/fsl-imx8mp.h b/include/hw/arm/fsl-imx8mp.h
new file mode 100644
index 0000000000000..bc97fc416eb96
--- /dev/null
+++ b/include/hw/arm/fsl-imx8mp.h
@@ -0,0 +1,284 @@
+/*
+ * i.MX 8M Plus SoC Definitions
+ *
+ * Copyright (c) 2024, Bernhard Beschow <shentey@gmail.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef FSL_IMX8MP_H
+#define FSL_IMX8MP_H
+
+#include "cpu.h"
+#include "hw/char/imx_serial.h"
+#include "hw/gpio/imx_gpio.h"
+#include "hw/i2c/imx_i2c.h"
+#include "hw/intc/arm_gicv3_common.h"
+#include "hw/misc/imx7_snvs.h"
+#include "hw/misc/imx8mp_analog.h"
+#include "hw/misc/imx8mp_ccm.h"
+#include "hw/net/imx_fec.h"
+#include "hw/or-irq.h"
+#include "hw/pci-host/designware.h"
+#include "hw/pci-host/fsl_imx8m_phy.h"
+#include "hw/sd/sdhci.h"
+#include "hw/ssi/imx_spi.h"
+#include "hw/timer/imx_gpt.h"
+#include "hw/usb/hcd-dwc3.h"
+#include "hw/watchdog/wdt_imx2.h"
+#include "qom/object.h"
+#include "qemu/units.h"
+
+#define TYPE_FSL_IMX8MP "fsl-imx8mp"
+OBJECT_DECLARE_SIMPLE_TYPE(FslImx8mpState, FSL_IMX8MP)
+
+#define FSL_IMX8MP_RAM_START        0x40000000
+#define FSL_IMX8MP_RAM_SIZE_MAX     (8 * GiB)
+
+enum FslImx8mpConfiguration {
+    FSL_IMX8MP_NUM_CPUS         = 4,
+    FSL_IMX8MP_NUM_ECSPIS       = 3,
+    FSL_IMX8MP_NUM_GPIOS        = 5,
+    FSL_IMX8MP_NUM_GPTS         = 6,
+    FSL_IMX8MP_NUM_I2CS         = 6,
+    FSL_IMX8MP_NUM_IRQS         = 160,
+    FSL_IMX8MP_NUM_UARTS        = 4,
+    FSL_IMX8MP_NUM_USBS         = 2,
+    FSL_IMX8MP_NUM_USDHCS       = 3,
+    FSL_IMX8MP_NUM_WDTS         = 3,
+};
+
+struct FslImx8mpState {
+    DeviceState    parent_obj;
+
+    ARMCPU             cpu[FSL_IMX8MP_NUM_CPUS];
+    GICv3State         gic;
+    IMXGPTState        gpt[FSL_IMX8MP_NUM_GPTS];
+    IMXGPIOState       gpio[FSL_IMX8MP_NUM_GPIOS];
+    IMX8MPCCMState     ccm;
+    IMX8MPAnalogState  analog;
+    IMX7SNVSState      snvs;
+    IMXSPIState        spi[FSL_IMX8MP_NUM_ECSPIS];
+    IMXI2CState        i2c[FSL_IMX8MP_NUM_I2CS];
+    IMXSerialState     uart[FSL_IMX8MP_NUM_UARTS];
+    IMXFECState        enet;
+    SDHCIState         usdhc[FSL_IMX8MP_NUM_USDHCS];
+    IMX2WdtState       wdt[FSL_IMX8MP_NUM_WDTS];
+    USBDWC3            usb[FSL_IMX8MP_NUM_USBS];
+    DesignwarePCIEHost pcie;
+    FslImx8mPciePhyState   pcie_phy;
+    OrIRQState         gpt5_gpt6_irq;
+    MemoryRegion       ocram;
+
+    uint32_t           phy_num;
+    bool               phy_connected;
+};
+
+enum FslImx8mpMemoryRegions {
+    FSL_IMX8MP_A53_DAP,
+    FSL_IMX8MP_AIPS1_CONFIGURATION,
+    FSL_IMX8MP_AIPS2_CONFIGURATION,
+    FSL_IMX8MP_AIPS3_CONFIGURATION,
+    FSL_IMX8MP_AIPS4_CONFIGURATION,
+    FSL_IMX8MP_AIPS5_CONFIGURATION,
+    FSL_IMX8MP_ANA_OSC,
+    FSL_IMX8MP_ANA_PLL,
+    FSL_IMX8MP_ANA_TSENSOR,
+    FSL_IMX8MP_APBH_DMA,
+    FSL_IMX8MP_ASRC,
+    FSL_IMX8MP_AUDIO_BLK_CTRL,
+    FSL_IMX8MP_AUDIO_DSP,
+    FSL_IMX8MP_AUDIO_XCVR_RX,
+    FSL_IMX8MP_AUD_IRQ_STEER,
+    FSL_IMX8MP_BOOT_ROM,
+    FSL_IMX8MP_BOOT_ROM_PROTECTED,
+    FSL_IMX8MP_CAAM,
+    FSL_IMX8MP_CAAM_MEM,
+    FSL_IMX8MP_CCM,
+    FSL_IMX8MP_CSU,
+    FSL_IMX8MP_DDR_BLK_CTRL,
+    FSL_IMX8MP_DDR_CTL,
+    FSL_IMX8MP_DDR_PERF_MON,
+    FSL_IMX8MP_DDR_PHY,
+    FSL_IMX8MP_DDR_PHY_BROADCAST,
+    FSL_IMX8MP_ECSPI1,
+    FSL_IMX8MP_ECSPI2,
+    FSL_IMX8MP_ECSPI3,
+    FSL_IMX8MP_EDMA_CHANNELS,
+    FSL_IMX8MP_EDMA_MANAGEMENT_PAGE,
+    FSL_IMX8MP_ENET1,
+    FSL_IMX8MP_ENET2_TSN,
+    FSL_IMX8MP_FLEXCAN1,
+    FSL_IMX8MP_FLEXCAN2,
+    FSL_IMX8MP_GIC_DIST,
+    FSL_IMX8MP_GIC_REDIST,
+    FSL_IMX8MP_GPC,
+    FSL_IMX8MP_GPIO1,
+    FSL_IMX8MP_GPIO2,
+    FSL_IMX8MP_GPIO3,
+    FSL_IMX8MP_GPIO4,
+    FSL_IMX8MP_GPIO5,
+    FSL_IMX8MP_GPT1,
+    FSL_IMX8MP_GPT2,
+    FSL_IMX8MP_GPT3,
+    FSL_IMX8MP_GPT4,
+    FSL_IMX8MP_GPT5,
+    FSL_IMX8MP_GPT6,
+    FSL_IMX8MP_GPU2D,
+    FSL_IMX8MP_GPU3D,
+    FSL_IMX8MP_HDMI_TX,
+    FSL_IMX8MP_HDMI_TX_AUDLNK_MSTR,
+    FSL_IMX8MP_HSIO_BLK_CTL,
+    FSL_IMX8MP_I2C1,
+    FSL_IMX8MP_I2C2,
+    FSL_IMX8MP_I2C3,
+    FSL_IMX8MP_I2C4,
+    FSL_IMX8MP_I2C5,
+    FSL_IMX8MP_I2C6,
+    FSL_IMX8MP_INTERCONNECT,
+    FSL_IMX8MP_IOMUXC,
+    FSL_IMX8MP_IOMUXC_GPR,
+    FSL_IMX8MP_IPS_DEWARP,
+    FSL_IMX8MP_ISI,
+    FSL_IMX8MP_ISP1,
+    FSL_IMX8MP_ISP2,
+    FSL_IMX8MP_LCDIF1,
+    FSL_IMX8MP_LCDIF2,
+    FSL_IMX8MP_MEDIA_BLK_CTL,
+    FSL_IMX8MP_MIPI_CSI1,
+    FSL_IMX8MP_MIPI_CSI2,
+    FSL_IMX8MP_MIPI_DSI1,
+    FSL_IMX8MP_MU_1_A,
+    FSL_IMX8MP_MU_1_B,
+    FSL_IMX8MP_MU_2_A,
+    FSL_IMX8MP_MU_2_B,
+    FSL_IMX8MP_MU_3_A,
+    FSL_IMX8MP_MU_3_B,
+    FSL_IMX8MP_NPU,
+    FSL_IMX8MP_OCOTP_CTRL,
+    FSL_IMX8MP_OCRAM,
+    FSL_IMX8MP_OCRAM_S,
+    FSL_IMX8MP_PCIE1,
+    FSL_IMX8MP_PCIE1_MEM,
+    FSL_IMX8MP_PCIE_PHY1,
+    FSL_IMX8MP_PDM,
+    FSL_IMX8MP_PERFMON1,
+    FSL_IMX8MP_PERFMON2,
+    FSL_IMX8MP_PWM1,
+    FSL_IMX8MP_PWM2,
+    FSL_IMX8MP_PWM3,
+    FSL_IMX8MP_PWM4,
+    FSL_IMX8MP_QOSC,
+    FSL_IMX8MP_QSPI,
+    FSL_IMX8MP_QSPI1_RX_BUFFER,
+    FSL_IMX8MP_QSPI1_TX_BUFFER,
+    FSL_IMX8MP_QSPI_MEM,
+    FSL_IMX8MP_RAM,
+    FSL_IMX8MP_RDC,
+    FSL_IMX8MP_SAI1,
+    FSL_IMX8MP_SAI2,
+    FSL_IMX8MP_SAI3,
+    FSL_IMX8MP_SAI5,
+    FSL_IMX8MP_SAI6,
+    FSL_IMX8MP_SAI7,
+    FSL_IMX8MP_SDMA1,
+    FSL_IMX8MP_SDMA2,
+    FSL_IMX8MP_SDMA3,
+    FSL_IMX8MP_SEMAPHORE1,
+    FSL_IMX8MP_SEMAPHORE2,
+    FSL_IMX8MP_SEMAPHORE_HS,
+    FSL_IMX8MP_SNVS_HP,
+    FSL_IMX8MP_SPBA1,
+    FSL_IMX8MP_SPBA2,
+    FSL_IMX8MP_SRC,
+    FSL_IMX8MP_SYSCNT_CMP,
+    FSL_IMX8MP_SYSCNT_CTRL,
+    FSL_IMX8MP_SYSCNT_RD,
+    FSL_IMX8MP_TCM_DTCM,
+    FSL_IMX8MP_TCM_ITCM,
+    FSL_IMX8MP_TZASC,
+    FSL_IMX8MP_UART1,
+    FSL_IMX8MP_UART2,
+    FSL_IMX8MP_UART3,
+    FSL_IMX8MP_UART4,
+    FSL_IMX8MP_USB1,
+    FSL_IMX8MP_USB2,
+    FSL_IMX8MP_USB1_DEV,
+    FSL_IMX8MP_USB2_DEV,
+    FSL_IMX8MP_USB1_OTG,
+    FSL_IMX8MP_USB2_OTG,
+    FSL_IMX8MP_USB1_GLUE,
+    FSL_IMX8MP_USB2_GLUE,
+    FSL_IMX8MP_USDHC1,
+    FSL_IMX8MP_USDHC2,
+    FSL_IMX8MP_USDHC3,
+    FSL_IMX8MP_VPU,
+    FSL_IMX8MP_VPU_BLK_CTRL,
+    FSL_IMX8MP_VPU_G1_DECODER,
+    FSL_IMX8MP_VPU_G2_DECODER,
+    FSL_IMX8MP_VPU_VC8000E_ENCODER,
+    FSL_IMX8MP_WDOG1,
+    FSL_IMX8MP_WDOG2,
+    FSL_IMX8MP_WDOG3,
+};
+
+enum FslImx8mpIrqs {
+    FSL_IMX8MP_USDHC1_IRQ   = 22,
+    FSL_IMX8MP_USDHC2_IRQ   = 23,
+    FSL_IMX8MP_USDHC3_IRQ   = 24,
+
+    FSL_IMX8MP_UART1_IRQ    = 26,
+    FSL_IMX8MP_UART2_IRQ    = 27,
+    FSL_IMX8MP_UART3_IRQ    = 28,
+    FSL_IMX8MP_UART4_IRQ    = 29,
+    FSL_IMX8MP_UART5_IRQ    = 30,
+    FSL_IMX8MP_UART6_IRQ    = 16,
+
+    FSL_IMX8MP_ECSPI1_IRQ   = 31,
+    FSL_IMX8MP_ECSPI2_IRQ   = 32,
+    FSL_IMX8MP_ECSPI3_IRQ   = 33,
+    FSL_IMX8MP_ECSPI4_IRQ   = 34,
+
+    FSL_IMX8MP_I2C1_IRQ     = 35,
+    FSL_IMX8MP_I2C2_IRQ     = 36,
+    FSL_IMX8MP_I2C3_IRQ     = 37,
+    FSL_IMX8MP_I2C4_IRQ     = 38,
+
+    FSL_IMX8MP_USB1_IRQ     = 40,
+    FSL_IMX8MP_USB2_IRQ     = 41,
+
+    FSL_IMX8MP_GPT1_IRQ      = 55,
+    FSL_IMX8MP_GPT2_IRQ      = 54,
+    FSL_IMX8MP_GPT3_IRQ      = 53,
+    FSL_IMX8MP_GPT4_IRQ      = 52,
+    FSL_IMX8MP_GPT5_GPT6_IRQ = 51,
+
+    FSL_IMX8MP_GPIO1_LOW_IRQ  = 64,
+    FSL_IMX8MP_GPIO1_HIGH_IRQ = 65,
+    FSL_IMX8MP_GPIO2_LOW_IRQ  = 66,
+    FSL_IMX8MP_GPIO2_HIGH_IRQ = 67,
+    FSL_IMX8MP_GPIO3_LOW_IRQ  = 68,
+    FSL_IMX8MP_GPIO3_HIGH_IRQ = 69,
+    FSL_IMX8MP_GPIO4_LOW_IRQ  = 70,
+    FSL_IMX8MP_GPIO4_HIGH_IRQ = 71,
+    FSL_IMX8MP_GPIO5_LOW_IRQ  = 72,
+    FSL_IMX8MP_GPIO5_HIGH_IRQ = 73,
+
+    FSL_IMX8MP_I2C5_IRQ     = 76,
+    FSL_IMX8MP_I2C6_IRQ     = 77,
+
+    FSL_IMX8MP_WDOG1_IRQ    = 78,
+    FSL_IMX8MP_WDOG2_IRQ    = 79,
+    FSL_IMX8MP_WDOG3_IRQ    = 10,
+
+    FSL_IMX8MP_ENET1_MAC_IRQ    = 118,
+    FSL_IMX6_ENET1_MAC_1588_IRQ = 121,
+
+    FSL_IMX8MP_PCI_INTA_IRQ = 126,
+    FSL_IMX8MP_PCI_INTB_IRQ = 125,
+    FSL_IMX8MP_PCI_INTC_IRQ = 124,
+    FSL_IMX8MP_PCI_INTD_IRQ = 123,
+    FSL_IMX8MP_PCI_MSI_IRQ  = 140,
+};
+
+#endif /* FSL_IMX8MP_H */
diff --git a/include/hw/arm/npcm7xx.h b/include/hw/arm/npcm7xx.h
index 4e0d2101885c9..56536565b748c 100644
--- a/include/hw/arm/npcm7xx.h
+++ b/include/hw/arm/npcm7xx.h
@@ -23,8 +23,8 @@
 #include "hw/gpio/npcm7xx_gpio.h"
 #include "hw/i2c/npcm7xx_smbus.h"
 #include "hw/mem/npcm7xx_mc.h"
-#include "hw/misc/npcm7xx_clk.h"
-#include "hw/misc/npcm7xx_gcr.h"
+#include "hw/misc/npcm_clk.h"
+#include "hw/misc/npcm_gcr.h"
 #include "hw/misc/npcm7xx_mft.h"
 #include "hw/misc/npcm7xx_pwm.h"
 #include "hw/misc/npcm7xx_rng.h"
@@ -89,8 +89,8 @@ struct NPCM7xxState {
     MemoryRegion        ram3;
     MemoryRegion        *dram;
 
-    NPCM7xxGCRState     gcr;
-    NPCM7xxCLKState     clk;
+    NPCMGCRState        gcr;
+    NPCMCLKState     clk;
     NPCM7xxTimerCtrlState tim[3];
     NPCM7xxADCState     adc;
     NPCM7xxPWMState     pwm[NPCM7XX_NR_PWM_MODULES];
diff --git a/include/hw/arm/npcm8xx.h b/include/hw/arm/npcm8xx.h
new file mode 100644
index 0000000000000..9812e6fa7ec66
--- /dev/null
+++ b/include/hw/arm/npcm8xx.h
@@ -0,0 +1,127 @@
+/*
+ * Nuvoton NPCM8xx SoC family.
+ *
+ * Copyright 2022 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#ifndef NPCM8XX_H
+#define NPCM8XX_H
+
+#include "hw/adc/npcm7xx_adc.h"
+#include "hw/core/split-irq.h"
+#include "hw/cpu/cluster.h"
+#include "hw/gpio/npcm7xx_gpio.h"
+#include "hw/i2c/npcm7xx_smbus.h"
+#include "hw/intc/arm_gic_common.h"
+#include "hw/mem/npcm7xx_mc.h"
+#include "hw/misc/npcm_clk.h"
+#include "hw/misc/npcm_gcr.h"
+#include "hw/misc/npcm7xx_mft.h"
+#include "hw/misc/npcm7xx_pwm.h"
+#include "hw/misc/npcm7xx_rng.h"
+#include "hw/net/npcm7xx_emc.h"
+#include "hw/nvram/npcm7xx_otp.h"
+#include "hw/sd/npcm7xx_sdhci.h"
+#include "hw/timer/npcm7xx_timer.h"
+#include "hw/ssi/npcm7xx_fiu.h"
+#include "hw/usb/hcd-ehci.h"
+#include "hw/usb/hcd-ohci.h"
+#include "target/arm/cpu.h"
+
+#define NPCM8XX_MAX_NUM_CPUS    (4)
+
+/* The first half of the address space is reserved for DDR4 DRAM. */
+#define NPCM8XX_DRAM_BA         (0x00000000)
+#define NPCM8XX_DRAM_SZ         (2 * GiB)
+
+/* Magic addresses for setting up direct kernel booting and SMP boot stubs. */
+#define NPCM8XX_LOADER_START            (0x00000000)  /* Start of SDRAM */
+#define NPCM8XX_SMP_LOADER_START        (0xffff0000)  /* Boot ROM */
+#define NPCM8XX_SMP_BOOTREG_ADDR        (0xf080013c)  /* GCR.SCRPAD */
+#define NPCM8XX_BOARD_SETUP_ADDR        (0xffff1000)  /* Boot ROM */
+
+#define NPCM8XX_NR_PWM_MODULES 3
+
+struct NPCM8xxMachine {
+    MachineState        parent_obj;
+
+    /*
+     * PWM fan splitter. each splitter connects to one PWM output and
+     * multiple MFT inputs.
+     */
+    SplitIRQ            fan_splitter[NPCM8XX_NR_PWM_MODULES *
+                                     NPCM7XX_PWM_PER_MODULE];
+};
+
+
+struct NPCM8xxMachineClass {
+    MachineClass        parent_class;
+
+    const char          *soc_type;
+};
+
+#define TYPE_NPCM8XX_MACHINE MACHINE_TYPE_NAME("npcm8xx")
+OBJECT_DECLARE_TYPE(NPCM8xxMachine, NPCM8xxMachineClass, NPCM8XX_MACHINE)
+
+struct NPCM8xxState {
+    DeviceState         parent_obj;
+
+    ARMCPU              cpu[NPCM8XX_MAX_NUM_CPUS];
+    CPUClusterState     cpu_cluster;
+    GICState            gic;
+
+    MemoryRegion        sram;
+    MemoryRegion        irom;
+    MemoryRegion        ram3;
+    MemoryRegion        *dram;
+
+    NPCMGCRState        gcr;
+    NPCMCLKState        clk;
+    NPCM7xxTimerCtrlState tim[3];
+    NPCM7xxADCState     adc;
+    NPCM7xxPWMState     pwm[NPCM8XX_NR_PWM_MODULES];
+    NPCM7xxMFTState     mft[8];
+    NPCM7xxOTPState     fuse_array;
+    NPCM7xxMCState      mc;
+    NPCM7xxRNGState     rng;
+    NPCM7xxGPIOState    gpio[8];
+    NPCM7xxSMBusState   smbus[27];
+    EHCISysBusState     ehci[2];
+    OHCISysBusState     ohci[2];
+    NPCM7xxFIUState     fiu[3];
+    NPCM7xxSDHCIState   mmc;
+};
+
+struct NPCM8xxClass {
+    DeviceClass         parent_class;
+
+    /* Bitmask of modules that are permanently disabled on this chip. */
+    uint32_t            disabled_modules;
+    /* Number of CPU cores enabled in this SoC class. */
+    uint32_t            num_cpus;
+};
+
+#define TYPE_NPCM8XX    "npcm8xx"
+OBJECT_DECLARE_TYPE(NPCM8xxState, NPCM8xxClass, NPCM8XX)
+
+/**
+ * npcm8xx_load_kernel - Loads memory with everything needed to boot
+ * @machine - The machine containing the SoC to be booted.
+ * @soc - The SoC containing the CPU to be booted.
+ *
+ * This will set up the ARM boot info structure for the specific NPCM8xx
+ * derivative and call arm_load_kernel() to set up loading of the kernel, etc.
+ * into memory, if requested by the user.
+ */
+void npcm8xx_load_kernel(MachineState *machine, NPCM8xxState *soc);
+
+#endif /* NPCM8XX_H */
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
index d1a4a64551d3c..e5e2d09294d73 100644
--- a/include/hw/arm/smmu-common.h
+++ b/include/hw/arm/smmu-common.h
@@ -110,7 +110,6 @@ typedef struct SMMUTransCfg {
     /* Used by stage-1 only. */
     bool aa64;                 /* arch64 or aarch32 translation table */
     bool record_faults;        /* record fault events */
-    uint64_t ttb;              /* TT base address */
     uint8_t oas;               /* output address width */
     uint8_t tbi;               /* Top Byte Ignore */
     int asid;
@@ -143,6 +142,11 @@ typedef struct SMMUIOTLBKey {
     uint8_t level;
 } SMMUIOTLBKey;
 
+typedef struct SMMUSIDRange {
+    uint32_t start;
+    uint32_t end;
+} SMMUSIDRange;
+
 struct SMMUState {
     /* <private> */
     SysBusDevice  dev;
@@ -220,6 +224,7 @@ void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova,
                          uint8_t tg, uint64_t num_pages, uint8_t ttl);
 void smmu_iotlb_inv_ipa(SMMUState *s, int vmid, dma_addr_t ipa, uint8_t tg,
                         uint64_t num_pages, uint8_t ttl);
+void smmu_configs_inv_sid_range(SMMUState *s, SMMUSIDRange sid_range);
 /* Unmap the range of all the notifiers registered to any IOMMU mr */
 void smmu_inv_notifiers_all(SMMUState *s);
 
diff --git a/include/hw/boards.h b/include/hw/boards.h
index e1f41b2a53336..f22b2e7fc75b7 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -156,6 +156,8 @@ typedef struct {
  * @modules_supported - whether modules are supported by the machine
  * @cache_supported - whether cache (l1d, l1i, l2 and l3) configuration are
  *                    supported by the machine
+ * @has_caches - whether cache properties are explicitly specified in the
+ *               user provided smp-cache configuration
  */
 typedef struct {
     bool prefer_sockets;
@@ -166,6 +168,7 @@ typedef struct {
     bool drawers_supported;
     bool modules_supported;
     bool cache_supported[CACHE_LEVEL_AND_TYPE__MAX];
+    bool has_caches;
 } SMPCompatProps;
 
 /**
@@ -283,9 +286,9 @@ struct MachineClass {
         no_parallel:1,
         no_floppy:1,
         no_cdrom:1,
-        no_sdcard:1,
         pci_allow_0_address:1,
         legacy_fw_cfg_order:1;
+    bool auto_create_sdcard;
     bool is_default;
     const char *default_machine_opts;
     const char *default_boot_order;
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index fb397cdfc53d1..5d11d26556a5e 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -104,7 +104,6 @@ struct SysemuCPUOps;
  *                 instantiatable CPU type.
  * @parse_features: Callback to parse command line arguments.
  * @reset_dump_flags: #CPUDumpFlags to use for reset logging.
- * @has_work: Callback for checking if there is work to do.
  * @mmu_index: Callback for choosing softmmu mmu index;
  *       may be used internally by memory_rw_debug without TCG.
  * @memory_rw_debug: Callback for GDB memory access.
@@ -124,7 +123,9 @@ struct SysemuCPUOps;
  * @get_pc: Callback for getting the Program Counter register.
  *       As above, with the semantics of the target architecture.
  * @gdb_read_register: Callback for letting GDB read a register.
+ *                     No more than @gdb_num_core_regs registers can be read.
  * @gdb_write_register: Callback for letting GDB write a register.
+ *                     No more than @gdb_num_core_regs registers can be written.
  * @gdb_adjust_breakpoint: Callback for adjusting the address of a
  *       breakpoint.  Used by AVR to handle a gdb mis-feature with
  *       its Harvard architecture split code and data.
@@ -151,7 +152,6 @@ struct CPUClass {
     ObjectClass *(*class_by_name)(const char *cpu_model);
     void (*parse_features)(const char *typename, char *str, Error **errp);
 
-    bool (*has_work)(CPUState *cpu);
     int (*mmu_index)(CPUState *cpu, bool ifetch);
     int (*memory_rw_debug)(CPUState *cpu, vaddr addr,
                            uint8_t *buf, int len, bool is_write);
@@ -614,6 +614,8 @@ extern bool mttcg_enabled;
  */
 bool cpu_paging_enabled(const CPUState *cpu);
 
+#if !defined(CONFIG_USER_ONLY)
+
 /**
  * cpu_get_memory_mapping:
  * @cpu: The CPU whose memory mappings are to be obtained.
@@ -625,8 +627,6 @@ bool cpu_paging_enabled(const CPUState *cpu);
 bool cpu_get_memory_mapping(CPUState *cpu, MemoryMappingList *list,
                             Error **errp);
 
-#if !defined(CONFIG_USER_ONLY)
-
 /**
  * cpu_write_elf64_note:
  * @f: pointer to a function that writes memory to a file
@@ -748,6 +748,16 @@ int cpu_asidx_from_attrs(CPUState *cpu, MemTxAttrs attrs);
  */
 bool cpu_virtio_is_big_endian(CPUState *cpu);
 
+/**
+ * cpu_has_work:
+ * @cpu: The vCPU to check.
+ *
+ * Checks whether the CPU has work to do.
+ *
+ * Returns: %true if the CPU has work, %false otherwise.
+ */
+bool cpu_has_work(CPUState *cpu);
+
 #endif /* CONFIG_USER_ONLY */
 
 /**
@@ -814,22 +824,6 @@ CPUState *cpu_create(const char *typename);
  */
 const char *parse_cpu_option(const char *cpu_option);
 
-/**
- * cpu_has_work:
- * @cpu: The vCPU to check.
- *
- * Checks whether the CPU has work to do.
- *
- * Returns: %true if the CPU has work, %false otherwise.
- */
-static inline bool cpu_has_work(CPUState *cpu)
-{
-    CPUClass *cc = CPU_GET_CLASS(cpu);
-
-    g_assert(cc->has_work);
-    return cc->has_work(cpu);
-}
-
 /**
  * qemu_cpu_is_self:
  * @cpu: The vCPU to check against.
@@ -966,9 +960,7 @@ void cpu_interrupt(CPUState *cpu, int mask);
  */
 static inline void cpu_set_pc(CPUState *cpu, vaddr addr)
 {
-    CPUClass *cc = CPU_GET_CLASS(cpu);
-
-    cc->set_pc(cpu, addr);
+    cpu->cc->set_pc(cpu, addr);
 }
 
 /**
@@ -1162,7 +1154,10 @@ G_NORETURN void cpu_abort(CPUState *cpu, const char *fmt, ...)
 
 /* $(top_srcdir)/cpu.c */
 void cpu_class_init_props(DeviceClass *dc);
+void cpu_exec_class_post_init(CPUClass *cc);
 void cpu_exec_initfn(CPUState *cpu);
+void cpu_vmstate_register(CPUState *cpu);
+void cpu_vmstate_unregister(CPUState *cpu);
 bool cpu_exec_realizefn(CPUState *cpu, Error **errp);
 void cpu_exec_unrealizefn(CPUState *cpu);
 void cpu_exec_reset_hold(CPUState *cpu);
diff --git a/include/hw/core/sysemu-cpu-ops.h b/include/hw/core/sysemu-cpu-ops.h
index 0df5b058f5007..877892373f95b 100644
--- a/include/hw/core/sysemu-cpu-ops.h
+++ b/include/hw/core/sysemu-cpu-ops.h
@@ -16,6 +16,10 @@
  * struct SysemuCPUOps: System operations specific to a CPU class
  */
 typedef struct SysemuCPUOps {
+    /**
+     * @has_work: Callback for checking if there is work to do.
+     */
+    bool (*has_work)(CPUState *cpu); /* MANDATORY NON-NULL */
     /**
      * @get_memory_mapping: Callback for obtaining the memory mappings.
      */
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 561b375dc86de..3a0ee7e8e7992 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -264,8 +264,8 @@ void cxl_device_register_block_init(Object *obj, CXLDeviceState *dev,
 typedef struct CXLType3Dev CXLType3Dev;
 typedef struct CSWMBCCIDev CSWMBCCIDev;
 /* Set up default values for the register block */
-void cxl_device_register_init_t3(CXLType3Dev *ct3d);
-void cxl_device_register_init_swcci(CSWMBCCIDev *sw);
+void cxl_device_register_init_t3(CXLType3Dev *ct3d, int msi_n);
+void cxl_device_register_init_swcci(CSWMBCCIDev *sw, int msi_n);
 
 /*
  * CXL r3.1 Section 8.2.8.1: CXL Device Capabilities Array Register
diff --git a/include/hw/hexagon/hexagon.h b/include/hw/hexagon/hexagon.h
new file mode 100644
index 0000000000000..ce356325fcd77
--- /dev/null
+++ b/include/hw/hexagon/hexagon.h
@@ -0,0 +1,150 @@
+/*
+ * Hexagon Baseboard System emulation.
+ *
+ * Copyright (c) 2020-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+
+#ifndef HW_HEXAGON_H
+#define HW_HEXAGON_H
+
+#include "exec/memory.h"
+
+struct hexagon_board_boot_info {
+    uint64_t ram_size;
+    const char *kernel_filename;
+    uint32_t kernel_elf_flags;
+};
+
+typedef enum {
+    unknown_rev = 0,
+    v66_rev = 0xa666,
+    v67_rev = 0x2667,
+    v68_rev = 0x8d68,
+    v69_rev = 0x8c69,
+    v71_rev = 0x8c71,
+    v73_rev = 0x8c73,
+    v73m_rev = 0xcc73,
+} Rev_t;
+#define HEXAGON_LATEST_REV v73
+#define HEXAGON_LATEST_REV_UPPER V73
+
+/*
+ * Config table address bases represent bits [35:16].
+ */
+#define HEXAGON_CFG_ADDR_BASE(addr) (((addr) >> 16) & 0x0fffff)
+
+#define HEXAGON_CFGSPACE_ENTRIES (128)
+
+typedef  union {
+  struct {
+    /* Base address of L2TCM space */
+    uint32_t l2tcm_base;
+    uint32_t reserved0;
+    /* Base address of subsystem space */
+    uint32_t subsystem_base;
+    /* Base address of ETM space */
+    uint32_t etm_base;
+    /* Base address of L2 configuration space */
+    uint32_t l2cfg_base;
+    uint32_t reserved1;
+    /* Base address of L1S */
+    uint32_t l1s0_base;
+    /* Base address of AXI2 */
+    uint32_t axi2_lowaddr;
+    /* Base address of streamer base */
+    uint32_t streamer_base;
+    uint32_t reserved2;
+    /* Base address of fast L2VIC */
+    uint32_t fastl2vic_base;
+    /* Number of entries in JTLB */
+    uint32_t jtlb_size_entries;
+    /* Coprocessor type */
+    uint32_t coproc_present;
+    /* Number of extension execution contexts available */
+    uint32_t ext_contexts;
+    /* Base address of Hexagon Vector Tightly Coupled Memory (VTCM) */
+    uint32_t vtcm_base;
+    /* Size of VTCM (in KB) */
+    uint32_t vtcm_size_kb;
+    /* L2 tag size */
+    uint32_t l2tag_size;
+    /* Amount of physical L2 memory in released version */
+    uint32_t l2ecomem_size;
+    /* Hardware threads available on the core */
+    uint32_t thread_enable_mask;
+    /* Base address of the ECC registers */
+    uint32_t eccreg_base;
+    /* L2 line size */
+    uint32_t l2line_size;
+    /* Small Core processor (also implies audio extension) */
+    uint32_t tiny_core;
+    /* Size of L2TCM */
+    uint32_t l2itcm_size;
+    /* Base address of L2-ITCM */
+    uint32_t l2itcm_base;
+    uint32_t reserved3;
+    /* DTM is present */
+    uint32_t dtm_present;
+    /* Version of the DMA */
+    uint32_t dma_version;
+    /* Native HVX vector length in log of bytes */
+    uint32_t hvx_vec_log_length;
+    /* Core ID of the multi-core */
+    uint32_t core_id;
+    /* Number of multi-core cores */
+    uint32_t core_count;
+    uint32_t coproc2_reg0;
+    uint32_t coproc2_reg1;
+    /* Supported HVX vector length */
+    uint32_t v2x_mode;
+    uint32_t coproc2_reg2;
+    uint32_t coproc2_reg3;
+    uint32_t coproc2_reg4;
+    uint32_t coproc2_reg5;
+    uint32_t coproc2_reg6;
+    uint32_t coproc2_reg7;
+    /* Voltage droop mitigation technique parameter */
+    uint32_t acd_preset;
+    /* Voltage droop mitigation technique parameter */
+    uint32_t mnd_preset;
+    /* L1 data cache size (in KB) */
+    uint32_t l1d_size_kb;
+    /* L1 instruction cache size in (KB) */
+    uint32_t l1i_size_kb;
+    /* L1 data cache write policy: see HexagonL1WritePolicy */
+    uint32_t l1d_write_policy;
+    /* VTCM bank width  */
+    uint32_t vtcm_bank_width;
+    uint32_t reserved4;
+    uint32_t reserved5;
+    uint32_t reserved6;
+    uint32_t coproc2_cvt_mpy_size;
+    uint32_t consistency_domain;
+    uint32_t capacity_domain;
+    uint32_t axi3_lowaddr;
+    uint32_t coproc2_int8_subcolumns;
+    uint32_t corecfg_present;
+    uint32_t coproc2_fp16_acc_exp;
+    uint32_t AXIM2_secondary_base;
+  };
+  uint32_t raw[HEXAGON_CFGSPACE_ENTRIES];
+} hexagon_config_table;
+
+typedef struct {
+    /* Base address of config table */
+    uint32_t cfgbase;
+    /* Size of L2 TCM */
+    uint32_t l2tcm_size;
+    /* Base address of L2VIC */
+    uint32_t l2vic_base;
+    /* Size of L2VIC region */
+    uint32_t l2vic_size;
+    /* QTimer csr base */
+    uint32_t csr_base;
+    uint32_t qtmr_region;
+    hexagon_config_table cfgtable;
+} hexagon_machine_config;
+
+#endif
diff --git a/include/hw/hexagon/virt.h b/include/hw/hexagon/virt.h
new file mode 100644
index 0000000000000..0c165a786d305
--- /dev/null
+++ b/include/hw/hexagon/virt.h
@@ -0,0 +1,41 @@
+/*
+ * Definitions for hexagon virt board.
+ *
+ * Copyright (c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_HEXAGONVIRT_H
+#define HW_HEXAGONVIRT_H
+
+#include "hw/boards.h"
+#include "target/hexagon/cpu.h"
+
+struct HexagonVirtMachineState {
+    /*< private >*/
+    MachineState parent_obj;
+
+    int fdt_size;
+    MemoryRegion *sys;
+    MemoryRegion cfgtable;
+    MemoryRegion ram;
+    MemoryRegion tcm;
+    MemoryRegion vtcm;
+    DeviceState *l2vic;
+};
+
+void hexagon_load_fdt(const struct HexagonVirtMachineState *vms);
+
+enum {
+    VIRT_UART0,
+    VIRT_QTMR0,
+    VIRT_QTMR1,
+    VIRT_GPT,
+    VIRT_MMIO,
+    VIRT_FDT,
+};
+
+#define TYPE_HEXAGON_VIRT_MACHINE MACHINE_TYPE_NAME("virt")
+OBJECT_DECLARE_SIMPLE_TYPE(HexagonVirtMachineState, HEXAGON_VIRT_MACHINE)
+
+#endif /* HW_HEXAGONVIRT_H */
diff --git a/include/hw/hyperv/hyperv-proto.h b/include/hw/hyperv/hyperv-proto.h
index 4a2297307b009..fffc5ce342f29 100644
--- a/include/hw/hyperv/hyperv-proto.h
+++ b/include/hw/hyperv/hyperv-proto.h
@@ -61,6 +61,18 @@
 #define HV_MESSAGE_X64_APIC_EOI               0x80010004
 #define HV_MESSAGE_X64_LEGACY_FP_ERROR        0x80010005
 
+/*
+ * Hyper-V Synthetic debug options MSR
+ */
+#define HV_X64_MSR_SYNDBG_CONTROL               0x400000F1
+#define HV_X64_MSR_SYNDBG_STATUS                0x400000F2
+#define HV_X64_MSR_SYNDBG_SEND_BUFFER           0x400000F3
+#define HV_X64_MSR_SYNDBG_RECV_BUFFER           0x400000F4
+#define HV_X64_MSR_SYNDBG_PENDING_BUFFER        0x400000F5
+#define HV_X64_MSR_SYNDBG_OPTIONS               0x400000FF
+
+#define HV_X64_SYNDBG_OPTION_USE_HCALLS         BIT(2)
+
 /*
  * Message flags
  */
diff --git a/include/hw/intc/aspeed_intc.h b/include/hw/intc/aspeed_intc.h
index 18cb43476cd71..3727ba24be726 100644
--- a/include/hw/intc/aspeed_intc.h
+++ b/include/hw/intc/aspeed_intc.h
@@ -14,10 +14,19 @@
 
 #define TYPE_ASPEED_INTC "aspeed.intc"
 #define TYPE_ASPEED_2700_INTC TYPE_ASPEED_INTC "-ast2700"
+#define TYPE_ASPEED_2700_INTCIO TYPE_ASPEED_INTC "io-ast2700"
 OBJECT_DECLARE_TYPE(AspeedINTCState, AspeedINTCClass, ASPEED_INTC)
 
-#define ASPEED_INTC_NR_REGS (0x2000 >> 2)
-#define ASPEED_INTC_NR_INTS 9
+#define ASPEED_INTC_MAX_INPINS 10
+#define ASPEED_INTC_MAX_OUTPINS 19
+
+typedef struct AspeedINTCIRQ {
+    int inpin_idx;
+    int outpin_idx;
+    int num_outpins;
+    uint32_t enable_reg;
+    uint32_t status_reg;
+} AspeedINTCIRQ;
 
 struct AspeedINTCState {
     /*< private >*/
@@ -25,20 +34,29 @@ struct AspeedINTCState {
 
     /*< public >*/
     MemoryRegion iomem;
-    uint32_t regs[ASPEED_INTC_NR_REGS];
-    OrIRQState orgates[ASPEED_INTC_NR_INTS];
-    qemu_irq output_pins[ASPEED_INTC_NR_INTS];
+    MemoryRegion iomem_container;
+
+    uint32_t *regs;
+    OrIRQState orgates[ASPEED_INTC_MAX_INPINS];
+    qemu_irq output_pins[ASPEED_INTC_MAX_OUTPINS];
 
-    uint32_t enable[ASPEED_INTC_NR_INTS];
-    uint32_t mask[ASPEED_INTC_NR_INTS];
-    uint32_t pending[ASPEED_INTC_NR_INTS];
+    uint32_t enable[ASPEED_INTC_MAX_INPINS];
+    uint32_t mask[ASPEED_INTC_MAX_INPINS];
+    uint32_t pending[ASPEED_INTC_MAX_INPINS];
 };
 
 struct AspeedINTCClass {
     SysBusDeviceClass parent_class;
 
     uint32_t num_lines;
-    uint32_t num_ints;
+    uint32_t num_inpins;
+    uint32_t num_outpins;
+    uint64_t mem_size;
+    uint64_t nr_regs;
+    uint64_t reg_offset;
+    const MemoryRegionOps *reg_ops;
+    const AspeedINTCIRQ *irq_table;
+    int irq_table_count;
 };
 
 #endif /* ASPEED_INTC_H */
diff --git a/include/hw/intc/l2vic.h b/include/hw/intc/l2vic.h
new file mode 100644
index 0000000000000..ed8ccf33b1f8c
--- /dev/null
+++ b/include/hw/intc/l2vic.h
@@ -0,0 +1,37 @@
+/*
+ * QEMU L2VIC Interrupt Controller
+ *
+ * Copyright(c) 2020-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#define L2VIC_VID_GRP_0 0x0 /* Read */
+#define L2VIC_VID_GRP_1 0x4 /* Read */
+#define L2VIC_VID_GRP_2 0x8 /* Read */
+#define L2VIC_VID_GRP_3 0xC /* Read */
+#define L2VIC_VID_0 0x10 /* Read SOFTWARE DEFINED */
+#define L2VIC_VID_1 0x14 /* Read SOFTWARE DEFINED NOT YET USED */
+#define L2VIC_INT_ENABLEn 0x100 /* Read/Write */
+#define L2VIC_INT_ENABLE_CLEARn 0x180 /* Write */
+#define L2VIC_INT_ENABLE_SETn 0x200 /* Write */
+#define L2VIC_INT_TYPEn 0x280 /* Read/Write */
+#define L2VIC_INT_STATUSn 0x380 /* Read */
+#define L2VIC_INT_CLEARn 0x400 /* Write */
+#define L2VIC_SOFT_INTn 0x480 /* Write */
+#define L2VIC_INT_PENDINGn 0x500 /* Read */
+#define L2VIC_INT_GRPn_0 0x600 /* Read/Write */
+#define L2VIC_INT_GRPn_1 0x680 /* Read/Write */
+#define L2VIC_INT_GRPn_2 0x700 /* Read/Write */
+#define L2VIC_INT_GRPn_3 0x780 /* Read/Write */
+
+#define L2VIC_INTERRUPT_MAX 1024
+#define L2VIC_CIAD_INSTRUCTION -1
+/*
+ * Note about l2vic groups:
+ * Each interrupt to L2VIC can be configured to associate with one of
+ * four groups.
+ * Group 0 interrupts go to IRQ2 via VID 0 (SSR: 0xC2, the default)
+ * Group 1 interrupts go to IRQ3 via VID 1 (SSR: 0xC3)
+ * Group 2 interrupts go to IRQ4 via VID 2 (SSR: 0xC4)
+ * Group 3 interrupts go to IRQ5 via VID 3 (SSR: 0xC5)
+ */
diff --git a/include/hw/loader-fit.h b/include/hw/loader-fit.h
index 0832e379dc906..9a43490ed6370 100644
--- a/include/hw/loader-fit.h
+++ b/include/hw/loader-fit.h
@@ -30,12 +30,27 @@ struct fit_loader_match {
 struct fit_loader {
     const struct fit_loader_match *matches;
     hwaddr (*addr_to_phys)(void *opaque, uint64_t addr);
-    const void *(*fdt_filter)(void *opaque, const void *fdt,
-                              const void *match_data, hwaddr *load_addr);
+    void *(*fdt_filter)(void *opaque, const void *fdt,
+                        const void *match_data, hwaddr *load_addr);
     const void *(*kernel_filter)(void *opaque, const void *kernel,
                                  hwaddr *load_addr, hwaddr *entry_addr);
 };
 
-int load_fit(const struct fit_loader *ldr, const char *filename, void *opaque);
+/**
+ * load_fit: load a FIT format image
+ * @ldr: structure defining board specific properties and hooks
+ * @filename: image to load
+ * @pfdt: pointer to update with address of FDT blob
+ * @opaque: opaque value passed back to the hook functions in @ldr
+ * Returns: 0 on success, or a negative errno on failure
+ *
+ * @pfdt is used to tell the caller about the FDT blob. On return, it
+ * has been set to point to the FDT blob, and it is now the caller's
+ * responsibility to free that memory with g_free(). Usually the caller
+ * will want to pass in &machine->fdt here, to record the FDT blob for
+ * the dumpdtb option and QMP/HMP commands.
+ */
+int load_fit(const struct fit_loader *ldr, const char *filename, void **pfdt,
+             void *opaque);
 
 #endif /* HW_LOADER_FIT_H */
diff --git a/include/hw/loongarch/virt.h b/include/hw/loongarch/virt.h
index 661efae61dfab..2b7d19953f43e 100644
--- a/include/hw/loongarch/virt.h
+++ b/include/hw/loongarch/virt.h
@@ -30,6 +30,7 @@
 #define VIRT_GED_EVT_ADDR       0x100e0000
 #define VIRT_GED_MEM_ADDR       (VIRT_GED_EVT_ADDR + ACPI_GED_EVT_SEL_LEN)
 #define VIRT_GED_REG_ADDR       (VIRT_GED_MEM_ADDR + MEMORY_HOTPLUG_IO_LEN)
+#define VIRT_GED_CPUHP_ADDR     (VIRT_GED_REG_ADDR + ACPI_GED_REG_COUNT)
 
 #define COMMAND_LINE_SIZE       512
 
@@ -68,4 +69,13 @@ struct LoongArchVirtMachineState {
 OBJECT_DECLARE_SIMPLE_TYPE(LoongArchVirtMachineState, LOONGARCH_VIRT_MACHINE)
 void virt_acpi_setup(LoongArchVirtMachineState *lvms);
 void virt_fdt_setup(LoongArchVirtMachineState *lvms);
+
+static inline bool virt_is_veiointc_enabled(LoongArchVirtMachineState *lvms)
+{
+    if (lvms->veiointc == ON_OFF_AUTO_OFF) {
+        return false;
+    }
+    return true;
+}
+
 #endif
diff --git a/include/hw/misc/aspeed_hace.h b/include/hw/misc/aspeed_hace.h
index 4af99191955a5..5d4aa19cfecba 100644
--- a/include/hw/misc/aspeed_hace.h
+++ b/include/hw/misc/aspeed_hace.h
@@ -18,6 +18,7 @@
 #define TYPE_ASPEED_AST2500_HACE TYPE_ASPEED_HACE "-ast2500"
 #define TYPE_ASPEED_AST2600_HACE TYPE_ASPEED_HACE "-ast2600"
 #define TYPE_ASPEED_AST1030_HACE TYPE_ASPEED_HACE "-ast1030"
+#define TYPE_ASPEED_AST2700_HACE TYPE_ASPEED_HACE "-ast2700"
 
 OBJECT_DECLARE_TYPE(AspeedHACEState, AspeedHACEClass, ASPEED_HACE)
 
@@ -49,6 +50,7 @@ struct AspeedHACEClass {
     uint32_t dest_mask;
     uint32_t key_mask;
     uint32_t hash_mask;
+    bool raise_crypt_interrupt_workaround;
 };
 
 #endif /* ASPEED_HACE_H */
diff --git a/include/hw/misc/aspeed_scu.h b/include/hw/misc/aspeed_scu.h
index 356be95e45855..684b48b7222aa 100644
--- a/include/hw/misc/aspeed_scu.h
+++ b/include/hw/misc/aspeed_scu.h
@@ -54,6 +54,8 @@ struct AspeedSCUState {
 #define AST2700_A0_SILICON_REV   0x06000103U
 #define AST2720_A0_SILICON_REV   0x06000203U
 #define AST2750_A0_SILICON_REV   0x06000003U
+#define AST2700_A1_SILICON_REV   0x06010103U
+#define AST2750_A1_SILICON_REV   0x06010003U
 
 #define ASPEED_IS_AST2500(si_rev)     ((((si_rev) >> 24) & 0xff) == 0x04)
 
diff --git a/include/hw/misc/imx8mp_analog.h b/include/hw/misc/imx8mp_analog.h
new file mode 100644
index 0000000000000..955f03215a03a
--- /dev/null
+++ b/include/hw/misc/imx8mp_analog.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2025 Bernhard Beschow <shentey@gmail.com>
+ *
+ * i.MX8MP ANALOG IP block emulation code
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef IMX8MP_ANALOG_H
+#define IMX8MP_ANALOG_H
+
+#include "qom/object.h"
+#include "hw/sysbus.h"
+
+enum IMX8MPAnalogRegisters {
+    ANALOG_AUDIO_PLL1_GEN_CTRL = 0x000 / 4,
+    ANALOG_AUDIO_PLL1_FDIV_CTL0 = 0x004 / 4,
+    ANALOG_AUDIO_PLL1_FDIV_CTL1 = 0x008 / 4,
+    ANALOG_AUDIO_PLL1_SSCG_CTRL = 0x00c / 4,
+    ANALOG_AUDIO_PLL1_MNIT_CTRL = 0x010 / 4,
+    ANALOG_AUDIO_PLL2_GEN_CTRL = 0x014 / 4,
+    ANALOG_AUDIO_PLL2_FDIV_CTL0 = 0x018 / 4,
+    ANALOG_AUDIO_PLL2_FDIV_CTL1 = 0x01c / 4,
+    ANALOG_AUDIO_PLL2_SSCG_CTRL = 0x020 / 4,
+    ANALOG_AUDIO_PLL2_MNIT_CTRL = 0x024 / 4,
+    ANALOG_VIDEO_PLL1_GEN_CTRL = 0x028 / 4,
+    ANALOG_VIDEO_PLL1_FDIV_CTL0 = 0x02c / 4,
+    ANALOG_VIDEO_PLL1_FDIV_CTL1 = 0x030 / 4,
+    ANALOG_VIDEO_PLL1_SSCG_CTRL = 0x034 / 4,
+    ANALOG_VIDEO_PLL1_MNIT_CTRL = 0x038 / 4,
+    ANALOG_DRAM_PLL_GEN_CTRL = 0x050 / 4,
+    ANALOG_DRAM_PLL_FDIV_CTL0 = 0x054 / 4,
+    ANALOG_DRAM_PLL_FDIV_CTL1 = 0x058 / 4,
+    ANALOG_DRAM_PLL_SSCG_CTRL = 0x05c / 4,
+    ANALOG_DRAM_PLL_MNIT_CTRL = 0x060 / 4,
+    ANALOG_GPU_PLL_GEN_CTRL = 0x064 / 4,
+    ANALOG_GPU_PLL_FDIV_CTL0 = 0x068 / 4,
+    ANALOG_GPU_PLL_LOCKD_CTRL = 0x06c / 4,
+    ANALOG_GPU_PLL_MNIT_CTRL = 0x070 / 4,
+    ANALOG_VPU_PLL_GEN_CTRL = 0x074 / 4,
+    ANALOG_VPU_PLL_FDIV_CTL0 = 0x078 / 4,
+    ANALOG_VPU_PLL_LOCKD_CTRL = 0x07c / 4,
+    ANALOG_VPU_PLL_MNIT_CTRL = 0x080 / 4,
+    ANALOG_ARM_PLL_GEN_CTRL = 0x084 / 4,
+    ANALOG_ARM_PLL_FDIV_CTL0 = 0x088 / 4,
+    ANALOG_ARM_PLL_LOCKD_CTRL = 0x08c / 4,
+    ANALOG_ARM_PLL_MNIT_CTRL = 0x090 / 4,
+    ANALOG_SYS_PLL1_GEN_CTRL = 0x094 / 4,
+    ANALOG_SYS_PLL1_FDIV_CTL0 = 0x098 / 4,
+    ANALOG_SYS_PLL1_LOCKD_CTRL = 0x09c / 4,
+    ANALOG_SYS_PLL1_MNIT_CTRL = 0x100 / 4,
+    ANALOG_SYS_PLL2_GEN_CTRL = 0x104 / 4,
+    ANALOG_SYS_PLL2_FDIV_CTL0 = 0x108 / 4,
+    ANALOG_SYS_PLL2_LOCKD_CTRL = 0x10c / 4,
+    ANALOG_SYS_PLL2_MNIT_CTRL = 0x110 / 4,
+    ANALOG_SYS_PLL3_GEN_CTRL = 0x114 / 4,
+    ANALOG_SYS_PLL3_FDIV_CTL0 = 0x118 / 4,
+    ANALOG_SYS_PLL3_LOCKD_CTRL = 0x11c / 4,
+    ANALOG_SYS_PLL3_MNIT_CTRL = 0x120 / 4,
+    ANALOG_OSC_MISC_CFG = 0x124 / 4,
+    ANALOG_ANAMIX_PLL_MNIT_CTL = 0x128 / 4,
+
+    ANALOG_DIGPROG = 0x800 / 4,
+    ANALOG_MAX,
+};
+
+#define TYPE_IMX8MP_ANALOG "imx8mp.analog"
+OBJECT_DECLARE_SIMPLE_TYPE(IMX8MPAnalogState, IMX8MP_ANALOG)
+
+struct IMX8MPAnalogState {
+    SysBusDevice parent_obj;
+
+    struct {
+        MemoryRegion container;
+        MemoryRegion analog;
+    } mmio;
+
+    uint32_t analog[ANALOG_MAX];
+};
+
+#endif /* IMX8MP_ANALOG_H */
diff --git a/include/hw/misc/imx8mp_ccm.h b/include/hw/misc/imx8mp_ccm.h
new file mode 100644
index 0000000000000..685c8582ff852
--- /dev/null
+++ b/include/hw/misc/imx8mp_ccm.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2025 Bernhard Beschow <shentey@gmail.com>
+ *
+ * i.MX 8M Plus CCM IP block emulation code
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef IMX8MP_CCM_H
+#define IMX8MP_CCM_H
+
+#include "hw/misc/imx_ccm.h"
+#include "qom/object.h"
+
+enum IMX8MPCCMRegisters {
+    CCM_MAX = 0xc6fc / sizeof(uint32_t) + 1,
+};
+
+#define TYPE_IMX8MP_CCM "imx8mp.ccm"
+OBJECT_DECLARE_SIMPLE_TYPE(IMX8MPCCMState, IMX8MP_CCM)
+
+struct IMX8MPCCMState {
+    IMXCCMState parent_obj;
+
+    MemoryRegion iomem;
+
+    uint32_t ccm[CCM_MAX];
+};
+
+#endif /* IMX8MP_CCM_H */
diff --git a/include/hw/misc/npcm7xx_clk.h b/include/hw/misc/npcm_clk.h
similarity index 83%
rename from include/hw/misc/npcm7xx_clk.h
rename to include/hw/misc/npcm_clk.h
index 5ed4a4672b36a..8fa1e14bddfba 100644
--- a/include/hw/misc/npcm7xx_clk.h
+++ b/include/hw/misc/npcm_clk.h
@@ -1,5 +1,5 @@
 /*
- * Nuvoton NPCM7xx Clock Control Registers.
+ * Nuvoton NPCM7xx/8xx Clock Control Registers.
  *
  * Copyright 2020 Google LLC
  *
@@ -13,18 +13,20 @@
  * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  * for more details.
  */
-#ifndef NPCM7XX_CLK_H
-#define NPCM7XX_CLK_H
+#ifndef NPCM_CLK_H
+#define NPCM_CLK_H
 
 #include "exec/memory.h"
 #include "hw/clock.h"
 #include "hw/sysbus.h"
 
+#define NPCM7XX_CLK_NR_REGS             (0x70 / sizeof(uint32_t))
+#define NPCM8XX_CLK_NR_REGS             (0xc4 / sizeof(uint32_t))
 /*
- * Number of registers in our device state structure. Don't change this without
- * incrementing the version_id in the vmstate.
+ * Number of maximum registers in NPCM device state structure. Don't change
+ * this without incrementing the version_id in the vmstate.
  */
-#define NPCM7XX_CLK_NR_REGS             (0x70 / sizeof(uint32_t))
+#define NPCM_CLK_MAX_NR_REGS            NPCM8XX_CLK_NR_REGS
 
 #define NPCM7XX_WATCHDOG_RESET_GPIO_IN "npcm7xx-clk-watchdog-reset-gpio-in"
 
@@ -80,7 +82,7 @@ typedef enum NPCM7xxClockDivider {
     NPCM7XX_CLOCK_NR_DIVIDERS,
 } NPCM7xxClockConverter;
 
-typedef struct NPCM7xxCLKState NPCM7xxCLKState;
+typedef struct NPCMCLKState NPCMCLKState;
 
 /**
  * struct NPCM7xxClockPLLState - A PLL module in CLK module.
@@ -94,7 +96,7 @@ typedef struct NPCM7xxClockPLLState {
     DeviceState parent;
 
     const char *name;
-    NPCM7xxCLKState *clk;
+    NPCMCLKState *clk;
     Clock *clock_in;
     Clock *clock_out;
 
@@ -115,7 +117,7 @@ typedef struct NPCM7xxClockSELState {
     DeviceState parent;
 
     const char *name;
-    NPCM7xxCLKState *clk;
+    NPCMCLKState *clk;
     uint8_t input_size;
     Clock *clock_in[NPCM7XX_CLK_SEL_MAX_INPUT];
     Clock *clock_out;
@@ -140,7 +142,7 @@ typedef struct NPCM7xxClockDividerState {
     DeviceState parent;
 
     const char *name;
-    NPCM7xxCLKState *clk;
+    NPCMCLKState *clk;
     Clock *clock_in;
     Clock *clock_out;
 
@@ -155,17 +157,21 @@ typedef struct NPCM7xxClockDividerState {
     };
 } NPCM7xxClockDividerState;
 
-struct NPCM7xxCLKState {
+struct NPCMCLKState {
     SysBusDevice parent;
 
     MemoryRegion iomem;
 
     /* Clock converters */
+    /*
+     * TODO: Implement unique clock converters for NPCM8xx.
+     * NPCM8xx adds a few more clock outputs.
+     */
     NPCM7xxClockPLLState plls[NPCM7XX_CLOCK_NR_PLLS];
     NPCM7xxClockSELState sels[NPCM7XX_CLOCK_NR_SELS];
     NPCM7xxClockDividerState dividers[NPCM7XX_CLOCK_NR_DIVIDERS];
 
-    uint32_t regs[NPCM7XX_CLK_NR_REGS];
+    uint32_t regs[NPCM_CLK_MAX_NR_REGS];
 
     /* Time reference for SECCNT and CNTR25M, initialized by power on reset */
     int64_t ref_ns;
@@ -174,7 +180,16 @@ struct NPCM7xxCLKState {
     Clock *clkref;
 };
 
+typedef struct NPCMCLKClass {
+    SysBusDeviceClass parent;
+
+    size_t nr_regs;
+    const uint32_t *cold_reset_values;
+} NPCMCLKClass;
+
+#define TYPE_NPCM_CLK "npcm-clk"
+OBJECT_DECLARE_TYPE(NPCMCLKState, NPCMCLKClass, NPCM_CLK)
 #define TYPE_NPCM7XX_CLK "npcm7xx-clk"
-OBJECT_DECLARE_SIMPLE_TYPE(NPCM7xxCLKState, NPCM7XX_CLK)
+#define TYPE_NPCM8XX_CLK "npcm8xx-clk"
 
-#endif /* NPCM7XX_CLK_H */
+#endif /* NPCM_CLK_H */
diff --git a/include/hw/misc/npcm7xx_gcr.h b/include/hw/misc/npcm_gcr.h
similarity index 76%
rename from include/hw/misc/npcm7xx_gcr.h
rename to include/hw/misc/npcm_gcr.h
index c0bbdda77e59f..d81bb9afb2638 100644
--- a/include/hw/misc/npcm7xx_gcr.h
+++ b/include/hw/misc/npcm_gcr.h
@@ -1,5 +1,5 @@
 /*
- * Nuvoton NPCM7xx System Global Control Registers.
+ * Nuvoton NPCM7xx/8xx System Global Control Registers.
  *
  * Copyright 2020 Google LLC
  *
@@ -13,11 +13,12 @@
  * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  * for more details.
  */
-#ifndef NPCM7XX_GCR_H
-#define NPCM7XX_GCR_H
+#ifndef NPCM_GCR_H
+#define NPCM_GCR_H
 
 #include "exec/memory.h"
 #include "hw/sysbus.h"
+#include "qom/object.h"
 
 /*
  * NPCM7XX PWRON STRAP bit fields
@@ -53,21 +54,33 @@
  * Number of registers in our device state structure. Don't change this without
  * incrementing the version_id in the vmstate.
  */
+#define NPCM_GCR_MAX_NR_REGS NPCM8XX_GCR_NR_REGS
 #define NPCM7XX_GCR_NR_REGS (0x148 / sizeof(uint32_t))
+#define NPCM8XX_GCR_NR_REGS (0xf80 / sizeof(uint32_t))
 
-struct NPCM7xxGCRState {
+typedef struct NPCMGCRState {
     SysBusDevice parent;
 
     MemoryRegion iomem;
 
-    uint32_t regs[NPCM7XX_GCR_NR_REGS];
+    uint32_t regs[NPCM_GCR_MAX_NR_REGS];
 
     uint32_t reset_pwron;
     uint32_t reset_mdlr;
     uint32_t reset_intcr3;
-};
+    uint32_t reset_scrpad_b;
+} NPCMGCRState;
 
+typedef struct NPCMGCRClass {
+    SysBusDeviceClass parent;
+
+    size_t nr_regs;
+    const uint32_t *cold_reset_values;
+} NPCMGCRClass;
+
+#define TYPE_NPCM_GCR "npcm-gcr"
 #define TYPE_NPCM7XX_GCR "npcm7xx-gcr"
-OBJECT_DECLARE_SIMPLE_TYPE(NPCM7xxGCRState, NPCM7XX_GCR)
+#define TYPE_NPCM8XX_GCR "npcm8xx-gcr"
+OBJECT_DECLARE_TYPE(NPCMGCRState, NPCMGCRClass, NPCM_GCR)
 
-#endif /* NPCM7XX_GCR_H */
+#endif /* NPCM_GCR_H */
diff --git a/include/hw/misc/pvpanic.h b/include/hw/misc/pvpanic.h
index 9a71a5ad0d7dd..049a94c11254e 100644
--- a/include/hw/misc/pvpanic.h
+++ b/include/hw/misc/pvpanic.h
@@ -26,6 +26,7 @@
 
 #define TYPE_PVPANIC_ISA_DEVICE "pvpanic"
 #define TYPE_PVPANIC_PCI_DEVICE "pvpanic-pci"
+#define TYPE_PVPANIC_MMIO_DEVICE "pvpanic-mmio"
 
 #define PVPANIC_IOPORT_PROP "ioport"
 
diff --git a/include/hw/net/npcm_pcs.h b/include/hw/net/npcm_pcs.h
new file mode 100644
index 0000000000000..d5c481ad70d20
--- /dev/null
+++ b/include/hw/net/npcm_pcs.h
@@ -0,0 +1,42 @@
+/*
+ * Nuvoton NPCM8xx PCS Module
+ *
+ * Copyright 2022 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef NPCM_PCS_H
+#define NPCM_PCS_H
+
+#include "hw/sysbus.h"
+
+#define NPCM_PCS_NR_SR_CTLS     (0x12 / sizeof(uint16_t))
+#define NPCM_PCS_NR_SR_MIIS     (0x20 / sizeof(uint16_t))
+#define NPCM_PCS_NR_SR_TIMS     (0x22 / sizeof(uint16_t))
+#define NPCM_PCS_NR_VR_MIIS     (0x1c6 / sizeof(uint16_t))
+
+struct NPCMPCSState {
+    SysBusDevice parent;
+
+    MemoryRegion iomem;
+
+    uint16_t indirect_access_base;
+    uint16_t sr_ctl[NPCM_PCS_NR_SR_CTLS];
+    uint16_t sr_mii[NPCM_PCS_NR_SR_MIIS];
+    uint16_t sr_tim[NPCM_PCS_NR_SR_TIMS];
+    uint16_t vr_mii[NPCM_PCS_NR_VR_MIIS];
+};
+
+#define TYPE_NPCM_PCS "npcm-pcs"
+OBJECT_DECLARE_SIMPLE_TYPE(NPCMPCSState, NPCM_PCS)
+
+#endif /* NPCM_PCS_H */
diff --git a/include/hw/openrisc/boot.h b/include/hw/openrisc/boot.h
index 25a313d63a1c3..9b4d88072c411 100644
--- a/include/hw/openrisc/boot.h
+++ b/include/hw/openrisc/boot.h
@@ -20,6 +20,7 @@
 #define OPENRISC_BOOT_H
 
 #include "exec/cpu-defs.h"
+#include "hw/boards.h"
 
 hwaddr openrisc_load_kernel(ram_addr_t ram_size,
                             const char *kernel_filename,
@@ -28,7 +29,7 @@ hwaddr openrisc_load_kernel(ram_addr_t ram_size,
 hwaddr openrisc_load_initrd(void *fdt, const char *filename,
                             hwaddr load_start, uint64_t mem_size);
 
-uint32_t openrisc_load_fdt(void *fdt, hwaddr load_start,
+uint32_t openrisc_load_fdt(MachineState *ms, void *fdt, hwaddr load_start,
                            uint64_t mem_size);
 
 #endif /* OPENRISC_BOOT_H */
diff --git a/include/hw/pci-host/designware.h b/include/hw/pci-host/designware.h
index bf8b2789787ac..a35a3bd06c88f 100644
--- a/include/hw/pci-host/designware.h
+++ b/include/hw/pci-host/designware.h
@@ -25,12 +25,19 @@
 #include "hw/pci/pci_bridge.h"
 #include "qom/object.h"
 
+#define TYPE_DESIGNWARE_PCIE_ROOT_BUS "designware-pcie-root-BUS"
+OBJECT_DECLARE_SIMPLE_TYPE(DesignwarePCIERootBus, DESIGNWARE_PCIE_ROOT_BUS)
+
 #define TYPE_DESIGNWARE_PCIE_HOST "designware-pcie-host"
 OBJECT_DECLARE_SIMPLE_TYPE(DesignwarePCIEHost, DESIGNWARE_PCIE_HOST)
 
 #define TYPE_DESIGNWARE_PCIE_ROOT "designware-pcie-root"
 OBJECT_DECLARE_SIMPLE_TYPE(DesignwarePCIERoot, DESIGNWARE_PCIE_ROOT)
 
+struct DesignwarePCIERootBus {
+    PCIBus parent;
+};
+
 typedef struct DesignwarePCIEViewport {
     DesignwarePCIERoot *root;
 
diff --git a/include/hw/pci-host/fsl_imx8m_phy.h b/include/hw/pci-host/fsl_imx8m_phy.h
new file mode 100644
index 0000000000000..4f4875b37d76e
--- /dev/null
+++ b/include/hw/pci-host/fsl_imx8m_phy.h
@@ -0,0 +1,28 @@
+/*
+ * i.MX8 PCIe PHY emulation
+ *
+ * Copyright (c) 2025 Bernhard Beschow <shentey@gmail.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_PCIHOST_FSLIMX8MPCIEPHY_H
+#define HW_PCIHOST_FSLIMX8MPCIEPHY_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+#include "exec/memory.h"
+
+#define TYPE_FSL_IMX8M_PCIE_PHY "fsl-imx8m-pcie-phy"
+OBJECT_DECLARE_SIMPLE_TYPE(FslImx8mPciePhyState, FSL_IMX8M_PCIE_PHY)
+
+#define FSL_IMX8M_PCIE_PHY_DATA_SIZE 0x800
+
+struct FslImx8mPciePhyState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion iomem;
+    uint8_t data[FSL_IMX8M_PCIE_PHY_DATA_SIZE];
+};
+
+#endif
diff --git a/include/hw/pci-host/pnv_phb4.h b/include/hw/pci-host/pnv_phb4.h
index 8abee78e4d456..8a80c0c667a89 100644
--- a/include/hw/pci-host/pnv_phb4.h
+++ b/include/hw/pci-host/pnv_phb4.h
@@ -13,6 +13,7 @@
 #include "hw/pci-host/pnv_phb.h"
 #include "hw/pci/pci_bus.h"
 #include "hw/ppc/pnv.h"
+#include "hw/ppc/pnv_nest_pervasive.h"
 #include "hw/ppc/xive.h"
 #include "qom/object.h"
 
@@ -174,6 +175,9 @@ struct PnvPhb4PecState {
     uint32_t index;
     uint32_t chip_id;
 
+    /* Pervasive chiplet control */
+    PnvNestChipletPervasive nest_pervasive;
+
     /* Nest registers, excuding per-stack */
 #define PHB4_PEC_NEST_REGS_COUNT    0xf
     uint64_t nest_regs[PHB4_PEC_NEST_REGS_COUNT];
@@ -196,6 +200,7 @@ struct PnvPhb4PecState {
 struct PnvPhb4PecClass {
     DeviceClass parent_class;
 
+    uint32_t (*xscom_cplt_base)(PnvPhb4PecState *pec);
     uint32_t (*xscom_nest_base)(PnvPhb4PecState *pec);
     uint32_t xscom_nest_size;
     uint32_t (*xscom_pci_base)(PnvPhb4PecState *pec);
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 4002bbeebde09..822fbacdf0177 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -23,6 +23,10 @@ extern bool pci_available;
 #define PCI_SLOT_MAX            32
 #define PCI_FUNC_MAX            8
 
+#define PCI_SBDF(seg, bus, dev, func) \
+            ((((uint32_t)(seg)) << 16) | \
+            (PCI_BUILD_BDF(bus, PCI_DEVFN(dev, func))))
+
 /* Class, Vendor and Device IDs from Linux's pci_ids.h */
 #include "hw/pci/pci_ids.h"
 
@@ -216,6 +220,8 @@ enum {
     QEMU_PCIE_ARI_NEXTFN_1 = (1 << QEMU_PCIE_ARI_NEXTFN_1_BITNR),
 #define QEMU_PCIE_EXT_TAG_BITNR 13
     QEMU_PCIE_EXT_TAG = (1 << QEMU_PCIE_EXT_TAG_BITNR),
+#define QEMU_PCI_CAP_PM_BITNR 14
+    QEMU_PCI_CAP_PM = (1 << QEMU_PCI_CAP_PM_BITNR),
 };
 
 typedef struct PCIINTxRoute {
@@ -676,5 +682,6 @@ static inline void pci_irq_deassert(PCIDevice *pci_dev)
 MSIMessage pci_get_msi_message(PCIDevice *dev, int vector);
 void pci_set_enabled(PCIDevice *pci_dev, bool state);
 void pci_set_power(PCIDevice *pci_dev, bool state);
+int pci_pm_init(PCIDevice *pci_dev, uint8_t offset, Error **errp);
 
 #endif
diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
index add208edfabda..345b12eaac1a1 100644
--- a/include/hw/pci/pci_device.h
+++ b/include/hw/pci/pci_device.h
@@ -105,6 +105,9 @@ struct PCIDevice {
     /* Capability bits */
     uint32_t cap_present;
 
+    /* Offset of PM capability in config space */
+    uint8_t pm_cap;
+
     /* Offset of MSI-X capability in config space */
     uint8_t msix_cap;
 
diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h
index f1a53fea8d629..33e2898be9576 100644
--- a/include/hw/pci/pci_ids.h
+++ b/include/hw/pci/pci_ids.h
@@ -191,6 +191,7 @@
 #define PCI_DEVICE_ID_APPLE_UNI_N_AGP    0x0020
 #define PCI_DEVICE_ID_APPLE_U3_AGP       0x004b
 #define PCI_DEVICE_ID_APPLE_UNI_N_GMAC   0x0021
+#define PCI_DEVICE_ID_APPLE_VIRTIO_BLK   0x1a00
 
 #define PCI_VENDOR_ID_SUN                0x108e
 #define PCI_DEVICE_ID_SUN_EBUS           0x1000
diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
index b8d59732bc636..70a5de09de395 100644
--- a/include/hw/pci/pcie.h
+++ b/include/hw/pci/pcie.h
@@ -58,8 +58,6 @@ typedef enum {
 struct PCIExpressDevice {
     /* Offset of express capability in config space */
     uint8_t exp_cap;
-    /* Offset of Power Management capability in config space */
-    uint8_t pm_cap;
 
     /* SLOT */
     bool hpev_notified; /* Logical AND of conditions for hot plug event.
diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h
index 450cbef6c201b..c5d2d318d330a 100644
--- a/include/hw/pci/pcie_sriov.h
+++ b/include/hw/pci/pcie_sriov.h
@@ -16,9 +16,7 @@
 #include "hw/pci/pci.h"
 
 typedef struct PCIESriovPF {
-    uint16_t num_vfs;   /* Number of virtual functions created */
     uint8_t vf_bar_type[PCI_NUM_REGIONS];   /* Store type for each VF bar */
-    const char *vfname; /* Reference to the device type used for the VFs */
     PCIDevice **vf;     /* Pointer to an array of num_vfs VF devices */
 } PCIESriovPF;
 
@@ -27,10 +25,11 @@ typedef struct PCIESriovVF {
     uint16_t vf_number; /* Logical VF number of this function */
 } PCIESriovVF;
 
-void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
+bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
                         const char *vfname, uint16_t vf_dev_id,
                         uint16_t init_vfs, uint16_t total_vfs,
-                        uint16_t vf_offset, uint16_t vf_stride);
+                        uint16_t vf_offset, uint16_t vf_stride,
+                        Error **errp);
 void pcie_sriov_pf_exit(PCIDevice *dev);
 
 /* Set up a VF bar in the SR/IOV bar area */
@@ -58,6 +57,8 @@ void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev, uint16_t opt_sup_pgsize);
 void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
                              uint32_t val, int len);
 
+void pcie_sriov_pf_post_load(PCIDevice *dev);
+
 /* Reset SR/IOV */
 void pcie_sriov_pf_reset(PCIDevice *dev);
 
diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
index fcb6699150c87..d8fca079f2feb 100644
--- a/include/hw/ppc/pnv.h
+++ b/include/hw/ppc/pnv.h
@@ -205,9 +205,8 @@ void pnv_bmc_set_pnor(IPMIBmc *bmc, PnvPnor *pnor);
 #define PNV9_OCC_SENSOR_BASE(chip)   (PNV9_OCC_COMMON_AREA_BASE +       \
     PNV_OCC_SENSOR_DATA_BLOCK_BASE((chip)->chip_id))
 
-#define PNV9_HOMER_SIZE              0x0000000000400000ull
 #define PNV9_HOMER_BASE(chip)                                           \
-    (0x203ffd800000ull + ((uint64_t)(chip)->chip_id) * PNV9_HOMER_SIZE)
+    (0x203ffd800000ull + ((uint64_t)(chip)->chip_id) * PNV_HOMER_SIZE)
 
 /*
  * POWER10 MMIO base addresses - 16TB stride per chip
@@ -250,8 +249,7 @@ void pnv_bmc_set_pnor(IPMIBmc *bmc, PnvPnor *pnor);
 #define PNV10_OCC_SENSOR_BASE(chip) (PNV10_OCC_COMMON_AREA_BASE +       \
     PNV_OCC_SENSOR_DATA_BLOCK_BASE((chip)->chip_id))
 
-#define PNV10_HOMER_SIZE              0x0000000000400000ull
 #define PNV10_HOMER_BASE(chip)                                           \
-    (0x300ffd800000ll + ((uint64_t)(chip)->chip_id) * PNV10_HOMER_SIZE)
+    (0x300ffd800000ll + ((uint64_t)(chip)->chip_id) * PNV_HOMER_SIZE)
 
 #endif /* PPC_PNV_H */
diff --git a/include/hw/ppc/pnv_homer.h b/include/hw/ppc/pnv_homer.h
index b1c5d498dc555..a6f2710fa16b5 100644
--- a/include/hw/ppc/pnv_homer.h
+++ b/include/hw/ppc/pnv_homer.h
@@ -41,19 +41,21 @@ struct PnvHomer {
 
     PnvChip *chip;
     MemoryRegion pba_regs;
-    MemoryRegion regs;
+    MemoryRegion mem;
+    hwaddr base;
 };
 
 
 struct PnvHomerClass {
     DeviceClass parent_class;
 
+    /* Get base address of HOMER memory */
+    hwaddr (*get_base)(PnvChip *chip);
+    /* Size of HOMER memory */
+    int size;
+
     int pba_size;
     const MemoryRegionOps *pba_ops;
-    int homer_size;
-    const MemoryRegionOps *homer_ops;
-
-    hwaddr core_max_base;
 };
 
 #endif /* PPC_PNV_HOMER_H */
diff --git a/include/hw/ppc/pnv_occ.h b/include/hw/ppc/pnv_occ.h
index df321244e3b1c..3ec42de0ff16d 100644
--- a/include/hw/ppc/pnv_occ.h
+++ b/include/hw/ppc/pnv_occ.h
@@ -41,11 +41,17 @@ DECLARE_INSTANCE_CHECKER(PnvOCC, PNV10_OCC, TYPE_PNV10_OCC)
 struct PnvOCC {
     DeviceState xd;
 
+    /* OCC dynamic model is driven by this timer. */
+    QEMUTimer state_machine_timer;
+
     /* OCC Misc interrupt */
     uint64_t occmisc;
 
     qemu_irq psi_irq;
 
+    /* OCCs operate on regions of HOMER memory */
+    PnvHomer *homer;
+
     MemoryRegion xscom_regs;
     MemoryRegion sram_regs;
 };
@@ -53,6 +59,9 @@ struct PnvOCC {
 struct PnvOCCClass {
     DeviceClass parent_class;
 
+    hwaddr opal_shared_memory_offset; /* offset in HOMER */
+    uint8_t opal_shared_memory_version;
+
     int xscom_size;
     const MemoryRegionOps *xscom_ops;
 };
diff --git a/include/hw/ppc/pnv_pnor.h b/include/hw/ppc/pnv_pnor.h
index 2e37ac88bf15c..19c2d642e828b 100644
--- a/include/hw/ppc/pnv_pnor.h
+++ b/include/hw/ppc/pnv_pnor.h
@@ -13,9 +13,11 @@
 #include "hw/sysbus.h"
 
 /*
- * PNOR offset on the LPC FW address space
+ * PNOR offset on the LPC FW address space. For now this should be 0 because
+ * skiboot 7.1 has a bug where IDSEL > 0 (LPC FW address > 256MB) access is
+ * not performed correctly.
  */
-#define PNOR_SPI_OFFSET         0x0c000000UL
+#define PNOR_SPI_OFFSET         0x00000000UL
 
 #define TYPE_PNV_PNOR  "pnv-pnor"
 OBJECT_DECLARE_SIMPLE_TYPE(PnvPnor, PNV_PNOR)
diff --git a/include/hw/ppc/pnv_xscom.h b/include/hw/ppc/pnv_xscom.h
index 648388a599511..a927aea1c09ca 100644
--- a/include/hw/ppc/pnv_xscom.h
+++ b/include/hw/ppc/pnv_xscom.h
@@ -126,6 +126,8 @@ struct PnvXScomInterfaceClass {
 #define PNV9_XSCOM_PEC_PCI_BASE   0xd010800
 #define PNV9_XSCOM_PEC_PCI_SIZE   0x200
 
+#define PNV9_XSCOM_PEC_NEST_CPLT_BASE  0x0d000000
+
 /* XSCOM PCI "pass-through" window to PHB SCOM */
 #define PNV9_XSCOM_PEC_PCI_STK0   0x100
 #define PNV9_XSCOM_PEC_PCI_STK1   0x140
@@ -197,6 +199,8 @@ struct PnvXScomInterfaceClass {
 #define PNV10_XSCOM_PEC_NEST_BASE  0x3011800 /* index goes downwards ... */
 #define PNV10_XSCOM_PEC_NEST_SIZE  0x100
 
+#define PNV10_XSCOM_PEC_NEST_CPLT_BASE 0x08000000
+
 #define PNV10_XSCOM_PEC_PCI_BASE   0x8010800 /* index goes upwards ... */
 #define PNV10_XSCOM_PEC_PCI_SIZE   0x200
 
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index a6c0547e313dc..39bd5bd5ed310 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -83,8 +83,10 @@ typedef enum {
 #define SPAPR_CAP_AIL_MODE_3            0x0C
 /* Nested PAPR */
 #define SPAPR_CAP_NESTED_PAPR           0x0D
+/* DAWR1 */
+#define SPAPR_CAP_DAWR1                 0x0E
 /* Num Caps */
-#define SPAPR_CAP_NUM                   (SPAPR_CAP_NESTED_PAPR + 1)
+#define SPAPR_CAP_NUM                   (SPAPR_CAP_DAWR1 + 1)
 
 /*
  * Capability Values
@@ -201,6 +203,7 @@ struct SpaprMachineState {
     uint32_t fdt_initial_size;
     void *fdt_blob;
     uint8_t fdt_rng_seed[32];
+    uint64_t hashpkey_val;
     long kernel_size;
     bool kernel_le;
     uint64_t kernel_addr;
@@ -406,6 +409,7 @@ struct SpaprMachineState {
 #define H_SET_MODE_RESOURCE_SET_DAWR0           2
 #define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE     3
 #define H_SET_MODE_RESOURCE_LE                  4
+#define H_SET_MODE_RESOURCE_SET_DAWR1           5
 
 /* Flags for H_SET_MODE_RESOURCE_LE */
 #define H_SET_MODE_ENDIAN_BIG    0
@@ -1003,6 +1007,7 @@ extern const VMStateDescription vmstate_spapr_cap_fwnmi;
 extern const VMStateDescription vmstate_spapr_cap_rpt_invalidate;
 extern const VMStateDescription vmstate_spapr_cap_ail_mode_3;
 extern const VMStateDescription vmstate_spapr_wdt;
+extern const VMStateDescription vmstate_spapr_cap_dawr1;
 
 static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap)
 {
diff --git a/include/hw/ppc/spapr_nested.h b/include/hw/ppc/spapr_nested.h
index e4202204846c3..f7be0d5a95574 100644
--- a/include/hw/ppc/spapr_nested.h
+++ b/include/hw/ppc/spapr_nested.h
@@ -11,7 +11,13 @@
 #define GSB_TB_OFFSET           0x0004 /* Timebase Offset */
 #define GSB_PART_SCOPED_PAGETBL 0x0005 /* Partition Scoped Page Table */
 #define GSB_PROCESS_TBL         0x0006 /* Process Table */
-                    /* RESERVED 0x0007 - 0x0BFF */
+                    /* RESERVED 0x0007 - 0x07FF */
+#define GSB_L0_GUEST_HEAP_INUSE 0x0800 /* Guest Management Heap Size */
+#define GSB_L0_GUEST_HEAP_MAX   0x0801 /* Guest Management Heap Max Size */
+#define GSB_L0_GUEST_PGTABLE_SIZE_INUSE  0x0802 /* Guest Pagetable Size */
+#define GSB_L0_GUEST_PGTABLE_SIZE_MAX    0x0803 /* Guest Pagetable Max Size */
+#define GSB_L0_GUEST_PGTABLE_RECLAIMED   0x0804 /* Pagetable Reclaim in bytes */
+                    /* RESERVED 0x0805 - 0xBFF */
 #define GSB_VCPU_IN_BUFFER      0x0C00 /* Run VCPU Input Buffer */
 #define GSB_VCPU_OUT_BUFFER     0x0C01 /* Run VCPU Out Buffer */
 #define GSB_VCPU_VPA            0x0C02 /* HRA to Guest VCPU VPA */
@@ -196,6 +202,38 @@ typedef struct SpaprMachineStateNested {
 #define NESTED_API_PAPR    2
     bool capabilities_set;
     uint32_t pvr_base;
+
+    /**
+     * l0_guest_heap_inuse: The currently used bytes in the Hypervisor's Guest
+     * Management Space associated with the Host Partition.
+     **/
+    uint64_t l0_guest_heap_inuse;
+
+    /**
+     * host_heap_max: The maximum bytes available in the Hypervisor's Guest
+     * Management Space associated with the Host Partition.
+     **/
+    uint64_t l0_guest_heap_max;
+
+    /**
+     * host_pagetable: The currently used bytes in the Hypervisor's Guest
+     * Page Table Management Space associated with the Host Partition.
+     **/
+    uint64_t l0_guest_pgtable_size_inuse;
+
+    /**
+     * host_pagetable_max: The maximum bytes available in the Hypervisor's Guest
+     * Page Table Management Space associated with the Host Partition.
+     **/
+    uint64_t l0_guest_pgtable_size_max;
+
+    /**
+     * host_pagetable_reclaim: The amount of space in bytes that has been
+     * reclaimed due to overcommit in the  Hypervisor's Guest Page Table
+     * Management Space associated with the Host Partition.
+     **/
+    uint64_t l0_guest_pgtable_reclaimed;
+
     GHashTable *guests;
 } SpaprMachineStateNested;
 
@@ -229,9 +267,15 @@ typedef struct SpaprMachineStateNestedGuest {
 #define HVMASK_HDEXCR                 0x00000000FFFFFFFF
 #define HVMASK_TB_OFFSET              0x000000FFFFFFFFFF
 #define GSB_MAX_BUF_SIZE              (1024 * 1024)
-#define H_GUEST_GETSET_STATE_FLAG_GUEST_WIDE 0x8000000000000000
-#define GUEST_STATE_REQUEST_GUEST_WIDE       0x1
-#define GUEST_STATE_REQUEST_SET              0x2
+#define H_GUEST_GET_STATE_FLAGS_MASK   0xC000000000000000ULL
+#define H_GUEST_SET_STATE_FLAGS_MASK   0x8000000000000000ULL
+#define H_GUEST_SET_STATE_FLAGS_GUEST_WIDE 0x8000000000000000ULL
+#define H_GUEST_GET_STATE_FLAGS_GUEST_WIDE 0x8000000000000000ULL
+#define H_GUEST_GET_STATE_FLAGS_HOST_WIDE  0x4000000000000000ULL
+
+#define GUEST_STATE_REQUEST_GUEST_WIDE     0x1
+#define GUEST_STATE_REQUEST_HOST_WIDE      0x2
+#define GUEST_STATE_REQUEST_SET            0x4
 
 /*
  * As per ISA v3.1B, following bits are reserved:
@@ -251,6 +295,15 @@ typedef struct SpaprMachineStateNestedGuest {
     .copy = (c)                                    \
 }
 
+#define GSBE_NESTED_MACHINE_DW(i, f)  {                             \
+        .id = (i),                                                  \
+        .size = 8,                                                  \
+        .location = get_machine_ptr,                                \
+        .offset = offsetof(struct SpaprMachineStateNested, f),     \
+        .copy = copy_state_8to8,                                    \
+        .mask = HVMASK_DEFAULT                                      \
+}
+
 #define GSBE_NESTED(i, sz, f, c) {                             \
     .id = (i),                                                 \
     .size = (sz),                                              \
@@ -509,9 +562,11 @@ struct guest_state_element_type {
     uint16_t id;
     int size;
 #define GUEST_STATE_ELEMENT_TYPE_FLAG_GUEST_WIDE 0x1
-#define GUEST_STATE_ELEMENT_TYPE_FLAG_READ_ONLY  0x2
+#define GUEST_STATE_ELEMENT_TYPE_FLAG_HOST_WIDE 0x2
+#define GUEST_STATE_ELEMENT_TYPE_FLAG_READ_ONLY 0x4
    uint16_t flags;
-    void *(*location)(SpaprMachineStateNestedGuest *, target_ulong);
+   void *(*location)(struct SpaprMachineState *, SpaprMachineStateNestedGuest *,
+                     target_ulong);
     size_t offset;
     void (*copy)(void *, void *, bool);
     uint64_t mask;
diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
index ea5d03a346358..538f438681727 100644
--- a/include/hw/ppc/xive.h
+++ b/include/hw/ppc/xive.h
@@ -130,11 +130,9 @@
  *   TCTX   Thread interrupt Context
  *
  *
- * Copyright (c) 2017-2018, IBM Corporation.
- *
- * This code is licensed under the GPL version 2 or later. See the
- * COPYING file in the top-level directory.
+ * Copyright (c) 2017-2024, IBM Corporation.
  *
+ * SPDX-License-Identifier: GPL-2.0-or-later
  */
 
 #ifndef PPC_XIVE_H
@@ -424,6 +422,7 @@ void xive_router_end_notify(XiveRouter *xrtr, XiveEAS *eas);
 typedef struct XiveTCTXMatch {
     XiveTCTX *tctx;
     uint8_t ring;
+    bool precluded;
 } XiveTCTXMatch;
 
 #define TYPE_XIVE_PRESENTER "xive-presenter"
@@ -439,10 +438,13 @@ struct XivePresenterClass {
     InterfaceClass parent;
     int (*match_nvt)(XivePresenter *xptr, uint8_t format,
                      uint8_t nvt_blk, uint32_t nvt_idx,
-                     bool cam_ignore, uint8_t priority,
+                     bool crowd, bool cam_ignore, uint8_t priority,
                      uint32_t logic_serv, XiveTCTXMatch *match);
     bool (*in_kernel)(const XivePresenter *xptr);
     uint32_t (*get_config)(XivePresenter *xptr);
+    int (*broadcast)(XivePresenter *xptr,
+                     uint8_t nvt_blk, uint32_t nvt_idx,
+                     bool crowd, bool cam_ignore, uint8_t priority);
 };
 
 int xive_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx,
@@ -451,8 +453,10 @@ int xive_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx,
                               bool cam_ignore, uint32_t logic_serv);
 bool xive_presenter_notify(XiveFabric *xfb, uint8_t format,
                            uint8_t nvt_blk, uint32_t nvt_idx,
-                           bool cam_ignore, uint8_t priority,
-                           uint32_t logic_serv);
+                           bool crowd, bool cam_ignore, uint8_t priority,
+                           uint32_t logic_serv, bool *precluded);
+
+uint32_t xive_get_vpgroup_size(uint32_t nvp_index);
 
 /*
  * XIVE Fabric (Interface between Interrupt Controller and Machine)
@@ -469,8 +473,10 @@ struct XiveFabricClass {
     InterfaceClass parent;
     int (*match_nvt)(XiveFabric *xfb, uint8_t format,
                      uint8_t nvt_blk, uint32_t nvt_idx,
-                     bool cam_ignore, uint8_t priority,
+                     bool crowd, bool cam_ignore, uint8_t priority,
                      uint32_t logic_serv, XiveTCTXMatch *match);
+    int (*broadcast)(XiveFabric *xfb, uint8_t nvt_blk, uint32_t nvt_idx,
+                     bool crowd, bool cam_ignore, uint8_t priority);
 };
 
 /*
@@ -510,6 +516,21 @@ static inline uint8_t xive_priority_to_ipb(uint8_t priority)
         0 : 1 << (XIVE_PRIORITY_MAX - priority);
 }
 
+static inline uint8_t xive_priority_to_pipr(uint8_t priority)
+{
+    return priority > XIVE_PRIORITY_MAX ? 0xFF : priority;
+}
+
+/*
+ * Convert an Interrupt Pending Buffer (IPB) register to a Pending
+ * Interrupt Priority Register (PIPR), which contains the priority of
+ * the most favored pending notification.
+ */
+static inline uint8_t xive_ipb_to_pipr(uint8_t ibp)
+{
+    return ibp ? clz32((uint32_t)ibp << 24) : 0xff;
+}
+
 /*
  * XIVE Thread Interrupt Management Aera (TIMA)
  *
@@ -532,8 +553,10 @@ void xive_tctx_pic_print_info(XiveTCTX *tctx, GString *buf);
 Object *xive_tctx_create(Object *cpu, XivePresenter *xptr, Error **errp);
 void xive_tctx_reset(XiveTCTX *tctx);
 void xive_tctx_destroy(XiveTCTX *tctx);
-void xive_tctx_ipb_update(XiveTCTX *tctx, uint8_t ring, uint8_t ipb);
+void xive_tctx_pipr_update(XiveTCTX *tctx, uint8_t ring, uint8_t priority,
+                           uint8_t group_level);
 void xive_tctx_reset_signal(XiveTCTX *tctx, uint8_t ring);
+void xive_tctx_notify(XiveTCTX *tctx, uint8_t ring, uint8_t group_level);
 
 /*
  * KVM XIVE device helpers
diff --git a/include/hw/ppc/xive2.h b/include/hw/ppc/xive2.h
index 5bccf41159cb4..8cdf8191742e9 100644
--- a/include/hw/ppc/xive2.h
+++ b/include/hw/ppc/xive2.h
@@ -1,11 +1,9 @@
 /*
  * QEMU PowerPC XIVE2 interrupt controller model  (POWER10)
  *
- * Copyright (c) 2019-2022, IBM Corporation.
- *
- * This code is licensed under the GPL version 2 or later. See the
- * COPYING file in the top-level directory.
+ * Copyright (c) 2019-2024, IBM Corporation.
  *
+ * SPDX-License-Identifier: GPL-2.0-or-later
  */
 
 #ifndef PPC_XIVE2_H
@@ -90,7 +88,17 @@ void xive2_router_notify(XiveNotifier *xn, uint32_t lisn, bool pq_checked);
 int xive2_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx,
                                uint8_t format,
                                uint8_t nvt_blk, uint32_t nvt_idx,
-                               bool cam_ignore, uint32_t logic_serv);
+                               bool crowd, bool cam_ignore,
+                               uint32_t logic_serv);
+
+uint64_t xive2_presenter_nvp_backlog_op(XivePresenter *xptr,
+                                        uint8_t blk, uint32_t idx,
+                                        uint16_t offset);
+
+uint64_t xive2_presenter_nvgc_backlog_op(XivePresenter *xptr,
+                                         bool crowd,
+                                         uint8_t blk, uint32_t idx,
+                                         uint16_t offset, uint16_t val);
 
 /*
  * XIVE2 END ESBs  (POWER10)
@@ -115,12 +123,18 @@ typedef struct Xive2EndSource {
  * XIVE2 Thread Interrupt Management Area (POWER10)
  */
 
+void xive2_tm_set_hv_cppr(XivePresenter *xptr, XiveTCTX *tctx,
+                          hwaddr offset, uint64_t value, unsigned size);
+void xive2_tm_set_os_cppr(XivePresenter *xptr, XiveTCTX *tctx,
+                          hwaddr offset, uint64_t value, unsigned size);
 void xive2_tm_push_os_ctx(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
                            uint64_t value, unsigned size);
 uint64_t xive2_tm_pull_os_ctx(XivePresenter *xptr, XiveTCTX *tctx,
                                hwaddr offset, unsigned size);
 void xive2_tm_pull_os_ctx_ol(XivePresenter *xptr, XiveTCTX *tctx,
                              hwaddr offset, uint64_t value, unsigned size);
+bool xive2_tm_irq_precluded(XiveTCTX *tctx, int ring, uint8_t priority);
+void xive2_tm_set_lsmfb(XiveTCTX *tctx, int ring, uint8_t priority);
 void xive2_tm_set_hv_target(XivePresenter *xptr, XiveTCTX *tctx,
                             hwaddr offset, uint64_t value, unsigned size);
 void xive2_tm_pull_phys_ctx_ol(XivePresenter *xptr, XiveTCTX *tctx,
diff --git a/include/hw/ppc/xive2_regs.h b/include/hw/ppc/xive2_regs.h
index 1d00c8df64294..b11395c563504 100644
--- a/include/hw/ppc/xive2_regs.h
+++ b/include/hw/ppc/xive2_regs.h
@@ -1,10 +1,9 @@
 /*
  * QEMU PowerPC XIVE2 internal structure definitions (POWER10)
  *
- * Copyright (c) 2019-2022, IBM Corporation.
+ * Copyright (c) 2019-2024, IBM Corporation.
  *
- * This code is licensed under the GPL version 2 or later. See the
- * COPYING file in the top-level directory.
+ * SPDX-License-Identifier: GPL-2.0-or-later
  */
 
 #ifndef PPC_XIVE2_REGS_H
@@ -152,6 +151,9 @@ typedef struct Xive2Nvp {
         uint32_t       w0;
 #define NVP2_W0_VALID              PPC_BIT32(0)
 #define NVP2_W0_HW                 PPC_BIT32(7)
+#define NVP2_W0_L                  PPC_BIT32(8)
+#define NVP2_W0_G                  PPC_BIT32(9)
+#define NVP2_W0_T                  PPC_BIT32(10)
 #define NVP2_W0_ESC_END            PPC_BIT32(25) /* 'N' bit 0:ESB  1:END */
 #define NVP2_W0_PGOFIRST           PPC_BITMASK32(26, 31)
         uint32_t       w1;
@@ -163,6 +165,8 @@ typedef struct Xive2Nvp {
 #define NVP2_W2_CPPR               PPC_BITMASK32(0, 7)
 #define NVP2_W2_IPB                PPC_BITMASK32(8, 15)
 #define NVP2_W2_LSMFB              PPC_BITMASK32(16, 23)
+#define NVP2_W2_T                  PPC_BIT32(27)
+#define NVP2_W2_LGS                PPC_BITMASK32(28, 31)
         uint32_t       w3;
         uint32_t       w4;
 #define NVP2_W4_ESC_ESB_BLOCK      PPC_BITMASK32(0, 3)  /* N:0 */
@@ -229,4 +233,11 @@ typedef struct Xive2Nvgc {
 void xive2_nvgc_pic_print_info(Xive2Nvgc *nvgc, uint32_t nvgc_idx,
                                GString *buf);
 
+#define NVx_BACKLOG_OP            PPC_BITMASK(52, 53)
+#define NVx_BACKLOG_PRIO          PPC_BITMASK(57, 59)
+
+/* split the 6-bit crowd/group level */
+#define NVx_CROWD_LVL(level)      ((level >> 4) & 0b11)
+#define NVx_GROUP_LVL(level)      (level & 0b1111)
+
 #endif /* PPC_XIVE2_REGS_H */
diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h
index 326327fc79f97..54bc6c53b46bc 100644
--- a/include/hw/ppc/xive_regs.h
+++ b/include/hw/ppc/xive_regs.h
@@ -7,10 +7,9 @@
  * access to the different fields.
  *
  *
- * Copyright (c) 2016-2018, IBM Corporation.
+ * Copyright (c) 2016-2024, IBM Corporation.
  *
- * This code is licensed under the GPL version 2 or later. See the
- * COPYING file in the top-level directory.
+ * SPDX-License-Identifier: GPL-2.0-or-later
  */
 
 #ifndef PPC_XIVE_REGS_H
@@ -146,7 +145,14 @@
 #define TM_SPC_PULL_PHYS_CTX_OL 0xc38   /* Pull phys ctx to odd cache line    */
 /* XXX more... */
 
-/* NSR fields for the various QW ack types */
+/*
+ * NSR fields for the various QW ack types
+ *
+ * P10 has an extra bit in QW3 for the group level instead of the
+ * reserved 'i' bit. Since it is not used and we don't support group
+ * interrupts on P9, we use the P10 definition for the group level so
+ * that we can have common macros for the NSR
+ */
 #define TM_QW0_NSR_EB           PPC_BIT8(0)
 #define TM_QW1_NSR_EO           PPC_BIT8(0)
 #define TM_QW3_NSR_HE           PPC_BITMASK8(0, 1)
@@ -154,8 +160,15 @@
 #define  TM_QW3_NSR_HE_POOL     1
 #define  TM_QW3_NSR_HE_PHYS     2
 #define  TM_QW3_NSR_HE_LSI      3
-#define TM_QW3_NSR_I            PPC_BIT8(2)
-#define TM_QW3_NSR_GRP_LVL      PPC_BIT8(3, 7)
+#define TM_NSR_GRP_LVL          PPC_BITMASK8(2, 7)
+/*
+ * On P10, the format of the 6-bit group level is: 2 bits for the
+ * crowd size and 4 bits for the group size. Since group/crowd size is
+ * always a power of 2, we encode the log. For example, group_level=4
+ * means crowd size = 0 and group size = 16 (2^4)
+ * Same encoding is used in the NVP and NVGC structures for
+ * PGoFirst and PGoNext fields
+ */
 
 /*
  * EAS (Event Assignment Structure)
diff --git a/include/hw/qdev-properties-system.h b/include/hw/qdev-properties-system.h
index 7ec37f6316c3c..b921392c52567 100644
--- a/include/hw/qdev-properties-system.h
+++ b/include/hw/qdev-properties-system.h
@@ -30,6 +30,8 @@ extern const PropertyInfo qdev_prop_pcie_link_speed;
 extern const PropertyInfo qdev_prop_pcie_link_width;
 extern const PropertyInfo qdev_prop_cpus390entitlement;
 extern const PropertyInfo qdev_prop_iothread_vq_mapping_list;
+extern const PropertyInfo qdev_prop_endian_mode;
+extern const PropertyInfo qdev_prop_vmapple_virtio_blk_variant;
 
 #define DEFINE_PROP_PCI_DEVFN(_n, _s, _f, _d)                   \
     DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_pci_devfn, int32_t)
@@ -97,4 +99,15 @@ extern const PropertyInfo qdev_prop_iothread_vq_mapping_list;
     DEFINE_PROP(_name, _state, _field, qdev_prop_iothread_vq_mapping_list, \
                 IOThreadVirtQueueMappingList *)
 
+#define DEFINE_PROP_ENDIAN(_name, _state, _field, _default) \
+    DEFINE_PROP_UNSIGNED(_name, _state, _field, _default, \
+                         qdev_prop_endian_mode, EndianMode)
+#define DEFINE_PROP_ENDIAN_NODEFAULT(_name, _state, _field) \
+    DEFINE_PROP_ENDIAN(_name, _state, _field, ENDIAN_MODE_UNSPECIFIED)
+
+#define DEFINE_PROP_VMAPPLE_VIRTIO_BLK_VARIANT(_name, _state, _fld, _default) \
+    DEFINE_PROP_UNSIGNED(_name, _state, _fld, _default, \
+                         qdev_prop_vmapple_virtio_blk_variant, \
+                         VMAppleVirtioBlkVariant)
+
 #endif
diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h
index bf27375a3ccdb..15fcec5260c79 100644
--- a/include/hw/qdev-properties.h
+++ b/include/hw/qdev-properties.h
@@ -30,7 +30,7 @@ struct Property {
 };
 
 struct PropertyInfo {
-    const char *name;
+    const char *type;
     const char *description;
     const QEnumLookup *enum_table;
     bool realized_set_allowed; /* allow setting property on realized device */
@@ -49,7 +49,6 @@ struct PropertyInfo {
 extern const PropertyInfo qdev_prop_bit;
 extern const PropertyInfo qdev_prop_bit64;
 extern const PropertyInfo qdev_prop_bool;
-extern const PropertyInfo qdev_prop_enum;
 extern const PropertyInfo qdev_prop_uint8;
 extern const PropertyInfo qdev_prop_uint16;
 extern const PropertyInfo qdev_prop_uint32;
diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h
index 2c43ea123f0c7..04944d4fed758 100644
--- a/include/hw/s390x/s390-pci-bus.h
+++ b/include/hw/s390x/s390-pci-bus.h
@@ -277,6 +277,7 @@ struct S390PCIIOMMU {
     AddressSpace as;
     MemoryRegion mr;
     IOMMUMemoryRegion iommu_mr;
+    MemoryRegion *dm_mr;
     bool enabled;
     uint64_t g_iota;
     uint64_t pba;
@@ -362,6 +363,7 @@ struct S390PCIBusDevice {
     bool interp;
     bool forwarding_assist;
     bool aif;
+    bool rtr_avail;
     QTAILQ_ENTRY(S390PCIBusDevice) link;
 };
 
@@ -389,6 +391,7 @@ int pci_chsc_sei_nt2_have_event(void);
 void s390_pci_sclp_configure(SCCB *sccb);
 void s390_pci_sclp_deconfigure(SCCB *sccb);
 void s390_pci_iommu_enable(S390PCIIOMMU *iommu);
+void s390_pci_iommu_direct_map_enable(S390PCIIOMMU *iommu);
 void s390_pci_iommu_disable(S390PCIIOMMU *iommu);
 void s390_pci_generate_error_event(uint16_t pec, uint32_t fh, uint32_t fid,
                                    uint64_t faddr, uint32_t e);
diff --git a/include/hw/s390x/s390-pci-clp.h b/include/hw/s390x/s390-pci-clp.h
index 03b7f9ba5f749..6a635d693baf0 100644
--- a/include/hw/s390x/s390-pci-clp.h
+++ b/include/hw/s390x/s390-pci-clp.h
@@ -158,6 +158,7 @@ typedef struct ClpRspQueryPciGrp {
 #define CLP_RSP_QPCIG_MASK_NOI 0xfff
     uint16_t i;
     uint8_t version;
+#define CLP_RSP_QPCIG_MASK_RTR     0x20
 #define CLP_RSP_QPCIG_MASK_FRAME   0x2
 #define CLP_RSP_QPCIG_MASK_REFRESH 0x1
     uint8_t fr;
diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h
index c3d5e17e3852e..90ee192b4d40b 100644
--- a/include/hw/scsi/scsi.h
+++ b/include/hw/scsi/scsi.h
@@ -24,6 +24,7 @@ struct SCSIRequest {
     SCSIBus           *bus;
     SCSIDevice        *dev;
     const SCSIReqOps  *ops;
+    AioContext        *ctx;
     uint32_t          refcount;
     uint32_t          tag;
     uint32_t          lun;
@@ -48,6 +49,8 @@ struct SCSIRequest {
     bool              dma_started;
     BlockAIOCB        *aiocb;
     QEMUSGList        *sg;
+
+    /* Protected by SCSIDevice->requests_lock */
     QTAILQ_ENTRY(SCSIRequest) next;
 };
 
@@ -76,10 +79,7 @@ struct SCSIDevice
     uint8_t sense[SCSI_SENSE_BUF_SIZE];
     uint32_t sense_len;
 
-    /*
-     * The requests list is only accessed from the AioContext that executes
-     * requests or from the main loop when IOThread processing is stopped.
-     */
+    QemuMutex requests_lock; /* protects the requests list */
     QTAILQ_HEAD(, SCSIRequest) requests;
 
     uint32_t channel;
diff --git a/include/hw/sd/sdhci.h b/include/hw/sd/sdhci.h
index 38c08e2859807..51fb30ea528b8 100644
--- a/include/hw/sd/sdhci.h
+++ b/include/hw/sd/sdhci.h
@@ -109,7 +109,7 @@ struct SDHCIState {
 typedef struct SDHCIState SDHCIState;
 
 #define SDHCI_VENDOR_NONE       0
-#define SDHCI_VENDOR_IMX        1
+#define SDHCI_VENDOR_FSL        2
 
 /*
  * Controller does not provide transfer-complete interrupt when not
diff --git a/include/hw/ssi/npcm7xx_fiu.h b/include/hw/ssi/npcm7xx_fiu.h
index a3a1704289627..7ebd422ca6c0a 100644
--- a/include/hw/ssi/npcm7xx_fiu.h
+++ b/include/hw/ssi/npcm7xx_fiu.h
@@ -60,6 +60,7 @@ struct NPCM7xxFIUState {
     int32_t cs_count;
     int32_t active_cs;
     qemu_irq *cs_lines;
+    uint64_t flash_size;
     NPCM7xxFIUFlash *flash;
 
     SSIBus *spi;
diff --git a/include/hw/ssi/pnv_spi.h b/include/hw/ssi/pnv_spi.h
index 8815f67d45398..c591a0663da60 100644
--- a/include/hw/ssi/pnv_spi.h
+++ b/include/hw/ssi/pnv_spi.h
@@ -23,6 +23,7 @@
 
 #include "hw/ssi/ssi.h"
 #include "hw/sysbus.h"
+#include "qemu/fifo8.h"
 
 #define TYPE_PNV_SPI "pnv-spi"
 OBJECT_DECLARE_SIMPLE_TYPE(PnvSpi, PNV_SPI)
@@ -30,15 +31,19 @@ OBJECT_DECLARE_SIMPLE_TYPE(PnvSpi, PNV_SPI)
 #define PNV_SPI_REG_SIZE 8
 #define PNV_SPI_REGS 7
 
-#define TYPE_PNV_SPI_BUS "pnv-spi-bus"
+#define TYPE_PNV_SPI_BUS "spi"
 typedef struct PnvSpi {
     SysBusDevice parent_obj;
 
     SSIBus *ssi_bus;
     qemu_irq *cs_line;
     MemoryRegion    xscom_spic_regs;
+    Fifo8 tx_fifo;
+    Fifo8 rx_fifo;
+    uint8_t fail_count; /* RDR Match failure counter */
     /* SPI object number */
     uint32_t        spic_num;
+    uint32_t        chip_id;
     uint8_t         transfer_len;
     uint8_t         responder_select;
     /* To verify if shift_n1 happens prior to shift_n2 */
diff --git a/include/hw/sysbus.h b/include/hw/sysbus.h
index c9b1e0e90e3ac..81bbda10d372c 100644
--- a/include/hw/sysbus.h
+++ b/include/hw/sysbus.h
@@ -19,6 +19,8 @@ DECLARE_INSTANCE_CHECKER(BusState, SYSTEM_BUS,
 OBJECT_DECLARE_TYPE(SysBusDevice, SysBusDeviceClass,
                     SYS_BUS_DEVICE)
 
+#define TYPE_DYNAMIC_SYS_BUS_DEVICE "dynamic-sysbus-device"
+
 /**
  * SysBusDeviceClass:
  *
diff --git a/include/hw/timer/hpet.h b/include/hw/timer/hpet.h
index 71e8c62453d1b..c2656f7f0bef6 100644
--- a/include/hw/timer/hpet.h
+++ b/include/hw/timer/hpet.h
@@ -73,7 +73,7 @@ struct hpet_fw_config
     struct hpet_fw_entry hpet[8];
 } QEMU_PACKED;
 
-extern struct hpet_fw_config hpet_cfg;
+extern struct hpet_fw_config hpet_fw_cfg;
 
 #define TYPE_HPET "hpet"
 
diff --git a/include/hw/timer/imx_gpt.h b/include/hw/timer/imx_gpt.h
index 5a1230da35e5e..5488f7e4df574 100644
--- a/include/hw/timer/imx_gpt.h
+++ b/include/hw/timer/imx_gpt.h
@@ -80,6 +80,7 @@
 #define TYPE_IMX6_GPT "imx6.gpt"
 #define TYPE_IMX6UL_GPT "imx6ul.gpt"
 #define TYPE_IMX7_GPT "imx7.gpt"
+#define TYPE_IMX8MP_GPT "imx8mp.gpt"
 
 #define TYPE_IMX_GPT TYPE_IMX25_GPT
 
diff --git a/include/hw/timer/qct-qtimer.h b/include/hw/timer/qct-qtimer.h
new file mode 100644
index 0000000000000..90f7981ccf8df
--- /dev/null
+++ b/include/hw/timer/qct-qtimer.h
@@ -0,0 +1,85 @@
+/*
+ * Qualcomm QCT QTimer
+ *
+ * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef TIMER_QCT_QTIMER_H
+#define TIMER_QCT_QTIMER_H
+
+#include "hw/ptimer.h"
+#include "hw/sysbus.h"
+
+#define TYPE_QCT_QTIMER "qct-qtimer"
+#define TYPE_QCT_HEXTIMER "qct-hextimer"
+OBJECT_DECLARE_SIMPLE_TYPE(QCTQtimerState, QCT_QTIMER)
+OBJECT_DECLARE_SIMPLE_TYPE(QCTHextimerState, QCT_HEXTIMER)
+
+struct QCTHextimerState {
+    QCTQtimerState *qtimer;
+    ptimer_state *timer;
+    uint64_t cntval; /*
+                      * Physical timer compare value interrupt when cntpct >
+                      * cntval
+                      */
+    uint64_t cntpct; /* Physical counter */
+    uint32_t control;
+    uint32_t cnt_ctrl;
+    uint32_t cntpl0acr;
+    uint64_t limit;
+    uint32_t freq;
+    uint32_t int_level;
+    qemu_irq irq;
+};
+
+#define QCT_QTIMER_TIMER_FRAME_ELTS (8)
+#define QCT_QTIMER_TIMER_VIEW_ELTS (2)
+struct QCTQtimerState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion iomem;
+    MemoryRegion view_iomem;
+    uint32_t secure;
+    struct QCTHextimerState timer[QCT_QTIMER_TIMER_FRAME_ELTS];
+    uint32_t frame_id;
+    uint32_t freq;
+    uint32_t nr_frames;
+    uint32_t nr_views;
+    uint32_t cnttid;
+};
+
+#define QCT_QTIMER_AC_CNTFRQ (0x000)
+#define QCT_QTIMER_AC_CNTSR (0x004)
+#define QCT_QTIMER_AC_CNTSR_NSN_1 (1 << 0)
+#define QCT_QTIMER_AC_CNTSR_NSN_2 (1 << 1)
+#define QCT_QTIMER_AC_CNTSR_NSN_3 (1 << 2)
+#define QCT_QTIMER_AC_CNTTID (0x08)
+#define QCT_QTIMER_AC_CNTACR_0 (0x40)
+#define QCT_QTIMER_AC_CNTACR_1 (0x44)
+#define QCT_QTIMER_AC_CNTACR_2 (0x48)
+#define QCT_QTIMER_AC_CNTACR_RWPT (1 << 5) /* R/W of CNTP_* regs */
+#define QCT_QTIMER_AC_CNTACR_RWVT (1 << 4) /* R/W of CNTV_* regs */
+#define QCT_QTIMER_AC_CNTACR_RVOFF (1 << 3) /* R/W of CNTVOFF register */
+#define QCT_QTIMER_AC_CNTACR_RFRQ (1 << 2) /* R/W of CNTFRQ register */
+#define QCT_QTIMER_AC_CNTACR_RPVCT (1 << 1) /* R/W of CNTVCT register */
+#define QCT_QTIMER_AC_CNTACR_RPCT (1 << 0) /* R/W of CNTPCT register */
+#define QCT_QTIMER_VERSION (0x0fd0)
+#define QCT_QTIMER_CNTPCT_LO (0x000)
+#define QCT_QTIMER_CNTPCT_HI (0x004)
+#define QCT_QTIMER_CNT_FREQ (0x010)
+#define QCT_QTIMER_CNTPL0ACR (0x014)
+#define QCT_QTIMER_CNTPL0ACR_PL0CTEN (1 << 9)
+#define QCT_QTIMER_CNTPL0ACR_PL0TVEN (1 << 8)
+#define QCT_QTIMER_CNTPL0ACR_PL0VCTEN (1 << 1)
+#define QCT_QTIMER_CNTPL0ACR_PL0PCTEN (1 << 0)
+#define QCT_QTIMER_CNTP_CVAL_LO (0x020)
+#define QCT_QTIMER_CNTP_CVAL_HI (0x024)
+#define QCT_QTIMER_CNTP_TVAL (0x028)
+#define QCT_QTIMER_CNTP_CTL (0x02c)
+#define QCT_QTIMER_CNTP_CTL_ISTAT (1 << 2)
+#define QCT_QTIMER_CNTP_CTL_INTEN (1 << 1)
+#define QCT_QTIMER_CNTP_CTL_ENABLE (1 << 0)
+#define QCT_QTIMER_AC_CNTACR_START 0x40
+#define QCT_QTIMER_AC_CNTACR_END 0x5C
+
+#endif /* TIMER_QCT_QTIMER_H */
diff --git a/include/hw/uefi/hardware-info.h b/include/hw/uefi/hardware-info.h
new file mode 100644
index 0000000000000..94c38cff20076
--- /dev/null
+++ b/include/hw/uefi/hardware-info.h
@@ -0,0 +1,35 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * pass hardware information to uefi
+ *
+ * see OvmfPkg/Library/HardwareInfoLib/ in edk2
+ */
+#ifndef QEMU_UEFI_HARDWARE_INFO_H
+#define QEMU_UEFI_HARDWARE_INFO_H
+
+/* data structures */
+
+typedef enum {
+    HardwareInfoTypeUndefined  = 0,
+    HardwareInfoTypeHostBridge = 1,
+    HardwareInfoQemuUefiVars   = 2,
+} HARDWARE_INFO_TYPE;
+
+typedef struct {
+    union {
+        uint64_t            uint64;
+        HARDWARE_INFO_TYPE  value;
+    } type;
+    uint64_t  size;
+} HARDWARE_INFO_HEADER;
+
+typedef struct {
+    uint64_t  mmio_address;
+} HARDWARE_INFO_SIMPLE_DEVICE;
+
+/* qemu functions */
+
+void hardware_info_register(HARDWARE_INFO_TYPE type, void *info, uint64_t size);
+
+#endif /* QEMU_UEFI_HARDWARE_INFO_H */
diff --git a/include/hw/uefi/var-service-api.h b/include/hw/uefi/var-service-api.h
new file mode 100644
index 0000000000000..0d71638f3efee
--- /dev/null
+++ b/include/hw/uefi/var-service-api.h
@@ -0,0 +1,48 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * uefi-vars device - API of the virtual device for guest/host communication.
+ */
+#ifndef QEMU_UEFI_VAR_SERVICE_API_H
+#define QEMU_UEFI_VAR_SERVICE_API_H
+
+/* qom: device names */
+#define TYPE_UEFI_VARS_X64       "uefi-vars-x64"
+#define TYPE_UEFI_VARS_SYSBUS    "uefi-vars-sysbus"
+
+/* sysbus: fdt node path */
+#define UEFI_VARS_FDT_NODE       "qemu-uefi-vars"
+#define UEFI_VARS_FDT_COMPAT     "qemu,uefi-vars"
+
+/* registers */
+#define UEFI_VARS_REG_MAGIC                  0x00  /* 16 bit */
+#define UEFI_VARS_REG_CMD_STS                0x02  /* 16 bit */
+#define UEFI_VARS_REG_BUFFER_SIZE            0x04  /* 32 bit */
+#define UEFI_VARS_REG_DMA_BUFFER_ADDR_LO     0x08  /* 32 bit */
+#define UEFI_VARS_REG_DMA_BUFFER_ADDR_HI     0x0c  /* 32 bit */
+#define UEFI_VARS_REG_PIO_BUFFER_TRANSFER    0x10  /* 8-64 bit */
+#define UEFI_VARS_REG_PIO_BUFFER_CRC32C      0x18  /* 32 bit (read-only) */
+#define UEFI_VARS_REG_FLAGS                  0x1c  /* 32 bit */
+#define UEFI_VARS_REGS_SIZE                  0x20
+
+/* flags register */
+#define UEFI_VARS_FLAG_USE_PIO           (1 << 0)
+
+/* magic value */
+#define UEFI_VARS_MAGIC_VALUE               0xef1
+
+/* command values */
+#define UEFI_VARS_CMD_RESET                  0x01
+#define UEFI_VARS_CMD_DMA_MM                 0x02
+#define UEFI_VARS_CMD_PIO_MM                 0x03
+#define UEFI_VARS_CMD_PIO_ZERO_OFFSET        0x04
+
+/* status values */
+#define UEFI_VARS_STS_SUCCESS                0x00
+#define UEFI_VARS_STS_BUSY                   0x01
+#define UEFI_VARS_STS_ERR_UNKNOWN            0x10
+#define UEFI_VARS_STS_ERR_NOT_SUPPORTED      0x11
+#define UEFI_VARS_STS_ERR_BAD_BUFFER_SIZE    0x12
+
+
+#endif /* QEMU_UEFI_VAR_SERVICE_API_H */
diff --git a/include/hw/uefi/var-service-edk2.h b/include/hw/uefi/var-service-edk2.h
new file mode 100644
index 0000000000000..c743a8df948d4
--- /dev/null
+++ b/include/hw/uefi/var-service-edk2.h
@@ -0,0 +1,227 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * uefi-vars device - structs and defines from edk2
+ *
+ * Note: The edk2 UINTN type has been mapped to uint64_t,
+ *       so the structs are compatible with 64bit edk2 builds.
+ */
+#ifndef QEMU_UEFI_VAR_SERVICE_EDK2_H
+#define QEMU_UEFI_VAR_SERVICE_EDK2_H
+
+#include "qemu/uuid.h"
+
+#define MAX_BIT                   0x8000000000000000ULL
+#define ENCODE_ERROR(StatusCode)  (MAX_BIT | (StatusCode))
+#define EFI_SUCCESS               0
+#define EFI_INVALID_PARAMETER     ENCODE_ERROR(2)
+#define EFI_UNSUPPORTED           ENCODE_ERROR(3)
+#define EFI_BAD_BUFFER_SIZE       ENCODE_ERROR(4)
+#define EFI_BUFFER_TOO_SMALL      ENCODE_ERROR(5)
+#define EFI_WRITE_PROTECTED       ENCODE_ERROR(8)
+#define EFI_OUT_OF_RESOURCES      ENCODE_ERROR(9)
+#define EFI_NOT_FOUND             ENCODE_ERROR(14)
+#define EFI_ACCESS_DENIED         ENCODE_ERROR(15)
+#define EFI_ALREADY_STARTED       ENCODE_ERROR(20)
+#define EFI_SECURITY_VIOLATION    ENCODE_ERROR(26)
+
+#define EFI_VARIABLE_NON_VOLATILE                           0x01
+#define EFI_VARIABLE_BOOTSERVICE_ACCESS                     0x02
+#define EFI_VARIABLE_RUNTIME_ACCESS                         0x04
+#define EFI_VARIABLE_HARDWARE_ERROR_RECORD                  0x08
+#define EFI_VARIABLE_AUTHENTICATED_WRITE_ACCESS             0x10  /* deprecated */
+#define EFI_VARIABLE_TIME_BASED_AUTHENTICATED_WRITE_ACCESS  0x20
+#define EFI_VARIABLE_APPEND_WRITE                           0x40
+
+/* SecureBootEnable */
+#define SECURE_BOOT_ENABLE         1
+#define SECURE_BOOT_DISABLE        0
+
+/* SecureBoot */
+#define SECURE_BOOT_MODE_ENABLE    1
+#define SECURE_BOOT_MODE_DISABLE   0
+
+/* CustomMode */
+#define CUSTOM_SECURE_BOOT_MODE    1
+#define STANDARD_SECURE_BOOT_MODE  0
+
+/* SetupMode */
+#define SETUP_MODE                 1
+#define USER_MODE                  0
+
+typedef uint64_t efi_status;
+typedef struct mm_header mm_header;
+
+/* EFI_MM_COMMUNICATE_HEADER */
+struct mm_header {
+    QemuUUID  guid;
+    uint64_t  length;
+};
+
+/* --- EfiSmmVariableProtocol ---------------------------------------- */
+
+#define SMM_VARIABLE_FUNCTION_GET_VARIABLE            1
+#define SMM_VARIABLE_FUNCTION_GET_NEXT_VARIABLE_NAME  2
+#define SMM_VARIABLE_FUNCTION_SET_VARIABLE            3
+#define SMM_VARIABLE_FUNCTION_QUERY_VARIABLE_INFO     4
+#define SMM_VARIABLE_FUNCTION_READY_TO_BOOT           5
+#define SMM_VARIABLE_FUNCTION_EXIT_BOOT_SERVICE       6
+#define SMM_VARIABLE_FUNCTION_LOCK_VARIABLE           8
+#define SMM_VARIABLE_FUNCTION_GET_PAYLOAD_SIZE       11
+
+typedef struct mm_variable mm_variable;
+typedef struct mm_variable_access mm_variable_access;
+typedef struct mm_next_variable mm_next_variable;
+typedef struct mm_next_variable mm_lock_variable;
+typedef struct mm_variable_info mm_variable_info;
+typedef struct mm_get_payload_size mm_get_payload_size;
+
+/* SMM_VARIABLE_COMMUNICATE_HEADER */
+struct mm_variable {
+    uint64_t  function;
+    uint64_t  status;
+};
+
+/* SMM_VARIABLE_COMMUNICATE_ACCESS_VARIABLE */
+struct QEMU_PACKED mm_variable_access {
+    QemuUUID  guid;
+    uint64_t  data_size;
+    uint64_t  name_size;
+    uint32_t  attributes;
+    /* Name */
+    /* Data */
+};
+
+/* SMM_VARIABLE_COMMUNICATE_GET_NEXT_VARIABLE_NAME */
+struct mm_next_variable {
+    QemuUUID  guid;
+    uint64_t  name_size;
+    /* Name */
+};
+
+/* SMM_VARIABLE_COMMUNICATE_QUERY_VARIABLE_INFO */
+struct QEMU_PACKED mm_variable_info {
+    uint64_t max_storage_size;
+    uint64_t free_storage_size;
+    uint64_t max_variable_size;
+    uint32_t attributes;
+};
+
+/* SMM_VARIABLE_COMMUNICATE_GET_PAYLOAD_SIZE */
+struct mm_get_payload_size {
+    uint64_t  payload_size;
+};
+
+/* --- VarCheckPolicyLibMmiHandler ----------------------------------- */
+
+#define VAR_CHECK_POLICY_COMMAND_DISABLE     0x01
+#define VAR_CHECK_POLICY_COMMAND_IS_ENABLED  0x02
+#define VAR_CHECK_POLICY_COMMAND_REGISTER    0x03
+#define VAR_CHECK_POLICY_COMMAND_DUMP        0x04
+#define VAR_CHECK_POLICY_COMMAND_LOCK        0x05
+
+typedef struct mm_check_policy mm_check_policy;
+typedef struct mm_check_policy_is_enabled mm_check_policy_is_enabled;
+typedef struct mm_check_policy_dump_params mm_check_policy_dump_params;
+
+/* VAR_CHECK_POLICY_COMM_HEADER */
+struct QEMU_PACKED mm_check_policy {
+    uint32_t  signature;
+    uint32_t  revision;
+    uint32_t  command;
+    uint64_t  result;
+};
+
+/* VAR_CHECK_POLICY_COMM_IS_ENABLED_PARAMS */
+struct QEMU_PACKED mm_check_policy_is_enabled {
+    uint8_t   state;
+};
+
+/* VAR_CHECK_POLICY_COMM_DUMP_PARAMS */
+struct QEMU_PACKED mm_check_policy_dump_params {
+    uint32_t  page_requested;
+    uint32_t  total_size;
+    uint32_t  page_size;
+    uint8_t   has_more;
+};
+
+/* --- Edk2VariablePolicyProtocol ------------------------------------ */
+
+#define VARIABLE_POLICY_ENTRY_REVISION  0x00010000
+
+#define VARIABLE_POLICY_TYPE_NO_LOCK            0
+#define VARIABLE_POLICY_TYPE_LOCK_NOW           1
+#define VARIABLE_POLICY_TYPE_LOCK_ON_CREATE     2
+#define VARIABLE_POLICY_TYPE_LOCK_ON_VAR_STATE  3
+
+typedef struct variable_policy_entry variable_policy_entry;
+typedef struct variable_lock_on_var_state variable_lock_on_var_state;
+
+/* VARIABLE_POLICY_ENTRY */
+struct variable_policy_entry {
+    uint32_t      version;
+    uint16_t      size;
+    uint16_t      offset_to_name;
+    QemuUUID      namespace;
+    uint32_t      min_size;
+    uint32_t      max_size;
+    uint32_t      attributes_must_have;
+    uint32_t      attributes_cant_have;
+    uint8_t       lock_policy_type;
+    uint8_t       padding[3];
+    /* LockPolicy */
+    /* Name */
+};
+
+/* VARIABLE_LOCK_ON_VAR_STATE_POLICY */
+struct variable_lock_on_var_state {
+    QemuUUID      namespace;
+    uint8_t       value;
+    uint8_t       padding;
+    /* Name */
+};
+
+/* --- variable authentication --------------------------------------- */
+
+#define WIN_CERT_TYPE_EFI_GUID  0x0EF1
+
+typedef struct efi_time efi_time;
+typedef struct efi_siglist efi_siglist;
+typedef struct variable_auth_2 variable_auth_2;
+
+/* EFI_TIME */
+struct efi_time {
+    uint16_t  year;
+    uint8_t   month;
+    uint8_t   day;
+    uint8_t   hour;
+    uint8_t   minute;
+    uint8_t   second;
+    uint8_t   pad1;
+    uint32_t  nanosecond;
+    int16_t   timezone;
+    uint8_t   daylight;
+    uint8_t   pad2;
+};
+
+/* EFI_SIGNATURE_LIST */
+struct efi_siglist {
+    QemuUUID  guid_type;
+    uint32_t  siglist_size;
+    uint32_t  header_size;
+    uint32_t  sig_size;
+};
+
+/* EFI_VARIABLE_AUTHENTICATION_2 */
+struct variable_auth_2 {
+    struct efi_time timestamp;
+
+    /* WIN_CERTIFICATE_UEFI_GUID */
+    uint32_t  hdr_length;
+    uint16_t  hdr_revision;
+    uint16_t  hdr_cert_type;
+    QemuUUID  guid_cert_type;
+    uint8_t   cert_data[];
+};
+
+#endif /* QEMU_UEFI_VAR_SERVICE_EDK2_H */
diff --git a/include/hw/uefi/var-service.h b/include/hw/uefi/var-service.h
new file mode 100644
index 0000000000000..f7ceac4ce243d
--- /dev/null
+++ b/include/hw/uefi/var-service.h
@@ -0,0 +1,191 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * uefi-vars device - state struct and function prototypes
+ */
+#ifndef QEMU_UEFI_VAR_SERVICE_H
+#define QEMU_UEFI_VAR_SERVICE_H
+
+#include "qemu/uuid.h"
+#include "qemu/queue.h"
+
+#include "hw/uefi/var-service-edk2.h"
+
+#define MAX_BUFFER_SIZE (64 * 1024)
+
+typedef struct uefi_variable uefi_variable;
+typedef struct uefi_var_policy uefi_var_policy;
+typedef struct uefi_vars_state uefi_vars_state;
+
+typedef struct uefi_vars_cert uefi_vars_cert;
+typedef struct uefi_vars_hash uefi_vars_hash;
+typedef struct uefi_vars_siglist uefi_vars_siglist;
+
+struct uefi_variable {
+    QemuUUID                          guid;
+    uint16_t                          *name;
+    uint32_t                          name_size;
+    uint32_t                          attributes;
+    void                              *data;
+    uint32_t                          data_size;
+    efi_time                          time;
+    void                              *digest;
+    uint32_t                          digest_size;
+    QTAILQ_ENTRY(uefi_variable)       next;
+};
+
+struct uefi_var_policy {
+    variable_policy_entry             *entry;
+    uint32_t                          entry_size;
+    uint16_t                          *name;
+    uint32_t                          name_size;
+
+    /* number of hashmarks (wildcard character) in name */
+    uint32_t                          hashmarks;
+
+    QTAILQ_ENTRY(uefi_var_policy)     next;
+};
+
+struct uefi_vars_state {
+    MemoryRegion                      mr;
+    uint16_t                          sts;
+    uint32_t                          buf_size;
+    uint32_t                          buf_addr_lo;
+    uint32_t                          buf_addr_hi;
+    uint8_t                           *buffer;
+    QTAILQ_HEAD(, uefi_variable)      variables;
+    QTAILQ_HEAD(, uefi_var_policy)    var_policies;
+
+    /* pio transfer buffer */
+    uint32_t                          pio_xfer_offset;
+    uint8_t                           *pio_xfer_buffer;
+
+    /* boot phases */
+    bool                              end_of_dxe;
+    bool                              ready_to_boot;
+    bool                              exit_boot_service;
+    bool                              policy_locked;
+
+    /* storage accounting */
+    uint64_t                          max_storage;
+    uint64_t                          used_storage;
+
+    /* config options */
+    char                              *jsonfile;
+    int                               jsonfd;
+    bool                              force_secure_boot;
+    bool                              disable_custom_mode;
+    bool                              use_pio;
+};
+
+struct uefi_vars_cert {
+    QTAILQ_ENTRY(uefi_vars_cert)  next;
+    QemuUUID                      owner;
+    uint64_t                      size;
+    uint8_t                       data[];
+};
+
+struct uefi_vars_hash {
+    QTAILQ_ENTRY(uefi_vars_hash)  next;
+    QemuUUID                      owner;
+    uint8_t                       data[];
+};
+
+struct uefi_vars_siglist {
+    QTAILQ_HEAD(, uefi_vars_cert)  x509;
+    QTAILQ_HEAD(, uefi_vars_hash)  sha256;
+};
+
+/* vars-service-guid.c */
+extern const QemuUUID EfiGlobalVariable;
+extern const QemuUUID EfiImageSecurityDatabase;
+extern const QemuUUID EfiCustomModeEnable;
+extern const QemuUUID EfiSecureBootEnableDisable;
+
+extern const QemuUUID EfiCertSha256Guid;
+extern const QemuUUID EfiCertSha384Guid;
+extern const QemuUUID EfiCertSha512Guid;
+extern const QemuUUID EfiCertRsa2048Guid;
+extern const QemuUUID EfiCertX509Guid;
+extern const QemuUUID EfiCertTypePkcs7Guid;
+
+extern const QemuUUID EfiSmmVariableProtocolGuid;
+extern const QemuUUID VarCheckPolicyLibMmiHandlerGuid;
+
+extern const QemuUUID EfiEndOfDxeEventGroupGuid;
+extern const QemuUUID EfiEventReadyToBootGuid;
+extern const QemuUUID EfiEventExitBootServicesGuid;
+
+/* vars-service-utils.c */
+gboolean uefi_str_is_valid(const uint16_t *str, size_t len,
+                           gboolean must_be_null_terminated);
+size_t uefi_strlen(const uint16_t *str, size_t len);
+gboolean uefi_str_equal_ex(const uint16_t *a, size_t alen,
+                           const uint16_t *b, size_t blen,
+                           gboolean wildcards_in_a);
+gboolean uefi_str_equal(const uint16_t *a, size_t alen,
+                        const uint16_t *b, size_t blen);
+char *uefi_ucs2_to_ascii(const uint16_t *ucs2, uint64_t ucs2_size);
+int uefi_time_compare(efi_time *a, efi_time *b);
+void uefi_trace_variable(const char *action, QemuUUID guid,
+                         const uint16_t *name, uint64_t name_size);
+void uefi_trace_status(const char *action, efi_status status);
+
+/* vars-service-core.c */
+extern const VMStateDescription vmstate_uefi_vars;
+void uefi_vars_init(Object *obj, uefi_vars_state *uv);
+void uefi_vars_realize(uefi_vars_state *uv, Error **errp);
+void uefi_vars_hard_reset(uefi_vars_state *uv);
+
+/* vars-service-json.c */
+void uefi_vars_json_init(uefi_vars_state *uv, Error **errp);
+void uefi_vars_json_save(uefi_vars_state *uv);
+void uefi_vars_json_load(uefi_vars_state *uv, Error **errp);
+
+/* vars-service-vars.c */
+extern const VMStateDescription vmstate_uefi_variable;
+uefi_variable *uefi_vars_find_variable(uefi_vars_state *uv, QemuUUID guid,
+                                       const uint16_t *name,
+                                       uint64_t name_size);
+void uefi_vars_set_variable(uefi_vars_state *uv, QemuUUID guid,
+                            const uint16_t *name, uint64_t name_size,
+                            uint32_t attributes,
+                            void *data, uint64_t data_size);
+void uefi_vars_clear_volatile(uefi_vars_state *uv);
+void uefi_vars_clear_all(uefi_vars_state *uv);
+void uefi_vars_update_storage(uefi_vars_state *uv);
+uint32_t uefi_vars_mm_vars_proto(uefi_vars_state *uv);
+
+/* vars-service-auth.c */
+bool uefi_vars_is_sb_pk(uefi_variable *var);
+bool uefi_vars_is_sb_any(uefi_variable *var);
+efi_status uefi_vars_check_auth_2(uefi_vars_state *uv, uefi_variable *var,
+                                  mm_variable_access *va, void *data);
+efi_status uefi_vars_check_secure_boot(uefi_vars_state *uv, uefi_variable *var);
+void uefi_vars_auth_init(uefi_vars_state *uv);
+
+/* vars-service-pkcs7.c */
+efi_status uefi_vars_check_pkcs7_2(uefi_variable *siglist,
+                                   void **digest, uint32_t *digest_size,
+                                   mm_variable_access *va, void *data);
+
+/* vars-service-siglist.c */
+void uefi_vars_siglist_init(uefi_vars_siglist *siglist);
+void uefi_vars_siglist_free(uefi_vars_siglist *siglist);
+void uefi_vars_siglist_parse(uefi_vars_siglist *siglist,
+                             void *data, uint64_t size);
+uint64_t uefi_vars_siglist_blob_size(uefi_vars_siglist *siglist);
+void uefi_vars_siglist_blob_generate(uefi_vars_siglist *siglist,
+                                     void *data, uint64_t size);
+
+/* vars-service-policy.c */
+extern const VMStateDescription vmstate_uefi_var_policy;
+efi_status uefi_vars_policy_check(uefi_vars_state *uv,
+                                  uefi_variable *var,
+                                  gboolean is_newvar);
+void uefi_vars_policies_clear(uefi_vars_state *uv);
+uefi_var_policy *uefi_vars_add_policy(uefi_vars_state *uv,
+                                      variable_policy_entry *pe);
+uint32_t uefi_vars_mm_check_policy_proto(uefi_vars_state *uv);
+
+#endif /* QEMU_UEFI_VAR_SERVICE_H */
diff --git a/include/hw/usb/hcd-dwc3.h b/include/hw/usb/hcd-dwc3.h
index f752a27e94044..dbdf12b21d743 100644
--- a/include/hw/usb/hcd-dwc3.h
+++ b/include/hw/usb/hcd-dwc3.h
@@ -35,7 +35,7 @@
 #define USB_DWC3(obj) \
      OBJECT_CHECK(USBDWC3, (obj), TYPE_USB_DWC3)
 
-#define USB_DWC3_R_MAX ((0x530 / 4) + 1)
+#define USB_DWC3_R_MAX (0x600 / 4)
 #define DWC3_SIZE 0x10000
 
 typedef struct USBDWC3 {
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index ac35136a11051..04b123a6c929a 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -36,6 +36,23 @@
 
 #define VFIO_MSG_PREFIX "vfio %s: "
 
+/*
+ * Flags to be used as unique delimiters for VFIO devices in the migration
+ * stream. These flags are composed as:
+ * 0xffffffff => MSB 32-bit all 1s
+ * 0xef10     => Magic ID, represents emulated (virtual) function IO
+ * 0x0000     => 16-bits reserved for flags
+ *
+ * The beginning of state information is marked by _DEV_CONFIG_STATE,
+ * _DEV_SETUP_STATE, or _DEV_DATA_STATE, respectively. The end of a
+ * certain state information is marked by _END_OF_STATE.
+ */
+#define VFIO_MIG_FLAG_END_OF_STATE      (0xffffffffef100001ULL)
+#define VFIO_MIG_FLAG_DEV_CONFIG_STATE  (0xffffffffef100002ULL)
+#define VFIO_MIG_FLAG_DEV_SETUP_STATE   (0xffffffffef100003ULL)
+#define VFIO_MIG_FLAG_DEV_DATA_STATE    (0xffffffffef100004ULL)
+#define VFIO_MIG_FLAG_DEV_INIT_DATA_SENT (0xffffffffef100005ULL)
+
 enum {
     VFIO_DEVICE_TYPE_PCI = 0,
     VFIO_DEVICE_TYPE_PLATFORM = 1,
@@ -61,6 +78,8 @@ typedef struct VFIORegion {
     uint8_t nr; /* cache the region number for debug */
 } VFIORegion;
 
+typedef struct VFIOMultifd VFIOMultifd;
+
 typedef struct VFIOMigration {
     struct VFIODevice *vbasedev;
     VMChangeStateEntry *vm_state;
@@ -72,6 +91,8 @@ typedef struct VFIOMigration {
     uint64_t mig_flags;
     uint64_t precopy_init_size;
     uint64_t precopy_dirty_size;
+    bool multifd_transfer;
+    VFIOMultifd *multifd;
     bool initial_data_sent;
 
     bool event_save_iterate_started;
@@ -133,6 +154,7 @@ typedef struct VFIODevice {
     bool no_mmap;
     bool ram_block_discard_allowed;
     OnOffAuto enable_migration;
+    OnOffAuto migration_multifd_transfer;
     bool migration_events;
     VFIODeviceOps *ops;
     unsigned int num_irqs;
@@ -274,9 +296,13 @@ void vfio_unblock_multiple_devices_migration(void);
 bool vfio_viommu_preset(VFIODevice *vbasedev);
 int64_t vfio_mig_bytes_transferred(void);
 void vfio_reset_bytes_transferred(void);
+void vfio_mig_add_bytes_transferred(unsigned long val);
 bool vfio_device_state_is_running(VFIODevice *vbasedev);
 bool vfio_device_state_is_precopy(VFIODevice *vbasedev);
 
+int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp);
+int vfio_load_device_config_state(QEMUFile *f, void *opaque);
+
 #ifdef CONFIG_LINUX
 int vfio_get_region_info(VFIODevice *vbasedev, int index,
                          struct vfio_region_info **info);
@@ -291,6 +317,11 @@ struct vfio_info_cap_header *
 vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id);
 struct vfio_info_cap_header *
 vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id);
+
+int vfio_migration_set_state(VFIODevice *vbasedev,
+                             enum vfio_device_mig_state new_state,
+                             enum vfio_device_mig_state recover_state,
+                             Error **errp);
 #endif
 
 bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
diff --git a/include/hw/virtio/iothread-vq-mapping.h b/include/hw/virtio/iothread-vq-mapping.h
new file mode 100644
index 0000000000000..57335c3703cc5
--- /dev/null
+++ b/include/hw/virtio/iothread-vq-mapping.h
@@ -0,0 +1,45 @@
+/*
+ * IOThread Virtqueue Mapping
+ *
+ * Copyright Red Hat, Inc
+ *
+ * SPDX-License-Identifier: GPL-2.0-only
+ */
+
+#ifndef HW_VIRTIO_IOTHREAD_VQ_MAPPING_H
+#define HW_VIRTIO_IOTHREAD_VQ_MAPPING_H
+
+#include "qapi/error.h"
+#include "qapi/qapi-types-virtio.h"
+
+/**
+ * iothread_vq_mapping_apply:
+ * @list: The mapping of virtqueues to IOThreads.
+ * @vq_aio_context: The array of AioContext pointers to fill in.
+ * @num_queues: The length of @vq_aio_context.
+ * @errp: If an error occurs, a pointer to the area to store the error.
+ *
+ * Fill in the AioContext for each virtqueue in the @vq_aio_context array given
+ * the iothread-vq-mapping parameter in @list.
+ *
+ * iothread_vq_mapping_cleanup() must be called to free IOThread object
+ * references after this function returns success.
+ *
+ * Returns: %true on success, %false on failure.
+ **/
+bool iothread_vq_mapping_apply(
+        IOThreadVirtQueueMappingList *list,
+        AioContext **vq_aio_context,
+        uint16_t num_queues,
+        Error **errp);
+
+/**
+ * iothread_vq_mapping_cleanup:
+ * @list: The mapping of virtqueues to IOThreads.
+ *
+ * Release IOThread object references that were acquired by
+ * iothread_vq_mapping_apply().
+ */
+void iothread_vq_mapping_cleanup(IOThreadVirtQueueMappingList *list);
+
+#endif /* HW_VIRTIO_IOTHREAD_VQ_MAPPING_H */
diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h
index b12c18a43b695..0456c211c6e60 100644
--- a/include/hw/virtio/virtio-balloon.h
+++ b/include/hw/virtio/virtio-balloon.h
@@ -16,6 +16,7 @@
 #define QEMU_VIRTIO_BALLOON_H
 
 #include "standard-headers/linux/virtio_balloon.h"
+#include "hw/resettable.h"
 #include "hw/virtio/virtio.h"
 #include "system/iothread.h"
 #include "qom/object.h"
@@ -71,6 +72,9 @@ struct VirtIOBalloon {
 
     bool qemu_4_0_config_size;
     uint32_t poison_val;
+
+    /* State of the resettable container */
+    ResettableState reset_state;
 };
 
 #endif
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index 8a16218c4098c..3d8dee7ec1584 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -24,7 +24,7 @@
 #include "qapi/qapi-types-virtio.h"
 
 #define TYPE_VIRTIO_BLK "virtio-blk-device"
-OBJECT_DECLARE_SIMPLE_TYPE(VirtIOBlock, VIRTIO_BLK)
+OBJECT_DECLARE_TYPE(VirtIOBlock, VirtIOBlkClass, VIRTIO_BLK)
 
 /* This is the last element of the write scatter-gather list */
 struct virtio_blk_inhdr
@@ -100,6 +100,15 @@ typedef struct MultiReqBuffer {
     bool is_write;
 } MultiReqBuffer;
 
+typedef struct VirtIOBlkClass {
+    /*< private >*/
+    VirtioDeviceClass parent;
+    /*< public >*/
+    bool (*handle_unknown_request)(VirtIOBlockReq *req, MultiReqBuffer *mrb,
+                                   uint32_t type);
+} VirtIOBlkClass;
+
 void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq);
+void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status);
 
 #endif
diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
index be230cd4bfcd7..31e852ed6c44e 100644
--- a/include/hw/virtio/virtio-scsi.h
+++ b/include/hw/virtio/virtio-scsi.h
@@ -22,6 +22,7 @@
 #include "hw/virtio/virtio.h"
 #include "hw/scsi/scsi.h"
 #include "chardev/char-fe.h"
+#include "qapi/qapi-types-virtio.h"
 #include "system/iothread.h"
 
 #define TYPE_VIRTIO_SCSI_COMMON "virtio-scsi-common"
@@ -60,6 +61,7 @@ struct VirtIOSCSIConf {
     CharBackend chardev;
     uint32_t boot_tpgt;
     IOThread *iothread;
+    IOThreadVirtQueueMappingList *iothread_vq_mapping_list;
 };
 
 struct VirtIOSCSI;
@@ -82,18 +84,14 @@ struct VirtIOSCSI {
 
     SCSIBus bus;
     int resetting; /* written from main loop thread, read from any thread */
+
+    QemuMutex event_lock; /* protects event_vq and events_dropped */
     bool events_dropped;
 
-    /*
-     * TMFs deferred to main loop BH. These fields are protected by
-     * tmf_bh_lock.
-     */
-    QemuMutex tmf_bh_lock;
-    QEMUBH *tmf_bh;
-    QTAILQ_HEAD(, VirtIOSCSIReq) tmf_bh_list;
+    QemuMutex ctrl_lock; /* protects ctrl_vq */
 
     /* Fields for dataplane below */
-    AioContext *ctx; /* one iothread per virtio-scsi-pci for now */
+    AioContext **vq_aio_context; /* per-virtqueue AioContext pointer */
 
     bool dataplane_started;
     bool dataplane_starting;
@@ -111,6 +109,7 @@ void virtio_scsi_common_realize(DeviceState *dev,
 void virtio_scsi_common_unrealize(DeviceState *dev);
 
 void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp);
+void virtio_scsi_dataplane_cleanup(VirtIOSCSI *s);
 int virtio_scsi_dataplane_start(VirtIODevice *s);
 void virtio_scsi_dataplane_stop(VirtIODevice *s);
 
diff --git a/include/hw/vmapple/vmapple.h b/include/hw/vmapple/vmapple.h
new file mode 100644
index 0000000000000..9c1ad1bd8c3c0
--- /dev/null
+++ b/include/hw/vmapple/vmapple.h
@@ -0,0 +1,23 @@
+/*
+ * Devices specific to the VMApple machine type
+ *
+ * Copyright © 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_VMAPPLE_VMAPPLE_H
+#define HW_VMAPPLE_VMAPPLE_H
+
+#define TYPE_APPLE_AES "apple-aes"
+
+#define TYPE_VMAPPLE_BDIF "vmapple-bdif"
+
+#define TYPE_VMAPPLE_CFG "vmapple-cfg"
+
+#define TYPE_VMAPPLE_VIRTIO_BLK_PCI "vmapple-virtio-blk-pci"
+
+#endif /* HW_VMAPPLE_VMAPPLE_H */
diff --git a/include/hw/xen/arch_hvm.h b/include/hw/xen/arch_hvm.h
index c7c515220d252..df39c819c8f18 100644
--- a/include/hw/xen/arch_hvm.h
+++ b/include/hw/xen/arch_hvm.h
@@ -1,5 +1,5 @@
 #if defined(TARGET_I386) || defined(TARGET_X86_64)
 #include "hw/i386/xen_arch_hvm.h"
-#elif defined(TARGET_ARM) || defined(TARGET_ARM_64)
+#elif defined(TARGET_ARM) || defined(TARGET_AARCH64)
 #include "hw/arm/xen_arch_hvm.h"
 #endif
diff --git a/include/hw/xen/xen-bus.h b/include/hw/xen/xen-bus.h
index 2adb2af83919b..bdbf1ed6fd0cf 100644
--- a/include/hw/xen/xen-bus.h
+++ b/include/hw/xen/xen-bus.h
@@ -8,9 +8,10 @@
 #ifndef HW_XEN_BUS_H
 #define HW_XEN_BUS_H
 
+#include "hw/qdev-core.h"
 #include "hw/xen/xen_backend_ops.h"
-#include "hw/sysbus.h"
 #include "qemu/notify.h"
+#include "qemu/queue.h"
 #include "qom/object.h"
 
 typedef struct XenEventChannel XenEventChannel;
diff --git a/include/hw/xen/xen-hvm-common.h b/include/hw/xen/xen-hvm-common.h
index c1ea2c0d787b6..19df5600a3975 100644
--- a/include/hw/xen/xen-hvm-common.h
+++ b/include/hw/xen/xen-hvm-common.h
@@ -1,18 +1,10 @@
 #ifndef HW_XEN_HVM_COMMON_H
 #define HW_XEN_HVM_COMMON_H
 
-#include "qemu/units.h"
-
-#include "cpu.h"
-#include "hw/pci/pci.h"
-#include "hw/hw.h"
+#include "qemu/queue.h"
+#include "exec/hwaddr.h"
 #include "hw/xen/xen_native.h"
-#include "hw/xen/xen-legacy-backend.h"
-#include "system/runstate.h"
-#include "system/system.h"
-#include "system/xen.h"
-#include "system/xen-mapcache.h"
-#include "qemu/error-report.h"
+#include "hw/xen/xen_backend_ops.h"
 #include <xen/hvm/ioreq.h>
 
 extern MemoryRegion xen_memory;
diff --git a/include/hw/xen/xen-legacy-backend.h b/include/hw/xen/xen-legacy-backend.h
index e198b120c5d2f..2d0cbfecade8d 100644
--- a/include/hw/xen/xen-legacy-backend.h
+++ b/include/hw/xen/xen-legacy-backend.h
@@ -3,7 +3,6 @@
 
 #include "hw/xen/xen_backend_ops.h"
 #include "hw/xen/xen_pvdev.h"
-#include "net/net.h"
 #include "qom/object.h"
 
 #define TYPE_XENSYSDEV "xen-sysdev"
diff --git a/include/hw/xen/xen-pvh-common.h b/include/hw/xen/xen-pvh-common.h
index 5cdd23c2f4d32..17c5a58a5a4e2 100644
--- a/include/hw/xen/xen-pvh-common.h
+++ b/include/hw/xen/xen-pvh-common.h
@@ -9,11 +9,11 @@
 #ifndef XEN_PVH_COMMON_H__
 #define XEN_PVH_COMMON_H__
 
-#include <assert.h>
-#include "hw/sysbus.h"
-#include "hw/hw.h"
-#include "hw/xen/xen-hvm-common.h"
+#include "exec/memory.h"
+#include "qom/object.h"
+#include "hw/boards.h"
 #include "hw/pci-host/gpex.h"
+#include "hw/xen/xen-hvm-common.h"
 
 #define TYPE_XEN_PVH_MACHINE MACHINE_TYPE_NAME("xen-pvh-base")
 OBJECT_DECLARE_TYPE(XenPVHMachineState, XenPVHMachineClass,
diff --git a/include/hw/xen/xen_pvdev.h b/include/hw/xen/xen_pvdev.h
index 0c98444047607..629bec90d0929 100644
--- a/include/hw/xen/xen_pvdev.h
+++ b/include/hw/xen/xen_pvdev.h
@@ -1,7 +1,7 @@
 #ifndef QEMU_HW_XEN_PVDEV_H
 #define QEMU_HW_XEN_PVDEV_H
 
-#include "hw/qdev-core.h"
+#include "hw/sysbus.h"
 #include "hw/xen/xen_backend_ops.h"
 
 /* ------------------------------------------------------------- */
@@ -32,7 +32,8 @@ struct XenDevOps {
 };
 
 struct XenLegacyDevice {
-    DeviceState        qdev;
+    SysBusDevice parent_obj;
+
     const char         *type;
     int                dom;
     int                dev;
diff --git a/include/io/channel-tls.h b/include/io/channel-tls.h
index 26c67f17e2d36..7e9023570d89c 100644
--- a/include/io/channel-tls.h
+++ b/include/io/channel-tls.h
@@ -49,8 +49,20 @@ struct QIOChannelTLS {
     QCryptoTLSSession *session;
     QIOChannelShutdown shutdown;
     guint hs_ioc_tag;
+    guint bye_ioc_tag;
 };
 
+/**
+ * qio_channel_tls_bye:
+ * @ioc: the TLS channel object
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Perform the TLS session termination. This method will return
+ * immediately and the termination will continue in the background,
+ * provided the main loop is running.
+ */
+void qio_channel_tls_bye(QIOChannelTLS *ioc, Error **errp);
+
 /**
  * qio_channel_tls_new_server:
  * @master: the underlying channel object
diff --git a/include/io/channel.h b/include/io/channel.h
index bdf0bca92ae2e..62b657109c7d9 100644
--- a/include/io/channel.h
+++ b/include/io/channel.h
@@ -35,6 +35,7 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass,
 #define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1
 
 #define QIO_CHANNEL_READ_FLAG_MSG_PEEK 0x1
+#define QIO_CHANNEL_READ_FLAG_RELAXED_EOF 0x2
 
 typedef enum QIOChannelFeature QIOChannelFeature;
 
@@ -885,6 +886,7 @@ void qio_channel_set_aio_fd_handler(QIOChannel *ioc,
  * @niov: the length of the @iov array
  * @fds: an array of file handles to read
  * @nfds: number of file handles in @fds
+ * @flags: read flags (QIO_CHANNEL_READ_FLAG_*)
  * @errp: pointer to a NULL-initialized error object
  *
  *
@@ -903,6 +905,7 @@ int coroutine_mixed_fn qio_channel_readv_full_all_eof(QIOChannel *ioc,
                                                       const struct iovec *iov,
                                                       size_t niov,
                                                       int **fds, size_t *nfds,
+                                                      int flags,
                                                       Error **errp);
 
 /**
diff --git a/include/migration/client-options.h b/include/migration/client-options.h
index 59f4b55cf4f74..289c9d776221c 100644
--- a/include/migration/client-options.h
+++ b/include/migration/client-options.h
@@ -10,6 +10,10 @@
 #ifndef QEMU_MIGRATION_CLIENT_OPTIONS_H
 #define QEMU_MIGRATION_CLIENT_OPTIONS_H
 
+
+/* properties */
+bool migrate_send_switchover_start(void);
+
 /* capabilities */
 
 bool migrate_background_snapshot(void);
diff --git a/include/migration/cpr.h b/include/migration/cpr.h
index 3a6deb793330c..7561fc75adc5e 100644
--- a/include/migration/cpr.h
+++ b/include/migration/cpr.h
@@ -21,6 +21,7 @@ int cpr_find_fd(const char *name, int id);
 
 MigMode cpr_get_incoming_mode(void);
 void cpr_set_incoming_mode(MigMode mode);
+bool cpr_is_incoming(void);
 
 int cpr_state_save(MigrationChannel *channel, Error **errp);
 int cpr_state_load(MigrationChannel *channel, Error **errp);
diff --git a/include/migration/misc.h b/include/migration/misc.h
index c660be80954ab..8fd36eba1da72 100644
--- a/include/migration/misc.h
+++ b/include/migration/misc.h
@@ -45,9 +45,12 @@ bool migrate_ram_is_ignored(RAMBlock *block);
 /* migration/block.c */
 
 AnnounceParameters *migrate_announce_params(void);
+
 /* migration/savevm.c */
 
 void dump_vmstate_json_to_file(FILE *out_fp);
+void qemu_loadvm_start_load_thread(MigrationLoadThread function,
+                                   void *opaque);
 
 /* migration/migration.c */
 void migration_object_init(void);
@@ -115,4 +118,26 @@ bool migrate_is_uri(const char *uri);
 bool migrate_uri_parse(const char *uri, MigrationChannel **channel,
                        Error **errp);
 
+/* migration/multifd-device-state.c */
+typedef struct SaveLiveCompletePrecopyThreadData {
+    SaveLiveCompletePrecopyThreadHandler hdlr;
+    char *idstr;
+    uint32_t instance_id;
+    void *handler_opaque;
+} SaveLiveCompletePrecopyThreadData;
+
+bool multifd_queue_device_state(char *idstr, uint32_t instance_id,
+                                char *data, size_t len);
+bool multifd_device_state_supported(void);
+
+void
+multifd_spawn_device_state_save_thread(SaveLiveCompletePrecopyThreadHandler hdlr,
+                                       char *idstr, uint32_t instance_id,
+                                       void *opaque);
+
+bool multifd_device_state_save_thread_should_exit(void);
+
+void multifd_abort_device_state_save_threads(void);
+bool multifd_join_device_state_save_threads(void);
+
 #endif
diff --git a/include/migration/register.h b/include/migration/register.h
index f60e797894e5f..c041ce32f2fcf 100644
--- a/include/migration/register.h
+++ b/include/migration/register.h
@@ -69,7 +69,9 @@ typedef struct SaveVMHandlers {
     /**
      * @save_cleanup
      *
-     * Uninitializes the data structures on the source
+     * Uninitializes the data structures on the source.
+     * Note that this handler can be called even if save_setup
+     * wasn't called earlier.
      *
      * @opaque: data pointer passed to register_savevm_live()
      */
@@ -103,6 +105,25 @@ typedef struct SaveVMHandlers {
      */
     int (*save_live_complete_precopy)(QEMUFile *f, void *opaque);
 
+    /**
+     * @save_live_complete_precopy_thread (invoked in a separate thread)
+     *
+     * Called at the end of a precopy phase from a separate worker thread
+     * in configurations where multifd device state transfer is supported
+     * in order to perform asynchronous transmission of the remaining data in
+     * parallel with @save_live_complete_precopy handlers.
+     * When postcopy is enabled, devices that support postcopy will skip this
+     * step.
+     *
+     * @d: a #SaveLiveCompletePrecopyThreadData containing parameters that the
+     * handler may need, including this device section idstr and instance_id,
+     * and opaque data pointer passed to register_savevm_live().
+     * @errp: pointer to Error*, to store an error if it happens.
+     *
+     * Returns true to indicate success and false for errors.
+     */
+    SaveLiveCompletePrecopyThreadHandler save_live_complete_precopy_thread;
+
     /* This runs both outside and inside the BQL.  */
 
     /**
@@ -227,6 +248,21 @@ typedef struct SaveVMHandlers {
      */
     int (*load_state)(QEMUFile *f, void *opaque, int version_id);
 
+    /**
+     * @load_state_buffer (invoked outside the BQL)
+     *
+     * Load device state buffer provided to qemu_loadvm_load_state_buffer().
+     *
+     * @opaque: data pointer passed to register_savevm_live()
+     * @buf: the data buffer to load
+     * @len: the data length in buffer
+     * @errp: pointer to Error*, to store an error if it happens.
+     *
+     * Returns true to indicate success and false for errors.
+     */
+    bool (*load_state_buffer)(void *opaque, char *buf, size_t len,
+                              Error **errp);
+
     /**
      * @load_setup
      *
@@ -244,6 +280,8 @@ typedef struct SaveVMHandlers {
      * @load_cleanup
      *
      * Uninitializes the data structures on the destination.
+     * Note that this handler can be called even if load_setup
+     * wasn't called earlier.
      *
      * @opaque: data pointer passed to register_savevm_live()
      *
@@ -275,6 +313,18 @@ typedef struct SaveVMHandlers {
      * otherwise
      */
     bool (*switchover_ack_needed)(void *opaque);
+
+    /**
+     * @switchover_start
+     *
+     * Notifies that the switchover has started. Called only on
+     * the destination.
+     *
+     * @opaque: data pointer passed to register_savevm_live()
+     *
+     * Returns zero to indicate success and negative for error
+     */
+    int (*switchover_start)(void *opaque);
 } SaveVMHandlers;
 
 /**
diff --git a/include/qapi/error.h b/include/qapi/error.h
index f5fe2162623e5..41e3816380490 100644
--- a/include/qapi/error.h
+++ b/include/qapi/error.h
@@ -437,6 +437,8 @@ Error *error_copy(const Error *err);
  */
 void error_free(Error *err);
 
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(Error, error_free)
+
 /*
  * Convenience function to assert that *@errp is set, then silently free it.
  */
diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
index 7a3f2e6576b35..f80cba24cf780 100644
--- a/include/qemu/atomic.h
+++ b/include/qemu/atomic.h
@@ -56,25 +56,13 @@
  */
 #define signal_barrier()    __atomic_signal_fence(__ATOMIC_SEQ_CST)
 
-/* Sanity check that the size of an atomic operation isn't "overly large".
+/*
+ * Sanity check that the size of an atomic operation isn't "overly large".
  * Despite the fact that e.g. i686 has 64-bit atomic operations, we do not
  * want to use them because we ought not need them, and this lets us do a
  * bit of sanity checking that other 32-bit hosts might build.
- *
- * That said, we have a problem on 64-bit ILP32 hosts in that in order to
- * sync with TCG_OVERSIZED_GUEST, this must match TCG_TARGET_REG_BITS.
- * We'd prefer not want to pull in everything else TCG related, so handle
- * those few cases by hand.
- *
- * Note that x32 is fully detected with __x86_64__ + _ILP32, and that for
- * Sparc we always force the use of sparcv9 in configure. MIPS n32 (ILP32) &
- * n64 (LP64) ABIs are both detected using __mips64.
  */
-#if defined(__x86_64__) || defined(__sparc__) || defined(__mips64)
-# define ATOMIC_REG_SIZE  8
-#else
-# define ATOMIC_REG_SIZE  sizeof(void *)
-#endif
+#define ATOMIC_REG_SIZE  sizeof(void *)
 
 /* Weak atomic operations prevent the compiler moving other
  * loads/stores past the atomic operation load/store. However there is
diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h
index 88af6d4ea3faa..31e5c48d8fa38 100644
--- a/include/qemu/atomic128.h
+++ b/include/qemu/atomic128.h
@@ -13,6 +13,7 @@
 #ifndef QEMU_ATOMIC128_H
 #define QEMU_ATOMIC128_H
 
+#include "qemu/atomic.h"
 #include "qemu/int128.h"
 
 /*
@@ -58,7 +59,7 @@
  * Therefore, special case each platform.
  */
 
-#include "host/atomic128-cas.h"
-#include "host/atomic128-ldst.h"
+#include "host/atomic128-cas.h.inc"
+#include "host/atomic128-ldst.h.inc"
 
 #endif /* QEMU_ATOMIC128_H */
diff --git a/include/qemu/clang-tsa.h b/include/qemu/clang-tsa.h
deleted file mode 100644
index ba06fb8c92491..0000000000000
--- a/include/qemu/clang-tsa.h
+++ /dev/null
@@ -1,114 +0,0 @@
-#ifndef CLANG_TSA_H
-#define CLANG_TSA_H
-
-/*
- * Copyright 2018 Jarkko Hietaniemi <jhi@iki.fi>
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without
- * limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/* http://clang.llvm.org/docs/ThreadSafetyAnalysis.html
- *
- * TSA is available since clang 3.6-ish.
- */
-#ifdef __clang__
-#  define TSA(x)   __attribute__((x))
-#else
-#  define TSA(x)   /* No TSA, make TSA attributes no-ops. */
-#endif
-
-/* TSA_CAPABILITY() is used to annotate typedefs:
- *
- * typedef pthread_mutex_t TSA_CAPABILITY("mutex") tsa_mutex;
- */
-#define TSA_CAPABILITY(x) TSA(capability(x))
-
-/* TSA_GUARDED_BY() is used to annotate global variables,
- * the data is guarded:
- *
- * Foo foo TSA_GUARDED_BY(mutex);
- */
-#define TSA_GUARDED_BY(x) TSA(guarded_by(x))
-
-/* TSA_PT_GUARDED_BY() is used to annotate global pointers, the data
- * behind the pointer is guarded.
- *
- * Foo* ptr TSA_PT_GUARDED_BY(mutex);
- */
-#define TSA_PT_GUARDED_BY(x) TSA(pt_guarded_by(x))
-
-/* The TSA_REQUIRES() is used to annotate functions: the caller of the
- * function MUST hold the resource, the function will NOT release it.
- *
- * More than one mutex may be specified, comma-separated.
- *
- * void Foo(void) TSA_REQUIRES(mutex);
- */
-#define TSA_REQUIRES(...) TSA(requires_capability(__VA_ARGS__))
-#define TSA_REQUIRES_SHARED(...) TSA(requires_shared_capability(__VA_ARGS__))
-
-/* TSA_EXCLUDES() is used to annotate functions: the caller of the
- * function MUST NOT hold resource, the function first acquires the
- * resource, and then releases it.
- *
- * More than one mutex may be specified, comma-separated.
- *
- * void Foo(void) TSA_EXCLUDES(mutex);
- */
-#define TSA_EXCLUDES(...) TSA(locks_excluded(__VA_ARGS__))
-
-/* TSA_ACQUIRE() is used to annotate functions: the caller of the
- * function MUST NOT hold the resource, the function will acquire the
- * resource, but NOT release it.
- *
- * More than one mutex may be specified, comma-separated.
- *
- * void Foo(void) TSA_ACQUIRE(mutex);
- */
-#define TSA_ACQUIRE(...) TSA(acquire_capability(__VA_ARGS__))
-#define TSA_ACQUIRE_SHARED(...) TSA(acquire_shared_capability(__VA_ARGS__))
-
-/* TSA_RELEASE() is used to annotate functions: the caller of the
- * function MUST hold the resource, but the function will then release it.
- *
- * More than one mutex may be specified, comma-separated.
- *
- * void Foo(void) TSA_RELEASE(mutex);
- */
-#define TSA_RELEASE(...) TSA(release_capability(__VA_ARGS__))
-#define TSA_RELEASE_SHARED(...) TSA(release_shared_capability(__VA_ARGS__))
-
-/* TSA_NO_TSA is used to annotate functions.  Use only when you need to.
- *
- * void Foo(void) TSA_NO_TSA;
- */
-#define TSA_NO_TSA TSA(no_thread_safety_analysis)
-
-/*
- * TSA_ASSERT() is used to annotate functions: This function will assert that
- * the lock is held. When it returns, the caller of the function is assumed to
- * already hold the resource.
- *
- * More than one mutex may be specified, comma-separated.
- */
-#define TSA_ASSERT(...) TSA(assert_capability(__VA_ARGS__))
-#define TSA_ASSERT_SHARED(...) TSA(assert_shared_capability(__VA_ARGS__))
-
-#endif /* #ifndef CLANG_TSA_H */
diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h
index d904408e5ed9b..496dac5ac1113 100644
--- a/include/qemu/compiler.h
+++ b/include/qemu/compiler.h
@@ -207,6 +207,102 @@
 # define QEMU_USED
 #endif
 
+/*
+ * http://clang.llvm.org/docs/ThreadSafetyAnalysis.html
+ *
+ * TSA is available since clang 3.6-ish.
+ */
+#ifdef __clang__
+#  define TSA(x)   __attribute__((x))
+#else
+#  define TSA(x)   /* No TSA, make TSA attributes no-ops. */
+#endif
+
+/*
+ * TSA_CAPABILITY() is used to annotate typedefs:
+ *
+ * typedef pthread_mutex_t TSA_CAPABILITY("mutex") tsa_mutex;
+ */
+#define TSA_CAPABILITY(x) TSA(capability(x))
+
+/*
+ * TSA_GUARDED_BY() is used to annotate global variables,
+ * the data is guarded:
+ *
+ * Foo foo TSA_GUARDED_BY(mutex);
+ */
+#define TSA_GUARDED_BY(x) TSA(guarded_by(x))
+
+/*
+ * TSA_PT_GUARDED_BY() is used to annotate global pointers, the data
+ * behind the pointer is guarded.
+ *
+ * Foo* ptr TSA_PT_GUARDED_BY(mutex);
+ */
+#define TSA_PT_GUARDED_BY(x) TSA(pt_guarded_by(x))
+
+/*
+ * The TSA_REQUIRES() is used to annotate functions: the caller of the
+ * function MUST hold the resource, the function will NOT release it.
+ *
+ * More than one mutex may be specified, comma-separated.
+ *
+ * void Foo(void) TSA_REQUIRES(mutex);
+ */
+#define TSA_REQUIRES(...) TSA(requires_capability(__VA_ARGS__))
+#define TSA_REQUIRES_SHARED(...) TSA(requires_shared_capability(__VA_ARGS__))
+
+/*
+ * TSA_EXCLUDES() is used to annotate functions: the caller of the
+ * function MUST NOT hold resource, the function first acquires the
+ * resource, and then releases it.
+ *
+ * More than one mutex may be specified, comma-separated.
+ *
+ * void Foo(void) TSA_EXCLUDES(mutex);
+ */
+#define TSA_EXCLUDES(...) TSA(locks_excluded(__VA_ARGS__))
+
+/*
+ * TSA_ACQUIRE() is used to annotate functions: the caller of the
+ * function MUST NOT hold the resource, the function will acquire the
+ * resource, but NOT release it.
+ *
+ * More than one mutex may be specified, comma-separated.
+ *
+ * void Foo(void) TSA_ACQUIRE(mutex);
+ */
+#define TSA_ACQUIRE(...) TSA(acquire_capability(__VA_ARGS__))
+#define TSA_ACQUIRE_SHARED(...) TSA(acquire_shared_capability(__VA_ARGS__))
+
+/*
+ * TSA_RELEASE() is used to annotate functions: the caller of the
+ * function MUST hold the resource, but the function will then release it.
+ *
+ * More than one mutex may be specified, comma-separated.
+ *
+ * void Foo(void) TSA_RELEASE(mutex);
+ */
+#define TSA_RELEASE(...) TSA(release_capability(__VA_ARGS__))
+#define TSA_RELEASE_SHARED(...) TSA(release_shared_capability(__VA_ARGS__))
+
+/*
+ * TSA_NO_TSA is used to annotate functions.  Use only when you need to.
+ *
+ * void Foo(void) TSA_NO_TSA;
+ */
+#define TSA_NO_TSA TSA(no_thread_safety_analysis)
+
+/*
+ * TSA_ASSERT() is used to annotate functions: This function will assert that
+ * the lock is held. When it returns, the caller of the function is assumed to
+ * already hold the resource.
+ *
+ * More than one mutex may be specified, comma-separated.
+ */
+#define TSA_ASSERT(...) TSA(assert_capability(__VA_ARGS__))
+#define TSA_ASSERT_SHARED(...) TSA(assert_shared_capability(__VA_ARGS__))
+
 /*
  * Ugly CPP trick that is like "defined FOO", but also works in C
  * code.  Useful to replace #ifdef with "if" statements; assumes
diff --git a/include/qemu/cutils.h b/include/qemu/cutils.h
index 34a9b9b2204fe..36c68ce86c527 100644
--- a/include/qemu/cutils.h
+++ b/include/qemu/cutils.h
@@ -302,4 +302,19 @@ GString *qemu_hexdump_line(GString *str, const void *buf, size_t len,
 void qemu_hexdump(FILE *fp, const char *prefix,
                   const void *bufptr, size_t size);
 
+/**
+ * qemu_hexdump_to_buffer:
+ * @buffer: output string buffer
+ * @buffer_size: amount of available space in buffer. Must be at least
+ *               data_size*2+1.
+ * @data: input bytes
+ * @data_size: number of bytes in data
+ *
+ * Converts the @data_size bytes in @data into hex digit pairs, writing them to
+ * @buffer. Finally, a nul terminating character is written; @buffer therefore
+ * needs space for (data_size*2+1) chars.
+ */
+void qemu_hexdump_to_buffer(char *restrict buffer, size_t buffer_size,
+                            const uint8_t *restrict data, size_t data_size);
+
 #endif
diff --git a/include/qemu/iov.h b/include/qemu/iov.h
index 44f9db5cee972..9535673c13d29 100644
--- a/include/qemu/iov.h
+++ b/include/qemu/iov.h
@@ -31,7 +31,7 @@ size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt);
  * only part of data will be copied, up to the end of the iovec.
  * Number of bytes actually copied will be returned, which is
  *  min(bytes, iov_size(iov)-offset)
- * `Offset' must point to the inside of iovec.
+ * Returns 0 when `offset' points to the outside of iovec.
  */
 size_t iov_from_buf_full(const struct iovec *iov, unsigned int iov_cnt,
                          size_t offset, const void *buf, size_t bytes);
@@ -67,11 +67,12 @@ iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt,
 /**
  * Set data bytes pointed out by iovec `iov' of size `iov_cnt' elements,
  * starting at byte offset `start', to value `fillc', repeating it
- * `bytes' number of times.  `Offset' must point to the inside of iovec.
+ * `bytes' number of times.
  * If `bytes' is large enough, only last bytes portion of iovec,
  * up to the end of it, will be filled with the specified value.
  * Function return actual number of bytes processed, which is
  * min(size, iov_size(iov) - offset).
+ * Returns 0 when `offset' points to the outside of iovec.
  */
 size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt,
                   size_t offset, int fillc, size_t bytes);
diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h
index 44a45931d58c6..16d354a814888 100644
--- a/include/qemu/iova-tree.h
+++ b/include/qemu/iova-tree.h
@@ -40,6 +40,28 @@ typedef struct DMAMap {
 } QEMU_PACKED DMAMap;
 typedef gboolean (*iova_tree_iterator)(DMAMap *map);
 
+/**
+ * gpa_tree_new:
+ *
+ * Create a new GPA->IOVA tree.
+ *
+ * Returns: the tree point on success, or NULL otherwise.
+ */
+IOVATree *gpa_tree_new(void);
+
+/**
+ * gpa_tree_insert:
+ *
+ * @tree: The GPA->IOVA tree we're inserting the mapping to
+ * @map: The GPA->IOVA mapping to insert
+ *
+ * Inserts a GPA range to the GPA->IOVA tree. If there are overlapped
+ * ranges, IOVA_ERR_OVERLAP will be returned.
+ *
+ * Return: 0 if successful, < 0 otherwise.
+ */
+int gpa_tree_insert(IOVATree *tree, const DMAMap *map);
+
 /**
  * iova_tree_new:
  *
diff --git a/include/qemu/plugin-memory.h b/include/qemu/plugin-memory.h
index 71c1123308e0f..6065ec7aaf768 100644
--- a/include/qemu/plugin-memory.h
+++ b/include/qemu/plugin-memory.h
@@ -9,7 +9,6 @@
 #ifndef PLUGIN_MEMORY_H
 #define PLUGIN_MEMORY_H
 
-#include "exec/cpu-defs.h"
 #include "exec/hwaddr.h"
 
 struct qemu_plugin_hwaddr {
diff --git a/include/qemu/thread.h b/include/qemu/thread.h
index 7eba27a704938..6f800aad31a98 100644
--- a/include/qemu/thread.h
+++ b/include/qemu/thread.h
@@ -3,7 +3,6 @@
 
 #include "qemu/processor.h"
 #include "qemu/atomic.h"
-#include "qemu/clang-tsa.h"
 
 typedef struct QemuCond QemuCond;
 typedef struct QemuSemaphore QemuSemaphore;
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index cc167bd825ba8..abd2204f3beac 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -507,6 +507,8 @@ static inline void timer_init_ms(QEMUTimer *ts, QEMUClockType type,
  * with an AioContext---each of them runs its timer callbacks in its own
  * AioContext thread.
  *
+ * The timer returned must be freed using timer_free().
+ *
  * Returns: a pointer to the timer
  */
 static inline QEMUTimer *timer_new_full(QEMUTimerListGroup *timer_list_group,
@@ -530,6 +532,8 @@ static inline QEMUTimer *timer_new_full(QEMUTimerListGroup *timer_list_group,
  * and associate it with the default timer list for the clock type @type.
  * See timer_new_full for details.
  *
+ * The timer returned must be freed using timer_free().
+ *
  * Returns: a pointer to the timer
  */
 static inline QEMUTimer *timer_new(QEMUClockType type, int scale,
@@ -548,6 +552,8 @@ static inline QEMUTimer *timer_new(QEMUClockType type, int scale,
  * associated with the clock.
  * See timer_new_full for details.
  *
+ * The timer returned must be freed using timer_free().
+ *
  * Returns: a pointer to the newly created timer
  */
 static inline QEMUTimer *timer_new_ns(QEMUClockType type, QEMUTimerCB *cb,
@@ -566,6 +572,8 @@ static inline QEMUTimer *timer_new_ns(QEMUClockType type, QEMUTimerCB *cb,
  * associated with the clock.
  * See timer_new_full for details.
  *
+ * The timer returned must be freed using timer_free().
+ *
  * Returns: a pointer to the newly created timer
  */
 static inline QEMUTimer *timer_new_us(QEMUClockType type, QEMUTimerCB *cb,
@@ -584,6 +592,8 @@ static inline QEMUTimer *timer_new_us(QEMUClockType type, QEMUTimerCB *cb,
  * associated with the clock.
  * See timer_new_full for details.
  *
+ * The timer returned must be freed using timer_free().
+ *
  * Returns: a pointer to the newly created timer
  */
 static inline QEMUTimer *timer_new_ms(QEMUClockType type, QEMUTimerCB *cb,
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index 3d84efcac47a2..507f0814d5a57 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -22,6 +22,7 @@
  * Please keep this list in case-insensitive alphabetical order.
  */
 typedef struct AccelCPUState AccelCPUState;
+typedef struct AccelOpsClass AccelOpsClass;
 typedef struct AccelState AccelState;
 typedef struct AddressSpace AddressSpace;
 typedef struct AioContext AioContext;
@@ -108,6 +109,7 @@ typedef struct QString QString;
 typedef struct RAMBlock RAMBlock;
 typedef struct Range Range;
 typedef struct ReservedRegion ReservedRegion;
+typedef struct SaveLiveCompletePrecopyThreadData SaveLiveCompletePrecopyThreadData;
 typedef struct SHPCDevice SHPCDevice;
 typedef struct SSIBus SSIBus;
 typedef struct TCGCPUOps TCGCPUOps;
@@ -131,5 +133,9 @@ typedef struct IRQState *qemu_irq;
  * Function types
  */
 typedef void (*qemu_irq_handler)(void *opaque, int n, int level);
+typedef bool (*MigrationLoadThread)(void *opaque, bool *should_quit,
+                                    Error **errp);
+typedef bool (*SaveLiveCompletePrecopyThreadHandler)(SaveLiveCompletePrecopyThreadData *d,
+                                                     Error **errp);
 
 #endif /* QEMU_TYPEDEFS_H */
diff --git a/include/semihosting/common-semi.h b/include/semihosting/common-semi.h
index 0a91db7c4149a..58dfb99d7a5b4 100644
--- a/include/semihosting/common-semi.h
+++ b/include/semihosting/common-semi.h
@@ -34,6 +34,7 @@
 #ifndef COMMON_SEMI_H
 #define COMMON_SEMI_H
 
+void common_semi_cb(CPUState *cs, uint64_t ret, int err);
 void do_common_semihosting(CPUState *cs);
 
 #endif /* COMMON_SEMI_H */
diff --git a/include/semihosting/semihost.h b/include/semihosting/semihost.h
index 97d2a2ba996d1..6e07766106516 100644
--- a/include/semihosting/semihost.h
+++ b/include/semihosting/semihost.h
@@ -51,6 +51,11 @@ static inline const char *semihosting_get_cmdline(void)
 {
     return NULL;
 }
+
+static inline const char *semihosting_get_usefs(void)
+{
+    return NULL;
+}
 #else /* !CONFIG_USER_ONLY */
 /**
  * semihosting_enabled:
@@ -63,6 +68,7 @@ SemihostingTarget semihosting_get_target(void);
 const char *semihosting_get_arg(int i);
 int semihosting_get_argc(void);
 const char *semihosting_get_cmdline(void);
+const char *semihosting_get_usefs(void);
 void semihosting_arg_fallback(const char *file, const char *cmd);
 /* for vl.c hooks */
 void qemu_semihosting_enable(void);
diff --git a/include/semihosting/syscalls.h b/include/semihosting/syscalls.h
index 6627c45fb281a..dec2ee0ad4acd 100644
--- a/include/semihosting/syscalls.h
+++ b/include/semihosting/syscalls.h
@@ -75,4 +75,6 @@ void semihost_sys_gettimeofday(CPUState *cs, gdb_syscall_complete_cb complete,
 void semihost_sys_poll_one(CPUState *cs, gdb_syscall_complete_cb complete,
                            int fd, GIOCondition cond, int timeout);
 
+void semihost_sys_ftruncate(CPUState *cs, gdb_syscall_complete_cb complete,
+                            int fd, off_t len);
 #endif /* SEMIHOSTING_SYSCALLS_H */
diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h
index 67c47912e596b..e83382531cd1c 100644
--- a/include/standard-headers/linux/ethtool.h
+++ b/include/standard-headers/linux/ethtool.h
@@ -681,6 +681,8 @@ enum ethtool_link_ext_substate_module {
  * @ETH_SS_STATS_ETH_MAC: names of IEEE 802.3 MAC statistics
  * @ETH_SS_STATS_ETH_CTRL: names of IEEE 802.3 MAC Control statistics
  * @ETH_SS_STATS_RMON: names of RMON statistics
+ * @ETH_SS_STATS_PHY: names of PHY(dev) statistics
+ * @ETH_SS_TS_FLAGS: hardware timestamping flags
  *
  * @ETH_SS_COUNT: number of defined string sets
  */
@@ -706,6 +708,8 @@ enum ethtool_stringset {
 	ETH_SS_STATS_ETH_MAC,
 	ETH_SS_STATS_ETH_CTRL,
 	ETH_SS_STATS_RMON,
+	ETH_SS_STATS_PHY,
+	ETH_SS_TS_FLAGS,
 
 	/* add new constants above here */
 	ETH_SS_COUNT
diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h
index 889e12ad1552d..d303effb2a0d2 100644
--- a/include/standard-headers/linux/fuse.h
+++ b/include/standard-headers/linux/fuse.h
@@ -220,6 +220,15 @@
  *
  *  7.41
  *  - add FUSE_ALLOW_IDMAP
+ *  7.42
+ *  - Add FUSE_OVER_IO_URING and all other io-uring related flags and data
+ *    structures:
+ *    - struct fuse_uring_ent_in_out
+ *    - struct fuse_uring_req_header
+ *    - struct fuse_uring_cmd_req
+ *    - FUSE_URING_IN_OUT_HEADER_SZ
+ *    - FUSE_URING_OP_IN_OUT_SZ
+ *    - enum fuse_uring_cmd
  */
 
 #ifndef _LINUX_FUSE_H
@@ -251,7 +260,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 41
+#define FUSE_KERNEL_MINOR_VERSION 42
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -421,6 +430,7 @@ struct fuse_file_lock {
  * FUSE_HAS_RESEND: kernel supports resending pending requests, and the high bit
  *		    of the request ID indicates resend requests
  * FUSE_ALLOW_IDMAP: allow creation of idmapped mounts
+ * FUSE_OVER_IO_URING: Indicate that client supports io-uring
  */
 #define FUSE_ASYNC_READ		(1 << 0)
 #define FUSE_POSIX_LOCKS	(1 << 1)
@@ -467,6 +477,7 @@ struct fuse_file_lock {
 /* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */
 #define FUSE_DIRECT_IO_RELAX	FUSE_DIRECT_IO_ALLOW_MMAP
 #define FUSE_ALLOW_IDMAP	(1ULL << 40)
+#define FUSE_OVER_IO_URING	(1ULL << 41)
 
 /**
  * CUSE INIT request/reply flags
@@ -1202,4 +1213,67 @@ struct fuse_supp_groups {
 	uint32_t	groups[];
 };
 
+/**
+ * Size of the ring buffer header
+ */
+#define FUSE_URING_IN_OUT_HEADER_SZ 128
+#define FUSE_URING_OP_IN_OUT_SZ 128
+
+/* Used as part of the fuse_uring_req_header */
+struct fuse_uring_ent_in_out {
+	uint64_t flags;
+
+	/*
+	 * commit ID to be used in a reply to a ring request (see also
+	 * struct fuse_uring_cmd_req)
+	 */
+	uint64_t commit_id;
+
+	/* size of user payload buffer */
+	uint32_t payload_sz;
+	uint32_t padding;
+
+	uint64_t reserved;
+};
+
+/**
+ * Header for all fuse-io-uring requests
+ */
+struct fuse_uring_req_header {
+	/* struct fuse_in_header / struct fuse_out_header */
+	char in_out[FUSE_URING_IN_OUT_HEADER_SZ];
+
+	/* per op code header */
+	char op_in[FUSE_URING_OP_IN_OUT_SZ];
+
+	struct fuse_uring_ent_in_out ring_ent_in_out;
+};
+
+/**
+ * sqe commands to the kernel
+ */
+enum fuse_uring_cmd {
+	FUSE_IO_URING_CMD_INVALID = 0,
+
+	/* register the request buffer and fetch a fuse request */
+	FUSE_IO_URING_CMD_REGISTER = 1,
+
+	/* commit fuse request result and fetch next request */
+	FUSE_IO_URING_CMD_COMMIT_AND_FETCH = 2,
+};
+
+/**
+ * In the 80B command area of the SQE.
+ */
+struct fuse_uring_cmd_req {
+	uint64_t flags;
+
+	/* entry identifier for commits */
+	uint64_t commit_id;
+
+	/* queue the command is for (queue index) */
+	uint16_t qid;
+	uint8_t padding[6];
+};
+
 #endif /* _LINUX_FUSE_H */
diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h
index 50b2b7497ef3b..09ba0ad878314 100644
--- a/include/standard-headers/linux/input-event-codes.h
+++ b/include/standard-headers/linux/input-event-codes.h
@@ -519,6 +519,7 @@
 #define KEY_NOTIFICATION_CENTER	0x1bc	/* Show/hide the notification center */
 #define KEY_PICKUP_PHONE	0x1bd	/* Answer incoming call */
 #define KEY_HANGUP_PHONE	0x1be	/* Decline incoming call */
+#define KEY_LINK_PHONE		0x1bf   /* AL Phone Syncing */
 
 #define KEY_DEL_EOL		0x1c0
 #define KEY_DEL_EOS		0x1c1
diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h
index 1601c7ed5fab7..3445c4970e4d8 100644
--- a/include/standard-headers/linux/pci_regs.h
+++ b/include/standard-headers/linux/pci_regs.h
@@ -533,7 +533,7 @@
 #define  PCI_EXP_DEVSTA_TRPND	0x0020	/* Transactions Pending */
 #define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V1	12	/* v1 endpoints without link end here */
 #define PCI_EXP_LNKCAP		0x0c	/* Link Capabilities */
-#define  PCI_EXP_LNKCAP_SLS	0x0000000f /* Supported Link Speeds */
+#define  PCI_EXP_LNKCAP_SLS	0x0000000f /* Max Link Speed (prior to PCIe r3.0: Supported Link Speeds) */
 #define  PCI_EXP_LNKCAP_SLS_2_5GB 0x00000001 /* LNKCAP2 SLS Vector bit 0 */
 #define  PCI_EXP_LNKCAP_SLS_5_0GB 0x00000002 /* LNKCAP2 SLS Vector bit 1 */
 #define  PCI_EXP_LNKCAP_SLS_8_0GB 0x00000003 /* LNKCAP2 SLS Vector bit 2 */
@@ -665,6 +665,7 @@
 #define  PCI_EXP_DEVCAP2_OBFF_MSG	0x00040000 /* New message signaling */
 #define  PCI_EXP_DEVCAP2_OBFF_WAKE	0x00080000 /* Re-use WAKE# for OBFF */
 #define  PCI_EXP_DEVCAP2_EE_PREFIX	0x00200000 /* End-End TLP Prefix */
+#define  PCI_EXP_DEVCAP2_EE_PREFIX_MAX	0x00c00000 /* Max End-End TLP Prefixes */
 #define PCI_EXP_DEVCTL2		0x28	/* Device Control 2 */
 #define  PCI_EXP_DEVCTL2_COMP_TIMEOUT	0x000f	/* Completion Timeout Value */
 #define  PCI_EXP_DEVCTL2_COMP_TMOUT_DIS	0x0010	/* Completion Timeout Disable */
@@ -789,10 +790,11 @@
 	/* Same bits as above */
 #define PCI_ERR_CAP		0x18	/* Advanced Error Capabilities & Ctrl*/
 #define  PCI_ERR_CAP_FEP(x)	((x) & 0x1f)	/* First Error Pointer */
-#define  PCI_ERR_CAP_ECRC_GENC	0x00000020	/* ECRC Generation Capable */
-#define  PCI_ERR_CAP_ECRC_GENE	0x00000040	/* ECRC Generation Enable */
-#define  PCI_ERR_CAP_ECRC_CHKC	0x00000080	/* ECRC Check Capable */
-#define  PCI_ERR_CAP_ECRC_CHKE	0x00000100	/* ECRC Check Enable */
+#define  PCI_ERR_CAP_ECRC_GENC		0x00000020 /* ECRC Generation Capable */
+#define  PCI_ERR_CAP_ECRC_GENE		0x00000040 /* ECRC Generation Enable */
+#define  PCI_ERR_CAP_ECRC_CHKC		0x00000080 /* ECRC Check Capable */
+#define  PCI_ERR_CAP_ECRC_CHKE		0x00000100 /* ECRC Check Enable */
+#define  PCI_ERR_CAP_PREFIX_LOG_PRESENT	0x00000800 /* TLP Prefix Log Present */
 #define PCI_ERR_HEADER_LOG	0x1c	/* Header Log Register (16 bytes) */
 #define PCI_ERR_ROOT_COMMAND	0x2c	/* Root Error Command */
 #define  PCI_ERR_ROOT_CMD_COR_EN	0x00000001 /* Correctable Err Reporting Enable */
@@ -808,6 +810,7 @@
 #define  PCI_ERR_ROOT_FATAL_RCV		0x00000040 /* Fatal Received */
 #define  PCI_ERR_ROOT_AER_IRQ		0xf8000000 /* Advanced Error Interrupt Message Number */
 #define PCI_ERR_ROOT_ERR_SRC	0x34	/* Error Source Identification */
+#define PCI_ERR_PREFIX_LOG	0x38	/* TLP Prefix LOG Register (up to 16 bytes) */
 
 /* Virtual Channel */
 #define PCI_VC_PORT_CAP1	0x04
@@ -1001,9 +1004,6 @@
 #define PCI_ACS_CTRL		0x06	/* ACS Control Register */
 #define PCI_ACS_EGRESS_CTL_V	0x08	/* ACS Egress Control Vector */
 
-#define PCI_VSEC_HDR		4	/* extended cap - vendor-specific */
-#define  PCI_VSEC_HDR_LEN_SHIFT	20	/* shift for length field */
-
 /* SATA capability */
 #define PCI_SATA_REGS		4	/* SATA REGs specifier */
 #define  PCI_SATA_REGS_MASK	0xF	/* location - BAR#/inline */
diff --git a/include/standard-headers/linux/virtio_pci.h b/include/standard-headers/linux/virtio_pci.h
index b177ed89727fd..91fec6f502965 100644
--- a/include/standard-headers/linux/virtio_pci.h
+++ b/include/standard-headers/linux/virtio_pci.h
@@ -116,6 +116,8 @@
 #define VIRTIO_PCI_CAP_PCI_CFG		5
 /* Additional shared memory capability */
 #define VIRTIO_PCI_CAP_SHARED_MEMORY_CFG 8
+/* PCI vendor data configuration */
+#define VIRTIO_PCI_CAP_VENDOR_CFG	9
 
 /* This is the PCI capability header: */
 struct virtio_pci_cap {
@@ -130,6 +132,18 @@ struct virtio_pci_cap {
 	uint32_t length;		/* Length of the structure, in bytes. */
 };
 
+/* This is the PCI vendor data capability header: */
+struct virtio_pci_vndr_data {
+	uint8_t cap_vndr;		/* Generic PCI field: PCI_CAP_ID_VNDR */
+	uint8_t cap_next;		/* Generic PCI field: next ptr. */
+	uint8_t cap_len;		/* Generic PCI field: capability length */
+	uint8_t cfg_type;		/* Identifies the structure. */
+	uint16_t vendor_id;	/* Identifies the vendor-specific format. */
+	/* For Vendor Definition */
+	/* Pads structure to a multiple of 4 bytes */
+	/* Reads must not have side effects */
+};
+
 struct virtio_pci_cap64 {
 	struct virtio_pci_cap cap;
 	uint32_t offset_hi;             /* Most sig 32 bits of offset */
diff --git a/include/system/accel-ops.h b/include/system/accel-ops.h
index 137fb96d4441b..4c99d25aeffef 100644
--- a/include/system/accel-ops.h
+++ b/include/system/accel-ops.h
@@ -17,7 +17,6 @@
 #define TYPE_ACCEL_OPS "accel" ACCEL_OPS_SUFFIX
 #define ACCEL_OPS_NAME(name) (name "-" TYPE_ACCEL_OPS)
 
-typedef struct AccelOpsClass AccelOpsClass;
 DECLARE_CLASS_CHECKERS(AccelOpsClass, ACCEL_OPS, TYPE_ACCEL_OPS)
 
 /**
diff --git a/include/system/arch_init.h b/include/system/arch_init.h
index 5b1c1026f3af7..51e24c3091e20 100644
--- a/include/system/arch_init.h
+++ b/include/system/arch_init.h
@@ -25,8 +25,6 @@ enum {
     QEMU_ARCH_LOONGARCH = (1 << 23),
 };
 
-extern const uint32_t arch_type;
-
-void qemu_init_arch_modules(void);
+bool qemu_arch_available(unsigned qemu_arch_mask);
 
 #endif
diff --git a/include/system/block-backend-global-state.h b/include/system/block-backend-global-state.h
index 9cc9b008ec7dd..35b5e8380b932 100644
--- a/include/system/block-backend-global-state.h
+++ b/include/system/block-backend-global-state.h
@@ -86,7 +86,6 @@ bool blk_supports_write_perm(BlockBackend *blk);
 bool blk_is_sg(BlockBackend *blk);
 void blk_set_enable_write_cache(BlockBackend *blk, bool wce);
 int blk_get_flags(BlockBackend *blk);
-bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp);
 int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
                         Error **errp);
 void blk_add_aio_context_notifier(BlockBackend *blk,
diff --git a/include/system/cpus.h b/include/system/cpus.h
index 3d8fd368f3265..3226c765d0104 100644
--- a/include/system/cpus.h
+++ b/include/system/cpus.h
@@ -1,8 +1,6 @@
 #ifndef QEMU_CPUS_H
 #define QEMU_CPUS_H
 
-#include "system/accel-ops.h"
-
 /* register accel-specific operations */
 void cpus_register_accel(const AccelOpsClass *i);
 
@@ -38,8 +36,6 @@ void resume_all_vcpus(void);
 void pause_all_vcpus(void);
 void cpu_stop_current(void);
 
-extern int icount_align_option;
-
 /* Unblock cpu */
 void qemu_cpu_kick_self(void);
 
diff --git a/include/system/device_tree.h b/include/system/device_tree.h
index eb601522f8885..49d8482ed4e1c 100644
--- a/include/system/device_tree.h
+++ b/include/system/device_tree.h
@@ -133,8 +133,6 @@ int qemu_fdt_add_path(void *fdt, const char *path);
                          sizeof(qdt_tmp));                                    \
     } while (0)
 
-void qemu_fdt_dumpdtb(void *fdt, int size);
-
 /**
  * qemu_fdt_setprop_sized_cells_from_array:
  * @fdt: device tree blob
diff --git a/include/system/dma.h b/include/system/dma.h
index 5a49a306284d2..e142f7efa68b7 100644
--- a/include/system/dma.h
+++ b/include/system/dma.h
@@ -290,8 +290,7 @@ typedef BlockAIOCB *DMAIOFunc(int64_t offset, QEMUIOVector *iov,
                               BlockCompletionFunc *cb, void *cb_opaque,
                               void *opaque);
 
-BlockAIOCB *dma_blk_io(AioContext *ctx,
-                       QEMUSGList *sg, uint64_t offset, uint32_t align,
+BlockAIOCB *dma_blk_io(QEMUSGList *sg, uint64_t offset, uint32_t align,
                        DMAIOFunc *io_func, void *io_func_opaque,
                        BlockCompletionFunc *cb, void *opaque, DMADirection dir);
 BlockAIOCB *dma_blk_read(BlockBackend *blk,
diff --git a/include/system/hostmem.h b/include/system/hostmem.h
index 5c21ca55c0176..62642e602ca97 100644
--- a/include/system/hostmem.h
+++ b/include/system/hostmem.h
@@ -93,4 +93,7 @@ bool host_memory_backend_is_mapped(HostMemoryBackend *backend);
 size_t host_memory_backend_pagesize(HostMemoryBackend *memdev);
 char *host_memory_backend_get_name(HostMemoryBackend *backend);
 
+long qemu_minrampagesize(void);
+long qemu_maxrampagesize(void);
+
 #endif
diff --git a/include/system/os-posix.h b/include/system/os-posix.h
index b881ac6c6f743..ce5b3bccf8db8 100644
--- a/include/system/os-posix.h
+++ b/include/system/os-posix.h
@@ -53,7 +53,7 @@ bool os_set_runas(const char *user_id);
 void os_set_chroot(const char *path);
 void os_setup_limits(void);
 void os_setup_post(void);
-int os_mlock(void);
+int os_mlock(bool on_fault);
 
 /**
  * qemu_alloc_stack:
diff --git a/include/system/os-win32.h b/include/system/os-win32.h
index b82a5d3ad93c4..bc623061d8211 100644
--- a/include/system/os-win32.h
+++ b/include/system/os-win32.h
@@ -123,7 +123,7 @@ static inline bool is_daemonized(void)
     return false;
 }
 
-static inline int os_mlock(void)
+static inline int os_mlock(bool on_fault G_GNUC_UNUSED)
 {
     return -ENOSYS;
 }
diff --git a/include/system/system.h b/include/system/system.h
index 0cbb43ec303bd..a7effe7dfd8b2 100644
--- a/include/system/system.h
+++ b/include/system/system.h
@@ -44,10 +44,20 @@ extern int display_opengl;
 extern const char *keyboard_layout;
 extern int old_param;
 extern uint8_t *boot_splash_filedata;
-extern bool enable_mlock;
 extern bool enable_cpu_pm;
 extern QEMUClockType rtc_clock;
 
+typedef enum {
+    MLOCK_OFF = 0,
+    MLOCK_ON,
+    MLOCK_ON_FAULT,
+} MlockState;
+
+bool should_mlock(MlockState);
+bool is_mlock_on_fault(MlockState);
+
+extern MlockState mlock_state;
+
 #define MAX_OPTION_ROMS 16
 typedef struct QEMUOptionRom {
     const char *name;
diff --git a/include/tcg/oversized-guest.h b/include/tcg/oversized-guest.h
deleted file mode 100644
index 641b9749ffcda..0000000000000
--- a/include/tcg/oversized-guest.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Define TCG_OVERSIZED_GUEST
- * Copyright (c) 2008 Fabrice Bellard
- */
-
-#ifndef EXEC_TCG_OVERSIZED_GUEST_H
-#define EXEC_TCG_OVERSIZED_GUEST_H
-
-#include "tcg-target-reg-bits.h"
-#include "cpu-param.h"
-
-/*
- * Oversized TCG guests make things like MTTCG hard
- * as we can't use atomics for cputlb updates.
- */
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-#define TCG_OVERSIZED_GUEST 1
-#else
-#define TCG_OVERSIZED_GUEST 0
-#endif
-
-#endif
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
index 9383e295f4743..5bf78b0764623 100644
--- a/include/tcg/tcg-opc.h
+++ b/include/tcg/tcg-opc.h
@@ -188,36 +188,22 @@ DEF(goto_ptr, 0, 1, 0, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
 DEF(plugin_cb, 0, 0, 1, TCG_OPF_NOT_PRESENT)
 DEF(plugin_mem_cb, 0, 1, 1, TCG_OPF_NOT_PRESENT)
 
-/* Replicate ld/st ops for 32 and 64-bit guest addresses. */
-DEF(qemu_ld_a32_i32, 1, 1, 1,
+DEF(qemu_ld_i32, 1, 1, 1,
     TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF(qemu_st_a32_i32, 0, 1 + 1, 1,
+DEF(qemu_st_i32, 0, 1 + 1, 1,
     TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF(qemu_ld_a32_i64, DATA64_ARGS, 1, 1,
+DEF(qemu_ld_i64, DATA64_ARGS, 1, 1,
     TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF(qemu_st_a32_i64, 0, DATA64_ARGS + 1, 1,
-    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-
-DEF(qemu_ld_a64_i32, 1, DATA64_ARGS, 1,
-    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF(qemu_st_a64_i32, 0, 1 + DATA64_ARGS, 1,
-    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF(qemu_ld_a64_i64, DATA64_ARGS, DATA64_ARGS, 1,
-    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF(qemu_st_a64_i64, 0, DATA64_ARGS + DATA64_ARGS, 1,
+DEF(qemu_st_i64, 0, DATA64_ARGS + 1, 1,
     TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 
 /* Only used by i386 to cope with stupid register constraints. */
-DEF(qemu_st8_a32_i32, 0, 1 + 1, 1,
-    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF(qemu_st8_a64_i32, 0, 1 + DATA64_ARGS, 1,
+DEF(qemu_st8_i32, 0, 1 + 1, 1,
     TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 
 /* Only for 64-bit hosts at the moment. */
-DEF(qemu_ld_a32_i128, 2, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF(qemu_ld_a64_i128, 2, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF(qemu_st_a32_i128, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF(qemu_st_a64_i128, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld_i128, 2, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st_i128, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 
 /* Host vector support.  */
 
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index 1d1d668f527bf..84d99508b652e 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -713,7 +713,8 @@ void tb_target_set_jmp_target(const TranslationBlock *, int,
 
 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size);
 
-#define TCG_CT_CONST  1 /* any constant of register size */
+#define TCG_CT_CONST      1  /* any constant of register size */
+#define TCG_CT_REG_ZERO   2  /* zero, in TCG_REG_ZERO */
 
 typedef struct TCGArgConstraint {
     unsigned ct : 16;
diff --git a/include/user/mmap.h b/include/user/mmap.h
new file mode 100644
index 0000000000000..4d5e9aac70abc
--- /dev/null
+++ b/include/user/mmap.h
@@ -0,0 +1,32 @@
+/*
+ * MMAP declarations for QEMU user emulation
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef USER_MMAP_H
+#define USER_MMAP_H
+
+#include "user/abitypes.h"
+
+/*
+ * mmap_next_start: The base address for the next mmap without hint,
+ * increased after each successful map, starting at task_unmapped_base.
+ * This is an optimization within QEMU and not part of ADDR_COMPAT_LAYOUT.
+ */
+extern abi_ulong mmap_next_start;
+
+int target_mprotect(abi_ulong start, abi_ulong len, int prot);
+
+abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
+                     int flags, int fd, off_t offset);
+int target_munmap(abi_ulong start, abi_ulong len);
+abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
+                       abi_ulong new_size, unsigned long flags,
+                       abi_ulong new_addr);
+
+abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong alignment);
+
+void TSA_NO_TSA mmap_fork_start(void);
+void TSA_NO_TSA mmap_fork_end(int child);
+
+#endif
diff --git a/io/channel-tls.c b/io/channel-tls.c
index aab630e5ae32a..caf8301a9e317 100644
--- a/io/channel-tls.c
+++ b/io/channel-tls.c
@@ -162,16 +162,17 @@ static void qio_channel_tls_handshake_task(QIOChannelTLS *ioc,
                                            GMainContext *context)
 {
     Error *err = NULL;
-    QCryptoTLSSessionHandshakeStatus status;
+    int status;
 
-    if (qcrypto_tls_session_handshake(ioc->session, &err) < 0) {
+    status = qcrypto_tls_session_handshake(ioc->session, &err);
+
+    if (status < 0) {
         trace_qio_channel_tls_handshake_fail(ioc);
         qio_task_set_error(task, err);
         qio_task_complete(task);
         return;
     }
 
-    status = qcrypto_tls_session_get_handshake_status(ioc->session);
     if (status == QCRYPTO_TLS_HANDSHAKE_COMPLETE) {
         trace_qio_channel_tls_handshake_complete(ioc);
         if (qcrypto_tls_session_check_credentials(ioc->session,
@@ -247,6 +248,85 @@ void qio_channel_tls_handshake(QIOChannelTLS *ioc,
     qio_channel_tls_handshake_task(ioc, task, context);
 }
 
+static gboolean qio_channel_tls_bye_io(QIOChannel *ioc, GIOCondition condition,
+                                       gpointer user_data);
+
+static void qio_channel_tls_bye_task(QIOChannelTLS *ioc, QIOTask *task,
+                                     GMainContext *context)
+{
+    GIOCondition condition;
+    QIOChannelTLSData *data;
+    int status;
+    Error *err = NULL;
+
+    status = qcrypto_tls_session_bye(ioc->session, &err);
+
+    if (status < 0) {
+        trace_qio_channel_tls_bye_fail(ioc);
+        qio_task_set_error(task, err);
+        qio_task_complete(task);
+        return;
+    }
+
+    if (status == QCRYPTO_TLS_BYE_COMPLETE) {
+        qio_task_complete(task);
+        return;
+    }
+
+    data = g_new0(typeof(*data), 1);
+    data->task = task;
+    data->context = context;
+
+    if (context) {
+        g_main_context_ref(context);
+    }
+
+    if (status == QCRYPTO_TLS_BYE_SENDING) {
+        condition = G_IO_OUT;
+    } else {
+        condition = G_IO_IN;
+    }
+
+    trace_qio_channel_tls_bye_pending(ioc, status);
+    ioc->bye_ioc_tag = qio_channel_add_watch_full(ioc->master, condition,
+                                                  qio_channel_tls_bye_io,
+                                                  data, NULL, context);
+}
+
+
+static gboolean qio_channel_tls_bye_io(QIOChannel *ioc, GIOCondition condition,
+                                       gpointer user_data)
+{
+    QIOChannelTLSData *data = user_data;
+    QIOTask *task = data->task;
+    GMainContext *context = data->context;
+    QIOChannelTLS *tioc = QIO_CHANNEL_TLS(qio_task_get_source(task));
+
+    tioc->bye_ioc_tag = 0;
+    g_free(data);
+    qio_channel_tls_bye_task(tioc, task, context);
+
+    if (context) {
+        g_main_context_unref(context);
+    }
+
+    return FALSE;
+}
+
+static void propagate_error(QIOTask *task, gpointer opaque)
+{
+    qio_task_propagate_error(task, opaque);
+}
+
+void qio_channel_tls_bye(QIOChannelTLS *ioc, Error **errp)
+{
+    QIOTask *task;
+
+    task = qio_task_new(OBJECT(ioc), propagate_error, errp, NULL);
+
+    trace_qio_channel_tls_bye_start(ioc);
+    qio_channel_tls_bye_task(ioc, task, NULL);
+}
 
 static void qio_channel_tls_init(Object *obj G_GNUC_UNUSED)
 {
@@ -279,6 +359,7 @@ static ssize_t qio_channel_tls_readv(QIOChannel *ioc,
             tioc->session,
             iov[i].iov_base,
             iov[i].iov_len,
+            flags & QIO_CHANNEL_READ_FLAG_RELAXED_EOF ||
             qatomic_load_acquire(&tioc->shutdown) & QIO_CHANNEL_SHUTDOWN_READ,
             errp);
         if (ret == QCRYPTO_TLS_SESSION_ERR_BLOCK) {
@@ -379,6 +460,11 @@ static int qio_channel_tls_close(QIOChannel *ioc,
         g_clear_handle_id(&tioc->hs_ioc_tag, g_source_remove);
     }
 
+    if (tioc->bye_ioc_tag) {
+        trace_qio_channel_tls_bye_cancel(ioc);
+        g_clear_handle_id(&tioc->bye_ioc_tag, g_source_remove);
+    }
+
     return qio_channel_close(tioc->master, errp);
 }
 
diff --git a/io/channel.c b/io/channel.c
index e3f17c24a00ff..ebd93227651f1 100644
--- a/io/channel.c
+++ b/io/channel.c
@@ -115,7 +115,8 @@ int coroutine_mixed_fn qio_channel_readv_all_eof(QIOChannel *ioc,
                                                  size_t niov,
                                                  Error **errp)
 {
-    return qio_channel_readv_full_all_eof(ioc, iov, niov, NULL, NULL, errp);
+    return qio_channel_readv_full_all_eof(ioc, iov, niov, NULL, NULL, 0,
+                                          errp);
 }
 
 int coroutine_mixed_fn qio_channel_readv_all(QIOChannel *ioc,
@@ -130,6 +131,7 @@ int coroutine_mixed_fn qio_channel_readv_full_all_eof(QIOChannel *ioc,
                                                       const struct iovec *iov,
                                                       size_t niov,
                                                       int **fds, size_t *nfds,
+                                                      int flags,
                                                       Error **errp)
 {
     int ret = -1;
@@ -155,7 +157,7 @@ int coroutine_mixed_fn qio_channel_readv_full_all_eof(QIOChannel *ioc,
     while ((nlocal_iov > 0) || local_fds) {
         ssize_t len;
         len = qio_channel_readv_full(ioc, local_iov, nlocal_iov, local_fds,
-                                     local_nfds, 0, errp);
+                                     local_nfds, flags, errp);
         if (len == QIO_CHANNEL_ERR_BLOCK) {
             if (qemu_in_coroutine()) {
                 qio_channel_yield(ioc, G_IO_IN);
@@ -222,7 +224,8 @@ int coroutine_mixed_fn qio_channel_readv_full_all(QIOChannel *ioc,
                                                   int **fds, size_t *nfds,
                                                   Error **errp)
 {
-    int ret = qio_channel_readv_full_all_eof(ioc, iov, niov, fds, nfds, errp);
+    int ret = qio_channel_readv_full_all_eof(ioc, iov, niov, fds, nfds, 0,
+                                             errp);
 
     if (ret == 0) {
         error_setg(errp, "Unexpected end-of-file before all data were read");
diff --git a/io/trace-events b/io/trace-events
index d4c0f84a9a2fc..dc3a63ba1fb25 100644
--- a/io/trace-events
+++ b/io/trace-events
@@ -44,6 +44,11 @@ qio_channel_tls_handshake_pending(void *ioc, int status) "TLS handshake pending
 qio_channel_tls_handshake_fail(void *ioc) "TLS handshake fail ioc=%p"
 qio_channel_tls_handshake_complete(void *ioc) "TLS handshake complete ioc=%p"
 qio_channel_tls_handshake_cancel(void *ioc) "TLS handshake cancel ioc=%p"
+qio_channel_tls_bye_start(void *ioc) "TLS termination start ioc=%p"
+qio_channel_tls_bye_pending(void *ioc, int status) "TLS termination pending ioc=%p status=%d"
+qio_channel_tls_bye_fail(void *ioc) "TLS termination fail ioc=%p"
+qio_channel_tls_bye_complete(void *ioc) "TLS termination complete ioc=%p"
+qio_channel_tls_bye_cancel(void *ioc) "TLS termination cancel ioc=%p"
 qio_channel_tls_credentials_allow(void *ioc) "TLS credentials allow ioc=%p"
 qio_channel_tls_credentials_deny(void *ioc) "TLS credentials deny ioc=%p"
 
diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h
index dccd5d965f3d7..ec1e82bdc8882 100644
--- a/linux-headers/asm-arm64/kvm.h
+++ b/linux-headers/asm-arm64/kvm.h
@@ -43,9 +43,6 @@
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #define KVM_DIRTY_LOG_PAGE_OFFSET 64
 
-#define KVM_REG_SIZE(id)						\
-	(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
-
 struct kvm_regs {
 	struct user_pt_regs regs;	/* sp = sp_el0 */
 
diff --git a/linux-headers/asm-loongarch/kvm_para.h b/linux-headers/asm-loongarch/kvm_para.h
index 4ba4ad8db1d9a..fd7f40713d492 100644
--- a/linux-headers/asm-loongarch/kvm_para.h
+++ b/linux-headers/asm-loongarch/kvm_para.h
@@ -17,5 +17,6 @@
 #define  KVM_FEATURE_STEAL_TIME		2
 /* BIT 24 - 31 are features configurable by user space vmm */
 #define  KVM_FEATURE_VIRT_EXTIOI	24
+#define  KVM_FEATURE_USER_HCALL		25
 
 #endif /* _ASM_KVM_PARA_H */
diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h
index 3482c9a73d1b6..f06bc5efcd79a 100644
--- a/linux-headers/asm-riscv/kvm.h
+++ b/linux-headers/asm-riscv/kvm.h
@@ -179,6 +179,9 @@ enum KVM_RISCV_ISA_EXT_ID {
 	KVM_RISCV_ISA_EXT_SSNPM,
 	KVM_RISCV_ISA_EXT_SVADE,
 	KVM_RISCV_ISA_EXT_SVADU,
+	KVM_RISCV_ISA_EXT_SVVPTC,
+	KVM_RISCV_ISA_EXT_ZABHA,
+	KVM_RISCV_ISA_EXT_ZICCRSE,
 	KVM_RISCV_ISA_EXT_MAX,
 };
 
@@ -198,6 +201,7 @@ enum KVM_RISCV_SBI_EXT_ID {
 	KVM_RISCV_SBI_EXT_VENDOR,
 	KVM_RISCV_SBI_EXT_DBCN,
 	KVM_RISCV_SBI_EXT_STA,
+	KVM_RISCV_SBI_EXT_SUSP,
 	KVM_RISCV_SBI_EXT_MAX,
 };
 
@@ -211,9 +215,6 @@ struct kvm_riscv_sbi_sta {
 #define KVM_RISCV_TIMER_STATE_OFF	0
 #define KVM_RISCV_TIMER_STATE_ON	1
 
-#define KVM_REG_SIZE(id)		\
-	(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
-
 /* If you need to interpret the index values, here is the key: */
 #define KVM_REG_RISCV_TYPE_MASK		0x00000000FF000000
 #define KVM_REG_RISCV_TYPE_SHIFT	24
diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h
index 96589490c426c..86f2c34e7afa2 100644
--- a/linux-headers/asm-x86/kvm.h
+++ b/linux-headers/asm-x86/kvm.h
@@ -923,5 +923,6 @@ struct kvm_hyperv_eventfd {
 #define KVM_X86_SEV_VM		2
 #define KVM_X86_SEV_ES_VM	3
 #define KVM_X86_SNP_VM		4
+#define KVM_X86_TDX_VM		5
 
 #endif /* _ASM_X86_KVM_H */
diff --git a/linux-headers/linux/iommufd.h b/linux-headers/linux/iommufd.h
index 37aae16502049..ccbdca5e1138d 100644
--- a/linux-headers/linux/iommufd.h
+++ b/linux-headers/linux/iommufd.h
@@ -297,7 +297,7 @@ struct iommu_ioas_unmap {
  *                       ioctl(IOMMU_OPTION_HUGE_PAGES)
  * @IOMMU_OPTION_RLIMIT_MODE:
  *    Change how RLIMIT_MEMLOCK accounting works. The caller must have privilege
- *    to invoke this. Value 0 (default) is user based accouting, 1 uses process
+ *    to invoke this. Value 0 (default) is user based accounting, 1 uses process
  *    based accounting. Global option, object_id must be 0
  * @IOMMU_OPTION_HUGE_PAGES:
  *    Value 1 (default) allows contiguous pages to be combined when generating
@@ -390,7 +390,7 @@ struct iommu_vfio_ioas {
  * @IOMMU_HWPT_ALLOC_PASID: Requests a domain that can be used with PASID. The
  *                          domain can be attached to any PASID on the device.
  *                          Any domain attached to the non-PASID part of the
- *                          device must also be flaged, otherwise attaching a
+ *                          device must also be flagged, otherwise attaching a
  *                          PASID will blocked.
  *                          If IOMMU does not support PASID it will return
  *                          error (-EOPNOTSUPP).
@@ -558,16 +558,25 @@ struct iommu_hw_info_vtd {
  * For the details of @idr, @iidr and @aidr, please refer to the chapters
  * from 6.3.1 to 6.3.6 in the SMMUv3 Spec.
  *
- * User space should read the underlying ARM SMMUv3 hardware information for
- * the list of supported features.
+ * This reports the raw HW capability, and not all bits are meaningful to be
+ * read by userspace. Only the following fields should be used:
  *
- * Note that these values reflect the raw HW capability, without any insight if
- * any required kernel driver support is present. Bits may be set indicating the
- * HW has functionality that is lacking kernel software support, such as BTM. If
- * a VMM is using this information to construct emulated copies of these
- * registers it should only forward bits that it knows it can support.
+ * idr[0]: ST_LEVEL, TERM_MODEL, STALL_MODEL, TTENDIAN , CD2L, ASID16, TTF
+ * idr[1]: SIDSIZE, SSIDSIZE
+ * idr[3]: BBML, RIL
+ * idr[5]: VAX, GRAN64K, GRAN16K, GRAN4K
  *
- * In future, presence of required kernel support will be indicated in flags.
+ * - S1P should be assumed to be true if a NESTED HWPT can be created
+ * - VFIO/iommufd only support platforms with COHACC, it should be assumed to be
+ *   true.
+ * - ATS is a per-device property. If the VMM describes any devices as ATS
+ *   capable in ACPI/DT it should set the corresponding idr.
+ *
+ * This list may expand in future (eg E0PD, AIE, PBHA, D128, DS etc). It is
+ * important that VMMs do not read bits outside the list to allow for
+ * compatibility with future kernels. Several features in the SMMUv3
+ * architecture are not currently supported by the kernel for nesting: HTTU,
+ * BTM, MPAM and others.
  */
 struct iommu_hw_info_arm_smmuv3 {
 	__u32 flags;
@@ -766,7 +775,7 @@ struct iommu_hwpt_vtd_s1_invalidate {
 };
 
 /**
- * struct iommu_viommu_arm_smmuv3_invalidate - ARM SMMUv3 cahce invalidation
+ * struct iommu_viommu_arm_smmuv3_invalidate - ARM SMMUv3 cache invalidation
  *         (IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3)
  * @cmd: 128-bit cache invalidation command that runs in SMMU CMDQ.
  *       Must be little-endian.
@@ -859,6 +868,7 @@ enum iommu_hwpt_pgfault_perm {
  * @pasid: Process Address Space ID
  * @grpid: Page Request Group Index
  * @perm: Combination of enum iommu_hwpt_pgfault_perm
+ * @__reserved: Must be 0.
  * @addr: Fault address
  * @length: a hint of how much data the requestor is expecting to fetch. For
  *          example, if the PRI initiator knows it is going to do a 10MB
@@ -874,7 +884,8 @@ struct iommu_hwpt_pgfault {
 	__u32 pasid;
 	__u32 grpid;
 	__u32 perm;
-	__u64 addr;
+	__u32 __reserved;
+	__aligned_u64 addr;
 	__u32 length;
 	__u32 cookie;
 };
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index 3bcd4eabe3245..27181b3dd8fb2 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -609,10 +609,6 @@ struct kvm_ioeventfd {
 #define KVM_X86_DISABLE_EXITS_HLT            (1 << 1)
 #define KVM_X86_DISABLE_EXITS_PAUSE          (1 << 2)
 #define KVM_X86_DISABLE_EXITS_CSTATE         (1 << 3)
-#define KVM_X86_DISABLE_VALID_EXITS          (KVM_X86_DISABLE_EXITS_MWAIT | \
-                                              KVM_X86_DISABLE_EXITS_HLT | \
-                                              KVM_X86_DISABLE_EXITS_PAUSE | \
-                                              KVM_X86_DISABLE_EXITS_CSTATE)
 
 /* for KVM_ENABLE_CAP */
 struct kvm_enable_cap {
@@ -1062,6 +1058,10 @@ struct kvm_dirty_tlb {
 
 #define KVM_REG_SIZE_SHIFT	52
 #define KVM_REG_SIZE_MASK	0x00f0000000000000ULL
+
+#define KVM_REG_SIZE(id)		\
+	(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
+
 #define KVM_REG_SIZE_U8		0x0000000000000000ULL
 #define KVM_REG_SIZE_U16	0x0010000000000000ULL
 #define KVM_REG_SIZE_U32	0x0020000000000000ULL
diff --git a/linux-headers/linux/stddef.h b/linux-headers/linux/stddef.h
index 96aa341942b05..e1416f793738f 100644
--- a/linux-headers/linux/stddef.h
+++ b/linux-headers/linux/stddef.h
@@ -8,6 +8,13 @@
 #define __always_inline __inline__
 #endif
 
+/* Not all C++ standards support type declarations inside an anonymous union */
+#ifndef __cplusplus
+#define __struct_group_tag(TAG)		TAG
+#else
+#define __struct_group_tag(TAG)
+#endif
+
 /**
  * __struct_group() - Create a mirrored named and anonyomous struct
  *
@@ -20,13 +27,13 @@
  * and size: one anonymous and one named. The former's members can be used
  * normally without sub-struct naming, and the latter can be used to
  * reason about the start, end, and size of the group of struct members.
- * The named struct can also be explicitly tagged for layer reuse, as well
- * as both having struct attributes appended.
+ * The named struct can also be explicitly tagged for layer reuse (C only),
+ * as well as both having struct attributes appended.
  */
 #define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \
 	union { \
 		struct { MEMBERS } ATTRS; \
-		struct TAG { MEMBERS } ATTRS NAME; \
+		struct __struct_group_tag(TAG) { MEMBERS } ATTRS NAME; \
 	} ATTRS
 
 #ifdef __cplusplus
diff --git a/linux-headers/linux/vduse.h b/linux-headers/linux/vduse.h
index 6d2ca064b5d65..f46269af349a1 100644
--- a/linux-headers/linux/vduse.h
+++ b/linux-headers/linux/vduse.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
 #ifndef _VDUSE_H_
 #define _VDUSE_H_
 
diff --git a/linux-user/aarch64/cpu_loop.c b/linux-user/aarch64/cpu_loop.c
index c5d8a483a3fba..fea43cefa6bc9 100644
--- a/linux-user/aarch64/cpu_loop.c
+++ b/linux-user/aarch64/cpu_loop.c
@@ -27,54 +27,6 @@
 #include "target/arm/syndrome.h"
 #include "target/arm/cpu-features.h"
 
-#define get_user_code_u32(x, gaddr, env)                \
-    ({ abi_long __r = get_user_u32((x), (gaddr));       \
-        if (!__r && bswap_code(arm_sctlr_b(env))) {     \
-            (x) = bswap32(x);                           \
-        }                                               \
-        __r;                                            \
-    })
-
-#define get_user_code_u16(x, gaddr, env)                \
-    ({ abi_long __r = get_user_u16((x), (gaddr));       \
-        if (!__r && bswap_code(arm_sctlr_b(env))) {     \
-            (x) = bswap16(x);                           \
-        }                                               \
-        __r;                                            \
-    })
-
-#define get_user_data_u32(x, gaddr, env)                \
-    ({ abi_long __r = get_user_u32((x), (gaddr));       \
-        if (!__r && arm_cpu_bswap_data(env)) {          \
-            (x) = bswap32(x);                           \
-        }                                               \
-        __r;                                            \
-    })
-
-#define get_user_data_u16(x, gaddr, env)                \
-    ({ abi_long __r = get_user_u16((x), (gaddr));       \
-        if (!__r && arm_cpu_bswap_data(env)) {          \
-            (x) = bswap16(x);                           \
-        }                                               \
-        __r;                                            \
-    })
-
-#define put_user_data_u32(x, gaddr, env)                \
-    ({ typeof(x) __x = (x);                             \
-        if (arm_cpu_bswap_data(env)) {                  \
-            __x = bswap32(__x);                         \
-        }                                               \
-        put_user_u32(__x, (gaddr));                     \
-    })
-
-#define put_user_data_u16(x, gaddr, env)                \
-    ({ typeof(x) __x = (x);                             \
-        if (arm_cpu_bswap_data(env)) {                  \
-            __x = bswap16(__x);                         \
-        }                                               \
-        put_user_u16(__x, (gaddr));                     \
-    })
-
 /* AArch64 main loop */
 void cpu_loop(CPUARMState *env)
 {
diff --git a/linux-user/aarch64/target_signal.h b/linux-user/aarch64/target_signal.h
index 40e399d9908df..6f66a50bfd2a6 100644
--- a/linux-user/aarch64/target_signal.h
+++ b/linux-user/aarch64/target_signal.h
@@ -3,6 +3,8 @@
 
 #include "../generic/signal.h"
 
+#define TARGET_SA_RESTORER      0x04000000
+
 #define TARGET_SEGV_MTEAERR  8  /* Asynchronous ARM MTE error */
 #define TARGET_SEGV_MTESERR  9  /* Synchronous ARM MTE exception */
 
diff --git a/linux-user/alpha/target_proc.h b/linux-user/alpha/target_proc.h
index dac37dffc9db1..da437ee0e566f 100644
--- a/linux-user/alpha/target_proc.h
+++ b/linux-user/alpha/target_proc.h
@@ -15,7 +15,7 @@ static int open_cpuinfo(CPUArchState *cpu_env, int fd)
     const char *p, *q;
     int t;
 
-    p = object_class_get_name(OBJECT_CLASS(CPU_GET_CLASS(env_cpu(cpu_env))));
+    p = object_class_get_name(OBJECT_CLASS(env_cpu(cpu_env)->cc));
     q = strchr(p, '-');
     t = q - p;
     assert(t < sizeof(model));
diff --git a/linux-user/arm/cpu_loop.c b/linux-user/arm/cpu_loop.c
index 10d8561f9b974..7416e3216ea18 100644
--- a/linux-user/arm/cpu_loop.c
+++ b/linux-user/arm/cpu_loop.c
@@ -36,45 +36,10 @@
         __r;                                            \
     })
 
-#define get_user_code_u16(x, gaddr, env)                \
-    ({ abi_long __r = get_user_u16((x), (gaddr));       \
-        if (!__r && bswap_code(arm_sctlr_b(env))) {     \
-            (x) = bswap16(x);                           \
-        }                                               \
-        __r;                                            \
-    })
-
-#define get_user_data_u32(x, gaddr, env)                \
-    ({ abi_long __r = get_user_u32((x), (gaddr));       \
-        if (!__r && arm_cpu_bswap_data(env)) {          \
-            (x) = bswap32(x);                           \
-        }                                               \
-        __r;                                            \
-    })
-
-#define get_user_data_u16(x, gaddr, env)                \
-    ({ abi_long __r = get_user_u16((x), (gaddr));       \
-        if (!__r && arm_cpu_bswap_data(env)) {          \
-            (x) = bswap16(x);                           \
-        }                                               \
-        __r;                                            \
-    })
-
-#define put_user_data_u32(x, gaddr, env)                \
-    ({ typeof(x) __x = (x);                             \
-        if (arm_cpu_bswap_data(env)) {                  \
-            __x = bswap32(__x);                         \
-        }                                               \
-        put_user_u32(__x, (gaddr));                     \
-    })
-
-#define put_user_data_u16(x, gaddr, env)                \
-    ({ typeof(x) __x = (x);                             \
-        if (arm_cpu_bswap_data(env)) {                  \
-            __x = bswap16(__x);                         \
-        }                                               \
-        put_user_u16(__x, (gaddr));                     \
-    })
+/*
+ * Note that if we need to do data accesses here, they should do a
+ * bswap if arm_cpu_bswap_data() returns true.
+ */
 
 /*
  * Similar to code in accel/tcg/user-exec.c, but outside the execution loop.
diff --git a/linux-user/arm/target_signal.h b/linux-user/arm/target_signal.h
index 0e6351d9f7898..ff1810b1fe022 100644
--- a/linux-user/arm/target_signal.h
+++ b/linux-user/arm/target_signal.h
@@ -3,6 +3,8 @@
 
 #include "../generic/signal.h"
 
+#define TARGET_SA_RESTORER      0x04000000
+
 #define TARGET_ARCH_HAS_SETUP_FRAME
 #define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1
 
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index a2c152e5ad117..8799e4ea27df0 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -3351,8 +3351,8 @@ static void load_elf_image(const char *image_name, const ImageSource *src,
 
     if (align_size != reserve_size) {
         abi_ulong align_addr = ROUND_UP(load_addr, align);
-        abi_ulong align_end = align_addr + reserve_size;
-        abi_ulong load_end = load_addr + align_size;
+        abi_ulong align_end = TARGET_PAGE_ALIGN(align_addr + reserve_size);
+        abi_ulong load_end = TARGET_PAGE_ALIGN(load_addr + align_size);
 
         if (align_addr != load_addr) {
             target_munmap(load_addr, align_addr - load_addr);
diff --git a/linux-user/generic/signal.h b/linux-user/generic/signal.h
index 6fd05b77bb3ec..b34740258edb0 100644
--- a/linux-user/generic/signal.h
+++ b/linux-user/generic/signal.h
@@ -15,7 +15,6 @@
 #define TARGET_SA_RESTART       0x10000000
 #define TARGET_SA_NODEFER       0x40000000
 #define TARGET_SA_RESETHAND     0x80000000
-#define TARGET_SA_RESTORER      0x04000000
 
 #define TARGET_SIGHUP            1
 #define TARGET_SIGINT            2
diff --git a/linux-user/i386/target_signal.h b/linux-user/i386/target_signal.h
index 9315cba241cf4..eee792ef637e5 100644
--- a/linux-user/i386/target_signal.h
+++ b/linux-user/i386/target_signal.h
@@ -3,6 +3,8 @@
 
 #include "../generic/signal.h"
 
+#define TARGET_SA_RESTORER      0x04000000
+
 #define TARGET_ARCH_HAS_SETUP_FRAME
 #define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1
 
diff --git a/linux-user/m68k/target_signal.h b/linux-user/m68k/target_signal.h
index 6e0f4b74e391c..b05b9303b0d5c 100644
--- a/linux-user/m68k/target_signal.h
+++ b/linux-user/m68k/target_signal.h
@@ -3,6 +3,7 @@
 
 #include "../generic/signal.h"
 
+#define TARGET_ARCH_HAS_SA_RESTORER 1
 #define TARGET_ARCH_HAS_SETUP_FRAME
 #define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1
 
diff --git a/linux-user/main.c b/linux-user/main.c
index 5c74c52cc52ad..e2ec5970bed20 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -72,6 +72,7 @@ char *exec_path;
 char real_exec_path[PATH_MAX];
 
 static bool opt_one_insn_per_tb;
+static unsigned long opt_tb_size;
 static const char *argv0;
 static const char *gdbstub;
 static envlist_t *envlist;
@@ -425,6 +426,13 @@ static void handle_arg_one_insn_per_tb(const char *arg)
     opt_one_insn_per_tb = true;
 }
 
+static void handle_arg_tb_size(const char *arg)
+{
+    if (qemu_strtoul(arg, NULL, 0, &opt_tb_size)) {
+        usage(EXIT_FAILURE);
+    }
+}
+
 static void handle_arg_strace(const char *arg)
 {
     enable_strace = true;
@@ -517,6 +525,8 @@ static const struct qemu_argument arg_table[] = {
     {"one-insn-per-tb",
                    "QEMU_ONE_INSN_PER_TB",  false, handle_arg_one_insn_per_tb,
      "",           "run with one guest instruction per emulated TB"},
+    {"tb-size",    "QEMU_TB_SIZE",     true,  handle_arg_tb_size,
+     "size",       "TCG translation block cache size"},
     {"strace",     "QEMU_STRACE",      false, handle_arg_strace,
      "",           "log system calls"},
     {"seed",       "QEMU_RAND_SEED",   true,  handle_arg_seed,
@@ -808,6 +818,8 @@ int main(int argc, char **argv, char **envp)
         accel_init_interfaces(ac);
         object_property_set_bool(OBJECT(accel), "one-insn-per-tb",
                                  opt_one_insn_per_tb, &error_abort);
+        object_property_set_int(OBJECT(accel), "tb-size",
+                                opt_tb_size, &error_abort);
         ac->init_machine(NULL);
     }
 
diff --git a/linux-user/meson.build b/linux-user/meson.build
index f75b4fe0e3ec3..f47a213ca3dd9 100644
--- a/linux-user/meson.build
+++ b/linux-user/meson.build
@@ -27,6 +27,7 @@ linux_user_ss.add(libdw)
 linux_user_ss.add(when: 'TARGET_HAS_BFLT', if_true: files('flatload.c'))
 linux_user_ss.add(when: 'TARGET_I386', if_true: files('vm86.c'))
 linux_user_ss.add(when: 'CONFIG_ARM_COMPATIBLE_SEMIHOSTING', if_true: files('semihost.c'))
+linux_user_ss.add(when: 'CONFIG_TCG_PLUGINS', if_true: files('plugin-api.c'))
 
 syscall_nr_generators = {}
 
diff --git a/linux-user/microblaze/target_signal.h b/linux-user/microblaze/target_signal.h
index 7dc5c45f00aca..ffe4442213a97 100644
--- a/linux-user/microblaze/target_signal.h
+++ b/linux-user/microblaze/target_signal.h
@@ -3,6 +3,8 @@
 
 #include "../generic/signal.h"
 
+#define TARGET_SA_RESTORER      0x04000000
+
 #define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1
 
 #endif /* MICROBLAZE_TARGET_SIGNAL_H */
diff --git a/linux-user/mmap.c b/linux-user/mmap.c
index 6828b17a63faf..d1f36e6f16ba8 100644
--- a/linux-user/mmap.c
+++ b/linux-user/mmap.c
@@ -21,6 +21,7 @@
 #include "trace.h"
 #include "exec/log.h"
 #include "exec/page-protection.h"
+#include "exec/tb-flush.h"
 #include "exec/translation-block.h"
 #include "qemu.h"
 #include "user/page-protection.h"
diff --git a/linux-user/plugin-api.c b/linux-user/plugin-api.c
new file mode 100644
index 0000000000000..66755df5263b0
--- /dev/null
+++ b/linux-user/plugin-api.c
@@ -0,0 +1,15 @@
+/*
+ * QEMU Plugin API - linux-user-mode only implementations
+ *
+ * Common user-mode only APIs are in plugins/api-user. These helpers
+ * are only specific to linux-user.
+ *
+ * Copyright (C) 2017, Emilio G. Cota <cota@braap.org>
+ * Copyright (C) 2019-2025, Linaro
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu.h"
+#include "common-user/plugin-api.c.inc"
diff --git a/linux-user/ppc/target_signal.h b/linux-user/ppc/target_signal.h
index 5be24e152b7ee..53fae473f3c39 100644
--- a/linux-user/ppc/target_signal.h
+++ b/linux-user/ppc/target_signal.h
@@ -3,6 +3,8 @@
 
 #include "../generic/signal.h"
 
+#define TARGET_SA_RESTORER      0x04000000
+
 #if !defined(TARGET_PPC64)
 #define TARGET_ARCH_HAS_SETUP_FRAME
 #endif
diff --git a/linux-user/s390x/target_signal.h b/linux-user/s390x/target_signal.h
index 41e0e34a55d75..738e0673f4a3e 100644
--- a/linux-user/s390x/target_signal.h
+++ b/linux-user/s390x/target_signal.h
@@ -3,6 +3,8 @@
 
 #include "../generic/signal.h"
 
+#define TARGET_SA_RESTORER      0x04000000
+
 #define TARGET_ARCH_HAS_SETUP_FRAME
 #define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1
 
diff --git a/linux-user/sh4/target_signal.h b/linux-user/sh4/target_signal.h
index eee6a1a7cda41..0bde417fd1fd3 100644
--- a/linux-user/sh4/target_signal.h
+++ b/linux-user/sh4/target_signal.h
@@ -3,6 +3,8 @@
 
 #include "../generic/signal.h"
 
+#define TARGET_SA_RESTORER      0x04000000
+
 #define TARGET_ARCH_HAS_SETUP_FRAME
 #define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1
 
diff --git a/linux-user/signal.c b/linux-user/signal.c
index 81a98c6d02f5d..4dafc2c3a2980 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -21,7 +21,7 @@
 #include "qemu/cutils.h"
 #include "gdbstub/user.h"
 #include "exec/page-protection.h"
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 
 #include <sys/ucontext.h>
 #include <sys/resource.h>
@@ -753,7 +753,7 @@ void force_sigsegv(int oldsig)
 void cpu_loop_exit_sigsegv(CPUState *cpu, target_ulong addr,
                            MMUAccessType access_type, bool maperr, uintptr_t ra)
 {
-    const TCGCPUOps *tcg_ops = CPU_GET_CLASS(cpu)->tcg_ops;
+    const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
 
     if (tcg_ops->record_sigsegv) {
         tcg_ops->record_sigsegv(cpu, addr, access_type, maperr, ra);
@@ -769,7 +769,7 @@ void cpu_loop_exit_sigsegv(CPUState *cpu, target_ulong addr,
 void cpu_loop_exit_sigbus(CPUState *cpu, target_ulong addr,
                           MMUAccessType access_type, uintptr_t ra)
 {
-    const TCGCPUOps *tcg_ops = CPU_GET_CLASS(cpu)->tcg_ops;
+    const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
 
     if (tcg_ops->record_sigbus) {
         tcg_ops->record_sigbus(cpu, addr, access_type, ra);
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 02ea4221c96d9..b32de763f7e78 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -26,6 +26,7 @@
 #include "tcg/startup.h"
 #include "target_mman.h"
 #include "exec/page-protection.h"
+#include "exec/tb-flush.h"
 #include "exec/translation-block.h"
 #include <elf.h>
 #include <endian.h>
diff --git a/linux-user/user-internals.h b/linux-user/user-internals.h
index b9b05c1d11f28..4aa253b566360 100644
--- a/linux-user/user-internals.h
+++ b/linux-user/user-internals.h
@@ -20,7 +20,6 @@
 
 #include "user/thunk.h"
 #include "exec/exec-all.h"
-#include "exec/tb-flush.h"
 #include "qemu/log.h"
 
 extern char *exec_path;
diff --git a/linux-user/user-mmap.h b/linux-user/user-mmap.h
index b94bcdcf83c50..dfc4477a720d6 100644
--- a/linux-user/user-mmap.h
+++ b/linux-user/user-mmap.h
@@ -18,6 +18,8 @@
 #ifndef LINUX_USER_USER_MMAP_H
 #define LINUX_USER_USER_MMAP_H
 
+#include "user/mmap.h"
+
 /*
  * Guest parameters for the ADDR_COMPAT_LAYOUT personality
  * (at present this is the only layout supported by QEMU).
@@ -39,24 +41,7 @@
 extern abi_ulong task_unmapped_base;
 extern abi_ulong elf_et_dyn_base;
 
-/*
- * mmap_next_start: The base address for the next mmap without hint,
- * increased after each successful map, starting at task_unmapped_base.
- * This is an optimization within QEMU and not part of ADDR_COMPAT_LAYOUT.
- */
-extern abi_ulong mmap_next_start;
-
-int target_mprotect(abi_ulong start, abi_ulong len, int prot);
-abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
-                     int flags, int fd, off_t offset);
-int target_munmap(abi_ulong start, abi_ulong len);
-abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
-                       abi_ulong new_size, unsigned long flags,
-                       abi_ulong new_addr);
 abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice);
-abi_ulong mmap_find_vma(abi_ulong, abi_ulong, abi_ulong);
-void mmap_fork_start(void);
-void mmap_fork_end(int child);
 
 abi_ulong target_shmat(CPUArchState *cpu_env, int shmid,
                        abi_ulong shmaddr, int shmflg);
diff --git a/linux-user/x86_64/target_signal.h b/linux-user/x86_64/target_signal.h
index 9d9717406f3be..0af100c661102 100644
--- a/linux-user/x86_64/target_signal.h
+++ b/linux-user/x86_64/target_signal.h
@@ -3,6 +3,8 @@
 
 #include "../generic/signal.h"
 
+#define TARGET_SA_RESTORER      0x04000000
+
 /* For x86_64, use of SA_RESTORER is mandatory. */
 #define TARGET_ARCH_HAS_SIGTRAMP_PAGE 0
 
diff --git a/linux-user/xtensa/target_signal.h b/linux-user/xtensa/target_signal.h
index e4b1bea5cb5dd..8a198bf8ac88e 100644
--- a/linux-user/xtensa/target_signal.h
+++ b/linux-user/xtensa/target_signal.h
@@ -3,6 +3,8 @@
 
 #include "../generic/signal.h"
 
+#define TARGET_SA_RESTORER      0x04000000
+
 #define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1
 
 #endif
diff --git a/meson.build b/meson.build
index 18cf9e2913b99..41f68d38069b4 100644
--- a/meson.build
+++ b/meson.build
@@ -5,9 +5,12 @@ project('qemu', ['c'], meson_version: '>=1.5.0',
 
 meson.add_devenv({ 'MESON_BUILD_ROOT' : meson.project_build_root() })
 
-add_test_setup('quick', exclude_suites: ['slow', 'thorough'], is_default: true)
-add_test_setup('slow', exclude_suites: ['thorough'], env: ['G_TEST_SLOW=1', 'SPEED=slow'])
-add_test_setup('thorough', env: ['G_TEST_SLOW=1', 'SPEED=thorough'])
+add_test_setup('quick', exclude_suites: ['slow', 'thorough'], is_default: true,
+               env: ['RUST_BACKTRACE=1'])
+add_test_setup('slow', exclude_suites: ['thorough'],
+               env: ['G_TEST_SLOW=1', 'SPEED=slow', 'RUST_BACKTRACE=1'])
+add_test_setup('thorough',
+               env: ['G_TEST_SLOW=1', 'SPEED=thorough', 'RUST_BACKTRACE=1'])
 
 meson.add_postconf_script(find_program('scripts/symlink-install-tree.py'))
 
@@ -821,7 +824,6 @@ version_res = []
 coref = []
 iokit = []
 pvg = not_found
-metal = []
 emulator_link_args = []
 midl = not_found
 widl = not_found
@@ -843,8 +845,8 @@ elif host_os == 'darwin'
   coref = dependency('appleframeworks', modules: 'CoreFoundation')
   iokit = dependency('appleframeworks', modules: 'IOKit', required: false)
   host_dsosuf = '.dylib'
-  pvg = dependency('appleframeworks', modules: 'ParavirtualizedGraphics')
-  metal = dependency('appleframeworks', modules: 'Metal')
+  pvg = dependency('appleframeworks', modules: ['ParavirtualizedGraphics', 'Metal'],
+                   required: get_option('pvg'))
 elif host_os == 'sunos'
   socket = [cc.find_library('socket'),
             cc.find_library('nsl'),
@@ -2724,6 +2726,14 @@ config_host_data.set('HAVE_OPTRESET',
                      cc.has_header_symbol('getopt.h', 'optreset'))
 config_host_data.set('HAVE_IPPROTO_MPTCP',
                      cc.has_header_symbol('netinet/in.h', 'IPPROTO_MPTCP'))
+if libaio.found()
+  config_host_data.set('HAVE_IO_PREP_PWRITEV2',
+                       cc.has_header_symbol('libaio.h', 'io_prep_pwritev2'))
+endif
+if linux_io_uring.found()
+  config_host_data.set('HAVE_IO_URING_PREP_WRITEV2',
+                       cc.has_header_symbol('liburing.h', 'io_uring_prep_writev2'))
+endif
 
 # has_member
 config_host_data.set('HAVE_SIGEV_NOTIFY_THREAD_ID',
@@ -2885,6 +2895,12 @@ config_host_data.set('HAVE_MLOCKALL', cc.links(gnu_source_prefix + '''
     return mlockall(MCL_FUTURE);
   }'''))
 
+config_host_data.set('HAVE_MLOCK_ONFAULT', cc.links(gnu_source_prefix + '''
+  #include <sys/mman.h>
+  int main(void) {
+      return mlockall(MCL_FUTURE | MCL_ONFAULT);
+  }'''))
+
 have_l2tpv3 = false
 if get_option('l2tpv3').allowed() and have_system
   have_l2tpv3 = cc.has_type('struct mmsghdr',
@@ -3361,6 +3377,12 @@ foreach target : target_dirs
     target_kconfig += 'CONFIG_' + config_target['TARGET_ARCH'].to_upper() + '=y'
     target_kconfig += 'CONFIG_TARGET_BIG_ENDIAN=' + config_target['TARGET_BIG_ENDIAN']
 
+    # PVG is not cross-architecture.  Use accelerator_targets as a proxy to
+    # figure out which target can support PVG on this host
+    if pvg.found() and target in accelerator_targets.get('CONFIG_HVF', [])
+      target_kconfig += 'CONFIG_MAC_PVG=y'
+    endif
+
     config_input = meson.get_external_property(target, 'default')
     config_devices_mak = target + '-config-devices.mak'
     config_devices_mak = configure_file(
@@ -3590,10 +3612,12 @@ if have_system
     'hw/ssi',
     'hw/timer',
     'hw/tpm',
+    'hw/uefi',
     'hw/ufs',
     'hw/usb',
     'hw/vfio',
     'hw/virtio',
+    'hw/vmapple',
     'hw/watchdog',
     'hw/xen',
     'hw/gpio',
@@ -3651,6 +3675,7 @@ qtest_module_ss = ss.source_set()
 
 modules = {}
 target_modules = {}
+plugin_modules = []
 hw_arch = {}
 target_arch = {}
 target_system_arch = {}
@@ -4002,7 +4027,7 @@ libchardev = static_library('chardev', chardev_ss.sources() + genh,
                             build_by_default: false)
 
 chardev = declare_dependency(objects: libchardev.extract_all_objects(recursive: false),
-                             dependencies: chardev_ss.dependencies())
+                             dependencies: [chardev_ss.dependencies(), io])
 
 hwcore_ss = hwcore_ss.apply({})
 libhwcore = static_library('hwcore', sources: hwcore_ss.sources() + genh,
@@ -4073,6 +4098,7 @@ if have_rust
     'MigrationPriority',
     'QEMUChrEvent',
     'QEMUClockType',
+    'ResetType',
     'device_endian',
     'module_init_type',
   ]
@@ -4826,6 +4852,9 @@ summary_info += {'libdw':             libdw}
 if host_os == 'freebsd'
   summary_info += {'libinotify-kqueue': inotify}
 endif
+if host_os == 'darwin'
+  summary_info += {'ParavirtualizedGraphics support': pvg}
+endif
 summary(summary_info, bool_yn: true, section: 'Dependencies')
 
 if host_arch == 'unknown'
diff --git a/meson_options.txt b/meson_options.txt
index 5eeaf3eee5cbc..59d973bca00fa 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -198,6 +198,8 @@ option('lzfse', type : 'feature', value : 'auto',
        description: 'lzfse support for DMG images')
 option('lzo', type : 'feature', value : 'auto',
        description: 'lzo compression support')
+option('pvg', type: 'feature', value: 'auto',
+       description: 'macOS paravirtualized graphics support')
 option('rbd', type : 'feature', value : 'auto',
        description: 'Ceph block device driver')
 option('opengl', type : 'feature', value : 'auto',
diff --git a/migration/channel.c b/migration/channel.c
index f9de064f3b134..a547b1fbfebee 100644
--- a/migration/channel.c
+++ b/migration/channel.c
@@ -33,6 +33,7 @@
 void migration_channel_process_incoming(QIOChannel *ioc)
 {
     MigrationState *s = migrate_get_current();
+    MigrationIncomingState *mis = migration_incoming_get_current();
     Error *local_err = NULL;
 
     trace_migration_set_incoming_channel(
@@ -47,6 +48,10 @@ void migration_channel_process_incoming(QIOChannel *ioc)
 
     if (local_err) {
         error_report_err(local_err);
+        migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
+        if (mis->exit_on_error) {
+            exit(EXIT_FAILURE);
+        }
     }
 }
 
@@ -74,7 +79,7 @@ void migration_channel_connect(MigrationState *s,
             if (!error) {
                 /* tls_channel_connect will call back to this
                  * function after the TLS handshake,
-                 * so we mustn't call migrate_fd_connect until then
+                 * so we mustn't call migration_connect until then
                  */
 
                 return;
@@ -89,7 +94,7 @@ void migration_channel_connect(MigrationState *s,
             qemu_mutex_unlock(&s->qemu_file_lock);
         }
     }
-    migrate_fd_connect(s, error);
+    migration_connect(s, error);
     error_free(error);
 }
 
diff --git a/migration/colo.c b/migration/colo.c
index 9a8e5fbe9b94e..c976b3ff344d7 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -452,6 +452,9 @@ static int colo_do_checkpoint_transaction(MigrationState *s,
         bql_unlock();
         goto out;
     }
+
+    qemu_savevm_maybe_send_switchover_start(s->to_dst_file);
+
     /* Note: device state is saved into buffer */
     ret = qemu_save_device_state(fb);
 
diff --git a/migration/cpr.c b/migration/cpr.c
index 584b0b98f79bf..42c46563e522d 100644
--- a/migration/cpr.c
+++ b/migration/cpr.c
@@ -128,6 +128,11 @@ void cpr_set_incoming_mode(MigMode mode)
     incoming_mode = mode;
 }
 
+bool cpr_is_incoming(void)
+{
+    return incoming_mode != MIG_MODE_NONE;
+}
+
 int cpr_state_save(MigrationChannel *channel, Error **errp)
 {
     int ret;
@@ -137,6 +142,7 @@ int cpr_state_save(MigrationChannel *channel, Error **errp)
     trace_cpr_state_save(MigMode_str(mode));
 
     if (mode == MIG_MODE_CPR_TRANSFER) {
+        g_assert(channel);
         f = cpr_transfer_output(channel, errp);
     } else {
         return 0;
diff --git a/migration/meson.build b/migration/meson.build
index d3bfe84d62045..9aa48b290e2a5 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -25,6 +25,7 @@ system_ss.add(files(
   'migration-hmp-cmds.c',
   'migration.c',
   'multifd.c',
+  'multifd-device-state.c',
   'multifd-nocomp.c',
   'multifd-zlib.c',
   'multifd-zero-page.c',
diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
index 3347e34c48913..49c26daed359f 100644
--- a/migration/migration-hmp-cmds.c
+++ b/migration/migration-hmp-cmds.c
@@ -46,6 +46,8 @@ static void migration_global_dump(Monitor *mon)
                    ms->send_configuration ? "on" : "off");
     monitor_printf(mon, "send-section-footer: %s\n",
                    ms->send_section_footer ? "on" : "off");
+    monitor_printf(mon, "send-switchover-start: %s\n",
+                   ms->send_switchover_start ? "on" : "off");
     monitor_printf(mon, "clear-bitmap-shift: %u\n",
                    ms->clear_bitmap_shift);
 }
diff --git a/migration/migration.c b/migration/migration.c
index 396928513ab02..d46e776e244fd 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -106,7 +106,6 @@ static GSList *migration_blockers[MIG_MODE__MAX];
 
 static bool migration_object_check(MigrationState *ms, Error **errp);
 static bool migration_switchover_start(MigrationState *s, Error **errp);
-static void migrate_fd_cancel(MigrationState *s);
 static bool close_return_path_on_source(MigrationState *s);
 static void migration_completion_end(MigrationState *s);
 static void migrate_hup_delete(MigrationState *s);
@@ -117,6 +116,27 @@ static void migration_downtime_start(MigrationState *s)
     s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
 }
 
+/*
+ * This is unfortunate: incoming migration actually needs the outgoing
+ * migration state (MigrationState) to be there too, e.g. to query
+ * capabilities, parameters, using locks, setup errors, etc.
+ *
+ * NOTE: when calling this, making sure current_migration exists and not
+ * been freed yet!  Otherwise trying to access the refcount is already
+ * an use-after-free itself..
+ *
+ * TODO: Move shared part of incoming / outgoing out into separate object.
+ * Then this is not needed.
+ */
+static void migrate_incoming_ref_outgoing_state(void)
+{
+    object_ref(migrate_get_current());
+}
+static void migrate_incoming_unref_outgoing_state(void)
+{
+    object_unref(migrate_get_current());
+}
+
 static void migration_downtime_end(MigrationState *s)
 {
     int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
@@ -342,17 +362,6 @@ void migration_bh_schedule(QEMUBHFunc *cb, void *opaque)
     qemu_bh_schedule(bh);
 }
 
-void migration_cancel(const Error *error)
-{
-    if (error) {
-        migrate_set_error(current_migration, error);
-    }
-    if (migrate_dirty_limit()) {
-        qmp_cancel_vcpu_dirty_limit(false, -1, NULL);
-    }
-    migrate_fd_cancel(current_migration);
-}
-
 void migration_shutdown(void)
 {
     /*
@@ -365,7 +374,7 @@ void migration_shutdown(void)
      * Cancel the current migration - that will (eventually)
      * stop the migration using this structure
      */
-    migration_cancel(NULL);
+    migration_cancel();
     object_unref(OBJECT(current_migration));
 
     /*
@@ -414,11 +423,24 @@ void migration_incoming_state_destroy(void)
     struct MigrationIncomingState *mis = migration_incoming_get_current();
 
     multifd_recv_cleanup();
+
     /*
      * RAM state cleanup needs to happen after multifd cleanup, because
      * multifd threads can use some of its states (receivedmap).
+     * The VFIO load_cleanup() implementation is BQL-sensitive. It requires
+     * BQL must NOT be taken when recycling load threads, so that it won't
+     * block the load threads from making progress on address space
+     * modification operations.
+     *
+     * To make it work, we could try to not take BQL for all load_cleanup(),
+     * or conditionally unlock BQL only if bql_locked() in VFIO.
+     *
+     * Since most existing call sites take BQL for load_cleanup(), make
+     * it simple by taking BQL always as the rule, so that VFIO can unlock
+     * BQL and retake unconditionally.
      */
-    qemu_loadvm_state_cleanup();
+    assert(bql_locked());
+    qemu_loadvm_state_cleanup(mis);
 
     if (mis->to_src_file) {
         /* Tell source that we are done */
@@ -862,7 +884,7 @@ process_incoming_migration_co(void *opaque)
              * postcopy thread.
              */
             trace_process_incoming_migration_co_postcopy_end_main();
-            return;
+            goto out;
         }
         /* Else if something went wrong then just fall out of the normal exit */
     }
@@ -878,7 +900,8 @@ process_incoming_migration_co(void *opaque)
     }
 
     migration_bh_schedule(process_incoming_migration_bh, mis);
-    return;
+    goto out;
+
 fail:
     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                       MIGRATION_STATUS_FAILED);
@@ -895,6 +918,9 @@ process_incoming_migration_co(void *opaque)
 
         exit(EXIT_FAILURE);
     }
+out:
+    /* Pairs with the refcount taken in qmp_migrate_incoming() */
+    migrate_incoming_unref_outgoing_state();
 }
 
 /**
@@ -1435,12 +1461,12 @@ static void migration_cleanup_json_writer(MigrationState *s)
     g_clear_pointer(&s->vmdesc, json_writer_free);
 }
 
-static void migrate_fd_cleanup(MigrationState *s)
+static void migration_cleanup(MigrationState *s)
 {
     MigrationEventType type;
     QEMUFile *tmp = NULL;
 
-    trace_migrate_fd_cleanup();
+    trace_migration_cleanup();
 
     migration_cleanup_json_writer(s);
 
@@ -1497,9 +1523,9 @@ static void migrate_fd_cleanup(MigrationState *s)
     yank_unregister_instance(MIGRATION_YANK_INSTANCE);
 }
 
-static void migrate_fd_cleanup_bh(void *opaque)
+static void migration_cleanup_bh(void *opaque)
 {
-    migrate_fd_cleanup(opaque);
+    migration_cleanup(opaque);
 }
 
 void migrate_set_error(MigrationState *s, const Error *error)
@@ -1529,7 +1555,7 @@ static void migrate_error_free(MigrationState *s)
     }
 }
 
-static void migrate_fd_error(MigrationState *s, const Error *error)
+static void migration_connect_set_error(MigrationState *s, const Error *error)
 {
     MigrationStatus current = s->state;
     MigrationStatus next;
@@ -1558,12 +1584,17 @@ static void migrate_fd_error(MigrationState *s, const Error *error)
     migrate_set_error(s, error);
 }
 
-static void migrate_fd_cancel(MigrationState *s)
+void migration_cancel(void)
 {
+    MigrationState *s = migrate_get_current();
     int old_state ;
     bool setup = (s->state == MIGRATION_STATUS_SETUP);
 
-    trace_migrate_fd_cancel();
+    trace_migration_cancel();
+
+    if (migrate_dirty_limit()) {
+        qmp_cancel_vcpu_dirty_limit(false, -1, NULL);
+    }
 
     WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) {
         if (s->rp_state.from_dst_file) {
@@ -1895,6 +1926,17 @@ void qmp_migrate_incoming(const char *uri, bool has_channels,
         return;
     }
 
+    /*
+     * Making sure MigrationState is available until incoming migration
+     * completes.
+     *
+     * NOTE: QEMU _might_ leak this refcount in some failure paths, but
+     * that's OK.  This is the minimum change we need to at least making
+     * sure success case is clean on the refcount.  We can try harder to
+     * make it accurate for any kind of failures, but it might be an
+     * overkill and doesn't bring us much benefit.
+     */
+    migrate_incoming_ref_outgoing_state();
     once = false;
 }
 
@@ -2205,7 +2247,7 @@ void qmp_migrate(const char *uri, bool has_channels,
 
 out:
     if (local_err) {
-        migrate_fd_error(s, local_err);
+        migration_connect_set_error(s, local_err);
         error_propagate(errp, local_err);
     }
 }
@@ -2250,7 +2292,7 @@ static void qmp_migrate_finish(MigrationAddress *addr, bool resume_requested,
         if (!resume_requested) {
             yank_unregister_instance(MIGRATION_YANK_INSTANCE);
         }
-        migrate_fd_error(s, local_err);
+        migration_connect_set_error(s, local_err);
         error_propagate(errp, local_err);
         return;
     }
@@ -2258,7 +2300,18 @@ static void qmp_migrate_finish(MigrationAddress *addr, bool resume_requested,
 
 void qmp_migrate_cancel(Error **errp)
 {
-    migration_cancel(NULL);
+    /*
+     * After postcopy migration has started, the source machine is not
+     * recoverable in case of a migration error. This also means the
+     * cancel command cannot be used as cancel should allow the
+     * machine to continue operation.
+     */
+    if (migration_in_postcopy()) {
+        error_setg(errp, "Postcopy migration in progress, cannot cancel.");
+        return;
+    }
+
+    migration_cancel();
 }
 
 void qmp_migrate_continue(MigrationStatus state, Error **errp)
@@ -2644,7 +2697,10 @@ static int postcopy_start(MigrationState *ms, Error **errp)
     if (migrate_postcopy_preempt()) {
         migration_wait_main_channel(ms);
         if (postcopy_preempt_establish_channel(ms)) {
-            migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILED);
+            if (ms->state != MIGRATION_STATUS_CANCELLING) {
+                migrate_set_state(&ms->state, ms->state,
+                                  MIGRATION_STATUS_FAILED);
+            }
             error_setg(errp, "%s: Failed to establish preempt channel",
                        __func__);
             return -1;
@@ -2884,6 +2940,8 @@ static bool migration_switchover_start(MigrationState *s, Error **errp)
 
     precopy_notify_complete();
 
+    qemu_savevm_maybe_send_switchover_start(s->to_dst_file);
+
     return true;
 }
 
@@ -2982,7 +3040,9 @@ static void migration_completion(MigrationState *s)
         error_free(local_err);
     }
 
-    migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
+    if (s->state != MIGRATION_STATUS_CANCELLING) {
+        migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
+    }
 }
 
 /**
@@ -3005,7 +3065,7 @@ static void bg_migration_completion(MigrationState *s)
         qemu_put_buffer(s->to_dst_file, s->bioc->data, s->bioc->usage);
         qemu_fflush(s->to_dst_file);
     } else if (s->state == MIGRATION_STATUS_CANCELLING) {
-        goto fail;
+        return;
     }
 
     if (qemu_file_get_error(s->to_dst_file)) {
@@ -3434,7 +3494,7 @@ static void migration_iteration_finish(MigrationState *s)
         break;
     }
 
-    migration_bh_schedule(migrate_fd_cleanup_bh, s);
+    migration_bh_schedule(migration_cleanup_bh, s);
     bql_unlock();
 }
 
@@ -3462,7 +3522,7 @@ static void bg_migration_iteration_finish(MigrationState *s)
         break;
     }
 
-    migration_bh_schedule(migrate_fd_cleanup_bh, s);
+    migration_bh_schedule(migration_cleanup_bh, s);
     bql_unlock();
 }
 
@@ -3844,7 +3904,7 @@ static void *bg_migration_thread(void *opaque)
     return NULL;
 }
 
-void migrate_fd_connect(MigrationState *s, Error *error_in)
+void migration_connect(MigrationState *s, Error *error_in)
 {
     Error *local_err = NULL;
     uint64_t rate_limit;
@@ -3854,24 +3914,24 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
     /*
      * If there's a previous error, free it and prepare for another one.
      * Meanwhile if migration completes successfully, there won't have an error
-     * dumped when calling migrate_fd_cleanup().
+     * dumped when calling migration_cleanup().
      */
     migrate_error_free(s);
 
     s->expected_downtime = migrate_downtime_limit();
     if (error_in) {
-        migrate_fd_error(s, error_in);
+        migration_connect_set_error(s, error_in);
         if (resume) {
             /*
              * Don't do cleanup for resume if channel is invalid, but only dump
              * the error.  We wait for another channel connect from the user.
              * The error_report still gives HMP user a hint on what failed.
-             * It's normally done in migrate_fd_cleanup(), but call it here
+             * It's normally done in migration_cleanup(), but call it here
              * explicitly.
              */
             error_report_err(error_copy(s->error));
         } else {
-            migrate_fd_cleanup(s);
+            migration_cleanup(s);
         }
         return;
     }
@@ -3949,9 +4009,11 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
 
 fail:
     migrate_set_error(s, local_err);
-    migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
+    if (s->state != MIGRATION_STATUS_CANCELLING) {
+        migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
+    }
     error_report_err(local_err);
-    migrate_fd_cleanup(s);
+    migration_cleanup(s);
 }
 
 static void migration_class_init(ObjectClass *klass, void *data)
diff --git a/migration/migration.h b/migration/migration.h
index eaebcc2042dd5..d53f7cad84d8e 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -43,6 +43,7 @@
 #define  MIGRATION_THREAD_DST_PREEMPT       "mig/dst/preempt"
 
 struct PostcopyBlocktimeContext;
+typedef struct ThreadPool ThreadPool;
 
 #define  MIGRATION_RESUME_ACK_VALUE  (1)
 
@@ -187,6 +188,10 @@ struct MigrationIncomingState {
     Coroutine *colo_incoming_co;
     QemuSemaphore colo_incoming_sem;
 
+    /* Optional load threads pool and its thread exit request flag */
+    ThreadPool *load_threads;
+    bool load_threads_abort;
+
     /*
      * PostcopyBlocktimeContext to keep information for postcopy
      * live migration, to calculate vCPU block time
@@ -400,6 +405,8 @@ struct MigrationState {
     bool send_configuration;
     /* Whether we send section footer during migration */
     bool send_section_footer;
+    /* Whether we send switchover start notification during migration */
+    bool send_switchover_start;
 
     /* Needed by postcopy-pause state */
     QemuSemaphore postcopy_pause_sem;
@@ -443,6 +450,39 @@ struct MigrationState {
      * Default value is false. (since 8.1)
      */
     bool multifd_flush_after_each_section;
+
+    /*
+     * This variable only makes sense when set on the machine that is
+     * the destination of a multifd migration with TLS enabled. It
+     * affects the behavior of the last send->recv iteration with
+     * regards to termination of the TLS session.
+     *
+     * When set:
+     *
+     * - the destination QEMU instance can expect to never get a
+     *   GNUTLS_E_PREMATURE_TERMINATION error. Manifested as the error
+     *   message: "The TLS connection was non-properly terminated".
+     *
+     * When clear:
+     *
+     * - the destination QEMU instance can expect to see a
+     *   GNUTLS_E_PREMATURE_TERMINATION error in any multifd channel
+     *   whenever the last recv() call of that channel happens after
+     *   the source QEMU instance has already issued shutdown() on the
+     *   channel.
+     *
+     *   Commit 637280aeb2 (since 9.1) introduced a side effect that
+     *   causes the destination instance to not be affected by the
+     *   premature termination, while commit 1d457daf86 (since 10.0)
+     *   causes the premature termination condition to be once again
+     *   reachable.
+     *
+     * NOTE: Regardless of the state of this option, a premature
+     * termination of the TLS connection might happen due to error at
+     * any moment prior to the last send->recv iteration.
+     */
+    bool multifd_clean_tls_termination;
+
     /*
      * This decides the size of guest memory chunk that will be used
      * to track dirty bitmap clearing.  The size of memory chunk will
@@ -484,7 +524,7 @@ bool  migration_has_all_channels(void);
 void migrate_set_error(MigrationState *s, const Error *error);
 bool migrate_has_error(MigrationState *s);
 
-void migrate_fd_connect(MigrationState *s, Error *error_in);
+void migration_connect(MigrationState *s, Error *error_in);
 
 int migration_call_notifiers(MigrationState *s, MigrationEventType type,
                              Error **errp);
@@ -530,7 +570,7 @@ void migration_make_urgent_request(void);
 void migration_consume_urgent_request(void);
 bool migration_rate_limit(void);
 void migration_bh_schedule(QEMUBHFunc *cb, void *opaque);
-void migration_cancel(const Error *error);
+void migration_cancel(void);
 
 void migration_populate_vfio_info(MigrationInfo *info);
 void migration_reset_vfio_bytes_transferred(void);
diff --git a/migration/multifd-device-state.c b/migration/multifd-device-state.c
new file mode 100644
index 0000000000000..94222d0eb0d8f
--- /dev/null
+++ b/migration/multifd-device-state.c
@@ -0,0 +1,212 @@
+/*
+ * Multifd device state migration
+ *
+ * Copyright (C) 2024,2025 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/lockable.h"
+#include "block/thread-pool.h"
+#include "migration.h"
+#include "migration/misc.h"
+#include "multifd.h"
+#include "options.h"
+
+static struct {
+    QemuMutex queue_job_mutex;
+
+    MultiFDSendData *send_data;
+
+    ThreadPool *threads;
+    bool threads_abort;
+} *multifd_send_device_state;
+
+void multifd_device_state_send_setup(void)
+{
+    assert(!multifd_send_device_state);
+    multifd_send_device_state = g_malloc(sizeof(*multifd_send_device_state));
+
+    qemu_mutex_init(&multifd_send_device_state->queue_job_mutex);
+
+    multifd_send_device_state->send_data = multifd_send_data_alloc();
+
+    multifd_send_device_state->threads = thread_pool_new();
+    multifd_send_device_state->threads_abort = false;
+}
+
+void multifd_device_state_send_cleanup(void)
+{
+    g_clear_pointer(&multifd_send_device_state->threads, thread_pool_free);
+    g_clear_pointer(&multifd_send_device_state->send_data,
+                    multifd_send_data_free);
+
+    qemu_mutex_destroy(&multifd_send_device_state->queue_job_mutex);
+
+    g_clear_pointer(&multifd_send_device_state, g_free);
+}
+
+void multifd_send_data_clear_device_state(MultiFDDeviceState_t *device_state)
+{
+    g_clear_pointer(&device_state->idstr, g_free);
+    g_clear_pointer(&device_state->buf, g_free);
+}
+
+static void multifd_device_state_fill_packet(MultiFDSendParams *p)
+{
+    MultiFDDeviceState_t *device_state = &p->data->u.device_state;
+    MultiFDPacketDeviceState_t *packet = p->packet_device_state;
+
+    packet->hdr.flags = cpu_to_be32(p->flags);
+    strncpy(packet->idstr, device_state->idstr, sizeof(packet->idstr) - 1);
+    packet->idstr[sizeof(packet->idstr) - 1] = 0;
+    packet->instance_id = cpu_to_be32(device_state->instance_id);
+    packet->next_packet_size = cpu_to_be32(p->next_packet_size);
+}
+
+static void multifd_prepare_header_device_state(MultiFDSendParams *p)
+{
+    p->iov[0].iov_len = sizeof(*p->packet_device_state);
+    p->iov[0].iov_base = p->packet_device_state;
+    p->iovs_num++;
+}
+
+void multifd_device_state_send_prepare(MultiFDSendParams *p)
+{
+    MultiFDDeviceState_t *device_state = &p->data->u.device_state;
+
+    assert(multifd_payload_device_state(p->data));
+
+    multifd_prepare_header_device_state(p);
+
+    assert(!(p->flags & MULTIFD_FLAG_SYNC));
+
+    p->next_packet_size = device_state->buf_len;
+    if (p->next_packet_size > 0) {
+        p->iov[p->iovs_num].iov_base = device_state->buf;
+        p->iov[p->iovs_num].iov_len = p->next_packet_size;
+        p->iovs_num++;
+    }
+
+    p->flags |= MULTIFD_FLAG_NOCOMP | MULTIFD_FLAG_DEVICE_STATE;
+
+    multifd_device_state_fill_packet(p);
+}
+
+bool multifd_queue_device_state(char *idstr, uint32_t instance_id,
+                                char *data, size_t len)
+{
+    /* Device state submissions can come from multiple threads */
+    QEMU_LOCK_GUARD(&multifd_send_device_state->queue_job_mutex);
+    MultiFDDeviceState_t *device_state;
+
+    assert(multifd_payload_empty(multifd_send_device_state->send_data));
+
+    multifd_set_payload_type(multifd_send_device_state->send_data,
+                             MULTIFD_PAYLOAD_DEVICE_STATE);
+    device_state = &multifd_send_device_state->send_data->u.device_state;
+    device_state->idstr = g_strdup(idstr);
+    device_state->instance_id = instance_id;
+    device_state->buf = g_memdup2(data, len);
+    device_state->buf_len = len;
+
+    if (!multifd_send(&multifd_send_device_state->send_data)) {
+        multifd_send_data_clear(multifd_send_device_state->send_data);
+        return false;
+    }
+
+    return true;
+}
+
+bool multifd_device_state_supported(void)
+{
+    return migrate_multifd() && !migrate_mapped_ram() &&
+        migrate_multifd_compression() == MULTIFD_COMPRESSION_NONE;
+}
+
+static void multifd_device_state_save_thread_data_free(void *opaque)
+{
+    SaveLiveCompletePrecopyThreadData *data = opaque;
+
+    g_clear_pointer(&data->idstr, g_free);
+    g_free(data);
+}
+
+static int multifd_device_state_save_thread(void *opaque)
+{
+    SaveLiveCompletePrecopyThreadData *data = opaque;
+    g_autoptr(Error) local_err = NULL;
+
+    if (!data->hdlr(data, &local_err)) {
+        MigrationState *s = migrate_get_current();
+
+        /*
+         * Can't call abort_device_state_save_threads() here since new
+         * save threads could still be in process of being launched
+         * (if, for example, the very first save thread launched exited
+         * with an error very quickly).
+         */
+
+        assert(local_err);
+
+        /*
+         * In case of multiple save threads failing which thread error
+         * return we end setting is purely arbitrary.
+         */
+        migrate_set_error(s, local_err);
+    }
+
+    return 0;
+}
+
+bool multifd_device_state_save_thread_should_exit(void)
+{
+    return qatomic_read(&multifd_send_device_state->threads_abort);
+}
+
+void
+multifd_spawn_device_state_save_thread(SaveLiveCompletePrecopyThreadHandler hdlr,
+                                       char *idstr, uint32_t instance_id,
+                                       void *opaque)
+{
+    SaveLiveCompletePrecopyThreadData *data;
+
+    assert(multifd_device_state_supported());
+    assert(multifd_send_device_state);
+
+    assert(!qatomic_read(&multifd_send_device_state->threads_abort));
+
+    data = g_new(SaveLiveCompletePrecopyThreadData, 1);
+    data->hdlr = hdlr;
+    data->idstr = g_strdup(idstr);
+    data->instance_id = instance_id;
+    data->handler_opaque = opaque;
+
+    thread_pool_submit_immediate(multifd_send_device_state->threads,
+                                 multifd_device_state_save_thread,
+                                 data,
+                                 multifd_device_state_save_thread_data_free);
+}
+
+void multifd_abort_device_state_save_threads(void)
+{
+    assert(multifd_device_state_supported());
+
+    qatomic_set(&multifd_send_device_state->threads_abort, true);
+}
+
+bool multifd_join_device_state_save_threads(void)
+{
+    MigrationState *s = migrate_get_current();
+
+    assert(multifd_device_state_supported());
+
+    thread_pool_wait(multifd_send_device_state->threads);
+
+    return !migrate_has_error(s);
+}
diff --git a/migration/multifd-nocomp.c b/migration/multifd-nocomp.c
index 1325dba97cea8..ffe75256c9fbe 100644
--- a/migration/multifd-nocomp.c
+++ b/migration/multifd-nocomp.c
@@ -14,6 +14,7 @@
 #include "exec/ramblock.h"
 #include "exec/target_page.h"
 #include "file.h"
+#include "migration-stats.h"
 #include "multifd.h"
 #include "options.h"
 #include "qapi/error.h"
@@ -24,15 +25,14 @@
 
 static MultiFDSendData *multifd_ram_send;
 
-size_t multifd_ram_payload_size(void)
+void multifd_ram_payload_alloc(MultiFDPages_t *pages)
 {
-    uint32_t n = multifd_ram_page_count();
+    pages->offset = g_new0(ram_addr_t, multifd_ram_page_count());
+}
 
-    /*
-     * We keep an array of page offsets at the end of MultiFDPages_t,
-     * add space for it in the allocation.
-     */
-    return sizeof(MultiFDPages_t) + n * sizeof(ram_addr_t);
+void multifd_ram_payload_free(MultiFDPages_t *pages)
+{
+    g_clear_pointer(&pages->offset, g_free);
 }
 
 void multifd_ram_save_setup(void)
@@ -42,8 +42,7 @@ void multifd_ram_save_setup(void)
 
 void multifd_ram_save_cleanup(void)
 {
-    g_free(multifd_ram_send);
-    multifd_ram_send = NULL;
+    g_clear_pointer(&multifd_ram_send, multifd_send_data_free);
 }
 
 static void multifd_set_file_bitmap(MultiFDSendParams *p)
@@ -86,6 +85,13 @@ static void multifd_nocomp_send_cleanup(MultiFDSendParams *p, Error **errp)
     return;
 }
 
+static void multifd_ram_prepare_header(MultiFDSendParams *p)
+{
+    p->iov[0].iov_len = p->packet_len;
+    p->iov[0].iov_base = p->packet;
+    p->iovs_num++;
+}
+
 static void multifd_send_prepare_iovs(MultiFDSendParams *p)
 {
     MultiFDPages_t *pages = &p->data->u.ram;
@@ -119,7 +125,7 @@ static int multifd_nocomp_send_prepare(MultiFDSendParams *p, Error **errp)
          * Only !zerocopy needs the header in IOV; zerocopy will
          * send it separately.
          */
-        multifd_send_prepare_header(p);
+        multifd_ram_prepare_header(p);
     }
 
     multifd_send_prepare_iovs(p);
@@ -134,6 +140,8 @@ static int multifd_nocomp_send_prepare(MultiFDSendParams *p, Error **errp)
         if (ret != 0) {
             return -1;
         }
+
+        stat64_add(&mig_stats.multifd_bytes, p->packet_len);
     }
 
     return 0;
@@ -432,7 +440,7 @@ int multifd_ram_flush_and_sync(QEMUFile *f)
 bool multifd_send_prepare_common(MultiFDSendParams *p)
 {
     MultiFDPages_t *pages = &p->data->u.ram;
-    multifd_send_prepare_header(p);
+    multifd_ram_prepare_header(p);
     multifd_send_zero_page_detect(p);
 
     if (!pages->normal_num) {
diff --git a/migration/multifd.c b/migration/multifd.c
index ab73d6d984cf5..dfb5189f0ea3d 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -12,6 +12,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu/cutils.h"
+#include "qemu/iov.h"
 #include "qemu/rcu.h"
 #include "exec/target_page.h"
 #include "system/system.h"
@@ -19,8 +20,10 @@
 #include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "file.h"
+#include "migration/misc.h"
 #include "migration.h"
 #include "migration-stats.h"
+#include "savevm.h"
 #include "socket.h"
 #include "tls.h"
 #include "qemu-file.h"
@@ -49,6 +52,10 @@ typedef struct {
 
 struct {
     MultiFDSendParams *params;
+
+    /* multifd_send() body is not thread safe, needs serialization */
+    QemuMutex multifd_send_mutex;
+
     /*
      * Global number of generated multifd packets.
      *
@@ -98,24 +105,44 @@ struct {
 
 MultiFDSendData *multifd_send_data_alloc(void)
 {
-    size_t max_payload_size, size_minus_payload;
+    MultiFDSendData *new = g_new0(MultiFDSendData, 1);
 
-    /*
-     * MultiFDPages_t has a flexible array at the end, account for it
-     * when allocating MultiFDSendData. Use max() in case other types
-     * added to the union in the future are larger than
-     * (MultiFDPages_t + flex array).
-     */
-    max_payload_size = MAX(multifd_ram_payload_size(), sizeof(MultiFDPayload));
+    multifd_ram_payload_alloc(&new->u.ram);
+    /* Device state allocates its payload on-demand */
 
-    /*
-     * Account for any holes the compiler might insert. We can't pack
-     * the structure because that misaligns the members and triggers
-     * Waddress-of-packed-member.
-     */
-    size_minus_payload = sizeof(MultiFDSendData) - sizeof(MultiFDPayload);
+    return new;
+}
+
+void multifd_send_data_clear(MultiFDSendData *data)
+{
+    if (multifd_payload_empty(data)) {
+        return;
+    }
+
+    switch (data->type) {
+    case MULTIFD_PAYLOAD_DEVICE_STATE:
+        multifd_send_data_clear_device_state(&data->u.device_state);
+        break;
+    default:
+        /* Nothing to do */
+        break;
+    }
+
+    data->type = MULTIFD_PAYLOAD_NONE;
+}
+
+void multifd_send_data_free(MultiFDSendData *data)
+{
+    if (!data) {
+        return;
+    }
+
+    /* This also free's device state payload */
+    multifd_send_data_clear(data);
 
-    return g_malloc0(size_minus_payload + max_payload_size);
+    multifd_ram_payload_free(&data->u.ram);
+
+    g_free(data);
 }
 
 static bool multifd_use_packets(void)
@@ -201,6 +228,7 @@ static int multifd_recv_initial_packet(QIOChannel *c, Error **errp)
     return msg.id;
 }
 
+/* Fills a RAM multifd packet */
 void multifd_send_fill_packet(MultiFDSendParams *p)
 {
     MultiFDPacket_t *packet = p->packet;
@@ -209,10 +237,10 @@ void multifd_send_fill_packet(MultiFDSendParams *p)
 
     memset(packet, 0, p->packet_len);
 
-    packet->magic = cpu_to_be32(MULTIFD_MAGIC);
-    packet->version = cpu_to_be32(MULTIFD_VERSION);
+    packet->hdr.magic = cpu_to_be32(MULTIFD_MAGIC);
+    packet->hdr.version = cpu_to_be32(MULTIFD_VERSION);
 
-    packet->flags = cpu_to_be32(p->flags);
+    packet->hdr.flags = cpu_to_be32(p->flags);
     packet->next_packet_size = cpu_to_be32(p->next_packet_size);
 
     packet_num = qatomic_fetch_inc(&multifd_send_state->packet_num);
@@ -228,12 +256,12 @@ void multifd_send_fill_packet(MultiFDSendParams *p)
                             p->flags, p->next_packet_size);
 }
 
-static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
+static int multifd_recv_unfill_packet_header(MultiFDRecvParams *p,
+                                             const MultiFDPacketHdr_t *hdr,
+                                             Error **errp)
 {
-    const MultiFDPacket_t *packet = p->packet;
-    uint32_t magic = be32_to_cpu(packet->magic);
-    uint32_t version = be32_to_cpu(packet->version);
-    int ret = 0;
+    uint32_t magic = be32_to_cpu(hdr->magic);
+    uint32_t version = be32_to_cpu(hdr->version);
 
     if (magic != MULTIFD_MAGIC) {
         error_setg(errp, "multifd: received packet magic %x, expected %x",
@@ -247,10 +275,29 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
         return -1;
     }
 
-    p->flags = be32_to_cpu(packet->flags);
+    p->flags = be32_to_cpu(hdr->flags);
+
+    return 0;
+}
+
+static int multifd_recv_unfill_packet_device_state(MultiFDRecvParams *p,
+                                                   Error **errp)
+{
+    MultiFDPacketDeviceState_t *packet = p->packet_dev_state;
+
+    packet->instance_id = be32_to_cpu(packet->instance_id);
+    p->next_packet_size = be32_to_cpu(packet->next_packet_size);
+
+    return 0;
+}
+
+static int multifd_recv_unfill_packet_ram(MultiFDRecvParams *p, Error **errp)
+{
+    const MultiFDPacket_t *packet = p->packet;
+    int ret = 0;
+
     p->next_packet_size = be32_to_cpu(packet->next_packet_size);
     p->packet_num = be64_to_cpu(packet->packet_num);
-    p->packets_recved++;
 
     /* Always unfill, old QEMUs (<9.0) send data along with SYNC */
     ret = multifd_ram_unfill_packet(p, errp);
@@ -261,6 +308,17 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
     return ret;
 }
 
+static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
+{
+    p->packets_recved++;
+
+    if (p->flags & MULTIFD_FLAG_DEVICE_STATE) {
+        return multifd_recv_unfill_packet_device_state(p, errp);
+    }
+
+    return multifd_recv_unfill_packet_ram(p, errp);
+}
+
 static bool multifd_send_should_exit(void)
 {
     return qatomic_read(&multifd_send_state->exiting);
@@ -308,6 +366,8 @@ bool multifd_send(MultiFDSendData **send_data)
         return false;
     }
 
+    QEMU_LOCK_GUARD(&multifd_send_state->multifd_send_mutex);
+
     /* We wait here, until at least one channel is ready */
     qemu_sem_wait(&multifd_send_state->channels_ready);
 
@@ -444,7 +504,7 @@ static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp)
          * channels have no I/O handler callback registered when reaching
          * here, because migration thread will wait for all multifd channel
          * establishments to complete during setup.  Since
-         * migrate_fd_cleanup() will be scheduled in main thread too, all
+         * migration_cleanup() will be scheduled in main thread too, all
          * previous callbacks should guarantee to be completed when
          * reaching here.  See multifd_send_state.channels_created and its
          * usage.  In the future, we could replace this with an assert
@@ -459,9 +519,9 @@ static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp)
     qemu_sem_destroy(&p->sem_sync);
     g_free(p->name);
     p->name = NULL;
-    g_free(p->data);
-    p->data = NULL;
+    g_clear_pointer(&p->data, multifd_send_data_free);
     p->packet_len = 0;
+    g_clear_pointer(&p->packet_device_state, g_free);
     g_free(p->packet);
     p->packet = NULL;
     multifd_send_state->ops->send_cleanup(p, errp);
@@ -474,8 +534,10 @@ static void multifd_send_cleanup_state(void)
 {
     file_cleanup_outgoing_migration();
     socket_cleanup_outgoing_migration();
+    multifd_device_state_send_cleanup();
     qemu_sem_destroy(&multifd_send_state->channels_created);
     qemu_sem_destroy(&multifd_send_state->channels_ready);
+    qemu_mutex_destroy(&multifd_send_state->multifd_send_mutex);
     g_free(multifd_send_state->params);
     multifd_send_state->params = NULL;
     g_free(multifd_send_state);
@@ -490,6 +552,36 @@ void multifd_send_shutdown(void)
         return;
     }
 
+    for (i = 0; i < migrate_multifd_channels(); i++) {
+        MultiFDSendParams *p = &multifd_send_state->params[i];
+
+        /* thread_created implies the TLS handshake has succeeded */
+        if (p->tls_thread_created && p->thread_created) {
+            Error *local_err = NULL;
+            /*
+             * The destination expects the TLS session to always be
+             * properly terminated. This helps to detect a premature
+             * termination in the middle of the stream.  Note that
+             * older QEMUs always break the connection on the source
+             * and the destination always sees
+             * GNUTLS_E_PREMATURE_TERMINATION.
+             */
+            migration_tls_channel_end(p->c, &local_err);
+
+            /*
+             * The above can return an error in case the migration has
+             * already failed. If the migration succeeded, errors are
+             * not expected but there's no need to kill the source.
+             */
+            if (local_err && !migration_has_failed(migrate_get_current())) {
+                warn_report(
+                    "multifd_send_%d: Failed to terminate TLS connection: %s",
+                    p->id, error_get_pretty(local_err));
+                break;
+            }
+        }
+    }
+
     multifd_send_terminate_threads();
 
     for (i = 0; i < migrate_multifd_channels(); i++) {
@@ -601,16 +693,32 @@ static void *multifd_send_thread(void *opaque)
          * qatomic_store_release() in multifd_send().
          */
         if (qatomic_load_acquire(&p->pending_job)) {
+            bool is_device_state = multifd_payload_device_state(p->data);
+            size_t total_size;
+
             p->flags = 0;
             p->iovs_num = 0;
             assert(!multifd_payload_empty(p->data));
 
-            ret = multifd_send_state->ops->send_prepare(p, &local_err);
-            if (ret != 0) {
-                break;
+            if (is_device_state) {
+                multifd_device_state_send_prepare(p);
+            } else {
+                ret = multifd_send_state->ops->send_prepare(p, &local_err);
+                if (ret != 0) {
+                    break;
+                }
             }
 
+            /*
+             * The packet header in the zerocopy RAM case is accounted for
+             * in multifd_nocomp_send_prepare() - where it is actually
+             * being sent.
+             */
+            total_size = iov_size(p->iov, p->iovs_num);
+
             if (migrate_mapped_ram()) {
+                assert(!is_device_state);
+
                 ret = file_write_ramblock_iov(p->c, p->iov, p->iovs_num,
                                               &p->data->u.ram, &local_err);
             } else {
@@ -623,11 +731,10 @@ static void *multifd_send_thread(void *opaque)
                 break;
             }
 
-            stat64_add(&mig_stats.multifd_bytes,
-                       (uint64_t)p->next_packet_size + p->packet_len);
+            stat64_add(&mig_stats.multifd_bytes, total_size);
 
             p->next_packet_size = 0;
-            multifd_set_payload_type(p->data, MULTIFD_PAYLOAD_NONE);
+            multifd_send_data_clear(p->data);
 
             /*
              * Making sure p->data is published before saying "we're
@@ -826,6 +933,7 @@ bool multifd_send_setup(void)
     thread_count = migrate_multifd_channels();
     multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
     multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
+    qemu_mutex_init(&multifd_send_state->multifd_send_mutex);
     qemu_sem_init(&multifd_send_state->channels_created, 0);
     qemu_sem_init(&multifd_send_state->channels_ready, 0);
     qatomic_set(&multifd_send_state->exiting, 0);
@@ -844,6 +952,9 @@ bool multifd_send_setup(void)
             p->packet_len = sizeof(MultiFDPacket_t)
                           + sizeof(uint64_t) * page_count;
             p->packet = g_malloc0(p->packet_len);
+            p->packet_device_state = g_malloc0(sizeof(*p->packet_device_state));
+            p->packet_device_state->hdr.magic = cpu_to_be32(MULTIFD_MAGIC);
+            p->packet_device_state->hdr.version = cpu_to_be32(MULTIFD_VERSION);
         }
         p->name = g_strdup_printf(MIGRATION_THREAD_SRC_MULTIFD, i);
         p->write_flags = 0;
@@ -879,6 +990,8 @@ bool multifd_send_setup(void)
         assert(p->iov);
     }
 
+    multifd_device_state_send_setup();
+
     return true;
 
 err:
@@ -1018,6 +1131,7 @@ static void multifd_recv_cleanup_channel(MultiFDRecvParams *p)
     p->packet_len = 0;
     g_free(p->packet);
     p->packet = NULL;
+    g_clear_pointer(&p->packet_dev_state, g_free);
     g_free(p->normal);
     p->normal = NULL;
     g_free(p->zero);
@@ -1119,8 +1233,37 @@ void multifd_recv_sync_main(void)
     trace_multifd_recv_sync_main(multifd_recv_state->packet_num);
 }
 
+static int multifd_device_state_recv(MultiFDRecvParams *p, Error **errp)
+{
+    g_autofree char *dev_state_buf = NULL;
+    int ret;
+
+    dev_state_buf = g_malloc(p->next_packet_size);
+
+    ret = qio_channel_read_all(p->c, dev_state_buf, p->next_packet_size, errp);
+    if (ret != 0) {
+        return ret;
+    }
+
+    if (p->packet_dev_state->idstr[sizeof(p->packet_dev_state->idstr) - 1]
+        != 0) {
+        error_setg(errp, "unterminated multifd device state idstr");
+        return -1;
+    }
+
+    if (!qemu_loadvm_load_state_buffer(p->packet_dev_state->idstr,
+                                       p->packet_dev_state->instance_id,
+                                       dev_state_buf, p->next_packet_size,
+                                       errp)) {
+        ret = -1;
+    }
+
+    return ret;
+}
+
 static void *multifd_recv_thread(void *opaque)
 {
+    MigrationState *s = migrate_get_current();
     MultiFDRecvParams *p = opaque;
     Error *local_err = NULL;
     bool use_packets = multifd_use_packets();
@@ -1129,19 +1272,65 @@ static void *multifd_recv_thread(void *opaque)
     trace_multifd_recv_thread_start(p->id);
     rcu_register_thread();
 
+    if (!s->multifd_clean_tls_termination) {
+        p->read_flags = QIO_CHANNEL_READ_FLAG_RELAXED_EOF;
+    }
+
     while (true) {
+        MultiFDPacketHdr_t hdr;
         uint32_t flags = 0;
+        bool is_device_state = false;
         bool has_data = false;
+        uint8_t *pkt_buf;
+        size_t pkt_len;
+
         p->normal_num = 0;
 
         if (use_packets) {
+            struct iovec iov = {
+                .iov_base = (void *)&hdr,
+                .iov_len = sizeof(hdr)
+            };
+
             if (multifd_recv_should_exit()) {
                 break;
             }
 
-            ret = qio_channel_read_all_eof(p->c, (void *)p->packet,
-                                           p->packet_len, &local_err);
-            if (ret == 0 || ret == -1) {   /* 0: EOF  -1: Error */
+            ret = qio_channel_readv_full_all_eof(p->c, &iov, 1, NULL, NULL,
+                                                 p->read_flags, &local_err);
+            if (!ret) {
+                /* EOF */
+                assert(!local_err);
+                break;
+            }
+
+            if (ret == -1) {
+                break;
+            }
+
+            ret = multifd_recv_unfill_packet_header(p, &hdr, &local_err);
+            if (ret) {
+                break;
+            }
+
+            is_device_state = p->flags & MULTIFD_FLAG_DEVICE_STATE;
+            if (is_device_state) {
+                pkt_buf = (uint8_t *)p->packet_dev_state + sizeof(hdr);
+                pkt_len = sizeof(*p->packet_dev_state) - sizeof(hdr);
+            } else {
+                pkt_buf = (uint8_t *)p->packet + sizeof(hdr);
+                pkt_len = p->packet_len - sizeof(hdr);
+            }
+
+            ret = qio_channel_read_all_eof(p->c, (char *)pkt_buf, pkt_len,
+                                           &local_err);
+            if (!ret) {
+                /* EOF */
+                error_setg(&local_err, "multifd: unexpected EOF after packet header");
+                break;
+            }
+
+            if (ret == -1) {
                 break;
             }
 
@@ -1156,12 +1345,17 @@ static void *multifd_recv_thread(void *opaque)
             /* recv methods don't know how to handle the SYNC flag */
             p->flags &= ~MULTIFD_FLAG_SYNC;
 
-            /*
-             * Even if it's a SYNC packet, this needs to be set
-             * because older QEMUs (<9.0) still send data along with
-             * the SYNC packet.
-             */
-            has_data = p->normal_num || p->zero_num;
+            if (is_device_state) {
+                has_data = p->next_packet_size > 0;
+            } else {
+                /*
+                 * Even if it's a SYNC packet, this needs to be set
+                 * because older QEMUs (<9.0) still send data along with
+                 * the SYNC packet.
+                 */
+                has_data = p->normal_num || p->zero_num;
+            }
+
             qemu_mutex_unlock(&p->mutex);
         } else {
             /*
@@ -1190,14 +1384,29 @@ static void *multifd_recv_thread(void *opaque)
         }
 
         if (has_data) {
-            ret = multifd_recv_state->ops->recv(p, &local_err);
+            if (is_device_state) {
+                assert(use_packets);
+                ret = multifd_device_state_recv(p, &local_err);
+            } else {
+                ret = multifd_recv_state->ops->recv(p, &local_err);
+            }
             if (ret != 0) {
                 break;
             }
+        } else if (is_device_state) {
+            error_setg(&local_err,
+                       "multifd: received empty device state packet");
+            break;
         }
 
         if (use_packets) {
             if (flags & MULTIFD_FLAG_SYNC) {
+                if (is_device_state) {
+                    error_setg(&local_err,
+                               "multifd: received SYNC device state packet");
+                    break;
+                }
+
                 qemu_sem_post(&multifd_recv_state->sem_sync);
                 qemu_sem_wait(&p->sem_sync);
             }
@@ -1266,6 +1475,7 @@ int multifd_recv_setup(Error **errp)
             p->packet_len = sizeof(MultiFDPacket_t)
                 + sizeof(uint64_t) * page_count;
             p->packet = g_malloc0(p->packet_len);
+            p->packet_dev_state = g_malloc0(sizeof(*p->packet_dev_state));
         }
         p->name = g_strdup_printf(MIGRATION_THREAD_DST_MULTIFD, i);
         p->normal = g_new0(ram_addr_t, page_count);
diff --git a/migration/multifd.h b/migration/multifd.h
index bd785b987315b..2d337e7b3b52f 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -62,6 +62,12 @@ MultiFDRecvData *multifd_get_recv_data(void);
 #define MULTIFD_FLAG_UADK (8 << 1)
 #define MULTIFD_FLAG_QATZIP (16 << 1)
 
+/*
+ * If set it means that this packet contains device state
+ * (MultiFDPacketDeviceState_t), not RAM data (MultiFDPacket_t).
+ */
+#define MULTIFD_FLAG_DEVICE_STATE (32 << 1)
+
 /* This value needs to be a multiple of qemu_target_page_size() */
 #define MULTIFD_PACKET_SIZE (512 * 1024)
 
@@ -69,6 +75,11 @@ typedef struct {
     uint32_t magic;
     uint32_t version;
     uint32_t flags;
+} __attribute__((packed)) MultiFDPacketHdr_t;
+
+typedef struct {
+    MultiFDPacketHdr_t hdr;
+
     /* maximum number of allocated pages */
     uint32_t pages_alloc;
     /* non zero pages */
@@ -89,14 +100,28 @@ typedef struct {
     uint64_t offset[];
 } __attribute__((packed)) MultiFDPacket_t;
 
+typedef struct {
+    MultiFDPacketHdr_t hdr;
+
+    char idstr[256];
+    uint32_t instance_id;
+
+    /* size of the next packet that contains the actual data */
+    uint32_t next_packet_size;
+} __attribute__((packed)) MultiFDPacketDeviceState_t;
+
 typedef struct {
     /* number of used pages */
     uint32_t num;
     /* number of normal pages */
     uint32_t normal_num;
+    /*
+     * Pointer to the ramblock.  NOTE: it's caller's responsibility to make
+     * sure the pointer is always valid!
+     */
     RAMBlock *block;
-    /* offset of each page */
-    ram_addr_t offset[];
+    /* offset array of each page, managed by multifd */
+    ram_addr_t *offset;
 } MultiFDPages_t;
 
 struct MultiFDRecvData {
@@ -106,13 +131,22 @@ struct MultiFDRecvData {
     off_t file_offset;
 };
 
+typedef struct {
+    char *idstr;
+    uint32_t instance_id;
+    char *buf;
+    size_t buf_len;
+} MultiFDDeviceState_t;
+
 typedef enum {
     MULTIFD_PAYLOAD_NONE,
     MULTIFD_PAYLOAD_RAM,
+    MULTIFD_PAYLOAD_DEVICE_STATE,
 } MultiFDPayloadType;
 
-typedef union MultiFDPayload {
+typedef struct MultiFDPayload {
     MultiFDPages_t ram;
+    MultiFDDeviceState_t device_state;
 } MultiFDPayload;
 
 struct MultiFDSendData {
@@ -125,9 +159,17 @@ static inline bool multifd_payload_empty(MultiFDSendData *data)
     return data->type == MULTIFD_PAYLOAD_NONE;
 }
 
+static inline bool multifd_payload_device_state(MultiFDSendData *data)
+{
+    return data->type == MULTIFD_PAYLOAD_DEVICE_STATE;
+}
+
 static inline void multifd_set_payload_type(MultiFDSendData *data,
                                             MultiFDPayloadType type)
 {
+    assert(multifd_payload_empty(data));
+    assert(type != MULTIFD_PAYLOAD_NONE);
+
     data->type = type;
 }
 
@@ -174,8 +216,9 @@ typedef struct {
 
     /* thread local variables. No locking required */
 
-    /* pointer to the packet */
+    /* pointers to the possible packet types */
     MultiFDPacket_t *packet;
+    MultiFDPacketDeviceState_t *packet_device_state;
     /* size of the next packet that contains pages */
     uint32_t next_packet_size;
     /* packets sent through this channel */
@@ -222,8 +265,9 @@ typedef struct {
 
     /* thread local variables. No locking required */
 
-    /* pointer to the packet */
+    /* pointers to the possible packet types */
     MultiFDPacket_t *packet;
+    MultiFDPacketDeviceState_t *packet_dev_state;
     /* size of the next packet that contains pages */
     uint32_t next_packet_size;
     /* packets received through this channel */
@@ -244,6 +288,8 @@ typedef struct {
     uint32_t zero_num;
     /* used for de-compression methods */
     void *compress_data;
+    /* Flags for the QIOChannel */
+    int read_flags;
 } MultiFDRecvParams;
 
 typedef struct {
@@ -331,16 +377,11 @@ bool multifd_send_prepare_common(MultiFDSendParams *p);
 void multifd_send_zero_page_detect(MultiFDSendParams *p);
 void multifd_recv_zero_page_process(MultiFDRecvParams *p);
 
-static inline void multifd_send_prepare_header(MultiFDSendParams *p)
-{
-    p->iov[0].iov_len = p->packet_len;
-    p->iov[0].iov_base = p->packet;
-    p->iovs_num++;
-}
-
 void multifd_channel_connect(MultiFDSendParams *p, QIOChannel *ioc);
 bool multifd_send(MultiFDSendData **send_data);
 MultiFDSendData *multifd_send_data_alloc(void);
+void multifd_send_data_clear(MultiFDSendData *data);
+void multifd_send_data_free(MultiFDSendData *data);
 
 static inline uint32_t multifd_ram_page_size(void)
 {
@@ -357,7 +398,16 @@ void multifd_ram_save_cleanup(void);
 int multifd_ram_flush_and_sync(QEMUFile *f);
 bool multifd_ram_sync_per_round(void);
 bool multifd_ram_sync_per_section(void);
-size_t multifd_ram_payload_size(void);
+void multifd_ram_payload_alloc(MultiFDPages_t *pages);
+void multifd_ram_payload_free(MultiFDPages_t *pages);
 void multifd_ram_fill_packet(MultiFDSendParams *p);
 int multifd_ram_unfill_packet(MultiFDRecvParams *p, Error **errp);
+
+void multifd_send_data_clear_device_state(MultiFDDeviceState_t *device_state);
+
+void multifd_device_state_send_setup(void);
+void multifd_device_state_send_cleanup(void);
+
+void multifd_device_state_send_prepare(MultiFDSendParams *p);
+
 #endif
diff --git a/migration/options.c b/migration/options.c
index 4db340b502b0d..b0ac2ea4083ff 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -93,12 +93,16 @@ const Property migration_properties[] = {
                      send_configuration, true),
     DEFINE_PROP_BOOL("send-section-footer", MigrationState,
                      send_section_footer, true),
+    DEFINE_PROP_BOOL("send-switchover-start", MigrationState,
+                     send_switchover_start, true),
     DEFINE_PROP_BOOL("multifd-flush-after-each-section", MigrationState,
                       multifd_flush_after_each_section, false),
     DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState,
                       clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT),
     DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState,
                      preempt_pre_7_2, false),
+    DEFINE_PROP_BOOL("multifd-clean-tls-termination", MigrationState,
+                     multifd_clean_tls_termination, true),
 
     /* Migration parameters */
     DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState,
@@ -207,6 +211,13 @@ bool migrate_auto_converge(void)
     return s->capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
 }
 
+bool migrate_send_switchover_start(void)
+{
+    MigrationState *s = migrate_get_current();
+
+    return s->send_switchover_start;
+}
+
 bool migrate_background_snapshot(void)
 {
     MigrationState *s = migrate_get_current();
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 6a6da6ba7f3ab..5d3edfcfec73d 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -651,8 +651,8 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
         mis->have_fault_thread = false;
     }
 
-    if (enable_mlock) {
-        if (os_mlock() < 0) {
+    if (should_mlock(mlock_state)) {
+        if (os_mlock(is_mlock_on_fault(mlock_state)) < 0) {
             error_report("mlock: %s", strerror(errno));
             /*
              * It doesn't feel right to fail at this point, we have a valid
diff --git a/migration/qemu-file.h b/migration/qemu-file.h
index 3e47a20621a7e..f5b9f430e04b1 100644
--- a/migration/qemu-file.h
+++ b/migration/qemu-file.h
@@ -33,6 +33,8 @@ QEMUFile *qemu_file_new_input(QIOChannel *ioc);
 QEMUFile *qemu_file_new_output(QIOChannel *ioc);
 int qemu_fclose(QEMUFile *f);
 
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(QEMUFile, qemu_fclose)
+
 /*
  * qemu_file_transferred:
  *
diff --git a/migration/ram.c b/migration/ram.c
index 6f460fd22d201..424df6d9f133c 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1964,6 +1964,11 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
     ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
     int res;
 
+    /* Hand over to RDMA first */
+    if (control_save_page(pss, offset, &res)) {
+        return res;
+    }
+
     if (!migrate_multifd()
         || migrate_zero_page_detection() == ZERO_PAGE_DETECTION_LEGACY) {
         if (save_zero_page(rs, pss, offset)) {
@@ -1976,10 +1981,6 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
         return ram_save_multifd_page(block, offset);
     }
 
-    if (control_save_page(pss, offset, &res)) {
-        return res;
-    }
-
     return ram_save_page(rs, pss);
 }
 
@@ -4465,8 +4466,10 @@ static void ram_mig_ram_block_resized(RAMBlockNotifier *n, void *host,
          * Abort and indicate a proper reason.
          */
         error_setg(&err, "RAM block '%s' resized during precopy.", rb->idstr);
-        migration_cancel(err);
+        migrate_set_error(migrate_get_current(), err);
         error_free(err);
+
+        migration_cancel();
     }
 
     switch (ps) {
diff --git a/migration/rdma.c b/migration/rdma.c
index 855753c67191a..76fb0349238a2 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -4174,7 +4174,7 @@ void rdma_start_outgoing_migration(void *opaque,
 
     s->to_dst_file = rdma_new_output(rdma);
     s->rdma_migration = true;
-    migrate_fd_connect(s, NULL);
+    migration_connect(s, NULL);
     return;
 return_path_err:
     qemu_rdma_cleanup(rdma);
diff --git a/migration/savevm.c b/migration/savevm.c
index bc375db282c28..ce158c3512456 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -37,6 +37,7 @@
 #include "migration/register.h"
 #include "migration/global_state.h"
 #include "migration/channel-block.h"
+#include "multifd.h"
 #include "ram.h"
 #include "qemu-file.h"
 #include "savevm.h"
@@ -54,6 +55,7 @@
 #include "qemu/job.h"
 #include "qemu/main-loop.h"
 #include "block/snapshot.h"
+#include "block/thread-pool.h"
 #include "qemu/cutils.h"
 #include "io/channel-buffer.h"
 #include "io/channel-file.h"
@@ -90,6 +92,7 @@ enum qemu_vm_cmd {
     MIG_CMD_ENABLE_COLO,       /* Enable COLO */
     MIG_CMD_POSTCOPY_RESUME,   /* resume postcopy on dest */
     MIG_CMD_RECV_BITMAP,       /* Request for recved bitmap on dst */
+    MIG_CMD_SWITCHOVER_START,  /* Switchover start notification */
     MIG_CMD_MAX
 };
 
@@ -109,6 +112,7 @@ static struct mig_cmd_args {
     [MIG_CMD_POSTCOPY_RESUME]  = { .len =  0, .name = "POSTCOPY_RESUME" },
     [MIG_CMD_PACKAGED]         = { .len =  4, .name = "PACKAGED" },
     [MIG_CMD_RECV_BITMAP]      = { .len = -1, .name = "RECV_BITMAP" },
+    [MIG_CMD_SWITCHOVER_START] = { .len =  0, .name = "SWITCHOVER_START" },
     [MIG_CMD_MAX]              = { .len = -1, .name = "MAX" },
 };
 
@@ -129,6 +133,35 @@ static struct mig_cmd_args {
  * generic extendable format with an exception for two old entities.
  */
 
+/***********************************************************/
+/* Optional load threads pool support */
+
+static void qemu_loadvm_thread_pool_create(MigrationIncomingState *mis)
+{
+    assert(!mis->load_threads);
+    mis->load_threads = thread_pool_new();
+    mis->load_threads_abort = false;
+}
+
+static void qemu_loadvm_thread_pool_destroy(MigrationIncomingState *mis)
+{
+    qatomic_set(&mis->load_threads_abort, true);
+
+    bql_unlock(); /* Load threads might be waiting for BQL */
+    g_clear_pointer(&mis->load_threads, thread_pool_free);
+    bql_lock();
+}
+
+static bool qemu_loadvm_thread_pool_wait(MigrationState *s,
+                                         MigrationIncomingState *mis)
+{
+    bql_unlock(); /* Let load threads do work requiring BQL */
+    thread_pool_wait(mis->load_threads);
+    bql_lock();
+
+    return !migrate_has_error(s);
+}
+
 /***********************************************************/
 /* savevm/loadvm support */
 
@@ -1201,6 +1234,19 @@ void qemu_savevm_send_recv_bitmap(QEMUFile *f, char *block_name)
     qemu_savevm_command_send(f, MIG_CMD_RECV_BITMAP, len + 1, (uint8_t *)buf);
 }
 
+static void qemu_savevm_send_switchover_start(QEMUFile *f)
+{
+    trace_savevm_send_switchover_start();
+    qemu_savevm_command_send(f, MIG_CMD_SWITCHOVER_START, 0, NULL);
+}
+
+void qemu_savevm_maybe_send_switchover_start(QEMUFile *f)
+{
+    if (migrate_send_switchover_start()) {
+        qemu_savevm_send_switchover_start(f);
+    }
+}
+
 bool qemu_savevm_state_blocked(Error **errp)
 {
     SaveStateEntry *se;
@@ -1482,6 +1528,24 @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
     int64_t start_ts_each, end_ts_each;
     SaveStateEntry *se;
     int ret;
+    bool multifd_device_state = multifd_device_state_supported();
+
+    if (multifd_device_state) {
+        QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
+            SaveLiveCompletePrecopyThreadHandler hdlr;
+
+            if (!se->ops || (in_postcopy && se->ops->has_postcopy &&
+                             se->ops->has_postcopy(se->opaque)) ||
+                !se->ops->save_live_complete_precopy_thread) {
+                continue;
+            }
+
+            hdlr = se->ops->save_live_complete_precopy_thread;
+            multifd_spawn_device_state_save_thread(hdlr,
+                                                   se->idstr, se->instance_id,
+                                                   se->opaque);
+        }
+    }
 
     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (!se->ops ||
@@ -1507,16 +1571,35 @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
         save_section_footer(f, se);
         if (ret < 0) {
             qemu_file_set_error(f, ret);
-            return -1;
+            goto ret_fail_abort_threads;
         }
         end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
         trace_vmstate_downtime_save("iterable", se->idstr, se->instance_id,
                                     end_ts_each - start_ts_each);
     }
 
+    if (multifd_device_state) {
+        if (migrate_has_error(migrate_get_current())) {
+            multifd_abort_device_state_save_threads();
+        }
+
+        if (!multifd_join_device_state_save_threads()) {
+            qemu_file_set_error(f, -EINVAL);
+            return -1;
+        }
+    }
+
     trace_vmstate_downtime_checkpoint("src-iterable-saved");
 
     return 0;
+
+ret_fail_abort_threads:
+    if (multifd_device_state) {
+        multifd_abort_device_state_save_threads();
+        multifd_join_device_state_save_threads();
+    }
+
+    return -1;
 }
 
 int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
@@ -1687,6 +1770,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
 
     ret = qemu_file_get_error(f);
     if (ret == 0) {
+        qemu_savevm_maybe_send_switchover_start(f);
         qemu_savevm_state_complete_precopy(f, false);
         ret = qemu_file_get_error(f);
     }
@@ -1970,6 +2054,8 @@ static void *postcopy_ram_listen_thread(void *opaque)
      * in qemu_file, and thus we must be blocking now.
      */
     qemu_file_set_blocking(f, true);
+
+    /* TODO: sanity check that only postcopiable data will be loaded here */
     load_res = qemu_loadvm_state_main(f, mis);
 
     /*
@@ -2030,7 +2116,9 @@ static void *postcopy_ram_listen_thread(void *opaque)
      * (If something broke then qemu will have to exit anyway since it's
      * got a bad migration state).
      */
+    bql_lock();
     migration_incoming_state_destroy();
+    bql_unlock();
 
     rcu_unregister_thread();
     mis->have_listen_thread = false;
@@ -2383,6 +2471,26 @@ static int loadvm_process_enable_colo(MigrationIncomingState *mis)
     return ret;
 }
 
+static int loadvm_postcopy_handle_switchover_start(void)
+{
+    SaveStateEntry *se;
+
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
+        int ret;
+
+        if (!se->ops || !se->ops->switchover_start) {
+            continue;
+        }
+
+        ret = se->ops->switchover_start(se->opaque);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
 /*
  * Process an incoming 'QEMU_VM_COMMAND'
  * 0           just a normal return
@@ -2481,6 +2589,9 @@ static int loadvm_process_command(QEMUFile *f)
 
     case MIG_CMD_ENABLE_COLO:
         return loadvm_process_enable_colo(mis);
+
+    case MIG_CMD_SWITCHOVER_START:
+        return loadvm_postcopy_handle_switchover_start();
     }
 
     return 0;
@@ -2740,16 +2851,68 @@ static int qemu_loadvm_state_setup(QEMUFile *f, Error **errp)
     return 0;
 }
 
-void qemu_loadvm_state_cleanup(void)
+struct LoadThreadData {
+    MigrationLoadThread function;
+    void *opaque;
+};
+
+static int qemu_loadvm_load_thread(void *thread_opaque)
+{
+    struct LoadThreadData *data = thread_opaque;
+    MigrationIncomingState *mis = migration_incoming_get_current();
+    g_autoptr(Error) local_err = NULL;
+
+    if (!data->function(data->opaque, &mis->load_threads_abort, &local_err)) {
+        MigrationState *s = migrate_get_current();
+
+        /*
+         * Can't set load_threads_abort here since processing of main migration
+         * channel data could still be happening, resulting in launching of new
+         * load threads.
+         */
+
+        assert(local_err);
+
+        /*
+         * In case of multiple load threads failing which thread error
+         * return we end setting is purely arbitrary.
+         */
+        migrate_set_error(s, local_err);
+    }
+
+    return 0;
+}
+
+void qemu_loadvm_start_load_thread(MigrationLoadThread function,
+                                   void *opaque)
+{
+    MigrationIncomingState *mis = migration_incoming_get_current();
+    struct LoadThreadData *data;
+
+    /* We only set it from this thread so it's okay to read it directly */
+    assert(!mis->load_threads_abort);
+
+    data = g_new(struct LoadThreadData, 1);
+    data->function = function;
+    data->opaque = opaque;
+
+    thread_pool_submit_immediate(mis->load_threads, qemu_loadvm_load_thread,
+                                 data, g_free);
+}
+
+void qemu_loadvm_state_cleanup(MigrationIncomingState *mis)
 {
     SaveStateEntry *se;
 
     trace_loadvm_state_cleanup();
+
     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (se->ops && se->ops->load_cleanup) {
             se->ops->load_cleanup(se->opaque);
         }
     }
+
+    qemu_loadvm_thread_pool_destroy(mis);
 }
 
 /* Return true if we should continue the migration, or false. */
@@ -2900,6 +3063,7 @@ int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
 
 int qemu_loadvm_state(QEMUFile *f)
 {
+    MigrationState *s = migrate_get_current();
     MigrationIncomingState *mis = migration_incoming_get_current();
     Error *local_err = NULL;
     int ret;
@@ -2909,6 +3073,8 @@ int qemu_loadvm_state(QEMUFile *f)
         return -EINVAL;
     }
 
+    qemu_loadvm_thread_pool_create(mis);
+
     ret = qemu_loadvm_state_header(f);
     if (ret) {
         return ret;
@@ -2940,8 +3106,18 @@ int qemu_loadvm_state(QEMUFile *f)
 
     /* When reaching here, it must be precopy */
     if (ret == 0) {
-        ret = qemu_file_get_error(f);
+        if (migrate_has_error(migrate_get_current()) ||
+            !qemu_loadvm_thread_pool_wait(s, mis)) {
+            ret = -EINVAL;
+        } else {
+            ret = qemu_file_get_error(f);
+        }
     }
+    /*
+     * Set this flag unconditionally so we'll catch further attempts to
+     * start additional threads via an appropriate assert()
+     */
+    qatomic_set(&mis->load_threads_abort, true);
 
     /*
      * Try to read in the VMDESC section as well, so that dumping tools that
@@ -3017,6 +3193,29 @@ int qemu_loadvm_approve_switchover(void)
     return migrate_send_rp_switchover_ack(mis);
 }
 
+bool qemu_loadvm_load_state_buffer(const char *idstr, uint32_t instance_id,
+                                   char *buf, size_t len, Error **errp)
+{
+    SaveStateEntry *se;
+
+    se = find_se(idstr, instance_id);
+    if (!se) {
+        error_setg(errp,
+                   "Unknown idstr %s or instance id %u for load state buffer",
+                   idstr, instance_id);
+        return false;
+    }
+
+    if (!se->ops || !se->ops->load_state_buffer) {
+        error_setg(errp,
+                   "idstr %s / instance %u has no load state buffer operation",
+                   idstr, instance_id);
+        return false;
+    }
+
+    return se->ops->load_state_buffer(se->opaque, buf, len, errp);
+}
+
 bool save_snapshot(const char *name, bool overwrite, const char *vmstate,
                   bool has_devices, strList *devices, Error **errp)
 {
@@ -3315,12 +3514,14 @@ void vmstate_register_ram(MemoryRegion *mr, DeviceState *dev)
     qemu_ram_set_idstr(mr->ram_block,
                        memory_region_name(mr), dev);
     qemu_ram_set_migratable(mr->ram_block);
+    ram_block_add_cpr_blocker(mr->ram_block, &error_fatal);
 }
 
 void vmstate_unregister_ram(MemoryRegion *mr, DeviceState *dev)
 {
     qemu_ram_unset_idstr(mr->ram_block);
     qemu_ram_unset_migratable(mr->ram_block);
+    ram_block_del_cpr_blocker(mr->ram_block);
 }
 
 void vmstate_register_ram_global(MemoryRegion *mr)
diff --git a/migration/savevm.h b/migration/savevm.h
index 7957460062cad..138c39a7f9f97 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -53,6 +53,7 @@ void qemu_savevm_send_postcopy_listen(QEMUFile *f);
 void qemu_savevm_send_postcopy_run(QEMUFile *f);
 void qemu_savevm_send_postcopy_resume(QEMUFile *f);
 void qemu_savevm_send_recv_bitmap(QEMUFile *f, char *block_name);
+void qemu_savevm_maybe_send_switchover_start(QEMUFile *f);
 
 void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
                                            uint16_t len,
@@ -63,11 +64,14 @@ void qemu_savevm_live_state(QEMUFile *f);
 int qemu_save_device_state(QEMUFile *f);
 
 int qemu_loadvm_state(QEMUFile *f);
-void qemu_loadvm_state_cleanup(void);
+void qemu_loadvm_state_cleanup(MigrationIncomingState *mis);
 int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
 int qemu_load_device_state(QEMUFile *f);
 int qemu_loadvm_approve_switchover(void);
 int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
         bool in_postcopy);
 
+bool qemu_loadvm_load_state_buffer(const char *idstr, uint32_t instance_id,
+                                   char *buf, size_t len, Error **errp);
+
 #endif
diff --git a/migration/tls.c b/migration/tls.c
index fa03d9136ca32..5cbf952383691 100644
--- a/migration/tls.c
+++ b/migration/tls.c
@@ -156,6 +156,11 @@ void migration_tls_channel_connect(MigrationState *s,
                               NULL);
 }
 
+void migration_tls_channel_end(QIOChannel *ioc, Error **errp)
+{
+    qio_channel_tls_bye(QIO_CHANNEL_TLS(ioc), errp);
+}
+
 bool migrate_channel_requires_tls_upgrade(QIOChannel *ioc)
 {
     if (!migrate_tls()) {
diff --git a/migration/tls.h b/migration/tls.h
index 5797d153cb01d..58b25e12281ce 100644
--- a/migration/tls.h
+++ b/migration/tls.h
@@ -36,7 +36,7 @@ void migration_tls_channel_connect(MigrationState *s,
                                    QIOChannel *ioc,
                                    const char *hostname,
                                    Error **errp);
-
+void migration_tls_channel_end(QIOChannel *ioc, Error **errp);
 /* Whether the QIO channel requires further TLS handshake? */
 bool migrate_channel_requires_tls_upgrade(QIOChannel *ioc);
 
diff --git a/migration/trace-events b/migration/trace-events
index 12b262f8ee759..c506e11a2e1d3 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -39,6 +39,7 @@ savevm_send_postcopy_run(void) ""
 savevm_send_postcopy_resume(void) ""
 savevm_send_colo_enable(void) ""
 savevm_send_recv_bitmap(char *name) "%s"
+savevm_send_switchover_start(void) ""
 savevm_state_setup(void) ""
 savevm_state_resume_prepare(void) ""
 savevm_state_header(void) ""
@@ -154,9 +155,9 @@ multifd_set_outgoing_channel(void *ioc, const char *ioctype, const char *hostnam
 
 # migration.c
 migrate_set_state(const char *new_state) "new state %s"
-migrate_fd_cleanup(void) ""
+migration_cleanup(void) ""
 migrate_error(const char *error_desc) "error=%s"
-migrate_fd_cancel(void) ""
+migration_cancel(void) ""
 migrate_handle_rp_req_pages(const char *rbname, size_t start, size_t len) "in %s at 0x%zx len 0x%zx"
 migrate_pending_exact(uint64_t size, uint64_t pre, uint64_t post) "exact pending size %" PRIu64 " (pre = %" PRIu64 " post=%" PRIu64 ")"
 migrate_pending_estimate(uint64_t size, uint64_t pre, uint64_t post) "estimate pending size %" PRIu64 " (pre = %" PRIu64 " post=%" PRIu64 ")"
diff --git a/monitor/hmp-cmds-target.c b/monitor/hmp-cmds-target.c
index 27ffe61818db0..239c2a61a4519 100644
--- a/monitor/hmp-cmds-target.c
+++ b/monitor/hmp-cmds-target.c
@@ -301,7 +301,6 @@ void hmp_gpa2hva(Monitor *mon, const QDict *qdict)
 void hmp_gva2gpa(Monitor *mon, const QDict *qdict)
 {
     target_ulong addr = qdict_get_int(qdict, "addr");
-    MemTxAttrs attrs;
     CPUState *cs = mon_get_cpu(mon);
     hwaddr gpa;
 
@@ -310,7 +309,7 @@ void hmp_gva2gpa(Monitor *mon, const QDict *qdict)
         return;
     }
 
-    gpa  = cpu_get_phys_page_attrs_debug(cs, addr & TARGET_PAGE_MASK, &attrs);
+    gpa  = cpu_get_phys_page_debug(cs, addr & TARGET_PAGE_MASK);
     if (gpa == -1) {
         monitor_printf(mon, "Unmapped\n");
     } else {
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 3825ff40a9b30..7ded3378cfcae 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -431,6 +431,6 @@ void hmp_dumpdtb(Monitor *mon, const QDict *qdict)
         return;
     }
 
-    monitor_printf(mon, "dtb dumped to %s", filename);
+    monitor_printf(mon, "DTB dumped to '%s'\n", filename);
 }
 #endif
diff --git a/net/net.c b/net/net.c
index a3996d5c62ccc..39d6f28158a35 100644
--- a/net/net.c
+++ b/net/net.c
@@ -381,9 +381,12 @@ NetClientState *qemu_get_peer(NetClientState *nc, int queue_index)
     return ncs->peer;
 }
 
-static void qemu_cleanup_net_client(NetClientState *nc)
+static void qemu_cleanup_net_client(NetClientState *nc,
+                                    bool remove_from_net_clients)
 {
-    QTAILQ_REMOVE(&net_clients, nc, next);
+    if (remove_from_net_clients) {
+        QTAILQ_REMOVE(&net_clients, nc, next);
+    }
 
     if (nc->info->cleanup) {
         nc->info->cleanup(nc);
@@ -425,7 +428,13 @@ void qemu_del_net_client(NetClientState *nc)
         object_unparent(OBJECT(nf));
     }
 
-    /* If there is a peer NIC, delete and cleanup client, but do not free. */
+    /*
+     * If there is a peer NIC, transfer ownership to it.  Delete the client
+     * from net_client list but do not cleanup nor free.  This way NIC can
+     * still access to members of the backend.
+     *
+     * The cleanup and free will be done when the NIC is free.
+     */
     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_NIC) {
         NICState *nic = qemu_get_nic(nc->peer);
         if (nic->peer_deleted) {
@@ -435,21 +444,18 @@ void qemu_del_net_client(NetClientState *nc)
 
         for (i = 0; i < queues; i++) {
             ncs[i]->peer->link_down = true;
+            QTAILQ_REMOVE(&net_clients, ncs[i], next);
         }
 
         if (nc->peer->info->link_status_changed) {
             nc->peer->info->link_status_changed(nc->peer);
         }
 
-        for (i = 0; i < queues; i++) {
-            qemu_cleanup_net_client(ncs[i]);
-        }
-
         return;
     }
 
     for (i = 0; i < queues; i++) {
-        qemu_cleanup_net_client(ncs[i]);
+        qemu_cleanup_net_client(ncs[i], true);
         qemu_free_net_client(ncs[i]);
     }
 }
@@ -462,8 +468,12 @@ void qemu_del_nic(NICState *nic)
 
     for (i = 0; i < queues; i++) {
         NetClientState *nc = qemu_get_subqueue(nic, i);
-        /* If this is a peer NIC and peer has already been deleted, free it now. */
+        /*
+         * If this is a peer NIC and peer has already been deleted, clean it up
+         * and free it now.
+         */
         if (nic->peer_deleted) {
+            qemu_cleanup_net_client(nc->peer, false);
             qemu_free_net_client(nc->peer);
         } else if (nc->peer) {
             /* if there are RX packets pending, complete them */
@@ -474,7 +484,7 @@ void qemu_del_nic(NICState *nic)
     for (i = queues - 1; i >= 0; i--) {
         NetClientState *nc = qemu_get_subqueue(nic, i);
 
-        qemu_cleanup_net_client(nc);
+        qemu_cleanup_net_client(nc, true);
         qemu_free_net_client(nc);
     }
 
@@ -1678,6 +1688,9 @@ void net_cleanup(void)
      * of the latest NET_CLIENT_DRIVER_NIC, and operate on *p as we walk
      * the list.
      *
+     * However, the NIC may have peers that trust to be clean beyond this
+     * point.  For example, if they have been removed with device_del.
+     *
      * The 'nc' variable isn't part of the list traversal; it's purely
      * for convenience as too much '(*p)->' has a tendency to make the
      * readers' eyes bleed.
@@ -1685,6 +1698,17 @@ void net_cleanup(void)
     while (*p) {
         nc = *p;
         if (nc->info->type == NET_CLIENT_DRIVER_NIC) {
+            NICState *nic = qemu_get_nic(nc);
+
+            if (nic->peer_deleted) {
+                int queues = MAX(nic->conf->peers.queues, 1);
+
+                for (int i = 0; i < queues; i++) {
+                    nc = qemu_get_subqueue(nic, i);
+                    qemu_cleanup_net_client(nc->peer, false);
+                }
+            }
+
             /* Skip NET_CLIENT_DRIVER_NIC entries */
             p = &QTAILQ_NEXT(nc, next);
         } else {
diff --git a/net/tap-linux.c b/net/tap-linux.c
index 1226d5fda2d92..22ec2f45d2b7a 100644
--- a/net/tap-linux.c
+++ b/net/tap-linux.c
@@ -45,10 +45,21 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
     int len = sizeof(struct virtio_net_hdr);
     unsigned int features;
 
-    fd = RETRY_ON_EINTR(open(PATH_NET_TUN, O_RDWR));
+
+    ret = if_nametoindex(ifname);
+    if (ret) {
+        g_autofree char *file = g_strdup_printf("/dev/tap%d", ret);
+        fd = open(file, O_RDWR);
+    } else {
+        fd = -1;
+    }
+
     if (fd < 0) {
-        error_setg_errno(errp, errno, "could not open %s", PATH_NET_TUN);
-        return -1;
+        fd = RETRY_ON_EINTR(open(PATH_NET_TUN, O_RDWR));
+        if (fd < 0) {
+            error_setg_errno(errp, errno, "could not open %s", PATH_NET_TUN);
+            return -1;
+        }
     }
     memset(&ifr, 0, sizeof(ifr));
     ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
diff --git a/net/vhost-user.c b/net/vhost-user.c
index 12555518e8388..0b235e50c6508 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -16,6 +16,7 @@
 #include "chardev/char-fe.h"
 #include "qapi/error.h"
 #include "qapi/qapi-commands-net.h"
+#include "qapi/qapi-events-net.h"
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
 #include "qemu/option.h"
@@ -271,6 +272,7 @@ static void chr_closed_bh(void *opaque)
     if (err) {
         error_report_err(err);
     }
+    qapi_event_send_netdev_vhost_user_disconnected(name);
 }
 
 static void net_vhost_user_event(void *opaque, QEMUChrEvent event)
@@ -300,6 +302,7 @@ static void net_vhost_user_event(void *opaque, QEMUChrEvent event)
                                          net_vhost_user_watch, s);
         qmp_set_link(name, true, &err);
         s->started = true;
+        qapi_event_send_netdev_vhost_user_connected(name, chr->label);
         break;
     case CHR_EVENT_CLOSED:
         /* a close event may happen during a read/write, but vhost
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 231b45246c660..7ca8b46eee779 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -224,14 +224,6 @@ static void vhost_vdpa_cleanup(NetClientState *nc)
 {
     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
 
-    /*
-     * If a peer NIC is attached, do not cleanup anything.
-     * Cleanup will happen as a part of qemu_cleanup() -> net_cleanup()
-     * when the guest is shutting down.
-     */
-    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_NIC) {
-        return;
-    }
     munmap(s->cvq_cmd_out_buffer, vhost_vdpa_net_cvq_cmd_page_len());
     munmap(s->status, vhost_vdpa_net_cvq_cmd_page_len());
     if (s->vhost_net) {
@@ -270,6 +262,18 @@ static bool vhost_vdpa_has_ufo(NetClientState *nc)
 
 }
 
+/*
+ * FIXME: vhost_vdpa doesn't have an API to "set h/w endianness". But it's
+ * reasonable to assume that h/w is LE by default, because LE is what
+ * virtio 1.0 and later ask for. So, this function just says "yes, the h/w is
+ * LE". Otherwise, on a BE machine, higher-level code would mistakely think
+ * the h/w is BE and can't support VDPA for a virtio 1.0 client.
+ */
+static int vhost_vdpa_set_vnet_le(NetClientState *nc, bool enable)
+{
+    return 0;
+}
+
 static bool vhost_vdpa_check_peer_type(NetClientState *nc, ObjectClass *oc,
                                        Error **errp)
 {
@@ -437,6 +441,7 @@ static NetClientInfo net_vhost_vdpa_info = {
         .cleanup = vhost_vdpa_cleanup,
         .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
         .has_ufo = vhost_vdpa_has_ufo,
+        .set_vnet_le = vhost_vdpa_set_vnet_le,
         .check_peer_type = vhost_vdpa_check_peer_type,
         .set_steering_ebpf = vhost_vdpa_set_steering_ebpf,
 };
@@ -510,14 +515,20 @@ static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size,
                                   bool write)
 {
     DMAMap map = {};
+    hwaddr taddr = (hwaddr)(uintptr_t)buf;
     int r;
 
-    map.translated_addr = (hwaddr)(uintptr_t)buf;
     map.size = size - 1;
     map.perm = write ? IOMMU_RW : IOMMU_RO,
-    r = vhost_iova_tree_map_alloc(v->shared->iova_tree, &map);
+    r = vhost_iova_tree_map_alloc(v->shared->iova_tree, &map, taddr);
     if (unlikely(r != IOVA_OK)) {
         error_report("Cannot map injected element");
+
+        if (map.translated_addr == taddr) {
+            error_report("Insertion to IOVA->HVA tree failed");
+            /* Remove the mapping from the IOVA-only tree */
+            goto dma_map_err;
+        }
         return r;
     }
 
@@ -643,7 +654,7 @@ static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s,
     VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
     int r;
 
-    r = vhost_svq_add(svq, out_sg, out_num, in_sg, in_num, NULL);
+    r = vhost_svq_add(svq, out_sg, out_num, NULL, in_sg, in_num, NULL, NULL);
     if (unlikely(r != 0)) {
         if (unlikely(r == -ENOSPC)) {
             qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
diff --git a/os-posix.c b/os-posix.c
index 9cce55ff2f7e3..52925c23d3d91 100644
--- a/os-posix.c
+++ b/os-posix.c
@@ -327,18 +327,29 @@ void os_set_line_buffering(void)
     setvbuf(stdout, NULL, _IOLBF, 0);
 }
 
-int os_mlock(void)
+int os_mlock(bool on_fault)
 {
 #ifdef HAVE_MLOCKALL
     int ret = 0;
+    int flags = MCL_CURRENT | MCL_FUTURE;
 
-    ret = mlockall(MCL_CURRENT | MCL_FUTURE);
+    if (on_fault) {
+#ifdef HAVE_MLOCK_ONFAULT
+        flags |= MCL_ONFAULT;
+#else
+        error_report("mlockall: on_fault not supported");
+        return -EINVAL;
+#endif
+    }
+
+    ret = mlockall(flags);
     if (ret < 0) {
         error_report("mlockall: %s", strerror(errno));
     }
 
     return ret;
 #else
+    (void)on_fault;
     return -ENOSYS;
 #endif
 }
diff --git a/page-target.c b/page-target.c
index 82211c85937a0..321e43d06f488 100644
--- a/page-target.c
+++ b/page-target.c
@@ -8,24 +8,6 @@
 
 #include "qemu/osdep.h"
 #include "exec/target_page.h"
-#include "exec/cpu-defs.h"
-#include "cpu.h"
-#include "exec/cpu-all.h"
-
-size_t qemu_target_page_size(void)
-{
-    return TARGET_PAGE_SIZE;
-}
-
-int qemu_target_page_mask(void)
-{
-    return TARGET_PAGE_MASK;
-}
-
-int qemu_target_page_bits(void)
-{
-    return TARGET_PAGE_BITS;
-}
 
 int qemu_target_page_bits_min(void)
 {
diff --git a/page-vary-target.c b/page-vary-target.c
index 343b4adb95a50..3f81144cda8af 100644
--- a/page-vary-target.c
+++ b/page-vary-target.c
@@ -35,7 +35,5 @@ bool set_preferred_target_page_bits(int bits)
 
 void finalize_target_page_bits(void)
 {
-#ifdef TARGET_PAGE_BITS_VARY
     finalize_target_page_bits_common(TARGET_PAGE_BITS_MIN);
-#endif
 }
diff --git a/pc-bios/README b/pc-bios/README
index 7ffb2f43a4602..f0f13e15f2c36 100644
--- a/pc-bios/README
+++ b/pc-bios/README
@@ -13,8 +13,8 @@
 
 - SLOF (Slimline Open Firmware) is a free IEEE 1275 Open Firmware
   implementation for certain IBM POWER hardware.  The sources are at
-  https://github.com/aik/SLOF, and the image currently in qemu is
-  built from git tag qemu-slof-20230918.
+  https://gitlab.com/slof/slof, and the image currently in qemu is
+  built from git tag qemu-slof-20241106.
 
 - VOF (Virtual Open Firmware) is a minimalistic firmware to work with
   -machine pseries,x-vof=on. When enabled, the firmware acts as a slim shim and
@@ -43,6 +43,19 @@
   run an hypervisor OS or simply a host OS on the "baremetal"
   platform, also known as the PowerNV (Non-Virtualized) platform.
 
+- pnv-pnor.bin is a non-volatile RAM image used by PowerNV, which stores
+  NVRAM BIOS settings among other things. This image was created with the
+  following command (the ffspart tool can be found in the skiboot source tree):
+
+  ffspart -s 0x1000 -c 34 -i pnv-pnor.in -p pnv-pnor.bin
+
+  Where pnv-pnor.in contains the two lines (no leading whitespace):
+
+  NVRAM,0x01000,0x00020000,,,/dev/zero
+  VERSION,0x21000,0x00001000,,,/dev/zero
+
+  skiboot is then booted once to format the NVRAM partition.
+
 - QemuMacDrivers (https://github.com/ozbenh/QemuMacDrivers) is a project to
   provide virtualised drivers for PPC MacOS guests.
 
@@ -70,10 +83,10 @@
   source code also contains code reused from other projects described here:
   https://github.com/riscv/opensbi/blob/master/ThirdPartyNotices.md.
 
-- npcm7xx_bootrom.bin is a simplified, free (Apache 2.0) boot ROM for Nuvoton
-  NPCM7xx BMC devices. It currently implements the bare minimum to load, parse,
-  initialize and run boot images stored in SPI flash, but may grow more
-  features over time as needed. The source code is available at:
+- npcm{7xx,8xx}_bootrom.bin is a simplified, free (Apache 2.0) boot ROM for
+  Nuvoton NPCM7xx/8xx BMC devices. It currently implements the bare minimum to
+  load, parse, initialize and run boot images stored in SPI flash, but may grow
+  more features over time as needed. The source code is available at:
   https://github.com/google/vbootrom
 
 - hppa-firmware.img (32-bit) and hppa-firmware64.img (64-bit) are firmware
diff --git a/pc-bios/meson.build b/pc-bios/meson.build
index b68b29cc7d1f1..34d6616c32b44 100644
--- a/pc-bios/meson.build
+++ b/pc-bios/meson.build
@@ -70,6 +70,7 @@ blobs = [
   's390-ccw.img',
   'slof.bin',
   'skiboot.lid',
+  'pnv-pnor.bin',
   'palcode-clipper',
   'u-boot.e500',
   'u-boot-sam460-20100605.bin',
@@ -80,6 +81,7 @@ blobs = [
   'opensbi-riscv32-generic-fw_dynamic.bin',
   'opensbi-riscv64-generic-fw_dynamic.bin',
   'npcm7xx_bootrom.bin',
+  'npcm8xx_bootrom.bin',
   'vof.bin',
   'vof-nvram.bin',
 ]
diff --git a/pc-bios/npcm7xx_bootrom.bin b/pc-bios/npcm7xx_bootrom.bin
index 38f89d1b97b0c..903f126636f9e 100644
Binary files a/pc-bios/npcm7xx_bootrom.bin and b/pc-bios/npcm7xx_bootrom.bin differ
diff --git a/pc-bios/npcm8xx_bootrom.bin b/pc-bios/npcm8xx_bootrom.bin
new file mode 100644
index 0000000000000..6370d6475635c
Binary files /dev/null and b/pc-bios/npcm8xx_bootrom.bin differ
diff --git a/pc-bios/pnv-pnor.bin b/pc-bios/pnv-pnor.bin
new file mode 100644
index 0000000000000..3e6f70014408e
Binary files /dev/null and b/pc-bios/pnv-pnor.bin differ
diff --git a/pc-bios/skiboot.lid b/pc-bios/skiboot.lid
index 906bd5127175d..ffc77ee11126c 100644
Binary files a/pc-bios/skiboot.lid and b/pc-bios/skiboot.lid differ
diff --git a/pc-bios/slof.bin b/pc-bios/slof.bin
index 27fed09f49a79..4314e17b9ddc9 100644
Binary files a/pc-bios/slof.bin and b/pc-bios/slof.bin differ
diff --git a/plugins/api-system.c b/plugins/api-system.c
new file mode 100644
index 0000000000000..cc190b167eacc
--- /dev/null
+++ b/plugins/api-system.c
@@ -0,0 +1,131 @@
+/*
+ * QEMU Plugin API - System specific implementations
+ *
+ * This provides the APIs that have a specific system implementation
+ * or are only relevant to system-mode.
+ *
+ * Copyright (C) 2017, Emilio G. Cota <cota@braap.org>
+ * Copyright (C) 2019-2025, Linaro
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "qapi/error.h"
+#include "migration/blocker.h"
+#include "hw/boards.h"
+#include "qemu/plugin-memory.h"
+#include "qemu/plugin.h"
+
+/*
+ * In system mode we cannot trace the binary being executed so the
+ * helpers all return NULL/0.
+ */
+const char *qemu_plugin_path_to_binary(void)
+{
+    return NULL;
+}
+
+uint64_t qemu_plugin_start_code(void)
+{
+    return 0;
+}
+
+uint64_t qemu_plugin_end_code(void)
+{
+    return 0;
+}
+
+uint64_t qemu_plugin_entry_code(void)
+{
+    return 0;
+}
+
+/*
+ * Virtual Memory queries
+ */
+
+static __thread struct qemu_plugin_hwaddr hwaddr_info;
+
+struct qemu_plugin_hwaddr *qemu_plugin_get_hwaddr(qemu_plugin_meminfo_t info,
+                                                  uint64_t vaddr)
+{
+    CPUState *cpu = current_cpu;
+    unsigned int mmu_idx = get_mmuidx(info);
+    enum qemu_plugin_mem_rw rw = get_plugin_meminfo_rw(info);
+    hwaddr_info.is_store = (rw & QEMU_PLUGIN_MEM_W) != 0;
+
+    assert(mmu_idx < NB_MMU_MODES);
+
+    if (!tlb_plugin_lookup(cpu, vaddr, mmu_idx,
+                           hwaddr_info.is_store, &hwaddr_info)) {
+        error_report("invalid use of qemu_plugin_get_hwaddr");
+        return NULL;
+    }
+
+    return &hwaddr_info;
+}
+
+bool qemu_plugin_hwaddr_is_io(const struct qemu_plugin_hwaddr *haddr)
+{
+    return haddr->is_io;
+}
+
+uint64_t qemu_plugin_hwaddr_phys_addr(const struct qemu_plugin_hwaddr *haddr)
+{
+    if (haddr) {
+        return haddr->phys_addr;
+    }
+    return 0;
+}
+
+const char *qemu_plugin_hwaddr_device_name(const struct qemu_plugin_hwaddr *h)
+{
+    if (h && h->is_io) {
+        MemoryRegion *mr = h->mr;
+        if (!mr->name) {
+            unsigned maddr = (uintptr_t)mr;
+            g_autofree char *temp = g_strdup_printf("anon%08x", maddr);
+            return g_intern_string(temp);
+        } else {
+            return g_intern_string(mr->name);
+        }
+    } else {
+        return g_intern_static_string("RAM");
+    }
+}
+
+/*
+ * Time control
+ */
+static bool has_control;
+static Error *migration_blocker;
+
+const void *qemu_plugin_request_time_control(void)
+{
+    if (!has_control) {
+        has_control = true;
+        error_setg(&migration_blocker,
+                   "TCG plugin time control does not support migration");
+        migrate_add_blocker(&migration_blocker, NULL);
+        return &has_control;
+    }
+    return NULL;
+}
+
+static void advance_virtual_time__async(CPUState *cpu, run_on_cpu_data data)
+{
+    int64_t new_time = data.host_ulong;
+    qemu_clock_advance_virtual_time(new_time);
+}
+
+void qemu_plugin_update_ns(const void *handle, int64_t new_time)
+{
+    if (handle == &has_control) {
+        /* Need to execute out of cpu_exec, so bql can be locked. */
+        async_run_on_cpu(current_cpu,
+                         advance_virtual_time__async,
+                         RUN_ON_CPU_HOST_ULONG(new_time));
+    }
+}
diff --git a/plugins/api-user.c b/plugins/api-user.c
new file mode 100644
index 0000000000000..28704a89e8994
--- /dev/null
+++ b/plugins/api-user.c
@@ -0,0 +1,57 @@
+/*
+ * QEMU Plugin API - user-mode only implementations
+ *
+ * This provides the APIs that have a user-mode specific
+ * implementations or are only relevant to user-mode.
+ *
+ * Copyright (C) 2017, Emilio G. Cota <cota@braap.org>
+ * Copyright (C) 2019-2025, Linaro
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/plugin.h"
+#include "exec/log.h"
+
+/*
+ * Virtual Memory queries - these are all NOPs for user-mode which
+ * only ever has visibility of virtual addresses.
+ */
+
+struct qemu_plugin_hwaddr *qemu_plugin_get_hwaddr(qemu_plugin_meminfo_t info,
+                                                  uint64_t vaddr)
+{
+    return NULL;
+}
+
+bool qemu_plugin_hwaddr_is_io(const struct qemu_plugin_hwaddr *haddr)
+{
+    return false;
+}
+
+uint64_t qemu_plugin_hwaddr_phys_addr(const struct qemu_plugin_hwaddr *haddr)
+{
+    return 0;
+}
+
+const char *qemu_plugin_hwaddr_device_name(const struct qemu_plugin_hwaddr *h)
+{
+    return g_intern_static_string("Invalid");
+}
+
+/*
+ * Time control - for user mode the only real time is wall clock time
+ * so realistically all you can do in user mode is slow down execution
+ * which doesn't require the ability to mess with the clock.
+ */
+
+const void *qemu_plugin_request_time_control(void)
+{
+    return NULL;
+}
+
+void qemu_plugin_update_ns(const void *handle, int64_t new_time)
+{
+    qemu_log_mask(LOG_UNIMP, "user-mode can't control time");
+}
diff --git a/plugins/api.c b/plugins/api.c
index 4110cfaa2379e..604ce06802a5c 100644
--- a/plugins/api.c
+++ b/plugins/api.c
@@ -39,26 +39,13 @@
 #include "qemu/main-loop.h"
 #include "qemu/plugin.h"
 #include "qemu/log.h"
-#include "qemu/timer.h"
 #include "tcg/tcg.h"
-#include "exec/exec-all.h"
 #include "exec/gdbstub.h"
+#include "exec/target_page.h"
 #include "exec/translation-block.h"
 #include "exec/translator.h"
 #include "disas/disas.h"
 #include "plugin.h"
-#ifndef CONFIG_USER_ONLY
-#include "qapi/error.h"
-#include "migration/blocker.h"
-#include "exec/ram_addr.h"
-#include "qemu/plugin-memory.h"
-#include "hw/boards.h"
-#else
-#include "qemu.h"
-#ifdef CONFIG_LINUX
-#include "loader.h"
-#endif
-#endif
 
 /* Uninstall and Reset handlers */
 
@@ -287,7 +274,7 @@ uint64_t qemu_plugin_insn_vaddr(const struct qemu_plugin_insn *insn)
 void *qemu_plugin_insn_haddr(const struct qemu_plugin_insn *insn)
 {
     const DisasContextBase *db = tcg_ctx->plugin_db;
-    vaddr page0_last = db->pc_first | ~TARGET_PAGE_MASK;
+    vaddr page0_last = db->pc_first | ~qemu_target_page_mask();
 
     if (db->fake_insn) {
         return NULL;
@@ -385,76 +372,6 @@ qemu_plugin_mem_value qemu_plugin_mem_get_value(qemu_plugin_meminfo_t info)
     return value;
 }
 
-/*
- * Virtual Memory queries
- */
-
-#ifdef CONFIG_SOFTMMU
-static __thread struct qemu_plugin_hwaddr hwaddr_info;
-#endif
-
-struct qemu_plugin_hwaddr *qemu_plugin_get_hwaddr(qemu_plugin_meminfo_t info,
-                                                  uint64_t vaddr)
-{
-#ifdef CONFIG_SOFTMMU
-    CPUState *cpu = current_cpu;
-    unsigned int mmu_idx = get_mmuidx(info);
-    enum qemu_plugin_mem_rw rw = get_plugin_meminfo_rw(info);
-    hwaddr_info.is_store = (rw & QEMU_PLUGIN_MEM_W) != 0;
-
-    assert(mmu_idx < NB_MMU_MODES);
-
-    if (!tlb_plugin_lookup(cpu, vaddr, mmu_idx,
-                           hwaddr_info.is_store, &hwaddr_info)) {
-        error_report("invalid use of qemu_plugin_get_hwaddr");
-        return NULL;
-    }
-
-    return &hwaddr_info;
-#else
-    return NULL;
-#endif
-}
-
-bool qemu_plugin_hwaddr_is_io(const struct qemu_plugin_hwaddr *haddr)
-{
-#ifdef CONFIG_SOFTMMU
-    return haddr->is_io;
-#else
-    return false;
-#endif
-}
-
-uint64_t qemu_plugin_hwaddr_phys_addr(const struct qemu_plugin_hwaddr *haddr)
-{
-#ifdef CONFIG_SOFTMMU
-    if (haddr) {
-        return haddr->phys_addr;
-    }
-#endif
-    return 0;
-}
-
-const char *qemu_plugin_hwaddr_device_name(const struct qemu_plugin_hwaddr *h)
-{
-#ifdef CONFIG_SOFTMMU
-    if (h && h->is_io) {
-        MemoryRegion *mr = h->mr;
-        if (!mr->name) {
-            unsigned maddr = (uintptr_t)mr;
-            g_autofree char *temp = g_strdup_printf("anon%08x", maddr);
-            return g_intern_string(temp);
-        } else {
-            return g_intern_string(mr->name);
-        }
-    } else {
-        return g_intern_static_string("RAM");
-    }
-#else
-    return g_intern_static_string("Invalid");
-#endif
-}
-
 int qemu_plugin_num_vcpus(void)
 {
     return plugin_num_vcpus();
@@ -473,49 +390,6 @@ bool qemu_plugin_bool_parse(const char *name, const char *value, bool *ret)
     return name && value && qapi_bool_parse(name, value, ret, NULL);
 }
 
-/*
- * Binary path, start and end locations
- */
-const char *qemu_plugin_path_to_binary(void)
-{
-    char *path = NULL;
-#ifdef CONFIG_USER_ONLY
-    TaskState *ts = get_task_state(current_cpu);
-    path = g_strdup(ts->bprm->filename);
-#endif
-    return path;
-}
-
-uint64_t qemu_plugin_start_code(void)
-{
-    uint64_t start = 0;
-#ifdef CONFIG_USER_ONLY
-    TaskState *ts = get_task_state(current_cpu);
-    start = ts->info->start_code;
-#endif
-    return start;
-}
-
-uint64_t qemu_plugin_end_code(void)
-{
-    uint64_t end = 0;
-#ifdef CONFIG_USER_ONLY
-    TaskState *ts = get_task_state(current_cpu);
-    end = ts->info->end_code;
-#endif
-    return end;
-}
-
-uint64_t qemu_plugin_entry_code(void)
-{
-    uint64_t entry = 0;
-#ifdef CONFIG_USER_ONLY
-    TaskState *ts = get_task_state(current_cpu);
-    entry = ts->info->entry;
-#endif
-    return entry;
-}
-
 /*
  * Create register handles.
  *
@@ -561,7 +435,7 @@ GArray *qemu_plugin_get_registers(void)
     return create_register_handles(regs);
 }
 
-bool qemu_plugin_read_memory_vaddr(vaddr addr, GByteArray *data, size_t len)
+bool qemu_plugin_read_memory_vaddr(uint64_t addr, GByteArray *data, size_t len)
 {
     g_assert(current_cpu);
 
@@ -641,44 +515,3 @@ uint64_t qemu_plugin_u64_sum(qemu_plugin_u64 entry)
     return total;
 }
 
-/*
- * Time control
- */
-static bool has_control;
-#ifdef CONFIG_SOFTMMU
-static Error *migration_blocker;
-#endif
-
-const void *qemu_plugin_request_time_control(void)
-{
-    if (!has_control) {
-        has_control = true;
-#ifdef CONFIG_SOFTMMU
-        error_setg(&migration_blocker,
-                   "TCG plugin time control does not support migration");
-        migrate_add_blocker(&migration_blocker, NULL);
-#endif
-        return &has_control;
-    }
-    return NULL;
-}
-
-#ifdef CONFIG_SOFTMMU
-static void advance_virtual_time__async(CPUState *cpu, run_on_cpu_data data)
-{
-    int64_t new_time = data.host_ulong;
-    qemu_clock_advance_virtual_time(new_time);
-}
-#endif
-
-void qemu_plugin_update_ns(const void *handle, int64_t new_time)
-{
-#ifdef CONFIG_SOFTMMU
-    if (handle == &has_control) {
-        /* Need to execute out of cpu_exec, so bql can be locked. */
-        async_run_on_cpu(current_cpu,
-                         advance_virtual_time__async,
-                         RUN_ON_CPU_HOST_ULONG(new_time));
-    }
-#endif
-}
diff --git a/plugins/core.c b/plugins/core.c
index bb105e8e6880b..eb9281fe54768 100644
--- a/plugins/core.c
+++ b/plugins/core.c
@@ -12,22 +12,14 @@
  * SPDX-License-Identifier: GPL-2.0-or-later
  */
 #include "qemu/osdep.h"
-#include "qemu/error-report.h"
-#include "qemu/config-file.h"
-#include "qapi/error.h"
 #include "qemu/lockable.h"
 #include "qemu/option.h"
 #include "qemu/plugin.h"
 #include "qemu/queue.h"
 #include "qemu/rcu_queue.h"
-#include "qemu/xxhash.h"
 #include "qemu/rcu.h"
-#include "hw/core/cpu.h"
-
-#include "exec/exec-all.h"
 #include "exec/tb-flush.h"
-#include "tcg/tcg.h"
-#include "tcg/tcg-op.h"
+#include "tcg/tcg-op-common.h"
 #include "plugin.h"
 
 struct qemu_plugin_cb {
diff --git a/plugins/loader.c b/plugins/loader.c
index ebc01da9c6ec2..7523d554f034b 100644
--- a/plugins/loader.c
+++ b/plugins/loader.c
@@ -31,9 +31,6 @@
 #include "qemu/memalign.h"
 #include "hw/core/cpu.h"
 #include "exec/tb-flush.h"
-#ifndef CONFIG_USER_ONLY
-#include "hw/boards.h"
-#endif
 
 #include "plugin.h"
 
@@ -128,7 +125,7 @@ static int plugin_add(void *opaque, const char *name, const char *value,
                 /* Will treat arg="argname" as "argname=on" */
                 fullarg = g_strdup_printf("%s=%s", value, "on");
             } else {
-                fullarg = g_strdup_printf("%s", value);
+                fullarg = g_strdup(value);
             }
             warn_report("using 'arg=%s' is deprecated", value);
             error_printf("Please use '%s' directly\n", fullarg);
@@ -297,17 +294,11 @@ int qemu_plugin_load_list(QemuPluginList *head, Error **errp)
     struct qemu_plugin_desc *desc, *next;
     g_autofree qemu_info_t *info = g_new0(qemu_info_t, 1);
 
-    info->target_name = TARGET_NAME;
+    info->target_name = target_name();
     info->version.min = QEMU_PLUGIN_MIN_VERSION;
     info->version.cur = QEMU_PLUGIN_VERSION;
-#ifndef CONFIG_USER_ONLY
-    MachineState *ms = MACHINE(qdev_get_machine());
-    info->system_emulation = true;
-    info->system.smp_vcpus = ms->smp.cpus;
-    info->system.max_vcpus = ms->smp.max_cpus;
-#else
-    info->system_emulation = false;
-#endif
+
+    qemu_plugin_fillin_mode_info(info);
 
     QTAILQ_FOREACH_SAFE(desc, head, entry, next) {
         int err;
diff --git a/plugins/meson.build b/plugins/meson.build
index d60be2a4d6d06..3be8245a698d9 100644
--- a/plugins/meson.build
+++ b/plugins/meson.build
@@ -57,8 +57,9 @@ if host_os == 'windows'
     command: dlltool_cmd
   )
 endif
-specific_ss.add(files(
-  'loader.c',
-  'core.c',
-  'api.c',
-))
+
+user_ss.add(files('user.c', 'api-user.c'))
+system_ss.add(files('system.c', 'api-system.c'))
+
+common_ss.add(files('loader.c', 'api.c', 'core.c'))
+
diff --git a/plugins/plugin.h b/plugins/plugin.h
index 30e2299a54db9..6fbc443b96c9b 100644
--- a/plugins/plugin.h
+++ b/plugins/plugin.h
@@ -13,6 +13,7 @@
 #define PLUGIN_H
 
 #include <gmodule.h>
+#include "qemu/queue.h"
 #include "qemu/qht.h"
 
 #define QEMU_PLUGIN_MIN_VERSION 2
@@ -118,4 +119,10 @@ struct qemu_plugin_scoreboard *plugin_scoreboard_new(size_t element_size);
 
 void plugin_scoreboard_free(struct qemu_plugin_scoreboard *score);
 
+/**
+ * qemu_plugin_fillin_mode_info() - populate mode specific info
+ * info: pointer to qemu_info_t structure
+ */
+void qemu_plugin_fillin_mode_info(qemu_info_t *info);
+
 #endif /* PLUGIN_H */
diff --git a/plugins/system.c b/plugins/system.c
new file mode 100644
index 0000000000000..b3ecc33ba520e
--- /dev/null
+++ b/plugins/system.c
@@ -0,0 +1,24 @@
+/*
+ * QEMU Plugin system-emulation helpers
+ *
+ * Helpers that are specific to system emulation.
+ *
+ * Copyright (C) 2017, Emilio G. Cota <cota@braap.org>
+ * Copyright (C) 2019-2025, Linaro
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/plugin.h"
+#include "hw/boards.h"
+
+#include "plugin.h"
+
+void qemu_plugin_fillin_mode_info(qemu_info_t *info)
+{
+    MachineState *ms = MACHINE(qdev_get_machine());
+    info->system_emulation = true;
+    info->system.smp_vcpus = ms->smp.cpus;
+    info->system.max_vcpus = ms->smp.max_cpus;
+}
diff --git a/plugins/user.c b/plugins/user.c
new file mode 100644
index 0000000000000..250d5425020cd
--- /dev/null
+++ b/plugins/user.c
@@ -0,0 +1,19 @@
+/*
+ * QEMU Plugin user-mode helpers
+ *
+ * Helpers that are specific to user-mode.
+ *
+ * Copyright (C) 2017, Emilio G. Cota <cota@braap.org>
+ * Copyright (C) 2019-2025, Linaro
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/plugin.h"
+#include "plugin.h"
+
+void qemu_plugin_fillin_mode_info(qemu_info_t *info)
+{
+    info->system_emulation = false;
+}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index ee6eccc68c817..b1937780e198a 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -5913,35 +5913,31 @@
 ##
 # @x-blockdev-change:
 #
-# Dynamically reconfigure the block driver state graph.  It can be
-# used to add, remove, insert or replace a graph node.  Currently only
-# the Quorum driver implements this feature to add or remove its
-# child.  This is useful to fix a broken quorum child.
+# Dynamically reconfigure the block driver state graph.
 #
-# If @node is specified, it will be inserted under @parent.  @child
-# may not be specified in this case.  If both @parent and @child are
-# specified but @node is not, @child will be detached from @parent.
+# Currently only supports adding and deleting quorum children.  A
+# child will be added at the end of the list of children.  Its
+# contents *must* be consistent with the other childrens' contents.
+# Deleting a child that is not last in the list of children is
+# problematic, because it "renumbers" the children following it.
 #
 # @parent: the id or name of the parent node.
 #
-# @child: the name of a child under the given parent node.
+# @child: the name of a child to be deleted.  Mutually exclusive with
+#     @node.
 #
-# @node: the name of the node that will be added.
+# @node: the name of the node to be added.  Mutually exclusive with
+#     @child.
 #
 # Features:
 #
-# @unstable: This command is experimental, and its API is not stable.
-#     It does not support all kinds of operations, all kinds of
-#     children, nor all block drivers.
+# @unstable: This command is experimental.
 #
-#     FIXME Removing children from a quorum node means introducing
+# TODO: Removing children from a quorum node means introducing
 #     gaps in the child indices.  This cannot be represented in the
 #     'children' list of BlockdevOptionsQuorum, as returned by
 #     .bdrv_refresh_filename().
 #
-#     Warning: The data in a new quorum child MUST be consistent with
-#     that of the rest of the array.
-#
 # Since: 2.7
 #
 # .. qmp-example::
diff --git a/qapi/block-export.json b/qapi/block-export.json
index 68dcec7edc50d..c783e01a53288 100644
--- a/qapi/block-export.json
+++ b/qapi/block-export.json
@@ -9,17 +9,11 @@
 { 'include': 'block-core.json' }
 
 ##
-# @NbdServerOptions:
-#
-# Keep this type consistent with the nbd-server-start arguments.  The
-# only intended difference is using SocketAddress instead of
-# SocketAddressLegacy.
-#
-# @addr: Address on which to listen.
+# @NbdServerOptionsBase:
 #
 # @handshake-max-seconds: Time limit, in seconds, at which a client
 #     that has not completed the negotiation handshake will be
-#     disconnected, 0 for no limit (since 10.0; default: 10).
+#     disconnected, or 0 for no limit (since 10.0; default: 10).
 #
 # @tls-creds: ID of the TLS credentials object (since 2.6).
 #
@@ -32,47 +26,47 @@
 # @max-connections: The maximum number of connections to allow at the
 #     same time, 0 for unlimited.  Setting this to 1 also stops the
 #     server from advertising multiple client support (since 5.2;
-#     default: 100)
-#
-# Since: 4.2
+#     default: 100).
 ##
-{ 'struct': 'NbdServerOptions',
-  'data': { 'addr': 'SocketAddress',
-            '*handshake-max-seconds': 'uint32',
+{ 'struct': 'NbdServerOptionsBase',
+  'data': { '*handshake-max-seconds': 'uint32',
             '*tls-creds': 'str',
             '*tls-authz': 'str',
             '*max-connections': 'uint32' } }
 
 ##
-# @nbd-server-start:
+# @NbdServerOptions:
 #
-# Start an NBD server listening on the given host and port.  Block
-# devices can then be exported using @nbd-server-add.  The NBD server
-# will present them as named exports; for example, another QEMU
-# instance could refer to them as "nbd:HOST:PORT:exportname=NAME".
+# Keep this type consistent with the NbdServerOptionsLegacy type.  The
+# only intended difference is using SocketAddress instead of
+# SocketAddressLegacy.
+#
+# @addr: Address on which to listen (since 4.2).
+##
+{ 'struct': 'NbdServerOptions',
+  'base': 'NbdServerOptionsBase',
+  'data': { 'addr': 'SocketAddress' } }
+
+##
+# @NbdServerOptionsLegacy:
 #
 # Keep this type consistent with the NbdServerOptions type.  The only
 # intended difference is using SocketAddressLegacy instead of
 # SocketAddress.
 #
-# @addr: Address on which to listen.
-#
-# @handshake-max-seconds: Time limit, in seconds, at which a client
-#     that has not completed the negotiation handshake will be
-#     disconnected, or 0 for no limit (since 10.0; default: 10).
-#
-# @tls-creds: ID of the TLS credentials object (since 2.6).
-#
-# @tls-authz: ID of the QAuthZ authorization object used to validate
-#     the client's x509 distinguished name.  This object is is only
-#     resolved at time of use, so can be deleted and recreated on the
-#     fly while the NBD server is active.  If missing, it will default
-#     to denying access (since 4.0).
+# @addr: Address on which to listen (since 1.3).
+##
+{ 'struct': 'NbdServerOptionsLegacy',
+  'base': 'NbdServerOptionsBase',
+  'data': { 'addr': 'SocketAddressLegacy' } }
+
+##
+# @nbd-server-start:
 #
-# @max-connections: The maximum number of connections to allow at the
-#     same time, 0 for unlimited.  Setting this to 1 also stops the
-#     server from advertising multiple client support (since 5.2;
-#     default: 100).
+# Start an NBD server listening on the given host and port.  Block
+# devices can then be exported using @nbd-server-add.  The NBD server
+# will present them as named exports; for example, another QEMU
+# instance could refer to them as "nbd:HOST:PORT:exportname=NAME".
 #
 # Errors:
 #     - if the server is already running
@@ -80,11 +74,7 @@
 # Since: 1.3
 ##
 { 'command': 'nbd-server-start',
-  'data': { 'addr': 'SocketAddressLegacy',
-            '*handshake-max-seconds': 'uint32',
-            '*tls-creds': 'str',
-            '*tls-authz': 'str',
-            '*max-connections': 'uint32' },
+  'data': 'NbdServerOptionsLegacy',
   'allow-preconfig': true }
 
 ##
diff --git a/qapi/char.json b/qapi/char.json
index f02b66c06b3e6..dde2f9538f814 100644
--- a/qapi/char.json
+++ b/qapi/char.json
@@ -337,7 +337,7 @@
 #
 # Configuration info for hub chardevs.
 #
-# @chardevs: List of chardev IDs, which should be added to this hub
+# @chardevs: IDs to be added to this hub (maximum 4 devices).
 #
 # Since: 10.0
 ##
diff --git a/qapi/common.json b/qapi/common.json
index 6ffc7a378905b..0e3a0bbbfb0b8 100644
--- a/qapi/common.json
+++ b/qapi/common.json
@@ -212,3 +212,17 @@
 ##
 { 'struct': 'HumanReadableText',
   'data': { 'human-readable-text': 'str' } }
+
+##
+# @EndianMode:
+#
+# @unspecified: Endianness not specified
+#
+# @little: Little endianness
+#
+# @big: Big endianness
+#
+# Since: 10.0
+##
+{ 'enum': 'EndianMode',
+  'data': [ 'unspecified', 'little', 'big' ] }
diff --git a/qapi/machine.json b/qapi/machine.json
index a6b8795b09edc..a7070bad4d522 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -33,7 +33,7 @@
 # Since: 3.0
 ##
 { 'enum' : 'SysEmuTarget',
-  'data' : [ 'aarch64', 'alpha', 'arm', 'avr', 'hppa', 'i386',
+  'data' : [ 'aarch64', 'alpha', 'arm', 'avr', 'hexagon', 'hppa', 'i386',
              'loongarch64', 'm68k', 'microblaze', 'microblazeel', 'mips', 'mips64',
              'mips64el', 'mipsel', 'or1k', 'ppc',
              'ppc64', 'riscv32', 'riscv64', 'rx', 's390x', 'sh4',
diff --git a/qapi/meson.build b/qapi/meson.build
index e7bc54e5d0470..eadde4db307f0 100644
--- a/qapi/meson.build
+++ b/qapi/meson.build
@@ -65,6 +65,7 @@ if have_system
     'pci',
     'rocker',
     'tpm',
+    'uefi',
   ]
 endif
 if have_system or have_tools
diff --git a/qapi/migration.json b/qapi/migration.json
index 43babd1df417c..8b9c53595c4cb 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -1524,7 +1524,9 @@
 ##
 # @migrate_cancel:
 #
-# Cancel the current executing migration process.
+# Cancel the currently executing migration process.  Allows a new
+# migration to be started right after.  When postcopy-ram is in use,
+# cancelling is not allowed after the postcopy phase has started.
 #
 # .. note:: This command succeeds even if there is no migration
 #    process running.
diff --git a/qapi/net.json b/qapi/net.json
index 2739a2f423328..310cc4fd1907a 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -1031,3 +1031,43 @@
 ##
 { 'event': 'NETDEV_STREAM_DISCONNECTED',
   'data': { 'netdev-id': 'str' } }
+
+##
+# @NETDEV_VHOST_USER_CONNECTED:
+#
+# Emitted when the vhost-user chardev is connected
+#
+# @netdev-id: QEMU netdev id that is connected
+#
+# @chardev-id: The character device id used by the QEMU netdev
+#
+# Since: 10.0
+#
+# .. qmp-example::
+#
+#     <- { "timestamp": {"seconds": 1739538638, "microseconds": 354181 },
+#          "event": "NETDEV_VHOST_USER_CONNECTED",
+#          "data": { "netdev-id": "netdev0", "chardev-id": "chr0" } }
+#
+##
+{ 'event': 'NETDEV_VHOST_USER_CONNECTED',
+  'data': { 'netdev-id': 'str', 'chardev-id': 'str' } }
+
+##
+# @NETDEV_VHOST_USER_DISCONNECTED:
+#
+# Emitted when the vhost-user chardev is disconnected
+#
+# @netdev-id: QEMU netdev id that is disconnected
+#
+# Since: 10.0
+#
+# .. qmp-example::
+#
+#     <- { "timestamp": { "seconds": 1739538634, "microseconds": 920450 },
+#          "event": "NETDEV_VHOST_USER_DISCONNECTED",
+#          "data": { "netdev-id": "netdev0" } }
+#
+##
+{ 'event': 'NETDEV_VHOST_USER_DISCONNECTED',
+  'data': { 'netdev-id': 'str' } }
diff --git a/qapi/qapi-schema.json b/qapi/qapi-schema.json
index b1581988e4eba..c41c01eb2ab98 100644
--- a/qapi/qapi-schema.json
+++ b/qapi/qapi-schema.json
@@ -5,6 +5,8 @@
 #
 # This document describes all commands currently supported by QMP.
 #
+# For locating a particular item, please see the `qapi-qmp-index`.
+#
 # Most of the time their usage is exactly the same as in the user
 # Monitor, this means that any other document which also describe
 # commands (the manpage, QEMU's manual, etc) can and should be
@@ -81,3 +83,4 @@
 { 'include': 'vfio.json' }
 { 'include': 'cryptodev.json' }
 { 'include': 'cxl.json' }
+{ 'include': 'uefi.json' }
diff --git a/qapi/uefi.json b/qapi/uefi.json
new file mode 100644
index 0000000000000..bdfcabe1df4d3
--- /dev/null
+++ b/qapi/uefi.json
@@ -0,0 +1,64 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+
+##
+# = UEFI Variable Store
+#
+# The qemu efi variable store implementation (hw/uefi/) uses this to
+# store non-volatile variables in json format on disk.
+#
+# This is an existing format already supported by (at least) two other
+# projects, specifically https://gitlab.com/kraxel/virt-firmware and
+# https://github.com/awslabs/python-uefivars.
+##
+
+##
+# @UefiVariable:
+#
+# UEFI Variable.  Check the UEFI specifification for more detailed
+# information on the fields.
+#
+# @guid: variable namespace GUID
+#
+# @name: variable name, in UTF-8 encoding.
+#
+# @attr: variable attributes.
+#
+# @data: variable value, encoded as hex string.
+#
+# @time: variable modification time.  EFI_TIME struct, encoded as hex
+#     string.  Used only for authenticated variables, where the
+#     EFI_VARIABLE_TIME_BASED_AUTHENTICATED_WRITE_ACCESS attribute bit
+#     is set.
+#
+# @digest: variable certificate digest.  Used to verify the signature
+#     of updates for authenticated variables.  UEFI has two kinds of
+#     authenticated variables.  The secure boot variables ('PK',
+#     'KEK', 'db' and 'dbx') have hard coded signature checking rules.
+#     For other authenticated variables the firmware stores a digest
+#     of the signing certificate at variable creation time, and any
+#     updates must be signed with the same certificate.
+#
+# Since: 10.0
+##
+{ 'struct' : 'UefiVariable',
+  'data' : { 'guid'  : 'str',
+             'name'  : 'str',
+             'attr'  : 'int',
+             'data'  : 'str',
+             '*time' : 'str',
+             '*digest' : 'str'}}
+
+##
+# @UefiVarStore:
+#
+# @version: currently always 2
+#
+# @variables: list of UEFI variables
+#
+# Since: 10.0
+##
+{ 'struct' : 'UefiVarStore',
+  'data' : { 'version'   : 'int',
+             'variables' : [ 'UefiVariable' ] }}
diff --git a/qapi/virtio.json b/qapi/virtio.json
index 2529c2d8b2044..d351d2166ef1b 100644
--- a/qapi/virtio.json
+++ b/qapi/virtio.json
@@ -992,3 +992,17 @@
 ##
 { 'enum': 'GranuleMode',
   'data': [ '4k', '8k', '16k', '64k', 'host' ] }
+
+##
+# @VMAppleVirtioBlkVariant:
+#
+# @unspecified: The default, not a valid setting.
+#
+# @root: Block device holding the root volume
+#
+# @aux: Block device holding auxiliary data required for boot
+#
+# Since: 9.2
+##
+{ 'enum': 'VMAppleVirtioBlkVariant',
+  'data': [ 'unspecified', 'root', 'aux' ] }
diff --git a/qemu-nbd.c b/qemu-nbd.c
index 05b61da51ea3c..ed5895861bb07 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -852,10 +852,6 @@ int main(int argc, char **argv)
         export_name = "";
     }
 
-    if (!trace_init_backends()) {
-        exit(1);
-    }
-    trace_init_file();
     qemu_set_log(LOG_TRACE, &error_fatal);
 
     socket_activation = check_socket_activation();
@@ -1045,6 +1041,18 @@ int main(int argc, char **argv)
 #endif /* WIN32 */
     }
 
+    /*
+     * trace_init must be done after daemonization.  Why? Because at
+     * least the simple backend spins up a helper thread as well as an
+     * atexit() handler that waits on that thread, but the helper
+     * thread won't survive a fork, leading to deadlock in the child
+     * if we initialized pre-fork.
+     */
+    if (!trace_init_backends()) {
+        exit(1);
+    }
+    trace_init_file();
+
     if (opts.device != NULL && sockpath == NULL) {
         sockpath = g_malloc(128);
         snprintf(sockpath, 128, SOCKET_PATH, basename(opts.device));
diff --git a/qemu-options.hx b/qemu-options.hx
index 1b26ad53bda76..888b3092bef78 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -42,7 +42,8 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \
     "                aux-ram-share=on|off allocate auxiliary guest RAM as shared (default: off)\n"
 #endif
     "                memory-backend='backend-id' specifies explicitly provided backend for main RAM (default=none)\n"
-    "                cxl-fmw.0.targets.0=firsttarget,cxl-fmw.0.targets.1=secondtarget,cxl-fmw.0.size=size[,cxl-fmw.0.interleave-granularity=granularity]\n",
+    "                cxl-fmw.0.targets.0=firsttarget,cxl-fmw.0.targets.1=secondtarget,cxl-fmw.0.size=size[,cxl-fmw.0.interleave-granularity=granularity]\n"
+    "                smp-cache.0.cache=cachename,smp-cache.0.topology=topologylevel\n",
     QEMU_ARCH_ALL)
 SRST
 ``-machine [type=]name[,prop=value[,...]]``
@@ -172,6 +173,33 @@ SRST
         ::
 
             -machine cxl-fmw.0.targets.0=cxl.0,cxl-fmw.0.targets.1=cxl.1,cxl-fmw.0.size=128G,cxl-fmw.0.interleave-granularity=512
+
+    ``smp-cache.0.cache=cachename,smp-cache.0.topology=topologylevel``
+        Define cache properties for SMP system.
+
+        ``cache=cachename`` specifies the cache that the properties will be
+        applied on. This field is the combination of cache level and cache
+        type. It supports ``l1d`` (L1 data cache), ``l1i`` (L1 instruction
+        cache), ``l2`` (L2 unified cache) and ``l3`` (L3 unified cache).
+
+        ``topology=topologylevel`` sets the cache topology level. It accepts
+        CPU topology levels including ``core``, ``module``, ``cluster``, ``die``,
+        ``socket``, ``book``, ``drawer`` and a special value ``default``. If
+        ``default`` is set, then the cache topology will follow the architecture's
+        default cache topology model. If another topology level is set, the cache
+        will be shared at corresponding CPU topology level. For example,
+        ``topology=core`` makes the cache shared by all threads within a core.
+        The omitting cache will default to using the ``default`` level.
+
+        The default cache topology model for an i386 PC machine is as follows:
+        ``l1d``, ``l1i``, and ``l2`` caches are per ``core``, while the ``l3``
+        cache is per ``die``.
+
+        Example:
+
+        ::
+
+            -machine smp-cache.0.cache=l1d,smp-cache.0.topology=core,smp-cache.1.cache=l1i,smp-cache.1.topology=core
 ERST
 
 DEF("M", HAS_ARG, QEMU_OPTION_M,
@@ -4632,21 +4660,25 @@ SRST
 ERST
 
 DEF("overcommit", HAS_ARG, QEMU_OPTION_overcommit,
-    "-overcommit [mem-lock=on|off][cpu-pm=on|off]\n"
+    "-overcommit [mem-lock=on|off|on-fault][cpu-pm=on|off]\n"
     "                run qemu with overcommit hints\n"
-    "                mem-lock=on|off controls memory lock support (default: off)\n"
+    "                mem-lock=on|off|on-fault controls memory lock support (default: off)\n"
     "                cpu-pm=on|off controls cpu power management (default: off)\n",
     QEMU_ARCH_ALL)
 SRST
-``-overcommit mem-lock=on|off``
+``-overcommit mem-lock=on|off|on-fault``
   \ 
 ``-overcommit cpu-pm=on|off``
     Run qemu with hints about host resource overcommit. The default is
     to assume that host overcommits all resources.
 
     Locking qemu and guest memory can be enabled via ``mem-lock=on``
-    (disabled by default). This works when host memory is not
-    overcommitted and reduces the worst-case latency for guest.
+    or ``mem-lock=on-fault`` (disabled by default). This works when
+    host memory is not overcommitted and reduces the worst-case latency for
+    guest. The on-fault option is better for reducing the memory footprint
+    since it makes allocations lazy, but the pages still get locked in place
+    once faulted by the guest or QEMU. Note that the two options are mutually
+    exclusive.
 
     Guest ability to manage power state of host cpus (increasing latency
     for other processes on the same host cpu, but decreasing latency for
@@ -5078,7 +5110,7 @@ ERST
 DEF("semihosting", 0, QEMU_OPTION_semihosting,
     "-semihosting    semihosting mode\n",
     QEMU_ARCH_ARM | QEMU_ARCH_M68K | QEMU_ARCH_XTENSA |
-    QEMU_ARCH_MIPS | QEMU_ARCH_RISCV)
+    QEMU_ARCH_MIPS | QEMU_ARCH_RISCV | QEMU_ARCH_HEXAGON)
 SRST
 ``-semihosting``
     Enable :ref:`Semihosting` mode (ARM, M68K, Xtensa, MIPS, RISC-V only).
@@ -5094,11 +5126,11 @@ DEF("semihosting-config", HAS_ARG, QEMU_OPTION_semihosting_config,
     "-semihosting-config [enable=on|off][,target=native|gdb|auto][,chardev=id][,userspace=on|off][,arg=str[,...]]\n" \
     "                semihosting configuration\n",
 QEMU_ARCH_ARM | QEMU_ARCH_M68K | QEMU_ARCH_XTENSA |
-QEMU_ARCH_MIPS | QEMU_ARCH_RISCV)
+QEMU_ARCH_MIPS | QEMU_ARCH_RISCV | QEMU_ARCH_HEXAGON)
 SRST
-``-semihosting-config [enable=on|off][,target=native|gdb|auto][,chardev=id][,userspace=on|off][,arg=str[,...]]``
-    Enable and configure :ref:`Semihosting` (ARM, M68K, Xtensa, MIPS, RISC-V
-    only).
+``-semihosting-config [enable=on|off][,target=native|gdb|auto][,chardev=id][,userspace=on|off][,usefs=<path>][,arg=str[,...]]``
+    Enable and configure :ref:`Semihosting` (ARM, M68K, Xtensa, MIPS, RISC-V,
+    Hexagon only).
 
     .. warning::
       Note that this allows guest direct access to the host filesystem, so
@@ -5120,6 +5152,11 @@ SRST
         only be used if all guest code is trusted (for example, in
         bare-metal test case code).
 
+    ``usefs=<path>``
+        Sets a fallback directory to be used by the open semihosting call. If
+        the requested file is not found QEMU will search again at the given
+        path.
+
     ``arg=str1,arg=str2,...``
         Allows the user to pass input arguments, and can be used
         multiple times to build up a list. The old-style
diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index 6e3c15f53957a..12bc086d79eac 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -805,8 +805,10 @@ int64_t qmp_guest_fsfreeze_thaw(Error **errp)
     int ret;
 
     ret = qmp_guest_fsfreeze_do_thaw(errp);
+
     if (ret >= 0) {
         ga_unset_frozen(ga_state);
+        slog("guest-fsthaw called");
         execute_fsfreeze_hook(FSFREEZE_HOOK_THAW, errp);
     } else {
         ret = 0;
diff --git a/qga/commands-win32.c b/qga/commands-win32.c
index 99c026c0a02cd..749fdf88952f3 100644
--- a/qga/commands-win32.c
+++ b/qga/commands-win32.c
@@ -1273,6 +1273,9 @@ int64_t qmp_guest_fsfreeze_thaw(Error **errp)
     qga_vss_fsfreeze(&i, false, NULL, errp);
 
     ga_unset_frozen(ga_state);
+
+    slog("guest-fsthaw called");
+
     return i;
 }
 
diff --git a/qga/main.c b/qga/main.c
index 531853e13d9b7..72c39b042f7cd 100644
--- a/qga/main.c
+++ b/qga/main.c
@@ -1430,7 +1430,6 @@ static GAState *initialize_agent(GAConfig *config, int socket_activation)
         if (config->daemonize) {
             /* delay opening/locking of pidfile till filesystems are unfrozen */
             s->deferred_options.pid_filepath = config->pid_filepath;
-            become_daemon(NULL);
         }
         if (config->log_filepath) {
             /* delay opening the log file till filesystems are unfrozen */
@@ -1438,9 +1437,6 @@ static GAState *initialize_agent(GAConfig *config, int socket_activation)
         }
         ga_disable_logging(s);
     } else {
-        if (config->daemonize) {
-            become_daemon(config->pid_filepath);
-        }
         if (config->log_filepath) {
             FILE *log_file = ga_open_logfile(config->log_filepath);
             if (!log_file) {
@@ -1487,6 +1483,20 @@ static GAState *initialize_agent(GAConfig *config, int socket_activation)
 
     ga_apply_command_filters(s);
 
+    if (!channel_init(s, s->config->method, s->config->channel_path,
+                      s->socket_activation ? FIRST_SOCKET_ACTIVATION_FD : -1)) {
+        g_critical("failed to initialize guest agent channel");
+        return NULL;
+    }
+
+    if (config->daemonize) {
+        if (ga_is_frozen(s)) {
+            become_daemon(NULL);
+        } else {
+            become_daemon(config->pid_filepath);
+        }
+    }
+
     ga_state = s;
     return s;
 }
@@ -1513,8 +1523,9 @@ static void cleanup_agent(GAState *s)
 
 static int run_agent_once(GAState *s)
 {
-    if (!channel_init(s, s->config->method, s->config->channel_path,
-                      s->socket_activation ? FIRST_SOCKET_ACTIVATION_FD : -1)) {
+    if (!s->channel &&
+        channel_init(s, s->config->method, s->config->channel_path,
+                     s->socket_activation ? FIRST_SOCKET_ACTIVATION_FD : -1)) {
         g_critical("failed to initialize guest agent channel");
         return EXIT_FAILURE;
     }
@@ -1523,6 +1534,7 @@ static int run_agent_once(GAState *s)
 
     if (s->channel) {
         ga_channel_free(s->channel);
+        s->channel = NULL;
     }
 
     return EXIT_SUCCESS;
@@ -1579,7 +1591,7 @@ static void stop_agent(GAState *s, bool requested)
 
 int main(int argc, char **argv)
 {
-    int ret = EXIT_SUCCESS;
+    int ret = EXIT_FAILURE;
     GAState *s;
     GAConfig *config = g_new0(GAConfig, 1);
     int socket_activation;
@@ -1607,7 +1619,6 @@ int main(int argc, char **argv)
     socket_activation = check_socket_activation();
     if (socket_activation > 1) {
         g_critical("qemu-ga only supports listening on one socket");
-        ret = EXIT_FAILURE;
         goto end;
     }
     if (socket_activation) {
@@ -1631,7 +1642,6 @@ int main(int argc, char **argv)
 
         if (!config->method) {
             g_critical("unsupported listen fd type");
-            ret = EXIT_FAILURE;
             goto end;
         }
     } else if (config->channel_path == NULL) {
@@ -1643,13 +1653,13 @@ int main(int argc, char **argv)
             config->channel_path = g_strdup(QGA_SERIAL_PATH_DEFAULT);
         } else {
             g_critical("must specify a path for this channel");
-            ret = EXIT_FAILURE;
             goto end;
         }
     }
 
     if (config->dumpconf) {
         config_dump(config);
+        ret = EXIT_SUCCESS;
         goto end;
     }
 
@@ -1664,6 +1674,7 @@ int main(int argc, char **argv)
         SERVICE_TABLE_ENTRY service_table[] = {
             { (char *)QGA_SERVICE_NAME, service_main }, { NULL, NULL } };
         StartServiceCtrlDispatcher(service_table);
+        ret = EXIT_SUCCESS;
     } else {
         ret = run_agent(s);
     }
diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json
index 995594aaf4386..35ec0e7db31df 100644
--- a/qga/qapi-schema.json
+++ b/qga/qapi-schema.json
@@ -3,6 +3,9 @@
 
 ##
 # = QEMU guest agent protocol commands and structs
+#
+# For a concise listing of all commands, events, and types in the QEMU
+# guest agent, please consult the `qapi-qga-index`.
 ##
 
 { 'pragma': { 'doc-required': true } }
diff --git a/qom/object_interfaces.c b/qom/object_interfaces.c
index f35d331331752..1ffea1a72887f 100644
--- a/qom/object_interfaces.c
+++ b/qom/object_interfaces.c
@@ -4,9 +4,11 @@
 #include "qapi/error.h"
 #include "qapi/qapi-visit-qom.h"
 #include "qobject/qobject.h"
+#include "qobject/qbool.h"
 #include "qobject/qdict.h"
 #include "qapi/qmp/qerror.h"
 #include "qobject/qjson.h"
+#include "qobject/qstring.h"
 #include "qapi/qobject-input-visitor.h"
 #include "qapi/qobject-output-visitor.h"
 #include "qom/object_interfaces.h"
@@ -177,9 +179,25 @@ char *object_property_help(const char *name, const char *type,
         g_string_append(str, description);
     }
     if (defval) {
-        g_autofree char *def_json = g_string_free(qobject_to_json(defval),
-                                                  false);
-        g_string_append_printf(str, " (default: %s)", def_json);
+        g_autofree char *def_json = NULL;
+        const char *def;
+
+        switch (qobject_type(defval)) {
+        case QTYPE_QSTRING:
+            def = qstring_get_str(qobject_to(QString, defval));
+            break;
+
+        case QTYPE_QBOOL:
+            def = qbool_get_bool(qobject_to(QBool, defval)) ? "on" : "off";
+            break;
+
+        default:
+            def_json = g_string_free(qobject_to_json(defval), false);
+            def = def_json;
+            break;
+        }
+
+        g_string_append_printf(str, " (default: %s)", def);
     }
 
     return g_string_free(str, false);
diff --git a/roms/Makefile b/roms/Makefile
index 31e4b97c983b3..beff58d9d50c5 100644
--- a/roms/Makefile
+++ b/roms/Makefile
@@ -34,6 +34,7 @@ find-cross-gcc = $(firstword $(wildcard $(patsubst %ld,%gcc,$(call find-cross-ld
 # finally strip off path + toolname so we get the prefix
 find-cross-prefix = $(subst gcc,,$(notdir $(call find-cross-gcc,$(1))))
 
+aarch64_cross_prefix := $(call find-cross-prefix,aarch64)
 arm_cross_prefix := $(call find-cross-prefix,arm)
 powerpc64_cross_prefix := $(call find-cross-prefix,powerpc64)
 powerpc_cross_prefix := $(call find-cross-prefix,powerpc)
@@ -66,6 +67,7 @@ default help:
 	@echo "  u-boot.e500        -- update u-boot.e500"
 	@echo "  u-boot.sam460      -- update u-boot.sam460"
 	@echo "  npcm7xx_bootrom    -- update vbootrom for npcm7xx"
+	@echo "  npcm8xx_bootrom    -- update vbootrom for npcm8xx"
 	@echo "  efi                -- update UEFI (edk2) platform firmware"
 	@echo "  opensbi32-generic  -- update OpenSBI for 32-bit generic machine"
 	@echo "  opensbi64-generic  -- update OpenSBI for 64-bit generic machine"
@@ -194,6 +196,10 @@ npcm7xx_bootrom:
 	$(MAKE) -C vbootrom CROSS_COMPILE=$(arm_cross_prefix)
 	cp vbootrom/npcm7xx_bootrom.bin ../pc-bios/npcm7xx_bootrom.bin
 
+npcm8xx_bootrom:
+	$(MAKE) -C vbootrom CROSS_COMPILE=$(aarch64_cross_prefix)
+	cp vbootrom/npcm8xx_bootrom.bin ../pc-bios/npcm8xx_bootrom.bin
+
 hppa-firmware:
 	$(MAKE) -C seabios-hppa parisc
 	cp seabios-hppa/out/hppa-firmware.img      ../pc-bios/
diff --git a/roms/skiboot b/roms/skiboot
index 24a7eb35966d9..785a5e3070a86 160000
--- a/roms/skiboot
+++ b/roms/skiboot
@@ -1 +1 @@
-Subproject commit 24a7eb35966d93455520bc2debdd7954314b638b
+Subproject commit 785a5e3070a86e18521e62fe202b87209de30fa2
diff --git a/roms/vbootrom b/roms/vbootrom
index 0c37a43527f0e..1287b6e42e839 160000
--- a/roms/vbootrom
+++ b/roms/vbootrom
@@ -1 +1 @@
-Subproject commit 0c37a43527f0ee2b9584e7fb2fdc805e902635ac
+Subproject commit 1287b6e42e839ba2ab0f06268c5b53ae60df3537
diff --git a/rust/Cargo.lock b/rust/Cargo.lock
index c0c6069247a8f..2ebf0a11ea474 100644
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -37,6 +37,14 @@ version = "1.12.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b"
 
+[[package]]
+name = "hpet"
+version = "0.1.0"
+dependencies = [
+ "qemu_api",
+ "qemu_api_macros",
+]
+
 [[package]]
 name = "itertools"
 version = "0.11.0"
@@ -46,6 +54,12 @@ dependencies = [
  "either",
 ]
 
+[[package]]
+name = "libc"
+version = "0.2.162"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "18d287de67fe55fd7e1581fe933d965a5a9477b38e949cfa9f8574ef01506398"
+
 [[package]]
 name = "pl011"
 version = "0.1.0"
@@ -92,6 +106,7 @@ dependencies = [
 name = "qemu_api"
 version = "0.1.0"
 dependencies = [
+ "libc",
  "qemu_api_macros",
  "version_check",
 ]
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
index 5b0cb55928677..ab1185a814307 100644
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@@ -4,6 +4,7 @@ members = [
     "qemu-api-macros",
     "qemu-api",
     "hw/char/pl011",
+    "hw/timer/hpet",
 ]
 
 [workspace.lints.rust]
@@ -36,6 +37,8 @@ result_unit_err = "allow"
 should_implement_trait = "deny"
 # can be for a reason, e.g. in callbacks
 unused_self = "allow"
+# common in device crates
+upper_case_acronyms = "allow"
 
 # default-allow lints
 as_ptr_cast_mut = "deny"
diff --git a/rust/hw/Kconfig b/rust/hw/Kconfig
index 4d934f30afe13..36f92ec02874b 100644
--- a/rust/hw/Kconfig
+++ b/rust/hw/Kconfig
@@ -1,2 +1,3 @@
 # devices Kconfig
 source char/Kconfig
+source timer/Kconfig
diff --git a/rust/hw/char/pl011/src/device.rs b/rust/hw/char/pl011/src/device.rs
index 8050ede9c85bf..f137b49feafca 100644
--- a/rust/hw/char/pl011/src/device.rs
+++ b/rust/hw/char/pl011/src/device.rs
@@ -2,36 +2,29 @@
 // Author(s): Manos Pitsidianakis <manos.pitsidianakis@linaro.org>
 // SPDX-License-Identifier: GPL-2.0-or-later
 
-use core::ptr::{addr_of, addr_of_mut, NonNull};
-use std::{
-    ffi::CStr,
-    os::raw::{c_int, c_void},
-};
+use std::{ffi::CStr, ptr::addr_of_mut};
 
 use qemu_api::{
-    bindings::{
-        error_fatal, hwaddr, memory_region_init_io, qdev_init_clock_in, qdev_new,
-        qdev_prop_set_chr, qemu_chr_fe_accept_input, qemu_chr_fe_ioctl, qemu_chr_fe_set_handlers,
-        qemu_chr_fe_write_all, qemu_irq, sysbus_connect_irq, sysbus_mmio_map,
-        sysbus_realize_and_unref, CharBackend, Chardev, Clock, ClockEvent, MemoryRegion,
-        QEMUChrEvent, CHR_IOCTL_SERIAL_SET_BREAK,
-    },
-    c_str, impl_vmstate_forward,
-    irq::InterruptSource,
+    chardev::{CharBackend, Chardev, Event},
+    impl_vmstate_forward,
+    irq::{IRQState, InterruptSource},
+    memory::{hwaddr, MemoryRegion, MemoryRegionOps, MemoryRegionOpsBuilder},
     prelude::*,
-    qdev::{DeviceImpl, DeviceState, Property},
-    qom::{ClassInitImpl, ObjectImpl, ParentField},
-    sysbus::{SysBusDevice, SysBusDeviceClass},
+    qdev::{Clock, ClockEvent, DeviceImpl, DeviceState, Property, ResetType, ResettablePhasesImpl},
+    qom::{ObjectImpl, Owned, ParentField},
+    sysbus::{SysBusDevice, SysBusDeviceImpl},
     vmstate::VMStateDescription,
 };
 
 use crate::{
     device_class,
-    memory_ops::PL011_OPS,
-    registers::{self, Interrupt},
-    RegisterOffset,
+    registers::{self, Interrupt, RegisterOffset},
 };
 
+// TODO: You must disable the UART before any of the control registers are
+// reprogrammed. When the UART is disabled in the middle of transmission or
+// reception, it completes the current character before stopping
+
 /// Integer Baud Rate Divider, `UARTIBRD`
 const IBRD_MASK: u32 = 0xffff;
 
@@ -52,11 +45,6 @@ impl std::ops::Index<hwaddr> for DeviceId {
     }
 }
 
-impl DeviceId {
-    const ARM: Self = Self(&[0x11, 0x10, 0x14, 0x00, 0x0d, 0xf0, 0x05, 0xb1]);
-    const LUMINARY: Self = Self(&[0x11, 0x00, 0x18, 0x01, 0x0d, 0xf0, 0x05, 0xb1]);
-}
-
 // FIFOs use 32-bit indices instead of usize, for compatibility with
 // the migration stream produced by the C version of this device.
 #[repr(transparent)]
@@ -131,7 +119,7 @@ pub struct PL011State {
     #[doc(alias = "irq")]
     pub interrupts: [InterruptSource; IRQMASK.len()],
     #[doc(alias = "clk")]
-    pub clock: NonNull<Clock>,
+    pub clock: Owned<Clock>,
     #[doc(alias = "migrate_clk")]
     pub migrate_clock: bool,
 }
@@ -145,16 +133,24 @@ pub struct PL011Class {
     device_id: DeviceId,
 }
 
+trait PL011Impl: SysBusDeviceImpl + IsA<PL011State> {
+    const DEVICE_ID: DeviceId;
+}
+
+impl PL011Class {
+    fn class_init<T: PL011Impl>(&mut self) {
+        self.device_id = T::DEVICE_ID;
+        self.parent_class.class_init::<T>();
+    }
+}
+
 unsafe impl ObjectType for PL011State {
     type Class = PL011Class;
     const TYPE_NAME: &'static CStr = crate::TYPE_PL011;
 }
 
-impl ClassInitImpl<PL011Class> for PL011State {
-    fn class_init(klass: &mut PL011Class) {
-        klass.device_id = DeviceId::ARM;
-        <Self as ClassInitImpl<SysBusDeviceClass>>::class_init(&mut klass.parent_class);
-    }
+impl PL011Impl for PL011State {
+    const DEVICE_ID: DeviceId = DeviceId(&[0x11, 0x10, 0x14, 0x00, 0x0d, 0xf0, 0x05, 0xb1]);
 }
 
 impl ObjectImpl for PL011State {
@@ -162,6 +158,7 @@ impl ObjectImpl for PL011State {
 
     const INSTANCE_INIT: Option<unsafe fn(&mut Self)> = Some(Self::init);
     const INSTANCE_POST_INIT: Option<fn(&Self)> = Some(Self::post_init);
+    const CLASS_INIT: fn(&mut Self::Class) = Self::Class::class_init::<Self>;
 }
 
 impl DeviceImpl for PL011State {
@@ -172,9 +169,14 @@ impl DeviceImpl for PL011State {
         Some(&device_class::VMSTATE_PL011)
     }
     const REALIZE: Option<fn(&Self)> = Some(Self::realize);
-    const RESET: Option<fn(&Self)> = Some(Self::reset);
 }
 
+impl ResettablePhasesImpl for PL011State {
+    const HOLD: Option<fn(&Self, ResetType)> = Some(Self::reset_hold);
+}
+
+impl SysBusDeviceImpl for PL011State {}
+
 impl PL011Registers {
     pub(self) fn read(&mut self, offset: RegisterOffset) -> (bool, u32) {
         use RegisterOffset::*;
@@ -225,14 +227,14 @@ impl PL011Registers {
         &mut self,
         offset: RegisterOffset,
         value: u32,
-        char_backend: *mut CharBackend,
+        char_backend: &CharBackend,
     ) -> bool {
         // eprintln!("write offset {offset} value {value}");
         use RegisterOffset::*;
         match offset {
             DR => {
                 // interrupts always checked
-                let _ = self.loopback_tx(value);
+                let _ = self.loopback_tx(value.into());
                 self.int_level |= Interrupt::TX.0;
                 return true;
             }
@@ -259,17 +261,9 @@ impl PL011Registers {
                     self.reset_tx_fifo();
                 }
                 let update = (self.line_control.send_break() != new_val.send_break()) && {
-                    let mut break_enable: c_int = new_val.send_break().into();
-                    // SAFETY: self.char_backend is a valid CharBackend instance after it's been
-                    // initialized in realize().
-                    unsafe {
-                        qemu_chr_fe_ioctl(
-                            char_backend,
-                            CHR_IOCTL_SERIAL_SET_BREAK as i32,
-                            addr_of_mut!(break_enable).cast::<c_void>(),
-                        );
-                    }
-                    self.loopback_break(break_enable > 0)
+                    let break_enable = new_val.send_break();
+                    let _ = char_backend.send_break(break_enable);
+                    self.loopback_break(break_enable)
                 };
                 self.line_control = new_val;
                 self.set_read_trigger();
@@ -307,7 +301,7 @@ impl PL011Registers {
 
     #[inline]
     #[must_use]
-    fn loopback_tx(&mut self, value: u32) -> bool {
+    fn loopback_tx(&mut self, value: registers::Data) -> bool {
         // Caveat:
         //
         // In real hardware, TX loopback happens at the serial-bit level
@@ -376,7 +370,7 @@ impl PL011Registers {
     }
 
     fn loopback_break(&mut self, enable: bool) -> bool {
-        enable && self.loopback_tx(registers::Data::BREAK.into())
+        enable && self.loopback_tx(registers::Data::BREAK)
     }
 
     fn set_read_trigger(&mut self) {
@@ -435,11 +429,11 @@ impl PL011Registers {
     }
 
     #[must_use]
-    pub fn put_fifo(&mut self, value: u32) -> bool {
+    pub fn put_fifo(&mut self, value: registers::Data) -> bool {
         let depth = self.fifo_depth();
         assert!(depth > 0);
         let slot = (self.read_pos + self.read_count) & (depth - 1);
-        self.read_fifo[slot] = registers::Data::from(value);
+        self.read_fifo[slot] = value;
         self.read_count += 1;
         self.flags.set_receive_fifo_empty(false);
         if self.read_count == depth {
@@ -485,43 +479,39 @@ impl PL011State {
     /// location/instance. All its fields are expected to hold unitialized
     /// values with the sole exception of `parent_obj`.
     unsafe fn init(&mut self) {
-        const CLK_NAME: &CStr = c_str!("clk");
+        static PL011_OPS: MemoryRegionOps<PL011State> = MemoryRegionOpsBuilder::<PL011State>::new()
+            .read(&PL011State::read)
+            .write(&PL011State::write)
+            .native_endian()
+            .impl_sizes(4, 4)
+            .build();
 
         // SAFETY:
         //
         // self and self.iomem are guaranteed to be valid at this point since callers
         // must make sure the `self` reference is valid.
-        unsafe {
-            memory_region_init_io(
-                addr_of_mut!(self.iomem),
-                addr_of_mut!(*self).cast::<Object>(),
-                &PL011_OPS,
-                addr_of_mut!(*self).cast::<c_void>(),
-                Self::TYPE_NAME.as_ptr(),
-                0x1000,
-            );
-        }
+        MemoryRegion::init_io(
+            unsafe { &mut *addr_of_mut!(self.iomem) },
+            addr_of_mut!(*self),
+            &PL011_OPS,
+            "pl011",
+            0x1000,
+        );
 
         self.regs = Default::default();
 
         // SAFETY:
         //
-        // self.clock is not initialized at this point; but since `NonNull<_>` is Copy,
-        // we can overwrite the undefined value without side effects. This is
-        // safe since all PL011State instances are created by QOM code which
-        // calls this function to initialize the fields; therefore no code is
-        // able to access an invalid self.clock value.
-        unsafe {
-            let dev: &mut DeviceState = self.upcast_mut();
-            self.clock = NonNull::new(qdev_init_clock_in(
-                dev,
-                CLK_NAME.as_ptr(),
-                None, /* pl011_clock_update */
-                addr_of_mut!(*self).cast::<c_void>(),
-                ClockEvent::ClockUpdate.0,
-            ))
-            .unwrap();
-        }
+        // self.clock is not initialized at this point; but since `Owned<_>` is
+        // not Drop, we can overwrite the undefined value without side effects;
+        // it's not sound but, because for all PL011State instances are created
+        // by QOM code which calls this function to initialize the fields, at
+        // leastno code is able to access an invalid self.clock value.
+        self.clock = self.init_clock_in("clk", &Self::clock_update, ClockEvent::ClockUpdate);
+    }
+
+    const fn clock_update(&self, _event: ClockEvent) {
+        /* pl011_trace_baudrate_change(s); */
     }
 
     fn post_init(&self) {
@@ -531,7 +521,7 @@ impl PL011State {
         }
     }
 
-    pub fn read(&mut self, offset: hwaddr, _size: u32) -> u64 {
+    fn read(&self, offset: hwaddr, _size: u32) -> u64 {
         match RegisterOffset::try_from(offset) {
             Err(v) if (0x3f8..0x400).contains(&(v >> 2)) => {
                 let device_id = self.get_class().device_id;
@@ -545,36 +535,30 @@ impl PL011State {
                 let (update_irq, result) = self.regs.borrow_mut().read(field);
                 if update_irq {
                     self.update();
-                    unsafe {
-                        qemu_chr_fe_accept_input(&mut self.char_backend);
-                    }
+                    self.char_backend.accept_input();
                 }
                 result.into()
             }
         }
     }
 
-    pub fn write(&mut self, offset: hwaddr, value: u64) {
+    fn write(&self, offset: hwaddr, value: u64, _size: u32) {
         let mut update_irq = false;
         if let Ok(field) = RegisterOffset::try_from(offset) {
             // qemu_chr_fe_write_all() calls into the can_receive
             // callback, so handle writes before entering PL011Registers.
             if field == RegisterOffset::DR {
                 // ??? Check if transmitter is enabled.
-                let ch: u8 = value as u8;
-                // SAFETY: char_backend is a valid CharBackend instance after it's been
-                // initialized in realize().
+                let ch: [u8; 1] = [value as u8];
                 // XXX this blocks entire thread. Rewrite to use
                 // qemu_chr_fe_write and background I/O callbacks
-                unsafe {
-                    qemu_chr_fe_write_all(&mut self.char_backend, &ch, 1);
-                }
+                let _ = self.char_backend.write_all(&ch);
             }
 
             update_irq = self
                 .regs
                 .borrow_mut()
-                .write(field, value as u32, &mut self.char_backend);
+                .write(field, value as u32, &self.char_backend);
         } else {
             eprintln!("write bad offset {offset} value {value}");
         }
@@ -583,15 +567,19 @@ impl PL011State {
         }
     }
 
-    pub fn can_receive(&self) -> bool {
-        // trace_pl011_can_receive(s->lcr, s->read_count, r);
+    fn can_receive(&self) -> u32 {
         let regs = self.regs.borrow();
-        regs.read_count < regs.fifo_depth()
+        // trace_pl011_can_receive(s->lcr, s->read_count, r);
+        u32::from(regs.read_count < regs.fifo_depth())
     }
 
-    pub fn receive(&self, ch: u32) {
+    fn receive(&self, buf: &[u8]) {
+        if buf.is_empty() {
+            return;
+        }
         let mut regs = self.regs.borrow_mut();
-        let update_irq = !regs.loopback_enabled() && regs.put_fifo(ch);
+        let c: u32 = buf[0].into();
+        let update_irq = !regs.loopback_enabled() && regs.put_fifo(c.into());
         // Release the BqlRefCell before calling self.update()
         drop(regs);
 
@@ -600,11 +588,11 @@ impl PL011State {
         }
     }
 
-    pub fn event(&self, event: QEMUChrEvent) {
+    fn event(&self, event: Event) {
         let mut update_irq = false;
         let mut regs = self.regs.borrow_mut();
-        if event == QEMUChrEvent::CHR_EVENT_BREAK && !regs.loopback_enabled() {
-            update_irq = regs.put_fifo(registers::Data::BREAK.into());
+        if event == Event::CHR_EVENT_BREAK && !regs.loopback_enabled() {
+            update_irq = regs.put_fifo(registers::Data::BREAK);
         }
         // Release the BqlRefCell before calling self.update()
         drop(regs);
@@ -614,28 +602,16 @@ impl PL011State {
         }
     }
 
-    pub fn realize(&self) {
-        // SAFETY: self.char_backend has the correct size and alignment for a
-        // CharBackend object, and its callbacks are of the correct types.
-        unsafe {
-            qemu_chr_fe_set_handlers(
-                addr_of!(self.char_backend) as *mut CharBackend,
-                Some(pl011_can_receive),
-                Some(pl011_receive),
-                Some(pl011_event),
-                None,
-                addr_of!(*self).cast::<c_void>() as *mut c_void,
-                core::ptr::null_mut(),
-                true,
-            );
-        }
+    fn realize(&self) {
+        self.char_backend
+            .enable_handlers(self, Self::can_receive, Self::receive, Self::event);
     }
 
-    pub fn reset(&self) {
+    fn reset_hold(&self, _type: ResetType) {
         self.regs.borrow_mut().reset();
     }
 
-    pub fn update(&self) {
+    fn update(&self) {
         let regs = self.regs.borrow();
         let flags = regs.int_level & regs.int_enabled;
         for (irq, i) in self.interrupts.iter().zip(IRQMASK) {
@@ -661,60 +637,29 @@ const IRQMASK: [u32; 6] = [
 
 /// # Safety
 ///
-/// We expect the FFI user of this function to pass a valid pointer, that has
-/// the same size as [`PL011State`]. We also expect the device is
-/// readable/writeable from one thread at any time.
-pub unsafe extern "C" fn pl011_can_receive(opaque: *mut c_void) -> c_int {
-    let state = NonNull::new(opaque).unwrap().cast::<PL011State>();
-    unsafe { state.as_ref().can_receive().into() }
-}
-
-/// # Safety
-///
-/// We expect the FFI user of this function to pass a valid pointer, that has
-/// the same size as [`PL011State`]. We also expect the device is
-/// readable/writeable from one thread at any time.
-///
-/// The buffer and size arguments must also be valid.
-pub unsafe extern "C" fn pl011_receive(opaque: *mut c_void, buf: *const u8, size: c_int) {
-    let state = NonNull::new(opaque).unwrap().cast::<PL011State>();
-    unsafe {
-        if size > 0 {
-            debug_assert!(!buf.is_null());
-            state.as_ref().receive(u32::from(buf.read_volatile()));
-        }
-    }
-}
-
-/// # Safety
-///
-/// We expect the FFI user of this function to pass a valid pointer, that has
-/// the same size as [`PL011State`]. We also expect the device is
-/// readable/writeable from one thread at any time.
-pub unsafe extern "C" fn pl011_event(opaque: *mut c_void, event: QEMUChrEvent) {
-    let state = NonNull::new(opaque).unwrap().cast::<PL011State>();
-    unsafe { state.as_ref().event(event) }
-}
-
-/// # Safety
-///
-/// We expect the FFI user of this function to pass a valid pointer for `chr`.
+/// We expect the FFI user of this function to pass a valid pointer for `chr`
+/// and `irq`.
 #[no_mangle]
 pub unsafe extern "C" fn pl011_create(
     addr: u64,
-    irq: qemu_irq,
+    irq: *mut IRQState,
     chr: *mut Chardev,
 ) -> *mut DeviceState {
-    unsafe {
-        let dev: *mut DeviceState = qdev_new(PL011State::TYPE_NAME.as_ptr());
-        let sysbus: *mut SysBusDevice = dev.cast::<SysBusDevice>();
+    // SAFETY: The callers promise that they have owned references.
+    // They do not gift them to pl011_create, so use `Owned::from`.
+    let irq = unsafe { Owned::<IRQState>::from(&*irq) };
 
-        qdev_prop_set_chr(dev, c_str!("chardev").as_ptr(), chr);
-        sysbus_realize_and_unref(sysbus, addr_of_mut!(error_fatal));
-        sysbus_mmio_map(sysbus, 0, addr);
-        sysbus_connect_irq(sysbus, 0, irq);
-        dev
+    let dev = PL011State::new();
+    if !chr.is_null() {
+        let chr = unsafe { Owned::<Chardev>::from(&*chr) };
+        dev.prop_set_chr("chardev", &chr);
     }
+    dev.sysbus_realize();
+    dev.mmio_map(0, addr);
+    dev.connect_irq(0, &irq);
+
+    // The pointer is kept alive by the QOM tree; drop the owned ref
+    dev.as_mut_ptr()
 }
 
 #[repr(C)]
@@ -724,13 +669,6 @@ pub struct PL011Luminary {
     parent_obj: ParentField<PL011State>,
 }
 
-impl ClassInitImpl<PL011Class> for PL011Luminary {
-    fn class_init(klass: &mut PL011Class) {
-        klass.device_id = DeviceId::LUMINARY;
-        <Self as ClassInitImpl<SysBusDeviceClass>>::class_init(&mut klass.parent_class);
-    }
-}
-
 qom_isa!(PL011Luminary : PL011State, SysBusDevice, DeviceState, Object);
 
 unsafe impl ObjectType for PL011Luminary {
@@ -740,6 +678,14 @@ unsafe impl ObjectType for PL011Luminary {
 
 impl ObjectImpl for PL011Luminary {
     type ParentType = PL011State;
+
+    const CLASS_INIT: fn(&mut Self::Class) = Self::Class::class_init::<Self>;
+}
+
+impl PL011Impl for PL011Luminary {
+    const DEVICE_ID: DeviceId = DeviceId(&[0x11, 0x00, 0x18, 0x01, 0x0d, 0xf0, 0x05, 0xb1]);
 }
 
 impl DeviceImpl for PL011Luminary {}
+impl ResettablePhasesImpl for PL011Luminary {}
+impl SysBusDeviceImpl for PL011Luminary {}
diff --git a/rust/hw/char/pl011/src/device_class.rs b/rust/hw/char/pl011/src/device_class.rs
index dbef93f6cb3e0..0b2076ddaa0fe 100644
--- a/rust/hw/char/pl011/src/device_class.rs
+++ b/rust/hw/char/pl011/src/device_class.rs
@@ -2,8 +2,10 @@
 // Author(s): Manos Pitsidianakis <manos.pitsidianakis@linaro.org>
 // SPDX-License-Identifier: GPL-2.0-or-later
 
-use core::ptr::NonNull;
-use std::os::raw::{c_int, c_void};
+use std::{
+    os::raw::{c_int, c_void},
+    ptr::NonNull,
+};
 
 use qemu_api::{
     bindings::*, c_str, prelude::*, vmstate_clock, vmstate_fields, vmstate_of, vmstate_struct,
diff --git a/rust/hw/char/pl011/src/lib.rs b/rust/hw/char/pl011/src/lib.rs
index 3c72f1221ffd4..dbae76991c9a7 100644
--- a/rust/hw/char/pl011/src/lib.rs
+++ b/rust/hw/char/pl011/src/lib.rs
@@ -12,523 +12,13 @@
 //! See [`PL011State`](crate::device::PL011State) for the device model type and
 //! the [`registers`] module for register types.
 
-#![allow(clippy::upper_case_acronyms)]
-
 use qemu_api::c_str;
 
 mod device;
 mod device_class;
-mod memory_ops;
+mod registers;
 
 pub use device::pl011_create;
 
 pub const TYPE_PL011: &::std::ffi::CStr = c_str!("pl011");
 pub const TYPE_PL011_LUMINARY: &::std::ffi::CStr = c_str!("pl011_luminary");
-
-/// Offset of each register from the base memory address of the device.
-///
-/// # Source
-/// ARM DDI 0183G, Table 3-1 p.3-3
-#[doc(alias = "offset")]
-#[allow(non_camel_case_types)]
-#[repr(u64)]
-#[derive(Debug, Eq, PartialEq, qemu_api_macros::TryInto)]
-enum RegisterOffset {
-    /// Data Register
-    ///
-    /// A write to this register initiates the actual data transmission
-    #[doc(alias = "UARTDR")]
-    DR = 0x000,
-    /// Receive Status Register or Error Clear Register
-    #[doc(alias = "UARTRSR")]
-    #[doc(alias = "UARTECR")]
-    RSR = 0x004,
-    /// Flag Register
-    ///
-    /// A read of this register shows if transmission is complete
-    #[doc(alias = "UARTFR")]
-    FR = 0x018,
-    /// Fractional Baud Rate Register
-    ///
-    /// responsible for baud rate speed
-    #[doc(alias = "UARTFBRD")]
-    FBRD = 0x028,
-    /// `IrDA` Low-Power Counter Register
-    #[doc(alias = "UARTILPR")]
-    ILPR = 0x020,
-    /// Integer Baud Rate Register
-    ///
-    /// Responsible for baud rate speed
-    #[doc(alias = "UARTIBRD")]
-    IBRD = 0x024,
-    /// line control register (data frame format)
-    #[doc(alias = "UARTLCR_H")]
-    LCR_H = 0x02C,
-    /// Toggle UART, transmission or reception
-    #[doc(alias = "UARTCR")]
-    CR = 0x030,
-    /// Interrupt FIFO Level Select Register
-    #[doc(alias = "UARTIFLS")]
-    FLS = 0x034,
-    /// Interrupt Mask Set/Clear Register
-    #[doc(alias = "UARTIMSC")]
-    IMSC = 0x038,
-    /// Raw Interrupt Status Register
-    #[doc(alias = "UARTRIS")]
-    RIS = 0x03C,
-    /// Masked Interrupt Status Register
-    #[doc(alias = "UARTMIS")]
-    MIS = 0x040,
-    /// Interrupt Clear Register
-    #[doc(alias = "UARTICR")]
-    ICR = 0x044,
-    /// DMA control Register
-    #[doc(alias = "UARTDMACR")]
-    DMACR = 0x048,
-    ///// Reserved, offsets `0x04C` to `0x07C`.
-    //Reserved = 0x04C,
-}
-
-mod registers {
-    //! Device registers exposed as typed structs which are backed by arbitrary
-    //! integer bitmaps. [`Data`], [`Control`], [`LineControl`], etc.
-    use bilge::prelude::*;
-    use qemu_api::impl_vmstate_bitsized;
-
-    /// Receive Status Register / Data Register common error bits
-    ///
-    /// The `UARTRSR` register is updated only when a read occurs
-    /// from the `UARTDR` register with the same status information
-    /// that can also be obtained by reading the `UARTDR` register
-    #[bitsize(8)]
-    #[derive(Clone, Copy, Default, DebugBits, FromBits)]
-    pub struct Errors {
-        pub framing_error: bool,
-        pub parity_error: bool,
-        pub break_error: bool,
-        pub overrun_error: bool,
-        _reserved_unpredictable: u4,
-    }
-
-    // TODO: FIFO Mode has different semantics
-    /// Data Register, `UARTDR`
-    ///
-    /// The `UARTDR` register is the data register.
-    ///
-    /// For words to be transmitted:
-    ///
-    /// - if the FIFOs are enabled, data written to this location is pushed onto
-    ///   the transmit
-    /// FIFO
-    /// - if the FIFOs are not enabled, data is stored in the transmitter
-    ///   holding register (the
-    /// bottom word of the transmit FIFO).
-    ///
-    /// The write operation initiates transmission from the UART. The data is
-    /// prefixed with a start bit, appended with the appropriate parity bit
-    /// (if parity is enabled), and a stop bit. The resultant word is then
-    /// transmitted.
-    ///
-    /// For received words:
-    ///
-    /// - if the FIFOs are enabled, the data byte and the 4-bit status (break,
-    ///   frame, parity,
-    /// and overrun) is pushed onto the 12-bit wide receive FIFO
-    /// - if the FIFOs are not enabled, the data byte and status are stored in
-    ///   the receiving
-    /// holding register (the bottom word of the receive FIFO).
-    ///
-    /// The received data byte is read by performing reads from the `UARTDR`
-    /// register along with the corresponding status information. The status
-    /// information can also be read by a read of the `UARTRSR/UARTECR`
-    /// register.
-    ///
-    /// # Note
-    ///
-    /// You must disable the UART before any of the control registers are
-    /// reprogrammed. When the UART is disabled in the middle of
-    /// transmission or reception, it completes the current character before
-    /// stopping.
-    ///
-    /// # Source
-    /// ARM DDI 0183G 3.3.1 Data Register, UARTDR
-    #[bitsize(32)]
-    #[derive(Clone, Copy, Default, DebugBits, FromBits)]
-    #[doc(alias = "UARTDR")]
-    pub struct Data {
-        pub data: u8,
-        pub errors: Errors,
-        _reserved: u16,
-    }
-    impl_vmstate_bitsized!(Data);
-
-    impl Data {
-        // bilge is not very const-friendly, unfortunately
-        pub const BREAK: Self = Self { value: 1 << 10 };
-    }
-
-    // TODO: FIFO Mode has different semantics
-    /// Receive Status Register / Error Clear Register, `UARTRSR/UARTECR`
-    ///
-    /// The UARTRSR/UARTECR register is the receive status register/error clear
-    /// register. Receive status can also be read from the `UARTRSR`
-    /// register. If the status is read from this register, then the status
-    /// information for break, framing and parity corresponds to the
-    /// data character read from the [Data register](Data), `UARTDR` prior to
-    /// reading the UARTRSR register. The status information for overrun is
-    /// set immediately when an overrun condition occurs.
-    ///
-    ///
-    /// # Note
-    /// The received data character must be read first from the [Data
-    /// Register](Data), `UARTDR` before reading the error status associated
-    /// with that data character from the `UARTRSR` register. This read
-    /// sequence cannot be reversed, because the `UARTRSR` register is
-    /// updated only when a read occurs from the `UARTDR` register. However,
-    /// the status information can also be obtained by reading the `UARTDR`
-    /// register
-    ///
-    /// # Source
-    /// ARM DDI 0183G 3.3.2 Receive Status Register/Error Clear Register,
-    /// UARTRSR/UARTECR
-    #[bitsize(32)]
-    #[derive(Clone, Copy, DebugBits, FromBits)]
-    pub struct ReceiveStatusErrorClear {
-        pub errors: Errors,
-        _reserved_unpredictable: u24,
-    }
-    impl_vmstate_bitsized!(ReceiveStatusErrorClear);
-
-    impl ReceiveStatusErrorClear {
-        pub fn set_from_data(&mut self, data: Data) {
-            self.set_errors(data.errors());
-        }
-
-        pub fn reset(&mut self) {
-            // All the bits are cleared to 0 on reset.
-            *self = Self::default();
-        }
-    }
-
-    impl Default for ReceiveStatusErrorClear {
-        fn default() -> Self {
-            0.into()
-        }
-    }
-
-    #[bitsize(32)]
-    #[derive(Clone, Copy, DebugBits, FromBits)]
-    /// Flag Register, `UARTFR`
-    #[doc(alias = "UARTFR")]
-    pub struct Flags {
-        /// CTS Clear to send. This bit is the complement of the UART clear to
-        /// send, `nUARTCTS`, modem status input. That is, the bit is 1
-        /// when `nUARTCTS` is LOW.
-        pub clear_to_send: bool,
-        /// DSR Data set ready. This bit is the complement of the UART data set
-        /// ready, `nUARTDSR`, modem status input. That is, the bit is 1 when
-        /// `nUARTDSR` is LOW.
-        pub data_set_ready: bool,
-        /// DCD Data carrier detect. This bit is the complement of the UART data
-        /// carrier detect, `nUARTDCD`, modem status input. That is, the bit is
-        /// 1 when `nUARTDCD` is LOW.
-        pub data_carrier_detect: bool,
-        /// BUSY UART busy. If this bit is set to 1, the UART is busy
-        /// transmitting data. This bit remains set until the complete
-        /// byte, including all the stop bits, has been sent from the
-        /// shift register. This bit is set as soon as the transmit FIFO
-        /// becomes non-empty, regardless of whether the UART is enabled
-        /// or not.
-        pub busy: bool,
-        /// RXFE Receive FIFO empty. The meaning of this bit depends on the
-        /// state of the FEN bit in the UARTLCR_H register. If the FIFO
-        /// is disabled, this bit is set when the receive holding
-        /// register is empty. If the FIFO is enabled, the RXFE bit is
-        /// set when the receive FIFO is empty.
-        pub receive_fifo_empty: bool,
-        /// TXFF Transmit FIFO full. The meaning of this bit depends on the
-        /// state of the FEN bit in the UARTLCR_H register. If the FIFO
-        /// is disabled, this bit is set when the transmit holding
-        /// register is full. If the FIFO is enabled, the TXFF bit is
-        /// set when the transmit FIFO is full.
-        pub transmit_fifo_full: bool,
-        /// RXFF Receive FIFO full. The meaning of this bit depends on the state
-        /// of the FEN bit in the UARTLCR_H register. If the FIFO is
-        /// disabled, this bit is set when the receive holding register
-        /// is full. If the FIFO is enabled, the RXFF bit is set when
-        /// the receive FIFO is full.
-        pub receive_fifo_full: bool,
-        /// Transmit FIFO empty. The meaning of this bit depends on the state of
-        /// the FEN bit in the [Line Control register](LineControl),
-        /// `UARTLCR_H`. If the FIFO is disabled, this bit is set when the
-        /// transmit holding register is empty. If the FIFO is enabled,
-        /// the TXFE bit is set when the transmit FIFO is empty. This
-        /// bit does not indicate if there is data in the transmit shift
-        /// register.
-        pub transmit_fifo_empty: bool,
-        /// `RI`, is `true` when `nUARTRI` is `LOW`.
-        pub ring_indicator: bool,
-        _reserved_zero_no_modify: u23,
-    }
-    impl_vmstate_bitsized!(Flags);
-
-    impl Flags {
-        pub fn reset(&mut self) {
-            *self = Self::default();
-        }
-    }
-
-    impl Default for Flags {
-        fn default() -> Self {
-            let mut ret: Self = 0.into();
-            // After reset TXFF, RXFF, and BUSY are 0, and TXFE and RXFE are 1
-            ret.set_receive_fifo_empty(true);
-            ret.set_transmit_fifo_empty(true);
-            ret
-        }
-    }
-
-    #[bitsize(32)]
-    #[derive(Clone, Copy, DebugBits, FromBits)]
-    /// Line Control Register, `UARTLCR_H`
-    #[doc(alias = "UARTLCR_H")]
-    pub struct LineControl {
-        /// BRK Send break.
-        ///
-        /// If this bit is set to `1`, a low-level is continually output on the
-        /// `UARTTXD` output, after completing transmission of the
-        /// current character. For the proper execution of the break command,
-        /// the software must set this bit for at least two complete
-        /// frames. For normal use, this bit must be cleared to `0`.
-        pub send_break: bool,
-        /// 1 PEN Parity enable:
-        ///
-        /// - 0 = parity is disabled and no parity bit added to the data frame
-        /// - 1 = parity checking and generation is enabled.
-        ///
-        /// See Table 3-11 on page 3-14 for the parity truth table.
-        pub parity_enabled: bool,
-        /// EPS Even parity select. Controls the type of parity the UART uses
-        /// during transmission and reception:
-        /// - 0 = odd parity. The UART generates or checks for an odd number of
-        ///   1s in the data and parity bits.
-        /// - 1 = even parity. The UART generates or checks for an even number
-        ///   of 1s in the data and parity bits.
-        /// This bit has no effect when the `PEN` bit disables parity checking
-        /// and generation. See Table 3-11 on page 3-14 for the parity
-        /// truth table.
-        pub parity: Parity,
-        /// 3 STP2 Two stop bits select. If this bit is set to 1, two stop bits
-        /// are transmitted at the end of the frame. The receive
-        /// logic does not check for two stop bits being received.
-        pub two_stops_bits: bool,
-        /// FEN Enable FIFOs:
-        /// 0 = FIFOs are disabled (character mode) that is, the FIFOs become
-        /// 1-byte-deep holding registers 1 = transmit and receive FIFO
-        /// buffers are enabled (FIFO mode).
-        pub fifos_enabled: Mode,
-        /// WLEN Word length. These bits indicate the number of data bits
-        /// transmitted or received in a frame as follows: b11 = 8 bits
-        /// b10 = 7 bits
-        /// b01 = 6 bits
-        /// b00 = 5 bits.
-        pub word_length: WordLength,
-        /// 7 SPS Stick parity select.
-        /// 0 = stick parity is disabled
-        /// 1 = either:
-        /// • if the EPS bit is 0 then the parity bit is transmitted and checked
-        /// as a 1 • if the EPS bit is 1 then the parity bit is
-        /// transmitted and checked as a 0. This bit has no effect when
-        /// the PEN bit disables parity checking and generation. See Table 3-11
-        /// on page 3-14 for the parity truth table.
-        pub sticky_parity: bool,
-        /// 31:8 - Reserved, do not modify, read as zero.
-        _reserved_zero_no_modify: u24,
-    }
-    impl_vmstate_bitsized!(LineControl);
-
-    impl LineControl {
-        pub fn reset(&mut self) {
-            // All the bits are cleared to 0 when reset.
-            *self = 0.into();
-        }
-    }
-
-    impl Default for LineControl {
-        fn default() -> Self {
-            0.into()
-        }
-    }
-
-    #[bitsize(1)]
-    #[derive(Clone, Copy, Debug, Eq, FromBits, PartialEq)]
-    /// `EPS` "Even parity select", field of [Line Control
-    /// register](LineControl).
-    pub enum Parity {
-        /// - 0 = odd parity. The UART generates or checks for an odd number of
-        ///   1s in the data and parity bits.
-        Odd = 0,
-        /// - 1 = even parity. The UART generates or checks for an even number
-        ///   of 1s in the data and parity bits.
-        Even = 1,
-    }
-
-    #[bitsize(1)]
-    #[derive(Clone, Copy, Debug, Eq, FromBits, PartialEq)]
-    /// `FEN` "Enable FIFOs" or Device mode, field of [Line Control
-    /// register](LineControl).
-    pub enum Mode {
-        /// 0 = FIFOs are disabled (character mode) that is, the FIFOs become
-        /// 1-byte-deep holding registers
-        Character = 0,
-        /// 1 = transmit and receive FIFO buffers are enabled (FIFO mode).
-        FIFO = 1,
-    }
-
-    #[bitsize(2)]
-    #[derive(Clone, Copy, Debug, Eq, FromBits, PartialEq)]
-    /// `WLEN` Word length, field of [Line Control register](LineControl).
-    ///
-    /// These bits indicate the number of data bits transmitted or received in a
-    /// frame as follows:
-    pub enum WordLength {
-        /// b11 = 8 bits
-        _8Bits = 0b11,
-        /// b10 = 7 bits
-        _7Bits = 0b10,
-        /// b01 = 6 bits
-        _6Bits = 0b01,
-        /// b00 = 5 bits.
-        _5Bits = 0b00,
-    }
-
-    /// Control Register, `UARTCR`
-    ///
-    /// The `UARTCR` register is the control register. All the bits are cleared
-    /// to `0` on reset except for bits `9` and `8` that are set to `1`.
-    ///
-    /// # Source
-    /// ARM DDI 0183G, 3.3.8 Control Register, `UARTCR`, Table 3-12
-    #[bitsize(32)]
-    #[doc(alias = "UARTCR")]
-    #[derive(Clone, Copy, DebugBits, FromBits)]
-    pub struct Control {
-        /// `UARTEN` UART enable: 0 = UART is disabled. If the UART is disabled
-        /// in the middle of transmission or reception, it completes the current
-        /// character before stopping. 1 = the UART is enabled. Data
-        /// transmission and reception occurs for either UART signals or SIR
-        /// signals depending on the setting of the SIREN bit.
-        pub enable_uart: bool,
-        /// `SIREN` `SIR` enable: 0 = IrDA SIR ENDEC is disabled. `nSIROUT`
-        /// remains LOW (no light pulse generated), and signal transitions on
-        /// SIRIN have no effect. 1 = IrDA SIR ENDEC is enabled. Data is
-        /// transmitted and received on nSIROUT and SIRIN. UARTTXD remains HIGH,
-        /// in the marking state. Signal transitions on UARTRXD or modem status
-        /// inputs have no effect. This bit has no effect if the UARTEN bit
-        /// disables the UART.
-        pub enable_sir: bool,
-        /// `SIRLP` SIR low-power IrDA mode. This bit selects the IrDA encoding
-        /// mode. If this bit is cleared to 0, low-level bits are transmitted as
-        /// an active high pulse with a width of 3/ 16th of the bit period. If
-        /// this bit is set to 1, low-level bits are transmitted with a pulse
-        /// width that is 3 times the period of the IrLPBaud16 input signal,
-        /// regardless of the selected bit rate. Setting this bit uses less
-        /// power, but might reduce transmission distances.
-        pub sir_lowpower_irda_mode: u1,
-        /// Reserved, do not modify, read as zero.
-        _reserved_zero_no_modify: u4,
-        /// `LBE` Loopback enable. If this bit is set to 1 and the SIREN bit is
-        /// set to 1 and the SIRTEST bit in the Test Control register, UARTTCR
-        /// on page 4-5 is set to 1, then the nSIROUT path is inverted, and fed
-        /// through to the SIRIN path. The SIRTEST bit in the test register must
-        /// be set to 1 to override the normal half-duplex SIR operation. This
-        /// must be the requirement for accessing the test registers during
-        /// normal operation, and SIRTEST must be cleared to 0 when loopback
-        /// testing is finished. This feature reduces the amount of external
-        /// coupling required during system test. If this bit is set to 1, and
-        /// the SIRTEST bit is set to 0, the UARTTXD path is fed through to the
-        /// UARTRXD path. In either SIR mode or UART mode, when this bit is set,
-        /// the modem outputs are also fed through to the modem inputs. This bit
-        /// is cleared to 0 on reset, to disable loopback.
-        pub enable_loopback: bool,
-        /// `TXE` Transmit enable. If this bit is set to 1, the transmit section
-        /// of the UART is enabled. Data transmission occurs for either UART
-        /// signals, or SIR signals depending on the setting of the SIREN bit.
-        /// When the UART is disabled in the middle of transmission, it
-        /// completes the current character before stopping.
-        pub enable_transmit: bool,
-        /// `RXE` Receive enable. If this bit is set to 1, the receive section
-        /// of the UART is enabled. Data reception occurs for either UART
-        /// signals or SIR signals depending on the setting of the SIREN bit.
-        /// When the UART is disabled in the middle of reception, it completes
-        /// the current character before stopping.
-        pub enable_receive: bool,
-        /// `DTR` Data transmit ready. This bit is the complement of the UART
-        /// data transmit ready, `nUARTDTR`, modem status output. That is, when
-        /// the bit is programmed to a 1 then `nUARTDTR` is LOW.
-        pub data_transmit_ready: bool,
-        /// `RTS` Request to send. This bit is the complement of the UART
-        /// request to send, `nUARTRTS`, modem status output. That is, when the
-        /// bit is programmed to a 1 then `nUARTRTS` is LOW.
-        pub request_to_send: bool,
-        /// `Out1` This bit is the complement of the UART Out1 (`nUARTOut1`)
-        /// modem status output. That is, when the bit is programmed to a 1 the
-        /// output is 0. For DTE this can be used as Data Carrier Detect (DCD).
-        pub out_1: bool,
-        /// `Out2` This bit is the complement of the UART Out2 (`nUARTOut2`)
-        /// modem status output. That is, when the bit is programmed to a 1, the
-        /// output is 0. For DTE this can be used as Ring Indicator (RI).
-        pub out_2: bool,
-        /// `RTSEn` RTS hardware flow control enable. If this bit is set to 1,
-        /// RTS hardware flow control is enabled. Data is only requested when
-        /// there is space in the receive FIFO for it to be received.
-        pub rts_hardware_flow_control_enable: bool,
-        /// `CTSEn` CTS hardware flow control enable. If this bit is set to 1,
-        /// CTS hardware flow control is enabled. Data is only transmitted when
-        /// the `nUARTCTS` signal is asserted.
-        pub cts_hardware_flow_control_enable: bool,
-        /// 31:16 - Reserved, do not modify, read as zero.
-        _reserved_zero_no_modify2: u16,
-    }
-    impl_vmstate_bitsized!(Control);
-
-    impl Control {
-        pub fn reset(&mut self) {
-            *self = 0.into();
-            self.set_enable_receive(true);
-            self.set_enable_transmit(true);
-        }
-    }
-
-    impl Default for Control {
-        fn default() -> Self {
-            let mut ret: Self = 0.into();
-            ret.reset();
-            ret
-        }
-    }
-
-    /// Interrupt status bits in UARTRIS, UARTMIS, UARTIMSC
-    pub struct Interrupt(pub u32);
-
-    impl Interrupt {
-        pub const OE: Self = Self(1 << 10);
-        pub const BE: Self = Self(1 << 9);
-        pub const PE: Self = Self(1 << 8);
-        pub const FE: Self = Self(1 << 7);
-        pub const RT: Self = Self(1 << 6);
-        pub const TX: Self = Self(1 << 5);
-        pub const RX: Self = Self(1 << 4);
-        pub const DSR: Self = Self(1 << 3);
-        pub const DCD: Self = Self(1 << 2);
-        pub const CTS: Self = Self(1 << 1);
-        pub const RI: Self = Self(1 << 0);
-
-        pub const E: Self = Self(Self::OE.0 | Self::BE.0 | Self::PE.0 | Self::FE.0);
-        pub const MS: Self = Self(Self::RI.0 | Self::DSR.0 | Self::DCD.0 | Self::CTS.0);
-    }
-}
-
-// TODO: You must disable the UART before any of the control registers are
-// reprogrammed. When the UART is disabled in the middle of transmission or
-// reception, it completes the current character before stopping
diff --git a/rust/hw/char/pl011/src/memory_ops.rs b/rust/hw/char/pl011/src/memory_ops.rs
deleted file mode 100644
index 432d326389896..0000000000000
--- a/rust/hw/char/pl011/src/memory_ops.rs
+++ /dev/null
@@ -1,34 +0,0 @@
-// Copyright 2024, Linaro Limited
-// Author(s): Manos Pitsidianakis <manos.pitsidianakis@linaro.org>
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-use core::ptr::NonNull;
-use std::os::raw::{c_uint, c_void};
-
-use qemu_api::{bindings::*, zeroable::Zeroable};
-
-use crate::device::PL011State;
-
-pub static PL011_OPS: MemoryRegionOps = MemoryRegionOps {
-    read: Some(pl011_read),
-    write: Some(pl011_write),
-    read_with_attrs: None,
-    write_with_attrs: None,
-    endianness: device_endian::DEVICE_NATIVE_ENDIAN,
-    valid: Zeroable::ZERO,
-    impl_: MemoryRegionOps__bindgen_ty_2 {
-        min_access_size: 4,
-        max_access_size: 4,
-        ..Zeroable::ZERO
-    },
-};
-
-unsafe extern "C" fn pl011_read(opaque: *mut c_void, addr: hwaddr, size: c_uint) -> u64 {
-    let mut state = NonNull::new(opaque).unwrap().cast::<PL011State>();
-    unsafe { state.as_mut() }.read(addr, size)
-}
-
-unsafe extern "C" fn pl011_write(opaque: *mut c_void, addr: hwaddr, data: u64, _size: c_uint) {
-    let mut state = NonNull::new(opaque).unwrap().cast::<PL011State>();
-    unsafe { state.as_mut() }.write(addr, data);
-}
diff --git a/rust/hw/char/pl011/src/registers.rs b/rust/hw/char/pl011/src/registers.rs
new file mode 100644
index 0000000000000..cd92fa2c30070
--- /dev/null
+++ b/rust/hw/char/pl011/src/registers.rs
@@ -0,0 +1,506 @@
+// Copyright 2024, Linaro Limited
+// Author(s): Manos Pitsidianakis <manos.pitsidianakis@linaro.org>
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+//! Device registers exposed as typed structs which are backed by arbitrary
+//! integer bitmaps. [`Data`], [`Control`], [`LineControl`], etc.
+
+use bilge::prelude::*;
+use qemu_api::impl_vmstate_bitsized;
+
+/// Offset of each register from the base memory address of the device.
+///
+/// # Source
+/// ARM DDI 0183G, Table 3-1 p.3-3
+#[doc(alias = "offset")]
+#[allow(non_camel_case_types)]
+#[repr(u64)]
+#[derive(Debug, Eq, PartialEq, qemu_api_macros::TryInto)]
+pub enum RegisterOffset {
+    /// Data Register
+    ///
+    /// A write to this register initiates the actual data transmission
+    #[doc(alias = "UARTDR")]
+    DR = 0x000,
+    /// Receive Status Register or Error Clear Register
+    #[doc(alias = "UARTRSR")]
+    #[doc(alias = "UARTECR")]
+    RSR = 0x004,
+    /// Flag Register
+    ///
+    /// A read of this register shows if transmission is complete
+    #[doc(alias = "UARTFR")]
+    FR = 0x018,
+    /// Fractional Baud Rate Register
+    ///
+    /// responsible for baud rate speed
+    #[doc(alias = "UARTFBRD")]
+    FBRD = 0x028,
+    /// `IrDA` Low-Power Counter Register
+    #[doc(alias = "UARTILPR")]
+    ILPR = 0x020,
+    /// Integer Baud Rate Register
+    ///
+    /// Responsible for baud rate speed
+    #[doc(alias = "UARTIBRD")]
+    IBRD = 0x024,
+    /// line control register (data frame format)
+    #[doc(alias = "UARTLCR_H")]
+    LCR_H = 0x02C,
+    /// Toggle UART, transmission or reception
+    #[doc(alias = "UARTCR")]
+    CR = 0x030,
+    /// Interrupt FIFO Level Select Register
+    #[doc(alias = "UARTIFLS")]
+    FLS = 0x034,
+    /// Interrupt Mask Set/Clear Register
+    #[doc(alias = "UARTIMSC")]
+    IMSC = 0x038,
+    /// Raw Interrupt Status Register
+    #[doc(alias = "UARTRIS")]
+    RIS = 0x03C,
+    /// Masked Interrupt Status Register
+    #[doc(alias = "UARTMIS")]
+    MIS = 0x040,
+    /// Interrupt Clear Register
+    #[doc(alias = "UARTICR")]
+    ICR = 0x044,
+    /// DMA control Register
+    #[doc(alias = "UARTDMACR")]
+    DMACR = 0x048,
+    ///// Reserved, offsets `0x04C` to `0x07C`.
+    //Reserved = 0x04C,
+}
+
+/// Receive Status Register / Data Register common error bits
+///
+/// The `UARTRSR` register is updated only when a read occurs
+/// from the `UARTDR` register with the same status information
+/// that can also be obtained by reading the `UARTDR` register
+#[bitsize(8)]
+#[derive(Clone, Copy, Default, DebugBits, FromBits)]
+pub struct Errors {
+    pub framing_error: bool,
+    pub parity_error: bool,
+    pub break_error: bool,
+    pub overrun_error: bool,
+    _reserved_unpredictable: u4,
+}
+
+// TODO: FIFO Mode has different semantics
+/// Data Register, `UARTDR`
+///
+/// The `UARTDR` register is the data register.
+///
+/// For words to be transmitted:
+///
+/// - if the FIFOs are enabled, data written to this location is pushed onto the
+///   transmit
+/// FIFO
+/// - if the FIFOs are not enabled, data is stored in the transmitter holding
+///   register (the
+/// bottom word of the transmit FIFO).
+///
+/// The write operation initiates transmission from the UART. The data is
+/// prefixed with a start bit, appended with the appropriate parity bit
+/// (if parity is enabled), and a stop bit. The resultant word is then
+/// transmitted.
+///
+/// For received words:
+///
+/// - if the FIFOs are enabled, the data byte and the 4-bit status (break,
+///   frame, parity,
+/// and overrun) is pushed onto the 12-bit wide receive FIFO
+/// - if the FIFOs are not enabled, the data byte and status are stored in the
+///   receiving
+/// holding register (the bottom word of the receive FIFO).
+///
+/// The received data byte is read by performing reads from the `UARTDR`
+/// register along with the corresponding status information. The status
+/// information can also be read by a read of the `UARTRSR/UARTECR`
+/// register.
+///
+/// # Note
+///
+/// You must disable the UART before any of the control registers are
+/// reprogrammed. When the UART is disabled in the middle of
+/// transmission or reception, it completes the current character before
+/// stopping.
+///
+/// # Source
+/// ARM DDI 0183G 3.3.1 Data Register, UARTDR
+#[bitsize(32)]
+#[derive(Clone, Copy, Default, DebugBits, FromBits)]
+#[doc(alias = "UARTDR")]
+pub struct Data {
+    pub data: u8,
+    pub errors: Errors,
+    _reserved: u16,
+}
+impl_vmstate_bitsized!(Data);
+
+impl Data {
+    // bilge is not very const-friendly, unfortunately
+    pub const BREAK: Self = Self { value: 1 << 10 };
+}
+
+// TODO: FIFO Mode has different semantics
+/// Receive Status Register / Error Clear Register, `UARTRSR/UARTECR`
+///
+/// The UARTRSR/UARTECR register is the receive status register/error clear
+/// register. Receive status can also be read from the `UARTRSR`
+/// register. If the status is read from this register, then the status
+/// information for break, framing and parity corresponds to the
+/// data character read from the [Data register](Data), `UARTDR` prior to
+/// reading the UARTRSR register. The status information for overrun is
+/// set immediately when an overrun condition occurs.
+///
+///
+/// # Note
+/// The received data character must be read first from the [Data
+/// Register](Data), `UARTDR` before reading the error status associated
+/// with that data character from the `UARTRSR` register. This read
+/// sequence cannot be reversed, because the `UARTRSR` register is
+/// updated only when a read occurs from the `UARTDR` register. However,
+/// the status information can also be obtained by reading the `UARTDR`
+/// register
+///
+/// # Source
+/// ARM DDI 0183G 3.3.2 Receive Status Register/Error Clear Register,
+/// UARTRSR/UARTECR
+#[bitsize(32)]
+#[derive(Clone, Copy, DebugBits, FromBits)]
+pub struct ReceiveStatusErrorClear {
+    pub errors: Errors,
+    _reserved_unpredictable: u24,
+}
+impl_vmstate_bitsized!(ReceiveStatusErrorClear);
+
+impl ReceiveStatusErrorClear {
+    pub fn set_from_data(&mut self, data: Data) {
+        self.set_errors(data.errors());
+    }
+
+    pub fn reset(&mut self) {
+        // All the bits are cleared to 0 on reset.
+        *self = Self::default();
+    }
+}
+
+impl Default for ReceiveStatusErrorClear {
+    fn default() -> Self {
+        0.into()
+    }
+}
+
+#[bitsize(32)]
+#[derive(Clone, Copy, DebugBits, FromBits)]
+/// Flag Register, `UARTFR`
+#[doc(alias = "UARTFR")]
+pub struct Flags {
+    /// CTS Clear to send. This bit is the complement of the UART clear to
+    /// send, `nUARTCTS`, modem status input. That is, the bit is 1
+    /// when `nUARTCTS` is LOW.
+    pub clear_to_send: bool,
+    /// DSR Data set ready. This bit is the complement of the UART data set
+    /// ready, `nUARTDSR`, modem status input. That is, the bit is 1 when
+    /// `nUARTDSR` is LOW.
+    pub data_set_ready: bool,
+    /// DCD Data carrier detect. This bit is the complement of the UART data
+    /// carrier detect, `nUARTDCD`, modem status input. That is, the bit is
+    /// 1 when `nUARTDCD` is LOW.
+    pub data_carrier_detect: bool,
+    /// BUSY UART busy. If this bit is set to 1, the UART is busy
+    /// transmitting data. This bit remains set until the complete
+    /// byte, including all the stop bits, has been sent from the
+    /// shift register. This bit is set as soon as the transmit FIFO
+    /// becomes non-empty, regardless of whether the UART is enabled
+    /// or not.
+    pub busy: bool,
+    /// RXFE Receive FIFO empty. The meaning of this bit depends on the
+    /// state of the FEN bit in the UARTLCR_H register. If the FIFO
+    /// is disabled, this bit is set when the receive holding
+    /// register is empty. If the FIFO is enabled, the RXFE bit is
+    /// set when the receive FIFO is empty.
+    pub receive_fifo_empty: bool,
+    /// TXFF Transmit FIFO full. The meaning of this bit depends on the
+    /// state of the FEN bit in the UARTLCR_H register. If the FIFO
+    /// is disabled, this bit is set when the transmit holding
+    /// register is full. If the FIFO is enabled, the TXFF bit is
+    /// set when the transmit FIFO is full.
+    pub transmit_fifo_full: bool,
+    /// RXFF Receive FIFO full. The meaning of this bit depends on the state
+    /// of the FEN bit in the UARTLCR_H register. If the FIFO is
+    /// disabled, this bit is set when the receive holding register
+    /// is full. If the FIFO is enabled, the RXFF bit is set when
+    /// the receive FIFO is full.
+    pub receive_fifo_full: bool,
+    /// Transmit FIFO empty. The meaning of this bit depends on the state of
+    /// the FEN bit in the [Line Control register](LineControl),
+    /// `UARTLCR_H`. If the FIFO is disabled, this bit is set when the
+    /// transmit holding register is empty. If the FIFO is enabled,
+    /// the TXFE bit is set when the transmit FIFO is empty. This
+    /// bit does not indicate if there is data in the transmit shift
+    /// register.
+    pub transmit_fifo_empty: bool,
+    /// `RI`, is `true` when `nUARTRI` is `LOW`.
+    pub ring_indicator: bool,
+    _reserved_zero_no_modify: u23,
+}
+impl_vmstate_bitsized!(Flags);
+
+impl Flags {
+    pub fn reset(&mut self) {
+        *self = Self::default();
+    }
+}
+
+impl Default for Flags {
+    fn default() -> Self {
+        let mut ret: Self = 0.into();
+        // After reset TXFF, RXFF, and BUSY are 0, and TXFE and RXFE are 1
+        ret.set_receive_fifo_empty(true);
+        ret.set_transmit_fifo_empty(true);
+        ret
+    }
+}
+
+#[bitsize(32)]
+#[derive(Clone, Copy, DebugBits, FromBits)]
+/// Line Control Register, `UARTLCR_H`
+#[doc(alias = "UARTLCR_H")]
+pub struct LineControl {
+    /// BRK Send break.
+    ///
+    /// If this bit is set to `1`, a low-level is continually output on the
+    /// `UARTTXD` output, after completing transmission of the
+    /// current character. For the proper execution of the break command,
+    /// the software must set this bit for at least two complete
+    /// frames. For normal use, this bit must be cleared to `0`.
+    pub send_break: bool,
+    /// 1 PEN Parity enable:
+    ///
+    /// - 0 = parity is disabled and no parity bit added to the data frame
+    /// - 1 = parity checking and generation is enabled.
+    ///
+    /// See Table 3-11 on page 3-14 for the parity truth table.
+    pub parity_enabled: bool,
+    /// EPS Even parity select. Controls the type of parity the UART uses
+    /// during transmission and reception:
+    /// - 0 = odd parity. The UART generates or checks for an odd number of 1s
+    ///   in the data and parity bits.
+    /// - 1 = even parity. The UART generates or checks for an even number of 1s
+    ///   in the data and parity bits.
+    /// This bit has no effect when the `PEN` bit disables parity checking
+    /// and generation. See Table 3-11 on page 3-14 for the parity
+    /// truth table.
+    pub parity: Parity,
+    /// 3 STP2 Two stop bits select. If this bit is set to 1, two stop bits
+    /// are transmitted at the end of the frame. The receive
+    /// logic does not check for two stop bits being received.
+    pub two_stops_bits: bool,
+    /// FEN Enable FIFOs:
+    /// 0 = FIFOs are disabled (character mode) that is, the FIFOs become
+    /// 1-byte-deep holding registers 1 = transmit and receive FIFO
+    /// buffers are enabled (FIFO mode).
+    pub fifos_enabled: Mode,
+    /// WLEN Word length. These bits indicate the number of data bits
+    /// transmitted or received in a frame as follows: b11 = 8 bits
+    /// b10 = 7 bits
+    /// b01 = 6 bits
+    /// b00 = 5 bits.
+    pub word_length: WordLength,
+    /// 7 SPS Stick parity select.
+    /// 0 = stick parity is disabled
+    /// 1 = either:
+    /// • if the EPS bit is 0 then the parity bit is transmitted and checked
+    /// as a 1 • if the EPS bit is 1 then the parity bit is
+    /// transmitted and checked as a 0. This bit has no effect when
+    /// the PEN bit disables parity checking and generation. See Table 3-11
+    /// on page 3-14 for the parity truth table.
+    pub sticky_parity: bool,
+    /// 31:8 - Reserved, do not modify, read as zero.
+    _reserved_zero_no_modify: u24,
+}
+impl_vmstate_bitsized!(LineControl);
+
+impl LineControl {
+    pub fn reset(&mut self) {
+        // All the bits are cleared to 0 when reset.
+        *self = 0.into();
+    }
+}
+
+impl Default for LineControl {
+    fn default() -> Self {
+        0.into()
+    }
+}
+
+#[bitsize(1)]
+#[derive(Clone, Copy, Debug, Eq, FromBits, PartialEq)]
+/// `EPS` "Even parity select", field of [Line Control
+/// register](LineControl).
+pub enum Parity {
+    /// - 0 = odd parity. The UART generates or checks for an odd number of 1s
+    ///   in the data and parity bits.
+    Odd = 0,
+    /// - 1 = even parity. The UART generates or checks for an even number of 1s
+    ///   in the data and parity bits.
+    Even = 1,
+}
+
+#[bitsize(1)]
+#[derive(Clone, Copy, Debug, Eq, FromBits, PartialEq)]
+/// `FEN` "Enable FIFOs" or Device mode, field of [Line Control
+/// register](LineControl).
+pub enum Mode {
+    /// 0 = FIFOs are disabled (character mode) that is, the FIFOs become
+    /// 1-byte-deep holding registers
+    Character = 0,
+    /// 1 = transmit and receive FIFO buffers are enabled (FIFO mode).
+    FIFO = 1,
+}
+
+#[bitsize(2)]
+#[derive(Clone, Copy, Debug, Eq, FromBits, PartialEq)]
+/// `WLEN` Word length, field of [Line Control register](LineControl).
+///
+/// These bits indicate the number of data bits transmitted or received in a
+/// frame as follows:
+pub enum WordLength {
+    /// b11 = 8 bits
+    _8Bits = 0b11,
+    /// b10 = 7 bits
+    _7Bits = 0b10,
+    /// b01 = 6 bits
+    _6Bits = 0b01,
+    /// b00 = 5 bits.
+    _5Bits = 0b00,
+}
+
+/// Control Register, `UARTCR`
+///
+/// The `UARTCR` register is the control register. All the bits are cleared
+/// to `0` on reset except for bits `9` and `8` that are set to `1`.
+///
+/// # Source
+/// ARM DDI 0183G, 3.3.8 Control Register, `UARTCR`, Table 3-12
+#[bitsize(32)]
+#[doc(alias = "UARTCR")]
+#[derive(Clone, Copy, DebugBits, FromBits)]
+pub struct Control {
+    /// `UARTEN` UART enable: 0 = UART is disabled. If the UART is disabled
+    /// in the middle of transmission or reception, it completes the current
+    /// character before stopping. 1 = the UART is enabled. Data
+    /// transmission and reception occurs for either UART signals or SIR
+    /// signals depending on the setting of the SIREN bit.
+    pub enable_uart: bool,
+    /// `SIREN` `SIR` enable: 0 = IrDA SIR ENDEC is disabled. `nSIROUT`
+    /// remains LOW (no light pulse generated), and signal transitions on
+    /// SIRIN have no effect. 1 = IrDA SIR ENDEC is enabled. Data is
+    /// transmitted and received on nSIROUT and SIRIN. UARTTXD remains HIGH,
+    /// in the marking state. Signal transitions on UARTRXD or modem status
+    /// inputs have no effect. This bit has no effect if the UARTEN bit
+    /// disables the UART.
+    pub enable_sir: bool,
+    /// `SIRLP` SIR low-power IrDA mode. This bit selects the IrDA encoding
+    /// mode. If this bit is cleared to 0, low-level bits are transmitted as
+    /// an active high pulse with a width of 3/ 16th of the bit period. If
+    /// this bit is set to 1, low-level bits are transmitted with a pulse
+    /// width that is 3 times the period of the IrLPBaud16 input signal,
+    /// regardless of the selected bit rate. Setting this bit uses less
+    /// power, but might reduce transmission distances.
+    pub sir_lowpower_irda_mode: u1,
+    /// Reserved, do not modify, read as zero.
+    _reserved_zero_no_modify: u4,
+    /// `LBE` Loopback enable. If this bit is set to 1 and the SIREN bit is
+    /// set to 1 and the SIRTEST bit in the Test Control register, UARTTCR
+    /// on page 4-5 is set to 1, then the nSIROUT path is inverted, and fed
+    /// through to the SIRIN path. The SIRTEST bit in the test register must
+    /// be set to 1 to override the normal half-duplex SIR operation. This
+    /// must be the requirement for accessing the test registers during
+    /// normal operation, and SIRTEST must be cleared to 0 when loopback
+    /// testing is finished. This feature reduces the amount of external
+    /// coupling required during system test. If this bit is set to 1, and
+    /// the SIRTEST bit is set to 0, the UARTTXD path is fed through to the
+    /// UARTRXD path. In either SIR mode or UART mode, when this bit is set,
+    /// the modem outputs are also fed through to the modem inputs. This bit
+    /// is cleared to 0 on reset, to disable loopback.
+    pub enable_loopback: bool,
+    /// `TXE` Transmit enable. If this bit is set to 1, the transmit section
+    /// of the UART is enabled. Data transmission occurs for either UART
+    /// signals, or SIR signals depending on the setting of the SIREN bit.
+    /// When the UART is disabled in the middle of transmission, it
+    /// completes the current character before stopping.
+    pub enable_transmit: bool,
+    /// `RXE` Receive enable. If this bit is set to 1, the receive section
+    /// of the UART is enabled. Data reception occurs for either UART
+    /// signals or SIR signals depending on the setting of the SIREN bit.
+    /// When the UART is disabled in the middle of reception, it completes
+    /// the current character before stopping.
+    pub enable_receive: bool,
+    /// `DTR` Data transmit ready. This bit is the complement of the UART
+    /// data transmit ready, `nUARTDTR`, modem status output. That is, when
+    /// the bit is programmed to a 1 then `nUARTDTR` is LOW.
+    pub data_transmit_ready: bool,
+    /// `RTS` Request to send. This bit is the complement of the UART
+    /// request to send, `nUARTRTS`, modem status output. That is, when the
+    /// bit is programmed to a 1 then `nUARTRTS` is LOW.
+    pub request_to_send: bool,
+    /// `Out1` This bit is the complement of the UART Out1 (`nUARTOut1`)
+    /// modem status output. That is, when the bit is programmed to a 1 the
+    /// output is 0. For DTE this can be used as Data Carrier Detect (DCD).
+    pub out_1: bool,
+    /// `Out2` This bit is the complement of the UART Out2 (`nUARTOut2`)
+    /// modem status output. That is, when the bit is programmed to a 1, the
+    /// output is 0. For DTE this can be used as Ring Indicator (RI).
+    pub out_2: bool,
+    /// `RTSEn` RTS hardware flow control enable. If this bit is set to 1,
+    /// RTS hardware flow control is enabled. Data is only requested when
+    /// there is space in the receive FIFO for it to be received.
+    pub rts_hardware_flow_control_enable: bool,
+    /// `CTSEn` CTS hardware flow control enable. If this bit is set to 1,
+    /// CTS hardware flow control is enabled. Data is only transmitted when
+    /// the `nUARTCTS` signal is asserted.
+    pub cts_hardware_flow_control_enable: bool,
+    /// 31:16 - Reserved, do not modify, read as zero.
+    _reserved_zero_no_modify2: u16,
+}
+impl_vmstate_bitsized!(Control);
+
+impl Control {
+    pub fn reset(&mut self) {
+        *self = 0.into();
+        self.set_enable_receive(true);
+        self.set_enable_transmit(true);
+    }
+}
+
+impl Default for Control {
+    fn default() -> Self {
+        let mut ret: Self = 0.into();
+        ret.reset();
+        ret
+    }
+}
+
+/// Interrupt status bits in UARTRIS, UARTMIS, UARTIMSC
+pub struct Interrupt(pub u32);
+
+impl Interrupt {
+    pub const OE: Self = Self(1 << 10);
+    pub const BE: Self = Self(1 << 9);
+    pub const PE: Self = Self(1 << 8);
+    pub const FE: Self = Self(1 << 7);
+    pub const RT: Self = Self(1 << 6);
+    pub const TX: Self = Self(1 << 5);
+    pub const RX: Self = Self(1 << 4);
+    pub const DSR: Self = Self(1 << 3);
+    pub const DCD: Self = Self(1 << 2);
+    pub const CTS: Self = Self(1 << 1);
+    pub const RI: Self = Self(1 << 0);
+
+    pub const E: Self = Self(Self::OE.0 | Self::BE.0 | Self::PE.0 | Self::FE.0);
+    pub const MS: Self = Self(Self::RI.0 | Self::DSR.0 | Self::DCD.0 | Self::CTS.0);
+}
diff --git a/rust/hw/meson.build b/rust/hw/meson.build
index 860196645e719..9749d4adfc966 100644
--- a/rust/hw/meson.build
+++ b/rust/hw/meson.build
@@ -1 +1,2 @@
 subdir('char')
+subdir('timer')
diff --git a/rust/hw/timer/Kconfig b/rust/hw/timer/Kconfig
new file mode 100644
index 0000000000000..42e421317a59f
--- /dev/null
+++ b/rust/hw/timer/Kconfig
@@ -0,0 +1,3 @@
+config X_HPET_RUST
+    bool
+    default y if PC && HAVE_RUST
diff --git a/rust/hw/timer/hpet/Cargo.toml b/rust/hw/timer/hpet/Cargo.toml
new file mode 100644
index 0000000000000..147f216e72576
--- /dev/null
+++ b/rust/hw/timer/hpet/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+name = "hpet"
+version = "0.1.0"
+edition = "2021"
+authors = ["Zhao Liu <zhao1.liu@intel.com>"]
+license = "GPL-2.0-or-later"
+description = "IA-PC High Precision Event Timer emulation in Rust"
+rust-version = "1.63.0"
+
+[lib]
+crate-type = ["staticlib"]
+
+[dependencies]
+qemu_api = { path = "../../../qemu-api" }
+qemu_api_macros = { path = "../../../qemu-api-macros" }
+
+[lints]
+workspace = true
diff --git a/rust/hw/timer/hpet/meson.build b/rust/hw/timer/hpet/meson.build
new file mode 100644
index 0000000000000..c2d7c0532ca4e
--- /dev/null
+++ b/rust/hw/timer/hpet/meson.build
@@ -0,0 +1,18 @@
+_libhpet_rs = static_library(
+  'hpet',
+  files('src/lib.rs'),
+  override_options: ['rust_std=2021', 'build.rust_std=2021'],
+  rust_abi: 'rust',
+  dependencies: [
+    qemu_api,
+    qemu_api_macros,
+  ],
+)
+
+rust_devices_ss.add(when: 'CONFIG_X_HPET_RUST', if_true: [declare_dependency(
+  link_whole: [_libhpet_rs],
+  # Putting proc macro crates in `dependencies` is necessary for Meson to find
+  # them when compiling the root per-target static rust lib.
+  dependencies: [qemu_api_macros],
+  variables: {'crate': 'hpet'},
+)])
diff --git a/rust/hw/timer/hpet/src/fw_cfg.rs b/rust/hw/timer/hpet/src/fw_cfg.rs
new file mode 100644
index 0000000000000..bef03727ea3c7
--- /dev/null
+++ b/rust/hw/timer/hpet/src/fw_cfg.rs
@@ -0,0 +1,69 @@
+// Copyright (C) 2024 Intel Corporation.
+// Author(s): Zhao Liu <zhai1.liu@intel.com>
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+use std::ptr::addr_of_mut;
+
+use qemu_api::{cell::bql_locked, impl_zeroable, zeroable::Zeroable};
+
+/// Each `HPETState` represents a Event Timer Block. The v1 spec supports
+/// up to 8 blocks. QEMU only uses 1 block (in PC machine).
+const HPET_MAX_NUM_EVENT_TIMER_BLOCK: usize = 8;
+
+#[repr(C, packed)]
+#[derive(Copy, Clone, Default)]
+pub struct HPETFwEntry {
+    pub event_timer_block_id: u32,
+    pub address: u64,
+    pub min_tick: u16,
+    pub page_prot: u8,
+}
+impl_zeroable!(HPETFwEntry);
+
+#[repr(C, packed)]
+#[derive(Copy, Clone, Default)]
+pub struct HPETFwConfig {
+    pub count: u8,
+    pub hpet: [HPETFwEntry; HPET_MAX_NUM_EVENT_TIMER_BLOCK],
+}
+impl_zeroable!(HPETFwConfig);
+
+#[allow(non_upper_case_globals)]
+#[no_mangle]
+pub static mut hpet_fw_cfg: HPETFwConfig = HPETFwConfig {
+    count: u8::MAX,
+    ..Zeroable::ZERO
+};
+
+impl HPETFwConfig {
+    pub(crate) fn assign_hpet_id() -> usize {
+        assert!(bql_locked());
+        // SAFETY: all accesses go through these methods, which guarantee
+        // that the accesses are protected by the BQL.
+        let mut fw_cfg = unsafe { *addr_of_mut!(hpet_fw_cfg) };
+
+        if fw_cfg.count == u8::MAX {
+            // first instance
+            fw_cfg.count = 0;
+        }
+
+        if fw_cfg.count == 8 {
+            // TODO: Add error binding: error_setg()
+            panic!("Only 8 instances of HPET is allowed");
+        }
+
+        let id: usize = fw_cfg.count.into();
+        fw_cfg.count += 1;
+        id
+    }
+
+    pub(crate) fn update_hpet_cfg(hpet_id: usize, timer_block_id: u32, address: u64) {
+        assert!(bql_locked());
+        // SAFETY: all accesses go through these methods, which guarantee
+        // that the accesses are protected by the BQL.
+        let mut fw_cfg = unsafe { *addr_of_mut!(hpet_fw_cfg) };
+
+        fw_cfg.hpet[hpet_id].event_timer_block_id = timer_block_id;
+        fw_cfg.hpet[hpet_id].address = address;
+    }
+}
diff --git a/rust/hw/timer/hpet/src/hpet.rs b/rust/hw/timer/hpet/src/hpet.rs
new file mode 100644
index 0000000000000..20e0afdfca8ae
--- /dev/null
+++ b/rust/hw/timer/hpet/src/hpet.rs
@@ -0,0 +1,905 @@
+// Copyright (C) 2024 Intel Corporation.
+// Author(s): Zhao Liu <zhai1.liu@intel.com>
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+use std::{
+    ffi::CStr,
+    pin::Pin,
+    ptr::{addr_of_mut, null_mut, NonNull},
+    slice::from_ref,
+};
+
+use qemu_api::{
+    bindings::{
+        address_space_memory, address_space_stl_le, qdev_prop_bit, qdev_prop_bool,
+        qdev_prop_uint32, qdev_prop_uint8,
+    },
+    c_str,
+    cell::{BqlCell, BqlRefCell},
+    irq::InterruptSource,
+    memory::{
+        hwaddr, MemoryRegion, MemoryRegionOps, MemoryRegionOpsBuilder, MEMTXATTRS_UNSPECIFIED,
+    },
+    prelude::*,
+    qdev::{DeviceImpl, DeviceMethods, DeviceState, Property, ResetType, ResettablePhasesImpl},
+    qom::{ObjectImpl, ObjectType, ParentField},
+    qom_isa,
+    sysbus::{SysBusDevice, SysBusDeviceImpl},
+    timer::{Timer, CLOCK_VIRTUAL},
+};
+
+use crate::fw_cfg::HPETFwConfig;
+
+/// Register space for each timer block (`HPET_BASE` is defined in hpet.h).
+const HPET_REG_SPACE_LEN: u64 = 0x400; // 1024 bytes
+
+/// Minimum recommended hardware implementation.
+const HPET_MIN_TIMERS: usize = 3;
+/// Maximum timers in each timer block.
+const HPET_MAX_TIMERS: usize = 32;
+
+/// Flags that HPETState.flags supports.
+const HPET_FLAG_MSI_SUPPORT_SHIFT: usize = 0;
+
+const HPET_NUM_IRQ_ROUTES: usize = 32;
+const HPET_LEGACY_PIT_INT: u32 = 0; // HPET_LEGACY_RTC_INT isn't defined here.
+const RTC_ISA_IRQ: usize = 8;
+
+const HPET_CLK_PERIOD: u64 = 10; // 10 ns
+const FS_PER_NS: u64 = 1000000; // 1000000 femtoseconds == 1 ns
+
+/// Revision ID (bits 0:7). Revision 1 is implemented (refer to v1.0a spec).
+const HPET_CAP_REV_ID_VALUE: u64 = 0x1;
+const HPET_CAP_REV_ID_SHIFT: usize = 0;
+/// Number of Timers (bits 8:12)
+const HPET_CAP_NUM_TIM_SHIFT: usize = 8;
+/// Counter Size (bit 13)
+const HPET_CAP_COUNT_SIZE_CAP_SHIFT: usize = 13;
+/// Legacy Replacement Route Capable (bit 15)
+const HPET_CAP_LEG_RT_CAP_SHIFT: usize = 15;
+/// Vendor ID (bits 16:31)
+const HPET_CAP_VENDER_ID_VALUE: u64 = 0x8086;
+const HPET_CAP_VENDER_ID_SHIFT: usize = 16;
+/// Main Counter Tick Period (bits 32:63)
+const HPET_CAP_CNT_CLK_PERIOD_SHIFT: usize = 32;
+
+/// Overall Enable (bit 0)
+const HPET_CFG_ENABLE_SHIFT: usize = 0;
+/// Legacy Replacement Route (bit 1)
+const HPET_CFG_LEG_RT_SHIFT: usize = 1;
+/// Other bits are reserved.
+const HPET_CFG_WRITE_MASK: u64 = 0x003;
+
+/// bit 0, 7, and bits 16:31 are reserved.
+/// bit 4, 5, 15, and bits 32:64 are read-only.
+const HPET_TN_CFG_WRITE_MASK: u64 = 0x7f4e;
+/// Timer N Interrupt Type (bit 1)
+const HPET_TN_CFG_INT_TYPE_SHIFT: usize = 1;
+/// Timer N Interrupt Enable (bit 2)
+const HPET_TN_CFG_INT_ENABLE_SHIFT: usize = 2;
+/// Timer N Type (Periodic enabled or not, bit 3)
+const HPET_TN_CFG_PERIODIC_SHIFT: usize = 3;
+/// Timer N Periodic Interrupt Capable (support Periodic or not, bit 4)
+const HPET_TN_CFG_PERIODIC_CAP_SHIFT: usize = 4;
+/// Timer N Size (timer size is 64-bits or 32 bits, bit 5)
+const HPET_TN_CFG_SIZE_CAP_SHIFT: usize = 5;
+/// Timer N Value Set (bit 6)
+const HPET_TN_CFG_SETVAL_SHIFT: usize = 6;
+/// Timer N 32-bit Mode (bit 8)
+const HPET_TN_CFG_32BIT_SHIFT: usize = 8;
+/// Timer N Interrupt Rout (bits 9:13)
+const HPET_TN_CFG_INT_ROUTE_MASK: u64 = 0x3e00;
+const HPET_TN_CFG_INT_ROUTE_SHIFT: usize = 9;
+/// Timer N FSB Interrupt Enable (bit 14)
+const HPET_TN_CFG_FSB_ENABLE_SHIFT: usize = 14;
+/// Timer N FSB Interrupt Delivery (bit 15)
+const HPET_TN_CFG_FSB_CAP_SHIFT: usize = 15;
+/// Timer N Interrupt Routing Capability (bits 32:63)
+const HPET_TN_CFG_INT_ROUTE_CAP_SHIFT: usize = 32;
+
+#[derive(qemu_api_macros::TryInto)]
+#[repr(u64)]
+#[allow(non_camel_case_types)]
+/// Timer registers, masked by 0x18
+enum TimerRegister {
+    /// Timer N Configuration and Capability Register
+    CFG = 0,
+    /// Timer N Comparator Value Register
+    CMP = 8,
+    /// Timer N FSB Interrupt Route Register
+    ROUTE = 16,
+}
+
+#[derive(qemu_api_macros::TryInto)]
+#[repr(u64)]
+#[allow(non_camel_case_types)]
+/// Global registers
+enum GlobalRegister {
+    /// General Capabilities and ID Register
+    CAP = 0,
+    /// General Configuration Register
+    CFG = 0x10,
+    /// General Interrupt Status Register
+    INT_STATUS = 0x20,
+    /// Main Counter Value Register
+    COUNTER = 0xF0,
+}
+
+enum HPETRegister<'a> {
+    /// Global register in the range from `0` to `0xff`
+    Global(GlobalRegister),
+
+    /// Register in the timer block `0x100`...`0x3ff`
+    Timer(&'a BqlRefCell<HPETTimer>, TimerRegister),
+
+    /// Invalid address
+    #[allow(dead_code)]
+    Unknown(hwaddr),
+}
+
+struct HPETAddrDecode<'a> {
+    shift: u32,
+    len: u32,
+    reg: HPETRegister<'a>,
+}
+
+const fn hpet_next_wrap(cur_tick: u64) -> u64 {
+    (cur_tick | 0xffffffff) + 1
+}
+
+const fn hpet_time_after(a: u64, b: u64) -> bool {
+    ((b - a) as i64) < 0
+}
+
+const fn ticks_to_ns(value: u64) -> u64 {
+    value * HPET_CLK_PERIOD
+}
+
+const fn ns_to_ticks(value: u64) -> u64 {
+    value / HPET_CLK_PERIOD
+}
+
+// Avoid touching the bits that cannot be written.
+const fn hpet_fixup_reg(new: u64, old: u64, mask: u64) -> u64 {
+    (new & mask) | (old & !mask)
+}
+
+const fn activating_bit(old: u64, new: u64, shift: usize) -> bool {
+    let mask: u64 = 1 << shift;
+    (old & mask == 0) && (new & mask != 0)
+}
+
+const fn deactivating_bit(old: u64, new: u64, shift: usize) -> bool {
+    let mask: u64 = 1 << shift;
+    (old & mask != 0) && (new & mask == 0)
+}
+
+fn timer_handler(timer_cell: &BqlRefCell<HPETTimer>) {
+    timer_cell.borrow_mut().callback()
+}
+
+/// HPET Timer Abstraction
+#[repr(C)]
+#[derive(Debug, qemu_api_macros::offsets)]
+pub struct HPETTimer {
+    /// timer N index within the timer block (`HPETState`)
+    #[doc(alias = "tn")]
+    index: usize,
+    qemu_timer: Timer,
+    /// timer block abstraction containing this timer
+    state: NonNull<HPETState>,
+
+    // Memory-mapped, software visible timer registers
+    /// Timer N Configuration and Capability Register
+    config: u64,
+    /// Timer N Comparator Value Register
+    cmp: u64,
+    /// Timer N FSB Interrupt Route Register
+    fsb: u64,
+
+    // Hidden register state
+    /// comparator (extended to counter width)
+    cmp64: u64,
+    /// Last value written to comparator
+    period: u64,
+    /// timer pop will indicate wrap for one-shot 32-bit
+    /// mode. Next pop will be actual timer expiration.
+    wrap_flag: u8,
+    /// last value armed, to avoid timer storms
+    last: u64,
+}
+
+impl HPETTimer {
+    fn init(&mut self, index: usize, state: &HPETState) {
+        *self = HPETTimer {
+            index,
+            // SAFETY: the HPETTimer will only be used after the timer
+            // is initialized below.
+            qemu_timer: unsafe { Timer::new() },
+            state: NonNull::new(state as *const _ as *mut _).unwrap(),
+            config: 0,
+            cmp: 0,
+            fsb: 0,
+            cmp64: 0,
+            period: 0,
+            wrap_flag: 0,
+            last: 0,
+        };
+
+        // SAFETY: HPETTimer is only used as part of HPETState, which is
+        // always pinned.
+        let qemu_timer = unsafe { Pin::new_unchecked(&mut self.qemu_timer) };
+        qemu_timer.init_full(
+            None,
+            CLOCK_VIRTUAL,
+            Timer::NS,
+            0,
+            timer_handler,
+            &state.timers[self.index],
+        )
+    }
+
+    fn get_state(&self) -> &HPETState {
+        // SAFETY:
+        // the pointer is convertible to a reference
+        unsafe { self.state.as_ref() }
+    }
+
+    fn is_int_active(&self) -> bool {
+        self.get_state().is_timer_int_active(self.index)
+    }
+
+    const fn is_fsb_route_enabled(&self) -> bool {
+        self.config & (1 << HPET_TN_CFG_FSB_ENABLE_SHIFT) != 0
+    }
+
+    const fn is_periodic(&self) -> bool {
+        self.config & (1 << HPET_TN_CFG_PERIODIC_SHIFT) != 0
+    }
+
+    const fn is_int_enabled(&self) -> bool {
+        self.config & (1 << HPET_TN_CFG_INT_ENABLE_SHIFT) != 0
+    }
+
+    const fn is_32bit_mod(&self) -> bool {
+        self.config & (1 << HPET_TN_CFG_32BIT_SHIFT) != 0
+    }
+
+    const fn is_valset_enabled(&self) -> bool {
+        self.config & (1 << HPET_TN_CFG_SETVAL_SHIFT) != 0
+    }
+
+    fn clear_valset(&mut self) {
+        self.config &= !(1 << HPET_TN_CFG_SETVAL_SHIFT);
+    }
+
+    /// True if timer interrupt is level triggered; otherwise, edge triggered.
+    const fn is_int_level_triggered(&self) -> bool {
+        self.config & (1 << HPET_TN_CFG_INT_TYPE_SHIFT) != 0
+    }
+
+    /// calculate next value of the general counter that matches the
+    /// target (either entirely, or the low 32-bit only depending on
+    /// the timer mode).
+    fn calculate_cmp64(&self, cur_tick: u64, target: u64) -> u64 {
+        if self.is_32bit_mod() {
+            let mut result: u64 = cur_tick.deposit(0, 32, target);
+            if result < cur_tick {
+                result += 0x100000000;
+            }
+            result
+        } else {
+            target
+        }
+    }
+
+    const fn get_individual_route(&self) -> usize {
+        ((self.config & HPET_TN_CFG_INT_ROUTE_MASK) >> HPET_TN_CFG_INT_ROUTE_SHIFT) as usize
+    }
+
+    fn get_int_route(&self) -> usize {
+        if self.index <= 1 && self.get_state().is_legacy_mode() {
+            // If LegacyReplacement Route bit is set, HPET specification requires
+            // timer0 be routed to IRQ0 in NON-APIC or IRQ2 in the I/O APIC,
+            // timer1 be routed to IRQ8 in NON-APIC or IRQ8 in the I/O APIC.
+            //
+            // If the LegacyReplacement Route bit is set, the individual routing
+            // bits for timers 0 and 1 (APIC or FSB) will have no impact.
+            //
+            // FIXME: Consider I/O APIC case.
+            if self.index == 0 {
+                0
+            } else {
+                RTC_ISA_IRQ
+            }
+        } else {
+            // (If the LegacyReplacement Route bit is set) Timer 2-n will be
+            // routed as per the routing in the timer n config registers.
+            // ...
+            // If the LegacyReplacement Route bit is not set, the individual
+            // routing bits for each of the timers are used.
+            self.get_individual_route()
+        }
+    }
+
+    fn set_irq(&mut self, set: bool) {
+        let route = self.get_int_route();
+
+        if set && self.is_int_enabled() && self.get_state().is_hpet_enabled() {
+            if self.is_fsb_route_enabled() {
+                // SAFETY:
+                // the parameters are valid.
+                unsafe {
+                    address_space_stl_le(
+                        addr_of_mut!(address_space_memory),
+                        self.fsb >> 32,  // Timer N FSB int addr
+                        self.fsb as u32, // Timer N FSB int value, truncate!
+                        MEMTXATTRS_UNSPECIFIED,
+                        null_mut(),
+                    );
+                }
+            } else if self.is_int_level_triggered() {
+                self.get_state().irqs[route].raise();
+            } else {
+                self.get_state().irqs[route].pulse();
+            }
+        } else if !self.is_fsb_route_enabled() {
+            self.get_state().irqs[route].lower();
+        }
+    }
+
+    fn update_irq(&mut self, set: bool) {
+        // If Timer N Interrupt Enable bit is 0, "the timer will
+        // still operate and generate appropriate status bits, but
+        // will not cause an interrupt"
+        self.get_state()
+            .update_int_status(self.index as u32, set && self.is_int_level_triggered());
+        self.set_irq(set);
+    }
+
+    fn arm_timer(&mut self, tick: u64) {
+        let mut ns = self.get_state().get_ns(tick);
+
+        // Clamp period to reasonable min value (1 us)
+        if self.is_periodic() && ns - self.last < 1000 {
+            ns = self.last + 1000;
+        }
+
+        self.last = ns;
+        self.qemu_timer.modify(self.last);
+    }
+
+    fn set_timer(&mut self) {
+        let cur_tick: u64 = self.get_state().get_ticks();
+
+        self.wrap_flag = 0;
+        self.cmp64 = self.calculate_cmp64(cur_tick, self.cmp);
+        if self.is_32bit_mod() {
+            // HPET spec says in one-shot 32-bit mode, generate an interrupt when
+            // counter wraps in addition to an interrupt with comparator match.
+            if !self.is_periodic() && self.cmp64 > hpet_next_wrap(cur_tick) {
+                self.wrap_flag = 1;
+                self.arm_timer(hpet_next_wrap(cur_tick));
+                return;
+            }
+        }
+        self.arm_timer(self.cmp64);
+    }
+
+    fn del_timer(&mut self) {
+        // Just remove the timer from the timer_list without destroying
+        // this timer instance.
+        self.qemu_timer.delete();
+
+        if self.is_int_active() {
+            // For level-triggered interrupt, this leaves interrupt status
+            // register set but lowers irq.
+            self.update_irq(true);
+        }
+    }
+
+    /// Configuration and Capability Register
+    fn set_tn_cfg_reg(&mut self, shift: u32, len: u32, val: u64) {
+        // TODO: Add trace point - trace_hpet_ram_write_tn_cfg(addr & 4)
+        let old_val: u64 = self.config;
+        let mut new_val: u64 = old_val.deposit(shift, len, val);
+        new_val = hpet_fixup_reg(new_val, old_val, HPET_TN_CFG_WRITE_MASK);
+
+        // Switch level-type interrupt to edge-type.
+        if deactivating_bit(old_val, new_val, HPET_TN_CFG_INT_TYPE_SHIFT) {
+            // Do this before changing timer.config; otherwise, if
+            // HPET_TN_FSB is set, update_irq will not lower the qemu_irq.
+            self.update_irq(false);
+        }
+
+        self.config = new_val;
+
+        if activating_bit(old_val, new_val, HPET_TN_CFG_INT_ENABLE_SHIFT) && self.is_int_active() {
+            self.update_irq(true);
+        }
+
+        if self.is_32bit_mod() {
+            self.cmp = u64::from(self.cmp as u32); // truncate!
+            self.period = u64::from(self.period as u32); // truncate!
+        }
+
+        if self.get_state().is_hpet_enabled() {
+            self.set_timer();
+        }
+    }
+
+    /// Comparator Value Register
+    fn set_tn_cmp_reg(&mut self, shift: u32, len: u32, val: u64) {
+        let mut length = len;
+        let mut value = val;
+
+        // TODO: Add trace point - trace_hpet_ram_write_tn_cmp(addr & 4)
+        if self.is_32bit_mod() {
+            // High 32-bits are zero, leave them untouched.
+            if shift != 0 {
+                // TODO: Add trace point - trace_hpet_ram_write_invalid_tn_cmp()
+                return;
+            }
+            length = 64;
+            value = u64::from(value as u32); // truncate!
+        }
+
+        if !self.is_periodic() || self.is_valset_enabled() {
+            self.cmp = self.cmp.deposit(shift, length, value);
+        }
+
+        if self.is_periodic() {
+            self.period = self.period.deposit(shift, length, value);
+        }
+
+        self.clear_valset();
+        if self.get_state().is_hpet_enabled() {
+            self.set_timer();
+        }
+    }
+
+    /// FSB Interrupt Route Register
+    fn set_tn_fsb_route_reg(&mut self, shift: u32, len: u32, val: u64) {
+        self.fsb = self.fsb.deposit(shift, len, val);
+    }
+
+    fn reset(&mut self) {
+        self.del_timer();
+        self.cmp = u64::MAX; // Comparator Match Registers reset to all 1's.
+        self.config = (1 << HPET_TN_CFG_PERIODIC_CAP_SHIFT) | (1 << HPET_TN_CFG_SIZE_CAP_SHIFT);
+        if self.get_state().has_msi_flag() {
+            self.config |= 1 << HPET_TN_CFG_FSB_CAP_SHIFT;
+        }
+        // advertise availability of ioapic int
+        self.config |=
+            (u64::from(self.get_state().int_route_cap)) << HPET_TN_CFG_INT_ROUTE_CAP_SHIFT;
+        self.period = 0;
+        self.wrap_flag = 0;
+    }
+
+    /// timer expiration callback
+    fn callback(&mut self) {
+        let period: u64 = self.period;
+        let cur_tick: u64 = self.get_state().get_ticks();
+
+        if self.is_periodic() && period != 0 {
+            while hpet_time_after(cur_tick, self.cmp64) {
+                self.cmp64 += period;
+            }
+            if self.is_32bit_mod() {
+                self.cmp = u64::from(self.cmp64 as u32); // truncate!
+            } else {
+                self.cmp = self.cmp64;
+            }
+            self.arm_timer(self.cmp64);
+        } else if self.wrap_flag != 0 {
+            self.wrap_flag = 0;
+            self.arm_timer(self.cmp64);
+        }
+        self.update_irq(true);
+    }
+
+    const fn read(&self, reg: TimerRegister) -> u64 {
+        use TimerRegister::*;
+        match reg {
+            CFG => self.config, // including interrupt capabilities
+            CMP => self.cmp,    // comparator register
+            ROUTE => self.fsb,
+        }
+    }
+
+    fn write(&mut self, reg: TimerRegister, value: u64, shift: u32, len: u32) {
+        use TimerRegister::*;
+        match reg {
+            CFG => self.set_tn_cfg_reg(shift, len, value),
+            CMP => self.set_tn_cmp_reg(shift, len, value),
+            ROUTE => self.set_tn_fsb_route_reg(shift, len, value),
+        }
+    }
+}
+
+/// HPET Event Timer Block Abstraction
+#[repr(C)]
+#[derive(qemu_api_macros::Object, qemu_api_macros::offsets)]
+pub struct HPETState {
+    parent_obj: ParentField<SysBusDevice>,
+    iomem: MemoryRegion,
+
+    // HPET block Registers: Memory-mapped, software visible registers
+    /// General Capabilities and ID Register
+    capability: BqlCell<u64>,
+    ///  General Configuration Register
+    config: BqlCell<u64>,
+    /// General Interrupt Status Register
+    #[doc(alias = "isr")]
+    int_status: BqlCell<u64>,
+    /// Main Counter Value Register
+    #[doc(alias = "hpet_counter")]
+    counter: BqlCell<u64>,
+
+    // Internal state
+    /// Capabilities that QEMU HPET supports.
+    /// bit 0: MSI (or FSB) support.
+    flags: u32,
+
+    /// Offset of main counter relative to qemu clock.
+    hpet_offset: BqlCell<u64>,
+    hpet_offset_saved: bool,
+
+    irqs: [InterruptSource; HPET_NUM_IRQ_ROUTES],
+    rtc_irq_level: BqlCell<u32>,
+    pit_enabled: InterruptSource,
+
+    /// Interrupt Routing Capability.
+    /// This field indicates to which interrupts in the I/O (x) APIC
+    /// the timers' interrupt can be routed, and is encoded in the
+    /// bits 32:64 of timer N's config register:
+    #[doc(alias = "intcap")]
+    int_route_cap: u32,
+
+    /// HPET timer array managed by this timer block.
+    #[doc(alias = "timer")]
+    timers: [BqlRefCell<HPETTimer>; HPET_MAX_TIMERS],
+    num_timers: BqlCell<usize>,
+
+    /// Instance id (HPET timer block ID).
+    hpet_id: BqlCell<usize>,
+}
+
+impl HPETState {
+    const fn has_msi_flag(&self) -> bool {
+        self.flags & (1 << HPET_FLAG_MSI_SUPPORT_SHIFT) != 0
+    }
+
+    fn is_legacy_mode(&self) -> bool {
+        self.config.get() & (1 << HPET_CFG_LEG_RT_SHIFT) != 0
+    }
+
+    fn is_hpet_enabled(&self) -> bool {
+        self.config.get() & (1 << HPET_CFG_ENABLE_SHIFT) != 0
+    }
+
+    fn is_timer_int_active(&self, index: usize) -> bool {
+        self.int_status.get() & (1 << index) != 0
+    }
+
+    fn get_ticks(&self) -> u64 {
+        ns_to_ticks(CLOCK_VIRTUAL.get_ns() + self.hpet_offset.get())
+    }
+
+    fn get_ns(&self, tick: u64) -> u64 {
+        ticks_to_ns(tick) - self.hpet_offset.get()
+    }
+
+    fn handle_legacy_irq(&self, irq: u32, level: u32) {
+        if irq == HPET_LEGACY_PIT_INT {
+            if !self.is_legacy_mode() {
+                self.irqs[0].set(level != 0);
+            }
+        } else {
+            self.rtc_irq_level.set(level);
+            if !self.is_legacy_mode() {
+                self.irqs[RTC_ISA_IRQ].set(level != 0);
+            }
+        }
+    }
+
+    fn init_timer(&self) {
+        for (index, timer) in self.timers.iter().enumerate() {
+            timer.borrow_mut().init(index, self);
+        }
+    }
+
+    fn update_int_status(&self, index: u32, level: bool) {
+        self.int_status
+            .set(self.int_status.get().deposit(index, 1, u64::from(level)));
+    }
+
+    /// General Configuration Register
+    fn set_cfg_reg(&self, shift: u32, len: u32, val: u64) {
+        let old_val = self.config.get();
+        let mut new_val = old_val.deposit(shift, len, val);
+
+        new_val = hpet_fixup_reg(new_val, old_val, HPET_CFG_WRITE_MASK);
+        self.config.set(new_val);
+
+        if activating_bit(old_val, new_val, HPET_CFG_ENABLE_SHIFT) {
+            // Enable main counter and interrupt generation.
+            self.hpet_offset
+                .set(ticks_to_ns(self.counter.get()) - CLOCK_VIRTUAL.get_ns());
+
+            for timer in self.timers.iter().take(self.num_timers.get()) {
+                let mut t = timer.borrow_mut();
+
+                if t.is_int_enabled() && t.is_int_active() {
+                    t.update_irq(true);
+                }
+                t.set_timer();
+            }
+        } else if deactivating_bit(old_val, new_val, HPET_CFG_ENABLE_SHIFT) {
+            // Halt main counter and disable interrupt generation.
+            self.counter.set(self.get_ticks());
+
+            for timer in self.timers.iter().take(self.num_timers.get()) {
+                timer.borrow_mut().del_timer();
+            }
+        }
+
+        // i8254 and RTC output pins are disabled when HPET is in legacy mode
+        if activating_bit(old_val, new_val, HPET_CFG_LEG_RT_SHIFT) {
+            self.pit_enabled.set(false);
+            self.irqs[0].lower();
+            self.irqs[RTC_ISA_IRQ].lower();
+        } else if deactivating_bit(old_val, new_val, HPET_CFG_LEG_RT_SHIFT) {
+            // TODO: Add irq binding: qemu_irq_lower(s->irqs[0])
+            self.irqs[0].lower();
+            self.pit_enabled.set(true);
+            self.irqs[RTC_ISA_IRQ].set(self.rtc_irq_level.get() != 0);
+        }
+    }
+
+    /// General Interrupt Status Register: Read/Write Clear
+    fn set_int_status_reg(&self, shift: u32, _len: u32, val: u64) {
+        let new_val = val << shift;
+        let cleared = new_val & self.int_status.get();
+
+        for (index, timer) in self.timers.iter().take(self.num_timers.get()).enumerate() {
+            if cleared & (1 << index) != 0 {
+                timer.borrow_mut().update_irq(false);
+            }
+        }
+    }
+
+    /// Main Counter Value Register
+    fn set_counter_reg(&self, shift: u32, len: u32, val: u64) {
+        if self.is_hpet_enabled() {
+            // TODO: Add trace point -
+            // trace_hpet_ram_write_counter_write_while_enabled()
+            //
+            // HPET spec says that writes to this register should only be
+            // done while the counter is halted. So this is an undefined
+            // behavior. There's no need to forbid it, but when HPET is
+            // enabled, the changed counter value will not affect the
+            // tick count (i.e., the previously calculated offset will
+            // not be changed as well).
+        }
+        self.counter
+            .set(self.counter.get().deposit(shift, len, val));
+    }
+
+    unsafe fn init(&mut self) {
+        static HPET_RAM_OPS: MemoryRegionOps<HPETState> =
+            MemoryRegionOpsBuilder::<HPETState>::new()
+                .read(&HPETState::read)
+                .write(&HPETState::write)
+                .native_endian()
+                .valid_sizes(4, 8)
+                .impl_sizes(4, 8)
+                .build();
+
+        // SAFETY:
+        // self and self.iomem are guaranteed to be valid at this point since callers
+        // must make sure the `self` reference is valid.
+        MemoryRegion::init_io(
+            unsafe { &mut *addr_of_mut!(self.iomem) },
+            addr_of_mut!(*self),
+            &HPET_RAM_OPS,
+            "hpet",
+            HPET_REG_SPACE_LEN,
+        );
+    }
+
+    fn post_init(&self) {
+        self.init_mmio(&self.iomem);
+        for irq in self.irqs.iter() {
+            self.init_irq(irq);
+        }
+    }
+
+    fn realize(&self) {
+        if self.int_route_cap == 0 {
+            // TODO: Add error binding: warn_report()
+            println!("Hpet's hpet-intcap property not initialized");
+        }
+
+        self.hpet_id.set(HPETFwConfig::assign_hpet_id());
+
+        if self.num_timers.get() < HPET_MIN_TIMERS {
+            self.num_timers.set(HPET_MIN_TIMERS);
+        } else if self.num_timers.get() > HPET_MAX_TIMERS {
+            self.num_timers.set(HPET_MAX_TIMERS);
+        }
+
+        self.init_timer();
+        // 64-bit General Capabilities and ID Register; LegacyReplacementRoute.
+        self.capability.set(
+            HPET_CAP_REV_ID_VALUE << HPET_CAP_REV_ID_SHIFT |
+            1 << HPET_CAP_COUNT_SIZE_CAP_SHIFT |
+            1 << HPET_CAP_LEG_RT_CAP_SHIFT |
+            HPET_CAP_VENDER_ID_VALUE << HPET_CAP_VENDER_ID_SHIFT |
+            ((self.num_timers.get() - 1) as u64) << HPET_CAP_NUM_TIM_SHIFT | // indicate the last timer
+            (HPET_CLK_PERIOD * FS_PER_NS) << HPET_CAP_CNT_CLK_PERIOD_SHIFT, // 10 ns
+        );
+
+        self.init_gpio_in(2, HPETState::handle_legacy_irq);
+        self.init_gpio_out(from_ref(&self.pit_enabled));
+    }
+
+    fn reset_hold(&self, _type: ResetType) {
+        for timer in self.timers.iter().take(self.num_timers.get()) {
+            timer.borrow_mut().reset();
+        }
+
+        self.counter.set(0);
+        self.config.set(0);
+        self.pit_enabled.set(true);
+        self.hpet_offset.set(0);
+
+        HPETFwConfig::update_hpet_cfg(
+            self.hpet_id.get(),
+            self.capability.get() as u32,
+            self.mmio_addr(0).unwrap(),
+        );
+
+        // to document that the RTC lowers its output on reset as well
+        self.rtc_irq_level.set(0);
+    }
+
+    fn decode(&self, mut addr: hwaddr, size: u32) -> HPETAddrDecode {
+        let shift = ((addr & 4) * 8) as u32;
+        let len = std::cmp::min(size * 8, 64 - shift);
+
+        addr &= !4;
+        let reg = if (0..=0xff).contains(&addr) {
+            GlobalRegister::try_from(addr).map(HPETRegister::Global)
+        } else {
+            let timer_id: usize = ((addr - 0x100) / 0x20) as usize;
+            if timer_id <= self.num_timers.get() {
+                // TODO: Add trace point - trace_hpet_ram_[read|write]_timer_id(timer_id)
+                TimerRegister::try_from(addr)
+                    .map(|reg| HPETRegister::Timer(&self.timers[timer_id], reg))
+            } else {
+                // TODO: Add trace point -  trace_hpet_timer_id_out_of_range(timer_id)
+                Err(addr)
+            }
+        };
+
+        // reg is now a Result<HPETRegister, hwaddr>
+        // convert the Err case into HPETRegister as well
+        let reg = reg.unwrap_or_else(HPETRegister::Unknown);
+        HPETAddrDecode { shift, len, reg }
+    }
+
+    fn read(&self, addr: hwaddr, size: u32) -> u64 {
+        // TODO: Add trace point - trace_hpet_ram_read(addr)
+        let HPETAddrDecode { shift, reg, .. } = self.decode(addr, size);
+
+        use GlobalRegister::*;
+        use HPETRegister::*;
+        (match reg {
+            Timer(timer, tn_reg) => timer.borrow_mut().read(tn_reg),
+            Global(CAP) => self.capability.get(), /* including HPET_PERIOD 0x004 */
+            Global(CFG) => self.config.get(),
+            Global(INT_STATUS) => self.int_status.get(),
+            Global(COUNTER) => {
+                // TODO: Add trace point
+                // trace_hpet_ram_read_reading_counter(addr & 4, cur_tick)
+                if self.is_hpet_enabled() {
+                    self.get_ticks()
+                } else {
+                    self.counter.get()
+                }
+            }
+            Unknown(_) => {
+                // TODO: Add trace point- trace_hpet_ram_read_invalid()
+                0
+            }
+        }) >> shift
+    }
+
+    fn write(&self, addr: hwaddr, value: u64, size: u32) {
+        let HPETAddrDecode { shift, len, reg } = self.decode(addr, size);
+
+        // TODO: Add trace point - trace_hpet_ram_write(addr, value)
+        use GlobalRegister::*;
+        use HPETRegister::*;
+        match reg {
+            Timer(timer, tn_reg) => timer.borrow_mut().write(tn_reg, value, shift, len),
+            Global(CAP) => {} // General Capabilities and ID Register: Read Only
+            Global(CFG) => self.set_cfg_reg(shift, len, value),
+            Global(INT_STATUS) => self.set_int_status_reg(shift, len, value),
+            Global(COUNTER) => self.set_counter_reg(shift, len, value),
+            Unknown(_) => {
+                // TODO: Add trace point - trace_hpet_ram_write_invalid()
+            }
+        }
+    }
+}
+
+qom_isa!(HPETState: SysBusDevice, DeviceState, Object);
+
+unsafe impl ObjectType for HPETState {
+    // No need for HPETClass. Just like OBJECT_DECLARE_SIMPLE_TYPE in C.
+    type Class = <SysBusDevice as ObjectType>::Class;
+    const TYPE_NAME: &'static CStr = crate::TYPE_HPET;
+}
+
+impl ObjectImpl for HPETState {
+    type ParentType = SysBusDevice;
+
+    const INSTANCE_INIT: Option<unsafe fn(&mut Self)> = Some(Self::init);
+    const INSTANCE_POST_INIT: Option<fn(&Self)> = Some(Self::post_init);
+    const CLASS_INIT: fn(&mut Self::Class) = Self::Class::class_init::<Self>;
+}
+
+// TODO: Make these properties user-configurable!
+qemu_api::declare_properties! {
+    HPET_PROPERTIES,
+    qemu_api::define_property!(
+        c_str!("timers"),
+        HPETState,
+        num_timers,
+        unsafe { &qdev_prop_uint8 },
+        u8,
+        default = HPET_MIN_TIMERS
+    ),
+    qemu_api::define_property!(
+        c_str!("msi"),
+        HPETState,
+        flags,
+        unsafe { &qdev_prop_bit },
+        u32,
+        bit = HPET_FLAG_MSI_SUPPORT_SHIFT as u8,
+        default = false,
+    ),
+    qemu_api::define_property!(
+        c_str!("hpet-intcap"),
+        HPETState,
+        int_route_cap,
+        unsafe { &qdev_prop_uint32 },
+        u32,
+        default = 0
+    ),
+    qemu_api::define_property!(
+        c_str!("hpet-offset-saved"),
+        HPETState,
+        hpet_offset_saved,
+        unsafe { &qdev_prop_bool },
+        bool,
+        default = true
+    ),
+}
+
+impl DeviceImpl for HPETState {
+    fn properties() -> &'static [Property] {
+        &HPET_PROPERTIES
+    }
+
+    const REALIZE: Option<fn(&Self)> = Some(Self::realize);
+}
+
+impl ResettablePhasesImpl for HPETState {
+    const HOLD: Option<fn(&Self, ResetType)> = Some(Self::reset_hold);
+}
+
+impl SysBusDeviceImpl for HPETState {}
diff --git a/rust/hw/timer/hpet/src/lib.rs b/rust/hw/timer/hpet/src/lib.rs
new file mode 100644
index 0000000000000..5e7c961c28940
--- /dev/null
+++ b/rust/hw/timer/hpet/src/lib.rs
@@ -0,0 +1,15 @@
+// Copyright (C) 2024 Intel Corporation.
+// Author(s): Zhao Liu <zhai1.liu@intel.com>
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+//! # HPET QEMU Device Model
+//!
+//! This library implements a device model for the IA-PC HPET (High
+//! Precision Event Timers) device in QEMU.
+
+use qemu_api::c_str;
+
+pub mod fw_cfg;
+pub mod hpet;
+
+pub const TYPE_HPET: &::std::ffi::CStr = c_str!("hpet");
diff --git a/rust/hw/timer/meson.build b/rust/hw/timer/meson.build
new file mode 100644
index 0000000000000..22a84f15536be
--- /dev/null
+++ b/rust/hw/timer/meson.build
@@ -0,0 +1 @@
+subdir('hpet')
diff --git a/rust/qemu-api-macros/src/lib.rs b/rust/qemu-api-macros/src/lib.rs
index 7ec218202f41d..eda0d46d12255 100644
--- a/rust/qemu-api-macros/src/lib.rs
+++ b/rust/qemu-api-macros/src/lib.rs
@@ -6,7 +6,7 @@ use proc_macro::TokenStream;
 use quote::quote;
 use syn::{
     parse_macro_input, parse_quote, punctuated::Punctuated, spanned::Spanned, token::Comma, Data,
-    DeriveInput, Field, Fields, Ident, Meta, Path, Token, Type, Variant, Visibility,
+    DeriveInput, Field, Fields, FieldsUnnamed, Ident, Meta, Path, Token, Type, Variant, Visibility,
 };
 
 mod utils;
@@ -33,6 +33,35 @@ fn get_fields<'a>(
     }
 }
 
+fn get_unnamed_field<'a>(input: &'a DeriveInput, msg: &str) -> Result<&'a Field, MacroError> {
+    if let Data::Struct(s) = &input.data {
+        let unnamed = match &s.fields {
+            Fields::Unnamed(FieldsUnnamed {
+                unnamed: ref fields,
+                ..
+            }) => fields,
+            _ => {
+                return Err(MacroError::Message(
+                    format!("Tuple struct required for {}", msg),
+                    s.fields.span(),
+                ))
+            }
+        };
+        if unnamed.len() != 1 {
+            return Err(MacroError::Message(
+                format!("A single field is required for {}", msg),
+                s.fields.span(),
+            ));
+        }
+        Ok(&unnamed[0])
+    } else {
+        Err(MacroError::Message(
+            format!("Struct required for {}", msg),
+            input.ident.span(),
+        ))
+    }
+}
+
 fn is_c_repr(input: &DeriveInput, msg: &str) -> Result<(), MacroError> {
     let expected = parse_quote! { #[repr(C)] };
 
@@ -46,6 +75,19 @@ fn is_c_repr(input: &DeriveInput, msg: &str) -> Result<(), MacroError> {
     }
 }
 
+fn is_transparent_repr(input: &DeriveInput, msg: &str) -> Result<(), MacroError> {
+    let expected = parse_quote! { #[repr(transparent)] };
+
+    if input.attrs.iter().any(|attr| attr == &expected) {
+        Ok(())
+    } else {
+        Err(MacroError::Message(
+            format!("#[repr(transparent)] required for {}", msg),
+            input.ident.span(),
+        ))
+    }
+}
+
 fn derive_object_or_error(input: DeriveInput) -> Result<proc_macro2::TokenStream, MacroError> {
     is_c_repr(&input, "#[derive(Object)]")?;
 
@@ -72,6 +114,52 @@ pub fn derive_object(input: TokenStream) -> TokenStream {
     TokenStream::from(expanded)
 }
 
+fn derive_opaque_or_error(input: DeriveInput) -> Result<proc_macro2::TokenStream, MacroError> {
+    is_transparent_repr(&input, "#[derive(Wrapper)]")?;
+
+    let name = &input.ident;
+    let field = &get_unnamed_field(&input, "#[derive(Wrapper)]")?;
+    let typ = &field.ty;
+
+    // TODO: how to add "::qemu_api"?  For now, this is only used in the
+    // qemu_api crate so it's not a problem.
+    Ok(quote! {
+        unsafe impl crate::cell::Wrapper for #name {
+            type Wrapped = <#typ as crate::cell::Wrapper>::Wrapped;
+        }
+        impl #name {
+            pub unsafe fn from_raw<'a>(ptr: *mut <Self as crate::cell::Wrapper>::Wrapped) -> &'a Self {
+                let ptr = ::std::ptr::NonNull::new(ptr).unwrap().cast::<Self>();
+                unsafe { ptr.as_ref() }
+            }
+
+            pub const fn as_mut_ptr(&self) -> *mut <Self as crate::cell::Wrapper>::Wrapped {
+                self.0.as_mut_ptr()
+            }
+
+            pub const fn as_ptr(&self) -> *const <Self as crate::cell::Wrapper>::Wrapped {
+                self.0.as_ptr()
+            }
+
+            pub const fn as_void_ptr(&self) -> *mut ::core::ffi::c_void {
+                self.0.as_void_ptr()
+            }
+
+            pub const fn raw_get(slot: *mut Self) -> *mut <Self as crate::cell::Wrapper>::Wrapped {
+                slot.cast()
+            }
+        }
+    })
+}
+
+#[proc_macro_derive(Wrapper)]
+pub fn derive_opaque(input: TokenStream) -> TokenStream {
+    let input = parse_macro_input!(input as DeriveInput);
+    let expanded = derive_opaque_or_error(input).unwrap_or_else(Into::into);
+
+    TokenStream::from(expanded)
+}
+
 #[rustfmt::skip::macros(quote)]
 fn derive_offsets_or_error(input: DeriveInput) -> Result<proc_macro2::TokenStream, MacroError> {
     is_c_repr(&input, "#[derive(offsets)]")?;
diff --git a/rust/qemu-api/Cargo.toml b/rust/qemu-api/Cargo.toml
index a51dd14285229..57747bc934149 100644
--- a/rust/qemu-api/Cargo.toml
+++ b/rust/qemu-api/Cargo.toml
@@ -16,6 +16,7 @@ rust-version = "1.63.0"
 
 [dependencies]
 qemu_api_macros = { path = "../qemu-api-macros" }
+libc = "0.2.162"
 
 [build-dependencies]
 version_check = "~0.9"
diff --git a/rust/qemu-api/meson.build b/rust/qemu-api/meson.build
index 60944a657de5f..a3f226ccc2a59 100644
--- a/rust/qemu-api/meson.build
+++ b/rust/qemu-api/meson.build
@@ -2,6 +2,8 @@ _qemu_api_cfg = run_command(rustc_args,
   '--config-headers', config_host_h, '--features', files('Cargo.toml'),
   capture: true, check: true).stdout().strip().splitlines()
 
+libc_dep = dependency('libc-0.2-rs')
+
 # _qemu_api_cfg += ['--cfg', 'feature="allocator"']
 if rustc.version().version_compare('>=1.77.0')
   _qemu_api_cfg += ['--cfg', 'has_offset_of']
@@ -20,14 +22,18 @@ _qemu_api_rs = static_library(
       'src/bitops.rs',
       'src/callbacks.rs',
       'src/cell.rs',
+      'src/chardev.rs',
       'src/c_str.rs',
+      'src/errno.rs',
       'src/irq.rs',
+      'src/memory.rs',
       'src/module.rs',
       'src/offset_of.rs',
       'src/prelude.rs',
       'src/qdev.rs',
       'src/qom.rs',
       'src/sysbus.rs',
+      'src/timer.rs',
       'src/vmstate.rs',
       'src/zeroable.rs',
     ],
@@ -36,21 +42,31 @@ _qemu_api_rs = static_library(
   override_options: ['rust_std=2021', 'build.rust_std=2021'],
   rust_abi: 'rust',
   rust_args: _qemu_api_cfg,
+  dependencies: [libc_dep, qemu_api_macros],
 )
 
 rust.test('rust-qemu-api-tests', _qemu_api_rs,
           suite: ['unit', 'rust'])
 
-qemu_api = declare_dependency(
-  link_with: _qemu_api_rs,
-  dependencies: qemu_api_macros,
-)
+qemu_api = declare_dependency(link_with: _qemu_api_rs)
 
 # Rust executables do not support objects, so add an intermediate step.
 rust_qemu_api_objs = static_library(
     'rust_qemu_api_objs',
     objects: [libqom.extract_all_objects(recursive: false),
-              libhwcore.extract_all_objects(recursive: false)])
+              libhwcore.extract_all_objects(recursive: false),
+              libchardev.extract_all_objects(recursive: false),
+              libcrypto.extract_all_objects(recursive: false),
+              libauthz.extract_all_objects(recursive: false),
+              libio.extract_all_objects(recursive: false)])
+rust_qemu_api_deps = declare_dependency(
+    dependencies: [
+      qom_ss.dependencies(),
+      chardev_ss.dependencies(),
+      crypto_ss.dependencies(),
+      authz_ss.dependencies(),
+      io_ss.dependencies()],
+    link_whole: [rust_qemu_api_objs, libqemuutil])
 
 test('rust-qemu-api-integration',
     executable(
@@ -59,8 +75,7 @@ test('rust-qemu-api-integration',
         override_options: ['rust_std=2021', 'build.rust_std=2021'],
         rust_args: ['--test'],
         install: false,
-        dependencies: [qemu_api, qemu_api_macros],
-        link_whole: [rust_qemu_api_objs, libqemuutil]),
+        dependencies: [qemu_api, qemu_api_macros, rust_qemu_api_deps]),
     args: [
         '--test', '--test-threads', '1',
         '--format', 'pretty',
diff --git a/rust/qemu-api/src/assertions.rs b/rust/qemu-api/src/assertions.rs
index 6e420469806eb..104dec39774e2 100644
--- a/rust/qemu-api/src/assertions.rs
+++ b/rust/qemu-api/src/assertions.rs
@@ -2,9 +2,13 @@
 // Author(s): Paolo Bonzini <pbonzini@redhat.com>
 // SPDX-License-Identifier: GPL-2.0-or-later
 
+#![doc(hidden)]
 //! This module provides macros to check the equality of types and
 //! the type of `struct` fields.  This can be useful to ensure that
 //! types match the expectations of C code.
+//!
+//! Documentation is hidden because it only exposes macros, which
+//! are exported directly from `qemu_api`.
 
 // Based on https://stackoverflow.com/questions/64251852/x/70978292#70978292
 // (stackoverflow answers are released under MIT license).
@@ -88,3 +92,31 @@ macro_rules! assert_field_type {
         };
     };
 }
+
+/// Assert that an expression matches a pattern.  This can also be
+/// useful to compare enums that do not implement `Eq`.
+///
+/// # Examples
+///
+/// ```
+/// # use qemu_api::assert_match;
+/// // JoinHandle does not implement `Eq`, therefore the result
+/// // does not either.
+/// let result: Result<std::thread::JoinHandle<()>, u32> = Err(42);
+/// assert_match!(result, Err(42));
+/// ```
+#[macro_export]
+macro_rules! assert_match {
+    ($a:expr, $b:pat) => {
+        assert!(
+            match $a {
+                $b => true,
+                _ => false,
+            },
+            "{} = {:?} does not match {}",
+            stringify!($a),
+            $a,
+            stringify!($b)
+        );
+    };
+}
diff --git a/rust/qemu-api/src/bindings.rs b/rust/qemu-api/src/bindings.rs
index 8a9b821bb918a..3c1d297581eaf 100644
--- a/rust/qemu-api/src/bindings.rs
+++ b/rust/qemu-api/src/bindings.rs
@@ -15,15 +15,41 @@
     clippy::missing_safety_doc
 )]
 
+//! `bindgen`-generated declarations.
+
 #[cfg(MESON)]
 include!("bindings.inc.rs");
 
 #[cfg(not(MESON))]
 include!(concat!(env!("OUT_DIR"), "/bindings.inc.rs"));
 
+// SAFETY: these are implemented in C; the bindings need to assert that the
+// BQL is taken, either directly or via `BqlCell` and `BqlRefCell`.
+// When bindings for character devices are introduced, this can be
+// moved to the Opaque<> wrapper in src/chardev.rs.
+unsafe impl Send for CharBackend {}
+unsafe impl Sync for CharBackend {}
+
+// SAFETY: this is a pure data struct
+unsafe impl Send for CoalescedMemoryRange {}
+unsafe impl Sync for CoalescedMemoryRange {}
+
+// SAFETY: these are constants and vtables; the Send and Sync requirements
+// are deferred to the unsafe callbacks that they contain
+unsafe impl Send for MemoryRegionOps {}
+unsafe impl Sync for MemoryRegionOps {}
+
 unsafe impl Send for Property {}
 unsafe impl Sync for Property {}
+
+unsafe impl Send for TypeInfo {}
 unsafe impl Sync for TypeInfo {}
+
+unsafe impl Send for VMStateDescription {}
 unsafe impl Sync for VMStateDescription {}
+
+unsafe impl Send for VMStateField {}
 unsafe impl Sync for VMStateField {}
+
+unsafe impl Send for VMStateInfo {}
 unsafe impl Sync for VMStateInfo {}
diff --git a/rust/qemu-api/src/c_str.rs b/rust/qemu-api/src/c_str.rs
index 4cd96da0b45df..3fa61b59c7680 100644
--- a/rust/qemu-api/src/c_str.rs
+++ b/rust/qemu-api/src/c_str.rs
@@ -2,6 +2,14 @@
 // Author(s): Paolo Bonzini <pbonzini@redhat.com>
 // SPDX-License-Identifier: GPL-2.0-or-later
 
+#![doc(hidden)]
+//! This module provides a macro to define a constant of type
+//! [`CStr`](std::ffi::CStr), for compatibility with versions of
+//! Rust that lack `c""` literals.
+//!
+//! Documentation is hidden because it only exposes macros, which
+//! are exported directly from `qemu_api`.
+
 #[macro_export]
 /// Given a string constant _without_ embedded or trailing NULs, return
 /// a `CStr`.
diff --git a/rust/qemu-api/src/callbacks.rs b/rust/qemu-api/src/callbacks.rs
index 314f9dce9622b..9642a16eb89bb 100644
--- a/rust/qemu-api/src/callbacks.rs
+++ b/rust/qemu-api/src/callbacks.rs
@@ -79,6 +79,31 @@ use std::{mem, ptr::NonNull};
 /// call_it(&move |_| String::from(x), "hello workd");
 /// ```
 ///
+/// `()` can be used to indicate "no function":
+///
+/// ```
+/// # use qemu_api::callbacks::FnCall;
+/// fn optional<F: for<'a> FnCall<(&'a str,), String>>(_f: &F, s: &str) -> Option<String> {
+///     if F::IS_SOME {
+///         Some(F::call((s,)))
+///     } else {
+///         None
+///     }
+/// }
+///
+/// assert!(optional(&(), "hello world").is_none());
+/// ```
+///
+/// Invoking `F::call` will then be a run-time error.
+///
+/// ```should_panic
+/// # use qemu_api::callbacks::FnCall;
+/// # fn call_it<F: for<'a> FnCall<(&'a str,), String>>(_f: &F, s: &str) -> String {
+/// #     F::call((s,))
+/// # }
+/// let s: String = call_it(&(), "hello world"); // panics
+/// ```
+///
 /// # Safety
 ///
 /// Because `Self` is a zero-sized type, all instances of the type are
@@ -93,10 +118,70 @@ pub unsafe trait FnCall<Args, R = ()>: 'static + Sync + Sized {
     /// Rust 1.79.0+.
     const ASSERT_ZERO_SIZED: () = { assert!(mem::size_of::<Self>() == 0) };
 
+    /// Referring to this constant asserts that the `Self` type is an actual
+    /// function type, which can be used to catch incorrect use of `()`
+    /// at compile time.
+    ///
+    /// # Examples
+    ///
+    /// ```compile_fail
+    /// # use qemu_api::callbacks::FnCall;
+    /// fn call_it<F: for<'a> FnCall<(&'a str,), String>>(_f: &F, s: &str) -> String {
+    ///     let _: () = F::ASSERT_IS_SOME;
+    ///     F::call((s,))
+    /// }
+    ///
+    /// let s: String = call_it((), "hello world"); // does not compile
+    /// ```
+    ///
+    /// Note that this can be more simply `const { assert!(F::IS_SOME) }` in
+    /// Rust 1.79.0 or newer.
+    const ASSERT_IS_SOME: () = { assert!(Self::IS_SOME) };
+
+    /// `true` if `Self` is an actual function type and not `()`.
+    ///
+    /// # Examples
+    ///
+    /// You can use `IS_SOME` to catch this at compile time:
+    ///
+    /// ```compile_fail
+    /// # use qemu_api::callbacks::FnCall;
+    /// fn call_it<F: for<'a> FnCall<(&'a str,), String>>(_f: &F, s: &str) -> String {
+    ///     const { assert!(F::IS_SOME) }
+    ///     F::call((s,))
+    /// }
+    ///
+    /// let s: String = call_it((), "hello world"); // does not compile
+    /// ```
+    const IS_SOME: bool;
+
+    /// `false` if `Self` is an actual function type, `true` if it is `()`.
+    fn is_none() -> bool {
+        !Self::IS_SOME
+    }
+
+    /// `true` if `Self` is an actual function type, `false` if it is `()`.
+    fn is_some() -> bool {
+        Self::IS_SOME
+    }
+
     /// Call the function with the arguments in args.
     fn call(a: Args) -> R;
 }
 
+/// `()` acts as a "null" callback.  Using `()` and `function` is nicer
+/// than `None` and `Some(function)`, because the compiler is unable to
+/// infer the type of just `None`.  Therefore, the trait itself acts as the
+/// option type, with functions [`FnCall::is_some`] and [`FnCall::is_none`].
+unsafe impl<Args, R> FnCall<Args, R> for () {
+    const IS_SOME: bool = false;
+
+    /// Call the function with the arguments in args.
+    fn call(_a: Args) -> R {
+        panic!("callback not specified")
+    }
+}
+
 macro_rules! impl_call {
     ($($args:ident,)* ) => (
         // SAFETY: because each function is treated as a separate type,
@@ -106,6 +191,8 @@ macro_rules! impl_call {
         where
             F: 'static + Sync + Sized + Fn($($args, )*) -> R,
         {
+            const IS_SOME: bool = true;
+
             #[inline(always)]
             fn call(a: ($($args,)*)) -> R {
                 let _: () = Self::ASSERT_ZERO_SIZED;
@@ -141,4 +228,14 @@ mod tests {
     fn test_call() {
         assert_eq!(do_test_call(&str::to_owned), "hello world")
     }
+
+    // The `_f` parameter is unused but it helps the compiler infer `F`.
+    fn do_test_is_some<'a, F: FnCall<(&'a str,), String>>(_f: &F) {
+        assert!(F::is_some());
+    }
+
+    #[test]
+    fn test_is_some() {
+        do_test_is_some(&str::to_owned);
+    }
 }
diff --git a/rust/qemu-api/src/cell.rs b/rust/qemu-api/src/cell.rs
index eae4e2ce786b2..ab0785a269283 100644
--- a/rust/qemu-api/src/cell.rs
+++ b/rust/qemu-api/src/cell.rs
@@ -27,7 +27,7 @@
 // IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 // DEALINGS IN THE SOFTWARE.
 
-//! BQL-protected mutable containers.
+//! QEMU-specific mutable containers
 //!
 //! Rust memory safety is based on this rule: Given an object `T`, it is only
 //! possible to have one of the following:
@@ -43,8 +43,10 @@
 //! usually have their pointer shared with the "outside world very early in
 //! their lifetime", for example when they create their
 //! [`MemoryRegion`s](crate::bindings::MemoryRegion).  Therefore, individual
-//! parts of a  device must be made mutable in a controlled manner through the
-//! use of cell types.
+//! parts of a  device must be made mutable in a controlled manner; this module
+//! provides the tools to do so.
+//!
+//! ## Cell types
 //!
 //! [`BqlCell<T>`] and [`BqlRefCell<T>`] allow doing this via the Big QEMU Lock.
 //! While they are essentially the same single-threaded primitives that are
@@ -71,7 +73,35 @@
 //! QEMU device implementations is usually incorrect and can lead to
 //! thread-safety issues.
 //!
-//! ## `BqlCell<T>`
+//! ### Example
+//!
+//! ```
+//! # use qemu_api::prelude::*;
+//! # use qemu_api::{c_str, cell::BqlRefCell, irq::InterruptSource, irq::IRQState};
+//! # use qemu_api::{sysbus::SysBusDevice, qom::Owned, qom::ParentField};
+//! # const N_GPIOS: usize = 8;
+//! # struct PL061Registers { /* ... */ }
+//! # unsafe impl ObjectType for PL061State {
+//! #     type Class = <SysBusDevice as ObjectType>::Class;
+//! #     const TYPE_NAME: &'static std::ffi::CStr = c_str!("pl061");
+//! # }
+//! struct PL061State {
+//!     parent_obj: ParentField<SysBusDevice>,
+//!
+//!     // Configuration is read-only after initialization
+//!     pullups: u32,
+//!     pulldowns: u32,
+//!
+//!     // Single values shared with C code use BqlCell, in this case via InterruptSource
+//!     out: [InterruptSource; N_GPIOS],
+//!     interrupt: InterruptSource,
+//!
+//!     // Larger state accessed by device methods uses BqlRefCell or Mutex
+//!     registers: BqlRefCell<PL061Registers>,
+//! }
+//! ```
+//!
+//! ### `BqlCell<T>`
 //!
 //! [`BqlCell<T>`] implements interior mutability by moving values in and out of
 //! the cell. That is, an `&mut T` to the inner value can never be obtained as
@@ -91,7 +121,7 @@
 //!    - [`set`](BqlCell::set): this method replaces the interior value,
 //!      dropping the replaced value.
 //!
-//! ## `BqlRefCell<T>`
+//! ### `BqlRefCell<T>`
 //!
 //! [`BqlRefCell<T>`] uses Rust's lifetimes to implement "dynamic borrowing", a
 //! process whereby one can claim temporary, exclusive, mutable access to the
@@ -111,13 +141,82 @@
 //! Multiple immutable borrows are allowed via [`borrow`](BqlRefCell::borrow),
 //! or a single mutable borrow via [`borrow_mut`](BqlRefCell::borrow_mut).  The
 //! thread will panic if these rules are violated or if the BQL is not held.
+//!
+//! ## Opaque wrappers
+//!
+//! The cell types from the previous section are useful at the boundaries
+//! of code that requires interior mutability.  When writing glue code that
+//! interacts directly with C structs, however, it is useful to operate
+//! at a lower level.
+//!
+//! C functions often violate Rust's fundamental assumptions about memory
+//! safety by modifying memory even if it is shared.  Furthermore, C structs
+//! often start their life uninitialized and may be populated lazily.
+//!
+//! For this reason, this module provides the [`Opaque<T>`] type to opt out
+//! of Rust's usual guarantees about the wrapped type. Access to the wrapped
+//! value is always through raw pointers, obtained via methods like
+//! [`as_mut_ptr()`](Opaque::as_mut_ptr) and [`as_ptr()`](Opaque::as_ptr). These
+//! pointers can then be passed to C functions or dereferenced; both actions
+//! require `unsafe` blocks, making it clear where safety guarantees must be
+//! manually verified. For example
+//!
+//! ```ignore
+//! unsafe {
+//!     let state = Opaque::<MyStruct>::uninit();
+//!     qemu_struct_init(state.as_mut_ptr());
+//! }
+//! ```
+//!
+//! [`Opaque<T>`] will usually be wrapped one level further, so that
+//! bridge methods can be added to the wrapper:
+//!
+//! ```ignore
+//! pub struct MyStruct(Opaque<bindings::MyStruct>);
+//!
+//! impl MyStruct {
+//!     fn new() -> Pin<Box<MyStruct>> {
+//!         let result = Box::pin(unsafe { Opaque::uninit() });
+//!         unsafe { qemu_struct_init(result.as_mut_ptr()) };
+//!         result
+//!     }
+//! }
+//! ```
+//!
+//! This pattern of wrapping bindgen-generated types in [`Opaque<T>`] provides
+//! several advantages:
+//!
+//! * The choice of traits to be implemented is not limited by the
+//!   bindgen-generated code.  For example, [`Drop`] can be added without
+//!   disabling [`Copy`] on the underlying bindgen type
+//!
+//! * [`Send`] and [`Sync`] implementations can be controlled by the wrapper
+//!   type rather than being automatically derived from the C struct's layout
+//!
+//! * Methods can be implemented in a separate crate from the bindgen-generated
+//!   bindings
+//!
+//! * [`Debug`](std::fmt::Debug) and [`Display`](std::fmt::Display)
+//!   implementations can be customized to be more readable than the raw C
+//!   struct representation
+//!
+//! The [`Opaque<T>`] type does not include BQL validation; it is possible to
+//! assert in the code that the right lock is taken, to use it together
+//! with a custom lock guard type, or to let C code take the lock, as
+//! appropriate.  It is also possible to use it with non-thread-safe
+//! types, since by default (unlike [`BqlCell`] and [`BqlRefCell`]
+//! it is neither `Sync` nor `Send`.
+//!
+//! While [`Opaque<T>`] is necessary for C interop, it should be used sparingly
+//! and only at FFI boundaries. For QEMU-specific types that need interior
+//! mutability, prefer [`BqlCell`] or [`BqlRefCell`].
 
 use std::{
     cell::{Cell, UnsafeCell},
     cmp::Ordering,
     fmt,
-    marker::PhantomData,
-    mem,
+    marker::{PhantomData, PhantomPinned},
+    mem::{self, MaybeUninit},
     ops::{Deref, DerefMut},
     ptr::NonNull,
 };
@@ -840,3 +939,167 @@ impl<T: fmt::Display> fmt::Display for BqlRefMut<'_, T> {
         (**self).fmt(f)
     }
 }
+
+/// Stores an opaque value that is shared with C code.
+///
+/// Often, C structs can changed when calling a C function even if they are
+/// behind a shared Rust reference, or they can be initialized lazily and have
+/// invalid bit patterns (e.g. `3` for a [`bool`]).  This goes against Rust's
+/// strict aliasing rules, which normally prevent mutation through shared
+/// references.
+///
+/// Wrapping the struct with `Opaque<T>` ensures that the Rust compiler does not
+/// assume the usual constraints that Rust structs require, and allows using
+/// shared references on the Rust side.
+///
+/// `Opaque<T>` is `#[repr(transparent)]`, so that it matches the memory layout
+/// of `T`.
+#[repr(transparent)]
+pub struct Opaque<T> {
+    value: UnsafeCell<MaybeUninit<T>>,
+    // PhantomPinned also allows multiple references to the `Opaque<T>`, i.e.
+    // one `&mut Opaque<T>` can coexist with a `&mut T` or any number of `&T`;
+    // see https://docs.rs/pinned-aliasable/latest/pinned_aliasable/.
+    _pin: PhantomPinned,
+}
+
+impl<T> Opaque<T> {
+    /// Creates a new shared reference from a C pointer
+    ///
+    /// # Safety
+    ///
+    /// The pointer must be valid, though it need not point to a valid value.
+    pub unsafe fn from_raw<'a>(ptr: *mut T) -> &'a Self {
+        let ptr = NonNull::new(ptr).unwrap().cast::<Self>();
+        // SAFETY: Self is a transparent wrapper over T
+        unsafe { ptr.as_ref() }
+    }
+
+    /// Creates a new opaque object with uninitialized contents.
+    ///
+    /// # Safety
+    ///
+    /// Ultimately the pointer to the returned value will be dereferenced
+    /// in another `unsafe` block, for example when passing it to a C function,
+    /// but the functions containing the dereference are usually safe.  The
+    /// value returned from `uninit()` must be initialized and pinned before
+    /// calling them.
+    #[allow(clippy::missing_const_for_fn)]
+    pub unsafe fn uninit() -> Self {
+        Self {
+            value: UnsafeCell::new(MaybeUninit::uninit()),
+            _pin: PhantomPinned,
+        }
+    }
+
+    /// Creates a new opaque object with zeroed contents.
+    ///
+    /// # Safety
+    ///
+    /// Ultimately the pointer to the returned value will be dereferenced
+    /// in another `unsafe` block, for example when passing it to a C function,
+    /// but the functions containing the dereference are usually safe.  The
+    /// value returned from `uninit()` must be pinned (and possibly initialized)
+    /// before calling them.
+    #[allow(clippy::missing_const_for_fn)]
+    pub unsafe fn zeroed() -> Self {
+        Self {
+            value: UnsafeCell::new(MaybeUninit::zeroed()),
+            _pin: PhantomPinned,
+        }
+    }
+
+    /// Returns a raw mutable pointer to the opaque data.
+    pub const fn as_mut_ptr(&self) -> *mut T {
+        UnsafeCell::get(&self.value).cast()
+    }
+
+    /// Returns a raw pointer to the opaque data.
+    pub const fn as_ptr(&self) -> *const T {
+        self.as_mut_ptr() as *const _
+    }
+
+    /// Returns a raw pointer to the opaque data that can be passed to a
+    /// C function as `void *`.
+    pub const fn as_void_ptr(&self) -> *mut std::ffi::c_void {
+        UnsafeCell::get(&self.value).cast()
+    }
+
+    /// Converts a raw pointer to the wrapped type.
+    pub const fn raw_get(slot: *mut Self) -> *mut T {
+        // Compare with Linux's raw_get method, which goes through an UnsafeCell
+        // because it takes a *const Self instead.
+        slot.cast()
+    }
+}
+
+impl<T> fmt::Debug for Opaque<T> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let mut name: String = "Opaque<".to_string();
+        name += std::any::type_name::<T>();
+        name += ">";
+        f.debug_tuple(&name).field(&self.as_ptr()).finish()
+    }
+}
+
+impl<T: Default> Opaque<T> {
+    /// Creates a new opaque object with default contents.
+    ///
+    /// # Safety
+    ///
+    /// Ultimately the pointer to the returned value will be dereferenced
+    /// in another `unsafe` block, for example when passing it to a C function,
+    /// but the functions containing the dereference are usually safe.  The
+    /// value returned from `uninit()` must be pinned before calling them.
+    pub unsafe fn new() -> Self {
+        Self {
+            value: UnsafeCell::new(MaybeUninit::new(T::default())),
+            _pin: PhantomPinned,
+        }
+    }
+}
+
+/// Annotates [`Self`] as a transparent wrapper for another type.
+///
+/// Usually defined via the [`qemu_api_macros::Wrapper`] derive macro.
+///
+/// # Examples
+///
+/// ```
+/// # use std::mem::ManuallyDrop;
+/// # use qemu_api::cell::Wrapper;
+/// #[repr(transparent)]
+/// pub struct Example {
+///     inner: ManuallyDrop<String>,
+/// }
+///
+/// unsafe impl Wrapper for Example {
+///     type Wrapped = String;
+/// }
+/// ```
+///
+/// # Safety
+///
+/// `Self` must be a `#[repr(transparent)]` wrapper for the `Wrapped` type,
+/// whether directly or indirectly.
+///
+/// # Methods
+///
+/// By convention, types that implement Wrapper also implement the following
+/// methods:
+///
+/// ```ignore
+/// pub const unsafe fn from_raw<'a>(value: *mut Self::Wrapped) -> &'a Self;
+/// pub const unsafe fn as_mut_ptr(&self) -> *mut Self::Wrapped;
+/// pub const unsafe fn as_ptr(&self) -> *const Self::Wrapped;
+/// pub const unsafe fn raw_get(slot: *mut Self) -> *const Self::Wrapped;
+/// ```
+///
+/// They are not defined here to allow them to be `const`.
+pub unsafe trait Wrapper {
+    type Wrapped;
+}
+
+unsafe impl<T> Wrapper for Opaque<T> {
+    type Wrapped = T;
+}
diff --git a/rust/qemu-api/src/chardev.rs b/rust/qemu-api/src/chardev.rs
new file mode 100644
index 0000000000000..11e6c45afaf1b
--- /dev/null
+++ b/rust/qemu-api/src/chardev.rs
@@ -0,0 +1,261 @@
+// Copyright 2024 Red Hat, Inc.
+// Author(s): Paolo Bonzini <pbonzini@redhat.com>
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+//! Bindings for character devices
+//!
+//! Character devices in QEMU can run under the big QEMU lock or in a separate
+//! `GMainContext`. Here we only support the former, because the bindings
+//! enforce that the BQL is taken whenever the functions in [`CharBackend`] are
+//! called.
+
+use std::{
+    ffi::CStr,
+    fmt::{self, Debug},
+    io::{self, ErrorKind, Write},
+    marker::PhantomPinned,
+    os::raw::{c_int, c_void},
+    ptr::addr_of_mut,
+    slice,
+};
+
+use crate::{
+    bindings,
+    callbacks::FnCall,
+    cell::{BqlRefMut, Opaque},
+    prelude::*,
+};
+
+/// A safe wrapper around [`bindings::Chardev`].
+#[repr(transparent)]
+#[derive(qemu_api_macros::Wrapper)]
+pub struct Chardev(Opaque<bindings::Chardev>);
+
+pub type ChardevClass = bindings::ChardevClass;
+pub type Event = bindings::QEMUChrEvent;
+
+/// A safe wrapper around [`bindings::CharBackend`], denoting the character
+/// back-end that is used for example by a device.  Compared to the
+/// underlying C struct it adds BQL protection, and is marked as pinned
+/// because the QOM object ([`bindings::Chardev`]) contains a pointer to
+/// the `CharBackend`.
+pub struct CharBackend {
+    inner: BqlRefCell<bindings::CharBackend>,
+    _pin: PhantomPinned,
+}
+
+impl Write for BqlRefMut<'_, bindings::CharBackend> {
+    fn flush(&mut self) -> io::Result<()> {
+        Ok(())
+    }
+
+    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+        let chr: &mut bindings::CharBackend = self;
+
+        let len = buf.len().try_into().unwrap();
+        let r = unsafe { bindings::qemu_chr_fe_write(addr_of_mut!(*chr), buf.as_ptr(), len) };
+        errno::into_io_result(r).map(|cnt| cnt as usize)
+    }
+
+    fn write_all(&mut self, buf: &[u8]) -> io::Result<()> {
+        let chr: &mut bindings::CharBackend = self;
+
+        let len = buf.len().try_into().unwrap();
+        let r = unsafe { bindings::qemu_chr_fe_write_all(addr_of_mut!(*chr), buf.as_ptr(), len) };
+        errno::into_io_result(r).and_then(|cnt| {
+            if cnt as usize == buf.len() {
+                Ok(())
+            } else {
+                Err(ErrorKind::WriteZero.into())
+            }
+        })
+    }
+}
+
+impl Debug for CharBackend {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // SAFETY: accessed just to print the values
+        let chr = self.inner.as_ptr();
+        Debug::fmt(unsafe { &*chr }, f)
+    }
+}
+
+// FIXME: use something like PinnedDrop from the pinned_init crate
+impl Drop for CharBackend {
+    fn drop(&mut self) {
+        self.disable_handlers();
+    }
+}
+
+impl CharBackend {
+    /// Enable the front-end's character device handlers, if there is an
+    /// associated `Chardev`.
+    pub fn enable_handlers<
+        'chardev,
+        'owner: 'chardev,
+        T,
+        CanReceiveFn: for<'a> FnCall<(&'a T,), u32>,
+        ReceiveFn: for<'a, 'b> FnCall<(&'a T, &'b [u8])>,
+        EventFn: for<'a> FnCall<(&'a T, Event)>,
+    >(
+        // When "self" is dropped, the handlers are automatically disabled.
+        // However, this is not necessarily true if the owner is dropped.
+        // So require the owner to outlive the character device.
+        &'chardev self,
+        owner: &'owner T,
+        _can_receive: CanReceiveFn,
+        _receive: ReceiveFn,
+        _event: EventFn,
+    ) {
+        unsafe extern "C" fn rust_can_receive_cb<T, F: for<'a> FnCall<(&'a T,), u32>>(
+            opaque: *mut c_void,
+        ) -> c_int {
+            // SAFETY: the values are safe according to the contract of
+            // enable_handlers() and qemu_chr_fe_set_handlers()
+            let owner: &T = unsafe { &*(opaque.cast::<T>()) };
+            let r = F::call((owner,));
+            r.try_into().unwrap()
+        }
+
+        unsafe extern "C" fn rust_receive_cb<T, F: for<'a, 'b> FnCall<(&'a T, &'b [u8])>>(
+            opaque: *mut c_void,
+            buf: *const u8,
+            size: c_int,
+        ) {
+            // SAFETY: the values are safe according to the contract of
+            // enable_handlers() and qemu_chr_fe_set_handlers()
+            let owner: &T = unsafe { &*(opaque.cast::<T>()) };
+            let buf = unsafe { slice::from_raw_parts(buf, size.try_into().unwrap()) };
+            F::call((owner, buf))
+        }
+
+        unsafe extern "C" fn rust_event_cb<T, F: for<'a> FnCall<(&'a T, Event)>>(
+            opaque: *mut c_void,
+            event: Event,
+        ) {
+            // SAFETY: the values are safe according to the contract of
+            // enable_handlers() and qemu_chr_fe_set_handlers()
+            let owner: &T = unsafe { &*(opaque.cast::<T>()) };
+            F::call((owner, event))
+        }
+
+        let _: () = CanReceiveFn::ASSERT_IS_SOME;
+        let receive_cb: Option<unsafe extern "C" fn(*mut c_void, *const u8, c_int)> =
+            if ReceiveFn::is_some() {
+                Some(rust_receive_cb::<T, ReceiveFn>)
+            } else {
+                None
+            };
+        let event_cb: Option<unsafe extern "C" fn(*mut c_void, Event)> = if EventFn::is_some() {
+            Some(rust_event_cb::<T, EventFn>)
+        } else {
+            None
+        };
+
+        let mut chr = self.inner.borrow_mut();
+        // SAFETY: the borrow promises that the BQL is taken
+        unsafe {
+            bindings::qemu_chr_fe_set_handlers(
+                addr_of_mut!(*chr),
+                Some(rust_can_receive_cb::<T, CanReceiveFn>),
+                receive_cb,
+                event_cb,
+                None,
+                (owner as *const T as *mut T).cast::<c_void>(),
+                core::ptr::null_mut(),
+                true,
+            );
+        }
+    }
+
+    /// Disable the front-end's character device handlers.
+    pub fn disable_handlers(&self) {
+        let mut chr = self.inner.borrow_mut();
+        // SAFETY: the borrow promises that the BQL is taken
+        unsafe {
+            bindings::qemu_chr_fe_set_handlers(
+                addr_of_mut!(*chr),
+                None,
+                None,
+                None,
+                None,
+                core::ptr::null_mut(),
+                core::ptr::null_mut(),
+                true,
+            );
+        }
+    }
+
+    /// Notify that the frontend is ready to receive data.
+    pub fn accept_input(&self) {
+        let mut chr = self.inner.borrow_mut();
+        // SAFETY: the borrow promises that the BQL is taken
+        unsafe { bindings::qemu_chr_fe_accept_input(addr_of_mut!(*chr)) }
+    }
+
+    /// Temporarily borrow the character device, allowing it to be used
+    /// as an implementor of `Write`.  Note that it is not valid to drop
+    /// the big QEMU lock while the character device is borrowed, as
+    /// that might cause C code to write to the character device.
+    pub fn borrow_mut(&self) -> impl Write + '_ {
+        self.inner.borrow_mut()
+    }
+
+    /// Send a continuous stream of zero bits on the line if `enabled` is
+    /// true, or a short stream if `enabled` is false.
+    pub fn send_break(&self, long: bool) -> io::Result<()> {
+        let mut chr = self.inner.borrow_mut();
+        let mut duration: c_int = long.into();
+        // SAFETY: the borrow promises that the BQL is taken
+        let r = unsafe {
+            bindings::qemu_chr_fe_ioctl(
+                addr_of_mut!(*chr),
+                bindings::CHR_IOCTL_SERIAL_SET_BREAK as i32,
+                addr_of_mut!(duration).cast::<c_void>(),
+            )
+        };
+
+        errno::into_io_result(r).map(|_| ())
+    }
+
+    /// Write data to a character backend from the front end.  This function
+    /// will send data from the front end to the back end.  Unlike
+    /// `write`, this function will block if the back end cannot
+    /// consume all of the data attempted to be written.
+    ///
+    /// Returns the number of bytes consumed (0 if no associated Chardev) or an
+    /// error.
+    pub fn write(&self, buf: &[u8]) -> io::Result<usize> {
+        let len = buf.len().try_into().unwrap();
+        // SAFETY: qemu_chr_fe_write is thread-safe
+        let r = unsafe { bindings::qemu_chr_fe_write(self.inner.as_ptr(), buf.as_ptr(), len) };
+        errno::into_io_result(r).map(|cnt| cnt as usize)
+    }
+
+    /// Write data to a character backend from the front end.  This function
+    /// will send data from the front end to the back end.  Unlike
+    /// `write`, this function will block if the back end cannot
+    /// consume all of the data attempted to be written.
+    ///
+    /// Returns the number of bytes consumed (0 if no associated Chardev) or an
+    /// error.
+    pub fn write_all(&self, buf: &[u8]) -> io::Result<()> {
+        let len = buf.len().try_into().unwrap();
+        // SAFETY: qemu_chr_fe_write_all is thread-safe
+        let r = unsafe { bindings::qemu_chr_fe_write_all(self.inner.as_ptr(), buf.as_ptr(), len) };
+        errno::into_io_result(r).and_then(|cnt| {
+            if cnt as usize == buf.len() {
+                Ok(())
+            } else {
+                Err(ErrorKind::WriteZero.into())
+            }
+        })
+    }
+}
+
+unsafe impl ObjectType for Chardev {
+    type Class = ChardevClass;
+    const TYPE_NAME: &'static CStr =
+        unsafe { CStr::from_bytes_with_nul_unchecked(bindings::TYPE_CHARDEV) };
+}
+qom_isa!(Chardev: Object);
diff --git a/rust/qemu-api/src/errno.rs b/rust/qemu-api/src/errno.rs
new file mode 100644
index 0000000000000..18d101448b936
--- /dev/null
+++ b/rust/qemu-api/src/errno.rs
@@ -0,0 +1,345 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+//! Utility functions to convert `errno` to and from
+//! [`io::Error`]/[`io::Result`]
+//!
+//! QEMU C functions often have a "positive success/negative `errno`" calling
+//! convention.  This module provides functions to portably convert an integer
+//! into an [`io::Result`] and back.
+
+use std::{convert::TryFrom, io, io::ErrorKind};
+
+/// An `errno` value that can be converted into an [`io::Error`]
+pub struct Errno(pub u16);
+
+// On Unix, from_raw_os_error takes an errno value and OS errors
+// are printed using strerror.  On Windows however it takes a
+// GetLastError() value; therefore we need to convert errno values
+// into io::Error by hand.  This is the same mapping that the
+// standard library uses to retrieve the kind of OS errors
+// (`std::sys::pal::unix::decode_error_kind`).
+impl From<Errno> for ErrorKind {
+    fn from(value: Errno) -> ErrorKind {
+        use ErrorKind::*;
+        let Errno(errno) = value;
+        match i32::from(errno) {
+            libc::EPERM | libc::EACCES => PermissionDenied,
+            libc::ENOENT => NotFound,
+            libc::EINTR => Interrupted,
+            x if x == libc::EAGAIN || x == libc::EWOULDBLOCK => WouldBlock,
+            libc::ENOMEM => OutOfMemory,
+            libc::EEXIST => AlreadyExists,
+            libc::EINVAL => InvalidInput,
+            libc::EPIPE => BrokenPipe,
+            libc::EADDRINUSE => AddrInUse,
+            libc::EADDRNOTAVAIL => AddrNotAvailable,
+            libc::ECONNABORTED => ConnectionAborted,
+            libc::ECONNREFUSED => ConnectionRefused,
+            libc::ECONNRESET => ConnectionReset,
+            libc::ENOTCONN => NotConnected,
+            libc::ENOTSUP => Unsupported,
+            libc::ETIMEDOUT => TimedOut,
+            _ => Other,
+        }
+    }
+}
+
+// This is used on Windows for all io::Errors, but also on Unix if the
+// io::Error does not have a raw OS error.  This is the reversed
+// mapping of the above; EIO is returned for unknown ErrorKinds.
+impl From<io::ErrorKind> for Errno {
+    fn from(value: io::ErrorKind) -> Errno {
+        use ErrorKind::*;
+        let errno = match value {
+            // can be both EPERM or EACCES :( pick one
+            PermissionDenied => libc::EPERM,
+            NotFound => libc::ENOENT,
+            Interrupted => libc::EINTR,
+            WouldBlock => libc::EAGAIN,
+            OutOfMemory => libc::ENOMEM,
+            AlreadyExists => libc::EEXIST,
+            InvalidInput => libc::EINVAL,
+            BrokenPipe => libc::EPIPE,
+            AddrInUse => libc::EADDRINUSE,
+            AddrNotAvailable => libc::EADDRNOTAVAIL,
+            ConnectionAborted => libc::ECONNABORTED,
+            ConnectionRefused => libc::ECONNREFUSED,
+            ConnectionReset => libc::ECONNRESET,
+            NotConnected => libc::ENOTCONN,
+            Unsupported => libc::ENOTSUP,
+            TimedOut => libc::ETIMEDOUT,
+            _ => libc::EIO,
+        };
+        Errno(errno as u16)
+    }
+}
+
+impl From<Errno> for io::Error {
+    #[cfg(unix)]
+    fn from(value: Errno) -> io::Error {
+        let Errno(errno) = value;
+        io::Error::from_raw_os_error(errno.into())
+    }
+
+    #[cfg(windows)]
+    fn from(value: Errno) -> io::Error {
+        let error_kind: ErrorKind = value.into();
+        error_kind.into()
+    }
+}
+
+impl From<io::Error> for Errno {
+    fn from(value: io::Error) -> Errno {
+        if cfg!(unix) {
+            if let Some(errno) = value.raw_os_error() {
+                return Errno(u16::try_from(errno).unwrap());
+            }
+        }
+        value.kind().into()
+    }
+}
+
+/// Internal traits; used to enable [`into_io_result`] and [`into_neg_errno`]
+/// for the "right" set of types.
+mod traits {
+    use super::Errno;
+
+    /// A signed type that can be converted into an
+    /// [`io::Result`](std::io::Result)
+    pub trait GetErrno {
+        /// Unsigned variant of `Self`, used as the type for the `Ok` case.
+        type Out;
+
+        /// Return `Ok(self)` if positive, `Err(Errno(-self))` if negative
+        fn into_errno_result(self) -> Result<Self::Out, Errno>;
+    }
+
+    /// A type that can be taken out of an [`io::Result`](std::io::Result) and
+    /// converted into "positive success/negative `errno`" convention.
+    pub trait MergeErrno {
+        /// Signed variant of `Self`, used as the return type of
+        /// [`into_neg_errno`](super::into_neg_errno).
+        type Out: From<u16> + std::ops::Neg<Output = Self::Out>;
+
+        /// Return `self`, asserting that it is in range
+        fn map_ok(self) -> Self::Out;
+    }
+
+    macro_rules! get_errno {
+        ($t:ty, $out:ty) => {
+            impl GetErrno for $t {
+                type Out = $out;
+                fn into_errno_result(self) -> Result<Self::Out, Errno> {
+                    match self {
+                        0.. => Ok(self as $out),
+                        -65535..=-1 => Err(Errno(-self as u16)),
+                        _ => panic!("{self} is not a negative errno"),
+                    }
+                }
+            }
+        };
+    }
+
+    get_errno!(i32, u32);
+    get_errno!(i64, u64);
+    get_errno!(isize, usize);
+
+    macro_rules! merge_errno {
+        ($t:ty, $out:ty) => {
+            impl MergeErrno for $t {
+                type Out = $out;
+                fn map_ok(self) -> Self::Out {
+                    self.try_into().unwrap()
+                }
+            }
+        };
+    }
+
+    merge_errno!(u8, i32);
+    merge_errno!(u16, i32);
+    merge_errno!(u32, i32);
+    merge_errno!(u64, i64);
+
+    impl MergeErrno for () {
+        type Out = i32;
+        fn map_ok(self) -> i32 {
+            0
+        }
+    }
+}
+
+use traits::{GetErrno, MergeErrno};
+
+/// Convert an integer value into a [`io::Result`].
+///
+/// Positive values are turned into an `Ok` result; negative values
+/// are interpreted as negated `errno` and turned into an `Err`.
+///
+/// ```
+/// # use qemu_api::errno::into_io_result;
+/// # use std::io::ErrorKind;
+/// let ok = into_io_result(1i32).unwrap();
+/// assert_eq!(ok, 1u32);
+///
+/// let err = into_io_result(-1i32).unwrap_err(); // -EPERM
+/// assert_eq!(err.kind(), ErrorKind::PermissionDenied);
+/// ```
+///
+/// # Panics
+///
+/// Since the result is an unsigned integer, negative values must
+/// be close to 0; values that are too far away are considered
+/// likely overflows and will panic:
+///
+/// ```should_panic
+/// # use qemu_api::errno::into_io_result;
+/// # #[allow(dead_code)]
+/// let err = into_io_result(-0x1234_5678i32); // panic
+/// ```
+pub fn into_io_result<T: GetErrno>(value: T) -> io::Result<T::Out> {
+    value.into_errno_result().map_err(Into::into)
+}
+
+/// Convert a [`Result`] into an integer value, using negative `errno`
+/// values to report errors.
+///
+/// ```
+/// # use qemu_api::errno::into_neg_errno;
+/// # use std::io::{self, ErrorKind};
+/// let ok: io::Result<()> = Ok(());
+/// assert_eq!(into_neg_errno(ok), 0);
+///
+/// let err: io::Result<()> = Err(ErrorKind::InvalidInput.into());
+/// assert_eq!(into_neg_errno(err), -22); // -EINVAL
+/// ```
+///
+/// Since this module also provides the ability to convert [`io::Error`]
+/// to an `errno` value, [`io::Result`] is the most commonly used type
+/// for the argument of this function:
+///
+/// # Panics
+///
+/// Since the result is a signed integer, integer `Ok` values must remain
+/// positive:
+///
+/// ```should_panic
+/// # use qemu_api::errno::into_neg_errno;
+/// # use std::io;
+/// let err: io::Result<u32> = Ok(0x8899_AABB);
+/// into_neg_errno(err) // panic
+/// # ;
+/// ```
+pub fn into_neg_errno<T: MergeErrno, E: Into<Errno>>(value: Result<T, E>) -> T::Out {
+    match value {
+        Ok(x) => x.map_ok(),
+        Err(err) => -T::Out::from(err.into().0),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::io::ErrorKind;
+
+    use super::*;
+    use crate::assert_match;
+
+    #[test]
+    pub fn test_from_u8() {
+        let ok: io::Result<_> = Ok(42u8);
+        assert_eq!(into_neg_errno(ok), 42);
+
+        let err: io::Result<u8> = Err(io::ErrorKind::PermissionDenied.into());
+        assert_eq!(into_neg_errno(err), -1);
+
+        if cfg!(unix) {
+            let os_err: io::Result<u8> = Err(io::Error::from_raw_os_error(10));
+            assert_eq!(into_neg_errno(os_err), -10);
+        }
+    }
+
+    #[test]
+    pub fn test_from_u16() {
+        let ok: io::Result<_> = Ok(1234u16);
+        assert_eq!(into_neg_errno(ok), 1234);
+
+        let err: io::Result<u16> = Err(io::ErrorKind::PermissionDenied.into());
+        assert_eq!(into_neg_errno(err), -1);
+
+        if cfg!(unix) {
+            let os_err: io::Result<u16> = Err(io::Error::from_raw_os_error(10));
+            assert_eq!(into_neg_errno(os_err), -10);
+        }
+    }
+
+    #[test]
+    pub fn test_i32() {
+        assert_match!(into_io_result(1234i32), Ok(1234));
+
+        let err = into_io_result(-1i32).unwrap_err();
+        #[cfg(unix)]
+        assert_match!(err.raw_os_error(), Some(1));
+        assert_match!(err.kind(), ErrorKind::PermissionDenied);
+    }
+
+    #[test]
+    pub fn test_from_u32() {
+        let ok: io::Result<_> = Ok(1234u32);
+        assert_eq!(into_neg_errno(ok), 1234);
+
+        let err: io::Result<u32> = Err(io::ErrorKind::PermissionDenied.into());
+        assert_eq!(into_neg_errno(err), -1);
+
+        if cfg!(unix) {
+            let os_err: io::Result<u32> = Err(io::Error::from_raw_os_error(10));
+            assert_eq!(into_neg_errno(os_err), -10);
+        }
+    }
+
+    #[test]
+    pub fn test_i64() {
+        assert_match!(into_io_result(1234i64), Ok(1234));
+
+        let err = into_io_result(-22i64).unwrap_err();
+        #[cfg(unix)]
+        assert_match!(err.raw_os_error(), Some(22));
+        assert_match!(err.kind(), ErrorKind::InvalidInput);
+    }
+
+    #[test]
+    pub fn test_from_u64() {
+        let ok: io::Result<_> = Ok(1234u64);
+        assert_eq!(into_neg_errno(ok), 1234);
+
+        let err: io::Result<u64> = Err(io::ErrorKind::InvalidInput.into());
+        assert_eq!(into_neg_errno(err), -22);
+
+        if cfg!(unix) {
+            let os_err: io::Result<u64> = Err(io::Error::from_raw_os_error(6));
+            assert_eq!(into_neg_errno(os_err), -6);
+        }
+    }
+
+    #[test]
+    pub fn test_isize() {
+        assert_match!(into_io_result(1234isize), Ok(1234));
+
+        let err = into_io_result(-4isize).unwrap_err();
+        #[cfg(unix)]
+        assert_match!(err.raw_os_error(), Some(4));
+        assert_match!(err.kind(), ErrorKind::Interrupted);
+    }
+
+    #[test]
+    pub fn test_from_unit() {
+        let ok: io::Result<_> = Ok(());
+        assert_eq!(into_neg_errno(ok), 0);
+
+        let err: io::Result<()> = Err(io::ErrorKind::OutOfMemory.into());
+        assert_eq!(into_neg_errno(err), -12);
+
+        if cfg!(unix) {
+            let os_err: io::Result<()> = Err(io::Error::from_raw_os_error(2));
+            assert_eq!(into_neg_errno(os_err), -2);
+        }
+    }
+}
diff --git a/rust/qemu-api/src/irq.rs b/rust/qemu-api/src/irq.rs
index 378e5202951a0..1222d4fde30c9 100644
--- a/rust/qemu-api/src/irq.rs
+++ b/rust/qemu-api/src/irq.rs
@@ -4,14 +4,20 @@
 
 //! Bindings for interrupt sources
 
-use core::ptr;
-use std::{marker::PhantomData, os::raw::c_int};
+use std::{ffi::CStr, marker::PhantomData, os::raw::c_int, ptr};
 
 use crate::{
-    bindings::{qemu_set_irq, IRQState},
+    bindings::{self, qemu_set_irq},
+    cell::Opaque,
     prelude::*,
+    qom::ObjectClass,
 };
 
+/// An opaque wrapper around [`bindings::IRQState`].
+#[repr(transparent)]
+#[derive(Debug, qemu_api_macros::Wrapper)]
+pub struct IRQState(Opaque<bindings::IRQState>);
+
 /// Interrupt sources are used by devices to pass changes to a value (typically
 /// a boolean).  The interrupt sink is usually an interrupt controller or
 /// GPIO controller.
@@ -39,10 +45,13 @@ pub struct InterruptSource<T = bool>
 where
     c_int: From<T>,
 {
-    cell: BqlCell<*mut IRQState>,
+    cell: BqlCell<*mut bindings::IRQState>,
     _marker: PhantomData<T>,
 }
 
+// SAFETY: the implementation asserts via `BqlCell` that the BQL is taken
+unsafe impl<T> Sync for InterruptSource<T> where c_int: From<T> {}
+
 impl InterruptSource<bool> {
     /// Send a low (`false`) value to the interrupt sink.
     pub fn lower(&self) {
@@ -75,9 +84,14 @@ where
         }
     }
 
-    pub(crate) const fn as_ptr(&self) -> *mut *mut IRQState {
+    pub(crate) const fn as_ptr(&self) -> *mut *mut bindings::IRQState {
         self.cell.as_ptr()
     }
+
+    pub(crate) const fn slice_as_ptr(slice: &[Self]) -> *mut *mut bindings::IRQState {
+        assert!(!slice.is_empty());
+        slice[0].as_ptr()
+    }
 }
 
 impl Default for InterruptSource {
@@ -88,3 +102,10 @@ impl Default for InterruptSource {
         }
     }
 }
+
+unsafe impl ObjectType for IRQState {
+    type Class = ObjectClass;
+    const TYPE_NAME: &'static CStr =
+        unsafe { CStr::from_bytes_with_nul_unchecked(bindings::TYPE_IRQ) };
+}
+qom_isa!(IRQState: Object);
diff --git a/rust/qemu-api/src/lib.rs b/rust/qemu-api/src/lib.rs
index 3cf9371cff059..05f38b51d304f 100644
--- a/rust/qemu-api/src/lib.rs
+++ b/rust/qemu-api/src/lib.rs
@@ -18,12 +18,16 @@ pub mod bitops;
 pub mod c_str;
 pub mod callbacks;
 pub mod cell;
+pub mod chardev;
+pub mod errno;
 pub mod irq;
+pub mod memory;
 pub mod module;
 pub mod offset_of;
 pub mod qdev;
 pub mod qom;
 pub mod sysbus;
+pub mod timer;
 pub mod vmstate;
 pub mod zeroable;
 
diff --git a/rust/qemu-api/src/memory.rs b/rust/qemu-api/src/memory.rs
new file mode 100644
index 0000000000000..fdb1ea11fcf96
--- /dev/null
+++ b/rust/qemu-api/src/memory.rs
@@ -0,0 +1,205 @@
+// Copyright 2024 Red Hat, Inc.
+// Author(s): Paolo Bonzini <pbonzini@redhat.com>
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+//! Bindings for `MemoryRegion`, `MemoryRegionOps` and `MemTxAttrs`
+
+use std::{
+    ffi::{CStr, CString},
+    marker::PhantomData,
+    os::raw::{c_uint, c_void},
+};
+
+pub use bindings::{hwaddr, MemTxAttrs};
+
+use crate::{
+    bindings::{self, device_endian, memory_region_init_io},
+    callbacks::FnCall,
+    cell::Opaque,
+    prelude::*,
+    zeroable::Zeroable,
+};
+
+pub struct MemoryRegionOps<T>(
+    bindings::MemoryRegionOps,
+    // Note: quite often you'll see PhantomData<fn(&T)> mentioned when discussing
+    // covariance and contravariance; you don't need any of those to understand
+    // this usage of PhantomData.  Quite simply, MemoryRegionOps<T> *logically*
+    // holds callbacks that take an argument of type &T, except the type is erased
+    // before the callback is stored in the bindings::MemoryRegionOps field.
+    // The argument of PhantomData is a function pointer in order to represent
+    // that relationship; while that will also provide desirable and safe variance
+    // for T, variance is not the point but just a consequence.
+    PhantomData<fn(&T)>,
+);
+
+// SAFETY: When a *const T is passed to the callbacks, the call itself
+// is done in a thread-safe manner.  The invocation is okay as long as
+// T itself is `Sync`.
+unsafe impl<T: Sync> Sync for MemoryRegionOps<T> {}
+
+#[derive(Clone)]
+pub struct MemoryRegionOpsBuilder<T>(bindings::MemoryRegionOps, PhantomData<fn(&T)>);
+
+unsafe extern "C" fn memory_region_ops_read_cb<T, F: for<'a> FnCall<(&'a T, hwaddr, u32), u64>>(
+    opaque: *mut c_void,
+    addr: hwaddr,
+    size: c_uint,
+) -> u64 {
+    F::call((unsafe { &*(opaque.cast::<T>()) }, addr, size))
+}
+
+unsafe extern "C" fn memory_region_ops_write_cb<T, F: for<'a> FnCall<(&'a T, hwaddr, u64, u32)>>(
+    opaque: *mut c_void,
+    addr: hwaddr,
+    data: u64,
+    size: c_uint,
+) {
+    F::call((unsafe { &*(opaque.cast::<T>()) }, addr, data, size))
+}
+
+impl<T> MemoryRegionOpsBuilder<T> {
+    #[must_use]
+    pub const fn read<F: for<'a> FnCall<(&'a T, hwaddr, u32), u64>>(mut self, _f: &F) -> Self {
+        self.0.read = Some(memory_region_ops_read_cb::<T, F>);
+        self
+    }
+
+    #[must_use]
+    pub const fn write<F: for<'a> FnCall<(&'a T, hwaddr, u64, u32)>>(mut self, _f: &F) -> Self {
+        self.0.write = Some(memory_region_ops_write_cb::<T, F>);
+        self
+    }
+
+    #[must_use]
+    pub const fn big_endian(mut self) -> Self {
+        self.0.endianness = device_endian::DEVICE_BIG_ENDIAN;
+        self
+    }
+
+    #[must_use]
+    pub const fn little_endian(mut self) -> Self {
+        self.0.endianness = device_endian::DEVICE_LITTLE_ENDIAN;
+        self
+    }
+
+    #[must_use]
+    pub const fn native_endian(mut self) -> Self {
+        self.0.endianness = device_endian::DEVICE_NATIVE_ENDIAN;
+        self
+    }
+
+    #[must_use]
+    pub const fn valid_sizes(mut self, min: u32, max: u32) -> Self {
+        self.0.valid.min_access_size = min;
+        self.0.valid.max_access_size = max;
+        self
+    }
+
+    #[must_use]
+    pub const fn valid_unaligned(mut self) -> Self {
+        self.0.valid.unaligned = true;
+        self
+    }
+
+    #[must_use]
+    pub const fn impl_sizes(mut self, min: u32, max: u32) -> Self {
+        self.0.impl_.min_access_size = min;
+        self.0.impl_.max_access_size = max;
+        self
+    }
+
+    #[must_use]
+    pub const fn impl_unaligned(mut self) -> Self {
+        self.0.impl_.unaligned = true;
+        self
+    }
+
+    #[must_use]
+    pub const fn build(self) -> MemoryRegionOps<T> {
+        MemoryRegionOps::<T>(self.0, PhantomData)
+    }
+
+    #[must_use]
+    pub const fn new() -> Self {
+        Self(bindings::MemoryRegionOps::ZERO, PhantomData)
+    }
+}
+
+impl<T> Default for MemoryRegionOpsBuilder<T> {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+/// A safe wrapper around [`bindings::MemoryRegion`].
+#[repr(transparent)]
+#[derive(qemu_api_macros::Wrapper)]
+pub struct MemoryRegion(Opaque<bindings::MemoryRegion>);
+
+unsafe impl Send for MemoryRegion {}
+unsafe impl Sync for MemoryRegion {}
+
+impl MemoryRegion {
+    // inline to ensure that it is not included in tests, which only
+    // link to hwcore and qom.  FIXME: inlining is actually the opposite
+    // of what we want, since this is the type-erased version of the
+    // init_io function below.  Look into splitting the qemu_api crate.
+    #[inline(always)]
+    unsafe fn do_init_io(
+        slot: *mut bindings::MemoryRegion,
+        owner: *mut Object,
+        ops: &'static bindings::MemoryRegionOps,
+        name: &'static str,
+        size: u64,
+    ) {
+        unsafe {
+            let cstr = CString::new(name).unwrap();
+            memory_region_init_io(
+                slot,
+                owner.cast::<bindings::Object>(),
+                ops,
+                owner.cast::<c_void>(),
+                cstr.as_ptr(),
+                size,
+            );
+        }
+    }
+
+    pub fn init_io<T: IsA<Object>>(
+        &mut self,
+        owner: *mut T,
+        ops: &'static MemoryRegionOps<T>,
+        name: &'static str,
+        size: u64,
+    ) {
+        unsafe {
+            Self::do_init_io(
+                self.0.as_mut_ptr(),
+                owner.cast::<Object>(),
+                &ops.0,
+                name,
+                size,
+            );
+        }
+    }
+}
+
+unsafe impl ObjectType for MemoryRegion {
+    type Class = bindings::MemoryRegionClass;
+    const TYPE_NAME: &'static CStr =
+        unsafe { CStr::from_bytes_with_nul_unchecked(bindings::TYPE_MEMORY_REGION) };
+}
+qom_isa!(MemoryRegion: Object);
+
+/// A special `MemTxAttrs` constant, used to indicate that no memory
+/// attributes are specified.
+///
+/// Bus masters which don't specify any attributes will get this,
+/// which has all attribute bits clear except the topmost one
+/// (so that we can distinguish "all attributes deliberately clear"
+/// from "didn't specify" if necessary).
+pub const MEMTXATTRS_UNSPECIFIED: MemTxAttrs = MemTxAttrs {
+    unspecified: true,
+    ..Zeroable::ZERO
+};
diff --git a/rust/qemu-api/src/offset_of.rs b/rust/qemu-api/src/offset_of.rs
index 075e98f986ba9..373229bbde96f 100644
--- a/rust/qemu-api/src/offset_of.rs
+++ b/rust/qemu-api/src/offset_of.rs
@@ -1,5 +1,12 @@
 // SPDX-License-Identifier: MIT
 
+#![doc(hidden)]
+//! This module provides macros that emulate the functionality of
+//! `core::mem::offset_of` on older versions of Rust.
+//!
+//! Documentation is hidden because it only exposes macros, which
+//! are exported directly from `qemu_api`.
+
 /// This macro provides the same functionality as `core::mem::offset_of`,
 /// except that only one level of field access is supported.  The declaration
 /// of the struct must be wrapped with `with_offsets! { }`.
diff --git a/rust/qemu-api/src/prelude.rs b/rust/qemu-api/src/prelude.rs
index 2dc86e19b29fe..43bfcd5fcab82 100644
--- a/rust/qemu-api/src/prelude.rs
+++ b/rust/qemu-api/src/prelude.rs
@@ -2,16 +2,23 @@
 // Author(s): Paolo Bonzini <pbonzini@redhat.com>
 // SPDX-License-Identifier: GPL-2.0-or-later
 
+//! Commonly used traits and types for QEMU.
+
 pub use crate::bitops::IntegerExt;
 
 pub use crate::cell::BqlCell;
 pub use crate::cell::BqlRefCell;
 
+pub use crate::errno;
+
+pub use crate::qdev::DeviceMethods;
+
+pub use crate::qom::InterfaceType;
 pub use crate::qom::IsA;
 pub use crate::qom::Object;
 pub use crate::qom::ObjectCast;
-pub use crate::qom::ObjectCastMut;
 pub use crate::qom::ObjectDeref;
+pub use crate::qom::ObjectClassMethods;
 pub use crate::qom::ObjectMethods;
 pub use crate::qom::ObjectType;
 
diff --git a/rust/qemu-api/src/qdev.rs b/rust/qemu-api/src/qdev.rs
index f4c75c752f173..18b4a9ba687d9 100644
--- a/rust/qemu-api/src/qdev.rs
+++ b/rust/qemu-api/src/qdev.rs
@@ -4,19 +4,105 @@
 
 //! Bindings to create devices and access device functionality from Rust.
 
-use std::{ffi::CStr, ptr::NonNull};
+use std::{
+    ffi::{CStr, CString},
+    os::raw::{c_int, c_void},
+    ptr::NonNull,
+};
 
-pub use bindings::{DeviceClass, DeviceState, Property};
+pub use bindings::{ClockEvent, DeviceClass, Property, ResetType};
 
 use crate::{
-    bindings::{self, Error},
+    bindings::{self, qdev_init_gpio_in, qdev_init_gpio_out, Error, ResettableClass},
+    callbacks::FnCall,
+    cell::{bql_locked, Opaque},
+    chardev::Chardev,
+    irq::InterruptSource,
     prelude::*,
-    qom::{ClassInitImpl, ObjectClass},
+    qom::{ObjectClass, ObjectImpl, Owned},
     vmstate::VMStateDescription,
 };
 
+/// A safe wrapper around [`bindings::Clock`].
+#[repr(transparent)]
+#[derive(Debug, qemu_api_macros::Wrapper)]
+pub struct Clock(Opaque<bindings::Clock>);
+
+unsafe impl Send for Clock {}
+unsafe impl Sync for Clock {}
+
+/// A safe wrapper around [`bindings::DeviceState`].
+#[repr(transparent)]
+#[derive(Debug, qemu_api_macros::Wrapper)]
+pub struct DeviceState(Opaque<bindings::DeviceState>);
+
+unsafe impl Send for DeviceState {}
+unsafe impl Sync for DeviceState {}
+
+/// Trait providing the contents of the `ResettablePhases` struct,
+/// which is part of the QOM `Resettable` interface.
+pub trait ResettablePhasesImpl {
+    /// If not None, this is called when the object enters reset. It
+    /// can reset local state of the object, but it must not do anything that
+    /// has a side-effect on other objects, such as raising or lowering an
+    /// [`InterruptSource`], or reading or writing guest memory. It takes the
+    /// reset's type as argument.
+    const ENTER: Option<fn(&Self, ResetType)> = None;
+
+    /// If not None, this is called when the object for entry into reset, once
+    /// every object in the system which is being reset has had its
+    /// `ResettablePhasesImpl::ENTER` method called. At this point devices
+    /// can do actions that affect other objects.
+    ///
+    /// If in doubt, implement this method.
+    const HOLD: Option<fn(&Self, ResetType)> = None;
+
+    /// If not None, this phase is called when the object leaves the reset
+    /// state. Actions affecting other objects are permitted.
+    const EXIT: Option<fn(&Self, ResetType)> = None;
+}
+
+/// # Safety
+///
+/// We expect the FFI user of this function to pass a valid pointer that
+/// can be downcasted to type `T`. We also expect the device is
+/// readable/writeable from one thread at any time.
+unsafe extern "C" fn rust_resettable_enter_fn<T: ResettablePhasesImpl>(
+    obj: *mut bindings::Object,
+    typ: ResetType,
+) {
+    let state = NonNull::new(obj).unwrap().cast::<T>();
+    T::ENTER.unwrap()(unsafe { state.as_ref() }, typ);
+}
+
+/// # Safety
+///
+/// We expect the FFI user of this function to pass a valid pointer that
+/// can be downcasted to type `T`. We also expect the device is
+/// readable/writeable from one thread at any time.
+unsafe extern "C" fn rust_resettable_hold_fn<T: ResettablePhasesImpl>(
+    obj: *mut bindings::Object,
+    typ: ResetType,
+) {
+    let state = NonNull::new(obj).unwrap().cast::<T>();
+    T::HOLD.unwrap()(unsafe { state.as_ref() }, typ);
+}
+
+/// # Safety
+///
+/// We expect the FFI user of this function to pass a valid pointer that
+/// can be downcasted to type `T`. We also expect the device is
+/// readable/writeable from one thread at any time.
+unsafe extern "C" fn rust_resettable_exit_fn<T: ResettablePhasesImpl>(
+    obj: *mut bindings::Object,
+    typ: ResetType,
+) {
+    let state = NonNull::new(obj).unwrap().cast::<T>();
+    T::EXIT.unwrap()(unsafe { state.as_ref() }, typ);
+}
+
 /// Trait providing the contents of [`DeviceClass`].
-pub trait DeviceImpl {
+pub trait DeviceImpl: ObjectImpl + ResettablePhasesImpl + IsA<DeviceState> {
     /// _Realization_ is the second stage of device creation. It contains
     /// all operations that depend on device properties and can fail (note:
     /// this is not yet supported for Rust devices).
@@ -25,13 +111,6 @@ pub trait DeviceImpl {
     /// with the function pointed to by `REALIZE`.
     const REALIZE: Option<fn(&Self)> = None;
 
-    /// If not `None`, the parent class's `reset` method is overridden
-    /// with the function pointed to by `RESET`.
-    ///
-    /// Rust does not yet support the three-phase reset protocol; this is
-    /// usually okay for leaf classes.
-    const RESET: Option<fn(&Self)> = None;
-
     /// An array providing the properties that the user can set on the
     /// device.  Not a `const` because referencing statics in constants
     /// is unstable until Rust 1.83.0.
@@ -50,54 +129,75 @@ pub trait DeviceImpl {
 /// # Safety
 ///
 /// This function is only called through the QOM machinery and
-/// used by the `ClassInitImpl<DeviceClass>` trait.
+/// used by `DeviceClass::class_init`.
 /// We expect the FFI user of this function to pass a valid pointer that
 /// can be downcasted to type `T`. We also expect the device is
 /// readable/writeable from one thread at any time.
-unsafe extern "C" fn rust_realize_fn<T: DeviceImpl>(dev: *mut DeviceState, _errp: *mut *mut Error) {
+unsafe extern "C" fn rust_realize_fn<T: DeviceImpl>(
+    dev: *mut bindings::DeviceState,
+    _errp: *mut *mut Error,
+) {
     let state = NonNull::new(dev).unwrap().cast::<T>();
     T::REALIZE.unwrap()(unsafe { state.as_ref() });
 }
 
-/// # Safety
-///
-/// We expect the FFI user of this function to pass a valid pointer that
-/// can be downcasted to type `T`. We also expect the device is
-/// readable/writeable from one thread at any time.
-unsafe extern "C" fn rust_reset_fn<T: DeviceImpl>(dev: *mut DeviceState) {
-    let mut state = NonNull::new(dev).unwrap().cast::<T>();
-    T::RESET.unwrap()(unsafe { state.as_mut() });
+unsafe impl InterfaceType for ResettableClass {
+    const TYPE_NAME: &'static CStr =
+        unsafe { CStr::from_bytes_with_nul_unchecked(bindings::TYPE_RESETTABLE_INTERFACE) };
 }
 
-impl<T> ClassInitImpl<DeviceClass> for T
-where
-    T: ClassInitImpl<ObjectClass> + DeviceImpl,
-{
-    fn class_init(dc: &mut DeviceClass) {
-        if <T as DeviceImpl>::REALIZE.is_some() {
-            dc.realize = Some(rust_realize_fn::<T>);
+impl ResettableClass {
+    /// Fill in the virtual methods of `ResettableClass` based on the
+    /// definitions in the `ResettablePhasesImpl` trait.
+    pub fn class_init<T: ResettablePhasesImpl>(&mut self) {
+        if <T as ResettablePhasesImpl>::ENTER.is_some() {
+            self.phases.enter = Some(rust_resettable_enter_fn::<T>);
         }
-        if <T as DeviceImpl>::RESET.is_some() {
-            unsafe {
-                bindings::device_class_set_legacy_reset(dc, Some(rust_reset_fn::<T>));
-            }
+        if <T as ResettablePhasesImpl>::HOLD.is_some() {
+            self.phases.hold = Some(rust_resettable_hold_fn::<T>);
+        }
+        if <T as ResettablePhasesImpl>::EXIT.is_some() {
+            self.phases.exit = Some(rust_resettable_exit_fn::<T>);
+        }
+    }
+}
+
+impl DeviceClass {
+    /// Fill in the virtual methods of `DeviceClass` based on the definitions in
+    /// the `DeviceImpl` trait.
+    pub fn class_init<T: DeviceImpl>(&mut self) {
+        if <T as DeviceImpl>::REALIZE.is_some() {
+            self.realize = Some(rust_realize_fn::<T>);
         }
         if let Some(vmsd) = <T as DeviceImpl>::vmsd() {
-            dc.vmsd = vmsd;
+            self.vmsd = vmsd;
         }
         let prop = <T as DeviceImpl>::properties();
         if !prop.is_empty() {
             unsafe {
-                bindings::device_class_set_props_n(dc, prop.as_ptr(), prop.len());
+                bindings::device_class_set_props_n(self, prop.as_ptr(), prop.len());
             }
         }
 
-        <T as ClassInitImpl<ObjectClass>>::class_init(&mut dc.parent_class);
+        ResettableClass::cast::<DeviceState>(self).class_init::<T>();
+        self.parent_class.class_init::<T>();
     }
 }
 
 #[macro_export]
 macro_rules! define_property {
+    ($name:expr, $state:ty, $field:ident, $prop:expr, $type:ty, bit = $bitnr:expr, default = $defval:expr$(,)*) => {
+        $crate::bindings::Property {
+            // use associated function syntax for type checking
+            name: ::std::ffi::CStr::as_ptr($name),
+            info: $prop,
+            offset: $crate::offset_of!($state, $field) as isize,
+            bitnr: $bitnr,
+            set_default: true,
+            defval: $crate::bindings::Property__bindgen_ty_1 { u: $defval as u64 },
+            ..$crate::zeroable::Zeroable::ZERO
+        }
+    };
     ($name:expr, $state:ty, $field:ident, $prop:expr, $type:ty, default = $defval:expr$(,)*) => {
         $crate::bindings::Property {
             // use associated function syntax for type checking
@@ -143,3 +243,154 @@ unsafe impl ObjectType for DeviceState {
         unsafe { CStr::from_bytes_with_nul_unchecked(bindings::TYPE_DEVICE) };
 }
 qom_isa!(DeviceState: Object);
+
+/// Trait for methods exposed by the [`DeviceState`] class.  The methods can be
+/// called on all objects that have the trait `IsA<DeviceState>`.
+///
+/// The trait should only be used through the blanket implementation,
+/// which guarantees safety via `IsA`.
+pub trait DeviceMethods: ObjectDeref
+where
+    Self::Target: IsA<DeviceState>,
+{
+    /// Add an input clock named `name`.  Invoke the callback with
+    /// `self` as the first parameter for the events that are requested.
+    ///
+    /// The resulting clock is added as a child of `self`, but it also
+    /// stays alive until after `Drop::drop` is called because C code
+    /// keeps an extra reference to it until `device_finalize()` calls
+    /// `qdev_finalize_clocklist()`.  Therefore (unlike most cases in
+    /// which Rust code has a reference to a child object) it would be
+    /// possible for this function to return a `&Clock` too.
+    #[inline]
+    fn init_clock_in<F: for<'a> FnCall<(&'a Self::Target, ClockEvent)>>(
+        &self,
+        name: &str,
+        _cb: &F,
+        events: ClockEvent,
+    ) -> Owned<Clock> {
+        fn do_init_clock_in(
+            dev: &DeviceState,
+            name: &str,
+            cb: Option<unsafe extern "C" fn(*mut c_void, ClockEvent)>,
+            events: ClockEvent,
+        ) -> Owned<Clock> {
+            assert!(bql_locked());
+
+            // SAFETY: the clock is heap allocated, but qdev_init_clock_in()
+            // does not gift the reference to its caller; so use Owned::from to
+            // add one.  The callback is disabled automatically when the clock
+            // is unparented, which happens before the device is finalized.
+            unsafe {
+                let cstr = CString::new(name).unwrap();
+                let clk = bindings::qdev_init_clock_in(
+                    dev.as_mut_ptr(),
+                    cstr.as_ptr(),
+                    cb,
+                    dev.as_void_ptr(),
+                    events.0,
+                );
+
+                let clk: &Clock = Clock::from_raw(clk);
+                Owned::from(clk)
+            }
+        }
+
+        let cb: Option<unsafe extern "C" fn(*mut c_void, ClockEvent)> = if F::is_some() {
+            unsafe extern "C" fn rust_clock_cb<T, F: for<'a> FnCall<(&'a T, ClockEvent)>>(
+                opaque: *mut c_void,
+                event: ClockEvent,
+            ) {
+                // SAFETY: the opaque is "this", which is indeed a pointer to T
+                F::call((unsafe { &*(opaque.cast::<T>()) }, event))
+            }
+            Some(rust_clock_cb::<Self::Target, F>)
+        } else {
+            None
+        };
+
+        do_init_clock_in(self.upcast(), name, cb, events)
+    }
+
+    /// Add an output clock named `name`.
+    ///
+    /// The resulting clock is added as a child of `self`, but it also
+    /// stays alive until after `Drop::drop` is called because C code
+    /// keeps an extra reference to it until `device_finalize()` calls
+    /// `qdev_finalize_clocklist()`.  Therefore (unlike most cases in
+    /// which Rust code has a reference to a child object) it would be
+    /// possible for this function to return a `&Clock` too.
+    #[inline]
+    fn init_clock_out(&self, name: &str) -> Owned<Clock> {
+        unsafe {
+            let cstr = CString::new(name).unwrap();
+            let clk = bindings::qdev_init_clock_out(self.upcast().as_mut_ptr(), cstr.as_ptr());
+
+            let clk: &Clock = Clock::from_raw(clk);
+            Owned::from(clk)
+        }
+    }
+
+    fn prop_set_chr(&self, propname: &str, chr: &Owned<Chardev>) {
+        assert!(bql_locked());
+        let c_propname = CString::new(propname).unwrap();
+        let chr: &Chardev = chr;
+        unsafe {
+            bindings::qdev_prop_set_chr(
+                self.upcast().as_mut_ptr(),
+                c_propname.as_ptr(),
+                chr.as_mut_ptr(),
+            );
+        }
+    }
+
+    fn init_gpio_in<F: for<'a> FnCall<(&'a Self::Target, u32, u32)>>(
+        &self,
+        num_lines: u32,
+        _cb: F,
+    ) {
+        fn do_init_gpio_in(
+            dev: &DeviceState,
+            num_lines: u32,
+            gpio_in_cb: unsafe extern "C" fn(*mut c_void, c_int, c_int),
+        ) {
+            unsafe {
+                qdev_init_gpio_in(dev.as_mut_ptr(), Some(gpio_in_cb), num_lines as c_int);
+            }
+        }
+
+        let _: () = F::ASSERT_IS_SOME;
+        unsafe extern "C" fn rust_irq_handler<T, F: for<'a> FnCall<(&'a T, u32, u32)>>(
+            opaque: *mut c_void,
+            line: c_int,
+            level: c_int,
+        ) {
+            // SAFETY: the opaque was passed as a reference to `T`
+            F::call((unsafe { &*(opaque.cast::<T>()) }, line as u32, level as u32))
+        }
+
+        let gpio_in_cb: unsafe extern "C" fn(*mut c_void, c_int, c_int) =
+            rust_irq_handler::<Self::Target, F>;
+
+        do_init_gpio_in(self.upcast(), num_lines, gpio_in_cb);
+    }
+
+    fn init_gpio_out(&self, pins: &[InterruptSource]) {
+        unsafe {
+            qdev_init_gpio_out(
+                self.upcast().as_mut_ptr(),
+                InterruptSource::slice_as_ptr(pins),
+                pins.len() as c_int,
+            );
+        }
+    }
+}
+
+impl<R: ObjectDeref> DeviceMethods for R where R::Target: IsA<DeviceState> {}
+
+unsafe impl ObjectType for Clock {
+    type Class = ObjectClass;
+    const TYPE_NAME: &'static CStr =
+        unsafe { CStr::from_bytes_with_nul_unchecked(bindings::TYPE_CLOCK) };
+}
+qom_isa!(Clock: Object);
diff --git a/rust/qemu-api/src/qom.rs b/rust/qemu-api/src/qom.rs
index f50ee371aacc3..34d7bc0ef96ff 100644
--- a/rust/qemu-api/src/qom.rs
+++ b/rust/qemu-api/src/qom.rs
@@ -37,11 +37,8 @@
 //! * a trait for virtual method implementations, for example `DeviceImpl`.
 //!   Child classes implement this trait to provide their own behavior for
 //!   virtual methods. The trait's methods take `&self` to access instance data.
-//!
-//! * an implementation of [`ClassInitImpl`], for example
-//!   `ClassInitImpl<DeviceClass>`. This fills the vtable in the class struct;
-//!   the source for this is the `*Impl` trait; the associated consts and
-//!   functions if needed are wrapped to map C types into Rust types.
+//!   The traits have the appropriate specialization of `IsA<>` as a supertrait,
+//!   for example `IsA<DeviceState>` for `DeviceImpl`.
 //!
 //! * a trait for instance methods, for example `DeviceMethods`. This trait is
 //!   automatically implemented for any reference or smart pointer to a device
@@ -52,18 +49,75 @@
 //!   This provides access to class-wide functionality that doesn't depend on
 //!   instance data. Like instance methods, these are automatically inherited by
 //!   child classes.
+//!
+//! # Class structures
+//!
+//! Each QOM class that has virtual methods describes them in a
+//! _class struct_.  Class structs include a parent field corresponding
+//! to the vtable of the parent class, all the way up to [`ObjectClass`].
+//!
+//! As mentioned above, virtual methods are defined via traits such as
+//! `DeviceImpl`.  Class structs do not define any trait but, conventionally,
+//! all of them have a `class_init` method to initialize the virtual methods
+//! based on the trait and then call the same method on the superclass.
+//!
+//! ```ignore
+//! impl YourSubclassClass
+//! {
+//!     pub fn class_init<T: YourSubclassImpl>(&mut self) {
+//!         ...
+//!         klass.parent_class::class_init<T>();
+//!     }
+//! }
+//! ```
+//!
+//! If a class implements a QOM interface.  In that case, the function must
+//! contain, for each interface, an extra forwarding call as follows:
+//!
+//! ```ignore
+//! ResettableClass::cast::<Self>(self).class_init::<Self>();
+//! ```
+//!
+//! These `class_init` functions are methods on the class rather than a trait,
+//! because the bound on `T` (`DeviceImpl` in this case), will change for every
+//! class struct.  The functions are pointed to by the
+//! [`ObjectImpl::CLASS_INIT`] function pointer. While there is no default
+//! implementation, in most cases it will be enough to write it as follows:
+//!
+//! ```ignore
+//! const CLASS_INIT: fn(&mut Self::Class)> = Self::Class::class_init::<Self>;
+//! ```
+//!
+//! This design incurs a small amount of code duplication but, by not using
+//! traits, it allows the flexibility of implementing bindings in any crate,
+//! without incurring into violations of orphan rules for traits.
 
 use std::{
     ffi::CStr,
     fmt,
+    mem::ManuallyDrop,
     ops::{Deref, DerefMut},
     os::raw::c_void,
     ptr::NonNull,
 };
 
-pub use bindings::{Object, ObjectClass};
+pub use bindings::ObjectClass;
 
-use crate::bindings::{self, object_dynamic_cast, object_get_class, object_get_typename, TypeInfo};
+use crate::{
+    bindings::{
+        self, object_class_dynamic_cast, object_dynamic_cast, object_get_class,
+        object_get_typename, object_new, object_ref, object_unref, TypeInfo,
+    },
+    cell::{bql_locked, Opaque},
+};
+
+/// A safe wrapper around [`bindings::Object`].
+#[repr(transparent)]
+#[derive(Debug, qemu_api_macros::Wrapper)]
+pub struct Object(Opaque<bindings::Object>);
+
+unsafe impl Send for Object {}
+unsafe impl Sync for Object {}
 
 /// Marker trait: `Self` can be statically upcasted to `P` (i.e. `P` is a direct
 /// or indirect parent of `Self`).
@@ -153,7 +207,7 @@ impl<T: fmt::Display + ObjectType> fmt::Display for ParentField<T> {
     }
 }
 
-unsafe extern "C" fn rust_instance_init<T: ObjectImpl>(obj: *mut Object) {
+unsafe extern "C" fn rust_instance_init<T: ObjectImpl>(obj: *mut bindings::Object) {
     let mut state = NonNull::new(obj).unwrap().cast::<T>();
     // SAFETY: obj is an instance of T, since rust_instance_init<T>
     // is called from QOM core as the instance_init function
@@ -163,7 +217,7 @@ unsafe extern "C" fn rust_instance_init<T: ObjectImpl>(obj: *mut Object) {
     }
 }
 
-unsafe extern "C" fn rust_instance_post_init<T: ObjectImpl>(obj: *mut Object) {
+unsafe extern "C" fn rust_instance_post_init<T: ObjectImpl>(obj: *mut bindings::Object) {
     let state = NonNull::new(obj).unwrap().cast::<T>();
     // SAFETY: obj is an instance of T, since rust_instance_post_init<T>
     // is called from QOM core as the instance_post_init function
@@ -171,7 +225,7 @@ unsafe extern "C" fn rust_instance_post_init<T: ObjectImpl>(obj: *mut Object) {
     T::INSTANCE_POST_INIT.unwrap()(unsafe { state.as_ref() });
 }
 
-unsafe extern "C" fn rust_class_init<T: ObjectType + ClassInitImpl<T::Class>>(
+unsafe extern "C" fn rust_class_init<T: ObjectType + ObjectImpl>(
     klass: *mut ObjectClass,
     _data: *mut c_void,
 ) {
@@ -181,10 +235,10 @@ unsafe extern "C" fn rust_class_init<T: ObjectType + ClassInitImpl<T::Class>>(
     // SAFETY: klass is a T::Class, since rust_class_init<T>
     // is called from QOM core as the class_init function
     // for class T
-    T::class_init(unsafe { klass.as_mut() })
+    <T as ObjectImpl>::CLASS_INIT(unsafe { klass.as_mut() })
 }
 
-unsafe extern "C" fn drop_object<T: ObjectImpl>(obj: *mut Object) {
+unsafe extern "C" fn drop_object<T: ObjectImpl>(obj: *mut bindings::Object) {
     // SAFETY: obj is an instance of T, since drop_object<T> is called
     // from the QOM core function object_deinit() as the instance_finalize
     // function for class T.  Note that while object_deinit() will drop the
@@ -234,14 +288,14 @@ pub unsafe trait ObjectType: Sized {
     /// Return the receiver as an Object.  This is always safe, even
     /// if this type represents an interface.
     fn as_object(&self) -> &Object {
-        unsafe { &*self.as_object_ptr() }
+        unsafe { &*self.as_ptr().cast() }
     }
 
     /// Return the receiver as a const raw pointer to Object.
     /// This is preferrable to `as_object_mut_ptr()` if a C
     /// function only needs a `const Object *`.
-    fn as_object_ptr(&self) -> *const Object {
-        self.as_ptr().cast()
+    fn as_object_ptr(&self) -> *const bindings::Object {
+        self.as_object().as_ptr()
     }
 
     /// Return the receiver as a mutable raw pointer to Object.
@@ -251,8 +305,54 @@ pub unsafe trait ObjectType: Sized {
     /// This cast is always safe, but because the result is mutable
     /// and the incoming reference is not, this should only be used
     /// for calls to C functions, and only if needed.
-    unsafe fn as_object_mut_ptr(&self) -> *mut Object {
-        self.as_object_ptr() as *mut _
+    unsafe fn as_object_mut_ptr(&self) -> *mut bindings::Object {
+        self.as_object().as_mut_ptr()
+    }
+}
+
+/// Trait exposed by all structs corresponding to QOM interfaces.
+/// Unlike `ObjectType`, it is implemented on the class type (which provides
+/// the vtable for the interfaces).
+///
+/// # Safety
+///
+/// `TYPE` must match the contents of the `TypeInfo` as found in the C code;
+/// right now, interfaces can only be declared in C.
+pub unsafe trait InterfaceType: Sized {
+    /// The name of the type, which can be passed to
+    /// `object_class_dynamic_cast()` to obtain the pointer to the vtable
+    /// for this interface.
+    const TYPE_NAME: &'static CStr;
+
+    /// Return the vtable for the interface; `U` is the type that
+    /// lists the interface in its `TypeInfo`.
+    ///
+    /// # Examples
+    ///
+    /// This function is usually called by a `class_init` method in `U::Class`.
+    /// For example, `DeviceClass::class_init<T>` initializes its `Resettable`
+    /// interface as follows:
+    ///
+    /// ```ignore
+    /// ResettableClass::cast::<DeviceState>(self).class_init::<T>();
+    /// ```
+    ///
+    /// where `T` is the concrete subclass that is being initialized.
+    ///
+    /// # Panics
+    ///
+    /// Panic if the incoming argument if `T` does not implement the interface.
+    fn cast<U: ObjectType>(klass: &mut U::Class) -> &mut Self {
+        unsafe {
+            // SAFETY: upcasting to ObjectClass is always valid, and the
+            // return type is either NULL or the argument itself
+            let result: *mut Self = object_class_dynamic_cast(
+                (klass as *mut U::Class).cast(),
+                Self::TYPE_NAME.as_ptr(),
+            )
+            .cast();
+            result.as_mut().unwrap()
+        }
     }
 }
 
@@ -280,10 +380,10 @@ where
     ///
     /// # Safety
     ///
-    /// This method is unsafe because it overrides const-ness of `&self`.
-    /// Bindings to C APIs will use it a lot, but otherwise it should not
-    /// be necessary.
-    unsafe fn as_mut_ptr<U: ObjectType>(&self) -> *mut U
+    /// This method is safe because only the actual dereference of the pointer
+    /// has to be unsafe.  Bindings to C APIs will use it a lot, but care has
+    /// to be taken because it overrides the const-ness of `&self`.
+    fn as_mut_ptr<U: ObjectType>(&self) -> *mut U
     where
         Self::Target: IsA<U>,
     {
@@ -363,93 +463,10 @@ where
 impl<T: ObjectType> ObjectDeref for &T {}
 impl<T: ObjectType> ObjectCast for &T {}
 
-/// Trait for mutable type casting operations in the QOM hierarchy.
-///
-/// This trait provides the mutable counterparts to [`ObjectCast`]'s conversion
-/// functions. Unlike `ObjectCast`, this trait returns `Result` for fallible
-/// conversions to preserve the original smart pointer if the cast fails. This
-/// is necessary because mutable references cannot be copied, so a failed cast
-/// must return ownership of the original reference. For example:
-///
-/// ```ignore
-/// let mut dev = get_device();
-/// // If this fails, we need the original `dev` back to try something else
-/// match dev.dynamic_cast_mut::<FooDevice>() {
-///    Ok(foodev) => /* use foodev */,
-///    Err(dev) => /* still have ownership of dev */
-/// }
-/// ```
-pub trait ObjectCastMut: Sized + ObjectDeref + DerefMut
-where
-    Self::Target: ObjectType,
-{
-    /// Safely convert from a derived type to one of its parent types.
-    ///
-    /// This is always safe; the [`IsA`] trait provides static verification
-    /// that `Self` dereferences to `U` or a child of `U`.
-    fn upcast_mut<'a, U: ObjectType>(self) -> &'a mut U
-    where
-        Self::Target: IsA<U>,
-        Self: 'a,
-    {
-        // SAFETY: soundness is declared via IsA<U>, which is an unsafe trait
-        unsafe { self.unsafe_cast_mut::<U>() }
-    }
-
-    /// Attempt to convert to a derived type.
-    ///
-    /// Returns `Ok(..)` if the object is of type `U`, or `Err(self)` if the
-    /// object if the conversion failed. This is verified at runtime by
-    /// checking the object's type information.
-    fn downcast_mut<'a, U: IsA<Self::Target>>(self) -> Result<&'a mut U, Self>
-    where
-        Self: 'a,
-    {
-        self.dynamic_cast_mut::<U>()
-    }
-
-    /// Attempt to convert between any two types in the QOM hierarchy.
-    ///
-    /// Returns `Ok(..)` if the object is of type `U`, or `Err(self)` if the
-    /// object if the conversion failed. This is verified at runtime by
-    /// checking the object's type information.
-    fn dynamic_cast_mut<'a, U: ObjectType>(self) -> Result<&'a mut U, Self>
-    where
-        Self: 'a,
-    {
-        unsafe {
-            // SAFETY: upcasting to Object is always valid, and the
-            // return type is either NULL or the argument itself
-            let result: *mut U =
-                object_dynamic_cast(self.as_object_mut_ptr(), U::TYPE_NAME.as_ptr()).cast();
-
-            result.as_mut().ok_or(self)
-        }
-    }
-
-    /// Convert to any QOM type without verification.
-    ///
-    /// # Safety
-    ///
-    /// What safety? You need to know yourself that the cast is correct; only
-    /// use when performance is paramount.  It is still better than a raw
-    /// pointer `cast()`, which does not even check that you remain in the
-    /// realm of QOM `ObjectType`s.
-    ///
-    /// `unsafe_cast::<Object>()` is always safe.
-    unsafe fn unsafe_cast_mut<'a, U: ObjectType>(self) -> &'a mut U
-    where
-        Self: 'a,
-    {
-        unsafe { &mut *self.as_mut_ptr::<Self::Target>().cast::<U>() }
-    }
-}
-
 impl<T: ObjectType> ObjectDeref for &mut T {}
-impl<T: ObjectType> ObjectCastMut for &mut T {}
 
 /// Trait a type must implement to be registered with QEMU.
-pub trait ObjectImpl: ObjectType + ClassInitImpl<Self::Class> {
+pub trait ObjectImpl: ObjectType + IsA<Object> {
     /// The parent of the type.  This should match the first field of the
     /// struct that implements `ObjectImpl`, minus the `ParentField<_>` wrapper.
     type ParentType: ObjectType;
@@ -502,85 +519,26 @@ pub trait ObjectImpl: ObjectType + ClassInitImpl<Self::Class> {
 
     // methods on ObjectClass
     const UNPARENT: Option<fn(&Self)> = None;
-}
 
-/// Internal trait used to automatically fill in a class struct.
-///
-/// Each QOM class that has virtual methods describes them in a
-/// _class struct_.  Class structs include a parent field corresponding
-/// to the vtable of the parent class, all the way up to [`ObjectClass`].
-/// Each QOM type has one such class struct; this trait takes care of
-/// initializing the `T` part of the class struct, for the type that
-/// implements the trait.
-///
-/// Each struct will implement this trait with `T` equal to each
-/// superclass.  For example, a device should implement at least
-/// `ClassInitImpl<`[`DeviceClass`](crate::qdev::DeviceClass)`>` and
-/// `ClassInitImpl<`[`ObjectClass`]`>`.  Such implementations are made
-/// in one of two ways.
-///
-/// For most superclasses, `ClassInitImpl` is provided by the `qemu-api`
-/// crate itself.  The Rust implementation of methods will come from a
-/// trait like [`ObjectImpl`] or [`DeviceImpl`](crate::qdev::DeviceImpl),
-/// and `ClassInitImpl` is provided by blanket implementations that
-/// operate on all implementors of the `*Impl`* trait.  For example:
-///
-/// ```ignore
-/// impl<T> ClassInitImpl<DeviceClass> for T
-/// where
-///     T: ClassInitImpl<ObjectClass> + DeviceImpl,
-/// ```
-///
-/// The bound on `ClassInitImpl<ObjectClass>` is needed so that,
-/// after initializing the `DeviceClass` part of the class struct,
-/// the parent [`ObjectClass`] is initialized as well.
-///
-/// The other case is when manual implementation of the trait is needed.
-/// This covers the following cases:
-///
-/// * if a class implements a QOM interface, the Rust code _has_ to define its
-///   own class struct `FooClass` and implement `ClassInitImpl<FooClass>`.
-///   `ClassInitImpl<FooClass>`'s `class_init` method will then forward to
-///   multiple other `class_init`s, for the interfaces as well as the
-///   superclass. (Note that there is no Rust example yet for using interfaces).
-///
-/// * for classes implemented outside the ``qemu-api`` crate, it's not possible
-///   to add blanket implementations like the above one, due to orphan rules. In
-///   that case, the easiest solution is to implement
-///   `ClassInitImpl<YourSuperclass>` for each subclass and not have a
-///   `YourSuperclassImpl` trait at all.
-///
-/// ```ignore
-/// impl ClassInitImpl<YourSuperclass> for YourSubclass {
-///     fn class_init(klass: &mut YourSuperclass) {
-///         klass.some_method = Some(Self::some_method);
-///         <Self as ClassInitImpl<SysBusDeviceClass>>::class_init(&mut klass.parent_class);
-///     }
-/// }
-/// ```
-///
-///   While this method incurs a small amount of code duplication,
-///   it is generally limited to the recursive call on the last line.
-///   This is because classes defined in Rust do not need the same
-///   glue code that is needed when the classes are defined in C code.
-///   You may consider using a macro if you have many subclasses.
-pub trait ClassInitImpl<T> {
-    /// Initialize `klass` to point to the virtual method implementations
+    /// Store into the argument the virtual method implementations
     /// for `Self`.  On entry, the virtual method pointers are set to
     /// the default values coming from the parent classes; the function
     /// can change them to override virtual methods of a parent class.
     ///
-    /// The virtual method implementations usually come from another
-    /// trait, for example [`DeviceImpl`](crate::qdev::DeviceImpl)
-    /// when `T` is [`DeviceClass`](crate::qdev::DeviceClass).
+    /// Usually defined simply as `Self::Class::class_init::<Self>`;
+    /// however a default implementation cannot be included here, because the
+    /// bounds that the `Self::Class::class_init` method places on `Self` are
+    /// not known in advance.
     ///
-    /// On entry, `klass`'s parent class is initialized, while the other fields
+    /// # Safety
+    ///
+    /// While `klass`'s parent class is initialized on entry, the other fields
     /// are all zero; it is therefore assumed that all fields in `T` can be
     /// zeroed, otherwise it would not be possible to provide the class as a
     /// `&mut T`.  TODO: add a bound of [`Zeroable`](crate::zeroable::Zeroable)
     /// to T; this is more easily done once Zeroable does not require a manual
     /// implementation (Rust 1.75.0).
-    fn class_init(klass: &mut T);
+    const CLASS_INIT: fn(&mut Self::Class);
 }
 
 /// # Safety
@@ -588,18 +546,17 @@ pub trait ClassInitImpl<T> {
 /// We expect the FFI user of this function to pass a valid pointer that
 /// can be downcasted to type `T`. We also expect the device is
 /// readable/writeable from one thread at any time.
-unsafe extern "C" fn rust_unparent_fn<T: ObjectImpl>(dev: *mut Object) {
+unsafe extern "C" fn rust_unparent_fn<T: ObjectImpl>(dev: *mut bindings::Object) {
     let state = NonNull::new(dev).unwrap().cast::<T>();
     T::UNPARENT.unwrap()(unsafe { state.as_ref() });
 }
 
-impl<T> ClassInitImpl<ObjectClass> for T
-where
-    T: ObjectImpl,
-{
-    fn class_init(oc: &mut ObjectClass) {
+impl ObjectClass {
+    /// Fill in the virtual methods of `ObjectClass` based on the definitions in
+    /// the `ObjectImpl` trait.
+    pub fn class_init<T: ObjectImpl>(&mut self) {
         if <T as ObjectImpl>::UNPARENT.is_some() {
-            oc.unparent = Some(rust_unparent_fn::<T>);
+            self.unparent = Some(rust_unparent_fn::<T>);
         }
     }
 }
@@ -610,6 +567,167 @@ unsafe impl ObjectType for Object {
         unsafe { CStr::from_bytes_with_nul_unchecked(bindings::TYPE_OBJECT) };
 }
 
+/// A reference-counted pointer to a QOM object.
+///
+/// `Owned<T>` wraps `T` with automatic reference counting.  It increases the
+/// reference count when created via [`Owned::from`] or cloned, and decreases
+/// it when dropped.  This ensures that the reference count remains elevated
+/// as long as any `Owned<T>` references to it exist.
+///
+/// `Owned<T>` can be used for two reasons:
+/// * because the lifetime of the QOM object is unknown and someone else could
+///   take a reference (similar to `Arc<T>`, for example): in this case, the
+///   object can escape and outlive the Rust struct that contains the `Owned<T>`
+///   field;
+///
+/// * to ensure that the object stays alive until after `Drop::drop` is called
+///   on the Rust struct: in this case, the object will always die together with
+///   the Rust struct that contains the `Owned<T>` field.
+///
+/// Child properties are an example of the second case: in C, an object that
+/// is created with `object_initialize_child` will die *before*
+/// `instance_finalize` is called, whereas Rust expects the struct to have valid
+/// contents when `Drop::drop` is called.  Therefore Rust structs that have
+/// child properties need to keep a reference to the child object.  Right now
+/// this can be done with `Owned<T>`; in the future one might have a separate
+/// `Child<'parent, T>` smart pointer that keeps a reference to a `T`, like
+/// `Owned`, but does not allow cloning.
+///
+/// Note that dropping an `Owned<T>` requires the big QEMU lock to be taken.
+#[repr(transparent)]
+#[derive(PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub struct Owned<T: ObjectType>(NonNull<T>);
+
+// The following rationale for safety is taken from Linux's kernel::sync::Arc.
+
+// SAFETY: It is safe to send `Owned<T>` to another thread when the underlying
+// `T` is `Sync` because it effectively means sharing `&T` (which is safe
+// because `T` is `Sync`); additionally, it needs `T` to be `Send` because any
+// thread that has an `Owned<T>` may ultimately access `T` using a
+// mutable reference when the reference count reaches zero and `T` is dropped.
+unsafe impl<T: ObjectType + Send + Sync> Send for Owned<T> {}
+
+// SAFETY: It is safe to send `&Owned<T>` to another thread when the underlying
+// `T` is `Sync` because it effectively means sharing `&T` (which is safe
+// because `T` is `Sync`); additionally, it needs `T` to be `Send` because any
+// thread that has a `&Owned<T>` may clone it and get an `Owned<T>` on that
+// thread, so the thread may ultimately access `T` using a mutable reference
+// when the reference count reaches zero and `T` is dropped.
+unsafe impl<T: ObjectType + Sync + Send> Sync for Owned<T> {}
+
+impl<T: ObjectType> Owned<T> {
+    /// Convert a raw C pointer into an owned reference to the QOM
+    /// object it points to.  The object's reference count will be
+    /// decreased when the `Owned` is dropped.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `ptr` is NULL.
+    ///
+    /// # Safety
+    ///
+    /// The caller must indeed own a reference to the QOM object.
+    /// The object must not be embedded in another unless the outer
+    /// object is guaranteed to have a longer lifetime.
+    ///
+    /// A raw pointer obtained via [`Owned::into_raw()`] can always be passed
+    /// back to `from_raw()` (assuming the original `Owned` was valid!),
+    /// since the owned reference remains there between the calls to
+    /// `into_raw()` and `from_raw()`.
+    pub unsafe fn from_raw(ptr: *const T) -> Self {
+        // SAFETY NOTE: while NonNull requires a mutable pointer, only
+        // Deref is implemented so the pointer passed to from_raw
+        // remains const
+        Owned(NonNull::new(ptr as *mut T).unwrap())
+    }
+
+    /// Obtain a raw C pointer from a reference.  `src` is consumed
+    /// and the reference is leaked.
+    #[allow(clippy::missing_const_for_fn)]
+    pub fn into_raw(src: Owned<T>) -> *mut T {
+        let src = ManuallyDrop::new(src);
+        src.0.as_ptr()
+    }
+
+    /// Increase the reference count of a QOM object and return
+    /// a new owned reference to it.
+    ///
+    /// # Safety
+    ///
+    /// The object must not be embedded in another, unless the outer
+    /// object is guaranteed to have a longer lifetime.
+    pub unsafe fn from(obj: &T) -> Self {
+        unsafe {
+            object_ref(obj.as_object_mut_ptr().cast::<c_void>());
+
+            // SAFETY NOTE: while NonNull requires a mutable pointer, only
+            // Deref is implemented so the reference passed to from_raw
+            // remains shared
+            Owned(NonNull::new_unchecked(obj.as_mut_ptr()))
+        }
+    }
+}
+
+impl<T: ObjectType> Clone for Owned<T> {
+    fn clone(&self) -> Self {
+        // SAFETY: creation method is unsafe; whoever calls it has
+        // responsibility that the pointer is valid, and remains valid
+        // throughout the lifetime of the `Owned<T>` and its clones.
+        unsafe { Owned::from(self.deref()) }
+    }
+}
+
+impl<T: ObjectType> Deref for Owned<T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        // SAFETY: creation method is unsafe; whoever calls it has
+        // responsibility that the pointer is valid, and remains valid
+        // throughout the lifetime of the `Owned<T>` and its clones.
+        // With that guarantee, reference counting ensures that
+        // the object remains alive.
+        unsafe { &*self.0.as_ptr() }
+    }
+}
+impl<T: ObjectType> ObjectDeref for Owned<T> {}
+
+impl<T: ObjectType> Drop for Owned<T> {
+    fn drop(&mut self) {
+        assert!(bql_locked());
+        // SAFETY: creation method is unsafe, and whoever calls it has
+        // responsibility that the pointer is valid, and remains valid
+        // throughout the lifetime of the `Owned<T>` and its clones.
+        unsafe {
+            object_unref(self.as_object_mut_ptr().cast::<c_void>());
+        }
+    }
+}
+
+impl<T: IsA<Object>> fmt::Debug for Owned<T> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        self.deref().debug_fmt(f)
+    }
+}
+
+/// Trait for class methods exposed by the Object class.  The methods can be
+/// called on all objects that have the trait `IsA<Object>`.
+///
+/// The trait should only be used through the blanket implementation,
+/// which guarantees safety via `IsA`
+pub trait ObjectClassMethods: IsA<Object> {
+    /// Return a new reference counted instance of this class
+    fn new() -> Owned<Self> {
+        assert!(bql_locked());
+        // SAFETY: the object created by object_new is allocated on
+        // the heap and has a reference count of 1
+        unsafe {
+            let raw_obj = object_new(Self::TYPE_NAME.as_ptr());
+            let obj = Object::from_raw(raw_obj).unsafe_cast::<Self>();
+            Owned::from_raw(obj)
+        }
+    }
+}
+
 /// Trait for methods exposed by the Object class.  The methods can be
 /// called on all objects that have the trait `IsA<Object>`.
 ///
@@ -641,6 +759,14 @@ where
 
         klass
     }
+
+    /// Convenience function for implementing the Debug trait
+    fn debug_fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.debug_tuple(&self.typename())
+            .field(&(self as *const Self))
+            .finish()
+    }
 }
 
+impl<T> ObjectClassMethods for T where T: IsA<Object> {}
 impl<R: ObjectDeref> ObjectMethods for R where R::Target: IsA<Object> {}
diff --git a/rust/qemu-api/src/sysbus.rs b/rust/qemu-api/src/sysbus.rs
index e6762b5c14556..e92502a8fe65d 100644
--- a/rust/qemu-api/src/sysbus.rs
+++ b/rust/qemu-api/src/sysbus.rs
@@ -2,19 +2,30 @@
 // Author(s): Paolo Bonzini <pbonzini@redhat.com>
 // SPDX-License-Identifier: GPL-2.0-or-later
 
-use std::{ffi::CStr, ptr::addr_of};
+//! Bindings to access `sysbus` functionality from Rust.
 
-pub use bindings::{SysBusDevice, SysBusDeviceClass};
+use std::{ffi::CStr, ptr::addr_of_mut};
+
+pub use bindings::SysBusDeviceClass;
 
 use crate::{
     bindings,
-    cell::bql_locked,
-    irq::InterruptSource,
+    cell::{bql_locked, Opaque},
+    irq::{IRQState, InterruptSource},
+    memory::MemoryRegion,
     prelude::*,
-    qdev::{DeviceClass, DeviceState},
-    qom::ClassInitImpl,
+    qdev::{DeviceImpl, DeviceState},
+    qom::Owned,
 };
 
+/// A safe wrapper around [`bindings::SysBusDevice`].
+#[repr(transparent)]
+#[derive(Debug, qemu_api_macros::Wrapper)]
+pub struct SysBusDevice(Opaque<bindings::SysBusDevice>);
+
+unsafe impl Send for SysBusDevice {}
+unsafe impl Sync for SysBusDevice {}
+
 unsafe impl ObjectType for SysBusDevice {
     type Class = SysBusDeviceClass;
     const TYPE_NAME: &'static CStr =
@@ -22,13 +33,14 @@ unsafe impl ObjectType for SysBusDevice {
 }
 qom_isa!(SysBusDevice: DeviceState, Object);
 
-// TODO: add SysBusDeviceImpl
-impl<T> ClassInitImpl<SysBusDeviceClass> for T
-where
-    T: ClassInitImpl<DeviceClass>,
-{
-    fn class_init(sdc: &mut SysBusDeviceClass) {
-        <T as ClassInitImpl<DeviceClass>>::class_init(&mut sdc.parent_class);
+// TODO: add virtual methods
+pub trait SysBusDeviceImpl: DeviceImpl + IsA<SysBusDevice> {}
+
+impl SysBusDeviceClass {
+    /// Fill in the virtual methods of `SysBusDeviceClass` based on the
+    /// definitions in the `SysBusDeviceImpl` trait.
+    pub fn class_init<T: SysBusDeviceImpl>(self: &mut SysBusDeviceClass) {
+        self.parent_class.class_init::<T>();
     }
 }
 
@@ -42,10 +54,10 @@ where
     /// important, since whoever creates the sysbus device will refer to the
     /// region with a number that corresponds to the order of calls to
     /// `init_mmio`.
-    fn init_mmio(&self, iomem: &bindings::MemoryRegion) {
+    fn init_mmio(&self, iomem: &MemoryRegion) {
         assert!(bql_locked());
         unsafe {
-            bindings::sysbus_init_mmio(self.as_mut_ptr(), addr_of!(*iomem) as *mut _);
+            bindings::sysbus_init_mmio(self.upcast().as_mut_ptr(), iomem.as_mut_ptr());
         }
     }
 
@@ -56,7 +68,53 @@ where
     fn init_irq(&self, irq: &InterruptSource) {
         assert!(bql_locked());
         unsafe {
-            bindings::sysbus_init_irq(self.as_mut_ptr(), irq.as_ptr());
+            bindings::sysbus_init_irq(self.upcast().as_mut_ptr(), irq.as_ptr());
+        }
+    }
+
+    // TODO: do we want a type like GuestAddress here?
+    fn mmio_addr(&self, id: u32) -> Option<u64> {
+        assert!(bql_locked());
+        // SAFETY: the BQL ensures that no one else writes to sbd.mmio[], and
+        // the SysBusDevice must be initialized to get an IsA<SysBusDevice>.
+        let sbd = unsafe { *self.upcast().as_ptr() };
+        let id: usize = id.try_into().unwrap();
+        if sbd.mmio[id].memory.is_null() {
+            None
+        } else {
+            Some(sbd.mmio[id].addr)
+        }
+    }
+
+    // TODO: do we want a type like GuestAddress here?
+    fn mmio_map(&self, id: u32, addr: u64) {
+        assert!(bql_locked());
+        let id: i32 = id.try_into().unwrap();
+        unsafe {
+            bindings::sysbus_mmio_map(self.upcast().as_mut_ptr(), id, addr);
+        }
+    }
+
+    // Owned<> is used here because sysbus_connect_irq (via
+    // object_property_set_link) adds a reference to the IRQState,
+    // which can prolong its life
+    fn connect_irq(&self, id: u32, irq: &Owned<IRQState>) {
+        assert!(bql_locked());
+        let id: i32 = id.try_into().unwrap();
+        let irq: &IRQState = irq;
+        unsafe {
+            bindings::sysbus_connect_irq(self.upcast().as_mut_ptr(), id, irq.as_mut_ptr());
+        }
+    }
+
+    fn sysbus_realize(&self) {
+        // TODO: return an Error
+        assert!(bql_locked());
+        unsafe {
+            bindings::sysbus_realize(
+                self.upcast().as_mut_ptr(),
+                addr_of_mut!(bindings::error_fatal),
+            );
         }
     }
 }
diff --git a/rust/qemu-api/src/timer.rs b/rust/qemu-api/src/timer.rs
new file mode 100644
index 0000000000000..f0b04ef95d7eb
--- /dev/null
+++ b/rust/qemu-api/src/timer.rs
@@ -0,0 +1,123 @@
+// Copyright (C) 2024 Intel Corporation.
+// Author(s): Zhao Liu <zhai1.liu@intel.com>
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+use std::{
+    os::raw::{c_int, c_void},
+    pin::Pin,
+};
+
+use crate::{
+    bindings::{self, qemu_clock_get_ns, timer_del, timer_init_full, timer_mod, QEMUClockType},
+    callbacks::FnCall,
+    cell::Opaque,
+};
+
+/// A safe wrapper around [`bindings::QEMUTimer`].
+#[repr(transparent)]
+#[derive(Debug, qemu_api_macros::Wrapper)]
+pub struct Timer(Opaque<bindings::QEMUTimer>);
+
+unsafe impl Send for Timer {}
+unsafe impl Sync for Timer {}
+
+#[repr(transparent)]
+#[derive(qemu_api_macros::Wrapper)]
+pub struct TimerListGroup(Opaque<bindings::QEMUTimerListGroup>);
+
+unsafe impl Send for TimerListGroup {}
+unsafe impl Sync for TimerListGroup {}
+
+impl Timer {
+    pub const MS: u32 = bindings::SCALE_MS;
+    pub const US: u32 = bindings::SCALE_US;
+    pub const NS: u32 = bindings::SCALE_NS;
+
+    /// Create a `Timer` struct without initializing it.
+    ///
+    /// # Safety
+    ///
+    /// The timer must be initialized before it is armed with
+    /// [`modify`](Self::modify).
+    pub unsafe fn new() -> Self {
+        // SAFETY: requirements relayed to callers of Timer::new
+        Self(unsafe { Opaque::zeroed() })
+    }
+
+    /// Create a new timer with the given attributes.
+    pub fn init_full<'timer, 'opaque: 'timer, T, F>(
+        self: Pin<&'timer mut Self>,
+        timer_list_group: Option<&TimerListGroup>,
+        clk_type: ClockType,
+        scale: u32,
+        attributes: u32,
+        _cb: F,
+        opaque: &'opaque T,
+    ) where
+        F: for<'a> FnCall<(&'a T,)>,
+    {
+        let _: () = F::ASSERT_IS_SOME;
+
+        /// timer expiration callback
+        unsafe extern "C" fn rust_timer_handler<T, F: for<'a> FnCall<(&'a T,)>>(
+            opaque: *mut c_void,
+        ) {
+            // SAFETY: the opaque was passed as a reference to `T`.
+            F::call((unsafe { &*(opaque.cast::<T>()) },))
+        }
+
+        let timer_cb: unsafe extern "C" fn(*mut c_void) = rust_timer_handler::<T, F>;
+
+        // SAFETY: the opaque outlives the timer
+        unsafe {
+            timer_init_full(
+                self.as_mut_ptr(),
+                if let Some(g) = timer_list_group {
+                    g as *const TimerListGroup as *mut _
+                } else {
+                    ::core::ptr::null_mut()
+                },
+                clk_type.id,
+                scale as c_int,
+                attributes as c_int,
+                Some(timer_cb),
+                (opaque as *const T).cast::<c_void>() as *mut c_void,
+            )
+        }
+    }
+
+    pub fn modify(&self, expire_time: u64) {
+        // SAFETY: the only way to obtain a Timer safely is via methods that
+        // take a Pin<&mut Self>, therefore the timer is pinned
+        unsafe { timer_mod(self.as_mut_ptr(), expire_time as i64) }
+    }
+
+    pub fn delete(&self) {
+        // SAFETY: the only way to obtain a Timer safely is via methods that
+        // take a Pin<&mut Self>, therefore the timer is pinned
+        unsafe { timer_del(self.as_mut_ptr()) }
+    }
+}
+
+// FIXME: use something like PinnedDrop from the pinned_init crate
+impl Drop for Timer {
+    fn drop(&mut self) {
+        self.delete()
+    }
+}
+
+pub struct ClockType {
+    id: QEMUClockType,
+}
+
+impl ClockType {
+    pub fn get_ns(&self) -> u64 {
+        // SAFETY: cannot be created outside this module, therefore id
+        // is valid
+        (unsafe { qemu_clock_get_ns(self.id) }) as u64
+    }
+}
+
+pub const CLOCK_VIRTUAL: ClockType = ClockType {
+    id: QEMUClockType::QEMU_CLOCK_VIRTUAL,
+};
diff --git a/rust/qemu-api/src/vmstate.rs b/rust/qemu-api/src/vmstate.rs
index 6ac432cf52f2f..f0510ae769d11 100644
--- a/rust/qemu-api/src/vmstate.rs
+++ b/rust/qemu-api/src/vmstate.rs
@@ -29,6 +29,8 @@ use core::{marker::PhantomData, mem, ptr::NonNull};
 pub use crate::bindings::{VMStateDescription, VMStateField};
 use crate::{
     bindings::{self, VMStateFlags},
+    prelude::*,
+    qom::Owned,
     zeroable::Zeroable,
 };
 
@@ -189,9 +191,9 @@ pub const fn vmstate_varray_flag<T: VMState>(_: PhantomData<T>) -> VMStateFlags
 /// * scalar types (integer and `bool`)
 /// * the C struct `QEMUTimer`
 /// * a transparent wrapper for any of the above (`Cell`, `UnsafeCell`,
-///   [`BqlCell`](crate::cell::BqlCell), [`BqlRefCell`](crate::cell::BqlRefCell)
+///   [`BqlCell`], [`BqlRefCell`]
 /// * a raw pointer to any of the above
-/// * a `NonNull` pointer or a `Box` for any of the above
+/// * a `NonNull` pointer, a `Box` or an [`Owned`] for any of the above
 /// * an array of any of the above
 ///
 /// In order to support other types, the trait `VMState` must be implemented
@@ -292,7 +294,7 @@ impl VMStateField {
 /// # Examples
 ///
 /// ```
-/// # use qemu_api::vmstate::impl_vmstate_forward;
+/// # use qemu_api::impl_vmstate_forward;
 /// pub struct Fifo([u8; 16]);
 /// impl_vmstate_forward!(Fifo);
 /// ```
@@ -328,6 +330,7 @@ macro_rules! impl_vmstate_transparent {
 
 impl_vmstate_transparent!(std::cell::Cell<T> where T: VMState);
 impl_vmstate_transparent!(std::cell::UnsafeCell<T> where T: VMState);
+impl_vmstate_transparent!(std::pin::Pin<T> where T: VMState);
 impl_vmstate_transparent!(crate::cell::BqlCell<T> where T: VMState);
 impl_vmstate_transparent!(crate::cell::BqlRefCell<T> where T: VMState);
 
@@ -398,6 +401,7 @@ impl_vmstate_pointer!(NonNull<T> where T: VMState);
 // Unlike C pointers, Box is always non-null therefore there is no need
 // to specify VMS_ALLOC.
 impl_vmstate_pointer!(Box<T> where T: VMState);
+impl_vmstate_pointer!(Owned<T> where T: VMState + ObjectType);
 
 // Arrays using the underlying type's VMState plus
 // VMS_ARRAY/VMS_ARRAY_OF_POINTER
@@ -466,11 +470,11 @@ macro_rules! vmstate_clock {
                 $crate::assert_field_type!(
                     $struct_name,
                     $field_name,
-                    core::ptr::NonNull<$crate::bindings::Clock>
+                    $crate::qom::Owned<$crate::qdev::Clock>
                 );
                 $crate::offset_of!($struct_name, $field_name)
             },
-            size: ::core::mem::size_of::<*const $crate::bindings::Clock>(),
+            size: ::core::mem::size_of::<*const $crate::qdev::Clock>(),
             flags: VMStateFlags(VMStateFlags::VMS_STRUCT.0 | VMStateFlags::VMS_POINTER.0),
             vmsd: unsafe { ::core::ptr::addr_of!($crate::bindings::vmstate_clock) },
             ..$crate::zeroable::Zeroable::ZERO
diff --git a/rust/qemu-api/src/zeroable.rs b/rust/qemu-api/src/zeroable.rs
index 7b04947cb6c16..a3415a2ebccb0 100644
--- a/rust/qemu-api/src/zeroable.rs
+++ b/rust/qemu-api/src/zeroable.rs
@@ -1,11 +1,13 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
+//! Defines a trait for structs that can be safely initialized with zero bytes.
+
 /// Encapsulates the requirement that
 /// `MaybeUninit::<Self>::zeroed().assume_init()` does not cause undefined
 /// behavior.  This trait in principle could be implemented as just:
 ///
 /// ```
-/// pub unsafe trait Zeroable {
+/// pub unsafe trait Zeroable: Default {
 ///     const ZERO: Self = unsafe { ::core::mem::MaybeUninit::<Self>::zeroed().assume_init() };
 /// }
 /// ```
@@ -56,6 +58,7 @@ pub unsafe trait Zeroable: Default {
 /// ## Differences with `core::mem::zeroed`
 ///
 /// `const_zero` zeroes padding bits, while `core::mem::zeroed` doesn't
+#[macro_export]
 macro_rules! const_zero {
     // This macro to produce a type-generic zero constant is taken from the
     // const_zero crate (v0.1.1):
@@ -77,10 +80,11 @@ macro_rules! const_zero {
 }
 
 /// A wrapper to implement the `Zeroable` trait through the `const_zero` macro.
+#[macro_export]
 macro_rules! impl_zeroable {
     ($type:ty) => {
-        unsafe impl Zeroable for $type {
-            const ZERO: Self = unsafe { const_zero!($type) };
+        unsafe impl $crate::zeroable::Zeroable for $type {
+            const ZERO: Self = unsafe { $crate::const_zero!($type) };
         }
     };
 }
@@ -100,3 +104,6 @@ impl_zeroable!(crate::bindings::VMStateField);
 impl_zeroable!(crate::bindings::VMStateDescription);
 impl_zeroable!(crate::bindings::MemoryRegionOps__bindgen_ty_1);
 impl_zeroable!(crate::bindings::MemoryRegionOps__bindgen_ty_2);
+impl_zeroable!(crate::bindings::MemoryRegionOps);
+impl_zeroable!(crate::bindings::MemTxAttrs);
+impl_zeroable!(crate::bindings::CharBackend);
diff --git a/rust/qemu-api/tests/tests.rs b/rust/qemu-api/tests/tests.rs
index 5c3e75ed3d542..269122e7ec195 100644
--- a/rust/qemu-api/tests/tests.rs
+++ b/rust/qemu-api/tests/tests.rs
@@ -2,20 +2,17 @@
 // Author(s): Manos Pitsidianakis <manos.pitsidianakis@linaro.org>
 // SPDX-License-Identifier: GPL-2.0-or-later
 
-use std::{
-    ffi::CStr,
-    os::raw::c_void,
-    ptr::{addr_of, addr_of_mut},
-};
+use std::{ffi::CStr, ptr::addr_of};
 
 use qemu_api::{
-    bindings::*,
+    bindings::{module_call_init, module_init_type, qdev_prop_bool},
     c_str,
     cell::{self, BqlCell},
     declare_properties, define_property,
     prelude::*,
-    qdev::{DeviceClass, DeviceImpl, DeviceState, Property},
-    qom::{ClassInitImpl, ObjectImpl, ParentField},
+    qdev::{DeviceImpl, DeviceState, Property, ResettablePhasesImpl},
+    qom::{ObjectImpl, ParentField},
+    sysbus::SysBusDevice,
     vmstate::VMStateDescription,
     zeroable::Zeroable,
 };
@@ -41,6 +38,12 @@ pub struct DummyClass {
     parent_class: <DeviceState as ObjectType>::Class,
 }
 
+impl DummyClass {
+    pub fn class_init<T: DeviceImpl>(self: &mut DummyClass) {
+        self.parent_class.class_init::<T>();
+    }
+}
+
 declare_properties! {
     DUMMY_PROPERTIES,
         define_property!(
@@ -60,8 +63,11 @@ unsafe impl ObjectType for DummyState {
 impl ObjectImpl for DummyState {
     type ParentType = DeviceState;
     const ABSTRACT: bool = false;
+    const CLASS_INIT: fn(&mut DummyClass) = DummyClass::class_init::<Self>;
 }
 
+impl ResettablePhasesImpl for DummyState {}
+
 impl DeviceImpl for DummyState {
     fn properties() -> &'static [Property] {
         &DUMMY_PROPERTIES
@@ -71,14 +77,6 @@ impl DeviceImpl for DummyState {
     }
 }
 
-// `impl<T> ClassInitImpl<DummyClass> for T` doesn't work since it violates
-// orphan rule.
-impl ClassInitImpl<DummyClass> for DummyState {
-    fn class_init(klass: &mut DummyClass) {
-        <Self as ClassInitImpl<DeviceClass>>::class_init(&mut klass.parent_class);
-    }
-}
-
 #[derive(qemu_api_macros::offsets)]
 #[repr(C)]
 #[derive(qemu_api_macros::Object)]
@@ -100,19 +98,15 @@ unsafe impl ObjectType for DummyChildState {
 impl ObjectImpl for DummyChildState {
     type ParentType = DummyState;
     const ABSTRACT: bool = false;
+    const CLASS_INIT: fn(&mut DummyChildClass) = DummyChildClass::class_init::<Self>;
 }
 
+impl ResettablePhasesImpl for DummyChildState {}
 impl DeviceImpl for DummyChildState {}
 
-impl ClassInitImpl<DummyClass> for DummyChildState {
-    fn class_init(klass: &mut DummyClass) {
-        <Self as ClassInitImpl<DeviceClass>>::class_init(&mut klass.parent_class);
-    }
-}
-
-impl ClassInitImpl<DummyChildClass> for DummyChildState {
-    fn class_init(klass: &mut DummyChildClass) {
-        <Self as ClassInitImpl<DummyClass>>::class_init(&mut klass.parent_class);
+impl DummyChildClass {
+    pub fn class_init<T: DeviceImpl>(self: &mut DummyChildClass) {
+        self.parent_class.class_init::<T>();
     }
 }
 
@@ -132,22 +126,26 @@ fn init_qom() {
 /// Create and immediately drop an instance.
 fn test_object_new() {
     init_qom();
-    unsafe {
-        object_unref(object_new(DummyState::TYPE_NAME.as_ptr()).cast());
-        object_unref(object_new(DummyChildState::TYPE_NAME.as_ptr()).cast());
-    }
+    drop(DummyState::new());
+    drop(DummyChildState::new());
+}
+
+#[test]
+#[allow(clippy::redundant_clone)]
+/// Create, clone and then drop an instance.
+fn test_clone() {
+    init_qom();
+    let p = DummyState::new();
+    assert_eq!(p.clone().typename(), "dummy");
+    drop(p);
 }
 
 #[test]
 /// Try invoking a method on an object.
 fn test_typename() {
     init_qom();
-    let p: *mut DummyState = unsafe { object_new(DummyState::TYPE_NAME.as_ptr()).cast() };
-    let p_ref: &DummyState = unsafe { &*p };
-    assert_eq!(p_ref.typename(), "dummy");
-    unsafe {
-        object_unref(p_ref.as_object_mut_ptr().cast::<c_void>());
-    }
+    let p = DummyState::new();
+    assert_eq!(p.typename(), "dummy");
 }
 
 // a note on all "cast" tests: usually, especially for downcasts the desired
@@ -162,50 +160,22 @@ fn test_typename() {
 /// Test casts on shared references.
 fn test_cast() {
     init_qom();
-    let p: *mut DummyState = unsafe { object_new(DummyState::TYPE_NAME.as_ptr()).cast() };
+    let p = DummyState::new();
+    let p_ptr: *mut DummyState = p.as_mut_ptr();
+    let p_ref: &mut DummyState = unsafe { &mut *p_ptr };
 
-    let p_ref: &DummyState = unsafe { &*p };
     let obj_ref: &Object = p_ref.upcast();
-    assert_eq!(addr_of!(*obj_ref), p.cast());
+    assert_eq!(addr_of!(*obj_ref), p_ptr.cast());
 
     let sbd_ref: Option<&SysBusDevice> = obj_ref.dynamic_cast();
     assert!(sbd_ref.is_none());
 
     let dev_ref: Option<&DeviceState> = obj_ref.downcast();
-    assert_eq!(addr_of!(*dev_ref.unwrap()), p.cast());
+    assert_eq!(addr_of!(*dev_ref.unwrap()), p_ptr.cast());
 
     // SAFETY: the cast is wrong, but the value is only used for comparison
     unsafe {
         let sbd_ref: &SysBusDevice = obj_ref.unsafe_cast();
-        assert_eq!(addr_of!(*sbd_ref), p.cast());
-
-        object_unref(p_ref.as_object_mut_ptr().cast::<c_void>());
-    }
-}
-
-#[test]
-#[allow(clippy::shadow_unrelated)]
-/// Test casts on mutable references.
-fn test_cast_mut() {
-    init_qom();
-    let p: *mut DummyState = unsafe { object_new(DummyState::TYPE_NAME.as_ptr()).cast() };
-
-    let p_ref: &mut DummyState = unsafe { &mut *p };
-    let obj_ref: &mut Object = p_ref.upcast_mut();
-    assert_eq!(addr_of_mut!(*obj_ref), p.cast());
-
-    let sbd_ref: Result<&mut SysBusDevice, &mut Object> = obj_ref.dynamic_cast_mut();
-    let obj_ref = sbd_ref.unwrap_err();
-
-    let dev_ref: Result<&mut DeviceState, &mut Object> = obj_ref.downcast_mut();
-    let dev_ref = dev_ref.unwrap();
-    assert_eq!(addr_of_mut!(*dev_ref), p.cast());
-
-    // SAFETY: the cast is wrong, but the value is only used for comparison
-    unsafe {
-        let sbd_ref: &mut SysBusDevice = obj_ref.unsafe_cast_mut();
-        assert_eq!(addr_of_mut!(*sbd_ref), p.cast());
-
-        object_unref(p_ref.as_object_mut_ptr().cast::<c_void>());
+        assert_eq!(addr_of!(*sbd_ref), p_ptr.cast());
     }
 }
diff --git a/rust/wrapper.h b/rust/wrapper.h
index a9bc67af0d5f7..d927ad6799dab 100644
--- a/rust/wrapper.h
+++ b/rust/wrapper.h
@@ -62,3 +62,6 @@ typedef enum memory_order {
 #include "qapi/error.h"
 #include "migration/vmstate.h"
 #include "chardev/char-serial.h"
+#include "exec/memattrs.h"
+#include "qemu/timer.h"
+#include "exec/address-spaces.h"
diff --git a/scripts/analyze-migration.py b/scripts/analyze-migration.py
index 8e1fbf4c9d9f1..67631ac43e9f4 100755
--- a/scripts/analyze-migration.py
+++ b/scripts/analyze-migration.py
@@ -620,7 +620,9 @@ class MigrationDump(object):
     QEMU_VM_SUBSECTION    = 0x05
     QEMU_VM_VMDESCRIPTION = 0x06
     QEMU_VM_CONFIGURATION = 0x07
+    QEMU_VM_COMMAND       = 0x08
     QEMU_VM_SECTION_FOOTER= 0x7e
+    QEMU_MIG_CMD_SWITCHOVER_START = 0x0b
 
     def __init__(self, filename):
         self.section_classes = {
@@ -685,6 +687,15 @@ def read(self, desc_only = False, dump_memory = False,
             elif section_type == self.QEMU_VM_SECTION_PART or section_type == self.QEMU_VM_SECTION_END:
                 section_id = file.read32()
                 self.sections[section_id].read()
+            elif section_type == self.QEMU_VM_COMMAND:
+                command_type = file.read16()
+                command_data_len = file.read16()
+                if command_type != self.QEMU_MIG_CMD_SWITCHOVER_START:
+                    raise Exception("Unknown QEMU_VM_COMMAND: %x" %
+                                    (command_type))
+                if command_data_len != 0:
+                    raise Exception("Invalid SWITCHOVER_START length: %x" %
+                                    (command_data_len))
             elif section_type == self.QEMU_VM_SECTION_FOOTER:
                 read_section_id = file.read32()
                 if read_section_id != section_id:
diff --git a/scripts/archive-source.sh b/scripts/archive-source.sh
index 30677c3ec9032..e461c1531ed6c 100755
--- a/scripts/archive-source.sh
+++ b/scripts/archive-source.sh
@@ -28,7 +28,7 @@ sub_file="${sub_tdir}/submodule.tar"
 # different to the host OS.
 subprojects="keycodemapdb libvfio-user berkeley-softfloat-3
   berkeley-testfloat-3 arbitrary-int-1-rs bilge-0.2-rs
-  bilge-impl-0.2-rs either-1-rs itertools-0.11-rs proc-macro2-1-rs
+  bilge-impl-0.2-rs either-1-rs itertools-0.11-rs libc-0.2-rs proc-macro2-1-rs
   proc-macro-error-1-rs proc-macro-error-attr-1-rs quote-1-rs
   syn-2-rs unicode-ident-1-rs"
 sub_deinit=""
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 06d07e6c225c1..6ae9d7febee82 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1353,6 +1353,70 @@ sub checkfilename {
 	}
 }
 
+sub checkspdx {
+    my ($file, $expr) = @_;
+
+    # Imported Linux headers probably have SPDX tags, but if they
+    # don't we're not requiring contributors to fix this, as these
+    # files are not expected to be modified locally in QEMU.
+    # Also don't accidentally detect own checking code.
+    if ($file =~ m,include/standard-headers, ||
+	$file =~ m,linux-headers, ||
+	$file =~ m,checkpatch.pl,) {
+	return;
+    }
+
+    my $origexpr = $expr;
+
+    # Flatten sub-expressions
+    $expr =~ s/\(|\)/ /g;
+    $expr =~ s/OR|AND/ /g;
+
+    # Merge WITH exceptions to the license
+    $expr =~ s/\s+WITH\s+/-WITH-/g;
+
+    # Cull more leading/trailing whitespace
+    $expr =~ s/^\s*//g;
+    $expr =~ s/\s*$//g;
+
+    my @bits = split / +/, $expr;
+
+    my $prefer = "GPL-2.0-or-later";
+    my @valid = qw(
+	GPL-2.0-only
+	LGPL-2.1-only
+	LGPL-2.1-or-later
+	BSD-2-Clause
+	BSD-3-Clause
+	MIT
+	);
+
+    my $nonpreferred = 0;
+    my @unknown = ();
+    foreach my $bit (@bits) {
+	if ($bit eq $prefer) {
+	    next;
+	}
+	if (grep /^$bit$/, @valid) {
+	    $nonpreferred = 1;
+	} else {
+	    push @unknown, $bit;
+	}
+    }
+    if (@unknown) {
+	ERROR("Saw unacceptable licenses '" . join(',', @unknown) .
+	      "', valid choices for QEMU are:\n" . join("\n", $prefer, @valid));
+    }
+
+    if ($nonpreferred) {
+	WARN("Saw acceptable license '$origexpr' but note '$prefer' is " .
+	     "preferred for new files unless the code is derived from a " .
+	     "source file with an existing declared license that must be " .
+	     "retained. Please explain the license choice in the commit " .
+	     "message.");
+    }
+}
+
 sub process {
 	my $filename = shift;
 
@@ -1378,6 +1442,8 @@ sub process {
 	my $in_imported_file = 0;
 	my $in_no_imported_file = 0;
 	my $non_utf8_charset = 0;
+	my $expect_spdx = 0;
+	my $expect_spdx_file;
 
 	our @report = ();
 	our $cnt_lines = 0;
@@ -1615,6 +1681,51 @@ sub process {
 			WARN("added, moved or deleted file(s), does MAINTAINERS need updating?\n" . $herecurr);
 		}
 
+# All new files should have a SPDX-License-Identifier tag
+		if ($line =~ /^new file mode\s*\d+\s*$/) {
+		    if ($expect_spdx) {
+			if ($expect_spdx_file =~
+			    /\.(c|h|py|pl|sh|json|inc|Makefile)$/) {
+			    # source code files MUST have SPDX license declared
+			    ERROR("New file '$expect_spdx_file' requires " .
+				  "'SPDX-License-Identifer'");
+			} else {
+			    # Other files MAY have SPDX license if appropriate
+			    WARN("Does new file '$expect_spdx_file' need " .
+				 "'SPDX-License-Identifer'?");
+			}
+		    }
+		    $expect_spdx = 1;
+		    $expect_spdx_file = undef;
+		} elsif ($expect_spdx) {
+		    $expect_spdx_file = $realfile unless
+			defined $expect_spdx_file;
+
+		    # SPDX tags may occurr in comments which were
+		    # stripped from '$line', so use '$rawline'
+		    if ($rawline =~ /SPDX-License-Identifier/) {
+			$expect_spdx = 0;
+			$expect_spdx_file = undef;
+		    }
+		}
+
+# Check SPDX-License-Identifier references a permitted license
+		if ($rawline =~ m,SPDX-License-Identifier: (.*?)(\*/)?\s*$,) {
+		    &checkspdx($realfile, $1);
+		}
+
+		if ($rawline =~ m,(SPDX-[a-zA-Z0-9-_]+):,) {
+		    my $tag = $1;
+		    my @permitted = qw(
+			SPDX-License-Identifier
+		    );
+
+		    unless (grep { /^$tag$/ } @permitted) {
+			ERROR("Tag $tag not permitted in QEMU code, valid " .
+			      "choices are: " . join(", ", @permitted));
+		    }
+		}
+
 # Check for wrappage within a valid hunk of the file
 		if ($realcnt != 0 && $line !~ m{^(?:\+|-| |\\ No newline|$)}) {
 			ERROR("patch seems to be corrupt (line wrapped?)\n" .
diff --git a/scripts/ci/setup/ubuntu/ubuntu-2204-aarch64.yaml b/scripts/ci/setup/ubuntu/ubuntu-2204-aarch64.yaml
index 288156d1e4b64..dbcd2e076dec6 100644
--- a/scripts/ci/setup/ubuntu/ubuntu-2204-aarch64.yaml
+++ b/scripts/ci/setup/ubuntu/ubuntu-2204-aarch64.yaml
@@ -123,6 +123,7 @@ packages:
   - tar
   - tesseract-ocr
   - tesseract-ocr-eng
+  - vulkan-tools
   - xorriso
   - zlib1g-dev
   - zstd
diff --git a/scripts/ci/setup/ubuntu/ubuntu-2204-s390x.yaml b/scripts/ci/setup/ubuntu/ubuntu-2204-s390x.yaml
index d497139ef39b1..4b8ee3d885d55 100644
--- a/scripts/ci/setup/ubuntu/ubuntu-2204-s390x.yaml
+++ b/scripts/ci/setup/ubuntu/ubuntu-2204-s390x.yaml
@@ -121,6 +121,7 @@ packages:
   - tar
   - tesseract-ocr
   - tesseract-ocr-eng
+  - vulkan-tools
   - xorriso
   - zlib1g-dev
   - zstd
diff --git a/scripts/make-release b/scripts/make-release
index 2885e872109a8..8c3594a1a470b 100755
--- a/scripts/make-release
+++ b/scripts/make-release
@@ -11,8 +11,9 @@
 # See the COPYING file in the top-level directory.
 
 function subproject_dir() {
-    if test ! -f "subprojects/$1.wrap"; then
-      error "scripts/archive-source.sh should only process wrap subprojects"
+    if test ! -f "$src/subprojects/$1.wrap"; then
+      echo "scripts/archive-source.sh should only process wrap subprojects"
+      exit 1
     fi
 
     # Print the directory key of the wrap file, defaulting to the
@@ -26,7 +27,7 @@ function subproject_dir() {
       -e    's///p' \
       -e    'q' \
       -e '}' \
-      "subprojects/$1.wrap")
+      "$src/subprojects/$1.wrap")
 
     echo "${dir:-$1}"
 }
@@ -40,7 +41,7 @@ fi
 # Only include wraps that are invoked with subproject()
 SUBPROJECTS="libvfio-user keycodemapdb berkeley-softfloat-3
   berkeley-testfloat-3 arbitrary-int-1-rs bilge-0.2-rs
-  bilge-impl-0.2-rs either-1-rs itertools-0.11-rs proc-macro2-1-rs
+  bilge-impl-0.2-rs either-1-rs itertools-0.11-rs libc-0.2-rs proc-macro2-1-rs
   proc-macro-error-1-rs proc-macro-error-attr-1-rs quote-1-rs
   syn-2-rs unicode-ident-1-rs"
 
@@ -76,7 +77,7 @@ popd
 exclude=(--exclude=.git)
 # include the tarballs in subprojects/packagecache but not their expansion
 for sp in $SUBPROJECTS; do
-    if grep -xqF "[wrap-file]" subprojects/$sp.wrap; then
+    if grep -xqF "[wrap-file]" $src/subprojects/$sp.wrap; then
       exclude+=(--exclude=subprojects/"$(subproject_dir $sp)")
     fi
 done
diff --git a/scripts/meson-buildoptions.py b/scripts/meson-buildoptions.py
index 4814a8ff61f94..a3e22471b2fdc 100644
--- a/scripts/meson-buildoptions.py
+++ b/scripts/meson-buildoptions.py
@@ -241,8 +241,14 @@ def print_parse(options):
     print("  esac")
     print("}")
 
-
-options = load_options(json.load(sys.stdin))
+json_data = sys.stdin.read()
+try:
+    options = load_options(json.loads(json_data))
+except:
+    print("Failure in scripts/meson-buildoptions.py parsing stdin as json",
+          file=sys.stderr)
+    print(json_data, file=sys.stderr)
+    sys.exit(1)
 print("# This file is generated by meson-buildoptions.py, do not edit!")
 print_help(options)
 print_parse(options)
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index a8066aab0379a..3e8e00852b2c5 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -168,6 +168,7 @@ meson_options_help() {
   printf "%s\n" '  pixman          pixman support'
   printf "%s\n" '  plugins         TCG plugins via shared library loading'
   printf "%s\n" '  png             PNG support with libpng'
+  printf "%s\n" '  pvg             macOS paravirtualized graphics support'
   printf "%s\n" '  qatzip          QATzip compression support'
   printf "%s\n" '  qcow1           qcow1 image format support'
   printf "%s\n" '  qed             qed image format support'
@@ -436,6 +437,8 @@ _meson_option_parse() {
     --enable-png) printf "%s" -Dpng=enabled ;;
     --disable-png) printf "%s" -Dpng=disabled ;;
     --prefix=*) quote_sh "-Dprefix=$2" ;;
+    --enable-pvg) printf "%s" -Dpvg=enabled ;;
+    --disable-pvg) printf "%s" -Dpvg=disabled ;;
     --enable-qatzip) printf "%s" -Dqatzip=enabled ;;
     --disable-qatzip) printf "%s" -Dqatzip=disabled ;;
     --enable-qcow1) printf "%s" -Dqcow1=enabled ;;
diff --git a/scripts/qapi/backend.py b/scripts/qapi/backend.py
new file mode 100644
index 0000000000000..14e60aa67af49
--- /dev/null
+++ b/scripts/qapi/backend.py
@@ -0,0 +1,63 @@
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+
+from abc import ABC, abstractmethod
+
+from .commands import gen_commands
+from .events import gen_events
+from .features import gen_features
+from .introspect import gen_introspect
+from .schema import QAPISchema
+from .types import gen_types
+from .visit import gen_visit
+
+
+class QAPIBackend(ABC):
+
+    @abstractmethod
+    def generate(self,
+                 schema: QAPISchema,
+                 output_dir: str,
+                 prefix: str,
+                 unmask: bool,
+                 builtins: bool,
+                 gen_tracing: bool) -> None:
+        """
+        Generate code for the given schema into the target directory.
+
+        :param schema: The primary QAPI schema object.
+        :param output_dir: The output directory to store generated code.
+        :param prefix: Optional C-code prefix for symbol names.
+        :param unmask: Expose non-ABI names through introspection?
+        :param builtins: Generate code for built-in types?
+
+        :raise QAPIError: On failures.
+        """
+
+
+class QAPICBackend(QAPIBackend):
+
+    def generate(self,
+                 schema: QAPISchema,
+                 output_dir: str,
+                 prefix: str,
+                 unmask: bool,
+                 builtins: bool,
+                 gen_tracing: bool) -> None:
+        """
+        Generate C code for the given schema into the target directory.
+
+        :param schema_file: The primary QAPI schema file.
+        :param output_dir: The output directory to store generated code.
+        :param prefix: Optional C-code prefix for symbol names.
+        :param unmask: Expose non-ABI names through introspection?
+        :param builtins: Generate code for built-in types?
+
+        :raise QAPIError: On failures.
+        """
+        gen_types(schema, output_dir, prefix, builtins)
+        gen_features(schema, output_dir, prefix)
+        gen_visit(schema, output_dir, prefix, builtins)
+        gen_commands(schema, output_dir, prefix, gen_tracing)
+        gen_events(schema, output_dir, prefix)
+        gen_introspect(schema, output_dir, prefix, unmask)
diff --git a/scripts/qapi/introspect.py b/scripts/qapi/introspect.py
index 42e5185c7c665..89ee5d5f17642 100644
--- a/scripts/qapi/introspect.py
+++ b/scripts/qapi/introspect.py
@@ -11,6 +11,7 @@
 See the COPYING file in the top-level directory.
 """
 
+from dataclasses import dataclass
 from typing import (
     Any,
     Dict,
@@ -79,19 +80,16 @@
 _ValueT = TypeVar('_ValueT', bound=_Value)
 
 
+@dataclass
 class Annotated(Generic[_ValueT]):
     """
     Annotated generally contains a SchemaInfo-like type (as a dict),
     But it also used to wrap comments/ifconds around scalar leaf values,
     for the benefit of features and enums.
     """
-    # TODO: Remove after Python 3.7 adds @dataclass:
-    # pylint: disable=too-few-public-methods
-    def __init__(self, value: _ValueT, ifcond: QAPISchemaIfCond,
-                 comment: Optional[str] = None):
-        self.value = value
-        self.comment: Optional[str] = comment
-        self.ifcond = ifcond
+    value: _ValueT
+    ifcond: QAPISchemaIfCond
+    comment: Optional[str] = None
 
 
 def _tree_to_qlit(obj: JSONValue,
diff --git a/scripts/qapi/main.py b/scripts/qapi/main.py
index 324081b9fc3de..0e2a6ae3f070e 100644
--- a/scripts/qapi/main.py
+++ b/scripts/qapi/main.py
@@ -8,18 +8,14 @@
 """
 
 import argparse
+from importlib import import_module
 import sys
 from typing import Optional
 
-from .commands import gen_commands
+from .backend import QAPIBackend, QAPICBackend
 from .common import must_match
 from .error import QAPIError
-from .events import gen_events
-from .features import gen_features
-from .introspect import gen_introspect
 from .schema import QAPISchema
-from .types import gen_types
-from .visit import gen_visit
 
 
 def invalid_prefix_char(prefix: str) -> Optional[str]:
@@ -29,32 +25,36 @@ def invalid_prefix_char(prefix: str) -> Optional[str]:
     return None
 
 
-def generate(schema_file: str,
-             output_dir: str,
-             prefix: str,
-             unmask: bool = False,
-             builtins: bool = False,
-             gen_tracing: bool = False) -> None:
-    """
-    Generate C code for the given schema into the target directory.
+def create_backend(path: str) -> QAPIBackend:
+    if path is None:
+        return QAPICBackend()
 
-    :param schema_file: The primary QAPI schema file.
-    :param output_dir: The output directory to store generated code.
-    :param prefix: Optional C-code prefix for symbol names.
-    :param unmask: Expose non-ABI names through introspection?
-    :param builtins: Generate code for built-in types?
+    module_path, dot, class_name = path.rpartition('.')
+    if not dot:
+        raise QAPIError("argument of -B must be of the form MODULE.CLASS")
 
-    :raise QAPIError: On failures.
-    """
-    assert invalid_prefix_char(prefix) is None
+    try:
+        mod = import_module(module_path)
+    except Exception as ex:
+        raise QAPIError(f"unable to import '{module_path}': {ex}") from ex
+
+    try:
+        klass = getattr(mod, class_name)
+    except AttributeError as ex:
+        raise QAPIError(
+            f"module '{module_path}' has no class '{class_name}'") from ex
+
+    try:
+        backend = klass()
+    except Exception as ex:
+        raise QAPIError(
+            f"backend '{path}' cannot be instantiated: {ex}") from ex
+
+    if not isinstance(backend, QAPIBackend):
+        raise QAPIError(
+            f"backend '{path}' must be an instance of QAPIBackend")
 
-    schema = QAPISchema(schema_file)
-    gen_types(schema, output_dir, prefix, builtins)
-    gen_features(schema, output_dir, prefix)
-    gen_visit(schema, output_dir, prefix, builtins)
-    gen_commands(schema, output_dir, prefix, gen_tracing)
-    gen_events(schema, output_dir, prefix)
-    gen_introspect(schema, output_dir, prefix, unmask)
+    return backend
 
 
 def main() -> int:
@@ -77,6 +77,8 @@ def main() -> int:
     parser.add_argument('-u', '--unmask-non-abi-names', action='store_true',
                         dest='unmask',
                         help="expose non-ABI names in introspection")
+    parser.add_argument('-B', '--backend', default=None,
+                        help="Python module name for code generator")
 
     # Option --suppress-tracing exists so we can avoid solving build system
     # problems.  TODO Drop it when we no longer need it.
@@ -93,12 +95,14 @@ def main() -> int:
         return 1
 
     try:
-        generate(args.schema,
-                 output_dir=args.output_dir,
-                 prefix=args.prefix,
-                 unmask=args.unmask,
-                 builtins=args.builtins,
-                 gen_tracing=not args.suppress_tracing)
+        schema = QAPISchema(args.schema)
+        backend = create_backend(args.backend)
+        backend.generate(schema,
+                         output_dir=args.output_dir,
+                         prefix=args.prefix,
+                         unmask=args.unmask,
+                         builtins=args.builtins,
+                         gen_tracing=not args.suppress_tracing)
     except QAPIError as err:
         print(err, file=sys.stderr)
         return 1
diff --git a/scripts/qapi/parser.py b/scripts/qapi/parser.py
index adc85b5b39465..949d9e8bff722 100644
--- a/scripts/qapi/parser.py
+++ b/scripts/qapi/parser.py
@@ -14,7 +14,7 @@
 # This work is licensed under the terms of the GNU GPL, version 2.
 # See the COPYING file in the top-level directory.
 
-from collections import OrderedDict
+import enum
 import os
 import re
 from typing import (
@@ -154,7 +154,7 @@ def _parse(self) -> None:
                                        "value of 'include' must be a string")
                 incl_fname = os.path.join(os.path.dirname(self._fname),
                                           include)
-                self._add_expr(OrderedDict({'include': incl_fname}), info)
+                self._add_expr({'include': incl_fname}, info)
                 exprs_include = self._include(include, info, incl_fname,
                                               self._included)
                 if exprs_include:
@@ -355,7 +355,7 @@ def accept(self, skip_comment: bool = True) -> None:
                 raise QAPIParseError(self, "stray '%s'" % match.group(0))
 
     def get_members(self) -> Dict[str, object]:
-        expr: Dict[str, object] = OrderedDict()
+        expr: Dict[str, object] = {}
         if self.tok == '}':
             self.accept()
             return expr
@@ -575,7 +575,10 @@ def get_doc(self) -> 'QAPIDoc':
                         )
                         raise QAPIParseError(self, emsg)
 
-                    doc.new_tagged_section(self.info, match.group(1))
+                    doc.new_tagged_section(
+                        self.info,
+                        QAPIDoc.Kind.from_string(match.group(1))
+                    )
                     text = line[match.end():]
                     if text:
                         doc.append_line(text)
@@ -586,7 +589,7 @@ def get_doc(self) -> 'QAPIDoc':
                         self,
                         "unexpected '=' markup in definition documentation")
                 else:
-                    # tag-less paragraph
+                    # plain paragraph
                     doc.ensure_untagged_section(self.info)
                     doc.append_line(line)
                     line = self.get_doc_paragraph(doc)
@@ -635,23 +638,51 @@ class QAPIDoc:
     Free-form documentation blocks consist only of a body section.
     """
 
+    class Kind(enum.Enum):
+        PLAIN = 0
+        MEMBER = 1
+        FEATURE = 2
+        RETURNS = 3
+        ERRORS = 4
+        SINCE = 5
+        TODO = 6
+
+        @staticmethod
+        def from_string(kind: str) -> 'QAPIDoc.Kind':
+            return QAPIDoc.Kind[kind.upper()]
+
+        def __str__(self) -> str:
+            return self.name.title()
+
     class Section:
         # pylint: disable=too-few-public-methods
-        def __init__(self, info: QAPISourceInfo,
-                     tag: Optional[str] = None):
+        def __init__(
+            self,
+            info: QAPISourceInfo,
+            kind: 'QAPIDoc.Kind',
+        ):
             # section source info, i.e. where it begins
             self.info = info
-            # section tag, if any ('Returns', '@name', ...)
-            self.tag = tag
+            # section kind
+            self.kind = kind
             # section text without tag
             self.text = ''
 
+        def __repr__(self) -> str:
+            return f"<QAPIDoc.Section kind={self.kind!r} text={self.text!r}>"
+
         def append_line(self, line: str) -> None:
             self.text += line + '\n'
 
     class ArgSection(Section):
-        def __init__(self, info: QAPISourceInfo, tag: str):
-            super().__init__(info, tag)
+        def __init__(
+            self,
+            info: QAPISourceInfo,
+            kind: 'QAPIDoc.Kind',
+            name: str
+        ):
+            super().__init__(info, kind)
+            self.name = name
             self.member: Optional['QAPISchemaMember'] = None
 
         def connect(self, member: 'QAPISchemaMember') -> None:
@@ -663,7 +694,9 @@ def __init__(self, info: QAPISourceInfo, symbol: Optional[str] = None):
         # definition doc's symbol, None for free-form doc
         self.symbol: Optional[str] = symbol
         # the sections in textual order
-        self.all_sections: List[QAPIDoc.Section] = [QAPIDoc.Section(info)]
+        self.all_sections: List[QAPIDoc.Section] = [
+            QAPIDoc.Section(info, QAPIDoc.Kind.PLAIN)
+        ]
         # the body section
         self.body: Optional[QAPIDoc.Section] = self.all_sections[0]
         # dicts mapping parameter/feature names to their description
@@ -680,55 +713,71 @@ def __init__(self, info: QAPISourceInfo, symbol: Optional[str] = None):
     def end(self) -> None:
         for section in self.all_sections:
             section.text = section.text.strip('\n')
-            if section.tag is not None and section.text == '':
+            if section.kind != QAPIDoc.Kind.PLAIN and section.text == '':
                 raise QAPISemError(
-                    section.info, "text required after '%s:'" % section.tag)
+                    section.info, "text required after '%s:'" % section.kind)
 
     def ensure_untagged_section(self, info: QAPISourceInfo) -> None:
-        if self.all_sections and not self.all_sections[-1].tag:
+        kind = QAPIDoc.Kind.PLAIN
+
+        if self.all_sections and self.all_sections[-1].kind == kind:
             # extend current section
-            self.all_sections[-1].text += '\n'
+            section = self.all_sections[-1]
+            if not section.text:
+                # Section is empty so far; update info to start *here*.
+                section.info = info
+            section.text += '\n'
             return
+
         # start new section
-        section = self.Section(info)
+        section = self.Section(info, kind)
         self.sections.append(section)
         self.all_sections.append(section)
 
-    def new_tagged_section(self, info: QAPISourceInfo, tag: str) -> None:
-        section = self.Section(info, tag)
-        if tag == 'Returns':
+    def new_tagged_section(
+        self,
+        info: QAPISourceInfo,
+        kind: 'QAPIDoc.Kind',
+    ) -> None:
+        section = self.Section(info, kind)
+        if kind == QAPIDoc.Kind.RETURNS:
             if self.returns:
                 raise QAPISemError(
-                    info, "duplicated '%s' section" % tag)
+                    info, "duplicated '%s' section" % kind)
             self.returns = section
-        elif tag == 'Errors':
+        elif kind == QAPIDoc.Kind.ERRORS:
             if self.errors:
                 raise QAPISemError(
-                    info, "duplicated '%s' section" % tag)
+                    info, "duplicated '%s' section" % kind)
             self.errors = section
-        elif tag == 'Since':
+        elif kind == QAPIDoc.Kind.SINCE:
             if self.since:
                 raise QAPISemError(
-                    info, "duplicated '%s' section" % tag)
+                    info, "duplicated '%s' section" % kind)
             self.since = section
         self.sections.append(section)
         self.all_sections.append(section)
 
-    def _new_description(self, info: QAPISourceInfo, name: str,
-                         desc: Dict[str, ArgSection]) -> None:
+    def _new_description(
+        self,
+        info: QAPISourceInfo,
+        name: str,
+        kind: 'QAPIDoc.Kind',
+        desc: Dict[str, ArgSection]
+    ) -> None:
         if not name:
             raise QAPISemError(info, "invalid parameter name")
         if name in desc:
             raise QAPISemError(info, "'%s' parameter name duplicated" % name)
-        section = self.ArgSection(info, '@' + name)
+        section = self.ArgSection(info, kind, name)
         self.all_sections.append(section)
         desc[name] = section
 
     def new_argument(self, info: QAPISourceInfo, name: str) -> None:
-        self._new_description(info, name, self.args)
+        self._new_description(info, name, QAPIDoc.Kind.MEMBER, self.args)
 
     def new_feature(self, info: QAPISourceInfo, name: str) -> None:
-        self._new_description(info, name, self.features)
+        self._new_description(info, name, QAPIDoc.Kind.FEATURE, self.features)
 
     def append_line(self, line: str) -> None:
         self.all_sections[-1].append_line(line)
@@ -740,8 +789,23 @@ def connect_member(self, member: 'QAPISchemaMember') -> None:
                 raise QAPISemError(member.info,
                                    "%s '%s' lacks documentation"
                                    % (member.role, member.name))
-            self.args[member.name] = QAPIDoc.ArgSection(
-                self.info, '@' + member.name)
+            # Insert stub documentation section for missing member docs.
+            # TODO: drop when undocumented members are outlawed
+
+            section = QAPIDoc.ArgSection(
+                self.info, QAPIDoc.Kind.MEMBER, member.name)
+            self.args[member.name] = section
+
+            # Determine where to insert stub doc - it should go at the
+            # end of the members section(s), if any. Note that index 0
+            # is assumed to be an untagged intro section, even if it is
+            # empty.
+            index = 1
+            if len(self.all_sections) > 1:
+                while self.all_sections[index].kind == QAPIDoc.Kind.MEMBER:
+                    index += 1
+            self.all_sections.insert(index, section)
+
         self.args[member.name].connect(member)
 
     def connect_feature(self, feature: 'QAPISchemaFeature') -> None:
diff --git a/scripts/qapi/pylintrc b/scripts/qapi/pylintrc
index c028a1f9f5195..d24eece74114e 100644
--- a/scripts/qapi/pylintrc
+++ b/scripts/qapi/pylintrc
@@ -17,6 +17,7 @@ disable=consider-using-f-string,
         too-many-arguments,
         too-many-branches,
         too-many-instance-attributes,
+        too-many-positional-arguments,
         too-many-statements,
         useless-option-value,
 
diff --git a/scripts/qapi/schema.py b/scripts/qapi/schema.py
index 7f70969c09199..cbe3b5aa91ec1 100644
--- a/scripts/qapi/schema.py
+++ b/scripts/qapi/schema.py
@@ -19,7 +19,6 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
-from collections import OrderedDict
 import os
 import re
 from typing import (
@@ -557,7 +556,7 @@ def check(self, schema: QAPISchema) -> None:
         super().check(schema)
         assert self._checked and not self._check_complete
 
-        seen = OrderedDict()
+        seen = {}
         if self._base_name:
             self.base = schema.resolve_type(self._base_name, self.info,
                                             "'base'")
@@ -1141,10 +1140,10 @@ def __init__(self, fname: str):
         self.docs = parser.docs
         self._entity_list: List[QAPISchemaEntity] = []
         self._entity_dict: Dict[str, QAPISchemaDefinition] = {}
-        self._module_dict: Dict[str, QAPISchemaModule] = OrderedDict()
+        self._module_dict: Dict[str, QAPISchemaModule] = {}
         # NB, values in the dict will identify the first encountered
         # usage of a named feature only
-        self._feature_dict: Dict[str, QAPISchemaFeature] = OrderedDict()
+        self._feature_dict: Dict[str, QAPISchemaFeature] = {}
 
         # All schemas get the names defined in the QapiSpecialFeature enum.
         # Rely on dict iteration order matching insertion order so that
@@ -1454,7 +1453,7 @@ def _def_command(self, expr: QAPIExpression) -> None:
         ifcond = QAPISchemaIfCond(expr.get('if'))
         info = expr.info
         features = self._make_features(expr.get('features'), info)
-        if isinstance(data, OrderedDict):
+        if isinstance(data, dict):
             data = self._make_implicit_object_type(
                 name, info, ifcond,
                 'arg', self._make_members(data, info))
@@ -1473,7 +1472,7 @@ def _def_event(self, expr: QAPIExpression) -> None:
         ifcond = QAPISchemaIfCond(expr.get('if'))
         info = expr.info
         features = self._make_features(expr.get('features'), info)
-        if isinstance(data, OrderedDict):
+        if isinstance(data, dict):
             data = self._make_implicit_object_type(
                 name, info, ifcond,
                 'arg', self._make_members(data, info))
diff --git a/scripts/qapi/source.py b/scripts/qapi/source.py
index 7b379fdc92578..ffdc3f482acd4 100644
--- a/scripts/qapi/source.py
+++ b/scripts/qapi/source.py
@@ -47,9 +47,9 @@ def set_defn(self, meta: str, name: str) -> None:
         self.defn_meta = meta
         self.defn_name = name
 
-    def next_line(self: T) -> T:
+    def next_line(self: T, n: int = 1) -> T:
         info = copy.copy(self)
-        info.line += 1
+        info.line += n
         return info
 
     def loc(self) -> str:
diff --git a/scripts/qcow2-to-stdout.py b/scripts/qcow2-to-stdout.py
new file mode 100755
index 0000000000000..06b7c13ccbb54
--- /dev/null
+++ b/scripts/qcow2-to-stdout.py
@@ -0,0 +1,449 @@
+#!/usr/bin/env python3
+
+# This tool reads a disk image in any format and converts it to qcow2,
+# writing the result directly to stdout.
+#
+# Copyright (C) 2024 Igalia, S.L.
+#
+# Authors: Alberto Garcia <berto@igalia.com>
+#          Madeeha Javed <javed@igalia.com>
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# qcow2 files produced by this script are always arranged like this:
+#
+# - qcow2 header
+# - refcount table
+# - refcount blocks
+# - L1 table
+# - L2 tables
+# - Data clusters
+#
+# A note about variable names: in qcow2 there is one refcount table
+# and one (active) L1 table, although each can occupy several
+# clusters. For the sake of simplicity the code sometimes talks about
+# refcount tables and L1 tables when referring to those clusters.
+
+import argparse
+import errno
+import math
+import os
+import signal
+import struct
+import subprocess
+import sys
+import tempfile
+import time
+from contextlib import contextmanager
+
+QCOW2_DEFAULT_CLUSTER_SIZE = 65536
+QCOW2_DEFAULT_REFCOUNT_BITS = 16
+QCOW2_FEATURE_NAME_TABLE = 0x6803F857
+QCOW2_DATA_FILE_NAME_STRING = 0x44415441
+QCOW2_V3_HEADER_LENGTH = 112  # Header length in QEMU 9.0. Must be a multiple of 8
+QCOW2_INCOMPAT_DATA_FILE_BIT = 2
+QCOW2_AUTOCLEAR_DATA_FILE_RAW_BIT = 1
+QCOW_OFLAG_COPIED = 1 << 63
+QEMU_STORAGE_DAEMON = "qemu-storage-daemon"
+
+
+def bitmap_set(bitmap, idx):
+    bitmap[idx // 8] |= 1 << (idx % 8)
+
+
+def bitmap_is_set(bitmap, idx):
+    return (bitmap[idx // 8] & (1 << (idx % 8))) != 0
+
+
+def bitmap_iterator(bitmap, length):
+    for idx in range(length):
+        if bitmap_is_set(bitmap, idx):
+            yield idx
+
+
+def align_up(num, d):
+    return d * math.ceil(num / d)
+
+
+# Holes in the input file contain only zeroes so we can skip them and
+# save time. This function returns the indexes of the clusters that
+# are known to contain data. Those are the ones that we need to read.
+def clusters_with_data(fd, cluster_size):
+    data_to = 0
+    while True:
+        try:
+            data_from = os.lseek(fd, data_to, os.SEEK_DATA)
+            data_to = align_up(os.lseek(fd, data_from, os.SEEK_HOLE), cluster_size)
+            for idx in range(data_from // cluster_size, data_to // cluster_size):
+                yield idx
+        except OSError as err:
+            if err.errno == errno.ENXIO:  # End of file reached
+                break
+            raise err
+
+
+# write_qcow2_content() expects a raw input file. If we have a different
+# format we can use qemu-storage-daemon to make it appear as raw.
+@contextmanager
+def get_input_as_raw_file(input_file, input_format):
+    if input_format == "raw":
+        yield input_file
+        return
+    try:
+        temp_dir = tempfile.mkdtemp()
+        pid_file = os.path.join(temp_dir, "pid")
+        raw_file = os.path.join(temp_dir, "raw")
+        open(raw_file, "wb").close()
+        ret = subprocess.run(
+            [
+                QEMU_STORAGE_DAEMON,
+                "--daemonize",
+                "--pidfile", pid_file,
+                "--blockdev", f"driver=file,node-name=file0,driver=file,filename={input_file},read-only=on",
+                "--blockdev", f"driver={input_format},node-name=disk0,file=file0,read-only=on",
+                "--export", f"type=fuse,id=export0,node-name=disk0,mountpoint={raw_file},writable=off",
+            ],
+            capture_output=True,
+        )
+        if ret.returncode != 0:
+            sys.exit("[Error] Could not start the qemu-storage-daemon:\n" +
+                     ret.stderr.decode().rstrip('\n'))
+        yield raw_file
+    finally:
+        # Kill the storage daemon on exit
+        # and remove all temporary files
+        if os.path.exists(pid_file):
+            with open(pid_file, "r") as f:
+                pid = int(f.readline())
+            os.kill(pid, signal.SIGTERM)
+            while os.path.exists(pid_file):
+                time.sleep(0.1)
+        os.unlink(raw_file)
+        os.rmdir(temp_dir)
+
+
+def write_features(cluster, offset, data_file_name):
+    if data_file_name is not None:
+        encoded_name = data_file_name.encode("utf-8")
+        padded_name_len = align_up(len(encoded_name), 8)
+        struct.pack_into(f">II{padded_name_len}s", cluster, offset,
+                         QCOW2_DATA_FILE_NAME_STRING,
+                         len(encoded_name),
+                         encoded_name)
+        offset += 8 + padded_name_len
+
+    qcow2_features = [
+        # Incompatible
+        (0, 0, "dirty bit"),
+        (0, 1, "corrupt bit"),
+        (0, 2, "external data file"),
+        (0, 3, "compression type"),
+        (0, 4, "extended L2 entries"),
+        # Compatible
+        (1, 0, "lazy refcounts"),
+        # Autoclear
+        (2, 0, "bitmaps"),
+        (2, 1, "raw external data"),
+    ]
+    struct.pack_into(">I", cluster, offset, QCOW2_FEATURE_NAME_TABLE)
+    struct.pack_into(">I", cluster, offset + 4, len(qcow2_features) * 48)
+    offset += 8
+    for feature_type, feature_bit, feature_name in qcow2_features:
+        struct.pack_into(">BB46s", cluster, offset,
+                         feature_type, feature_bit, feature_name.encode("ascii"))
+        offset += 48
+
+
+def write_qcow2_content(input_file, cluster_size, refcount_bits, data_file_name, data_file_raw):
+    # Some basic values
+    l1_entries_per_table = cluster_size // 8
+    l2_entries_per_table = cluster_size // 8
+    refcounts_per_table  = cluster_size // 8
+    refcounts_per_block  = cluster_size * 8 // refcount_bits
+
+    # Virtual disk size, number of data clusters and L1 entries
+    disk_size = align_up(os.path.getsize(input_file), 512)
+    total_data_clusters = math.ceil(disk_size / cluster_size)
+    l1_entries = math.ceil(total_data_clusters / l2_entries_per_table)
+    allocated_l1_tables = math.ceil(l1_entries / l1_entries_per_table)
+
+    # Max L1 table size is 32 MB (QCOW_MAX_L1_SIZE in block/qcow2.h)
+    if (l1_entries * 8) > (32 * 1024 * 1024):
+        sys.exit("[Error] The image size is too large. Try using a larger cluster size.")
+
+    # Two bitmaps indicating which L1 and L2 entries are set
+    l1_bitmap = bytearray(allocated_l1_tables * l1_entries_per_table // 8)
+    l2_bitmap = bytearray(l1_entries * l2_entries_per_table // 8)
+    allocated_l2_tables = 0
+    allocated_data_clusters = 0
+
+    if data_file_raw:
+        # If data_file_raw is set then all clusters are allocated and
+        # we don't need to read the input file at all.
+        allocated_l2_tables = l1_entries
+        for idx in range(l1_entries):
+            bitmap_set(l1_bitmap, idx)
+        for idx in range(total_data_clusters):
+            bitmap_set(l2_bitmap, idx)
+    else:
+        # Open the input file for reading
+        fd = os.open(input_file, os.O_RDONLY)
+        zero_cluster = bytes(cluster_size)
+        # Read all the clusters that contain data
+        for idx in clusters_with_data(fd, cluster_size):
+            cluster = os.pread(fd, cluster_size, cluster_size * idx)
+            # If the last cluster is smaller than cluster_size pad it with zeroes
+            if len(cluster) < cluster_size:
+                cluster += bytes(cluster_size - len(cluster))
+            # If a cluster has non-zero data then it must be allocated
+            # in the output file and its L2 entry must be set
+            if cluster != zero_cluster:
+                bitmap_set(l2_bitmap, idx)
+                allocated_data_clusters += 1
+                # Allocated data clusters also need their corresponding L1 entry and L2 table
+                l1_idx = math.floor(idx / l2_entries_per_table)
+                if not bitmap_is_set(l1_bitmap, l1_idx):
+                    bitmap_set(l1_bitmap, l1_idx)
+                    allocated_l2_tables += 1
+
+    # Total amount of allocated clusters excluding the refcount blocks and table
+    total_allocated_clusters = 1 + allocated_l1_tables + allocated_l2_tables
+    if data_file_name is None:
+        total_allocated_clusters += allocated_data_clusters
+
+    # Clusters allocated for the refcount blocks and table
+    allocated_refcount_blocks = math.ceil(total_allocated_clusters  / refcounts_per_block)
+    allocated_refcount_tables = math.ceil(allocated_refcount_blocks / refcounts_per_table)
+
+    # Now we have a problem because allocated_refcount_blocks and allocated_refcount_tables...
+    # (a) increase total_allocated_clusters, and
+    # (b) need to be recalculated when total_allocated_clusters is increased
+    # So we need to repeat the calculation as long as the numbers change
+    while True:
+        new_total_allocated_clusters = total_allocated_clusters + allocated_refcount_tables + allocated_refcount_blocks
+        new_allocated_refcount_blocks = math.ceil(new_total_allocated_clusters / refcounts_per_block)
+        if new_allocated_refcount_blocks > allocated_refcount_blocks:
+            allocated_refcount_blocks = new_allocated_refcount_blocks
+            allocated_refcount_tables = math.ceil(allocated_refcount_blocks / refcounts_per_table)
+        else:
+            break
+
+    # Now that we have the final numbers we can update total_allocated_clusters
+    total_allocated_clusters += allocated_refcount_tables + allocated_refcount_blocks
+
+    # At this point we have the exact number of clusters that the output
+    # image is going to use so we can calculate all the offsets.
+    current_cluster_idx = 1
+
+    refcount_table_offset = current_cluster_idx * cluster_size
+    current_cluster_idx += allocated_refcount_tables
+
+    refcount_block_offset = current_cluster_idx * cluster_size
+    current_cluster_idx += allocated_refcount_blocks
+
+    l1_table_offset = current_cluster_idx * cluster_size
+    current_cluster_idx += allocated_l1_tables
+
+    l2_table_offset = current_cluster_idx * cluster_size
+    current_cluster_idx += allocated_l2_tables
+
+    data_clusters_offset = current_cluster_idx * cluster_size
+
+    # Calculate some values used in the qcow2 header
+    if allocated_l1_tables == 0:
+        l1_table_offset = 0
+
+    hdr_cluster_bits = int(math.log2(cluster_size))
+    hdr_refcount_bits = int(math.log2(refcount_bits))
+    hdr_length = QCOW2_V3_HEADER_LENGTH
+    hdr_incompat_features = 0
+    if data_file_name is not None:
+        hdr_incompat_features |= 1 << QCOW2_INCOMPAT_DATA_FILE_BIT
+    hdr_autoclear_features = 0
+    if data_file_raw:
+        hdr_autoclear_features |= 1 << QCOW2_AUTOCLEAR_DATA_FILE_RAW_BIT
+
+    ### Write qcow2 header
+    cluster = bytearray(cluster_size)
+    struct.pack_into(">4sIQIIQIIQQIIQQQQII", cluster, 0,
+        b"QFI\xfb",            # QCOW magic string
+        3,                     # version
+        0,                     # backing file offset
+        0,                     # backing file sizes
+        hdr_cluster_bits,
+        disk_size,
+        0,                     # encryption method
+        l1_entries,
+        l1_table_offset,
+        refcount_table_offset,
+        allocated_refcount_tables,
+        0,                     # number of snapshots
+        0,                     # snapshot table offset
+        hdr_incompat_features,
+        0,                     # compatible features
+        hdr_autoclear_features,
+        hdr_refcount_bits,
+        hdr_length,
+    )
+
+    write_features(cluster, hdr_length, data_file_name)
+
+    sys.stdout.buffer.write(cluster)
+
+    ### Write refcount table
+    cur_offset = refcount_block_offset
+    remaining_refcount_table_entries = allocated_refcount_blocks # Each entry is a pointer to a refcount block
+    while remaining_refcount_table_entries > 0:
+        cluster = bytearray(cluster_size)
+        to_write = min(remaining_refcount_table_entries, refcounts_per_table)
+        remaining_refcount_table_entries -= to_write
+        for idx in range(to_write):
+            struct.pack_into(">Q", cluster, idx * 8, cur_offset)
+            cur_offset += cluster_size
+        sys.stdout.buffer.write(cluster)
+
+    ### Write refcount blocks
+    remaining_refcount_block_entries = total_allocated_clusters # One entry for each allocated cluster
+    for tbl in range(allocated_refcount_blocks):
+        cluster = bytearray(cluster_size)
+        to_write = min(remaining_refcount_block_entries, refcounts_per_block)
+        remaining_refcount_block_entries -= to_write
+        # All refcount entries contain the number 1. The only difference
+        # is their bit width, defined when the image is created.
+        for idx in range(to_write):
+            if refcount_bits == 64:
+                struct.pack_into(">Q", cluster, idx * 8, 1)
+            elif refcount_bits == 32:
+                struct.pack_into(">L", cluster, idx * 4, 1)
+            elif refcount_bits == 16:
+                struct.pack_into(">H", cluster, idx * 2, 1)
+            elif refcount_bits == 8:
+                cluster[idx] = 1
+            elif refcount_bits == 4:
+                cluster[idx // 2] |= 1 << ((idx % 2) * 4)
+            elif refcount_bits == 2:
+                cluster[idx // 4] |= 1 << ((idx % 4) * 2)
+            elif refcount_bits == 1:
+                cluster[idx // 8] |= 1 << (idx % 8)
+        sys.stdout.buffer.write(cluster)
+
+    ### Write L1 table
+    cur_offset = l2_table_offset
+    for tbl in range(allocated_l1_tables):
+        cluster = bytearray(cluster_size)
+        for idx in range(l1_entries_per_table):
+            l1_idx = tbl * l1_entries_per_table + idx
+            if bitmap_is_set(l1_bitmap, l1_idx):
+                struct.pack_into(">Q", cluster, idx * 8, cur_offset | QCOW_OFLAG_COPIED)
+                cur_offset += cluster_size
+        sys.stdout.buffer.write(cluster)
+
+    ### Write L2 tables
+    cur_offset = data_clusters_offset
+    for tbl in range(l1_entries):
+        # Skip the empty L2 tables. We can identify them because
+        # there is no L1 entry pointing at them.
+        if bitmap_is_set(l1_bitmap, tbl):
+            cluster = bytearray(cluster_size)
+            for idx in range(l2_entries_per_table):
+                l2_idx = tbl * l2_entries_per_table + idx
+                if bitmap_is_set(l2_bitmap, l2_idx):
+                    if data_file_name is None:
+                        struct.pack_into(">Q", cluster, idx * 8, cur_offset | QCOW_OFLAG_COPIED)
+                        cur_offset += cluster_size
+                    else:
+                        struct.pack_into(">Q", cluster, idx * 8, (l2_idx * cluster_size) | QCOW_OFLAG_COPIED)
+            sys.stdout.buffer.write(cluster)
+
+    ### Write data clusters
+    if data_file_name is None:
+        for idx in bitmap_iterator(l2_bitmap, total_data_clusters):
+            cluster = os.pread(fd, cluster_size, cluster_size * idx)
+            # If the last cluster is smaller than cluster_size pad it with zeroes
+            if len(cluster) < cluster_size:
+                cluster += bytes(cluster_size - len(cluster))
+            sys.stdout.buffer.write(cluster)
+
+    if not data_file_raw:
+        os.close(fd)
+
+
+def main():
+    # Command-line arguments
+    parser = argparse.ArgumentParser(
+        description="This program converts a QEMU disk image to qcow2 "
+        "and writes it to the standard output"
+    )
+    parser.add_argument("input_file", help="name of the input file")
+    parser.add_argument(
+        "-f",
+        dest="input_format",
+        metavar="input_format",
+        help="format of the input file (default: raw)",
+        default="raw",
+    )
+    parser.add_argument(
+        "-c",
+        dest="cluster_size",
+        metavar="cluster_size",
+        help=f"qcow2 cluster size (default: {QCOW2_DEFAULT_CLUSTER_SIZE})",
+        default=QCOW2_DEFAULT_CLUSTER_SIZE,
+        type=int,
+        choices=[1 << x for x in range(9, 22)],
+    )
+    parser.add_argument(
+        "-r",
+        dest="refcount_bits",
+        metavar="refcount_bits",
+        help=f"width of the reference count entries (default: {QCOW2_DEFAULT_REFCOUNT_BITS})",
+        default=QCOW2_DEFAULT_REFCOUNT_BITS,
+        type=int,
+        choices=[1 << x for x in range(7)],
+    )
+    parser.add_argument(
+        "-d",
+        dest="data_file",
+        help="create an image with input_file as an external data file",
+        action="store_true",
+    )
+    parser.add_argument(
+        "-R",
+        dest="data_file_raw",
+        help="enable data_file_raw on the generated image (implies -d)",
+        action="store_true",
+    )
+    args = parser.parse_args()
+
+    if args.data_file_raw:
+        args.data_file = True
+
+    if not os.path.isfile(args.input_file):
+        sys.exit(f"[Error] {args.input_file} does not exist or is not a regular file.")
+
+    if args.data_file and args.input_format != "raw":
+        sys.exit("[Error] External data files can only be used with raw input images")
+
+    # A 512 byte header is too small for the data file name extension
+    if args.data_file and args.cluster_size == 512:
+        sys.exit("[Error] External data files require a larger cluster size")
+
+    if sys.stdout.isatty():
+        sys.exit("[Error] Refusing to write to a tty. Try redirecting stdout.")
+
+    if args.data_file:
+        data_file_name = args.input_file
+    else:
+        data_file_name = None
+
+    with get_input_as_raw_file(args.input_file, args.input_format) as raw_file:
+        write_qcow2_content(
+            raw_file,
+            args.cluster_size,
+            args.refcount_bits,
+            data_file_name,
+            args.data_file_raw,
+        )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/qemu-binfmt-conf.sh b/scripts/qemu-binfmt-conf.sh
index 6ef9f118d9ffe..5fd462b1d1c87 100755
--- a/scripts/qemu-binfmt-conf.sh
+++ b/scripts/qemu-binfmt-conf.sh
@@ -144,35 +144,35 @@ loongarch64_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x
 loongarch64_mask='\xff\xff\xff\xff\xff\xff\xff\xfc\x00\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
 loongarch64_family=loongarch
 
-qemu_get_family() {
-    cpu=${HOST_ARCH:-$(uname -m)}
+# Converts the name of a host CPU architecture to the corresponding QEMU
+# target.
+#
+# FIXME: This can probably be simplified a lot by dropping most entries.
+#        Remember that the script is only used on Linux, so we only need to
+#        handle the strings Linux uses to report the host CPU architecture.
+qemu_normalize() {
+    cpu="$1"
     case "$cpu" in
-    amd64|i386|i486|i586|i686|i86pc|BePC|x86_64)
+    i[3-6]86)
         echo "i386"
         ;;
-    mips*)
-        echo "mips"
+    amd64)
+        echo "x86_64"
         ;;
-    "Power Macintosh"|ppc64|powerpc|ppc)
+    powerpc)
         echo "ppc"
         ;;
-    ppc64el|ppc64le)
-        echo "ppcle"
+    ppc64el)
+        echo "ppc64le"
         ;;
-    arm|armel|armhf|arm64|armv[4-9]*l|aarch64)
+    armel|armhf|armv[4-9]*l)
         echo "arm"
         ;;
-    armeb|armv[4-9]*b|aarch64_be)
+    armv[4-9]*b)
         echo "armeb"
         ;;
-    sparc*)
-        echo "sparc"
-        ;;
-    riscv*)
-        echo "riscv"
-        ;;
-    loongarch*)
-        echo "loongarch"
+    arm64)
+        echo "aarch64"
         ;;
     *)
         echo "$cpu"
@@ -205,6 +205,9 @@ Usage: qemu-binfmt-conf.sh [--qemu-path PATH][--debian][--systemd CPU]
        --persistent:    if yes, the interpreter is loaded when binfmt is
                         configured and remains in memory. All future uses
                         are cloned from the open file.
+       --ignore-family: if yes, it is assumed that the host CPU (e.g. riscv64)
+                        can't natively run programs targeting a CPU that is
+                        part of the same family (e.g. riscv32).
        --preserve-argv0 preserve argv[0]
 
     To import templates with update-binfmts, use :
@@ -309,7 +312,13 @@ EOF
 
 qemu_set_binfmts() {
     # probe cpu type
-    host_family=$(qemu_get_family)
+    host_cpu=$(qemu_normalize ${HOST_ARCH:-$(uname -m)})
+    host_family=$(eval echo \$${host_cpu}_family)
+
+    if [ "$host_family" = "" ] ; then
+        echo "INTERNAL ERROR: unknown host cpu $host_cpu" 1>&2
+        exit 1
+    fi
 
     # register the interpreter for each cpu except for the native one
 
@@ -318,20 +327,28 @@ qemu_set_binfmts() {
         mask=$(eval echo \$${cpu}_mask)
         family=$(eval echo \$${cpu}_family)
 
+        target="$cpu"
+        if [ "$cpu" = "i486" ] ; then
+            target="i386"
+        fi
+
+        qemu="$QEMU_PATH/qemu-$target$QEMU_SUFFIX"
+
         if [ "$magic" = "" ] || [ "$mask" = "" ] || [ "$family" = "" ] ; then
             echo "INTERNAL ERROR: unknown cpu $cpu" 1>&2
             continue
         fi
 
-        qemu="$QEMU_PATH/qemu-$cpu"
-        if [ "$cpu" = "i486" ] ; then
-            qemu="$QEMU_PATH/qemu-i386"
+        if [ "$host_family" = "$family" ] ; then
+            # When --ignore-family is used, we have to generate rules even
+            # for targets that are in the same family as the host CPU. The
+            # only exception is of course when the CPU types exactly match
+            if [ "$target" = "$host_cpu" ] || [ "$IGNORE_FAMILY" = "no" ] ; then
+                continue
+            fi
         fi
 
-        qemu="$qemu$QEMU_SUFFIX"
-        if [ "$host_family" != "$family" ] ; then
-            $BINFMT_SET
-        fi
+        $BINFMT_SET
     done
 }
 
@@ -346,10 +363,11 @@ CREDENTIAL=no
 PERSISTENT=no
 PRESERVE_ARG0=no
 QEMU_SUFFIX=""
+IGNORE_FAMILY=no
 
 _longopts="debian,systemd:,qemu-path:,qemu-suffix:,exportdir:,help,credential:,\
-persistent:,preserve-argv0:"
-options=$(getopt -o ds:Q:S:e:hc:p:g:F: -l ${_longopts} -- "$@")
+persistent:,preserve-argv0:,ignore-family:"
+options=$(getopt -o ds:Q:S:e:hc:p:g:F:i: -l ${_longopts} -- "$@")
 eval set -- "$options"
 
 while true ; do
@@ -409,6 +427,10 @@ while true ; do
         shift
         PRESERVE_ARG0="$1"
         ;;
+    -i|--ignore-family)
+        shift
+        IGNORE_FAMILY="$1"
+        ;;
     *)
         break
         ;;
diff --git a/scripts/qemu-trace-stap b/scripts/qemu-trace-stap
index eb6e951ff2359..e983460ee7585 100755
--- a/scripts/qemu-trace-stap
+++ b/scripts/qemu-trace-stap
@@ -56,6 +56,7 @@ def tapset_dir(binary):
 
 
 def cmd_run(args):
+    stap = which("stap")
     prefix = probe_prefix(args.binary)
     tapsets = tapset_dir(args.binary)
 
@@ -76,7 +77,7 @@ def cmd_run(args):
 
     # We request an 8MB buffer, since the stap default 1MB buffer
     # can be easily overflowed by frequently firing QEMU traces
-    stapargs = ["stap", "-s", "8", "-I", tapsets ]
+    stapargs = [stap, "-s", "8", "-I", tapsets ]
     if args.pid is not None:
         stapargs.extend(["-x", args.pid])
     stapargs.extend(["-e", script])
@@ -84,6 +85,7 @@ def cmd_run(args):
 
 
 def cmd_list(args):
+    stap = which("stap")
     tapsets = tapset_dir(args.binary)
 
     if args.verbose:
@@ -96,7 +98,7 @@ def cmd_list(args):
 
         if verbose:
             print("Listing probes with name '%s'" % script)
-        proc = subprocess.Popen(["stap", "-I", tapsets, "-l", script],
+        proc = subprocess.Popen([stap, "-I", tapsets, "-l", script],
                                 stdout=subprocess.PIPE,
                                 universal_newlines=True)
         out, err = proc.communicate()
diff --git a/semihosting/arm-compat-semi.c b/semihosting/arm-compat-semi.c
index 86e5260e504be..e4825a8667182 100644
--- a/semihosting/arm-compat-semi.c
+++ b/semihosting/arm-compat-semi.c
@@ -85,7 +85,30 @@
 #define O_BINARY 0
 #endif
 
-static int gdb_open_modeflags[12] = {
+struct semihosting_opt_callbacks {
+    void (*set_err)(CPUState *cs, target_ulong err);
+    void (*prepare_for_read)(CPUState *cs, target_ulong fd, target_ulong buf,
+                              target_ulong len);
+} opt_callbacks;
+
+#define SEMIHOSTING_REGISTER_OPT_CALLBACKS(callbacks) \
+    struct semihosting_opt_callbacks opt_callbacks = callbacks;
+
+#define CALL_OPT_CALLBACK(FN, ARGS...) do { \
+    if (opt_callbacks.FN) { \
+        opt_callbacks.FN(ARGS); \
+    } \
+} while (0)
+
+#include "common-semi-target.h"
+
+#ifdef SEMIHOSTING_EXT_OPEN_MODES
+#define GDB_OPEN_MODES_NR 14
+#else
+#define GDB_OPEN_MODES_NR 12
+#endif
+
+static int gdb_open_modeflags[GDB_OPEN_MODES_NR] = {
     GDB_O_RDONLY,
     GDB_O_RDONLY,
     GDB_O_RDWR,
@@ -98,6 +121,10 @@ static int gdb_open_modeflags[12] = {
     GDB_O_WRONLY | GDB_O_CREAT | GDB_O_APPEND,
     GDB_O_RDWR | GDB_O_CREAT | GDB_O_APPEND,
     GDB_O_RDWR | GDB_O_CREAT | GDB_O_APPEND,
+#ifdef SEMIHOSTING_EXT_OPEN_MODES
+    GDB_O_RDWR | GDB_O_CREAT,
+    GDB_O_RDWR | GDB_O_CREAT | GDB_O_EXCL,
+#endif
 };
 
 #ifndef CONFIG_USER_ONLY
@@ -180,17 +207,10 @@ static LayoutInfo common_semi_find_bases(CPUState *cs)
  * error indication (0 on success, non-0 for error) which the caller
  * should check.
  */
-
-#define GET_ARG(n) do {                                 \
-    if (is_64bit_semihosting(env)) {                    \
-        if (get_user_u64(arg ## n, args + (n) * 8)) {   \
-            goto do_fault;                              \
-        }                                               \
-    } else {                                            \
-        if (get_user_u32(arg ## n, args + (n) * 4)) {   \
-            goto do_fault;                              \
-        }                                               \
-    }                                                   \
+#define GET_ARG(n) do { \
+    if (common_semi_read_arg_word(env, &arg ## n, args, n)) { \
+        goto do_fault; \
+    } \
 } while (0)
 
 #define SET_ARG(n, val)                                 \
@@ -223,7 +243,7 @@ static inline uint32_t get_swi_errno(CPUState *cs)
 #endif
 }
 
-static void common_semi_cb(CPUState *cs, uint64_t ret, int err)
+void common_semi_cb(CPUState *cs, uint64_t ret, int err)
 {
     if (err) {
 #ifdef CONFIG_USER_ONLY
@@ -231,6 +251,7 @@ static void common_semi_cb(CPUState *cs, uint64_t ret, int err)
         ts->swi_errno = err;
 #else
         syscall_err = err;
+        CALL_OPT_CALLBACK(set_err, cs, err);
 #endif
     }
     common_semi_set_ret(cs, ret);
@@ -386,7 +407,7 @@ void do_common_semihosting(CPUState *cs)
         if (!s) {
             goto do_fault;
         }
-        if (arg1 >= 12) {
+        if (arg1 >= GDB_OPEN_MODES_NR) {
             unlock_user(s, arg0, 0);
             common_semi_cb(cs, -1, EINVAL);
             break;
@@ -466,6 +487,7 @@ void do_common_semihosting(CPUState *cs)
         GET_ARG(0);
         GET_ARG(1);
         GET_ARG(2);
+        CALL_OPT_CALLBACK(prepare_for_read, cs, arg0, arg1, arg2);
         semihost_sys_read(cs, common_semi_rw_cb, arg0, arg1, arg2);
         break;
 
diff --git a/semihosting/config.c b/semihosting/config.c
index 56283b5c3c38a..a64a8dfd27da5 100644
--- a/semihosting/config.c
+++ b/semihosting/config.c
@@ -46,6 +46,9 @@ QemuOptsList qemu_semihosting_config_opts = {
         }, {
             .name = "arg",
             .type = QEMU_OPT_STRING,
+        }, {
+            .name = "usefs",
+            .type = QEMU_OPT_STRING,
         },
         { /* end of list */ }
     },
@@ -58,6 +61,7 @@ typedef struct SemihostingConfig {
     char **argv;
     int argc;
     const char *cmdline; /* concatenated argv */
+    const char *usefs;
 } SemihostingConfig;
 
 static SemihostingConfig semihosting;
@@ -94,6 +98,11 @@ const char *semihosting_get_cmdline(void)
     return semihosting.cmdline;
 }
 
+const char *semihosting_get_usefs(void)
+{
+    return semihosting.usefs;
+}
+
 static int add_semihosting_arg(void *opaque,
                                const char *name, const char *val,
                                Error **errp)
@@ -144,6 +153,8 @@ int qemu_semihosting_config_options(const char *optstr)
                                                 true);
         semihosting.userspace_enabled = qemu_opt_get_bool(opts, "userspace",
                                                           false);
+        semihosting.usefs = qemu_opt_get(opts, "usefs");
+
         const char *target = qemu_opt_get(opts, "target");
         /* setup of chardev is deferred until they are initialised */
         semihost_chardev = qemu_opt_get(opts, "chardev");
diff --git a/semihosting/guestfd.c b/semihosting/guestfd.c
index d3241434c516f..4d846f4e5d10c 100644
--- a/semihosting/guestfd.c
+++ b/semihosting/guestfd.c
@@ -23,6 +23,18 @@ GuestFD console_in_gf;
 GuestFD console_out_gf;
 #endif
 
+static void semihosting_use_stdio(void)
+{
+    console_in_gf.type = GuestFDHost;
+    console_in_gf.hostfd = 0;
+    console_out_gf.type = GuestFDHost;
+    console_out_gf.hostfd = 1;
+    guestfd_array = g_array_set_size(guestfd_array, 3);
+    associate_guestfd(0, 0);
+    associate_guestfd(1, 1);
+    associate_guestfd(2, 2);
+}
+
 void qemu_semihosting_guestfd_init(void)
 {
     /* New entries zero-initialized, i.e. type GuestFDUnused */
@@ -36,8 +48,12 @@ void qemu_semihosting_guestfd_init(void)
         console_out_gf.type = GuestFDGDB;
         console_out_gf.hostfd = 2;
     } else {
+#ifdef CONFIG_SEMIHOSTING_USE_STDIO
+        semihosting_use_stdio();
+#else
         console_in_gf.type = GuestFDConsole;
         console_out_gf.type = GuestFDConsole;
+#endif
     }
 #else
     /* Otherwise, the stdio file descriptors apply. */
diff --git a/semihosting/syscalls.c b/semihosting/syscalls.c
index f6451d9bb0e65..e790c79efe858 100644
--- a/semihosting/syscalls.c
+++ b/semihosting/syscalls.c
@@ -13,6 +13,7 @@
 #include "semihosting/guestfd.h"
 #include "semihosting/syscalls.h"
 #include "semihosting/console.h"
+#include "semihosting/semihost.h"
 #ifdef CONFIG_USER_ONLY
 #include "qemu.h"
 #else
@@ -261,7 +262,8 @@ static void host_open(CPUState *cs, gdb_syscall_complete_cb complete,
 {
     CPUArchState *env G_GNUC_UNUSED = cpu_env(cs);
     char *p;
-    int ret, host_flags = O_BINARY;
+    int ret, err, host_flags = O_BINARY;
+    const char *usefs = semihosting_get_usefs();
 
     ret = validate_lock_user_string(&p, cs, fname, fname_len);
     if (ret < 0) {
@@ -287,9 +289,17 @@ static void host_open(CPUState *cs, gdb_syscall_complete_cb complete,
     }
 
     ret = open(p, host_flags, mode);
+    err = errno;
+    if (ret < 0 && err == ENOENT && usefs) {
+        g_autoptr(GString) usefs_fname = g_string_new(NULL);
+        g_string_append_printf(usefs_fname, "%s/%s", usefs, p);
+        ret = open(usefs_fname->str, host_flags, mode);
+        err = errno;
+    }
+
     if (ret < 0) {
         qemu_log_mask(LOG_GUEST_ERROR, "%s: failed to open %s\n", __func__, p);
-        complete(cs, -1, errno);
+        complete(cs, -1, err);
     } else {
         int guestfd = alloc_guestfd();
         associate_guestfd(guestfd, ret);
@@ -542,6 +552,13 @@ static void host_poll_one(CPUState *cs, gdb_syscall_complete_cb complete,
 }
 #endif
 
+static void host_ftruncate(CPUState *cs, gdb_syscall_complete_cb complete,
+                           GuestFD *gf, off_t len)
+{
+    int err = ftruncate(gf->hostfd, len);
+    complete(cs, err, err < 0 ? errno : 0);
+}
+
 /*
  * Static file semihosting syscall implementations.
  */
@@ -983,3 +1000,22 @@ void semihost_sys_poll_one(CPUState *cs, gdb_syscall_complete_cb complete,
     }
 }
 #endif
+
+void semihost_sys_ftruncate(CPUState *cs, gdb_syscall_complete_cb complete,
+                            int fd, off_t len)
+{
+    GuestFD *gf = get_guestfd(fd);
+    if (!gf) {
+        complete(cs, -1, EBADF);
+        return;
+    }
+
+    switch (gf->type) {
+    case GuestFDHost:
+        host_ftruncate(cs, complete, gf, len);
+        break;
+    default:
+        fprintf(stderr, "ftruncate call not implemented for this semihosting mode.\n");
+        g_assert_not_reached();
+    }
+}
diff --git a/storage-daemon/qapi/qapi-schema.json b/storage-daemon/qapi/qapi-schema.json
index f10c94949068c..2a562ee32e524 100644
--- a/storage-daemon/qapi/qapi-schema.json
+++ b/storage-daemon/qapi/qapi-schema.json
@@ -13,6 +13,14 @@
 # the array type in the main schema, even if it is unused outside of the
 # storage daemon.
 
+##
+# = QEMU storage daemon protocol commands and structs
+#
+# For a concise listing of all commands, events, and types in the QEMU
+# storage daemon, please consult the `qapi-qsd-index`.
+##
+
+
 { 'include': '../../qapi/pragma.json' }
 
 # Documentation generated with qapi-gen.py is in source order, with
diff --git a/stubs/meson.build b/stubs/meson.build
index b0fee37e0593e..63392f5e78528 100644
--- a/stubs/meson.build
+++ b/stubs/meson.build
@@ -62,7 +62,7 @@ if have_user
     stub_ss.add(files('qdev.c'))
   endif
 
-  stub_ss.add(files('monitor-fd.c'))
+  stub_ss.add(files('monitor-internal.c'))
 endif
 
 if have_system
diff --git a/stubs/monitor-fd.c b/stubs/monitor-fd.c
deleted file mode 100644
index 9bb67498850b2..0000000000000
--- a/stubs/monitor-fd.c
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-
-#include "qemu/osdep.h"
-#include "monitor/monitor.h"
-
-int monitor_get_fd(Monitor *mon, const char *fdname, Error **errp)
-{
-    abort();
-}
diff --git a/subprojects/.gitignore b/subprojects/.gitignore
index 50f173f90dbe5..d12d34618cc20 100644
--- a/subprojects/.gitignore
+++ b/subprojects/.gitignore
@@ -11,6 +11,7 @@
 /bilge-impl-0.2.0
 /either-1.12.0
 /itertools-0.11.0
+/libc-0.2.162
 /proc-macro-error-1.0.4
 /proc-macro-error-attr-1.0.4
 /proc-macro2-1.0.84
diff --git a/subprojects/libc-0.2-rs.wrap b/subprojects/libc-0.2-rs.wrap
new file mode 100644
index 0000000000000..bbe08f8788348
--- /dev/null
+++ b/subprojects/libc-0.2-rs.wrap
@@ -0,0 +1,7 @@
+[wrap-file]
+directory = libc-0.2.162
+source_url = https://crates.io/api/v1/crates/libc/0.2.162/download
+source_filename = libc-0.2.162.tar.gz
+source_hash = 18d287de67fe55fd7e1581fe933d965a5a9477b38e949cfa9f8574ef01506398
+#method = cargo
+patch_directory = libc-0.2-rs
diff --git a/subprojects/packagefiles/libc-0.2-rs/meson.build b/subprojects/packagefiles/libc-0.2-rs/meson.build
new file mode 100644
index 0000000000000..ac4f80dba98e7
--- /dev/null
+++ b/subprojects/packagefiles/libc-0.2-rs/meson.build
@@ -0,0 +1,37 @@
+project('libc-0.2-rs', 'rust',
+  meson_version: '>=1.5.0',
+  version: '0.2.162',
+  license: 'MIT OR Apache-2.0',
+  default_options: [])
+
+_libc_rs = static_library(
+  'libc',
+  files('src/lib.rs'),
+  gnu_symbol_visibility: 'hidden',
+  override_options: ['rust_std=2015', 'build.rust_std=2015'],
+  rust_abi: 'rust',
+  rust_args: [
+    '--cap-lints', 'allow',
+    '--cfg', 'freebsd11',
+    '--cfg', 'libc_priv_mod_use',
+    '--cfg', 'libc_union',
+    '--cfg', 'libc_const_size_of',
+    '--cfg', 'libc_align',
+    '--cfg', 'libc_int128',
+    '--cfg', 'libc_core_cvoid',
+    '--cfg', 'libc_packedN',
+    '--cfg', 'libc_cfg_target_vendor',
+    '--cfg', 'libc_non_exhaustive',
+    '--cfg', 'libc_long_array',
+    '--cfg', 'libc_ptr_addr_of',
+    '--cfg', 'libc_underscore_const_names',
+    '--cfg', 'libc_const_extern_fn',
+  ],
+  dependencies: [],
+)
+
+libc_dep = declare_dependency(
+  link_with: _libc_rs,
+)
+
+meson.override_dependency('libc-0.2-rs', libc_dep)
diff --git a/system/arch_init.c b/system/arch_init.c
index d2c32f848872d..e85736884c976 100644
--- a/system/arch_init.c
+++ b/system/arch_init.c
@@ -22,29 +22,9 @@
  * THE SOFTWARE.
  */
 #include "qemu/osdep.h"
-#include "qemu/module.h"
 #include "system/arch_init.h"
 
-#ifdef TARGET_SPARC
-int graphic_width = 1024;
-int graphic_height = 768;
-int graphic_depth = 8;
-#elif defined(TARGET_M68K)
-int graphic_width = 800;
-int graphic_height = 600;
-int graphic_depth = 8;
-#else
-int graphic_width = 800;
-int graphic_height = 600;
-int graphic_depth = 32;
-#endif
-
-const uint32_t arch_type = QEMU_ARCH;
-
-void qemu_init_arch_modules(void)
+bool qemu_arch_available(unsigned qemu_arch_mask)
 {
-#ifdef CONFIG_MODULES
-    module_init_info(qemu_modinfo);
-    module_allow_arch(TARGET_NAME);
-#endif
+    return qemu_arch_mask & QEMU_ARCH;
 }
diff --git a/system/cpus.c b/system/cpus.c
index 37e5892c2404c..2cc5f887ab5da 100644
--- a/system/cpus.c
+++ b/system/cpus.c
@@ -31,6 +31,7 @@
 #include "qapi/qapi-events-run-state.h"
 #include "qapi/qmp/qerror.h"
 #include "exec/gdbstub.h"
+#include "system/accel-ops.h"
 #include "system/hw_accel.h"
 #include "exec/cpu-common.h"
 #include "qemu/thread.h"
diff --git a/system/device_tree-stub.c b/system/device_tree-stub.c
index bddda6fa37a74..428330b0feca8 100644
--- a/system/device_tree-stub.c
+++ b/system/device_tree-stub.c
@@ -5,6 +5,9 @@
 #ifdef CONFIG_FDT
 void qmp_dumpdtb(const char *filename, Error **errp)
 {
-    error_setg(errp, "This machine doesn't have a FDT");
+    ERRP_GUARD();
+
+    error_setg(errp, "This machine doesn't have an FDT");
+    error_append_hint(errp, "(this machine type definitely doesn't use FDT)\n");
 }
 #endif
diff --git a/system/device_tree.c b/system/device_tree.c
index 4bc2d61b934a4..aa3fe9516f365 100644
--- a/system/device_tree.c
+++ b/system/device_tree.c
@@ -594,21 +594,6 @@ int qemu_fdt_add_path(void *fdt, const char *path)
     return retval;
 }
 
-void qemu_fdt_dumpdtb(void *fdt, int size)
-{
-    const char *dumpdtb = current_machine->dumpdtb;
-
-    if (dumpdtb) {
-        /* Dump the dtb to a file and quit */
-        if (g_file_set_contents(dumpdtb, fdt, size, NULL)) {
-            info_report("dtb dumped to %s. Exiting.", dumpdtb);
-            exit(0);
-        }
-        error_report("%s: Failed dumping dtb to %s", __func__, dumpdtb);
-        exit(1);
-    }
-}
-
 int qemu_fdt_setprop_sized_cells_from_array(void *fdt,
                                             const char *node_path,
                                             const char *property,
@@ -650,11 +635,16 @@ int qemu_fdt_setprop_sized_cells_from_array(void *fdt,
 
 void qmp_dumpdtb(const char *filename, Error **errp)
 {
+    ERRP_GUARD();
+
     g_autoptr(GError) err = NULL;
     uint32_t size;
 
     if (!current_machine->fdt) {
-        error_setg(errp, "This machine doesn't have a FDT");
+        error_setg(errp, "This machine doesn't have an FDT");
+        error_append_hint(errp,
+                          "(Perhaps it doesn't support FDT at all, or perhaps "
+                          "you need to provide an FDT with the -fdt option?)\n");
         return;
     }
 
diff --git a/system/dma-helpers.c b/system/dma-helpers.c
index f6403242f5fea..6bad75876f116 100644
--- a/system/dma-helpers.c
+++ b/system/dma-helpers.c
@@ -211,7 +211,7 @@ static const AIOCBInfo dma_aiocb_info = {
     .cancel_async       = dma_aio_cancel,
 };
 
-BlockAIOCB *dma_blk_io(AioContext *ctx,
+BlockAIOCB *dma_blk_io(
     QEMUSGList *sg, uint64_t offset, uint32_t align,
     DMAIOFunc *io_func, void *io_func_opaque,
     BlockCompletionFunc *cb,
@@ -223,7 +223,7 @@ BlockAIOCB *dma_blk_io(AioContext *ctx,
 
     dbs->acb = NULL;
     dbs->sg = sg;
-    dbs->ctx = ctx;
+    dbs->ctx = qemu_get_current_aio_context();
     dbs->offset = offset;
     dbs->align = align;
     dbs->sg_cur_index = 0;
@@ -251,7 +251,7 @@ BlockAIOCB *dma_blk_read(BlockBackend *blk,
                          QEMUSGList *sg, uint64_t offset, uint32_t align,
                          void (*cb)(void *opaque, int ret), void *opaque)
 {
-    return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
+    return dma_blk_io(sg, offset, align,
                       dma_blk_read_io_func, blk, cb, opaque,
                       DMA_DIRECTION_FROM_DEVICE);
 }
@@ -269,7 +269,7 @@ BlockAIOCB *dma_blk_write(BlockBackend *blk,
                           QEMUSGList *sg, uint64_t offset, uint32_t align,
                           void (*cb)(void *opaque, int ret), void *opaque)
 {
-    return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
+    return dma_blk_io(sg, offset, align,
                       dma_blk_write_io_func, blk, cb, opaque,
                       DMA_DIRECTION_TO_DEVICE);
 }
diff --git a/system/globals-target.c b/system/globals-target.c
new file mode 100644
index 0000000000000..989720591e72b
--- /dev/null
+++ b/system/globals-target.c
@@ -0,0 +1,24 @@
+/*
+ * Global variables that should not exist (target specific)
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "qemu/osdep.h"
+#include "system/system.h"
+
+#ifdef TARGET_SPARC
+int graphic_width = 1024;
+int graphic_height = 768;
+int graphic_depth = 8;
+#elif defined(TARGET_M68K)
+int graphic_width = 800;
+int graphic_height = 600;
+int graphic_depth = 8;
+#else
+int graphic_width = 800;
+int graphic_height = 600;
+int graphic_depth = 32;
+#endif
diff --git a/system/globals.c b/system/globals.c
index 4867c93ca6b9e..9640c9511e9aa 100644
--- a/system/globals.c
+++ b/system/globals.c
@@ -31,10 +31,20 @@
 #include "system/cpus.h"
 #include "system/system.h"
 
+bool should_mlock(MlockState state)
+{
+    return state == MLOCK_ON || state == MLOCK_ON_FAULT;
+}
+
+bool is_mlock_on_fault(MlockState state)
+{
+    return state == MLOCK_ON_FAULT;
+}
+
 enum vga_retrace_method vga_retrace_method = VGA_RETRACE_DUMB;
 int display_opengl;
 const char* keyboard_layout;
-bool enable_mlock;
+MlockState mlock_state;
 bool enable_cpu_pm;
 int autostart = 1;
 int vga_interface_type = VGA_NONE;
@@ -48,7 +58,6 @@ unsigned int nb_prom_envs;
 const char *prom_envs[MAX_PROM_ENVS];
 uint8_t *boot_splash_filedata;
 int only_migratable; /* turn it off unless user states otherwise */
-int icount_align_option;
 
 /* The bytes in qemu_uuid are in the order specified by RFC4122, _not_ in the
  * little-endian "wire format" described in the SMBIOS 2.6 specification.
diff --git a/system/memory_ldst.c.inc b/system/memory_ldst.c.inc
index 0e6f3940a9a15..7f32d3d9ff39d 100644
--- a/system/memory_ldst.c.inc
+++ b/system/memory_ldst.c.inc
@@ -34,7 +34,7 @@ static inline uint32_t glue(address_space_ldl_internal, SUFFIX)(ARG1_DECL,
 
     RCU_READ_LOCK();
     mr = TRANSLATE(addr, &addr1, &l, false, attrs);
-    if (l < 4 || !memory_access_is_direct(mr, false)) {
+    if (l < 4 || !memory_access_is_direct(mr, false, attrs)) {
         release_lock |= prepare_mmio_access(mr);
 
         /* I/O case */
@@ -103,7 +103,7 @@ static inline uint64_t glue(address_space_ldq_internal, SUFFIX)(ARG1_DECL,
 
     RCU_READ_LOCK();
     mr = TRANSLATE(addr, &addr1, &l, false, attrs);
-    if (l < 8 || !memory_access_is_direct(mr, false)) {
+    if (l < 8 || !memory_access_is_direct(mr, false, attrs)) {
         release_lock |= prepare_mmio_access(mr);
 
         /* I/O case */
@@ -170,7 +170,7 @@ uint8_t glue(address_space_ldub, SUFFIX)(ARG1_DECL,
 
     RCU_READ_LOCK();
     mr = TRANSLATE(addr, &addr1, &l, false, attrs);
-    if (!memory_access_is_direct(mr, false)) {
+    if (!memory_access_is_direct(mr, false, attrs)) {
         release_lock |= prepare_mmio_access(mr);
 
         /* I/O case */
@@ -207,7 +207,7 @@ static inline uint16_t glue(address_space_lduw_internal, SUFFIX)(ARG1_DECL,
 
     RCU_READ_LOCK();
     mr = TRANSLATE(addr, &addr1, &l, false, attrs);
-    if (l < 2 || !memory_access_is_direct(mr, false)) {
+    if (l < 2 || !memory_access_is_direct(mr, false, attrs)) {
         release_lock |= prepare_mmio_access(mr);
 
         /* I/O case */
@@ -277,7 +277,7 @@ void glue(address_space_stl_notdirty, SUFFIX)(ARG1_DECL,
 
     RCU_READ_LOCK();
     mr = TRANSLATE(addr, &addr1, &l, true, attrs);
-    if (l < 4 || !memory_access_is_direct(mr, true)) {
+    if (l < 4 || !memory_access_is_direct(mr, true, attrs)) {
         release_lock |= prepare_mmio_access(mr);
 
         r = memory_region_dispatch_write(mr, addr1, val, MO_32, attrs);
@@ -314,7 +314,7 @@ static inline void glue(address_space_stl_internal, SUFFIX)(ARG1_DECL,
 
     RCU_READ_LOCK();
     mr = TRANSLATE(addr, &addr1, &l, true, attrs);
-    if (l < 4 || !memory_access_is_direct(mr, true)) {
+    if (l < 4 || !memory_access_is_direct(mr, true, attrs)) {
         release_lock |= prepare_mmio_access(mr);
         r = memory_region_dispatch_write(mr, addr1, val,
                                          MO_32 | devend_memop(endian), attrs);
@@ -377,7 +377,7 @@ void glue(address_space_stb, SUFFIX)(ARG1_DECL,
 
     RCU_READ_LOCK();
     mr = TRANSLATE(addr, &addr1, &l, true, attrs);
-    if (!memory_access_is_direct(mr, true)) {
+    if (!memory_access_is_direct(mr, true, attrs)) {
         release_lock |= prepare_mmio_access(mr);
         r = memory_region_dispatch_write(mr, addr1, val, MO_8, attrs);
     } else {
@@ -410,7 +410,7 @@ static inline void glue(address_space_stw_internal, SUFFIX)(ARG1_DECL,
 
     RCU_READ_LOCK();
     mr = TRANSLATE(addr, &addr1, &l, true, attrs);
-    if (l < 2 || !memory_access_is_direct(mr, true)) {
+    if (l < 2 || !memory_access_is_direct(mr, true, attrs)) {
         release_lock |= prepare_mmio_access(mr);
         r = memory_region_dispatch_write(mr, addr1, val,
                                          MO_16 | devend_memop(endian), attrs);
@@ -474,7 +474,7 @@ static void glue(address_space_stq_internal, SUFFIX)(ARG1_DECL,
 
     RCU_READ_LOCK();
     mr = TRANSLATE(addr, &addr1, &l, true, attrs);
-    if (l < 8 || !memory_access_is_direct(mr, true)) {
+    if (l < 8 || !memory_access_is_direct(mr, true, attrs)) {
         release_lock |= prepare_mmio_access(mr);
         r = memory_region_dispatch_write(mr, addr1, val,
                                          MO_64 | devend_memop(endian), attrs);
diff --git a/system/meson.build b/system/meson.build
index 4952f4b2c7d40..eec07a945130f 100644
--- a/system/meson.build
+++ b/system/meson.build
@@ -1,9 +1,9 @@
 specific_ss.add(when: 'CONFIG_SYSTEM_ONLY', if_true: [files(
   'arch_init.c',
   'ioport.c',
+  'globals-target.c',
   'memory.c',
   'physmem.c',
-  'watchpoint.c',
 )])
 
 system_ss.add(files(
@@ -24,6 +24,7 @@ system_ss.add(files(
   'runstate.c',
   'tpm-hmp-cmds.c',
   'vl.c',
+  'watchpoint.c',
 ), sdl, libpmem, libdaxctl)
 
 if have_tpm
diff --git a/system/physmem.c b/system/physmem.c
index 67c9db9daadbb..e97de3ef65cf8 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -28,10 +28,11 @@
 #include "qemu/lockable.h"
 
 #ifdef CONFIG_TCG
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 #endif /* CONFIG_TCG */
 
 #include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "exec/page-protection.h"
 #include "exec/target_page.h"
 #include "exec/translation-block.h"
@@ -70,7 +71,10 @@
 
 #include "qemu/pmem.h"
 
+#include "qapi/qapi-types-migration.h"
+#include "migration/blocker.h"
 #include "migration/cpr.h"
+#include "migration/options.h"
 #include "migration/vmstate.h"
 
 #include "qemu/range.h"
@@ -573,7 +577,7 @@ MemoryRegion *flatview_translate(FlatView *fv, hwaddr addr, hwaddr *xlat,
                                     is_write, true, &as, attrs);
     mr = section.mr;
 
-    if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
+    if (xen_enabled() && memory_access_is_direct(mr, is_write, attrs)) {
         hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
         *plen = MIN(page, *plen);
     }
@@ -1882,7 +1886,11 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
     if (new_block->flags & RAM_GUEST_MEMFD) {
         int ret;
 
-        assert(kvm_enabled());
+        if (!kvm_enabled()) {
+            error_setg(errp, "cannot set up private guest memory for %s: KVM required",
+                       object_get_typename(OBJECT(current_machine->cgs)));
+            goto out_free;
+        }
         assert(new_block->guest_memfd < 0);
 
         ret = ram_block_discard_require(true);
@@ -1899,6 +1907,14 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
             qemu_mutex_unlock_ramlist();
             goto out_free;
         }
+
+        error_setg(&new_block->cpr_blocker,
+                   "Memory region %s uses guest_memfd, "
+                   "which is not supported with CPR.",
+                   memory_region_name(new_block->mr));
+        migrate_add_blocker_modes(&new_block->cpr_blocker, errp,
+                                  MIG_MODE_CPR_TRANSFER,
+                                  -1);
     }
 
     ram_size = (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS;
@@ -2275,45 +2291,80 @@ void qemu_ram_free(RAMBlock *block)
 }
 
 #ifndef _WIN32
-void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
+/* Simply remap the given VM memory location from start to start+length */
+static int qemu_ram_remap_mmap(RAMBlock *block, uint64_t start, size_t length)
+{
+    int flags, prot;
+    void *area;
+    void *host_startaddr = block->host + start;
+
+    assert(block->fd < 0);
+    flags = MAP_FIXED | MAP_ANONYMOUS;
+    flags |= block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE;
+    flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
+    prot = PROT_READ;
+    prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
+    area = mmap(host_startaddr, length, prot, flags, -1, 0);
+    return area != host_startaddr ? -errno : 0;
+}
+
+/*
+ * qemu_ram_remap - remap a single RAM page
+ *
+ * @addr: address in ram_addr_t address space.
+ *
+ * This function will try remapping a single page of guest RAM identified by
+ * @addr, essentially discarding memory to recover from previously poisoned
+ * memory (MCE). The page size depends on the RAMBlock (i.e., hugetlb). @addr
+ * does not have to point at the start of the page.
+ *
+ * This function is only to be used during system resets; it will kill the
+ * VM if remapping failed.
+ */
+void qemu_ram_remap(ram_addr_t addr)
 {
     RAMBlock *block;
-    ram_addr_t offset;
-    int flags;
-    void *area, *vaddr;
-    int prot;
+    uint64_t offset;
+    void *vaddr;
+    size_t page_size;
 
     RAMBLOCK_FOREACH(block) {
         offset = addr - block->offset;
         if (offset < block->max_length) {
+            /* Respect the pagesize of our RAMBlock */
+            page_size = qemu_ram_pagesize(block);
+            offset = QEMU_ALIGN_DOWN(offset, page_size);
+
             vaddr = ramblock_ptr(block, offset);
             if (block->flags & RAM_PREALLOC) {
                 ;
             } else if (xen_enabled()) {
                 abort();
             } else {
-                flags = MAP_FIXED;
-                flags |= block->flags & RAM_SHARED ?
-                         MAP_SHARED : MAP_PRIVATE;
-                flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
-                prot = PROT_READ;
-                prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
-                if (block->fd >= 0) {
-                    area = mmap(vaddr, length, prot, flags, block->fd,
-                                offset + block->fd_offset);
-                } else {
-                    flags |= MAP_ANONYMOUS;
-                    area = mmap(vaddr, length, prot, flags, -1, 0);
-                }
-                if (area != vaddr) {
-                    error_report("Could not remap addr: "
-                                 RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
-                                 length, addr);
-                    exit(1);
+                if (ram_block_discard_range(block, offset, page_size) != 0) {
+                    /*
+                     * Fall back to using mmap() only for anonymous mapping,
+                     * as if a backing file is associated we may not be able
+                     * to recover the memory in all cases.
+                     * So don't take the risk of using only mmap and fail now.
+                     */
+                    if (block->fd >= 0) {
+                        error_report("Could not remap RAM %s:%" PRIx64 "+%"
+                                     PRIx64 " +%zx", block->idstr, offset,
+                                     block->fd_offset, page_size);
+                        exit(1);
+                    }
+                    if (qemu_ram_remap_mmap(block, offset, page_size) != 0) {
+                        error_report("Could not remap RAM %s:%" PRIx64 " +%zx",
+                                     block->idstr, offset, page_size);
+                        exit(1);
+                    }
                 }
-                memory_try_enable_merging(vaddr, length);
-                qemu_ram_setup_dump(vaddr, length);
+                memory_try_enable_merging(vaddr, page_size);
+                qemu_ram_setup_dump(vaddr, page_size);
             }
+
+            break;
         }
     }
 }
@@ -2869,7 +2920,7 @@ static MemTxResult flatview_write_continue_step(MemTxAttrs attrs,
         return MEMTX_ACCESS_ERROR;
     }
 
-    if (!memory_access_is_direct(mr, true)) {
+    if (!memory_access_is_direct(mr, true, attrs)) {
         uint64_t val;
         MemTxResult result;
         bool release_lock = prepare_mmio_access(mr);
@@ -2965,7 +3016,7 @@ static MemTxResult flatview_read_continue_step(MemTxAttrs attrs, uint8_t *buf,
         return MEMTX_ACCESS_ERROR;
     }
 
-    if (!memory_access_is_direct(mr, false)) {
+    if (!memory_access_is_direct(mr, false, attrs)) {
         /* I/O case */
         uint64_t val;
         MemTxResult result;
@@ -3137,8 +3188,7 @@ static inline MemTxResult address_space_write_rom_internal(AddressSpace *as,
         l = len;
         mr = address_space_translate(as, addr, &addr1, &l, true, attrs);
 
-        if (!(memory_region_is_ram(mr) ||
-              memory_region_is_romd(mr))) {
+        if (!memory_region_supports_direct_access(mr)) {
             l = memory_access_size(mr, l, addr1);
         } else {
             /* ROM/RAM case */
@@ -3275,7 +3325,7 @@ static bool flatview_access_valid(FlatView *fv, hwaddr addr, hwaddr len,
     while (len > 0) {
         l = len;
         mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs);
-        if (!memory_access_is_direct(mr, is_write)) {
+        if (!memory_access_is_direct(mr, is_write, attrs)) {
             l = memory_access_size(mr, l, addr);
             if (!memory_region_access_valid(mr, xlat, l, is_write, attrs)) {
                 return false;
@@ -3355,7 +3405,7 @@ void *address_space_map(AddressSpace *as,
     fv = address_space_to_flatview(as);
     mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs);
 
-    if (!memory_access_is_direct(mr, is_write)) {
+    if (!memory_access_is_direct(mr, is_write, attrs)) {
         size_t used = qatomic_read(&as->bounce_buffer_size);
         for (;;) {
             hwaddr alloc = MIN(as->max_bounce_buffer_size - used, l);
@@ -3488,7 +3538,7 @@ int64_t address_space_cache_init(MemoryRegionCache *cache,
 
     mr = cache->mrs.mr;
     memory_region_ref(mr);
-    if (memory_access_is_direct(mr, is_write)) {
+    if (memory_access_is_direct(mr, is_write, MEMTXATTRS_UNSPECIFIED)) {
         /* We don't care about the memory attributes here as we're only
          * doing this if we found actual RAM, which behaves the same
          * regardless of attributes; so UNSPECIFIED is fine.
@@ -3681,13 +3731,8 @@ int cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
         if (l > len)
             l = len;
         phys_addr += (addr & ~TARGET_PAGE_MASK);
-        if (is_write) {
-            res = address_space_write_rom(cpu->cpu_ases[asidx].as, phys_addr,
-                                          attrs, buf, l);
-        } else {
-            res = address_space_read(cpu->cpu_ases[asidx].as, phys_addr,
-                                     attrs, buf, l);
-        }
+        res = address_space_rw(cpu->cpu_ases[asidx].as, phys_addr, attrs, buf,
+                               l, is_write);
         if (res != MEMTX_OK) {
             return -1;
         }
@@ -3797,18 +3842,19 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
             }
 
             ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
-                            start, length);
+                            start + rb->fd_offset, length);
             if (ret) {
                 ret = -errno;
-                error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)",
-                             __func__, rb->idstr, start, length, ret);
+                error_report("%s: Failed to fallocate %s:%" PRIx64 "+%" PRIx64
+                             " +%zx (%d)", __func__, rb->idstr, start,
+                             rb->fd_offset, length, ret);
                 goto err;
             }
 #else
             ret = -ENOSYS;
             error_report("%s: fallocate not available/file"
-                         "%s:%" PRIx64 " +%zx (%d)",
-                         __func__, rb->idstr, start, length, ret);
+                         "%s:%" PRIx64 "+%" PRIx64 " +%zx (%d)", __func__,
+                         rb->idstr, start, rb->fd_offset, length, ret);
             goto err;
 #endif
         }
@@ -3855,6 +3901,7 @@ int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start,
     int ret = -1;
 
 #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+    /* ignore fd_offset with guest_memfd */
     ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                     start, length);
 
@@ -4059,3 +4106,58 @@ bool ram_block_discard_is_required(void)
     return qatomic_read(&ram_block_discard_required_cnt) ||
            qatomic_read(&ram_block_coordinated_discard_required_cnt);
 }
+
+/*
+ * Return true if ram is compatible with CPR.  Do not exclude rom,
+ * because the rom file could change in new QEMU.
+ */
+static bool ram_is_cpr_compatible(RAMBlock *rb)
+{
+    MemoryRegion *mr = rb->mr;
+
+    if (!mr || !memory_region_is_ram(mr)) {
+        return true;
+    }
+
+    /* Ram device is remapped in new QEMU */
+    if (memory_region_is_ram_device(mr)) {
+        return true;
+    }
+
+    /*
+     * A file descriptor is passed to new QEMU and remapped, or its backing
+     * file is reopened and mapped.  It must be shared to avoid COW.
+     */
+    if (rb->fd >= 0 && qemu_ram_is_shared(rb)) {
+        return true;
+    }
+
+    return false;
+}
+
+/*
+ * Add a blocker for each volatile ram block.  This function should only be
+ * called after we know that the block is migratable.  Non-migratable blocks
+ * are either re-created in new QEMU, or are handled specially, or are covered
+ * by a device-level CPR blocker.
+ */
+void ram_block_add_cpr_blocker(RAMBlock *rb, Error **errp)
+{
+    assert(qemu_ram_is_migratable(rb));
+
+    if (ram_is_cpr_compatible(rb)) {
+        return;
+    }
+
+    error_setg(&rb->cpr_blocker,
+               "Memory region %s is not compatible with CPR. share=on is "
+               "required for memory-backend objects, and aux-ram-share=on is "
+               "required.", memory_region_name(rb->mr));
+    migrate_add_blocker_modes(&rb->cpr_blocker, errp, MIG_MODE_CPR_TRANSFER,
+                              -1);
+}
+
+void ram_block_del_cpr_blocker(RAMBlock *rb)
+{
+    migrate_del_blocker(&rb->cpr_blocker);
+}
diff --git a/system/qdev-monitor.c b/system/qdev-monitor.c
index 856c9e8c32eab..5588ed2047d9e 100644
--- a/system/qdev-monitor.c
+++ b/system/qdev-monitor.c
@@ -132,7 +132,7 @@ static const char *qdev_class_get_alias(DeviceClass *dc)
 
     for (i = 0; qdev_alias_table[i].typename; i++) {
         if (qdev_alias_table[i].arch_mask &&
-            !(qdev_alias_table[i].arch_mask & arch_type)) {
+            !qemu_arch_available(qdev_alias_table[i].arch_mask)) {
             continue;
         }
 
@@ -218,7 +218,7 @@ static const char *find_typename_by_alias(const char *alias)
 
     for (i = 0; qdev_alias_table[i].alias; i++) {
         if (qdev_alias_table[i].arch_mask &&
-            !(qdev_alias_table[i].arch_mask & arch_type)) {
+            !qemu_arch_available(qdev_alias_table[i].arch_mask)) {
             continue;
         }
 
diff --git a/system/vl.c b/system/vl.c
index 9c6942c6cfcc2..ec93988a03a4c 100644
--- a/system/vl.c
+++ b/system/vl.c
@@ -26,6 +26,7 @@
 #include "qemu/help-texts.h"
 #include "qemu/datadir.h"
 #include "qemu/units.h"
+#include "qemu/module.h"
 #include "exec/cpu-common.h"
 #include "exec/page-vary.h"
 #include "hw/qdev-properties.h"
@@ -53,6 +54,7 @@
 #include "hw/usb.h"
 #include "hw/isa/isa.h"
 #include "hw/scsi/scsi.h"
+#include "hw/sd/sd.h"
 #include "hw/display/vga.h"
 #include "hw/firmware/smbios.h"
 #include "hw/acpi/acpi.h"
@@ -77,6 +79,7 @@
 #include "hw/block/block.h"
 #include "hw/i386/x86.h"
 #include "hw/i386/pc.h"
+#include "hw/core/cpu.h"
 #include "migration/cpr.h"
 #include "migration/misc.h"
 #include "migration/snapshot.h"
@@ -194,7 +197,7 @@ static int default_parallel = 1;
 static int default_monitor = 1;
 static int default_floppy = 1;
 static int default_cdrom = 1;
-static int default_sdcard = 1;
+static bool auto_create_sdcard = true;
 static int default_vga = 1;
 static int default_net = 1;
 
@@ -351,7 +354,7 @@ static QemuOptsList qemu_overcommit_opts = {
     .desc = {
         {
             .name = "mem-lock",
-            .type = QEMU_OPT_BOOL,
+            .type = QEMU_OPT_STRING,
         },
         {
             .name = "cpu-pm",
@@ -718,7 +721,7 @@ static void configure_blockdev(BlockdevOptionsQueue *bdo_queue,
     default_drive(default_cdrom, snapshot, machine_class->block_default_type, 2,
                   CDROM_OPTS);
     default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS);
-    default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS);
+    default_drive(auto_create_sdcard, snapshot, IF_SD, 0, SD_OPTS);
 
 }
 
@@ -796,8 +799,8 @@ static QemuOptsList qemu_run_with_opts = {
 
 static void realtime_init(void)
 {
-    if (enable_mlock) {
-        if (os_mlock() < 0) {
+    if (should_mlock(mlock_state)) {
+        if (os_mlock(is_mlock_on_fault(mlock_state)) < 0) {
             error_report("locking memory failed");
             exit(1);
         }
@@ -875,11 +878,11 @@ static void help(int exitcode)
             g_get_prgname());
 
 #define DEF(option, opt_arg, opt_enum, opt_help, arch_mask)    \
-    if ((arch_mask) & arch_type)                               \
+    if (qemu_arch_available(arch_mask)) \
         fputs(opt_help, stdout);
 
 #define ARCHHEADING(text, arch_mask) \
-    if ((arch_mask) & arch_type)    \
+    if (qemu_arch_available(arch_mask)) \
         puts(stringify(text));
 
 #define DEFHEADING(text) ARCHHEADING(text, QEMU_ARCH_ALL)
@@ -1346,8 +1349,8 @@ static void qemu_disable_default_devices(void)
     if (!has_defaults || machine_class->no_cdrom) {
         default_cdrom = 0;
     }
-    if (!has_defaults || machine_class->no_sdcard) {
-        default_sdcard = 0;
+    if (!has_defaults || !machine_class->auto_create_sdcard) {
+        auto_create_sdcard = false;
     }
     if (!has_defaults) {
         default_audio = 0;
@@ -1875,6 +1878,44 @@ static void object_option_parse(const char *str)
     visit_free(v);
 }
 
+static void overcommit_parse(const char *str)
+{
+    QemuOpts *opts;
+    const char *mem_lock_opt;
+
+    opts = qemu_opts_parse_noisily(qemu_find_opts("overcommit"),
+                                   str, false);
+    if (!opts) {
+        exit(1);
+    }
+
+    enable_cpu_pm = qemu_opt_get_bool(opts, "cpu-pm", enable_cpu_pm);
+
+    mem_lock_opt = qemu_opt_get(opts, "mem-lock");
+    if (!mem_lock_opt) {
+        return;
+    }
+
+    if (strcmp(mem_lock_opt, "on") == 0) {
+        mlock_state = MLOCK_ON;
+        return;
+    }
+
+    if (strcmp(mem_lock_opt, "off") == 0) {
+        mlock_state = MLOCK_OFF;
+        return;
+    }
+
+    if (strcmp(mem_lock_opt, "on-fault") == 0) {
+        mlock_state = MLOCK_ON_FAULT;
+        return;
+    }
+
+    error_report("parameter 'mem-lock' expects one of "
+                 "'on', 'off', 'on-fault'");
+    exit(1);
+}
+
 /*
  * Very early object creation, before the sandbox options have been activated.
  */
@@ -2661,12 +2702,27 @@ static void qemu_init_displays(void)
 
 static void qemu_init_board(void)
 {
+    MachineClass *machine_class = MACHINE_GET_CLASS(current_machine);
+
     /* process plugin before CPUs are created, but once -smp has been parsed */
     qemu_plugin_load_list(&plugin_list, &error_fatal);
 
     /* From here on we enter MACHINE_PHASE_INITIALIZED.  */
     machine_run_board_init(current_machine, mem_path, &error_fatal);
 
+    if (machine_class->auto_create_sdcard) {
+        bool ambigous;
+
+        /* Ensure there is a SD bus available to create SD card on */
+        Object *obj = object_resolve_path_type("", TYPE_SD_BUS, &ambigous);
+        if (!obj && !ambigous) {
+            fprintf(stderr, "Can not create sd-card on '%s' machine"
+                            " because it lacks a sd-bus\n",
+                            machine_class->name);
+            abort();
+        }
+    }
+
     drive_check_orphaned();
 
     realtime_init();
@@ -2831,7 +2887,10 @@ void qemu_init(int argc, char **argv)
 
     os_setup_limits();
 
-    qemu_init_arch_modules();
+#ifdef CONFIG_MODULES
+    module_init_info(qemu_modinfo);
+    module_allow_arch(target_name());
+#endif
 
     qemu_init_subsystems();
 
@@ -2870,7 +2929,7 @@ void qemu_init(int argc, char **argv)
             const QEMUOption *popt;
 
             popt = lookup_opt(argc, argv, &optarg, &optind);
-            if (!(popt->arch_mask & arch_type)) {
+            if (!qemu_arch_available(popt->arch_mask)) {
                 error_report("Option not supported for this target");
                 exit(1);
             }
@@ -3575,13 +3634,7 @@ void qemu_init(int argc, char **argv)
                 object_option_parse(optarg);
                 break;
             case QEMU_OPTION_overcommit:
-                opts = qemu_opts_parse_noisily(qemu_find_opts("overcommit"),
-                                               optarg, false);
-                if (!opts) {
-                    exit(1);
-                }
-                enable_mlock = qemu_opt_get_bool(opts, "mem-lock", enable_mlock);
-                enable_cpu_pm = qemu_opt_get_bool(opts, "cpu-pm", enable_cpu_pm);
+                overcommit_parse(optarg);
                 break;
             case QEMU_OPTION_compat:
                 {
diff --git a/system/watchpoint.c b/system/watchpoint.c
index 2aa2a9ea63f59..08dbd8483d156 100644
--- a/system/watchpoint.c
+++ b/system/watchpoint.c
@@ -19,7 +19,8 @@
 
 #include "qemu/osdep.h"
 #include "qemu/error-report.h"
-#include "exec/exec-all.h"
+#include "exec/cputlb.h"
+#include "exec/target_page.h"
 #include "hw/core/cpu.h"
 
 /* Add a watchpoint.  */
diff --git a/target/Kconfig b/target/Kconfig
index d0c7b59d9c718..37781146b9bbb 100644
--- a/target/Kconfig
+++ b/target/Kconfig
@@ -16,6 +16,7 @@ source sh4/Kconfig
 source sparc/Kconfig
 source tricore/Kconfig
 source xtensa/Kconfig
+source hexagon/Kconfig
 
 config TARGET_BIG_ENDIAN
     bool
diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c
index f5dd744987618..584c2aa76bd9e 100644
--- a/target/alpha/cpu.c
+++ b/target/alpha/cpu.c
@@ -63,6 +63,7 @@ static void alpha_restore_state_to_opc(CPUState *cs,
     }
 }
 
+#ifndef CONFIG_USER_ONLY
 static bool alpha_cpu_has_work(CPUState *cs)
 {
     /* Here we are checking to see if the CPU should wake up from HALT.
@@ -77,6 +78,7 @@ static bool alpha_cpu_has_work(CPUState *cs)
                                     | CPU_INTERRUPT_SMP
                                     | CPU_INTERRUPT_MCHK);
 }
+#endif /* !CONFIG_USER_ONLY */
 
 static int alpha_cpu_mmu_index(CPUState *cs, bool ifetch)
 {
@@ -85,6 +87,7 @@ static int alpha_cpu_mmu_index(CPUState *cs, bool ifetch)
 
 static void alpha_cpu_disas_set_info(CPUState *cpu, disassemble_info *info)
 {
+    info->endian = BFD_ENDIAN_LITTLE;
     info->mach = bfd_mach_alpha_ev6;
     info->print_insn = print_insn_alpha;
 }
@@ -223,11 +226,12 @@ static void alpha_cpu_initfn(Object *obj)
 #include "hw/core/sysemu-cpu-ops.h"
 
 static const struct SysemuCPUOps alpha_sysemu_ops = {
+    .has_work = alpha_cpu_has_work,
     .get_phys_page_debug = alpha_cpu_get_phys_page_debug,
 };
 #endif
 
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 
 static const TCGCPUOps alpha_tcg_ops = {
     .initialize = alpha_translate_init,
@@ -258,7 +262,6 @@ static void alpha_cpu_class_init(ObjectClass *oc, void *data)
                                     &acc->parent_realize);
 
     cc->class_by_name = alpha_cpu_class_by_name;
-    cc->has_work = alpha_cpu_has_work;
     cc->mmu_index = alpha_cpu_mmu_index;
     cc->dump_state = alpha_cpu_dump_state;
     cc->set_pc = alpha_cpu_set_pc;
diff --git a/target/alpha/fpu_helper.c b/target/alpha/fpu_helper.c
index f810a9b6a47a3..6aefb9b851ab1 100644
--- a/target/alpha/fpu_helper.c
+++ b/target/alpha/fpu_helper.c
@@ -455,29 +455,28 @@ static uint64_t do_cvttq(CPUAlphaState *env, uint64_t a, int roundmode)
 {
     float64 fa;
     int64_t ret;
-    uint32_t exc;
+    uint32_t exc = 0;
+    int flags;
 
     fa = t_to_float64(a);
     ret = float64_to_int64_modulo(fa, roundmode, &FP_STATUS);
 
-    exc = get_float_exception_flags(&FP_STATUS);
-    if (unlikely(exc)) {
+    flags = get_float_exception_flags(&FP_STATUS);
+    if (unlikely(flags)) {
         set_float_exception_flags(0, &FP_STATUS);
 
         /* We need to massage the resulting exceptions. */
-        if (exc & float_flag_invalid_cvti) {
+        if (flags & float_flag_invalid_cvti) {
             /* Overflow, either normal or infinity. */
             if (float64_is_infinity(fa)) {
                 exc = FPCR_INV;
             } else {
                 exc = FPCR_IOV | FPCR_INE;
             }
-        } else if (exc & float_flag_invalid) {
+        } else if (flags & float_flag_invalid) {
             exc = FPCR_INV;
-        } else if (exc & float_flag_inexact) {
+        } else if (flags & float_flag_inexact) {
             exc = FPCR_INE;
-        } else {
-            exc = 0;
         }
     }
     env->error_code = exc;
diff --git a/target/alpha/helper.c b/target/alpha/helper.c
index 2f1000c99fa9c..57cefcba14483 100644
--- a/target/alpha/helper.c
+++ b/target/alpha/helper.c
@@ -20,7 +20,7 @@
 #include "qemu/osdep.h"
 #include "qemu/log.h"
 #include "cpu.h"
-#include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "exec/page-protection.h"
 #include "fpu/softfloat-types.h"
 #include "exec/helper-proto.h"
diff --git a/target/alpha/sys_helper.c b/target/alpha/sys_helper.c
index 54ee93f34cccb..51e32544287a8 100644
--- a/target/alpha/sys_helper.c
+++ b/target/alpha/sys_helper.c
@@ -19,7 +19,7 @@
 
 #include "qemu/osdep.h"
 #include "cpu.h"
-#include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "exec/tb-flush.h"
 #include "exec/helper-proto.h"
 #include "system/runstate.h"
diff --git a/target/arm/arch_dump.c b/target/arm/arch_dump.c
index 5c943dc27b5e5..c40df4e7fd744 100644
--- a/target/arm/arch_dump.c
+++ b/target/arm/arch_dump.c
@@ -23,6 +23,7 @@
 #include "elf.h"
 #include "system/dump.h"
 #include "cpu-features.h"
+#include "internals.h"
 
 /* struct user_pt_regs from arch/arm64/include/uapi/asm/ptrace.h */
 struct aarch64_user_regs {
diff --git a/target/arm/common-semi-target.h b/target/arm/common-semi-target.h
index da51f2d7f540d..69429a45c6526 100644
--- a/target/arm/common-semi-target.h
+++ b/target/arm/common-semi-target.h
@@ -12,6 +12,17 @@
 
 #include "target/arm/cpu-qom.h"
 
+static inline bool common_semi_read_arg_word(CPUArchState *env,
+                                             target_ulong *save_to,
+                                             target_ulong args_addr,
+                                             int arg_num)
+{
+    if (is_64bit_semihosting(env)) {
+        return get_user_u64(*save_to, args_addr + (arg_num) * 8));
+    }
+    return get_user_u32(*save_to, args_addr + (arg_num) * 4));
+}
+
 static inline target_ulong common_semi_arg(CPUState *cs, int argno)
 {
     ARMCPU *cpu = ARM_CPU(cs);
diff --git a/target/arm/cpregs.h b/target/arm/cpregs.h
index 1759d9defbeed..2183de8eda691 100644
--- a/target/arm/cpregs.h
+++ b/target/arm/cpregs.h
@@ -328,20 +328,23 @@ typedef enum CPAccessResult {
      * Access fails due to a configurable trap or enable which would
      * result in a categorized exception syndrome giving information about
      * the failing instruction (ie syndrome category 0x3, 0x4, 0x5, 0x6,
-     * 0xc or 0x18).
+     * 0xc or 0x18). These traps are always to a specified target EL,
+     * never to the usual target EL.
      */
-    CP_ACCESS_TRAP = (1 << 2),
-    CP_ACCESS_TRAP_EL2 = CP_ACCESS_TRAP | 2,
-    CP_ACCESS_TRAP_EL3 = CP_ACCESS_TRAP | 3,
+    CP_ACCESS_TRAP_BIT = (1 << 2),
+    CP_ACCESS_TRAP_EL1 = CP_ACCESS_TRAP_BIT | 1,
+    CP_ACCESS_TRAP_EL2 = CP_ACCESS_TRAP_BIT | 2,
+    CP_ACCESS_TRAP_EL3 = CP_ACCESS_TRAP_BIT | 3,
 
     /*
-     * Access fails and results in an exception syndrome 0x0 ("uncategorized").
+     * Access fails with UNDEFINED, i.e. an exception syndrome 0x0
+     * ("uncategorized"), which is what an undefined insn produces.
      * Note that this is not a catch-all case -- the set of cases which may
      * result in this failure is specifically defined by the architecture.
      * This trap is always to the usual target EL, never directly to a
      * specified target EL.
      */
-    CP_ACCESS_TRAP_UNCATEGORIZED = (2 << 2),
+    CP_ACCESS_UNDEFINED = (2 << 2),
 } CPAccessResult;
 
 /* Indexes into fgt_read[] */
@@ -1154,4 +1157,32 @@ static inline bool arm_cpreg_traps_in_nv(const ARMCPRegInfo *ri)
     return ri->opc1 == 4 || ri->opc1 == 5;
 }
 
+/* Macros for accessing a specified CP register bank */
+#define A32_BANKED_REG_GET(_env, _regname, _secure)                     \
+    ((_secure) ? (_env)->cp15._regname##_s : (_env)->cp15._regname##_ns)
+
+#define A32_BANKED_REG_SET(_env, _regname, _secure, _val)       \
+    do {                                                        \
+        if (_secure) {                                          \
+            (_env)->cp15._regname##_s = (_val);                 \
+        } else {                                                \
+            (_env)->cp15._regname##_ns = (_val);                \
+        }                                                       \
+    } while (0)
+
+/*
+ * Macros for automatically accessing a specific CP register bank depending on
+ * the current secure state of the system.  These macros are not intended for
+ * supporting instruction translation reads/writes as these are dependent
+ * solely on the SCR.NS bit and not the mode.
+ */
+#define A32_BANKED_CURRENT_REG_GET(_env, _regname)                          \
+    A32_BANKED_REG_GET((_env), _regname,                                    \
+                       (arm_is_secure(_env) && !arm_el_is_aa64((_env), 3)))
+
+#define A32_BANKED_CURRENT_REG_SET(_env, _regname, _val)                    \
+    A32_BANKED_REG_SET((_env), _regname,                                    \
+                       (arm_is_secure(_env) && !arm_el_is_aa64((_env), 3)), \
+                       (_val))
+
 #endif /* TARGET_ARM_CPREGS_H */
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 656070afb5972..01786ac7879c0 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -29,7 +29,7 @@
 #include "cpu.h"
 #ifdef CONFIG_TCG
 #include "exec/translation-block.h"
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 #endif /* CONFIG_TCG */
 #include "internals.h"
 #include "cpu-features.h"
@@ -123,6 +123,7 @@ void arm_restore_state_to_opc(CPUState *cs,
 }
 #endif /* CONFIG_TCG */
 
+#ifndef CONFIG_USER_ONLY
 /*
  * With SCTLR_ELx.NMI == 0, IRQ with Superpriority is masked identically with
  * IRQ without Superpriority. Moreover, if the GIC is configured so that
@@ -141,6 +142,7 @@ static bool arm_cpu_has_work(CPUState *cs)
          | CPU_INTERRUPT_VFIQ | CPU_INTERRUPT_VIRQ | CPU_INTERRUPT_VSERR
          | CPU_INTERRUPT_EXITTB);
 }
+#endif /* !CONFIG_USER_ONLY */
 
 static int arm_cpu_mmu_index(CPUState *cs, bool ifetch)
 {
@@ -830,7 +832,6 @@ static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx,
 
 static bool arm_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
 {
-    CPUClass *cc = CPU_GET_CLASS(cs);
     CPUARMState *env = cpu_env(cs);
     uint32_t cur_el = arm_current_el(env);
     bool secure = arm_is_secure(env);
@@ -930,7 +931,7 @@ static bool arm_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
  found:
     cs->exception_index = excp_idx;
     env->exception.target_el = target_el;
-    cc->tcg_ops->do_interrupt(cs);
+    cs->cc->tcg_ops->do_interrupt(cs);
     return true;
 }
 
@@ -1171,7 +1172,7 @@ static void arm_disas_set_info(CPUState *cpu, disassemble_info *info)
 {
     ARMCPU *ac = ARM_CPU(cpu);
     CPUARMState *env = &ac->env;
-    bool sctlr_b;
+    bool sctlr_b = arm_sctlr_b(env);
 
     if (is_a64(env)) {
         info->cap_arch = CS_ARCH_ARM64;
@@ -1198,13 +1199,9 @@ static void arm_disas_set_info(CPUState *cpu, disassemble_info *info)
         info->cap_mode = cap_mode;
     }
 
-    sctlr_b = arm_sctlr_b(env);
+    info->endian = BFD_ENDIAN_LITTLE;
     if (bswap_code(sctlr_b)) {
-#if TARGET_BIG_ENDIAN
-        info->endian = BFD_ENDIAN_LITTLE;
-#else
-        info->endian = BFD_ENDIAN_BIG;
-#endif
+        info->endian = TARGET_BIG_ENDIAN ? BFD_ENDIAN_LITTLE : BFD_ENDIAN_BIG;
     }
     info->flags &= ~INSN_ARM_BE32;
 #ifndef CONFIG_USER_ONLY
@@ -2073,6 +2070,10 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
                                               arm_gt_stimer_cb, cpu);
         cpu->gt_timer[GTIMER_HYPVIRT] = timer_new(QEMU_CLOCK_VIRTUAL, scale,
                                                   arm_gt_hvtimer_cb, cpu);
+        cpu->gt_timer[GTIMER_S_EL2_PHYS] = timer_new(QEMU_CLOCK_VIRTUAL, scale,
+                                                     arm_gt_sel2timer_cb, cpu);
+        cpu->gt_timer[GTIMER_S_EL2_VIRT] = timer_new(QEMU_CLOCK_VIRTUAL, scale,
+                                                     arm_gt_sel2vtimer_cb, cpu);
     }
 #endif
 
@@ -2656,6 +2657,7 @@ static const gchar *arm_gdb_arch_name(CPUState *cs)
 #include "hw/core/sysemu-cpu-ops.h"
 
 static const struct SysemuCPUOps arm_sysemu_ops = {
+    .has_work = arm_cpu_has_work,
     .get_phys_page_attrs_debug = arm_cpu_get_phys_page_attrs_debug,
     .asidx_from_attrs = arm_asidx_from_attrs,
     .write_elf32_note = arm_cpu_write_elf32_note,
@@ -2706,7 +2708,6 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data)
                                        &acc->parent_phases);
 
     cc->class_by_name = arm_cpu_class_by_name;
-    cc->has_work = arm_cpu_has_work;
     cc->mmu_index = arm_cpu_mmu_index;
     cc->dump_state = arm_cpu_dump_state;
     cc->set_pc = arm_cpu_set_pc;
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 6f6cf5c888bf9..a8177c6c2e89d 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -62,6 +62,7 @@
 #define EXCP_NMI            26
 #define EXCP_VINMI          27
 #define EXCP_VFNMI          28
+#define EXCP_MON_TRAP       29   /* AArch32 trap to Monitor mode */
 /* NB: add new EXCP_ defines to the array in arm_log_exception() too */
 
 #define ARMV7M_EXCP_RESET   1
@@ -1170,6 +1171,8 @@ void arm_gt_vtimer_cb(void *opaque);
 void arm_gt_htimer_cb(void *opaque);
 void arm_gt_stimer_cb(void *opaque);
 void arm_gt_hvtimer_cb(void *opaque);
+void arm_gt_sel2timer_cb(void *opaque);
+void arm_gt_sel2vtimer_cb(void *opaque);
 
 unsigned int gt_cntfrq_period_ns(ARMCPU *cpu);
 void gt_rme_post_el_change(ARMCPU *cpu, void *opaque);
@@ -2595,6 +2598,11 @@ static inline bool arm_is_secure_below_el3(CPUARMState *env)
     return false;
 }
 
+static inline bool arm_is_el3_or_mon(CPUARMState *env)
+{
+    return false;
+}
+
 static inline ARMSecuritySpace arm_security_space(CPUARMState *env)
 {
     return ARMSS_NonSecure;
@@ -2627,81 +2635,15 @@ uint64_t arm_hcr_el2_eff_secstate(CPUARMState *env, ARMSecuritySpace space);
 uint64_t arm_hcr_el2_eff(CPUARMState *env);
 uint64_t arm_hcrx_el2_eff(CPUARMState *env);
 
-/* Return true if the specified exception level is running in AArch64 state. */
-static inline bool arm_el_is_aa64(CPUARMState *env, int el)
-{
-    /* This isn't valid for EL0 (if we're in EL0, is_a64() is what you want,
-     * and if we're not in EL0 then the state of EL0 isn't well defined.)
-     */
-    assert(el >= 1 && el <= 3);
-    bool aa64 = arm_feature(env, ARM_FEATURE_AARCH64);
-
-    /* The highest exception level is always at the maximum supported
-     * register width, and then lower levels have a register width controlled
-     * by bits in the SCR or HCR registers.
-     */
-    if (el == 3) {
-        return aa64;
-    }
-
-    if (arm_feature(env, ARM_FEATURE_EL3) &&
-        ((env->cp15.scr_el3 & SCR_NS) || !(env->cp15.scr_el3 & SCR_EEL2))) {
-        aa64 = aa64 && (env->cp15.scr_el3 & SCR_RW);
-    }
-
-    if (el == 2) {
-        return aa64;
-    }
-
-    if (arm_is_el2_enabled(env)) {
-        aa64 = aa64 && (env->cp15.hcr_el2 & HCR_RW);
-    }
-
-    return aa64;
-}
-
-/* Function for determining whether guest cp register reads and writes should
+/*
+ * Function for determining whether guest cp register reads and writes should
  * access the secure or non-secure bank of a cp register.  When EL3 is
  * operating in AArch32 state, the NS-bit determines whether the secure
  * instance of a cp register should be used. When EL3 is AArch64 (or if
  * it doesn't exist at all) then there is no register banking, and all
  * accesses are to the non-secure version.
  */
-static inline bool access_secure_reg(CPUARMState *env)
-{
-    bool ret = (arm_feature(env, ARM_FEATURE_EL3) &&
-                !arm_el_is_aa64(env, 3) &&
-                !(env->cp15.scr_el3 & SCR_NS));
-
-    return ret;
-}
-
-/* Macros for accessing a specified CP register bank */
-#define A32_BANKED_REG_GET(_env, _regname, _secure)    \
-    ((_secure) ? (_env)->cp15._regname##_s : (_env)->cp15._regname##_ns)
-
-#define A32_BANKED_REG_SET(_env, _regname, _secure, _val)   \
-    do {                                                \
-        if (_secure) {                                   \
-            (_env)->cp15._regname##_s = (_val);            \
-        } else {                                        \
-            (_env)->cp15._regname##_ns = (_val);           \
-        }                                               \
-    } while (0)
-
-/* Macros for automatically accessing a specific CP register bank depending on
- * the current secure state of the system.  These macros are not intended for
- * supporting instruction translation reads/writes as these are dependent
- * solely on the SCR.NS bit and not the mode.
- */
-#define A32_BANKED_CURRENT_REG_GET(_env, _regname)        \
-    A32_BANKED_REG_GET((_env), _regname,                \
-                       (arm_is_secure(_env) && !arm_el_is_aa64((_env), 3)))
-
-#define A32_BANKED_CURRENT_REG_SET(_env, _regname, _val)                       \
-    A32_BANKED_REG_SET((_env), _regname,                                    \
-                       (arm_is_secure(_env) && !arm_el_is_aa64((_env), 3)), \
-                       (_val))
+bool access_secure_reg(CPUARMState *env);
 
 uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx,
                                  uint32_t cur_el, bool secure);
@@ -2724,39 +2666,6 @@ static inline bool arm_v7m_is_handler_mode(CPUARMState *env)
     return env->v7m.exception != 0;
 }
 
-/* Return the current Exception Level (as per ARMv8; note that this differs
- * from the ARMv7 Privilege Level).
- */
-static inline int arm_current_el(CPUARMState *env)
-{
-    if (arm_feature(env, ARM_FEATURE_M)) {
-        return arm_v7m_is_handler_mode(env) ||
-            !(env->v7m.control[env->v7m.secure] & 1);
-    }
-
-    if (is_a64(env)) {
-        return extract32(env->pstate, 2, 2);
-    }
-
-    switch (env->uncached_cpsr & 0x1f) {
-    case ARM_CPU_MODE_USR:
-        return 0;
-    case ARM_CPU_MODE_HYP:
-        return 2;
-    case ARM_CPU_MODE_MON:
-        return 3;
-    default:
-        if (arm_is_secure(env) && !arm_el_is_aa64(env, 3)) {
-            /* If EL3 is 32-bit then all secure privileged modes run in
-             * EL3
-             */
-            return 3;
-        }
-
-        return 1;
-    }
-}
-
 /**
  * write_list_to_cpustate
  * @cpu: ARMCPU
@@ -3057,47 +2966,6 @@ static inline bool arm_sctlr_b(CPUARMState *env)
 
 uint64_t arm_sctlr(CPUARMState *env, int el);
 
-static inline bool arm_cpu_data_is_big_endian_a32(CPUARMState *env,
-                                                  bool sctlr_b)
-{
-#ifdef CONFIG_USER_ONLY
-    /*
-     * In system mode, BE32 is modelled in line with the
-     * architecture (as word-invariant big-endianness), where loads
-     * and stores are done little endian but from addresses which
-     * are adjusted by XORing with the appropriate constant. So the
-     * endianness to use for the raw data access is not affected by
-     * SCTLR.B.
-     * In user mode, however, we model BE32 as byte-invariant
-     * big-endianness (because user-only code cannot tell the
-     * difference), and so we need to use a data access endianness
-     * that depends on SCTLR.B.
-     */
-    if (sctlr_b) {
-        return true;
-    }
-#endif
-    /* In 32bit endianness is determined by looking at CPSR's E bit */
-    return env->uncached_cpsr & CPSR_E;
-}
-
-static inline bool arm_cpu_data_is_big_endian_a64(int el, uint64_t sctlr)
-{
-    return sctlr & (el ? SCTLR_EE : SCTLR_E0E);
-}
-
-/* Return true if the processor is in big-endian mode. */
-static inline bool arm_cpu_data_is_big_endian(CPUARMState *env)
-{
-    if (!is_a64(env)) {
-        return arm_cpu_data_is_big_endian_a32(env, arm_sctlr_b(env));
-    } else {
-        int cur_el = arm_current_el(env);
-        uint64_t sctlr = arm_sctlr(env, cur_el);
-        return arm_cpu_data_is_big_endian_a64(cur_el, sctlr);
-    }
-}
-
 #include "exec/cpu-all.h"
 
 /*
@@ -3283,13 +3151,6 @@ static inline bool bswap_code(bool sctlr_b)
 #endif
 }
 
-#ifdef CONFIG_USER_ONLY
-static inline bool arm_cpu_bswap_data(CPUARMState *env)
-{
-    return TARGET_BIG_ENDIAN ^ arm_cpu_data_is_big_endian(env);
-}
-#endif
-
 void cpu_get_tb_cpu_state(CPUARMState *env, vaddr *pc,
                           uint64_t *cs_base, uint32_t *flags);
 
diff --git a/target/arm/debug_helper.c b/target/arm/debug_helper.c
index 2212ef4a3b948..a9a619ba6b1b4 100644
--- a/target/arm/debug_helper.c
+++ b/target/arm/debug_helper.c
@@ -875,12 +875,13 @@ static CPAccessResult access_tdcc(CPUARMState *env, const ARMCPRegInfo *ri,
                                           (env->cp15.mdcr_el3 & MDCR_TDCC);
 
     if (el < 1 && mdscr_el1_tdcc) {
-        return CP_ACCESS_TRAP;
+        return CP_ACCESS_TRAP_EL1;
     }
     if (el < 2 && (mdcr_el2_tda || mdcr_el2_tdcc)) {
         return CP_ACCESS_TRAP_EL2;
     }
-    if (el < 3 && ((env->cp15.mdcr_el3 & MDCR_TDA) || mdcr_el3_tdcc)) {
+    if (!arm_is_el3_or_mon(env) &&
+        ((env->cp15.mdcr_el3 & MDCR_TDA) || mdcr_el3_tdcc)) {
         return CP_ACCESS_TRAP_EL3;
     }
     return CP_ACCESS_OK;
@@ -1036,7 +1037,7 @@ static const ARMCPRegInfo debug_cp_reginfo[] = {
     { .name = "DBGVCR",
       .cp = 14, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 0,
       .access = PL1_RW, .accessfn = access_tda,
-      .type = ARM_CP_NOP },
+      .type = ARM_CP_CONST, .resetvalue = 0 },
     /*
      * Dummy MDCCINT_EL1, since we don't implement the Debug Communications
      * Channel but Linux may try to access this register. The 32-bit
@@ -1045,7 +1046,7 @@ static const ARMCPRegInfo debug_cp_reginfo[] = {
     { .name = "MDCCINT_EL1", .state = ARM_CP_STATE_BOTH,
       .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 0,
       .access = PL1_RW, .accessfn = access_tdcc,
-      .type = ARM_CP_NOP },
+      .type = ARM_CP_CONST, .resetvalue = 0 },
     /*
      * Dummy DBGCLAIM registers.
      * "The architecture does not define any functionality for the CLAIM tag bits.",
@@ -1074,7 +1075,8 @@ static const ARMCPRegInfo debug_aa32_el1_reginfo[] = {
     { .name = "DBGVCR32_EL2", .state = ARM_CP_STATE_AA64,
       .opc0 = 2, .opc1 = 4, .crn = 0, .crm = 7, .opc2 = 0,
       .access = PL2_RW, .accessfn = access_dbgvcr32,
-      .type = ARM_CP_NOP | ARM_CP_EL3_NO_EL2_KEEP },
+      .type = ARM_CP_CONST | ARM_CP_EL3_NO_EL2_KEEP,
+      .resetvalue = 0 },
 };
 
 static const ARMCPRegInfo debug_lpae_cp_reginfo[] = {
diff --git a/target/arm/gtimer.h b/target/arm/gtimer.h
index b992941bef11b..d49c63cbf87d6 100644
--- a/target/arm/gtimer.h
+++ b/target/arm/gtimer.h
@@ -10,12 +10,14 @@
 #define TARGET_ARM_GTIMER_H
 
 enum {
-    GTIMER_PHYS     = 0,
-    GTIMER_VIRT     = 1,
-    GTIMER_HYP      = 2,
-    GTIMER_SEC      = 3,
-    GTIMER_HYPVIRT  = 4,
-#define NUM_GTIMERS   5
+    GTIMER_PHYS     = 0, /* CNTP_* ; EL1 physical timer */
+    GTIMER_VIRT     = 1, /* CNTV_* ; EL1 virtual timer */
+    GTIMER_HYP      = 2, /* CNTHP_* ; EL2 physical timer */
+    GTIMER_SEC      = 3, /* CNTPS_* ; EL3 physical timer */
+    GTIMER_HYPVIRT  = 4, /* CNTHV_* ; EL2 virtual timer ; only if FEAT_VHE */
+    GTIMER_S_EL2_PHYS = 5, /* CNTHPS_* ; only if FEAT_SEL2 */
+    GTIMER_S_EL2_VIRT = 6, /* CNTHVS_* ; only if FEAT_SEL2 */
+#define NUM_GTIMERS   7
 };
 
 #endif
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 7d95eae997118..bb445e30cd152 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -18,6 +18,7 @@
 #include "qemu/timer.h"
 #include "qemu/bitops.h"
 #include "qemu/qemu-print.h"
+#include "exec/cputlb.h"
 #include "exec/exec-all.h"
 #include "exec/translation-block.h"
 #include "hw/irq.h"
@@ -285,7 +286,7 @@ static CPAccessResult access_el3_aa32ns(CPUARMState *env,
 {
     if (!is_a64(env) && arm_current_el(env) == 3 &&
         arm_is_secure_below_el3(env)) {
-        return CP_ACCESS_TRAP_UNCATEGORIZED;
+        return CP_ACCESS_UNDEFINED;
     }
     return CP_ACCESS_OK;
 }
@@ -310,7 +311,7 @@ static CPAccessResult access_trap_aa32s_el1(CPUARMState *env,
         return CP_ACCESS_TRAP_EL3;
     }
     /* This will be EL1 NS and EL2 NS, which just UNDEF */
-    return CP_ACCESS_TRAP_UNCATEGORIZED;
+    return CP_ACCESS_UNDEFINED;
 }
 
 /*
@@ -881,7 +882,7 @@ static CPAccessResult pmreg_access(CPUARMState *env, const ARMCPRegInfo *ri,
     uint64_t mdcr_el2 = arm_mdcr_el2_eff(env);
 
     if (el == 0 && !(env->cp15.c9_pmuserenr & 1)) {
-        return CP_ACCESS_TRAP;
+        return CP_ACCESS_TRAP_EL1;
     }
     if (el < 2 && (mdcr_el2 & MDCR_TPM)) {
         return CP_ACCESS_TRAP_EL2;
@@ -2159,7 +2160,7 @@ static CPAccessResult teehbr_access(CPUARMState *env, const ARMCPRegInfo *ri,
                                     bool isread)
 {
     if (arm_current_el(env) == 0 && (env->teecr & 1)) {
-        return CP_ACCESS_TRAP;
+        return CP_ACCESS_TRAP_EL1;
     }
     return teecr_access(env, ri, isread);
 }
@@ -2239,14 +2240,14 @@ static CPAccessResult gt_cntfrq_access(CPUARMState *env, const ARMCPRegInfo *ri,
             cntkctl = env->cp15.c14_cntkctl;
         }
         if (!extract32(cntkctl, 0, 2)) {
-            return CP_ACCESS_TRAP;
+            return CP_ACCESS_TRAP_EL1;
         }
         break;
     case 1:
         if (!isread && ri->state == ARM_CP_STATE_AA32 &&
             arm_is_secure_below_el3(env)) {
             /* Accesses from 32-bit Secure EL1 UNDEF (*not* trap to EL3!) */
-            return CP_ACCESS_TRAP_UNCATEGORIZED;
+            return CP_ACCESS_UNDEFINED;
         }
         break;
     case 2:
@@ -2255,7 +2256,7 @@ static CPAccessResult gt_cntfrq_access(CPUARMState *env, const ARMCPRegInfo *ri,
     }
 
     if (!isread && el < arm_highest_el(env)) {
-        return CP_ACCESS_TRAP_UNCATEGORIZED;
+        return CP_ACCESS_UNDEFINED;
     }
 
     return CP_ACCESS_OK;
@@ -2278,7 +2279,7 @@ static CPAccessResult gt_counter_access(CPUARMState *env, int timeridx,
 
         /* CNT[PV]CT: not visible from PL0 if EL0[PV]CTEN is zero */
         if (!extract32(env->cp15.c14_cntkctl, timeridx, 1)) {
-            return CP_ACCESS_TRAP;
+            return CP_ACCESS_TRAP_EL1;
         }
         /* fall through */
     case 1:
@@ -2319,7 +2320,7 @@ static CPAccessResult gt_timer_access(CPUARMState *env, int timeridx,
          * EL0 if EL0[PV]TEN is zero.
          */
         if (!extract32(env->cp15.c14_cntkctl, 9 - timeridx, 1)) {
-            return CP_ACCESS_TRAP;
+            return CP_ACCESS_TRAP_EL1;
         }
         /* fall through */
 
@@ -2385,7 +2386,10 @@ static CPAccessResult gt_stimer_access(CPUARMState *env,
     switch (arm_current_el(env)) {
     case 1:
         if (!arm_is_secure(env)) {
-            return CP_ACCESS_TRAP;
+            return CP_ACCESS_UNDEFINED;
+        }
+        if (arm_is_el2_enabled(env)) {
+            return CP_ACCESS_UNDEFINED;
         }
         if (!(env->cp15.scr_el3 & SCR_ST)) {
             return CP_ACCESS_TRAP_EL3;
@@ -2393,7 +2397,7 @@ static CPAccessResult gt_stimer_access(CPUARMState *env,
         return CP_ACCESS_OK;
     case 0:
     case 2:
-        return CP_ACCESS_TRAP;
+        return CP_ACCESS_UNDEFINED;
     case 3:
         return CP_ACCESS_OK;
     default:
@@ -2401,6 +2405,45 @@ static CPAccessResult gt_stimer_access(CPUARMState *env,
     }
 }
 
+static CPAccessResult gt_sel2timer_access(CPUARMState *env,
+                                          const ARMCPRegInfo *ri,
+                                          bool isread)
+{
+    /*
+     * The AArch64 register view of the secure EL2 timers are mostly
+     * accessible from EL3 and EL2 although can also be trapped to EL2
+     * from EL1 depending on nested virt config.
+     */
+    switch (arm_current_el(env)) {
+    case 0: /* UNDEFINED */
+        return CP_ACCESS_UNDEFINED;
+    case 1:
+        if (!arm_is_secure(env)) {
+            /* UNDEFINED */
+            return CP_ACCESS_UNDEFINED;
+        } else if (arm_hcr_el2_eff(env) & HCR_NV) {
+            /* Aarch64.SystemAccessTrap(EL2, 0x18) */
+            return CP_ACCESS_TRAP_EL2;
+        }
+        /* UNDEFINED */
+        return CP_ACCESS_UNDEFINED;
+    case 2:
+        if (!arm_is_secure(env)) {
+            /* UNDEFINED */
+            return CP_ACCESS_UNDEFINED;
+        }
+        return CP_ACCESS_OK;
+    case 3:
+        if (env->cp15.scr_el3 & SCR_EEL2) {
+            return CP_ACCESS_OK;
+        } else {
+            return CP_ACCESS_UNDEFINED;
+        }
+    default:
+        g_assert_not_reached();
+    }
+}
+
 uint64_t gt_get_countervalue(CPUARMState *env)
 {
     ARMCPU *cpu = env_archcpu(env);
@@ -2452,12 +2495,80 @@ static uint64_t gt_phys_raw_cnt_offset(CPUARMState *env)
     return 0;
 }
 
-static uint64_t gt_phys_cnt_offset(CPUARMState *env)
+static uint64_t gt_indirect_access_timer_offset(CPUARMState *env, int timeridx)
 {
-    if (arm_current_el(env) >= 2) {
+    /*
+     * Return the timer offset to use for indirect accesses to the timer.
+     * This is the Offset value as defined in D12.2.4.1 "Operation of the
+     * CompareValue views of the timers".
+     *
+     * The condition here is not always the same as the condition for
+     * whether to apply an offset register when doing a direct read of
+     * the counter sysreg; those conditions are described in the
+     * access pseudocode for each counter register.
+     */
+    switch (timeridx) {
+    case GTIMER_PHYS:
+        return gt_phys_raw_cnt_offset(env);
+    case GTIMER_VIRT:
+        return env->cp15.cntvoff_el2;
+    case GTIMER_HYP:
+    case GTIMER_SEC:
+    case GTIMER_HYPVIRT:
+    case GTIMER_S_EL2_PHYS:
+    case GTIMER_S_EL2_VIRT:
+        return 0;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+uint64_t gt_direct_access_timer_offset(CPUARMState *env, int timeridx)
+{
+    /*
+     * Return the timer offset to use for direct accesses to the
+     * counter registers CNTPCT and CNTVCT, and for direct accesses
+     * to the CNT*_TVAL registers.
+     *
+     * This isn't exactly the same as the indirect-access offset,
+     * because here we also care about what EL the register access
+     * is being made from.
+     *
+     * This corresponds to the access pseudocode for the registers.
+     */
+    uint64_t hcr;
+
+    switch (timeridx) {
+    case GTIMER_PHYS:
+        if (arm_current_el(env) >= 2) {
+            return 0;
+        }
+        return gt_phys_raw_cnt_offset(env);
+    case GTIMER_VIRT:
+        switch (arm_current_el(env)) {
+        case 2:
+            hcr = arm_hcr_el2_eff(env);
+            if (hcr & HCR_E2H) {
+                return 0;
+            }
+            break;
+        case 0:
+            hcr = arm_hcr_el2_eff(env);
+            if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) {
+                return 0;
+            }
+            break;
+        }
+        return env->cp15.cntvoff_el2;
+    case GTIMER_HYP:
+    case GTIMER_SEC:
+    case GTIMER_HYPVIRT:
+    case GTIMER_S_EL2_PHYS:
+    case GTIMER_S_EL2_VIRT:
         return 0;
+    default:
+        g_assert_not_reached();
     }
-    return gt_phys_raw_cnt_offset(env);
 }
 
 static void gt_recalc_timer(ARMCPU *cpu, int timeridx)
@@ -2469,8 +2580,7 @@ static void gt_recalc_timer(ARMCPU *cpu, int timeridx)
          * Timer enabled: calculate and set current ISTATUS, irq, and
          * reset timer to when ISTATUS next has to change
          */
-        uint64_t offset = timeridx == GTIMER_VIRT ?
-            cpu->env.cp15.cntvoff_el2 : gt_phys_raw_cnt_offset(&cpu->env);
+        uint64_t offset = gt_indirect_access_timer_offset(&cpu->env, timeridx);
         uint64_t count = gt_get_countervalue(&cpu->env);
         /* Note that this must be unsigned 64 bit arithmetic: */
         int istatus = count - offset >= gt->cval;
@@ -2533,34 +2643,14 @@ static void gt_timer_reset(CPUARMState *env, const ARMCPRegInfo *ri,
 
 static uint64_t gt_cnt_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
-    return gt_get_countervalue(env) - gt_phys_cnt_offset(env);
-}
-
-uint64_t gt_virt_cnt_offset(CPUARMState *env)
-{
-    uint64_t hcr;
-
-    switch (arm_current_el(env)) {
-    case 2:
-        hcr = arm_hcr_el2_eff(env);
-        if (hcr & HCR_E2H) {
-            return 0;
-        }
-        break;
-    case 0:
-        hcr = arm_hcr_el2_eff(env);
-        if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) {
-            return 0;
-        }
-        break;
-    }
-
-    return env->cp15.cntvoff_el2;
+    uint64_t offset = gt_direct_access_timer_offset(env, GTIMER_PHYS);
+    return gt_get_countervalue(env) - offset;
 }
 
 static uint64_t gt_virt_cnt_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
-    return gt_get_countervalue(env) - gt_virt_cnt_offset(env);
+    uint64_t offset = gt_direct_access_timer_offset(env, GTIMER_VIRT);
+    return gt_get_countervalue(env) - offset;
 }
 
 static void gt_cval_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -2572,47 +2662,38 @@ static void gt_cval_write(CPUARMState *env, const ARMCPRegInfo *ri,
     gt_recalc_timer(env_archcpu(env), timeridx);
 }
 
-static uint64_t gt_tval_read(CPUARMState *env, const ARMCPRegInfo *ri,
-                             int timeridx)
+static uint64_t do_tval_read(CPUARMState *env, int timeridx, uint64_t offset)
 {
-    uint64_t offset = 0;
-
-    switch (timeridx) {
-    case GTIMER_VIRT:
-    case GTIMER_HYPVIRT:
-        offset = gt_virt_cnt_offset(env);
-        break;
-    case GTIMER_PHYS:
-        offset = gt_phys_cnt_offset(env);
-        break;
-    }
-
     return (uint32_t)(env->cp15.c14_timer[timeridx].cval -
                       (gt_get_countervalue(env) - offset));
 }
 
-static void gt_tval_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                          int timeridx,
-                          uint64_t value)
+static uint64_t gt_tval_read(CPUARMState *env, const ARMCPRegInfo *ri,
+                             int timeridx)
 {
-    uint64_t offset = 0;
+    uint64_t offset = gt_direct_access_timer_offset(env, timeridx);
 
-    switch (timeridx) {
-    case GTIMER_VIRT:
-    case GTIMER_HYPVIRT:
-        offset = gt_virt_cnt_offset(env);
-        break;
-    case GTIMER_PHYS:
-        offset = gt_phys_cnt_offset(env);
-        break;
-    }
+    return do_tval_read(env, timeridx, offset);
+}
 
+static void do_tval_write(CPUARMState *env, int timeridx, uint64_t value,
+                          uint64_t offset)
+{
     trace_arm_gt_tval_write(timeridx, value);
     env->cp15.c14_timer[timeridx].cval = gt_get_countervalue(env) - offset +
                                          sextract64(value, 0, 32);
     gt_recalc_timer(env_archcpu(env), timeridx);
 }
 
+static void gt_tval_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                          int timeridx,
+                          uint64_t value)
+{
+    uint64_t offset = gt_direct_access_timer_offset(env, timeridx);
+
+    do_tval_write(env, timeridx, value, offset);
+}
+
 static void gt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
                          int timeridx,
                          uint64_t value)
@@ -2742,13 +2823,21 @@ static void gt_virt_cval_write(CPUARMState *env, const ARMCPRegInfo *ri,
 
 static uint64_t gt_virt_tval_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
-    return gt_tval_read(env, ri, GTIMER_VIRT);
+    /*
+     * This is CNTV_TVAL_EL02; unlike the underlying CNTV_TVAL_EL0
+     * we always apply CNTVOFF_EL2. Special case that here rather
+     * than going into the generic gt_tval_read() and then having
+     * to re-detect that it's this register.
+     * Note that the accessfn/perms mean we know we're at EL2 or EL3 here.
+     */
+    return do_tval_read(env, GTIMER_VIRT, env->cp15.cntvoff_el2);
 }
 
 static void gt_virt_tval_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                uint64_t value)
 {
-    gt_tval_write(env, ri, GTIMER_VIRT, value);
+    /* Similarly for writes to CNTV_TVAL_EL02 */
+    do_tval_write(env, GTIMER_VIRT, value, env->cp15.cntvoff_el2);
 }
 
 static void gt_virt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -2908,6 +2997,62 @@ static void gt_sec_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
     gt_ctl_write(env, ri, GTIMER_SEC, value);
 }
 
+static void gt_sec_pel2_timer_reset(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    gt_timer_reset(env, ri, GTIMER_S_EL2_PHYS);
+}
+
+static void gt_sec_pel2_cval_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                                   uint64_t value)
+{
+    gt_cval_write(env, ri, GTIMER_S_EL2_PHYS, value);
+}
+
+static uint64_t gt_sec_pel2_tval_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    return gt_tval_read(env, ri, GTIMER_S_EL2_PHYS);
+}
+
+static void gt_sec_pel2_tval_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                              uint64_t value)
+{
+    gt_tval_write(env, ri, GTIMER_S_EL2_PHYS, value);
+}
+
+static void gt_sec_pel2_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                              uint64_t value)
+{
+    gt_ctl_write(env, ri, GTIMER_S_EL2_PHYS, value);
+}
+
+static void gt_sec_vel2_timer_reset(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    gt_timer_reset(env, ri, GTIMER_S_EL2_VIRT);
+}
+
+static void gt_sec_vel2_cval_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                              uint64_t value)
+{
+    gt_cval_write(env, ri, GTIMER_S_EL2_VIRT, value);
+}
+
+static uint64_t gt_sec_vel2_tval_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    return gt_tval_read(env, ri, GTIMER_S_EL2_VIRT);
+}
+
+static void gt_sec_vel2_tval_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                                   uint64_t value)
+{
+    gt_tval_write(env, ri, GTIMER_S_EL2_VIRT, value);
+}
+
+static void gt_sec_vel2_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                              uint64_t value)
+{
+    gt_ctl_write(env, ri, GTIMER_S_EL2_VIRT, value);
+}
+
 static void gt_hv_timer_reset(CPUARMState *env, const ARMCPRegInfo *ri)
 {
     gt_timer_reset(env, ri, GTIMER_HYPVIRT);
@@ -2964,6 +3109,20 @@ void arm_gt_stimer_cb(void *opaque)
     gt_recalc_timer(cpu, GTIMER_SEC);
 }
 
+void arm_gt_sel2timer_cb(void *opaque)
+{
+    ARMCPU *cpu = opaque;
+
+    gt_recalc_timer(cpu, GTIMER_S_EL2_PHYS);
+}
+
+void arm_gt_sel2vtimer_cb(void *opaque)
+{
+    ARMCPU *cpu = opaque;
+
+    gt_recalc_timer(cpu, GTIMER_S_EL2_VIRT);
+}
+
 void arm_gt_hvtimer_cb(void *opaque)
 {
     ARMCPU *cpu = opaque;
@@ -3304,7 +3463,7 @@ static CPAccessResult ats_access(CPUARMState *env, const ARMCPRegInfo *ri,
                 }
                 return CP_ACCESS_TRAP_EL3;
             }
-            return CP_ACCESS_TRAP_UNCATEGORIZED;
+            return CP_ACCESS_UNDEFINED;
         }
     }
     return CP_ACCESS_OK;
@@ -3601,7 +3760,7 @@ static CPAccessResult at_e012_access(CPUARMState *env, const ARMCPRegInfo *ri,
      * scr_write() ensures that the NSE bit is not set otherwise.
      */
     if ((env->cp15.scr_el3 & (SCR_NSE | SCR_NS)) == SCR_NSE) {
-        return CP_ACCESS_TRAP;
+        return CP_ACCESS_UNDEFINED;
     }
     return CP_ACCESS_OK;
 }
@@ -3611,7 +3770,7 @@ static CPAccessResult at_s1e2_access(CPUARMState *env, const ARMCPRegInfo *ri,
 {
     if (arm_current_el(env) == 3 &&
         !(env->cp15.scr_el3 & (SCR_NS | SCR_EEL2))) {
-        return CP_ACCESS_TRAP;
+        return CP_ACCESS_UNDEFINED;
     }
     return at_e012_access(env, ri, isread);
 }
@@ -4499,7 +4658,7 @@ static CPAccessResult aa64_daif_access(CPUARMState *env, const ARMCPRegInfo *ri,
                                        bool isread)
 {
     if (arm_current_el(env) == 0 && !(arm_sctlr(env, 0) & SCTLR_UMA)) {
-        return CP_ACCESS_TRAP;
+        return CP_ACCESS_TRAP_EL1;
     }
     return CP_ACCESS_OK;
 }
@@ -4589,9 +4748,9 @@ static CPAccessResult aa64_cacheop_poc_access(CPUARMState *env,
     /* Cache invalidate/clean to Point of Coherency or Persistence...  */
     switch (arm_current_el(env)) {
     case 0:
-        /* ... EL0 must UNDEF unless SCTLR_EL1.UCI is set.  */
+        /* ... EL0 must trap to EL1 unless SCTLR_EL1.UCI is set.  */
         if (!(arm_sctlr(env, 0) & SCTLR_UCI)) {
-            return CP_ACCESS_TRAP;
+            return CP_ACCESS_TRAP_EL1;
         }
         /* fall through */
     case 1:
@@ -4609,9 +4768,9 @@ static CPAccessResult do_cacheop_pou_access(CPUARMState *env, uint64_t hcrflags)
     /* Cache invalidate/clean to Point of Unification... */
     switch (arm_current_el(env)) {
     case 0:
-        /* ... EL0 must UNDEF unless SCTLR_EL1.UCI is set.  */
+        /* ... EL0 must trap to EL1 unless SCTLR_EL1.UCI is set.  */
         if (!(arm_sctlr(env, 0) & SCTLR_UCI)) {
-            return CP_ACCESS_TRAP;
+            return CP_ACCESS_TRAP_EL1;
         }
         /* fall through */
     case 1:
@@ -4651,7 +4810,7 @@ static CPAccessResult aa64_zva_access(CPUARMState *env, const ARMCPRegInfo *ri,
                 }
             } else {
                 if (!(env->cp15.sctlr_el[1] & SCTLR_DZE)) {
-                    return CP_ACCESS_TRAP;
+                    return CP_ACCESS_TRAP_EL1;
                 }
                 if (hcr & HCR_TDZ) {
                     return CP_ACCESS_TRAP_EL2;
@@ -4684,7 +4843,7 @@ static CPAccessResult sp_el0_access(CPUARMState *env, const ARMCPRegInfo *ri,
          * Access to SP_EL0 is undefined if it's being used as
          * the stack pointer.
          */
-        return CP_ACCESS_TRAP_UNCATEGORIZED;
+        return CP_ACCESS_UNDEFINED;
     }
     return CP_ACCESS_OK;
 }
@@ -5167,6 +5326,11 @@ static void do_hcr_write(CPUARMState *env, uint64_t value, uint64_t valid_mask)
     /* Clear RES0 bits.  */
     value &= valid_mask;
 
+    /* RW is RAO/WI if EL1 is AArch64 only */
+    if (!cpu_isar_feature(aa64_aa32_el1, cpu)) {
+        value |= HCR_RW;
+    }
+
     /*
      * These bits change the MMU setup:
      * HCR_VM enables stage 2 translation
@@ -5224,6 +5388,12 @@ static void hcr_writelow(CPUARMState *env, const ARMCPRegInfo *ri,
     do_hcr_write(env, value, MAKE_64BIT_MASK(32, 32));
 }
 
+static void hcr_reset(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    /* hcr_write will set the RES1 bits on an AArch64-only CPU */
+    hcr_write(env, ri, 0);
+}
+
 /*
  * Return the effective value of HCR_EL2, at the given security state.
  * Bits that are not included here:
@@ -5459,6 +5629,7 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
       .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0,
       .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2),
       .nv2_redirect_offset = 0x78,
+      .resetfn = hcr_reset,
       .writefn = hcr_write, .raw_writefn = raw_write },
     { .name = "HCR", .state = ARM_CP_STATE_AA32,
       .type = ARM_CP_ALIAS | ARM_CP_IO,
@@ -5674,7 +5845,7 @@ static CPAccessResult sel2_access(CPUARMState *env, const ARMCPRegInfo *ri,
     if (arm_current_el(env) == 3 || arm_is_secure_below_el3(env)) {
         return CP_ACCESS_OK;
     }
-    return CP_ACCESS_TRAP_UNCATEGORIZED;
+    return CP_ACCESS_UNDEFINED;
 }
 
 static const ARMCPRegInfo el2_sec_cp_reginfo[] = {
@@ -5688,6 +5859,56 @@ static const ARMCPRegInfo el2_sec_cp_reginfo[] = {
       .access = PL2_RW, .accessfn = sel2_access,
       .nv2_redirect_offset = 0x48,
       .fieldoffset = offsetof(CPUARMState, cp15.vstcr_el2) },
+#ifndef CONFIG_USER_ONLY
+    /* Secure EL2 Physical Timer */
+    { .name = "CNTHPS_TVAL_EL2", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 5, .opc2 = 0,
+      .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL2_RW,
+      .accessfn = gt_sel2timer_access,
+      .readfn = gt_sec_pel2_tval_read,
+      .writefn = gt_sec_pel2_tval_write,
+      .resetfn = gt_sec_pel2_timer_reset,
+    },
+    { .name = "CNTHPS_CTL_EL2", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 5, .opc2 = 1,
+      .type = ARM_CP_IO, .access = PL2_RW,
+      .accessfn = gt_sel2timer_access,
+      .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_S_EL2_PHYS].ctl),
+      .resetvalue = 0,
+      .writefn = gt_sec_pel2_ctl_write, .raw_writefn = raw_write,
+    },
+    { .name = "CNTHPS_CVAL_EL2", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 5, .opc2 = 2,
+      .type = ARM_CP_IO, .access = PL2_RW,
+      .accessfn = gt_sel2timer_access,
+      .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_S_EL2_PHYS].cval),
+      .writefn = gt_sec_pel2_cval_write, .raw_writefn = raw_write,
+    },
+    /* Secure EL2 Virtual Timer */
+    { .name = "CNTHVS_TVAL_EL2", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 4, .opc2 = 0,
+      .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL2_RW,
+      .accessfn = gt_sel2timer_access,
+      .readfn = gt_sec_vel2_tval_read,
+      .writefn = gt_sec_vel2_tval_write,
+      .resetfn = gt_sec_vel2_timer_reset,
+    },
+    { .name = "CNTHVS_CTL_EL2", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 4, .opc2 = 1,
+      .type = ARM_CP_IO, .access = PL2_RW,
+      .accessfn = gt_sel2timer_access,
+      .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_S_EL2_VIRT].ctl),
+      .resetvalue = 0,
+      .writefn = gt_sec_vel2_ctl_write, .raw_writefn = raw_write,
+    },
+    { .name = "CNTHVS_CVAL_EL2", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 4, .opc2 = 2,
+      .type = ARM_CP_IO, .access = PL2_RW,
+      .accessfn = gt_sel2timer_access,
+      .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_S_EL2_VIRT].cval),
+      .writefn = gt_sec_vel2_cval_write, .raw_writefn = raw_write,
+    },
+#endif
 };
 
 static CPAccessResult nsacr_access(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -5710,7 +5931,7 @@ static CPAccessResult nsacr_access(CPUARMState *env, const ARMCPRegInfo *ri,
     if (isread) {
         return CP_ACCESS_OK;
     }
-    return CP_ACCESS_TRAP_UNCATEGORIZED;
+    return CP_ACCESS_UNDEFINED;
 }
 
 static const ARMCPRegInfo el3_cp_reginfo[] = {
@@ -5798,7 +6019,7 @@ static CPAccessResult e2h_access(CPUARMState *env, const ARMCPRegInfo *ri,
         return CP_ACCESS_OK;
     }
     if (!(arm_hcr_el2_eff(env) & HCR_E2H)) {
-        return CP_ACCESS_TRAP_UNCATEGORIZED;
+        return CP_ACCESS_UNDEFINED;
     }
     return CP_ACCESS_OK;
 }
@@ -5896,7 +6117,7 @@ static CPAccessResult el2_e2h_e12_access(CPUARMState *env,
     }
     /* FOO_EL12 aliases only exist when E2H is 1; otherwise they UNDEF */
     if (!(arm_hcr_el2_eff(env) & HCR_E2H)) {
-        return CP_ACCESS_TRAP_UNCATEGORIZED;
+        return CP_ACCESS_UNDEFINED;
     }
     if (ri->orig_accessfn) {
         return ri->orig_accessfn(env, ri->opaque, isread);
@@ -6073,7 +6294,7 @@ static CPAccessResult ctr_el0_access(CPUARMState *env, const ARMCPRegInfo *ri,
                 }
             } else {
                 if (!(env->cp15.sctlr_el[1] & SCTLR_UCT)) {
-                    return CP_ACCESS_TRAP;
+                    return CP_ACCESS_TRAP_EL1;
                 }
                 if (hcr & HCR_TID2) {
                     return CP_ACCESS_TRAP_EL2;
@@ -6103,7 +6324,7 @@ static CPAccessResult access_terr(CPUARMState *env, const ARMCPRegInfo *ri,
     if (el < 2 && (arm_hcr_el2_eff(env) & HCR_TERR)) {
         return CP_ACCESS_TRAP_EL2;
     }
-    if (el < 3 && (env->cp15.scr_el3 & SCR_TERR)) {
+    if (!arm_is_el3_or_mon(env) && (env->cp15.scr_el3 & SCR_TERR)) {
         return CP_ACCESS_TRAP_EL3;
     }
     return CP_ACCESS_OK;
@@ -6372,7 +6593,7 @@ static CPAccessResult access_tpidr2(CPUARMState *env, const ARMCPRegInfo *ri,
     if (el == 0) {
         uint64_t sctlr = arm_sctlr(env, el);
         if (!(sctlr & SCTLR_EnTP2)) {
-            return CP_ACCESS_TRAP;
+            return CP_ACCESS_TRAP_EL1;
         }
     }
     /* TODO: FEAT_FGT */
@@ -6750,8 +6971,8 @@ static CPAccessResult access_lor_other(CPUARMState *env,
                                        const ARMCPRegInfo *ri, bool isread)
 {
     if (arm_is_secure_below_el3(env)) {
-        /* Access denied in secure mode.  */
-        return CP_ACCESS_TRAP;
+        /* UNDEF if SCR_EL3.NS == 0 */
+        return CP_ACCESS_UNDEFINED;
     }
     return access_lor_ns(env, ri, isread);
 }
@@ -7172,7 +7393,7 @@ static CPAccessResult access_scxtnum(CPUARMState *env, const ARMCPRegInfo *ri,
             if (hcr & HCR_TGE) {
                 return CP_ACCESS_TRAP_EL2;
             }
-            return CP_ACCESS_TRAP;
+            return CP_ACCESS_TRAP_EL1;
         }
     } else if (el < 2 && (env->cp15.sctlr_el[2] & SCTLR_TSCXT)) {
         return CP_ACCESS_TRAP_EL2;
@@ -7292,7 +7513,7 @@ static CPAccessResult access_predinv(CPUARMState *env, const ARMCPRegInfo *ri,
     if (el == 0) {
         uint64_t sctlr = arm_sctlr(env, el);
         if (!(sctlr & SCTLR_EnRCTX)) {
-            return CP_ACCESS_TRAP;
+            return CP_ACCESS_TRAP_EL1;
         }
     } else if (el == 1) {
         uint64_t hcr = arm_hcr_el2_eff(env);
@@ -9609,7 +9830,7 @@ uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx,
     uint64_t hcr_el2;
 
     if (arm_feature(env, ARM_FEATURE_EL3)) {
-        rw = ((env->cp15.scr_el3 & SCR_RW) == SCR_RW);
+        rw = arm_scr_rw_eff(env);
     } else {
         /*
          * Either EL2 is the highest EL (and so the EL2 register width
@@ -9684,6 +9905,7 @@ void arm_log_exception(CPUState *cs)
             [EXCP_NMI] = "NMI",
             [EXCP_VINMI] = "Virtual IRQ NMI",
             [EXCP_VFNMI] = "Virtual FIQ NMI",
+            [EXCP_MON_TRAP] = "Monitor Trap",
         };
 
         if (idx >= 0 && idx < ARRAY_SIZE(excnames)) {
@@ -10250,6 +10472,16 @@ static void arm_cpu_do_interrupt_aarch32(CPUState *cs)
         mask = CPSR_A | CPSR_I | CPSR_F;
         offset = 0;
         break;
+    case EXCP_MON_TRAP:
+        new_mode = ARM_CPU_MODE_MON;
+        addr = 0x04;
+        mask = CPSR_A | CPSR_I | CPSR_F;
+        if (env->thumb) {
+            offset = 2;
+        } else {
+            offset = 4;
+        }
+        break;
     default:
         cpu_abort(cs, "Unhandled exception 0x%x\n", cs->exception_index);
         return; /* Never happens.  Keep compiler happy.  */
@@ -10407,7 +10639,7 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs)
 
         switch (new_el) {
         case 3:
-            is_aa64 = (env->cp15.scr_el3 & SCR_RW) != 0;
+            is_aa64 = arm_scr_rw_eff(env);
             break;
         case 2:
             hcr = arm_hcr_el2_eff(env);
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index 0afd96018e067..2439af63a05a3 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -899,6 +899,18 @@ static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
 
     clamp_id_aa64mmfr0_parange_to_ipa_size(&host_isar.id_aa64mmfr0);
 
+    /*
+     * Disable SME, which is not properly handled by QEMU hvf yet.
+     * To allow this through we would need to:
+     * - make sure that the SME state is correctly handled in the
+     *   get_registers/put_registers functions
+     * - get the SME-specific CPU properties to work with accelerators
+     *   other than TCG
+     * - fix any assumptions we made that SME implies SVE (since
+     *   on the M4 there is SME but not SVE)
+     */
+    host_isar.id_aa64pfr1 &= ~R_ID_AA64PFR1_SME_MASK;
+
     ahcf->isar = host_isar;
 
     /*
@@ -1971,6 +1983,7 @@ int hvf_vcpu_exec(CPUState *cpu)
         bool isv = syndrome & ARM_EL_ISV;
         bool iswrite = (syndrome >> 6) & 1;
         bool s1ptw = (syndrome >> 7) & 1;
+        bool sse = (syndrome >> 21) & 1;
         uint32_t sas = (syndrome >> 22) & 3;
         uint32_t len = 1 << sas;
         uint32_t srt = (syndrome >> 16) & 0x1f;
@@ -1998,6 +2011,9 @@ int hvf_vcpu_exec(CPUState *cpu)
             address_space_read(&address_space_memory,
                                hvf_exit->exception.physical_address,
                                MEMTXATTRS_UNSPECIFIED, &val, len);
+            if (sse) {
+                val = sextract64(val, 0, len * 8);
+            }
             hvf_set_reg(cpu, srt, val);
         }
 
diff --git a/target/arm/internals.h b/target/arm/internals.h
index b3187341456ad..28585c07555b5 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -392,6 +392,141 @@ static inline FloatRoundMode arm_rmode_to_sf(ARMFPRounding rmode)
     return arm_rmode_to_sf_map[rmode];
 }
 
+/* Return the effective value of SCR_EL3.RW */
+static inline bool arm_scr_rw_eff(CPUARMState *env)
+{
+    /*
+     * SCR_EL3.RW has an effective value of 1 if:
+     *  - we are NS and EL2 is implemented but doesn't support AArch32
+     *  - we are S and EL2 is enabled (in which case it must be AArch64)
+     */
+    ARMCPU *cpu = env_archcpu(env);
+
+    if (env->cp15.scr_el3 & SCR_RW) {
+        return true;
+    }
+    if (env->cp15.scr_el3 & SCR_NS) {
+        return arm_feature(env, ARM_FEATURE_EL2) &&
+            !cpu_isar_feature(aa64_aa32_el2, cpu);
+    } else {
+        return env->cp15.scr_el3 & SCR_EEL2;
+    }
+}
+
+/* Return true if the specified exception level is running in AArch64 state. */
+static inline bool arm_el_is_aa64(CPUARMState *env, int el)
+{
+    /*
+     * This isn't valid for EL0 (if we're in EL0, is_a64() is what you want,
+     * and if we're not in EL0 then the state of EL0 isn't well defined.)
+     */
+    assert(el >= 1 && el <= 3);
+    bool aa64 = arm_feature(env, ARM_FEATURE_AARCH64);
+
+    /*
+     * The highest exception level is always at the maximum supported
+     * register width, and then lower levels have a register width controlled
+     * by bits in the SCR or HCR registers.
+     */
+    if (el == 3) {
+        return aa64;
+    }
+
+    if (arm_feature(env, ARM_FEATURE_EL3)) {
+        aa64 = aa64 && arm_scr_rw_eff(env);
+    }
+
+    if (el == 2) {
+        return aa64;
+    }
+
+    if (arm_is_el2_enabled(env)) {
+        aa64 = aa64 && (env->cp15.hcr_el2 & HCR_RW);
+    }
+
+    return aa64;
+}
+
+/*
+ * Return the current Exception Level (as per ARMv8; note that this differs
+ * from the ARMv7 Privilege Level).
+ */
+static inline int arm_current_el(CPUARMState *env)
+{
+    if (arm_feature(env, ARM_FEATURE_M)) {
+        return arm_v7m_is_handler_mode(env) ||
+            !(env->v7m.control[env->v7m.secure] & 1);
+    }
+
+    if (is_a64(env)) {
+        return extract32(env->pstate, 2, 2);
+    }
+
+    switch (env->uncached_cpsr & 0x1f) {
+    case ARM_CPU_MODE_USR:
+        return 0;
+    case ARM_CPU_MODE_HYP:
+        return 2;
+    case ARM_CPU_MODE_MON:
+        return 3;
+    default:
+        if (arm_is_secure(env) && !arm_el_is_aa64(env, 3)) {
+            /* If EL3 is 32-bit then all secure privileged modes run in EL3 */
+            return 3;
+        }
+
+        return 1;
+    }
+}
+
+static inline bool arm_cpu_data_is_big_endian_a32(CPUARMState *env,
+                                                  bool sctlr_b)
+{
+#ifdef CONFIG_USER_ONLY
+    /*
+     * In system mode, BE32 is modelled in line with the
+     * architecture (as word-invariant big-endianness), where loads
+     * and stores are done little endian but from addresses which
+     * are adjusted by XORing with the appropriate constant. So the
+     * endianness to use for the raw data access is not affected by
+     * SCTLR.B.
+     * In user mode, however, we model BE32 as byte-invariant
+     * big-endianness (because user-only code cannot tell the
+     * difference), and so we need to use a data access endianness
+     * that depends on SCTLR.B.
+     */
+    if (sctlr_b) {
+        return true;
+    }
+#endif
+    /* In 32bit endianness is determined by looking at CPSR's E bit */
+    return env->uncached_cpsr & CPSR_E;
+}
+
+static inline bool arm_cpu_data_is_big_endian_a64(int el, uint64_t sctlr)
+{
+    return sctlr & (el ? SCTLR_EE : SCTLR_E0E);
+}
+
+/* Return true if the processor is in big-endian mode. */
+static inline bool arm_cpu_data_is_big_endian(CPUARMState *env)
+{
+    if (!is_a64(env)) {
+        return arm_cpu_data_is_big_endian_a32(env, arm_sctlr_b(env));
+    } else {
+        int cur_el = arm_current_el(env);
+        uint64_t sctlr = arm_sctlr(env, cur_el);
+        return arm_cpu_data_is_big_endian_a64(cur_el, sctlr);
+    }
+}
+
+#ifdef CONFIG_USER_ONLY
+static inline bool arm_cpu_bswap_data(CPUARMState *env)
+{
+    return TARGET_BIG_ENDIAN ^ arm_cpu_data_is_big_endian(env);
+}
+#endif
+
 static inline void aarch64_save_sp(CPUARMState *env, int el)
 {
     if (env->pstate & PSTATE_SP) {
@@ -1819,9 +1954,10 @@ int delete_hw_watchpoint(target_ulong addr, target_ulong len, int type);
 uint64_t gt_get_countervalue(CPUARMState *env);
 /*
  * Return the currently applicable offset between the system counter
- * and CNTVCT_EL0 (this will be either 0 or the value of CNTVOFF_EL2).
+ * and the counter for the specified timer, as used for direct register
+ * accesses.
  */
-uint64_t gt_virt_cnt_offset(CPUARMState *env);
+uint64_t gt_direct_access_timer_offset(CPUARMState *env, int timeridx);
 
 /*
  * Return mask of ARMMMUIdxBit values corresponding to an "invalidate
@@ -1833,5 +1969,14 @@ int alle1_tlbmask(CPUARMState *env);
 void arm_set_default_fp_behaviours(float_status *s);
 /* Set the float_status behaviour to match Arm FPCR.AH=1 behaviour */
 void arm_set_ah_fp_behaviours(float_status *s);
+/* Read the float_status info and return the appropriate FPSR value */
+uint32_t vfp_get_fpsr_from_host(CPUARMState *env);
+/* Clear the exception status flags from all float_status fields */
+void vfp_clear_float_status_exc_flags(CPUARMState *env);
+/*
+ * Update float_status fields to handle the bits of the FPCR
+ * specified by mask changing to the values in val.
+ */
+void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask);
 
 #endif
diff --git a/target/arm/meson.build b/target/arm/meson.build
index 2e10464dbb6be..3065081d241d4 100644
--- a/target/arm/meson.build
+++ b/target/arm/meson.build
@@ -4,7 +4,7 @@ arm_ss.add(files(
   'debug_helper.c',
   'gdbstub.c',
   'helper.c',
-  'vfp_helper.c',
+  'vfp_fpscr.c',
 ))
 arm_ss.add(zlib)
 
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
index 64bb6878a48a7..43309003486b5 100644
--- a/target/arm/ptw.c
+++ b/target/arm/ptw.c
@@ -16,9 +16,6 @@
 #include "internals.h"
 #include "cpu-features.h"
 #include "idau.h"
-#ifdef CONFIG_TCG
-# include "tcg/oversized-guest.h"
-#endif
 
 typedef struct S1Translate {
     /*
@@ -840,7 +837,6 @@ static uint64_t arm_casq_ptw(CPUARMState *env, uint64_t old_val,
         ptw->out_rw = true;
     }
 
-#ifdef CONFIG_ATOMIC64
     if (ptw->out_be) {
         old_val = cpu_to_be64(old_val);
         new_val = cpu_to_be64(new_val);
@@ -852,36 +848,6 @@ static uint64_t arm_casq_ptw(CPUARMState *env, uint64_t old_val,
         cur_val = qatomic_cmpxchg__nocheck((uint64_t *)host, old_val, new_val);
         cur_val = le64_to_cpu(cur_val);
     }
-#else
-    /*
-     * We can't support the full 64-bit atomic cmpxchg on the host.
-     * Because this is only used for FEAT_HAFDBS, which is only for AA64,
-     * we know that TCG_OVERSIZED_GUEST is set, which means that we are
-     * running in round-robin mode and could only race with dma i/o.
-     */
-#if !TCG_OVERSIZED_GUEST
-# error "Unexpected configuration"
-#endif
-    bool locked = bql_locked();
-    if (!locked) {
-        bql_lock();
-    }
-    if (ptw->out_be) {
-        cur_val = ldq_be_p(host);
-        if (cur_val == old_val) {
-            stq_be_p(host, new_val);
-        }
-    } else {
-        cur_val = ldq_le_p(host);
-        if (cur_val == old_val) {
-            stq_le_p(host, new_val);
-        }
-    }
-    if (!locked) {
-        bql_unlock();
-    }
-#endif
-
     return cur_val;
 #else
     /* AArch32 does not have FEAT_HADFS; non-TCG guests only use debug-mode. */
diff --git a/target/arm/tcg-stubs.c b/target/arm/tcg-stubs.c
index f3f45d54f2820..93a15cad61036 100644
--- a/target/arm/tcg-stubs.c
+++ b/target/arm/tcg-stubs.c
@@ -30,3 +30,25 @@ void assert_hflags_rebuild_correctly(CPUARMState *env)
 void define_tlb_insn_regs(ARMCPU *cpu)
 {
 }
+
+/* With KVM, we never use float_status, so these can be no-ops */
+void arm_set_default_fp_behaviours(float_status *s)
+{
+}
+
+void arm_set_ah_fp_behaviours(float_status *s)
+{
+}
+
+uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
+{
+    return 0;
+}
+
+void vfp_clear_float_status_exc_flags(CPUARMState *env)
+{
+}
+
+void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
+{
+}
diff --git a/target/arm/tcg/cpu-v7m.c b/target/arm/tcg/cpu-v7m.c
index 03acdf83e0068..c4dd30927268c 100644
--- a/target/arm/tcg/cpu-v7m.c
+++ b/target/arm/tcg/cpu-v7m.c
@@ -10,7 +10,7 @@
 
 #include "qemu/osdep.h"
 #include "cpu.h"
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 #include "internals.h"
 
 #if !defined(CONFIG_USER_ONLY)
@@ -19,7 +19,6 @@
 
 static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
 {
-    CPUClass *cc = CPU_GET_CLASS(cs);
     ARMCPU *cpu = ARM_CPU(cs);
     CPUARMState *env = &cpu->env;
     bool ret = false;
@@ -35,7 +34,7 @@ static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
     if (interrupt_request & CPU_INTERRUPT_HARD
         && (armv7m_nvic_can_take_pending_exception(env->nvic))) {
         cs->exception_index = EXCP_IRQ;
-        cc->tcg_ops->do_interrupt(cs);
+        cs->cc->tcg_ops->do_interrupt(cs);
         ret = true;
     }
     return ret;
diff --git a/target/arm/tcg/cpu32.c b/target/arm/tcg/cpu32.c
index 0f1c5bc87e886..2c45b7eddda73 100644
--- a/target/arm/tcg/cpu32.c
+++ b/target/arm/tcg/cpu32.c
@@ -10,7 +10,7 @@
 
 #include "qemu/osdep.h"
 #include "cpu.h"
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 #include "internals.h"
 #include "target/arm/idau.h"
 #if !defined(CONFIG_USER_ONLY)
diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
index 32f0647ca4fb2..9244848efed05 100644
--- a/target/arm/tcg/helper-a64.c
+++ b/target/arm/tcg/helper-a64.c
@@ -631,6 +631,7 @@ static void cpsr_write_from_spsr_elx(CPUARMState *env,
 
 void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
 {
+    ARMCPU *cpu = env_archcpu(env);
     int cur_el = arm_current_el(env);
     unsigned int spsr_idx = aarch64_banked_spsr_index(cur_el);
     uint32_t spsr = env->banked_spsr[spsr_idx];
@@ -677,12 +678,17 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
         goto illegal_return;
     }
 
+    if (!return_to_aa64 && !cpu_isar_feature(aa64_aa32, cpu)) {
+        /* Return to AArch32 when CPU is AArch64-only */
+        goto illegal_return;
+    }
+
     if (new_el == 1 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
         goto illegal_return;
     }
 
     bql_lock();
-    arm_call_pre_el_change_hook(env_archcpu(env));
+    arm_call_pre_el_change_hook(cpu);
     bql_unlock();
 
     if (!return_to_aa64) {
@@ -710,7 +716,7 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
         int tbii;
 
         env->aarch64 = true;
-        spsr &= aarch64_pstate_valid_mask(&env_archcpu(env)->isar);
+        spsr &= aarch64_pstate_valid_mask(&cpu->isar);
         pstate_write(env, spsr);
         if (!arm_singlestep_active(env)) {
             env->pstate &= ~PSTATE_SS;
@@ -749,7 +755,7 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
     aarch64_sve_change_el(env, cur_el, new_el, return_to_aa64);
 
     bql_lock();
-    arm_call_el_change_hook(env_archcpu(env));
+    arm_call_el_change_hook(cpu);
     bql_unlock();
 
     return;
diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c
index 9e6a1869f94ef..8d79b8b7ae1f5 100644
--- a/target/arm/tcg/hflags.c
+++ b/target/arm/tcg/hflags.c
@@ -63,6 +63,15 @@ static bool aprofile_require_alignment(CPUARMState *env, int el, uint64_t sctlr)
 #endif
 }
 
+bool access_secure_reg(CPUARMState *env)
+{
+    bool ret = (arm_feature(env, ARM_FEATURE_EL3) &&
+                !arm_el_is_aa64(env, 3) &&
+                !(env->cp15.scr_el3 & SCR_NS));
+
+    return ret;
+}
+
 static CPUARMTBFlags rebuild_hflags_common(CPUARMState *env, int fp_el,
                                            ARMMMUIdx mmu_idx,
                                            CPUARMTBFlags flags)
diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build
index 1f9077c372cbd..dd12ccedb182d 100644
--- a/target/arm/tcg/meson.build
+++ b/target/arm/tcg/meson.build
@@ -41,6 +41,7 @@ arm_ss.add(files(
   'vec_helper.c',
   'tlb-insns.c',
   'arith_helper.c',
+  'vfp_helper.c',
 ))
 
 arm_ss.add(when: 'TARGET_AARCH64', if_true: files(
diff --git a/target/arm/tcg/mte_helper.c b/target/arm/tcg/mte_helper.c
index f72ce2ae0d4fa..5d6d8a17ae89b 100644
--- a/target/arm/tcg/mte_helper.c
+++ b/target/arm/tcg/mte_helper.c
@@ -31,7 +31,7 @@
 #endif
 #include "exec/cpu_ldst.h"
 #include "exec/helper-proto.h"
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 #include "qapi/error.h"
 #include "qemu/guest-random.h"
 #include "mte_helper.h"
diff --git a/target/arm/tcg/op_helper.c b/target/arm/tcg/op_helper.c
index 1161d301b710f..30786fd1ff4e3 100644
--- a/target/arm/tcg/op_helper.c
+++ b/target/arm/tcg/op_helper.c
@@ -313,15 +313,19 @@ void HELPER(check_bxj_trap)(CPUARMState *env, uint32_t rm)
 }
 
 #ifndef CONFIG_USER_ONLY
-/* Function checks whether WFx (WFI/WFE) instructions are set up to be trapped.
+/*
+ * Function checks whether WFx (WFI/WFE) instructions are set up to be trapped.
  * The function returns the target EL (1-3) if the instruction is to be trapped;
  * otherwise it returns 0 indicating it is not trapped.
+ * For a trap, *excp is updated with the EXCP_* trap type to use.
  */
-static inline int check_wfx_trap(CPUARMState *env, bool is_wfe)
+static inline int check_wfx_trap(CPUARMState *env, bool is_wfe, uint32_t *excp)
 {
     int cur_el = arm_current_el(env);
     uint64_t mask;
 
+    *excp = EXCP_UDEF;
+
     if (arm_feature(env, ARM_FEATURE_M)) {
         /* M profile cores can never trap WFI/WFE. */
         return 0;
@@ -331,18 +335,9 @@ static inline int check_wfx_trap(CPUARMState *env, bool is_wfe)
      * WFx instructions being trapped to EL1. These trap bits don't exist in v7.
      */
     if (cur_el < 1 && arm_feature(env, ARM_FEATURE_V8)) {
-        int target_el;
-
         mask = is_wfe ? SCTLR_nTWE : SCTLR_nTWI;
-        if (arm_is_secure_below_el3(env) && !arm_el_is_aa64(env, 3)) {
-            /* Secure EL0 and Secure PL1 is at EL3 */
-            target_el = 3;
-        } else {
-            target_el = 1;
-        }
-
-        if (!(env->cp15.sctlr_el[target_el] & mask)) {
-            return target_el;
+        if (!(arm_sctlr(env, cur_el) & mask)) {
+            return exception_target_el(env);
         }
     }
 
@@ -358,9 +353,12 @@ static inline int check_wfx_trap(CPUARMState *env, bool is_wfe)
     }
 
     /* We are not trapping to EL1 or EL2; trap to EL3 if SCR_EL3 requires it */
-    if (cur_el < 3) {
+    if (arm_feature(env, ARM_FEATURE_V8) && !arm_is_el3_or_mon(env)) {
         mask = (is_wfe) ? SCR_TWE : SCR_TWI;
         if (env->cp15.scr_el3 & mask) {
+            if (!arm_el_is_aa64(env, 3)) {
+                *excp = EXCP_MON_TRAP;
+            }
             return 3;
         }
     }
@@ -383,7 +381,8 @@ void HELPER(wfi)(CPUARMState *env, uint32_t insn_len)
     return;
 #else
     CPUState *cs = env_cpu(env);
-    int target_el = check_wfx_trap(env, false);
+    uint32_t excp;
+    int target_el = check_wfx_trap(env, false, &excp);
 
     if (cpu_has_work(cs)) {
         /* Don't bother to go into our "low power state" if
@@ -399,7 +398,7 @@ void HELPER(wfi)(CPUARMState *env, uint32_t insn_len)
             env->regs[15] -= insn_len;
         }
 
-        raise_exception(env, EXCP_UDEF, syn_wfx(1, 0xe, 0, insn_len == 2),
+        raise_exception(env, excp, syn_wfx(1, 0xe, 0, insn_len == 2),
                         target_el);
     }
 
@@ -424,10 +423,17 @@ void HELPER(wfit)(CPUARMState *env, uint64_t timeout)
 #else
     ARMCPU *cpu = env_archcpu(env);
     CPUState *cs = env_cpu(env);
-    int target_el = check_wfx_trap(env, false);
+    uint32_t excp;
+    int target_el = check_wfx_trap(env, false, &excp);
     /* The WFIT should time out when CNTVCT_EL0 >= the specified value. */
     uint64_t cntval = gt_get_countervalue(env);
-    uint64_t offset = gt_virt_cnt_offset(env);
+    /*
+     * We want the value that we would get if we read CNTVCT_EL0 from
+     * the current exception level, so the direct_access offset, not
+     * the indirect_access one. Compare the pseudocode LocalTimeoutEvent(),
+     * which calls VirtualCounterTimer().
+     */
+    uint64_t offset = gt_direct_access_timer_offset(env, GTIMER_VIRT);
     uint64_t cntvct = cntval - offset;
     uint64_t nexttick;
 
@@ -441,8 +447,7 @@ void HELPER(wfit)(CPUARMState *env, uint64_t timeout)
 
     if (target_el) {
         env->pc -= 4;
-        raise_exception(env, EXCP_UDEF, syn_wfx(1, 0xe, 0, false),
-                        target_el);
+        raise_exception(env, excp, syn_wfx(1, 0xe, 0, false), target_el);
     }
 
     if (uadd64_overflow(timeout, offset, &nexttick)) {
@@ -758,12 +763,13 @@ const void *HELPER(access_check_cp_reg)(CPUARMState *env, uint32_t key,
     const ARMCPRegInfo *ri = get_arm_cp_reginfo(cpu->cp_regs, key);
     CPAccessResult res = CP_ACCESS_OK;
     int target_el;
+    uint32_t excp;
 
     assert(ri != NULL);
 
     if (arm_feature(env, ARM_FEATURE_XSCALE) && ri->cp < 14
         && extract32(env->cp15.c15_cpar, ri->cp, 1) == 0) {
-        res = CP_ACCESS_TRAP;
+        res = CP_ACCESS_UNDEFINED;
         goto fail;
     }
 
@@ -780,7 +786,7 @@ const void *HELPER(access_check_cp_reg)(CPUARMState *env, uint32_t key,
      * the other trap takes priority. So we take the "check HSTR_EL2" path
      * for all of those cases.)
      */
-    if (res != CP_ACCESS_OK && ((res & CP_ACCESS_EL_MASK) == 0) &&
+    if (res != CP_ACCESS_OK && ((res & CP_ACCESS_EL_MASK) < 2) &&
         arm_current_el(env) == 0) {
         goto fail;
     }
@@ -851,12 +857,25 @@ const void *HELPER(access_check_cp_reg)(CPUARMState *env, uint32_t key,
     }
 
  fail:
-    switch (res & ~CP_ACCESS_EL_MASK) {
-    case CP_ACCESS_TRAP:
+    excp = EXCP_UDEF;
+    switch (res) {
+        /* CP_ACCESS_TRAP* traps are always direct to a specified EL */
+    case CP_ACCESS_TRAP_EL3:
+        /*
+         * If EL3 is AArch32 then there's no syndrome register; the cases
+         * where we would raise a SystemAccessTrap to AArch64 EL3 all become
+         * raising a Monitor trap exception. (Because there's no visible
+         * syndrome it doesn't matter what we pass to raise_exception().)
+         */
+        if (!arm_el_is_aa64(env, 3)) {
+            excp = EXCP_MON_TRAP;
+        }
+        break;
+    case CP_ACCESS_TRAP_EL2:
+    case CP_ACCESS_TRAP_EL1:
         break;
-    case CP_ACCESS_TRAP_UNCATEGORIZED:
-        /* Only CP_ACCESS_TRAP traps are direct to a specified EL */
-        assert((res & CP_ACCESS_EL_MASK) == 0);
+    case CP_ACCESS_UNDEFINED:
+        /* CP_ACCESS_UNDEFINED is never direct to a specified EL */
         if (cpu_isar_feature(aa64_ids, cpu) && isread &&
             arm_cpreg_in_idspace(ri)) {
             /*
@@ -876,6 +895,9 @@ const void *HELPER(access_check_cp_reg)(CPUARMState *env, uint32_t key,
     case 0:
         target_el = exception_target_el(env);
         break;
+    case 1:
+        assert(arm_current_el(env) < 2);
+        break;
     case 2:
         assert(arm_current_el(env) != 3);
         assert(arm_is_el2_enabled(env));
@@ -884,11 +906,10 @@ const void *HELPER(access_check_cp_reg)(CPUARMState *env, uint32_t key,
         assert(arm_feature(env, ARM_FEATURE_EL3));
         break;
     default:
-        /* No "direct" traps to EL1 */
         g_assert_not_reached();
     }
 
-    raise_exception(env, EXCP_UDEF, syndrome, target_el);
+    raise_exception(env, excp, syndrome, target_el);
 }
 
 const void *HELPER(lookup_cp_reg)(CPUARMState *env, uint32_t key)
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
index c206ca65ceba7..d786b4b111898 100644
--- a/target/arm/tcg/sve_helper.c
+++ b/target/arm/tcg/sve_helper.c
@@ -28,7 +28,7 @@
 #include "tcg/tcg.h"
 #include "vec_internal.h"
 #include "sve_ldst_internal.h"
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 #ifdef CONFIG_USER_ONLY
 #include "user/page-protection.h"
 #endif
diff --git a/target/arm/tcg/tlb-insns.c b/target/arm/tcg/tlb-insns.c
index fadc61a76e96e..630a481f0f81d 100644
--- a/target/arm/tcg/tlb-insns.c
+++ b/target/arm/tcg/tlb-insns.c
@@ -7,7 +7,7 @@
  */
 #include "qemu/osdep.h"
 #include "qemu/log.h"
-#include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "cpu.h"
 #include "internals.h"
 #include "cpu-features.h"
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 8bef391bb039d..39014325df180 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -1381,14 +1381,14 @@ static bool fp_access_check_only(DisasContext *s)
 {
     if (s->fp_excp_el) {
         assert(!s->fp_access_checked);
-        s->fp_access_checked = true;
+        s->fp_access_checked = -1;
 
         gen_exception_insn_el(s, 0, EXCP_UDEF,
                               syn_fp_access_trap(1, 0xe, false, 0),
                               s->fp_excp_el);
         return false;
     }
-    s->fp_access_checked = true;
+    s->fp_access_checked = 1;
     return true;
 }
 
@@ -1456,23 +1456,23 @@ static int fp_access_check_vector_hsd(DisasContext *s, bool is_q, MemOp esz)
 bool sve_access_check(DisasContext *s)
 {
     if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
+        bool ret;
+
         assert(dc_isar_feature(aa64_sme, s));
-        if (!sme_sm_enabled_check(s)) {
-            goto fail_exit;
-        }
-    } else if (s->sve_excp_el) {
+        ret = sme_sm_enabled_check(s);
+        s->sve_access_checked = (ret ? 1 : -1);
+        return ret;
+    }
+    if (s->sve_excp_el) {
+        /* Assert that we only raise one exception per instruction. */
+        assert(!s->sve_access_checked);
         gen_exception_insn_el(s, 0, EXCP_UDEF,
                               syn_sve_access_trap(), s->sve_excp_el);
-        goto fail_exit;
+        s->sve_access_checked = -1;
+        return false;
     }
-    s->sve_access_checked = true;
+    s->sve_access_checked = 1;
     return fp_access_check(s);
-
- fail_exit:
-    /* Assert that we only raise one exception per instruction. */
-    assert(!s->sve_access_checked);
-    s->sve_access_checked = true;
-    return false;
 }
 
 /*
@@ -1500,8 +1500,9 @@ bool sme_enabled_check(DisasContext *s)
      * sme_excp_el by itself for cpregs access checks.
      */
     if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
-        s->fp_access_checked = true;
-        return sme_access_check(s);
+        bool ret = sme_access_check(s);
+        s->fp_access_checked = (ret ? 1 : -1);
+        return ret;
     }
     return fp_access_check_only(s);
 }
@@ -10257,8 +10258,8 @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
     s->insn = insn;
     s->base.pc_next = pc + 4;
 
-    s->fp_access_checked = false;
-    s->sve_access_checked = false;
+    s->fp_access_checked = 0;
+    s->sve_access_checked = 0;
 
     if (s->pstate_il) {
         /*
diff --git a/target/arm/tcg/translate-a64.h b/target/arm/tcg/translate-a64.h
index 7d3b59ccd9618..b2420f59ebe7e 100644
--- a/target/arm/tcg/translate-a64.h
+++ b/target/arm/tcg/translate-a64.h
@@ -65,7 +65,7 @@ TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
 static inline void assert_fp_access_checked(DisasContext *s)
 {
 #ifdef CONFIG_DEBUG_TCG
-    if (unlikely(!s->fp_access_checked || s->fp_excp_el)) {
+    if (unlikely(s->fp_access_checked <= 0)) {
         fprintf(stderr, "target-arm: FP access check missing for "
                 "instruction 0x%08x\n", s->insn);
         abort();
diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c
index 68ac39341530f..d2800181388e4 100644
--- a/target/arm/tcg/translate.c
+++ b/target/arm/tcg/translate.c
@@ -3510,7 +3510,7 @@ static int t32_expandimm_rot(DisasContext *s, int x)
 /* Return the unrotated immediate from T32ExpandImm.  */
 static int t32_expandimm_imm(DisasContext *s, int x)
 {
-    int imm = extract32(x, 0, 8);
+    uint32_t imm = extract32(x, 0, 8);
 
     switch (extract32(x, 8, 4)) {
     case 0: /* XY */
@@ -4941,7 +4941,7 @@ static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
 }
 
 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
-                            TCGv_i32 addr, int address_offset)
+                            TCGv_i32 addr)
 {
     if (!a->p) {
         TCGv_i32 ofs = load_reg(s, a->rm);
@@ -4954,7 +4954,6 @@ static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
     } else if (!a->w) {
         return;
     }
-    tcg_gen_addi_i32(addr, addr, address_offset);
     store_reg(s, a->rn, addr);
 }
 
@@ -4974,7 +4973,7 @@ static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
      * Perform base writeback before the loaded value to
      * ensure correct behavior with overlapping index registers.
      */
-    op_addr_rr_post(s, a, addr, 0);
+    op_addr_rr_post(s, a, addr);
     store_reg_from_load(s, a->rt, tmp);
     return true;
 }
@@ -4999,14 +4998,53 @@ static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
     disas_set_da_iss(s, mop, issinfo);
 
-    op_addr_rr_post(s, a, addr, 0);
+    op_addr_rr_post(s, a, addr);
     return true;
 }
 
-static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
+static void do_ldrd_load(DisasContext *s, TCGv_i32 addr, int rt, int rt2)
 {
+    /*
+     * LDRD is required to be an atomic 64-bit access if the
+     * address is 8-aligned, two atomic 32-bit accesses if
+     * it's only 4-aligned, and to give an alignment fault
+     * if it's not 4-aligned. This is MO_ALIGN_4 | MO_ATOM_SUBALIGN.
+     * Rt is always the word from the lower address, and Rt2 the
+     * data from the higher address, regardless of endianness.
+     * So (like gen_load_exclusive) we avoid gen_aa32_ld_i64()
+     * so we don't get its SCTLR_B check, and instead do a 64-bit access
+     * using MO_BE if appropriate and then split the two halves.
+     *
+     * For M-profile, and for A-profile before LPAE, the 64-bit
+     * atomicity is not required. We could model that using
+     * the looser MO_ATOM_IFALIGN_PAIR, but providing a higher
+     * level of atomicity than required is harmless (we would not
+     * currently generate better code for IFALIGN_PAIR here).
+     *
+     * This also gives us the correct behaviour of not updating
+     * rt if the load of rt2 faults; this is required for cases
+     * like "ldrd r2, r3, [r2]" where rt is also the base register.
+     */
     int mem_idx = get_mem_index(s);
-    TCGv_i32 addr, tmp;
+    MemOp opc = MO_64 | MO_ALIGN_4 | MO_ATOM_SUBALIGN | s->be_data;
+    TCGv taddr = gen_aa32_addr(s, addr, opc);
+    TCGv_i64 t64 = tcg_temp_new_i64();
+    TCGv_i32 tmp = tcg_temp_new_i32();
+    TCGv_i32 tmp2 = tcg_temp_new_i32();
+
+    tcg_gen_qemu_ld_i64(t64, taddr, mem_idx, opc);
+    if (s->be_data == MO_BE) {
+        tcg_gen_extr_i64_i32(tmp2, tmp, t64);
+    } else {
+        tcg_gen_extr_i64_i32(tmp, tmp2, t64);
+    }
+    store_reg(s, rt, tmp);
+    store_reg(s, rt2, tmp2);
+}
+
+static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
+{
+    TCGv_i32 addr;
 
     if (!ENABLE_ARCH_5TE) {
         return false;
@@ -5017,25 +5055,49 @@ static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
     }
     addr = op_addr_rr_pre(s, a);
 
-    tmp = tcg_temp_new_i32();
-    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
-    store_reg(s, a->rt, tmp);
-
-    tcg_gen_addi_i32(addr, addr, 4);
-
-    tmp = tcg_temp_new_i32();
-    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
-    store_reg(s, a->rt + 1, tmp);
+    do_ldrd_load(s, addr, a->rt, a->rt + 1);
 
     /* LDRD w/ base writeback is undefined if the registers overlap.  */
-    op_addr_rr_post(s, a, addr, -4);
+    op_addr_rr_post(s, a, addr);
     return true;
 }
 
-static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
+static void do_strd_store(DisasContext *s, TCGv_i32 addr, int rt, int rt2)
 {
+    /*
+     * STRD is required to be an atomic 64-bit access if the
+     * address is 8-aligned, two atomic 32-bit accesses if
+     * it's only 4-aligned, and to give an alignment fault
+     * if it's not 4-aligned.
+     * Rt is always the word from the lower address, and Rt2 the
+     * data from the higher address, regardless of endianness.
+     * So (like gen_store_exclusive) we avoid gen_aa32_ld_i64()
+     * so we don't get its SCTLR_B check, and instead do a 64-bit access
+     * using MO_BE if appropriate, using a value constructed
+     * by putting the two halves together in the right order.
+     *
+     * As with LDRD, the 64-bit atomicity is not required for
+     * M-profile, or for A-profile before LPAE, and we provide
+     * the higher guarantee always for simplicity.
+     */
     int mem_idx = get_mem_index(s);
-    TCGv_i32 addr, tmp;
+    MemOp opc = MO_64 | MO_ALIGN_4 | MO_ATOM_SUBALIGN | s->be_data;
+    TCGv taddr = gen_aa32_addr(s, addr, opc);
+    TCGv_i32 t1 = load_reg(s, rt);
+    TCGv_i32 t2 = load_reg(s, rt2);
+    TCGv_i64 t64 = tcg_temp_new_i64();
+
+    if (s->be_data == MO_BE) {
+        tcg_gen_concat_i32_i64(t64, t2, t1);
+    } else {
+        tcg_gen_concat_i32_i64(t64, t1, t2);
+    }
+    tcg_gen_qemu_st_i64(t64, taddr, mem_idx, opc);
+}
+
+static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
+{
+    TCGv_i32 addr;
 
     if (!ENABLE_ARCH_5TE) {
         return false;
@@ -5046,15 +5108,9 @@ static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
     }
     addr = op_addr_rr_pre(s, a);
 
-    tmp = load_reg(s, a->rt);
-    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
-
-    tcg_gen_addi_i32(addr, addr, 4);
+    do_strd_store(s, addr, a->rt, a->rt + 1);
 
-    tmp = load_reg(s, a->rt + 1);
-    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
-
-    op_addr_rr_post(s, a, addr, -4);
+    op_addr_rr_post(s, a, addr);
     return true;
 }
 
@@ -5090,13 +5146,14 @@ static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
 }
 
 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
-                            TCGv_i32 addr, int address_offset)
+                            TCGv_i32 addr)
 {
+    int address_offset = 0;
     if (!a->p) {
         if (a->u) {
-            address_offset += a->imm;
+            address_offset = a->imm;
         } else {
-            address_offset -= a->imm;
+            address_offset = -a->imm;
         }
     } else if (!a->w) {
         return;
@@ -5121,7 +5178,7 @@ static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
      * Perform base writeback before the loaded value to
      * ensure correct behavior with overlapping index registers.
      */
-    op_addr_ri_post(s, a, addr, 0);
+    op_addr_ri_post(s, a, addr);
     store_reg_from_load(s, a->rt, tmp);
     return true;
 }
@@ -5146,29 +5203,20 @@ static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
     disas_set_da_iss(s, mop, issinfo);
 
-    op_addr_ri_post(s, a, addr, 0);
+    op_addr_ri_post(s, a, addr);
     return true;
 }
 
 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
 {
-    int mem_idx = get_mem_index(s);
-    TCGv_i32 addr, tmp;
+    TCGv_i32 addr;
 
     addr = op_addr_ri_pre(s, a);
 
-    tmp = tcg_temp_new_i32();
-    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
-    store_reg(s, a->rt, tmp);
-
-    tcg_gen_addi_i32(addr, addr, 4);
-
-    tmp = tcg_temp_new_i32();
-    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
-    store_reg(s, rt2, tmp);
+    do_ldrd_load(s, addr, a->rt, rt2);
 
     /* LDRD w/ base writeback is undefined if the registers overlap.  */
-    op_addr_ri_post(s, a, addr, -4);
+    op_addr_ri_post(s, a, addr);
     return true;
 }
 
@@ -5191,20 +5239,13 @@ static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
 
 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
 {
-    int mem_idx = get_mem_index(s);
-    TCGv_i32 addr, tmp;
+    TCGv_i32 addr;
 
     addr = op_addr_ri_pre(s, a);
 
-    tmp = load_reg(s, a->rt);
-    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
-
-    tcg_gen_addi_i32(addr, addr, 4);
-
-    tmp = load_reg(s, rt2);
-    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
+    do_strd_store(s, addr, a->rt, rt2);
 
-    op_addr_ri_post(s, a, addr, -4);
+    op_addr_ri_post(s, a, addr);
     return true;
 }
 
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
index f8dc2f0d4bb92..53e485d28ac4a 100644
--- a/target/arm/tcg/translate.h
+++ b/target/arm/tcg/translate.h
@@ -92,15 +92,19 @@ typedef struct DisasContext {
     bool aarch64;
     bool thumb;
     bool lse2;
-    /* Because unallocated encodings generate different exception syndrome
+    /*
+     * Because unallocated encodings generate different exception syndrome
      * information from traps due to FP being disabled, we can't do a single
      * "is fp access disabled" check at a high level in the decode tree.
      * To help in catching bugs where the access check was forgotten in some
      * code path, we set this flag when the access check is done, and assert
      * that it is set at the point where we actually touch the FP regs.
+     *   0: not checked,
+     *   1: checked, access ok
+     *  -1: checked, access denied
      */
-    bool fp_access_checked;
-    bool sve_access_checked;
+    int8_t fp_access_checked;
+    int8_t sve_access_checked;
     /* ARMv8 single-step state (this is distinct from the QEMU gdbstub
      * single-step support).
      */
diff --git a/target/arm/vfp_helper.c b/target/arm/tcg/vfp_helper.c
similarity index 90%
rename from target/arm/vfp_helper.c
rename to target/arm/tcg/vfp_helper.c
index 5d424477a2d83..b32e2f4e27c63 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/tcg/vfp_helper.c
@@ -23,13 +23,7 @@
 #include "internals.h"
 #include "cpu-features.h"
 #include "fpu/softfloat.h"
-#ifdef CONFIG_TCG
 #include "qemu/log.h"
-#endif
-
-/* VFP support.  We follow the convention used for VFP instructions:
-   Single precision routines have a "s" suffix, double precision a
-   "d" suffix.  */
 
 /*
  * Set the float_status behaviour to match the Arm defaults:
@@ -75,8 +69,6 @@ void arm_set_ah_fp_behaviours(float_status *s)
     set_float_default_nan_pattern(0b11000000, s);
 }
 
-#ifdef CONFIG_TCG
-
 /* Convert host exception flags to vfp form.  */
 static inline uint32_t vfp_exceptbits_from_host(int host_bits, bool ah)
 {
@@ -113,7 +105,7 @@ static inline uint32_t vfp_exceptbits_from_host(int host_bits, bool ah)
     return target_bits;
 }
 
-static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
+uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
 {
     uint32_t a32_flags = 0, a64_flags = 0;
 
@@ -148,7 +140,7 @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
         vfp_exceptbits_from_host(a32_flags, false);
 }
 
-static void vfp_clear_float_status_exc_flags(CPUARMState *env)
+void vfp_clear_float_status_exc_flags(CPUARMState *env)
 {
     /*
      * Clear out all the exception-flag information in the float_status
@@ -176,7 +168,7 @@ static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env)
     vfp_clear_float_status_exc_flags(env);
 }
 
-static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
+void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
 {
     uint64_t changed = env->vfp.fpcr;
 
@@ -261,166 +253,11 @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
     }
 }
 
-#else
-
-static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
-{
-    return 0;
-}
-
-static void vfp_clear_float_status_exc_flags(CPUARMState *env)
-{
-}
-
-static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
-{
-}
-
-#endif
-
-uint32_t vfp_get_fpcr(CPUARMState *env)
-{
-    uint32_t fpcr = env->vfp.fpcr
-        | (env->vfp.vec_len << 16)
-        | (env->vfp.vec_stride << 20);
-
-    /*
-     * M-profile LTPSIZE is the same bits [18:16] as A-profile Len; whichever
-     * of the two is not applicable to this CPU will always be zero.
-     */
-    fpcr |= env->v7m.ltpsize << 16;
-
-    return fpcr;
-}
-
-uint32_t vfp_get_fpsr(CPUARMState *env)
-{
-    uint32_t fpsr = env->vfp.fpsr;
-    uint32_t i;
-
-    fpsr |= vfp_get_fpsr_from_host(env);
-
-    i = env->vfp.qc[0] | env->vfp.qc[1] | env->vfp.qc[2] | env->vfp.qc[3];
-    fpsr |= i ? FPSR_QC : 0;
-    return fpsr;
-}
-
-uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
-{
-    return (vfp_get_fpcr(env) & FPSCR_FPCR_MASK) |
-        (vfp_get_fpsr(env) & FPSCR_FPSR_MASK);
-}
-
-uint32_t vfp_get_fpscr(CPUARMState *env)
-{
-    return HELPER(vfp_get_fpscr)(env);
-}
-
-void vfp_set_fpsr(CPUARMState *env, uint32_t val)
-{
-    ARMCPU *cpu = env_archcpu(env);
-
-    if (arm_feature(env, ARM_FEATURE_NEON) ||
-        cpu_isar_feature(aa32_mve, cpu)) {
-        /*
-         * The bit we set within vfp.qc[] is arbitrary; the array as a
-         * whole being zero/non-zero is what counts.
-         */
-        env->vfp.qc[0] = val & FPSR_QC;
-        env->vfp.qc[1] = 0;
-        env->vfp.qc[2] = 0;
-        env->vfp.qc[3] = 0;
-    }
-
-    /*
-     * NZCV lives only in env->vfp.fpsr. The cumulative exception flags
-     * IOC|DZC|OFC|UFC|IXC|IDC also live in env->vfp.fpsr, with possible
-     * extra pending exception information that hasn't yet been folded in
-     * living in the float_status values (for TCG).
-     * Since this FPSR write gives us the up to date values of the exception
-     * flags, we want to store into vfp.fpsr the NZCV and CEXC bits, zeroing
-     * anything else. We also need to clear out the float_status exception
-     * information so that the next vfp_get_fpsr does not fold in stale data.
-     */
-    val &= FPSR_NZCV_MASK | FPSR_CEXC_MASK;
-    env->vfp.fpsr = val;
-    vfp_clear_float_status_exc_flags(env);
-}
-
-static void vfp_set_fpcr_masked(CPUARMState *env, uint32_t val, uint32_t mask)
-{
-    /*
-     * We only set FPCR bits defined by mask, and leave the others alone.
-     * We assume the mask is sensible (e.g. doesn't try to set only
-     * part of a field)
-     */
-    ARMCPU *cpu = env_archcpu(env);
-
-    /* When ARMv8.2-FP16 is not supported, FZ16 is RES0.  */
-    if (!cpu_isar_feature(any_fp16, cpu)) {
-        val &= ~FPCR_FZ16;
-    }
-    if (!cpu_isar_feature(aa64_afp, cpu)) {
-        val &= ~(FPCR_FIZ | FPCR_AH | FPCR_NEP);
-    }
-
-    if (!cpu_isar_feature(aa64_ebf16, cpu)) {
-        val &= ~FPCR_EBF;
-    }
-
-    vfp_set_fpcr_to_host(env, val, mask);
-
-    if (mask & (FPCR_LEN_MASK | FPCR_STRIDE_MASK)) {
-        if (!arm_feature(env, ARM_FEATURE_M)) {
-            /*
-             * Short-vector length and stride; on M-profile these bits
-             * are used for different purposes.
-             * We can't make this conditional be "if MVFR0.FPShVec != 0",
-             * because in v7A no-short-vector-support cores still had to
-             * allow Stride/Len to be written with the only effect that
-             * some insns are required to UNDEF if the guest sets them.
-             */
-            env->vfp.vec_len = extract32(val, 16, 3);
-            env->vfp.vec_stride = extract32(val, 20, 2);
-        } else if (cpu_isar_feature(aa32_mve, cpu)) {
-            env->v7m.ltpsize = extract32(val, FPCR_LTPSIZE_SHIFT,
-                                         FPCR_LTPSIZE_LENGTH);
-        }
-    }
-
-    /*
-     * We don't implement trapped exception handling, so the
-     * trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!)
-     *
-     * The FPCR bits we keep in vfp.fpcr are AHP, DN, FZ, RMode, EBF, FZ16,
-     * FIZ, AH, and NEP.
-     * Len, Stride and LTPSIZE we just handled. Store those bits
-     * there, and zero any of the other FPCR bits and the RES0 and RAZ/WI
-     * bits.
-     */
-    val &= FPCR_AHP | FPCR_DN | FPCR_FZ | FPCR_RMODE_MASK | FPCR_FZ16 |
-        FPCR_EBF | FPCR_FIZ | FPCR_AH | FPCR_NEP;
-    env->vfp.fpcr &= ~mask;
-    env->vfp.fpcr |= val;
-}
-
-void vfp_set_fpcr(CPUARMState *env, uint32_t val)
-{
-    vfp_set_fpcr_masked(env, val, MAKE_64BIT_MASK(0, 32));
-}
-
-void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
-{
-    vfp_set_fpcr_masked(env, val, FPSCR_FPCR_MASK);
-    vfp_set_fpsr(env, val & FPSCR_FPSR_MASK);
-}
-
-void vfp_set_fpscr(CPUARMState *env, uint32_t val)
-{
-    HELPER(vfp_set_fpscr)(env, val);
-}
-
-#ifdef CONFIG_TCG
+/*
+ * VFP support.  We follow the convention used for VFP instructions:
+ * Single precision routines have a "s" suffix, double precision a
+ * "d" suffix.
+ */
 
 #define VFP_HELPER(name, p) HELPER(glue(glue(vfp_,name),p))
 
@@ -1520,4 +1357,12 @@ void HELPER(check_hcr_el2_trap)(CPUARMState *env, uint32_t rt, uint32_t reg)
     raise_exception(env, EXCP_HYP_TRAP, syndrome, 2);
 }
 
-#endif
+uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
+{
+    return vfp_get_fpscr(env);
+}
+
+void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
+{
+    vfp_set_fpscr(env, val);
+}
diff --git a/target/arm/vfp_fpscr.c b/target/arm/vfp_fpscr.c
new file mode 100644
index 0000000000000..92ea60ebbf28d
--- /dev/null
+++ b/target/arm/vfp_fpscr.c
@@ -0,0 +1,155 @@
+/*
+ * ARM VFP floating-point: handling of FPSCR/FPCR/FPSR
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "internals.h"
+#include "cpu-features.h"
+
+uint32_t vfp_get_fpcr(CPUARMState *env)
+{
+    uint32_t fpcr = env->vfp.fpcr
+        | (env->vfp.vec_len << 16)
+        | (env->vfp.vec_stride << 20);
+
+    /*
+     * M-profile LTPSIZE is the same bits [18:16] as A-profile Len; whichever
+     * of the two is not applicable to this CPU will always be zero.
+     */
+    fpcr |= env->v7m.ltpsize << 16;
+
+    return fpcr;
+}
+
+uint32_t vfp_get_fpsr(CPUARMState *env)
+{
+    uint32_t fpsr = env->vfp.fpsr;
+    uint32_t i;
+
+    fpsr |= vfp_get_fpsr_from_host(env);
+
+    i = env->vfp.qc[0] | env->vfp.qc[1] | env->vfp.qc[2] | env->vfp.qc[3];
+    fpsr |= i ? FPSR_QC : 0;
+    return fpsr;
+}
+
+uint32_t vfp_get_fpscr(CPUARMState *env)
+{
+    return (vfp_get_fpcr(env) & FPSCR_FPCR_MASK) |
+        (vfp_get_fpsr(env) & FPSCR_FPSR_MASK);
+}
+
+void vfp_set_fpsr(CPUARMState *env, uint32_t val)
+{
+    ARMCPU *cpu = env_archcpu(env);
+
+    if (arm_feature(env, ARM_FEATURE_NEON) ||
+        cpu_isar_feature(aa32_mve, cpu)) {
+        /*
+         * The bit we set within vfp.qc[] is arbitrary; the array as a
+         * whole being zero/non-zero is what counts.
+         */
+        env->vfp.qc[0] = val & FPSR_QC;
+        env->vfp.qc[1] = 0;
+        env->vfp.qc[2] = 0;
+        env->vfp.qc[3] = 0;
+    }
+
+    /*
+     * NZCV lives only in env->vfp.fpsr. The cumulative exception flags
+     * IOC|DZC|OFC|UFC|IXC|IDC also live in env->vfp.fpsr, with possible
+     * extra pending exception information that hasn't yet been folded in
+     * living in the float_status values (for TCG).
+     * Since this FPSR write gives us the up to date values of the exception
+     * flags, we want to store into vfp.fpsr the NZCV and CEXC bits, zeroing
+     * anything else. We also need to clear out the float_status exception
+     * information so that the next vfp_get_fpsr does not fold in stale data.
+     */
+    val &= FPSR_NZCV_MASK | FPSR_CEXC_MASK;
+    env->vfp.fpsr = val;
+    vfp_clear_float_status_exc_flags(env);
+}
+
+static void vfp_set_fpcr_masked(CPUARMState *env, uint32_t val, uint32_t mask)
+{
+    /*
+     * We only set FPCR bits defined by mask, and leave the others alone.
+     * We assume the mask is sensible (e.g. doesn't try to set only
+     * part of a field)
+     */
+    ARMCPU *cpu = env_archcpu(env);
+
+    /* When ARMv8.2-FP16 is not supported, FZ16 is RES0.  */
+    if (!cpu_isar_feature(any_fp16, cpu)) {
+        val &= ~FPCR_FZ16;
+    }
+    if (!cpu_isar_feature(aa64_afp, cpu)) {
+        val &= ~(FPCR_FIZ | FPCR_AH | FPCR_NEP);
+    }
+
+    if (!cpu_isar_feature(aa64_ebf16, cpu)) {
+        val &= ~FPCR_EBF;
+    }
+
+    vfp_set_fpcr_to_host(env, val, mask);
+
+    if (mask & (FPCR_LEN_MASK | FPCR_STRIDE_MASK)) {
+        if (!arm_feature(env, ARM_FEATURE_M)) {
+            /*
+             * Short-vector length and stride; on M-profile these bits
+             * are used for different purposes.
+             * We can't make this conditional be "if MVFR0.FPShVec != 0",
+             * because in v7A no-short-vector-support cores still had to
+             * allow Stride/Len to be written with the only effect that
+             * some insns are required to UNDEF if the guest sets them.
+             */
+            env->vfp.vec_len = extract32(val, 16, 3);
+            env->vfp.vec_stride = extract32(val, 20, 2);
+        } else if (cpu_isar_feature(aa32_mve, cpu)) {
+            env->v7m.ltpsize = extract32(val, FPCR_LTPSIZE_SHIFT,
+                                         FPCR_LTPSIZE_LENGTH);
+        }
+    }
+
+    /*
+     * We don't implement trapped exception handling, so the
+     * trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!)
+     *
+     * The FPCR bits we keep in vfp.fpcr are AHP, DN, FZ, RMode, EBF, FZ16,
+     * FIZ, AH, and NEP.
+     * Len, Stride and LTPSIZE we just handled. Store those bits
+     * there, and zero any of the other FPCR bits and the RES0 and RAZ/WI
+     * bits.
+     */
+    val &= FPCR_AHP | FPCR_DN | FPCR_FZ | FPCR_RMODE_MASK | FPCR_FZ16 |
+        FPCR_EBF | FPCR_FIZ | FPCR_AH | FPCR_NEP;
+    env->vfp.fpcr &= ~mask;
+    env->vfp.fpcr |= val;
+}
+
+void vfp_set_fpcr(CPUARMState *env, uint32_t val)
+{
+    vfp_set_fpcr_masked(env, val, MAKE_64BIT_MASK(0, 32));
+}
+
+void vfp_set_fpscr(CPUARMState *env, uint32_t val)
+{
+    vfp_set_fpcr_masked(env, val, FPSCR_FPCR_MASK);
+    vfp_set_fpsr(env, val & FPSCR_FPSR_MASK);
+}
diff --git a/target/avr/cpu.c b/target/avr/cpu.c
index 8a126ff322282..834c7082aa71e 100644
--- a/target/avr/cpu.c
+++ b/target/avr/cpu.c
@@ -102,6 +102,7 @@ static void avr_cpu_reset_hold(Object *obj, ResetType type)
 
 static void avr_cpu_disas_set_info(CPUState *cpu, disassemble_info *info)
 {
+    info->endian = BFD_ENDIAN_LITTLE;
     info->mach = bfd_arch_avr;
     info->print_insn = avr_print_insn;
 }
@@ -200,10 +201,11 @@ static void avr_cpu_dump_state(CPUState *cs, FILE *f, int flags)
 #include "hw/core/sysemu-cpu-ops.h"
 
 static const struct SysemuCPUOps avr_sysemu_ops = {
+    .has_work = avr_cpu_has_work,
     .get_phys_page_debug = avr_cpu_get_phys_page_debug,
 };
 
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 
 static const TCGCPUOps avr_tcg_ops = {
     .initialize = avr_cpu_tcg_init,
@@ -232,7 +234,6 @@ static void avr_cpu_class_init(ObjectClass *oc, void *data)
 
     cc->class_by_name = avr_cpu_class_by_name;
 
-    cc->has_work = avr_cpu_has_work;
     cc->mmu_index = avr_cpu_mmu_index;
     cc->dump_state = avr_cpu_dump_state;
     cc->set_pc = avr_cpu_set_pc;
diff --git a/target/avr/helper.c b/target/avr/helper.c
index 345708a1b39fb..3412312ad5e99 100644
--- a/target/avr/helper.c
+++ b/target/avr/helper.c
@@ -22,8 +22,8 @@
 #include "qemu/log.h"
 #include "qemu/error-report.h"
 #include "cpu.h"
-#include "hw/core/tcg-cpu-ops.h"
-#include "exec/exec-all.h"
+#include "accel/tcg/cpu-ops.h"
+#include "exec/cputlb.h"
 #include "exec/page-protection.h"
 #include "exec/cpu_ldst.h"
 #include "exec/address-spaces.h"
diff --git a/target/hexagon/Kconfig b/target/hexagon/Kconfig
new file mode 100644
index 0000000000000..7e556f350633c
--- /dev/null
+++ b/target/hexagon/Kconfig
@@ -0,0 +1,2 @@
+config HEXAGON
+    bool
diff --git a/target/hexagon/arch.c b/target/hexagon/arch.c
index d053d6848715c..87c2f6a53f6ca 100644
--- a/target/hexagon/arch.c
+++ b/target/hexagon/arch.c
@@ -208,6 +208,11 @@ void arch_fpop_start(CPUHexagonState *env)
  * model it in qemu user mode.
  */
 #define RAISE_FP_EXCEPTION   do {} while (0)
+#else
+ /*
+  * To be implemented.
+  */
+#define RAISE_FP_EXCEPTION   do { g_assert_not_reached(); } while (0)
 #endif
 
 #define SOFTFLOAT_TEST_FLAG(FLAG, MYF, MYE) \
diff --git a/target/hexagon/attribs_def.h.inc b/target/hexagon/attribs_def.h.inc
index 9e3a05f882810..e6523a739b103 100644
--- a/target/hexagon/attribs_def.h.inc
+++ b/target/hexagon/attribs_def.h.inc
@@ -19,20 +19,41 @@
 DEF_ATTRIB(AA_DUMMY, "Dummy Zeroth Attribute", "", "")
 
 /* Misc */
+DEF_ATTRIB(FAKEINSN, "Not a real instruction", "", "")
+DEF_ATTRIB(MAPPING, "Not real -- asm mapped", "", "")
+DEF_ATTRIB(CONDMAPPING, "Not real -- mapped based on values", "", "")
 DEF_ATTRIB(EXTENSION, "Extension instruction", "", "")
+DEF_ATTRIB(SHARED_EXTENSION, "Shared extension instruction", "", "")
+DEF_ATTRIB(CABAC,
+           "Cabac Instruction. Used in conjuction with QDSP6_CABAC_PRESENT", "",
+           "")
+DEF_ATTRIB(EXPERIMENTAL, "This may not work correctly not supported by RTL.",
+           "", "")
 
 DEF_ATTRIB(PRIV, "Not available in user or guest mode", "", "")
 DEF_ATTRIB(GUEST, "Not available in user mode", "", "")
 
 DEF_ATTRIB(FPOP, "Floating Point Operation", "", "")
+DEF_ATTRIB(FPDOUBLE, "Double-precision Floating Point Operation", "", "")
+DEF_ATTRIB(FPSINGLE, "Single-precision Floating Point Operation", "", "")
+DEF_ATTRIB(SFMAKE, "Single Float Make", "", "")
+DEF_ATTRIB(DFMAKE, "Single Float Make", "", "")
+
+DEF_ATTRIB(NO_TIMING_LOG, "Does not get logged to the timing model", "", "")
 
 DEF_ATTRIB(EXTENDABLE, "Immediate may be extended", "", "")
+DEF_ATTRIB(EXT_UPPER_IMMED, "Extend upper case immediate", "", "")
+DEF_ATTRIB(EXT_LOWER_IMMED, "Extend lower case immediate", "", "")
+DEF_ATTRIB(MUST_EXTEND, "Immediate must be extended", "", "")
+DEF_ATTRIB(NA_NT, "Non-Allocating Non-Temporal instruction", "", "")
+DEF_ATTRIB(INVPRED, "The predicate is inverted for true/false sense", "", "")
 
 DEF_ATTRIB(ARCHV2, "V2 architecture", "", "")
 DEF_ATTRIB(ARCHV3, "V3 architecture", "", "")
 DEF_ATTRIB(ARCHV4, "V4 architecture", "", "")
 DEF_ATTRIB(ARCHV5, "V5 architecture", "", "")
 
+DEF_ATTRIB(PACKED, "Packable instruction", "", "")
 DEF_ATTRIB(SUBINSN, "sub-instruction", "", "")
 
 /* Load and Store attributes */
@@ -46,21 +67,48 @@ DEF_ATTRIB(MEMSIZE_4B, "Memory width is 4 bytes", "", "")
 DEF_ATTRIB(MEMSIZE_8B, "Memory width is 8 bytes", "", "")
 DEF_ATTRIB(SCALAR_LOAD, "Load is scalar", "", "")
 DEF_ATTRIB(SCALAR_STORE, "Store is scalar", "", "")
-DEF_ATTRIB(REGWRSIZE_1B, "Memory width is 1 byte", "", "")
-DEF_ATTRIB(REGWRSIZE_2B, "Memory width is 2 bytes", "", "")
-DEF_ATTRIB(REGWRSIZE_4B, "Memory width is 4 bytes", "", "")
-DEF_ATTRIB(REGWRSIZE_8B, "Memory width is 8 bytes", "", "")
+DEF_ATTRIB(REGWRSIZE_1B, "ETM Memory width is 1 byte", "", "")
+DEF_ATTRIB(REGWRSIZE_2B, "ETM Memory width is 2 bytes", "", "")
+DEF_ATTRIB(REGWRSIZE_4B, "ETM Memory width is 4 bytes", "", "")
+DEF_ATTRIB(REGWRSIZE_8B, "ETM Memory width is 8 bytes", "", "")
 DEF_ATTRIB(MEMLIKE, "Memory-like instruction", "", "")
 DEF_ATTRIB(MEMLIKE_PACKET_RULES, "follows Memory-like packet rules", "", "")
+DEF_ATTRIB(CACHEOP, "Cache operation", "", "")
+DEF_ATTRIB(COPBYADDRESS, "Cache operation by address", "", "")
+DEF_ATTRIB(COPBYIDX, "Cache operation by index", "", "")
 DEF_ATTRIB(RELEASE, "Releases a lock", "", "")
 DEF_ATTRIB(ACQUIRE, "Acquires a lock", "", "")
+DEF_ATTRIB(LLSC, "load-locked/store-conditional instruction", "", "")
 
 DEF_ATTRIB(RLS_INNER, "Store release inner visibility", "", "")
+DEF_ATTRIB(RLS_OUTER, "Store release outer visibility", "", "")
 DEF_ATTRIB(RLS_ALL_THREAD, "Store release among all threads", "", "")
 DEF_ATTRIB(RLS_SAME_THREAD, "Store release with the same thread", "", "")
 
+/* Load and Store Addressing Mode Attributes */
+DEF_ATTRIB(EA_REG_ONLY, "EA = input register only", "", "")
+DEF_ATTRIB(EA_IMM_ONLY, "EA = immediate only", "", "")
+DEF_ATTRIB(EA_REG_PLUS_IMM, "EA = register plus immediate", "", "")
+DEF_ATTRIB(EA_REG_PLUS_REGSCALED, "EA = register plus scaled register", "", "")
+DEF_ATTRIB(EA_IMM_PLUS_REGSCALED, "EA = immediate plus scaled register", "", "")
+DEF_ATTRIB(EA_BREV_REG, "EA = bit-reversed input register", "", "")
+DEF_ATTRIB(EA_GP_IMM, "EA = GP plus immediate (unless extended)", "", "")
+DEF_ATTRIB(EA_PAGECROSS, "EA calculation can have a Page Cross Stall", "", "")
+
+DEF_ATTRIB(PM_ANY, "Post Modify", "", "")
+DEF_ATTRIB(PM_I, "Post Modify by Immediate", "", "")
+DEF_ATTRIB(PM_M, "Post Modify by M register", "", "")
+DEF_ATTRIB(PM_CIRI, "Post Modify with Circular Addressing by immediate", "", "")
+DEF_ATTRIB(PM_CIRR, "Post Modify with Circular Addressing by I field", "", "")
+
+DEF_ATTRIB(VMEM, "VMEM-type", "", "")
+DEF_ATTRIB(VBUF, "Touches the VBUF", "", "")
+DEF_ATTRIB(VDBG, "Vector debugging instruction", "", "")
+
 /* V6 Vector attributes */
 DEF_ATTRIB(CVI, "Executes on the HVX extension", "", "")
+DEF_ATTRIB(NT_VMEM, "Non-temporal memory access", "", "")
+DEF_ATTRIB(VMEMU, "Unaligned memory access", "", "")
 
 DEF_ATTRIB(CVI_NEW, "New value memory instruction executes on HVX", "", "")
 DEF_ATTRIB(CVI_VM, "Memory instruction executes on HVX", "", "")
@@ -69,109 +117,415 @@ DEF_ATTRIB(CVI_VP_VS, "Double vector permute/shft insn executes on HVX", "", "")
 DEF_ATTRIB(CVI_VX, "Multiply instruction executes on HVX", "", "")
 DEF_ATTRIB(CVI_VX_DV, "Double vector multiply insn executes on HVX", "", "")
 DEF_ATTRIB(CVI_VS, "Shift instruction executes on HVX", "", "")
-DEF_ATTRIB(CVI_VS_3SRC, "This shift needs to borrow a source register", "", "")
+DEF_ATTRIB(
+    CVI_VS_3SRC,
+    "This shift instruction needs to borrow a source register from the VP slot",
+    "", "")
 DEF_ATTRIB(CVI_VS_VX, "Permute/shift and multiply insn executes on HVX", "", "")
 DEF_ATTRIB(CVI_VA, "ALU instruction executes on HVX", "", "")
+DEF_ATTRIB(CVI_VA_2SRC,
+           "This alu instruction executes on multimedia vector engine and "
+           "requires two vectro sources",
+           "", "")
 DEF_ATTRIB(CVI_VA_DV, "Double vector alu instruction executes on HVX", "", "")
 DEF_ATTRIB(CVI_4SLOT, "Consumes all the vector execution resources", "", "")
 DEF_ATTRIB(CVI_TMP, "Transient Memory Load not written to register", "", "")
 DEF_ATTRIB(CVI_REMAP, "Register Renaming not written to register file", "", "")
+DEF_ATTRIB(CVI_TMP_SRC, "Transient reassign", "", "")
+DEF_ATTRIB(CVI_EXTRACT, "HVX Extract Instruction that goes through L2", "", "")
+DEF_ATTRIB(CVI_EARLY, "HVX instructions that require early sources", "", "")
+DEF_ATTRIB(CVI_LATE, "HVX insn that always require late sources", "", "")
+DEF_ATTRIB(CVI_VV_LATE, "HVX insn that always require late Vv source", "", "")
+DEF_ATTRIB(CVI_REQUIRES_TMPLOAD, ".tmp load must be included in packet", "", "")
+DEF_ATTRIB(CVI_PUMP_2X, "Goes through the pipeline twice", "", "")
+DEF_ATTRIB(CVI_PUMP_4X, "Goes through the pipeline four times", "", "")
 DEF_ATTRIB(CVI_GATHER, "CVI Gather operation", "", "")
 DEF_ATTRIB(CVI_SCATTER, "CVI Scatter operation", "", "")
 DEF_ATTRIB(CVI_SCATTER_RELEASE, "CVI Store Release for scatter", "", "")
+DEF_ATTRIB(CVI_GATHER_RELEASE, "CVI Store Release for gather", "", "")
 DEF_ATTRIB(CVI_TMP_DST, "CVI instruction that doesn't write a register", "", "")
+DEF_ATTRIB(CVI_SCATTER_WORD_ACC, "CVI Scatter Word Accum (second pass)", "", "")
+DEF_ATTRIB(CVI_SCATTER_ACC, "CVI Scatter Accumulate", "", "")
+DEF_ATTRIB(CVI_VX_VSRC0_IS_DST,
+           "For the assembler to handle the special case of non-linear "
+           "instructions with Vxx specified both as src and dst in syntax ",
+           "", "")
+
+DEF_ATTRIB(CVI_VX_ACC_FWD, "VX Accumulator Forwarding", "", "")
+
+DEF_ATTRIB(CVI_VX_NO_TMP_LD,
+           "VX Accumulator renaming not allowed from tmp load instruction", "",
+           "")
+
+DEF_ATTRIB(RESTRICT_CVI_NOVP,
+           "Instructions with this attribute are assigned to the original "
+           "shift unit and can not be assigned to the shift/permute unit",
+           "", "")
+
+DEF_ATTRIB(CVI_GATHER_ADDR_2B, "CVI Scatter/Gather address is halfword", "", "")
+DEF_ATTRIB(CVI_GATHER_ADDR_4B, "CVI Scatter/Gather address is word", "", "")
+
+DEF_ATTRIB(VFETCH, "memory fetch op to L2 for a single vector", "", "")
+
 DEF_ATTRIB(CVI_SLOT23, "Can execute in slot 2 or slot 3 (HVX)", "", "")
 
-DEF_ATTRIB(VTCM_ALLBANK_ACCESS, "Allocates in all VTCM schedulers.", "", "")
+DEF_ATTRIB(HVX_FLT, "This a floating point HVX instruction.", "", "")
+
+DEF_ATTRIB(
+    VTCM_ALLBANK_ACCESS,
+    "This instruction allocates in all VTCM schedulers due to a region access.",
+    "", "")
+DEF_ATTRIB(XUMINOR, "XU minor SMTable instruction", "", "")
+
+DEF_ATTRIB(SYNC_MARKER, "This instruction needs a sync marker.", "", "")
+
 
 /* Change-of-flow attributes */
 DEF_ATTRIB(JUMP, "Jump-type instruction", "", "")
+DEF_ATTRIB(DIRECT, "Uses an PC-relative immediate field", "", "")
 DEF_ATTRIB(INDIRECT, "Absolute register jump", "", "")
+DEF_ATTRIB(CJUMP, "Conditional jump", "", "")
 DEF_ATTRIB(CALL, "Function call instruction", "", "")
+DEF_ATTRIB(RET, "Function return instruction", "", "")
+DEF_ATTRIB(PERM, "Permute instruction", "", "")
 DEF_ATTRIB(COF, "Change-of-flow instruction", "", "")
 DEF_ATTRIB(HINTED_COF, "This instruction is a hinted change-of-flow", "", "")
 DEF_ATTRIB(CONDEXEC, "May be cancelled by a predicate", "", "")
+DEF_ATTRIB(DOTOLD, "Uses a predicate generated in a previous packet", "", "")
+DEF_ATTRIB(DOTNEW, "Uses a predicate generated in the same packet", "", "")
 DEF_ATTRIB(DOTNEWVALUE, "Uses a register value generated in this pkt", "", "")
 DEF_ATTRIB(NEWCMPJUMP, "Compound compare and jump", "", "")
 DEF_ATTRIB(NVSTORE, "New-value store", "", "")
 DEF_ATTRIB(MEMOP, "memop", "", "")
 
-DEF_ATTRIB(ROPS_2, "Compound instruction worth 2 RISC-ops", "", "")
-DEF_ATTRIB(ROPS_3, "Compound instruction worth 3 RISC-ops", "", "")
+DEF_ATTRIB(ROPS_2, "Compound instruction worth 2 wimpy RISC-ops", "", "")
+DEF_ATTRIB(ROPS_3, "Compound instruction worth 3 wimpy RISC-ops", "", "")
 
 /* access to implicit registers */
 DEF_ATTRIB(IMPLICIT_WRITES_LR, "Writes the link register", "", "UREG.LR")
+DEF_ATTRIB(IMPLICIT_READS_LR, "Reads the link register", "UREG.LR", "")
+DEF_ATTRIB(IMPLICIT_READS_LC0, "Reads loop count for loop 0", "UREG.LC0", "")
+DEF_ATTRIB(IMPLICIT_READS_LC1, "Reads loop count for loop 1", "UREG.LC1", "")
+DEF_ATTRIB(IMPLICIT_READS_SA0, "Reads start address for loop 0", "UREG.SA0", "")
+DEF_ATTRIB(IMPLICIT_READS_SA1, "Reads start address for loop 1", "UREG.SA1", "")
+DEF_ATTRIB(IMPLICIT_WRITES_PC, "Writes the program counter", "", "UREG.PC")
+DEF_ATTRIB(IMPLICIT_READS_PC, "Reads the program counter", "UREG.PC", "")
 DEF_ATTRIB(IMPLICIT_WRITES_SP, "Writes the stack pointer", "", "UREG.SP")
+DEF_ATTRIB(IMPLICIT_READS_SP, "Reads the stack pointer", "UREG.SP", "")
 DEF_ATTRIB(IMPLICIT_WRITES_FP, "Writes the frame pointer", "", "UREG.FP")
+DEF_ATTRIB(IMPLICIT_READS_FP, "Reads the frame pointer", "UREG.FP", "")
+DEF_ATTRIB(IMPLICIT_WRITES_GP, "Writes the GP register", "", "UREG.GP")
+DEF_ATTRIB(IMPLICIT_READS_GP, "Reads the GP register", "UREG.GP", "")
 DEF_ATTRIB(IMPLICIT_WRITES_LC0, "Writes loop count for loop 0", "", "UREG.LC0")
 DEF_ATTRIB(IMPLICIT_WRITES_LC1, "Writes loop count for loop 1", "", "UREG.LC1")
 DEF_ATTRIB(IMPLICIT_WRITES_SA0, "Writes start addr for loop 0", "", "UREG.SA0")
 DEF_ATTRIB(IMPLICIT_WRITES_SA1, "Writes start addr for loop 1", "", "UREG.SA1")
+DEF_ATTRIB(IMPLICIT_WRITES_R00, "Writes Register 0", "", "UREG.R00")
 DEF_ATTRIB(IMPLICIT_WRITES_P0, "Writes Predicate 0", "", "UREG.P0")
 DEF_ATTRIB(IMPLICIT_WRITES_P1, "Writes Predicate 1", "", "UREG.P1")
 DEF_ATTRIB(IMPLICIT_WRITES_P2, "Writes Predicate 1", "", "UREG.P2")
 DEF_ATTRIB(IMPLICIT_WRITES_P3, "May write Predicate 3", "", "UREG.P3")
-DEF_ATTRIB(IMPLICIT_READS_PC, "Reads the PC register", "", "")
-DEF_ATTRIB(IMPLICIT_READS_P0, "Reads the P0 register", "", "")
-DEF_ATTRIB(IMPLICIT_READS_P1, "Reads the P1 register", "", "")
-DEF_ATTRIB(IMPLICIT_READS_P2, "Reads the P2 register", "", "")
-DEF_ATTRIB(IMPLICIT_READS_P3, "Reads the P3 register", "", "")
+DEF_ATTRIB(IMPLICIT_READS_R00, "Reads Register 0", "UREG.R00", "")
+DEF_ATTRIB(IMPLICIT_READS_P0, "Reads Predicate 0", "UREG.P0", "")
+DEF_ATTRIB(IMPLICIT_READS_P1, "Reads Predicate 1", "UREG.P1", "")
+DEF_ATTRIB(IMPLICIT_READS_P3, "Reads Predicate 3", "UREG.P3", "")
+DEF_ATTRIB(IMPLICIT_READS_Q3, "Reads Vector Predicate 3", "UREG.Q3", "")
+DEF_ATTRIB(IMPLICIT_READS_CS, "Reads the CS/M register", "UREG.CS", "")
+DEF_ATTRIB(IMPLICIT_READS_FRAMEKEY, "Reads FRAMEKEY", "UREG.FRAMEKEY", "")
+DEF_ATTRIB(IMPLICIT_READS_FRAMELIMIT, "Reads FRAMELIMIT", "UREG.FRAMELIMIT", "")
+DEF_ATTRIB(IMPLICIT_READS_ELR, "Reads the ELR register", "MREG.ELR", "")
+DEF_ATTRIB(IMPLICIT_READS_SGP0, "Reads the SGP0 register", "MREG.SGP0", "")
+DEF_ATTRIB(IMPLICIT_READS_SGP1, "Reads the SGP1 register", "MREG.SGP1", "")
+DEF_ATTRIB(IMPLICIT_WRITES_SGP0, "Reads the SGP0 register", "", "MREG.SGP0")
+DEF_ATTRIB(IMPLICIT_WRITES_SGP1, "Reads the SGP1 register", "", "MREG.SGP1")
+DEF_ATTRIB(IMPLICIT_WRITES_STID_PRIO_ANYTHREAD, "Reads", "", "MREG.STID.PRIO")
+DEF_ATTRIB(IMPLICIT_WRITES_SRBIT, "Writes the OVF bit", "", "UREG.SR.OVF")
+DEF_ATTRIB(IMPLICIT_WRITES_FPFLAGS, "May write FP flags", "", "UREG.SR.FPFLAGS")
+DEF_ATTRIB(IMPLICIT_WRITES_LPCFG, "Writes the loop config", "", "UREG.SR.LPCFG")
+DEF_ATTRIB(IMPLICIT_WRITES_CVBITS, "Writes the CV flags", "", "UREG.SR.CV")
+DEF_ATTRIB(IMPLICIT_READS_FPRND, "May read FP rnd mode", "UREG.SR.FPRND", "")
+DEF_ATTRIB(IMPLICIT_READS_SSR, "May read SSR values", "MREG.SSR", "")
+DEF_ATTRIB(IMPLICIT_READS_CCR, "May read CCR values", "MREG.CCR", "")
+DEF_ATTRIB(IMPLICIT_WRITES_CCR, "May write CCR values", "", "MREG.CCR")
+DEF_ATTRIB(IMPLICIT_WRITES_SSR, "May write SSR values", "", "MREG.SSR")
+DEF_ATTRIB(IMPLICIT_READS_GELR, "May read GELR values", "GREG.GELR", "")
+DEF_ATTRIB(IMPLICIT_READS_GEVB, "May read GEVB values", "MREG.GEVB", "")
+DEF_ATTRIB(IMPLICIT_READS_GSR, "May read GSR values", "GREG.GSR", "")
+DEF_ATTRIB(IMPLICIT_READS_GOSP, "May read GOSP values", "GREG.GOSP", "")
+DEF_ATTRIB(IMPLICIT_WRITES_GELR, "May write GELR values", "", "GREG.GELR")
+DEF_ATTRIB(IMPLICIT_WRITES_GSR, "May write GSR values", "", "GREG.GSR")
+DEF_ATTRIB(IMPLICIT_WRITES_GOSP, "May write GOSP values", "", "GREG.GOSP")
+DEF_ATTRIB(IMPLICIT_READS_IPENDAD_IPEND, "May read", "MREG.IPENDAD.IPEND", "")
+DEF_ATTRIB(IMPLICIT_WRITES_IPENDAD_IPEND, "May write", "", "MREG.IPENDAD.IPEND")
+DEF_ATTRIB(IMPLICIT_READS_IPENDAD_IAD, "May read", "MREG.IPENDAD.IAD", "")
+DEF_ATTRIB(IMPLICIT_WRITES_IPENDAD_IAD, "May write", "", "MREG.IPENDAD.IAD")
+DEF_ATTRIB(IMPLICIT_WRITES_IMASK_ANYTHREAD, "May write", "", "MREG.IMASK")
+DEF_ATTRIB(IMPLICIT_READS_IMASK_ANYTHREAD, "May read", "MREG.IMASK", "")
+DEF_ATTRIB(IMPLICIT_READS_SYSCFG_K0LOCK, "May read", "MREG.SYSCFG.K0LOCK", "")
+DEF_ATTRIB(IMPLICIT_WRITES_SYSCFG_K0LOCK, "May write", "", "MREG.SYSCFG.K0LOCK")
+DEF_ATTRIB(IMPLICIT_READS_SYSCFG_TLBLOCK, "May read", "MREG.SYSCFG.TLBLOCK", "")
+DEF_ATTRIB(IMPLICIT_WRITES_SYSCFG_TLBLOCK, "May wr", "", "MREG.SYSCFG.TLBLOCK")
+DEF_ATTRIB(IMPLICIT_WRITES_SYSCFG_GCA, "May write", "", "MREG.SYSCFG.GCA")
+DEF_ATTRIB(IMPLICIT_READS_SYSCFG_GCA, "May read", "MREG.SYSCFG.GCA", "")
 DEF_ATTRIB(IMPLICIT_WRITES_USR, "May write USR", "", "")
-DEF_ATTRIB(IMPLICIT_READS_SP, "Reads the SP register", "", "")
+
+/* Other things the instruction does */
+DEF_ATTRIB(ACC, "Has a multiply", "", "")
+DEF_ATTRIB(MPY, "Has a multiply", "", "")
+DEF_ATTRIB(SATURATE, "Does signed saturation", "", "")
+DEF_ATTRIB(USATURATE, "Does unsigned saturation", "", "")
+DEF_ATTRIB(CIRCADDR, "Uses circular addressing mode", "", "")
+DEF_ATTRIB(BREVADDR, "Uses bit reverse addressing mode", "", "")
+DEF_ATTRIB(BIDIRSHIFTL, "Uses a bidirectional shift left", "", "")
+DEF_ATTRIB(BIDIRSHIFTR, "Uses a bidirectional shift right", "", "")
+DEF_ATTRIB(BRANCHADDER, "Contains a PC-plus-immediate operation.", "", "")
+DEF_ATTRIB(CRSLOT23, "Can execute in slot 2 or slot 3 (CR)", "", "")
 DEF_ATTRIB(COMMUTES, "The operation is communitive", "", "")
 DEF_ATTRIB(DEALLOCRET, "dealloc_return", "", "")
 DEF_ATTRIB(DEALLOCFRAME, "deallocframe", "", "")
 
-DEF_ATTRIB(CRSLOT23, "Can execute in slot 2 or slot 3 (CR)", "", "")
+/* Instruction Types */
+
+DEF_ATTRIB(IT_ALU, "ALU type", "", "")
+DEF_ATTRIB(IT_ALU_ADDSUB, "ALU add or subtract type", "", "")
+DEF_ATTRIB(IT_ALU_MINMAX, "ALU MIN or MAX type", "", "")
+DEF_ATTRIB(IT_ALU_MOVE, "ALU data movement type", "", "")
+DEF_ATTRIB(IT_ALU_LOGICAL, "ALU logical operation type", "", "")
+DEF_ATTRIB(IT_ALU_SHIFT, "ALU shift operation type", "", "")
+DEF_ATTRIB(IT_ALU_SHIFT_AND_OP, "ALU shift and additional op type", "", "")
+DEF_ATTRIB(IT_ALU_CMP, "ALU compare operation type", "", "")
+
+DEF_ATTRIB(IT_LOAD, "Loads from memory", "", "")
+DEF_ATTRIB(IT_STORE, "Stores to memory", "", "")
+
+DEF_ATTRIB(IT_MPY, "Multiply type", "", "")
+DEF_ATTRIB(IT_MPY_32, "32-bit Multiply type", "", "")
+
+DEF_ATTRIB(IT_COF, "Change-of-flow type", "", "")
+DEF_ATTRIB(IT_HWLOOP, "Sets up hardware loop registers", "", "")
+
+DEF_ATTRIB(IT_MISC, "misc instruction type", "", "")
+
 DEF_ATTRIB(IT_NOP, "nop instruction", "", "")
 DEF_ATTRIB(IT_EXTENDER, "constant extender instruction", "", "")
 
 
+/* Exceptions the instruction can generate */
+
+DEF_ATTRIB(EXCEPTION_TLB, "Can generate a TLB Miss Exception", "", "")
+DEF_ATTRIB(EXCEPTION_ACCESS, "Can generate Access Violation Exception", "", "")
+DEF_ATTRIB(EXCEPTION_SWI, "Software Interrupt (trap) exception", "", "")
+
+
+/* Documentation Notes */
+DEF_ATTRIB(NOTE_ARCHV2, "Only available in the V2 architecture", "", "")
+
+DEF_ATTRIB(NOTE_PACKET_PC, "The PC is the addr of the start of the pkt", "", "")
+
+DEF_ATTRIB(NOTE_PACKET_NPC, "Next PC is the address following pkt", "", "")
+
+DEF_ATTRIB(NOTE_CONDITIONAL, "can be conditionally executed", "", "")
+
+DEF_ATTRIB(NOTE_NEWVAL_SLOT0, "New-value oprnd must execute on slot 0", "", "")
+
+DEF_ATTRIB(NOTE_RELATIVE_ADDRESS, "A PC-relative address is formed", "", "")
+
+DEF_ATTRIB(NOTE_LA_RESTRICT, "Cannot be in the last pkt of a HW loop", "", "")
+
+DEF_ATTRIB(NOTE_OOBVSHIFT, "Possible shift overflow", "", "")
+DEF_ATTRIB(NOTE_BIDIRSHIFT, "Bidirectional shift", "", "")
+
+DEF_ATTRIB(NOTE_CVFLAGS, "Sets the Carry and Overflow flags in USR.", "", "")
+DEF_ATTRIB(NOTE_SR_OVF_WHEN_SATURATING, "Might set OVF bit", "", "")
+DEF_ATTRIB(NOTE_STNT,
+           "Non Temporal Data. The :nt appendix is a hint to the "
+           "microarchitecture indicating that the life of the cache line is "
+           "short. This information is used throughout the cache hierarchy to "
+           "make replacement and allocation decisions.",
+           "", "")
+DEF_ATTRIB(NOTE_PRIV, "Monitor-level feature", "", "")
+DEF_ATTRIB(NOTE_GUEST, "Guest-level feature", "", "")
+DEF_ATTRIB(NOTE_NOPACKET, "solo instruction", "", "")
+DEF_ATTRIB(NOTE_AXOK, "May only be grouped with ALU32 or non-FP XTYPE.", "", "")
+DEF_ATTRIB(NOTE_NOSLOT1, "Packet with this insn must have slot 1 empty", "", "")
+DEF_ATTRIB(NOTE_SLOT1_AOK, "Packet must have slot 1 empty or ALU32", "", "")
+DEF_ATTRIB(NOTE_NOSLOT01, "Packet must have both slot 1 and 2 empty", "", "")
+DEF_ATTRIB(NOTE_NEEDS_MEMLD, "Must be grouped with a memory load", "", "")
+DEF_ATTRIB(NOTE_LATEPRED, "The predicate can not be used as a .new", "", "")
+DEF_ATTRIB(NOTE_COMPAT_ACCURACY, "In the future accuracy may increase", "", "")
+DEF_ATTRIB(NOTE_NVSLOT0, "Can execute only in slot 0 (ST)", "", "")
+DEF_ATTRIB(NOTE_DEPRECATED, "Will be deprecated in a future version.", "", "")
+DEF_ATTRIB(NOTE_NONAPALIV1, "may not work correctly in Napali V1.", "", "")
+DEF_ATTRIB(NOTE_NOLAHAINAV1, "This may not work correctly in Lahaina V1.", "",
+           "")
+DEF_ATTRIB(NOTE_BADTAG_UNDEF, "Undefined if a tag is non-present", "", "")
+DEF_ATTRIB(NOTE_NOSLOT2_MPY, "Packet cannot have a slot 2 multiply", "", "")
+DEF_ATTRIB(NOTE_HVX_ONLY, "Only available on a core with HVX.", "", "")
+
+DEF_ATTRIB(NOTE_NOCOF_RESTRICT, "Cannot be grouped with any COF", "", "")
+DEF_ATTRIB(NOTE_BRANCHADDER_MAX1, "One PC-plus-offset calculation", "", "")
+
+DEF_ATTRIB(NOTE_CRSLOT23, "Execute on either slot2 or slot3 (CR)", "", "")
+DEF_ATTRIB(NOTE_EXTENSION_AUDIO, "Hexagon audio extensions", "", "")
+DEF_ATTRIB(NOTE_FETCHNT,
+           "Non Temporal Data Cache Prefetch. The :nt appendix is a hint to "
+           "the microarchitecture indicating that the life of the cache line "
+           "fetched is short. This information is used throughout the cache "
+           "hierarchy to make replacement and allocation decisions.",
+           "", "")
+DEF_ATTRIB(NOTE_VECX_V67, "This instruction is only available on V67", "", "")
+
+DEF_ATTRIB(NOTE_NOVP,
+           "This instruction cannot be paired with a HVX permute instruction",
+           "", "")
+DEF_ATTRIB(NOTE_VA_UNARY,
+           "If a packet contains this instruction and a HVX ALU op then the "
+           "ALU OP must be unary.",
+           "", "")
+
+
+/* V6 MMVector Notes for Documentation */
+DEF_ATTRIB(NOTE_ANY_RESOURCE, "Can use any HVX resource.", "", "")
+DEF_ATTRIB(NOTE_ANY2_RESOURCE, "Uses any pair of the HVX resources", "", "")
+DEF_ATTRIB(NOTE_PERMUTE_RESOURCE, "Uses the HVX permute resource.", "", "")
+DEF_ATTRIB(NOTE_SHIFT_RESOURCE, "Uses the HVX shift resource.", "", "")
+DEF_ATTRIB(NOTE_MPY_RESOURCE, "Uses a HVX multiply resource.", "", "")
+DEF_ATTRIB(NOTE_MPYDV_RESOURCE, "Uses both HVX multiply resources.", "", "")
+DEF_ATTRIB(NOTE_NT_VMEM, "Non-temporal hint to the micro-architecture", "", "")
+DEF_ATTRIB(NOTE_ALL_RESOURCE, "Uses all HVX resources.", "", "")
+DEF_ATTRIB(NOTE_VMEM, "Immediates are in multiples of vector length.", "", "")
+DEF_ATTRIB(NOTE_ANY_VS_VX_RESOURCE, "Consumes two resources", "", "")
+
+DEF_ATTRIB(NOTE_RT8, "Input scalar register Rt is limited to R0-R7", "", "")
+
+DEF_ATTRIB(NOTE_MX, "This is in-memory matrix multiply instruction.", "", "")
+DEF_ATTRIB(NOTE_VX_ACC_FWD,
+           "The accumulator (Vxx) source of this instruction must be generate "
+           "in the previous packet to avoid a stall. The accumulator cannot "
+           "come from a .tmp operation.",
+           "", "")
+DEF_ATTRIB(NOTE_TMP_NO_VX,
+           "The tmp load instruction destination register cannot be an "
+           "accumulator register.",
+           "", "")
+
+DEF_ATTRIB(
+    NOTE_NO_ECC,
+    "ECC is not supported for scatter and gather instructions. Enabling ECC "
+    "with unprotected access instructions result in undetermined behavior.",
+    "", "")
+
+/* FP8 instructions */
+DEF_ATTRIB(HVX_FP8, "HVX FP8 extension instruction", "", "")
+DEF_ATTRIB(HVX_IEEE_FP_OUT_8, "HVX IEEE FP extension instruction: 8-bit output",
+           "", "")
+
 /* Restrictions to make note of */
+DEF_ATTRIB(RESTRICT_LOOP_LA, "Cannot be in the last packet of a loop", "", "")
+DEF_ATTRIB(RESTRICT_NEEDS_MEMLD, "Must be grouped with a load", "", "")
 DEF_ATTRIB(RESTRICT_COF_MAX1, "One change-of-flow per packet", "", "")
 DEF_ATTRIB(RESTRICT_NOPACKET, "Not allowed in a packet", "", "")
+DEF_ATTRIB(RESTRICT_NOSRMOVE, "Do not write SR in the same packet", "", "")
 DEF_ATTRIB(RESTRICT_SLOT0ONLY, "Must execute on slot0", "", "")
 DEF_ATTRIB(RESTRICT_SLOT1ONLY, "Must execute on slot1", "", "")
 DEF_ATTRIB(RESTRICT_SLOT2ONLY, "Must execute on slot2", "", "")
 DEF_ATTRIB(RESTRICT_SLOT3ONLY, "Must execute on slot3", "", "")
+DEF_ATTRIB(RESTRICT_NOSLOT2_MPY, "A packet cannot have a slot 2 mpy", "", "")
 DEF_ATTRIB(RESTRICT_NOSLOT1, "No slot 1 instruction in parallel", "", "")
+DEF_ATTRIB(RESTRICT_SLOT1_AOK, "Slot 1 insn must be empty or A-type", "", "")
+DEF_ATTRIB(RESTRICT_NOSLOT01, "No slot 0 or 1 instructions in parallel", "", "")
+DEF_ATTRIB(RESTRICT_NOSLOT1_STORE, "Packet must not have slot 1 store", "", "")
+DEF_ATTRIB(RESTRICT_NOSLOT0_LOAD, "Packet must not have a slot 1 load", "", "")
+DEF_ATTRIB(RESTRICT_NOCOF, "Cannot be grouped with any COF", "", "")
+DEF_ATTRIB(RESTRICT_BRANCHADDER_MAX1, "One PC-plus-offset calculation", "", "")
 DEF_ATTRIB(RESTRICT_PREFERSLOT0, "Try to encode into slot 0", "", "")
+DEF_ATTRIB(RESTRICT_SINGLE_MEM_FIRST, "Single memory op must be last", "", "")
 DEF_ATTRIB(RESTRICT_PACKET_AXOK, "May exist with A-type or X-type", "", "")
+DEF_ATTRIB(RESTRICT_PACKET_SOMEREGS_OK, "Relaxed grouping rules", "", "")
+DEF_ATTRIB(RESTRICT_LATEPRED, "Predicate can not be used as a .new.", "", "")
+
+DEF_ATTRIB(PAIR_1OF2, "For assembler", "", "")
+DEF_ATTRIB(PAIR_2OF2, "For assembler", "", "")
+DEF_ATTRIB(NOTE_MX_PAIR,
+           "Weights and Activations need to be paired in a packet.", "", "")
+DEF_ATTRIB(NOTE_RESTRICT_CVI_NOVP,
+           "This instruction cannot use the permute/shift resource", "", "")
+
+/* Performance based preferences */
+DEF_ATTRIB(PREFER_SLOT3, "Complex XU prefering slot3", "", "")
+
+DEF_ATTRIB(RELAX_COF_1ST, "COF can be fisrt in assembly order", "", "")
+DEF_ATTRIB(RELAX_COF_2ND, "COF can be second in assembly order", "", "")
 
 DEF_ATTRIB(ICOP, "Instruction cache op", "", "")
 
+DEF_ATTRIB(INTRINSIC_RETURNS_UNSIGNED, "Intrinsic returns an unsigned", "", "")
+
+DEF_ATTRIB(PRED_BIT_1, "The branch uses bit 1 as the prediction bit", "", "")
+DEF_ATTRIB(PRED_BIT_4, "The branch uses bit 4 as the prediction bit", "", "")
+DEF_ATTRIB(PRED_BIT_8, "The branch uses bit 8 as the prediction bit", "", "")
+DEF_ATTRIB(PRED_BIT_12, "The branch uses bit 12 as the prediction bit", "", "")
+DEF_ATTRIB(PRED_BIT_13, "The branch uses bit 13 as the prediction bit", "", "")
+DEF_ATTRIB(PRED_BIT_7, "The branch uses bit 7 as the prediction bit", "", "")
+DEF_ATTRIB(HWLOOP0_SETUP, "Sets up HW loop0", "", "")
+DEF_ATTRIB(HWLOOP1_SETUP, "Sets up HW loop1", "", "")
 DEF_ATTRIB(HWLOOP0_END, "Ends HW loop0", "", "")
 DEF_ATTRIB(HWLOOP1_END, "Ends HW loop1", "", "")
 DEF_ATTRIB(RET_TYPE, "return type", "", "")
+DEF_ATTRIB(HINTJR, "hintjr type", "", "")
 DEF_ATTRIB(DCZEROA, "dczeroa type", "", "")
+DEF_ATTRIB(ICTAGOP, "ictag op type", "", "")
 DEF_ATTRIB(ICFLUSHOP, "icflush op type", "", "")
 DEF_ATTRIB(DCFLUSHOP, "dcflush op type", "", "")
+DEF_ATTRIB(DCTAGOP, "dctag op type", "", "")
 DEF_ATTRIB(L2FLUSHOP, "l2flush op type", "", "")
+DEF_ATTRIB(L2TAGOP, "l2tag op type", "", "")
 DEF_ATTRIB(DCFETCH, "dcfetch type", "", "")
+DEF_ATTRIB(BIMODAL_BRANCH, "Updates the bimodal branch predictor", "", "")
 
+DEF_ATTRIB(VECINSN, "Long Vector Instruction", "", "")
+DEF_ATTRIB(MEMSIZE_32B, "Memory width is 32 bytes", "", "")
+DEF_ATTRIB(FOUR_PHASE, "Four Phase Instruction", "", "")
 DEF_ATTRIB(L2FETCH, "Instruction is l2fetch type", "", "")
 
+DEF_ATTRIB(PREDUSE_BSB, "Instructions need back-skip-back scheduling", "", "")
 DEF_ATTRIB(ICINVA, "icinva", "", "")
 DEF_ATTRIB(DCCLEANINVA, "dccleaninva", "", "")
 
+DEF_ATTRIB(EXTENSION_AUDIO, "audio extension", "", "")
+
+DEF_ATTRIB(MEMCPY, "memcpy or dma-type instruction", "", "")
 DEF_ATTRIB(NO_INTRINSIC, "Don't generate an intrisic", "", "")
 
-/* Documentation Notes */
-DEF_ATTRIB(NOTE_CONDITIONAL, "can be conditionally executed", "", "")
-DEF_ATTRIB(NOTE_NEWVAL_SLOT0, "New-value oprnd must execute on slot 0", "", "")
-DEF_ATTRIB(NOTE_PRIV, "Monitor-level feature", "", "")
-DEF_ATTRIB(NOTE_NOPACKET, "solo instruction", "", "")
-DEF_ATTRIB(NOTE_AXOK, "May only be grouped with ALU32 or non-FP XTYPE.", "", "")
-DEF_ATTRIB(NOTE_LATEPRED, "The predicate can not be used as a .new", "", "")
-DEF_ATTRIB(NOTE_NVSLOT0, "Can execute only in slot 0 (ST)", "", "")
-DEF_ATTRIB(NOTE_NOVP, "Cannot be paired with a HVX permute instruction", "", "")
-DEF_ATTRIB(NOTE_VA_UNARY, "Combined with HVX ALU op (must be unary)", "", "")
+DEF_ATTRIB(NO_XML, "Don't generate a XML docs for this instruction", "", "")
 
-/* V6 MMVector Notes for Documentation */
-DEF_ATTRIB(NOTE_SHIFT_RESOURCE, "Uses the HVX shift resource.", "", "")
-/* Restrictions to make note of */
-DEF_ATTRIB(RESTRICT_NOSLOT1_STORE, "Packet must not have slot 1 store", "", "")
-DEF_ATTRIB(RESTRICT_LATEPRED, "Predicate can not be used as a .new.", "", "")
+DEF_ATTRIB(DMA, "User-DMA instruction", "", "")
+DEF_ATTRIB(VERIF_DMASTEP,
+           "Hiphop needs to step dma prior to executing this packet", "", "")
+DEF_ATTRIB(VERIF_DMATICK,
+           "DMA gets a tick in verif mode for this instruction after a commit",
+           "", "")
+
+DEF_ATTRIB(HVX_IEEE_FP, "HVX IEEE FP extension instruction", "", "")
+DEF_ATTRIB(NOTE_HVX_IEEE_FP,
+           "Only supported on the HVX cores with the IEEE FP extension", "", "")
+
+DEF_ATTRIB(HVX_IEEE_FP_DV_ONE,
+           "HVX IEEE FP extension instruction - dual pipes: P2 and P3 - output "
+           "only on P2",
+           "", "")
+DEF_ATTRIB(HVX_IEEE_FP_ACC, "HVX IEEE FP accumulate instruction", "", "")
+DEF_ATTRIB(HVX_IEEE_BF,
+           "HVX IEEE BF extension instruction: 16-bit bfloat input", "", "")
+DEF_ATTRIB(HVX_IEEE_FP_OUT_BF,
+           "HVX IEEE FP extension instruction: 16-bit bfloat output", "", "")
+DEF_ATTRIB(HVX_IEEE_FP_OUT_16,
+           "HVX IEEE FP extension instruction: 16-bit output", "", "")
+DEF_ATTRIB(HVX_IEEE_FP_OUT_32,
+           "HVX IEEE FP extension instruction: 32-bit output", "", "")
+DEF_ATTRIB(HVX_IEEE_FP_BINARY_LATE,
+           "HVX IEEE FP extension instruction: Both inputs can arrive late", "",
+           "")
 
 /* Keep this as the last attribute: */
 DEF_ATTRIB(ZZ_LASTATTRIB, "Last attribute in the file", "", "")
diff --git a/target/hexagon/common-semi-target.h b/target/hexagon/common-semi-target.h
new file mode 100644
index 0000000000000..759aaeba905fa
--- /dev/null
+++ b/target/hexagon/common-semi-target.h
@@ -0,0 +1,87 @@
+/*
+ * Target-specific parts of semihosting/arm-compat-semi.c.
+ *
+ * Copyright(c) 2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef TARGET_HEXAGON_COMMON_SEMI_TARGET_H
+#define TARGET_HEXAGON_COMMON_SEMI_TARGET_H
+
+#include "cpu.h"
+#include "cpu_helper.h"
+#include "qemu/log.h"
+#include "semihosting/uaccess.h"
+
+static inline bool common_semi_read_arg_word(CPUArchState *env,
+                                             target_ulong *save_to,
+                                             target_ulong args_addr,
+                                             int arg_num)
+{
+    hexagon_read_memory(env, args_addr + (arg_num) * 4, 4, save_to, 0);
+    return false;
+}
+
+static inline target_ulong common_semi_arg(CPUState *cs, int argno)
+{
+    CPUHexagonState *env = cpu_env(cs);
+    return arch_get_thread_reg(env, HEX_REG_R00 + argno);
+}
+
+static inline void common_semi_set_ret(CPUState *cs, target_ulong ret)
+{
+    CPUHexagonState *env = cpu_env(cs);
+    arch_set_thread_reg(env, HEX_REG_R00, ret);
+}
+
+static inline void hex_semi_set_err(CPUState *cs, target_ulong err)
+{
+    CPUHexagonState *env = cpu_env(cs);
+    arch_set_thread_reg(env, HEX_REG_R01, err);
+}
+
+static inline bool common_semi_sys_exit_extended(CPUState *cs, int nr)
+{
+    return false;
+}
+
+static inline bool is_64bit_semihosting(CPUArchState *env)
+{
+    return false;
+}
+
+static inline target_ulong common_semi_stack_bottom(CPUState *cs)
+{
+    CPUHexagonState *env = cpu_env(cs);
+    return arch_get_thread_reg(env, HEX_REG_SP);
+}
+
+static inline bool common_semi_has_synccache(CPUArchState *env)
+{
+    return false;
+}
+
+static inline void hex_prepare_for_read(CPUState *cs, target_ulong fd,
+                                        target_ulong buf, target_ulong len)
+{
+    CPUHexagonState *env = cpu_env(cs);
+    /*
+     * Need to make sure the page we are going to write to is available.
+     * The file pointer advances with the read. If the write to bufaddr
+     * faults the swi function will be restarted but the file pointer
+     * will be wrong.
+     */
+    hexagon_touch_memory(env, buf, len, 0);
+}
+
+const struct semihosting_opt_callbacks hex_opt_callbacks = {
+    .prepare_for_read = hex_prepare_for_read,
+    .set_err = hex_semi_set_err,
+};
+
+SEMIHOSTING_REGISTER_OPT_CALLBACKS(hex_opt_callbacks)
+
+#define SEMIHOSTING_EXT_OPEN_MODES
+
+#endif
diff --git a/target/hexagon/cpu-param.h b/target/hexagon/cpu-param.h
index 45ee7b46409c7..d414ca89d690b 100644
--- a/target/hexagon/cpu-param.h
+++ b/target/hexagon/cpu-param.h
@@ -18,9 +18,18 @@
 #ifndef HEXAGON_CPU_PARAM_H
 #define HEXAGON_CPU_PARAM_H
 
+#ifdef CONFIG_USER_ONLY
 #define TARGET_PAGE_BITS 16     /* 64K pages */
+#else
+#define TARGET_PAGE_BITS 12     /* 4K pages */
+#endif
 
 #define TARGET_PHYS_ADDR_SPACE_BITS 36
 #define TARGET_VIRT_ADDR_SPACE_BITS 32
 
+/*
+ * Hexagon processors have a strong memory model.
+ */
+#define TCG_GUEST_DEFAULT_MO      (TCG_MO_ALL)
+
 #endif
diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c
index 0b7fc98f6ce19..c649aef99ee28 100644
--- a/target/hexagon/cpu.c
+++ b/target/hexagon/cpu.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -20,19 +20,39 @@
 #include "cpu.h"
 #include "internal.h"
 #include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "exec/translation-block.h"
 #include "qapi/error.h"
 #include "hw/qdev-properties.h"
 #include "fpu/softfloat-helpers.h"
 #include "tcg/tcg.h"
 #include "exec/gdbstub.h"
+#include "cpu_helper.h"
+#include "max.h"
+#include "hex_mmu.h"
+#include "hw/hexagon/hexagon.h"
+
+#ifndef CONFIG_USER_ONLY
+#include "macros.h"
+#include "sys_macros.h"
+#include "qemu/main-loop.h"
+#include "hex_interrupts.h"
+#include "hexswi.h"
+#endif
+
+#define DEFINE_STD_CPU_INIT_FUNC(REV) \
+    static void hexagon_##REV##_cpu_init(Object *obj) \
+    { \
+        HexagonCPU *cpu = HEXAGON_CPU(obj); \
+        cpu->rev_reg = REV##_rev; \
+    }
 
-static void hexagon_v66_cpu_init(Object *obj) { }
-static void hexagon_v67_cpu_init(Object *obj) { }
-static void hexagon_v68_cpu_init(Object *obj) { }
-static void hexagon_v69_cpu_init(Object *obj) { }
-static void hexagon_v71_cpu_init(Object *obj) { }
-static void hexagon_v73_cpu_init(Object *obj) { }
+DEFINE_STD_CPU_INIT_FUNC(v66)
+DEFINE_STD_CPU_INIT_FUNC(v67)
+DEFINE_STD_CPU_INIT_FUNC(v68)
+DEFINE_STD_CPU_INIT_FUNC(v69)
+DEFINE_STD_CPU_INIT_FUNC(v71)
+DEFINE_STD_CPU_INIT_FUNC(v73)
 
 static ObjectClass *hexagon_cpu_class_by_name(const char *cpu_model)
 {
@@ -50,6 +70,18 @@ static ObjectClass *hexagon_cpu_class_by_name(const char *cpu_model)
 }
 
 static const Property hexagon_cpu_properties[] = {
+#if !defined(CONFIG_USER_ONLY)
+    DEFINE_PROP_UINT32("jtlb-entries", HexagonCPU, num_tlbs, MAX_TLB_ENTRIES),
+    DEFINE_PROP_UINT32("l2vic-base-addr", HexagonCPU, l2vic_base_addr,
+        0xffffffffULL),
+    DEFINE_PROP_UINT32("qtimer-base-addr", HexagonCPU, qtimer_base_addr,
+                       0xffffffffULL),
+    DEFINE_PROP_UINT32("hvx-contexts", HexagonCPU, hvx_contexts, 0),
+    DEFINE_PROP_UINT32("exec-start-addr", HexagonCPU, boot_addr, 0xffffffffULL),
+    DEFINE_PROP_UINT64("config-table-addr", HexagonCPU, config_table_addr,
+                       0xffffffffULL),
+#endif
+    DEFINE_PROP_UINT32("dsp-rev", HexagonCPU, rev_reg, 0),
     DEFINE_PROP_BOOL("lldb-compat", HexagonCPU, lldb_compat, false),
     DEFINE_PROP_UNSIGNED("lldb-stack-adjust", HexagonCPU, lldb_stack_adjust, 0,
                          qdev_prop_uint32, target_ulong),
@@ -62,11 +94,41 @@ const char * const hexagon_regnames[TOTAL_PER_THREAD_REGS] = {
   "r16", "r17", "r18", "r19", "r20",  "r21", "r22", "r23",
   "r24", "r25", "r26", "r27", "r28",  "r29", "r30", "r31",
   "sa0", "lc0", "sa1", "lc1", "p3_0", "c5",  "m0",  "m1",
-  "usr", "pc",  "ugp", "gp",  "cs0",  "cs1", "c14", "c15",
-  "c16", "c17", "c18", "c19", "pkt_cnt",  "insn_cnt", "hvx_cnt", "c23",
-  "c24", "c25", "c26", "c27", "c28",  "c29", "c30", "c31",
+  "usr", "pc",  "ugp", "gp",  "cs0",  "cs1", "upcyclelo", "upcyclehi",
+  "framelimit", "framekey", "pktcountlo", "pktcounthi", "upmucnt0",
+  "upmucnt1", "upmucnt2", "upmucnt3", "upmucnt4", "upmucnt5", "upmucnt6",
+  "upmucnt7",  "c28", "c29", "utimerlo", "utimerhi",
 };
 
+#ifndef CONFIG_USER_ONLY
+const char * const hexagon_sregnames[] = {
+    "sgp0",       "sgp1",       "stid",       "elr",        "badva0",
+    "badva1",     "ssr",        "ccr",        "htid",       "badva",
+    "imask",      "gevb",       "vwctrl",     "s13",        "s14",
+    "s15",        "evb",        "modectl",    "syscfg",     "segment",
+    "ipendad",    "vid",        "vid1",       "bestwait",   "s24",
+    "schedcfg",   "s26",        "cfgbase",    "diag",       "rev",
+    "pcyclelo",   "pcyclehi",   "isdbst",     "isdbcfg0",   "isdbcfg1",
+    "livelock",   "brkptpc0",   "brkptcfg0",  "brkptpc1",   "brkptcfg1",
+    "isdbmbxin",  "isdbmbxout", "isdben",     "isdbgpr",    "pmucnt4",
+    "pmucnt5",    "pmucnt6",    "pmucnt7",    "pmucnt0",    "pmucnt1",
+    "pmucnt2",    "pmucnt3",    "pmuevtcfg",  "pmustid0",   "pmuevtcfg1",
+    "pmustid1",   "timerlo",    "timerhi",    "pmucfg",     "rgdr2",
+    "rgdr",       "turkey",     "duck",       "chicken",
+};
+
+G_STATIC_ASSERT(NUM_SREGS == ARRAY_SIZE(hexagon_sregnames));
+
+const char * const hexagon_gregnames[] = {
+    "gelr",       "gsr",       "gosp",      "gbadva",    "gcommit1t",
+    "gcommit2t",  "gcommit3t", "gcommit4t", "gcommit5t", "gcommit6t",
+    "gpcycle1t",  "gpcycle2t", "gpcycle3t", "gpcycle4t", "gpcycle5t",
+    "gpcycle6t",  "gpmucnt4",  "gpmucnt5",  "gpmucnt6",  "gpmucnt7",
+    "gcommit7t",  "gcommit8t", "gpcycle7t", "gpcycle8t", "gpcyclelo",
+    "gpcyclehi",  "gpmucnt0",  "gpmucnt1",  "gpmucnt2",  "gpmucnt3",
+    "g30",        "g31",
+};
+#endif
 /*
  * One of the main debugging techniques is to use "-d cpu" and compare against
  * LLDB output when single stepping.  However, the target and qemu put the
@@ -176,7 +238,7 @@ void hexagon_debug_qreg(CPUHexagonState *env, int regnum)
     print_qreg(stdout, env, regnum, false);
 }
 
-static void hexagon_dump(CPUHexagonState *env, FILE *f, int flags)
+void hexagon_dump(CPUHexagonState *env, FILE *f, int flags)
 {
     HexagonCPU *cpu = env_archcpu(env);
 
@@ -216,8 +278,7 @@ static void hexagon_dump(CPUHexagonState *env, FILE *f, int flags)
     qemu_fprintf(f, "  cs0 = 0x00000000\n");
     qemu_fprintf(f, "  cs1 = 0x00000000\n");
 #else
-    print_reg(f, env, HEX_REG_CAUSE);
-    print_reg(f, env, HEX_REG_BADVA);
+    print_reg(f, env, HEX_SREG_BADVA);
     print_reg(f, env, HEX_REG_CS0);
     print_reg(f, env, HEX_REG_CS1);
 #endif
@@ -262,10 +323,25 @@ static void hexagon_cpu_synchronize_from_tb(CPUState *cs,
     cpu_env(cs)->gpr[HEX_REG_PC] = tb->pc;
 }
 
+#ifndef CONFIG_USER_ONLY
+bool hexagon_thread_is_enabled(CPUHexagonState *env)
+{
+    target_ulong modectl = arch_get_system_reg(env, HEX_SREG_MODECTL);
+    uint32_t thread_enabled_mask = GET_FIELD(MODECTL_E, modectl);
+    bool E_bit = thread_enabled_mask & (0x1 << env->threadId);
+
+    return E_bit;
+}
+
 static bool hexagon_cpu_has_work(CPUState *cs)
 {
-    return true;
+    CPUHexagonState *env = cpu_env(cs);
+
+    return hexagon_thread_is_enabled(env) &&
+        (cs->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_SWI
+            | CPU_INTERRUPT_K0_UNLOCK | CPU_INTERRUPT_TLB_UNLOCK));
 }
+#endif
 
 static void hexagon_restore_state_to_opc(CPUState *cs,
                                          const TranslationBlock *tb,
@@ -274,6 +350,29 @@ static void hexagon_restore_state_to_opc(CPUState *cs,
     cpu_env(cs)->gpr[HEX_REG_PC] = data[0];
 }
 
+
+#ifndef CONFIG_USER_ONLY
+static void mmu_reset(CPUHexagonState *env)
+{
+    CPUState *cs = env_cpu(env);
+    if (cs->cpu_index == 0) {
+        memset(env->hex_tlb, 0, sizeof(*env->hex_tlb));
+    }
+}
+
+void hexagon_cpu_soft_reset(CPUHexagonState *env)
+{
+    BQL_LOCK_GUARD();
+    arch_set_system_reg(env, HEX_SREG_SSR, 0);
+    hexagon_ssr_set_cause(env, HEX_CAUSE_RESET);
+
+    target_ulong evb = arch_get_system_reg(env, HEX_SREG_EVB);
+    arch_set_thread_reg(env, HEX_REG_PC, evb);
+}
+#endif
+
+
+#define HEXAGON_CFG_ADDR_BASE(addr) (((addr) >> 16) & 0x0fffff)
 static void hexagon_cpu_reset_hold(Object *obj, ResetType type)
 {
     CPUState *cs = CPU(obj);
@@ -288,11 +387,51 @@ static void hexagon_cpu_reset_hold(Object *obj, ResetType type)
     set_float_detect_tininess(float_tininess_before_rounding, &env->fp_status);
     /* Default NaN value: sign bit set, all frac bits set */
     set_float_default_nan_pattern(0b11111111, &env->fp_status);
+
+#ifndef CONFIG_USER_ONLY
+    HexagonCPU *cpu = HEXAGON_CPU(cs);
+
+    if (cs->cpu_index == 0) {
+        memset(env->g_sreg, 0, sizeof(target_ulong) * NUM_SREGS);
+    }
+    memset(env->t_sreg, 0, sizeof(target_ulong) * NUM_SREGS);
+    memset(env->greg, 0, sizeof(target_ulong) * NUM_GREGS);
+
+    if (cs->cpu_index == 0) {
+        arch_set_system_reg(env, HEX_SREG_REV, cpu->rev_reg);
+        arch_set_system_reg(env, HEX_SREG_MODECTL, 0x1);
+        *(env->g_pcycle_base) = 0;
+    }
+
+    memset(env->gpr, 0, sizeof(target_ulong) * TOTAL_PER_THREAD_REGS);
+    memset(env->pred, 0, sizeof(target_ulong) * NUM_PREGS);
+    memset(env->VRegs, 0, sizeof(MMVector) * NUM_VREGS);
+    memset(env->QRegs, 0, sizeof(MMQReg) * NUM_QREGS);
+    memset(env->vstore_pending, 0, sizeof(target_ulong) * VSTORES_MAX);
+    env->t_cycle_count = 0;
+    env->vtcm_pending = false;
+
+    mmu_reset(env);
+    arch_set_system_reg(env, HEX_SREG_HTID, cs->cpu_index);
+    hexagon_cpu_soft_reset(env);
+    env->threadId = cs->cpu_index;
+    env->tlb_lock_state = HEX_LOCK_UNLOCKED;
+    env->k0_lock_state = HEX_LOCK_UNLOCKED;
+    env->tlb_lock_count = 0;
+    env->k0_lock_count = 0;
+    env->next_PC = 0;
+    env->wait_next_pc = 0;
+    env->cause_code = -1;
+    arch_set_thread_reg(env, HEX_REG_PC, cpu->boot_addr);
+    arch_set_system_reg(env, HEX_SREG_CFGBASE,
+                        HEXAGON_CFG_ADDR_BASE(cpu->config_table_addr));
+#endif
 }
 
 static void hexagon_cpu_disas_set_info(CPUState *s, disassemble_info *info)
 {
     info->print_insn = print_insn_hexagon;
+    info->endian = BFD_ENDIAN_LITTLE;
 }
 
 static void hexagon_cpu_realize(DeviceState *dev, Error **errp)
@@ -307,29 +446,292 @@ static void hexagon_cpu_realize(DeviceState *dev, Error **errp)
         return;
     }
 
+#ifndef CONFIG_USER_ONLY
+    HexagonCPU *cpu = HEXAGON_CPU(cs);
+    if (cpu->num_tlbs > MAX_TLB_ENTRIES) {
+        error_setg(errp, "Number of TLBs selected is invalid");
+        return;
+    }
+#endif
+
     gdb_register_coprocessor(cs, hexagon_hvx_gdb_read_register,
                              hexagon_hvx_gdb_write_register,
                              gdb_find_static_feature("hexagon-hvx.xml"), 0);
 
+#ifndef CONFIG_USER_ONLY
+    gdb_register_coprocessor(cs, hexagon_sys_gdb_read_register,
+                             hexagon_sys_gdb_write_register,
+                             gdb_find_static_feature("hexagon-sys.xml"), 0);
+#endif
+
     qemu_init_vcpu(cs);
-    cpu_reset(cs);
+    CPUHexagonState *env = cpu_env(cs);
+#ifndef CONFIG_USER_ONLY
+    hex_mmu_realize(env);
+    if (cs->cpu_index == 0) {
+        env->g_sreg = g_new0(target_ulong, NUM_SREGS);
+    } else {
+        CPUState *cpu0 = qemu_get_cpu(0);
+        CPUHexagonState *env0 = cpu_env(cpu0);
+        env->g_sreg = env0->g_sreg;
+    }
+#endif
+    if (cs->cpu_index == 0) {
+        env->g_pcycle_base = g_malloc0(sizeof(*env->g_pcycle_base));
+    } else {
+        CPUState *cpu0 = qemu_get_cpu(0);
+        env->g_pcycle_base = cpu_env(cpu0)->g_pcycle_base;
+    }
 
     mcc->parent_realize(dev, errp);
 }
 
+#if !defined(CONFIG_USER_ONLY)
+static void hexagon_cpu_set_irq(void *opaque, int irq, int level)
+{
+    HexagonCPU *cpu = HEXAGON_CPU(opaque);
+    CPUState *cs = CPU(cpu);
+    CPUHexagonState *env = cpu_env(cs);
+
+    switch (irq) {
+    case HEXAGON_CPU_IRQ_0 ... HEXAGON_CPU_IRQ_7:
+        qemu_log_mask(CPU_LOG_INT, "%s: irq %d, level %d\n",
+                      __func__, irq, level);
+        if (level) {
+            hex_raise_interrupts(env, 1 << irq, CPU_INTERRUPT_HARD);
+        }
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+#endif
+
+
 static void hexagon_cpu_init(Object *obj)
 {
+#if !defined(CONFIG_USER_ONLY)
+    HexagonCPU *cpu = HEXAGON_CPU(obj);
+    qdev_init_gpio_in(DEVICE(cpu), hexagon_cpu_set_irq, 8);
+#endif
+}
+
+#include "accel/tcg/cpu-ops.h"
+
+#if !defined(CONFIG_USER_ONLY)
+static bool get_physical_address(CPUHexagonState *env, hwaddr *phys, int *prot,
+                                 int *size, int32_t *excp, target_ulong address,
+                                 MMUAccessType access_type, int mmu_idx)
+
+{
+    if (hexagon_cpu_mmu_enabled(env)) {
+        return hex_tlb_find_match(env, address, access_type, phys, prot, size,
+                                  excp, mmu_idx);
+    } else {
+        *phys = address & 0xFFFFFFFF;
+        *prot = PAGE_VALID | PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+        *size = TARGET_PAGE_SIZE;
+        return true;
+    }
+}
+
+/* qemu seems to only want to know about TARGET_PAGE_SIZE pages */
+static void find_qemu_subpage(vaddr *addr, hwaddr *phys, int page_size)
+{
+    vaddr page_start = *addr & ~((vaddr)(page_size - 1));
+    vaddr offset = ((*addr - page_start) / TARGET_PAGE_SIZE) * TARGET_PAGE_SIZE;
+    *addr = page_start + offset;
+    *phys += offset;
+}
+
+static hwaddr hexagon_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
+{
+    CPUHexagonState *env = cpu_env(cs);
+    hwaddr phys_addr;
+    int prot;
+    int page_size = 0;
+    int32_t excp = 0;
+    int mmu_idx = MMU_KERNEL_IDX;
+
+    if (get_physical_address(env, &phys_addr, &prot, &page_size, &excp,
+                             addr, 0, mmu_idx)) {
+        find_qemu_subpage(&addr, &phys_addr, page_size);
+        return phys_addr;
+    }
+
+    return -1;
+}
+
+
+#define INVALID_BADVA 0xbadabada
+
+static void set_badva_regs(CPUHexagonState *env, target_ulong VA, int slot,
+                           MMUAccessType access_type)
+{
+    arch_set_system_reg(env, HEX_SREG_BADVA, VA);
+
+    if (access_type == MMU_INST_FETCH || slot == 0) {
+        arch_set_system_reg(env, HEX_SREG_BADVA0, VA);
+        arch_set_system_reg(env, HEX_SREG_BADVA1, INVALID_BADVA);
+        SET_SSR_FIELD(env, SSR_V0, 1);
+        SET_SSR_FIELD(env, SSR_V1, 0);
+        SET_SSR_FIELD(env, SSR_BVS, 0);
+    } else if (slot == 1) {
+        arch_set_system_reg(env, HEX_SREG_BADVA0, INVALID_BADVA);
+        arch_set_system_reg(env, HEX_SREG_BADVA1, VA);
+        SET_SSR_FIELD(env, SSR_V0, 0);
+        SET_SSR_FIELD(env, SSR_V1, 1);
+        SET_SSR_FIELD(env, SSR_BVS, 1);
+    } else {
+        g_assert_not_reached();
+    }
+}
+
+static void raise_tlbmiss_exception(CPUState *cs, target_ulong VA, int slot,
+                                    MMUAccessType access_type)
+{
+    CPUHexagonState *env = cpu_env(cs);
+
+    set_badva_regs(env, VA, slot, access_type);
+
+    switch (access_type) {
+    case MMU_INST_FETCH:
+        cs->exception_index = HEX_EVENT_TLB_MISS_X;
+        if ((VA & ~TARGET_PAGE_MASK) == 0) {
+            env->cause_code = HEX_CAUSE_TLBMISSX_CAUSE_NEXTPAGE;
+        } else {
+            env->cause_code = HEX_CAUSE_TLBMISSX_CAUSE_NORMAL;
+        }
+        break;
+    case MMU_DATA_LOAD:
+        cs->exception_index = HEX_EVENT_TLB_MISS_RW;
+        env->cause_code = HEX_CAUSE_TLBMISSRW_CAUSE_READ;
+        break;
+    case MMU_DATA_STORE:
+        cs->exception_index = HEX_EVENT_TLB_MISS_RW;
+        env->cause_code = HEX_CAUSE_TLBMISSRW_CAUSE_WRITE;
+        break;
+    }
+}
+
+static void raise_perm_exception(CPUState *cs, target_ulong VA, int slot,
+                                 MMUAccessType access_type, int32_t excp)
+{
+    CPUHexagonState *env = cpu_env(cs);
+
+    set_badva_regs(env, VA, slot, access_type);
+    cs->exception_index = excp;
+}
+
+static const char *access_type_names[] = { "MMU_DATA_LOAD ", "MMU_DATA_STORE",
+                                           "MMU_INST_FETCH" };
+
+static const char *mmu_idx_names[] = { "MMU_USER_IDX", "MMU_GUEST_IDX",
+                                       "MMU_KERNEL_IDX" };
+
+static bool hexagon_tlb_fill(CPUState *cs, vaddr address, int size,
+                             MMUAccessType access_type, int mmu_idx, bool probe,
+                             uintptr_t retaddr)
+{
+    CPUHexagonState *env = cpu_env(cs);
+    static int slot = 0 /* This is always zero for now */;
+    hwaddr phys;
+    int prot = 0;
+    int page_size = 0;
+    int32_t excp = 0;
+    bool ret = 0;
+
+    qemu_log_mask(
+        CPU_LOG_MMU,
+        "%s: tid = 0x%x, pc = 0x%08" PRIx32 ", vaddr = 0x%08" VADDR_PRIx
+        ", size = %d, %s,\tprobe = %d, %s\n",
+        __func__, env->threadId, env->gpr[HEX_REG_PC], address, size,
+        access_type_names[access_type], probe, mmu_idx_names[mmu_idx]);
+    ret = get_physical_address(env, &phys, &prot, &page_size, &excp, address,
+                               access_type, mmu_idx);
+    if (ret) {
+        if (!excp) {
+            find_qemu_subpage(&address, &phys, page_size);
+            tlb_set_page(cs, address, phys, prot, mmu_idx, TARGET_PAGE_SIZE);
+            return ret;
+        } else {
+            raise_perm_exception(cs, address, slot, access_type, excp);
+            do_raise_exception(env, cs->exception_index, env->gpr[HEX_REG_PC],
+                               retaddr);
+        }
+    }
+    if (probe) {
+        return false;
+    }
+    raise_tlbmiss_exception(cs, address, slot, access_type);
+    do_raise_exception(env, cs->exception_index, env->gpr[HEX_REG_PC], retaddr);
 }
 
-#include "hw/core/tcg-cpu-ops.h"
+
+#include "hw/core/sysemu-cpu-ops.h"
+
+static const struct SysemuCPUOps hexagon_sysemu_ops = {
+    .has_work = hexagon_cpu_has_work,
+    .get_phys_page_debug = hexagon_cpu_get_phys_page_debug,
+};
+
+static bool hexagon_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
+{
+    CPUHexagonState *env = cpu_env(cs);
+    if (interrupt_request & CPU_INTERRUPT_TLB_UNLOCK) {
+        cs->halted = false;
+        cpu_reset_interrupt(cs, CPU_INTERRUPT_TLB_UNLOCK);
+        return true;
+    }
+    if (interrupt_request & CPU_INTERRUPT_K0_UNLOCK) {
+        cs->halted = false;
+        cpu_reset_interrupt(cs, CPU_INTERRUPT_K0_UNLOCK);
+        return true;
+    }
+    if (interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_SWI)) {
+        return hex_check_interrupts(env);
+    }
+    return false;
+}
+
+#endif
 
 static const TCGCPUOps hexagon_tcg_ops = {
     .initialize = hexagon_translate_init,
     .translate_code = hexagon_translate_code,
     .synchronize_from_tb = hexagon_cpu_synchronize_from_tb,
     .restore_state_to_opc = hexagon_restore_state_to_opc,
+#if !defined(CONFIG_USER_ONLY)
+    .cpu_exec_interrupt = hexagon_cpu_exec_interrupt,
+    .tlb_fill = hexagon_tlb_fill,
+    .cpu_exec_halt = hexagon_cpu_has_work,
+    .do_interrupt = hexagon_cpu_do_interrupt,
+#endif /* !CONFIG_USER_ONLY */
 };
 
+static int hexagon_cpu_mmu_index(CPUState *cs, bool ifetch)
+{
+#ifndef CONFIG_USER_ONLY
+    BQL_LOCK_GUARD();
+    CPUHexagonState *env = cpu_env(cs);
+    uint32_t syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG);
+    uint8_t mmuen = GET_SYSCFG_FIELD(SYSCFG_MMUEN, syscfg);
+    if (!mmuen) {
+        return MMU_KERNEL_IDX;
+    }
+
+    int cpu_mode = get_cpu_mode(env);
+    if (cpu_mode == HEX_CPU_MODE_MONITOR) {
+        return MMU_KERNEL_IDX;
+    } else if (cpu_mode == HEX_CPU_MODE_GUEST) {
+        return MMU_GUEST_IDX;
+    }
+#endif
+
+    return MMU_USER_IDX;
+}
+
+
 static void hexagon_cpu_class_init(ObjectClass *c, void *data)
 {
     HexagonCPUClass *mcc = HEXAGON_CPU_CLASS(c);
@@ -345,7 +747,7 @@ static void hexagon_cpu_class_init(ObjectClass *c, void *data)
                                        &mcc->parent_phases);
 
     cc->class_by_name = hexagon_cpu_class_by_name;
-    cc->has_work = hexagon_cpu_has_work;
+    cc->mmu_index = hexagon_cpu_mmu_index;
     cc->dump_state = hexagon_dump_state;
     cc->set_pc = hexagon_cpu_set_pc;
     cc->get_pc = hexagon_cpu_get_pc;
@@ -354,8 +756,38 @@ static void hexagon_cpu_class_init(ObjectClass *c, void *data)
     cc->gdb_stop_before_watchpoint = true;
     cc->gdb_core_xml_file = "hexagon-core.xml";
     cc->disas_set_info = hexagon_cpu_disas_set_info;
+#ifndef CONFIG_USER_ONLY
+    cc->sysemu_ops = &hexagon_sysemu_ops;
+    dc->vmsd = &vmstate_hexagon_cpu;
+#endif
+#ifdef CONFIG_TCG
     cc->tcg_ops = &hexagon_tcg_ops;
+#endif
+}
+
+#ifndef CONFIG_USER_ONLY
+uint32_t hexagon_greg_read(CPUHexagonState *env, uint32_t reg)
+{
+    target_ulong ssr = arch_get_system_reg(env, HEX_SREG_SSR);
+    int ssr_ce = GET_SSR_FIELD(SSR_CE, ssr);
+
+    if (reg <= HEX_GREG_G3) {
+        return env->greg[reg];
+    }
+    switch (reg) {
+    case HEX_GREG_GPCYCLELO:
+        return ssr_ce ? hexagon_get_sys_pcycle_count_low(env) : 0;
+
+    case HEX_GREG_GPCYCLEHI:
+        return ssr_ce ? hexagon_get_sys_pcycle_count_high(env) : 0;
+
+    default:
+        qemu_log_mask(LOG_UNIMP, "reading greg %" PRId32
+                " not yet supported.\n", reg);
+        return 0;
+    }
 }
+#endif
 
 #define DEFINE_CPU(type_name, initfn)      \
     {                                      \
diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h
index 79e60d4bfa1b0..8b334068e295b 100644
--- a/target/hexagon/cpu.h
+++ b/target/hexagon/cpu.h
@@ -20,12 +20,23 @@
 
 #include "fpu/softfloat-types.h"
 
+#define NUM_GREGS 32
+#define GREG_WRITES_MAX 32
+#define NUM_SREGS 64
+#define SREG_WRITES_MAX 64
+
 #include "cpu-qom.h"
 #include "exec/cpu-defs.h"
+#include "exec/cpu-common.h"
 #include "hex_regs.h"
 #include "mmvec/mmvec.h"
 #include "hw/registerfields.h"
 
+#ifndef CONFIG_USER_ONLY
+#include "reg_fields.h"
+typedef struct CPUHexagonTLBContext CPUHexagonTLBContext;
+#endif
+
 #define NUM_PREGS 4
 #define TOTAL_PER_THREAD_REGS 64
 
@@ -34,11 +45,48 @@
 #define REG_WRITES_MAX 32
 #define PRED_WRITES_MAX 5                   /* 4 insns + endloop */
 #define VSTORES_MAX 2
+#define THREADS_MAX 8
+#define VECTOR_UNIT_MAX 8
+
+#ifndef CONFIG_USER_ONLY
+#define CPU_INTERRUPT_SWI      CPU_INTERRUPT_TGT_INT_0
+#define CPU_INTERRUPT_K0_UNLOCK CPU_INTERRUPT_TGT_INT_1
+#define CPU_INTERRUPT_TLB_UNLOCK CPU_INTERRUPT_TGT_INT_2
+
+#define HEX_CPU_MODE_USER    1
+#define HEX_CPU_MODE_GUEST   2
+#define HEX_CPU_MODE_MONITOR 3
+
+#define HEX_EXE_MODE_OFF     1
+#define HEX_EXE_MODE_RUN     2
+#define HEX_EXE_MODE_WAIT    3
+#define HEX_EXE_MODE_DEBUG   4
+#endif
+
+#define MMU_USER_IDX         0
+#ifndef CONFIG_USER_ONLY
+#define MMU_GUEST_IDX        1
+#define MMU_KERNEL_IDX       2
+
+#define HEXAGON_CPU_IRQ_0 0
+#define HEXAGON_CPU_IRQ_1 1
+#define HEXAGON_CPU_IRQ_2 2
+#define HEXAGON_CPU_IRQ_3 3
+#define HEXAGON_CPU_IRQ_4 4
+#define HEXAGON_CPU_IRQ_5 5
+#define HEXAGON_CPU_IRQ_6 6
+#define HEXAGON_CPU_IRQ_7 7
+
+typedef enum {
+    HEX_LOCK_UNLOCKED       = 0,
+    HEX_LOCK_WAITING        = 1,
+    HEX_LOCK_OWNER          = 2,
+    HEX_LOCK_QUEUED        = 3
+} hex_lock_state_t;
+#endif
 
 #define CPU_RESOLVING_TYPE TYPE_HEXAGON_CPU
 
-#define MMU_USER_IDX 0
-
 typedef struct {
     target_ulong va;
     uint8_t width;
@@ -71,12 +119,33 @@ typedef struct {
 typedef struct CPUArchState {
     target_ulong gpr[TOTAL_PER_THREAD_REGS];
     target_ulong pred[NUM_PREGS];
+    target_ulong cause_code;
 
     /* For comparing with LLDB on target - see adjust_stack_ptrs function */
     target_ulong last_pc_dumped;
     target_ulong stack_start;
 
     uint8_t slot_cancelled;
+    uint64_t t_cycle_count;
+    uint64_t *g_pcycle_base;
+#ifndef CONFIG_USER_ONLY
+    /* Some system registers are per thread and some are global. */
+    target_ulong t_sreg[NUM_SREGS];
+    target_ulong *g_sreg;
+
+    target_ulong greg[NUM_GREGS];
+    target_ulong wait_next_pc;
+
+    /* This alias of CPUState.cpu_index is used by imported sources: */
+    target_ulong threadId;
+    hex_lock_state_t tlb_lock_state;
+    hex_lock_state_t k0_lock_state;
+    target_ulong tlb_lock_count;
+    target_ulong k0_lock_count;
+    CPUHexagonTLBContext *hex_tlb;
+    GList *dir_list;
+#endif
+    target_ulong next_PC;
     target_ulong new_value_usr;
 
     MemLog mem_log_stores[STORES_MAX];
@@ -119,19 +188,48 @@ struct ArchCPU {
 
     CPUHexagonState env;
 
+    uint32_t rev_reg;
     bool lldb_compat;
     target_ulong lldb_stack_adjust;
     bool short_circuit;
+#ifndef CONFIG_USER_ONLY
+    uint32_t num_tlbs;
+    uint32_t l2vic_base_addr;
+    uint32_t qtimer_base_addr;
+    uint32_t hvx_contexts;
+    uint32_t boot_addr;
+    uint64_t config_table_addr;
+#endif
 };
 
 #include "cpu_bits.h"
 
 FIELD(TB_FLAGS, IS_TIGHT_LOOP, 0, 1)
+FIELD(TB_FLAGS, MMU_INDEX, 1, 3)
+FIELD(TB_FLAGS, PCYCLE_ENABLED, 4, 1)
 
 G_NORETURN void hexagon_raise_exception_err(CPUHexagonState *env,
                                             uint32_t exception,
                                             uintptr_t pc);
 
+#ifndef CONFIG_USER_ONLY
+/*
+ * @return true if the @a thread_env hardware thread is
+ * not stopped.
+ */
+bool hexagon_thread_is_enabled(CPUHexagonState *thread_env);
+uint32_t hexagon_greg_read(CPUHexagonState *env, uint32_t reg);
+uint32_t hexagon_sreg_read(CPUHexagonState *env, uint32_t reg);
+void hexagon_gdb_sreg_write(CPUHexagonState *env, uint32_t reg, uint32_t val);
+void hexagon_cpu_soft_reset(CPUHexagonState *env);
+#endif
+
+#include "exec/cpu-all.h"
+
+#ifndef CONFIG_USER_ONLY
+#include "cpu_helper.h"
+#endif
+
 static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, vaddr *pc,
                                         uint64_t *cs_base, uint32_t *flags)
 {
@@ -141,10 +239,27 @@ static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, vaddr *pc,
     if (*pc == env->gpr[HEX_REG_SA0]) {
         hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP, 1);
     }
-    *flags = hex_flags;
     if (*pc & PCALIGN_MASK) {
         hexagon_raise_exception_err(env, HEX_CAUSE_PC_NOT_ALIGNED, 0);
     }
+#ifndef CONFIG_USER_ONLY
+    target_ulong syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG);
+
+    bool pcycle_enabled = extract32(syscfg,
+                                    reg_field_info[SYSCFG_PCYCLEEN].offset,
+                                    reg_field_info[SYSCFG_PCYCLEEN].width);
+
+    hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, MMU_INDEX,
+                           cpu_mmu_index(env_cpu(env), false));
+
+    if (pcycle_enabled) {
+        hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, PCYCLE_ENABLED, 1);
+    }
+#else
+    hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, PCYCLE_ENABLED, true);
+    hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, MMU_INDEX, MMU_USER_IDX);
+#endif
+    *flags = hex_flags;
 }
 
 typedef HexagonCPU ArchCPU;
@@ -152,7 +267,4 @@ typedef HexagonCPU ArchCPU;
 void hexagon_translate_init(void);
 void hexagon_translate_code(CPUState *cs, TranslationBlock *tb,
                             int *max_insns, vaddr pc, void *host_pc);
-
-#include "exec/cpu-all.h"
-
 #endif /* HEXAGON_CPU_H */
diff --git a/target/hexagon/cpu_bits.h b/target/hexagon/cpu_bits.h
index ff596e2a94c98..c7cc426ec8888 100644
--- a/target/hexagon/cpu_bits.h
+++ b/target/hexagon/cpu_bits.h
@@ -24,19 +24,88 @@
 #define PCALIGN_MASK (PCALIGN - 1)
 
 enum hex_event {
-    HEX_EVENT_NONE           = -1,
-    HEX_EVENT_TRAP0          =  0x008,
+    HEX_EVENT_NONE = -1,
+    HEX_EVENT_RESET = 0x0,
+    HEX_EVENT_IMPRECISE = 0x1,
+    HEX_EVENT_PRECISE = 0x2,
+    HEX_EVENT_TLB_MISS_X = 0x4,
+    HEX_EVENT_TLB_MISS_RW = 0x6,
+    HEX_EVENT_TRAP0 = 0x8,
+    HEX_EVENT_TRAP1 = 0x9,
+    HEX_EVENT_FPTRAP = 0xb,
+    HEX_EVENT_DEBUG = 0xc,
+    HEX_EVENT_INT0 = 0x10,
+    HEX_EVENT_INT1 = 0x11,
+    HEX_EVENT_INT2 = 0x12,
+    HEX_EVENT_INT3 = 0x13,
+    HEX_EVENT_INT4 = 0x14,
+    HEX_EVENT_INT5 = 0x15,
+    HEX_EVENT_INT6 = 0x16,
+    HEX_EVENT_INT7 = 0x17,
+    HEX_EVENT_INT8 = 0x18,
+    HEX_EVENT_INT9 = 0x19,
+    HEX_EVENT_INTA = 0x1a,
+    HEX_EVENT_INTB = 0x1b,
+    HEX_EVENT_INTC = 0x1c,
+    HEX_EVENT_INTD = 0x1d,
+    HEX_EVENT_INTE = 0x1e,
+    HEX_EVENT_INTF = 0x1f,
 };
 
 enum hex_cause {
     HEX_CAUSE_NONE = -1,
-    HEX_CAUSE_TRAP0 = 0x172,
-    HEX_CAUSE_FETCH_NO_UPAGE =  0x012,
-    HEX_CAUSE_INVALID_PACKET =  0x015,
-    HEX_CAUSE_INVALID_OPCODE =  0x015,
-    HEX_CAUSE_PC_NOT_ALIGNED =  0x01e,
-    HEX_CAUSE_PRIV_NO_UREAD  =  0x024,
-    HEX_CAUSE_PRIV_NO_UWRITE =  0x025,
+    HEX_CAUSE_RESET = 0x000,
+    HEX_CAUSE_BIU_PRECISE = 0x001,
+    HEX_CAUSE_UNSUPORTED_HVX_64B = 0x002, /* QEMU-specific */
+    HEX_CAUSE_DOUBLE_EXCEPT = 0x003,
+    HEX_CAUSE_TRAP0 = 0x008,
+    HEX_CAUSE_TRAP1 = 0x009,
+    HEX_CAUSE_FETCH_NO_XPAGE = 0x011,
+    HEX_CAUSE_FETCH_NO_UPAGE = 0x012,
+    HEX_CAUSE_INVALID_PACKET = 0x015,
+    HEX_CAUSE_INVALID_OPCODE = 0x015,
+    HEX_CAUSE_NO_COPROC_ENABLE = 0x016,
+    HEX_CAUSE_NO_COPROC2_ENABLE = 0x018,
+    HEX_CAUSE_PRIV_USER_NO_GINSN = 0x01a,
+    HEX_CAUSE_PRIV_USER_NO_SINSN = 0x01b,
+    HEX_CAUSE_REG_WRITE_CONFLICT = 0x01d,
+    HEX_CAUSE_PC_NOT_ALIGNED = 0x01e,
+    HEX_CAUSE_MISALIGNED_LOAD = 0x020,
+    HEX_CAUSE_MISALIGNED_STORE = 0x021,
+    HEX_CAUSE_PRIV_NO_READ = 0x022,
+    HEX_CAUSE_PRIV_NO_WRITE = 0x023,
+    HEX_CAUSE_PRIV_NO_UREAD = 0x024,
+    HEX_CAUSE_PRIV_NO_UWRITE = 0x025,
+    HEX_CAUSE_COPROC_LDST = 0x026,
+    HEX_CAUSE_STACK_LIMIT = 0x027,
+    HEX_CAUSE_VWCTRL_WINDOW_MISS = 0x029,
+    HEX_CAUSE_IMPRECISE_NMI = 0x043,
+    HEX_CAUSE_IMPRECISE_MULTI_TLB_MATCH = 0x044,
+    HEX_CAUSE_TLBMISSX_CAUSE_NORMAL = 0x060,
+    HEX_CAUSE_TLBMISSX_CAUSE_NEXTPAGE = 0x061,
+    HEX_CAUSE_TLBMISSRW_CAUSE_READ = 0x070,
+    HEX_CAUSE_TLBMISSRW_CAUSE_WRITE = 0x071,
+    HEX_CAUSE_DEBUG_SINGLESTEP = 0x80,
+    HEX_CAUSE_FPTRAP_CAUSE_BADFLOAT = 0x0bf,
+    HEX_CAUSE_INT0 = 0x0c0,
+    HEX_CAUSE_INT1 = 0x0c1,
+    HEX_CAUSE_INT2 = 0x0c2,
+    HEX_CAUSE_INT3 = 0x0c3,
+    HEX_CAUSE_INT4 = 0x0c4,
+    HEX_CAUSE_INT5 = 0x0c5,
+    HEX_CAUSE_INT6 = 0x0c6,
+    HEX_CAUSE_INT7 = 0x0c7,
+    HEX_CAUSE_VIC0 = 0x0c2,
+    HEX_CAUSE_VIC1 = 0x0c3,
+    HEX_CAUSE_VIC2 = 0x0c4,
+    HEX_CAUSE_VIC3 = 0x0c5,
+};
+
+enum data_cache_state {
+    HEX_DC_STATE_INVALID   = 0x0,
+    HEX_DC_STATE_VALID     = 0x1,
+    HEX_DC_STATE_RESERVED  = 0x2,
+    HEX_DC_STATE_UNUSED_WT = 0x3,
 };
 
 #define PACKET_WORDS_MAX         4
diff --git a/target/hexagon/cpu_helper.c b/target/hexagon/cpu_helper.c
new file mode 100644
index 0000000000000..6d9bf34aff377
--- /dev/null
+++ b/target/hexagon/cpu_helper.c
@@ -0,0 +1,583 @@
+/*
+ * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "cpu_helper.h"
+#include "system/cpus.h"
+#ifdef CONFIG_USER_ONLY
+#include "qemu.h"
+#include "exec/helper-proto.h"
+#else
+#include "hw/boards.h"
+#include "hw/hexagon/hexagon.h"
+#include "hex_interrupts.h"
+#include "hex_mmu.h"
+#endif
+#include "exec/exec-all.h"
+#include "exec/cputlb.h"
+#include "exec/cpu_ldst.h"
+#include "qemu/log.h"
+#include "tcg/tcg-op.h"
+#include "internal.h"
+#include "macros.h"
+#include "sys_macros.h"
+#include "arch.h"
+
+
+#ifndef CONFIG_USER_ONLY
+
+static bool hexagon_read_memory_small(CPUHexagonState *env, target_ulong addr,
+                                      int byte_count, unsigned char *dstbuf,
+                                      int mmu_idx, uintptr_t retaddr)
+ {
+    /* handle small sizes */
+    switch (byte_count) {
+    case 1:
+        *dstbuf = cpu_ldub_mmuidx_ra(env, addr, mmu_idx, retaddr);
+        return true;
+
+    case 2:
+        if (QEMU_IS_ALIGNED(addr, 2)) {
+            *(unsigned short *)dstbuf =
+                cpu_lduw_mmuidx_ra(env, addr, mmu_idx, retaddr);
+            return true;
+        }
+        break;
+
+    case 4:
+        if (QEMU_IS_ALIGNED(addr, 4)) {
+            *(uint32_t *)dstbuf =
+                cpu_ldl_mmuidx_ra(env, addr, mmu_idx, retaddr);
+            return true;
+        }
+        break;
+
+    case 8:
+        if (QEMU_IS_ALIGNED(addr, 8)) {
+            *(uint64_t *)dstbuf =
+                cpu_ldq_mmuidx_ra(env, addr, mmu_idx, retaddr);
+            return true;
+        }
+        break;
+
+    default:
+        /* larger request, handle elsewhere */
+        return false;
+    }
+
+    /* not aligned, copy bytes */
+    for (int i = 0; i < byte_count; ++i) {
+        *dstbuf++ = cpu_ldub_mmuidx_ra(env, addr++, mmu_idx, retaddr);
+    }
+    return true;
+}
+
+void hexagon_read_memory(CPUHexagonState *env, target_ulong vaddr, int size,
+                         void *retptr, uintptr_t retaddr)
+{
+    BQL_LOCK_GUARD();
+    CPUState *cs = env_cpu(env);
+    unsigned mmu_idx = cpu_mmu_index(cs, false);
+    if (!hexagon_read_memory_small(env, vaddr, size, retptr, mmu_idx, retaddr)) {
+        cpu_abort(cs, "%s: ERROR: bad size = %d!\n", __func__, size);
+    }
+}
+
+static bool hexagon_write_memory_small(CPUHexagonState *env, target_ulong addr,
+                                       int byte_count, unsigned char *srcbuf,
+                                       int mmu_idx, uintptr_t retaddr)
+{
+    /* handle small sizes */
+    switch (byte_count) {
+    case 1:
+        cpu_stb_mmuidx_ra(env, addr, *srcbuf, mmu_idx, retaddr);
+        return true;
+
+    case 2:
+        if (QEMU_IS_ALIGNED(addr, 2)) {
+            cpu_stw_mmuidx_ra(env, addr, *(uint16_t *)srcbuf, mmu_idx, retaddr);
+            return true;
+        }
+        break;
+
+    case 4:
+        if (QEMU_IS_ALIGNED(addr, 4)) {
+            cpu_stl_mmuidx_ra(env, addr, *(uint32_t *)srcbuf, mmu_idx, retaddr);
+            return true;
+        }
+        break;
+
+    case 8:
+        if (QEMU_IS_ALIGNED(addr, 8)) {
+            cpu_stq_mmuidx_ra(env, addr, *(uint64_t *)srcbuf, mmu_idx, retaddr);
+            return true;
+        }
+        break;
+
+    default:
+        /* larger request, handle elsewhere */
+        return false;
+    }
+
+    /* not aligned, copy bytes */
+    for (int i = 0; i < byte_count; ++i) {
+        cpu_stb_mmuidx_ra(env, addr++, *srcbuf++, mmu_idx, retaddr);
+    }
+
+    return true;
+}
+
+void hexagon_write_memory(CPUHexagonState *env, target_ulong vaddr,
+                          int size, uint64_t data, uintptr_t retaddr)
+{
+    CPUState *cs = env_cpu(env);
+    unsigned mmu_idx = cpu_mmu_index(cs, false);
+    if (!hexagon_write_memory_small(env, vaddr, size, (unsigned char *)&data,
+                                   mmu_idx, retaddr)) {
+        cpu_abort(cs, "%s: ERROR: bad size = %d!\n", __func__, size);
+    }
+}
+
+static inline uint32_t page_start(uint32_t addr)
+{
+    uint32_t page_align = ~(TARGET_PAGE_SIZE - 1);
+    return addr & page_align;
+}
+
+void hexagon_touch_memory(CPUHexagonState *env, uint32_t start_addr,
+                          uint32_t length, uintptr_t retaddr)
+{
+    unsigned int warm;
+    uint32_t first = page_start(start_addr);
+    uint32_t last = page_start(start_addr + length - 1);
+    for (uint32_t page = first; page <= last; page += TARGET_PAGE_SIZE) {
+        hexagon_read_memory(env, page, 1, &warm, retaddr);
+    }
+}
+
+uint32_t hexagon_get_pmu_counter(CPUHexagonState *cur_env, int index)
+{
+    g_assert_not_reached();
+}
+
+uint32_t arch_get_system_reg(CPUHexagonState *env, uint32_t reg)
+{
+    if (reg == HEX_SREG_PCYCLELO) {
+        return hexagon_get_sys_pcycle_count_low(env);
+    } else if (reg == HEX_SREG_PCYCLEHI) {
+        return hexagon_get_sys_pcycle_count_high(env);
+    }
+
+    g_assert(reg < NUM_SREGS);
+    return reg < HEX_SREG_GLB_START ? env->t_sreg[reg] : env->g_sreg[reg];
+}
+
+#endif
+
+uint64_t hexagon_get_sys_pcycle_count(CPUHexagonState *env)
+{
+    uint64_t cycles = 0;
+    CPUState *cs;
+    CPU_FOREACH(cs) {
+        CPUHexagonState *env_ = cpu_env(cs);
+        cycles += env_->t_cycle_count;
+    }
+    return *(env->g_pcycle_base) + cycles;
+}
+
+uint32_t hexagon_get_sys_pcycle_count_high(CPUHexagonState *env)
+{
+    return hexagon_get_sys_pcycle_count(env) >> 32;
+}
+
+uint32_t hexagon_get_sys_pcycle_count_low(CPUHexagonState *env)
+{
+    return extract64(hexagon_get_sys_pcycle_count(env), 0, 32);
+}
+
+void hexagon_set_sys_pcycle_count_high(CPUHexagonState *env,
+        uint32_t cycles_hi)
+{
+    uint64_t cur_cycles = hexagon_get_sys_pcycle_count(env);
+    uint64_t cycles =
+        ((uint64_t)cycles_hi << 32) | extract64(cur_cycles, 0, 32);
+    hexagon_set_sys_pcycle_count(env, cycles);
+}
+
+void hexagon_set_sys_pcycle_count_low(CPUHexagonState *env,
+        uint32_t cycles_lo)
+{
+    uint64_t cur_cycles = hexagon_get_sys_pcycle_count(env);
+    uint64_t cycles = extract64(cur_cycles, 32, 32) | cycles_lo;
+    hexagon_set_sys_pcycle_count(env, cycles);
+}
+
+void hexagon_set_sys_pcycle_count(CPUHexagonState *env, uint64_t cycles)
+{
+    *(env->g_pcycle_base) = cycles;
+
+    CPUState *cs;
+    CPU_FOREACH(cs) {
+        CPUHexagonState *env_ = cpu_env(cs);
+        env_->t_cycle_count = 0;
+    }
+}
+
+#ifndef CONFIG_USER_ONLY
+
+static void set_wait_mode(CPUHexagonState *env)
+{
+    g_assert(bql_locked());
+
+    const uint32_t modectl = arch_get_system_reg(env, HEX_SREG_MODECTL);
+    uint32_t thread_wait_mask = GET_FIELD(MODECTL_W, modectl);
+    thread_wait_mask |= 0x1 << env->threadId;
+    SET_SYSTEM_FIELD(env, HEX_SREG_MODECTL, MODECTL_W, thread_wait_mask);
+}
+
+void hexagon_wait_thread(CPUHexagonState *env, target_ulong PC)
+{
+    g_assert(bql_locked());
+
+    if (qemu_loglevel_mask(LOG_GUEST_ERROR) &&
+        (env->k0_lock_state != HEX_LOCK_UNLOCKED ||
+         env->tlb_lock_state != HEX_LOCK_UNLOCKED)) {
+        qemu_log("WARNING: executing wait() with acquired lock"
+                 "may lead to deadlock\n");
+    }
+    g_assert(get_exe_mode(env) != HEX_EXE_MODE_WAIT);
+
+    CPUState *cs = env_cpu(env);
+    /*
+     * The addtion of cpu_has_work is borrowed from arm's wfi helper
+     * and is critical for our stability
+     */
+    if ((cs->exception_index != HEX_EVENT_NONE) ||
+        (cpu_has_work(cs))) {
+        qemu_log_mask(CPU_LOG_INT,
+            "%s: thread %d skipping WAIT mode, have some work\n",
+            __func__, env->threadId);
+        return;
+    }
+    set_wait_mode(env);
+    env->wait_next_pc = PC + 4;
+
+    cpu_interrupt(cs, CPU_INTERRUPT_HALT);
+}
+
+static void hexagon_resume_thread(CPUHexagonState *env)
+{
+    CPUState *cs = env_cpu(env);
+    clear_wait_mode(env);
+    /*
+     * The wait instruction keeps the PC pointing to itself
+     * so that it has an opportunity to check for interrupts.
+     *
+     * When we come out of wait mode, adjust the PC to the
+     * next executable instruction.
+     */
+    env->gpr[HEX_REG_PC] = env->wait_next_pc;
+    cs = env_cpu(env);
+    ASSERT_DIRECT_TO_GUEST_UNSET(env, cs->exception_index);
+    cs->halted = false;
+    cs->exception_index = HEX_EVENT_NONE;
+    qemu_cpu_kick(cs);
+}
+
+void hexagon_resume_threads(CPUHexagonState *current_env, uint32_t mask)
+{
+    CPUState *cs;
+    CPUHexagonState *env;
+
+    g_assert(bql_locked());
+    CPU_FOREACH(cs) {
+        env = cpu_env(cs);
+        g_assert(env->threadId < THREADS_MAX);
+        if ((mask & (0x1 << env->threadId))) {
+            if (get_exe_mode(env) == HEX_EXE_MODE_WAIT) {
+                hexagon_resume_thread(env);
+            }
+        }
+    }
+}
+
+
+static MMVector VRegs[VECTOR_UNIT_MAX][NUM_VREGS];
+static MMQReg QRegs[VECTOR_UNIT_MAX][NUM_QREGS];
+
+/*
+ *                            EXT_CONTEXTS
+ * SSR.XA   2              4              6              8
+ * 000      HVX Context 0  HVX Context 0  HVX Context 0  HVX Context 0
+ * 001      HVX Context 1  HVX Context 1  HVX Context 1  HVX Context 1
+ * 010      HVX Context 0  HVX Context 2  HVX Context 2  HVX Context 2
+ * 011      HVX Context 1  HVX Context 3  HVX Context 3  HVX Context 3
+ * 100      HVX Context 0  HVX Context 0  HVX Context 4  HVX Context 4
+ * 101      HVX Context 1  HVX Context 1  HVX Context 5  HVX Context 5
+ * 110      HVX Context 0  HVX Context 2  HVX Context 2  HVX Context 6
+ * 111      HVX Context 1  HVX Context 3  HVX Context 3  HVX Context 7
+ */
+static int parse_context_idx(CPUHexagonState *env, uint8_t XA)
+{
+    int ret;
+    HexagonCPU *cpu = env_archcpu(env);
+    if (cpu->hvx_contexts == 6 && XA >= 6) {
+        ret = XA - 6 + 2;
+    } else {
+        ret = XA % cpu->hvx_contexts;
+    }
+    g_assert(ret >= 0 && ret < VECTOR_UNIT_MAX);
+    return ret;
+}
+
+static void check_overcommitted_hvx(CPUHexagonState *env, uint32_t ssr)
+{
+    if (!GET_FIELD(SSR_XE, ssr)) {
+        return;
+    }
+
+    uint8_t XA = GET_SSR_FIELD(SSR_XA, ssr);
+
+    CPUState *cs;
+    CPU_FOREACH(cs) {
+        CPUHexagonState *env_ = cpu_env(cs);
+        if (env_ == env) {
+            continue;
+        }
+        /* Check if another thread has the XE bit set and same XA */
+        uint32_t ssr_ = arch_get_system_reg(env_, HEX_SREG_SSR);
+        if (GET_SSR_FIELD(SSR_XE2, ssr_) && GET_FIELD(SSR_XA, ssr_) == XA) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                    "setting SSR.XA '%d' on thread %d but thread"
+                    " %d has same extension active\n", XA, env->threadId,
+                    env_->threadId);
+        }
+    }
+}
+
+void hexagon_modify_ssr(CPUHexagonState *env, uint32_t new, uint32_t old)
+{
+    g_assert(bql_locked());
+
+    bool old_EX = GET_SSR_FIELD(SSR_EX, old);
+    bool old_UM = GET_SSR_FIELD(SSR_UM, old);
+    bool old_GM = GET_SSR_FIELD(SSR_GM, old);
+    bool old_IE = GET_SSR_FIELD(SSR_IE, old);
+    uint8_t old_XA = GET_SSR_FIELD(SSR_XA, old);
+    bool new_EX = GET_SSR_FIELD(SSR_EX, new);
+    bool new_UM = GET_SSR_FIELD(SSR_UM, new);
+    bool new_GM = GET_SSR_FIELD(SSR_GM, new);
+    bool new_IE = GET_SSR_FIELD(SSR_IE, new);
+    uint8_t new_XA = GET_SSR_FIELD(SSR_XA, new);
+
+    if ((old_EX != new_EX) ||
+        (old_UM != new_UM) ||
+        (old_GM != new_GM)) {
+        hex_mmu_mode_change(env);
+    }
+
+    uint8_t old_asid = GET_SSR_FIELD(SSR_ASID, old);
+    uint8_t new_asid = GET_SSR_FIELD(SSR_ASID, new);
+    if (new_asid != old_asid) {
+        CPUState *cs = env_cpu(env);
+        tlb_flush(cs);
+    }
+
+    if (old_XA != new_XA) {
+        int old_unit = parse_context_idx(env, old_XA);
+        int new_unit = parse_context_idx(env, new_XA);
+
+        /* Ownership exchange */
+        memcpy(VRegs[old_unit], env->VRegs, sizeof(env->VRegs));
+        memcpy(QRegs[old_unit], env->QRegs, sizeof(env->QRegs));
+        memcpy(env->VRegs, VRegs[new_unit], sizeof(env->VRegs));
+        memcpy(env->QRegs, QRegs[new_unit], sizeof(env->QRegs));
+
+        check_overcommitted_hvx(env, new);
+    }
+
+    /* See if the interrupts have been enabled or we have exited EX mode */
+    if ((new_IE && !old_IE) ||
+        (!new_EX && old_EX)) {
+        hex_interrupt_update(env);
+    }
+}
+
+void clear_wait_mode(CPUHexagonState *env)
+{
+    g_assert(bql_locked());
+
+    const uint32_t modectl = arch_get_system_reg(env, HEX_SREG_MODECTL);
+    uint32_t thread_wait_mask = GET_FIELD(MODECTL_W, modectl);
+    thread_wait_mask &= ~(0x1 << env->threadId);
+    SET_SYSTEM_FIELD(env, HEX_SREG_MODECTL, MODECTL_W, thread_wait_mask);
+}
+
+void hexagon_ssr_set_cause(CPUHexagonState *env, uint32_t cause)
+{
+    g_assert(bql_locked());
+
+    const uint32_t old = arch_get_system_reg(env, HEX_SREG_SSR);
+    SET_SYSTEM_FIELD(env, HEX_SREG_SSR, SSR_EX, 1);
+    SET_SYSTEM_FIELD(env, HEX_SREG_SSR, SSR_CAUSE, cause);
+    const uint32_t new = arch_get_system_reg(env, HEX_SREG_SSR);
+
+    hexagon_modify_ssr(env, new, old);
+}
+
+
+int get_exe_mode(CPUHexagonState *env)
+{
+    g_assert(bql_locked());
+
+    target_ulong modectl = arch_get_system_reg(env, HEX_SREG_MODECTL);
+    uint32_t thread_enabled_mask = GET_FIELD(MODECTL_E, modectl);
+    bool E_bit = thread_enabled_mask & (0x1 << env->threadId);
+    uint32_t thread_wait_mask = GET_FIELD(MODECTL_W, modectl);
+    bool W_bit = thread_wait_mask & (0x1 << env->threadId);
+    target_ulong isdbst = arch_get_system_reg(env, HEX_SREG_ISDBST);
+    uint32_t debugmode = GET_FIELD(ISDBST_DEBUGMODE, isdbst);
+    bool D_bit = debugmode & (0x1 << env->threadId);
+
+    /* Figure 4-2 */
+    if (!D_bit && !W_bit && !E_bit) {
+        return HEX_EXE_MODE_OFF;
+    }
+    if (!D_bit && !W_bit && E_bit) {
+        return HEX_EXE_MODE_RUN;
+    }
+    if (!D_bit && W_bit && E_bit) {
+        return HEX_EXE_MODE_WAIT;
+    }
+    if (D_bit && !W_bit && E_bit) {
+        return HEX_EXE_MODE_DEBUG;
+    }
+    g_assert_not_reached();
+}
+
+static void set_enable_mask(CPUHexagonState *env)
+{
+    g_assert(bql_locked());
+
+    const uint32_t modectl = arch_get_system_reg(env, HEX_SREG_MODECTL);
+    uint32_t thread_enabled_mask = GET_FIELD(MODECTL_E, modectl);
+    thread_enabled_mask |= 0x1 << env->threadId;
+    SET_SYSTEM_FIELD(env, HEX_SREG_MODECTL, MODECTL_E, thread_enabled_mask);
+}
+
+static uint32_t clear_enable_mask(CPUHexagonState *env)
+{
+    g_assert(bql_locked());
+
+    const uint32_t modectl = arch_get_system_reg(env, HEX_SREG_MODECTL);
+    uint32_t thread_enabled_mask = GET_FIELD(MODECTL_E, modectl);
+    thread_enabled_mask &= ~(0x1 << env->threadId);
+    SET_SYSTEM_FIELD(env, HEX_SREG_MODECTL, MODECTL_E, thread_enabled_mask);
+    return thread_enabled_mask;
+}
+static void do_start_thread(CPUState *cs, run_on_cpu_data tbd)
+{
+    BQL_LOCK_GUARD();
+
+    CPUHexagonState *env = cpu_env(cs);
+
+    hexagon_cpu_soft_reset(env);
+
+    set_enable_mask(env);
+
+    cs->halted = 0;
+    cs->exception_index = HEX_EVENT_NONE;
+    cpu_resume(cs);
+}
+
+void hexagon_start_threads(CPUHexagonState *current_env, uint32_t mask)
+{
+    CPUState *cs;
+    CPU_FOREACH(cs) {
+        CPUHexagonState *env = cpu_env(cs);
+        if (!(mask & (0x1 << env->threadId))) {
+            continue;
+        }
+
+        if (current_env->threadId != env->threadId) {
+            async_safe_run_on_cpu(cs, do_start_thread, RUN_ON_CPU_NULL);
+        }
+    }
+}
+
+/*
+ * When we have all threads stopped, the return
+ * value to the shell is register 2 from thread 0.
+ */
+static target_ulong get_thread0_r2(void)
+{
+    CPUState *cs;
+    CPU_FOREACH(cs) {
+        CPUHexagonState *thread = cpu_env(cs);
+        if (thread->threadId == 0) {
+            return thread->gpr[2];
+        }
+    }
+    g_assert_not_reached();
+}
+
+void hexagon_stop_thread(CPUHexagonState *env)
+
+{
+    BQL_LOCK_GUARD();
+
+    uint32_t thread_enabled_mask = clear_enable_mask(env);
+    CPUState *cs = env_cpu(env);
+    cpu_interrupt(cs, CPU_INTERRUPT_HALT);
+    if (!thread_enabled_mask) {
+        /* All threads are stopped, exit */
+        exit(get_thread0_r2());
+    }
+}
+
+static int sys_in_monitor_mode_ssr(uint32_t ssr)
+{
+    if ((GET_SSR_FIELD(SSR_EX, ssr) != 0) ||
+       ((GET_SSR_FIELD(SSR_EX, ssr) == 0) && (GET_SSR_FIELD(SSR_UM, ssr) == 0)))
+        return 1;
+    return 0;
+}
+
+static int sys_in_guest_mode_ssr(uint32_t ssr)
+{
+    if ((GET_SSR_FIELD(SSR_EX, ssr) == 0) &&
+        (GET_SSR_FIELD(SSR_UM, ssr) != 0) &&
+        (GET_SSR_FIELD(SSR_GM, ssr) != 0))
+        return 1;
+    return 0;
+}
+
+static int sys_in_user_mode_ssr(uint32_t ssr)
+{
+    if ((GET_SSR_FIELD(SSR_EX, ssr) == 0) &&
+        (GET_SSR_FIELD(SSR_UM, ssr) != 0) &&
+        (GET_SSR_FIELD(SSR_GM, ssr) == 0))
+        return 1;
+   return 0;
+}
+
+int get_cpu_mode(CPUHexagonState *env)
+
+{
+    uint32_t ssr = arch_get_system_reg(env, HEX_SREG_SSR);
+
+    if (sys_in_monitor_mode_ssr(ssr)) {
+        return HEX_CPU_MODE_MONITOR;
+    } else if (sys_in_guest_mode_ssr(ssr)) {
+        return HEX_CPU_MODE_GUEST;
+    } else if (sys_in_user_mode_ssr(ssr)) {
+        return HEX_CPU_MODE_USER;
+    }
+    return HEX_CPU_MODE_MONITOR;
+}
+
+#endif
diff --git a/target/hexagon/cpu_helper.h b/target/hexagon/cpu_helper.h
new file mode 100644
index 0000000000000..f86f5e744fd46
--- /dev/null
+++ b/target/hexagon/cpu_helper.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HEXAGON_CPU_HELPER_H
+#define HEXAGON_CPU_HELPER_H
+
+void hexagon_read_memory(CPUHexagonState *env, target_ulong vaddr, int size,
+                         void *retptr, uintptr_t retaddr);
+void hexagon_write_memory(CPUHexagonState *env, target_ulong vaddr,
+                          int size, uint64_t data, uintptr_t retaddr);
+void hexagon_touch_memory(CPUHexagonState *env, uint32_t start_addr,
+                          uint32_t length, uintptr_t retaddr);
+uint32_t hexagon_get_pmu_counter(CPUHexagonState *cur_env, int index);
+uint64_t hexagon_get_sys_pcycle_count(CPUHexagonState *env);
+uint32_t hexagon_get_sys_pcycle_count_low(CPUHexagonState *env);
+uint32_t hexagon_get_sys_pcycle_count_high(CPUHexagonState *env);
+void hexagon_set_sys_pcycle_count(CPUHexagonState *env, uint64_t);
+void hexagon_set_sys_pcycle_count_low(CPUHexagonState *env, uint32_t);
+void hexagon_set_sys_pcycle_count_high(CPUHexagonState *env, uint32_t);
+void hexagon_modify_ssr(CPUHexagonState *env, uint32_t new, uint32_t old);
+int get_cpu_mode(CPUHexagonState *env);
+int get_exe_mode(CPUHexagonState *env);
+void clear_wait_mode(CPUHexagonState *env);
+void hexagon_ssr_set_cause(CPUHexagonState *env, uint32_t cause);
+void hexagon_start_threads(CPUHexagonState *env, uint32_t mask);
+void hexagon_stop_thread(CPUHexagonState *env);
+void hexagon_wait_thread(CPUHexagonState *env, target_ulong PC);
+void hexagon_resume_threads(CPUHexagonState *env, uint32_t mask);
+
+static inline void arch_set_thread_reg(CPUHexagonState *env, uint32_t reg,
+                                       uint32_t val)
+{
+    g_assert(reg < TOTAL_PER_THREAD_REGS);
+    env->gpr[reg] = val;
+}
+
+static inline uint32_t arch_get_thread_reg(CPUHexagonState *env, uint32_t reg)
+{
+    g_assert(reg < TOTAL_PER_THREAD_REGS);
+    return env->gpr[reg];
+}
+
+#ifndef CONFIG_USER_ONLY
+static inline void arch_set_system_reg(CPUHexagonState *env, uint32_t reg,
+                                       uint32_t val)
+{
+    g_assert(reg < NUM_SREGS);
+    if (reg < HEX_SREG_GLB_START) {
+        env->t_sreg[reg] = val;
+    } else {
+        env->g_sreg[reg] = val;
+    }
+}
+#endif
+
+uint32_t arch_get_system_reg(CPUHexagonState *env, uint32_t reg);
+
+#endif
+
diff --git a/target/hexagon/decode.c b/target/hexagon/decode.c
index 23deba2426f84..41bf03c9b5133 100644
--- a/target/hexagon/decode.c
+++ b/target/hexagon/decode.c
@@ -193,6 +193,8 @@ static bool decode_opcode_can_jump(int opcode)
     if ((GET_ATTRIB(opcode, A_JUMP)) ||
         (GET_ATTRIB(opcode, A_CALL)) ||
         (opcode == J2_trap0) ||
+        (opcode == J2_trap1) ||
+        (opcode == J2_rte) ||
         (opcode == J2_pause)) {
         /* Exception to A_JUMP attribute */
         if (opcode == J4_hintjumpr) {
@@ -236,9 +238,9 @@ static void decode_set_insn_attr_fields(Packet *pkt)
             if (GET_ATTRIB(opcode, A_SCALAR_STORE) &&
                 !GET_ATTRIB(opcode, A_MEMSIZE_0B)) {
                 if (pkt->insn[i].slot == 0) {
-                    pkt->pkt_has_store_s0 = true;
+                    pkt->pkt_has_scalar_store_s0 = true;
                 } else {
-                    pkt->pkt_has_store_s1 = true;
+                    pkt->pkt_has_scalar_store_s1 = true;
                 }
             }
         }
@@ -371,6 +373,18 @@ static void decode_shuffle_for_execution(Packet *packet)
             break;
         }
     }
+    /*
+     * And at the very very very end, move any RTE's, since they update
+     * user/supervisor mode.
+     */
+#if !defined(CONFIG_USER_ONLY)
+    for (i = 0; i < last_insn; i++) {
+        if (packet->insn[i].opcode == J2_rte) {
+            decode_send_insn_to(packet, i, last_insn);
+            break;
+        }
+    }
+#endif
 }
 
 static void
diff --git a/target/hexagon/gdbstub.c b/target/hexagon/gdbstub.c
index 12d6b3bbcbb16..8476199b753ef 100644
--- a/target/hexagon/gdbstub.c
+++ b/target/hexagon/gdbstub.c
@@ -76,6 +76,51 @@ int hexagon_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
     g_assert_not_reached();
 }
 
+#ifndef CONFIG_USER_ONLY
+int hexagon_sys_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n)
+{
+    CPUHexagonState *env = cpu_env(cs);
+
+    if (n < NUM_SREGS) {
+        return gdb_get_regl(mem_buf, hexagon_sreg_read(env, n));
+    }
+    n -= NUM_SREGS;
+
+    if (n < NUM_GREGS) {
+        return gdb_get_regl(mem_buf, hexagon_greg_read(env, n));
+    }
+    n -= NUM_GREGS;
+
+    n -= TOTAL_PER_THREAD_REGS;
+
+    if (n < NUM_PREGS) {
+        env->pred[n] = ldtul_p(mem_buf) & 0xff;
+        return sizeof(uint8_t);
+    }
+
+    n -= NUM_PREGS;
+
+    g_assert_not_reached();
+}
+
+int hexagon_sys_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
+{
+    CPUHexagonState *env = cpu_env(cs);
+
+    if (n < NUM_SREGS) {
+        hexagon_gdb_sreg_write(env, n, ldtul_p(mem_buf));
+        return sizeof(target_ulong);
+    }
+    n -= NUM_SREGS;
+
+    if (n < NUM_GREGS) {
+        return env->greg[n] = ldtul_p(mem_buf);
+    }
+    n -= NUM_GREGS;
+
+    g_assert_not_reached();
+}
+#endif
 static int gdb_get_vreg(CPUHexagonState *env, GByteArray *mem_buf, int n)
 {
     int total = 0;
diff --git a/target/hexagon/gen_analyze_funcs.py b/target/hexagon/gen_analyze_funcs.py
index 3ac7cc2cfe577..dfdf5f3b87ba5 100755
--- a/target/hexagon/gen_analyze_funcs.py
+++ b/target/hexagon/gen_analyze_funcs.py
@@ -22,6 +22,8 @@
 import string
 import hex_common
 
+def has_analyze_func(reg, mode):
+    return callable(getattr(reg, f"analyze_{mode}", None))
 
 ##
 ## Generate the code to analyze the instruction
@@ -42,6 +44,14 @@ def gen_analyze_func(f, tag, regs, imms):
     f.write(f"static void analyze_{tag}(DisasContext *ctx)\n")
     f.write("{\n")
 
+    if hex_common.tag_ignore(tag):
+        f.write("}\n\n")
+        return
+
+    if ("A_PRIV" in hex_common.attribdict[tag] or
+        "A_GUEST" in hex_common.attribdict[tag]):
+        f.write("#ifndef CONFIG_USER_ONLY\n")
+
     f.write("    Insn *insn G_GNUC_UNUSED = ctx->insn;\n")
     if (hex_common.is_hvx_insn(tag)):
         if hex_common.has_hvx_helper(tag):
@@ -58,22 +68,27 @@ def gen_analyze_func(f, tag, regs, imms):
     for regno, register in enumerate(regs):
         reg_type, reg_id = register
         reg = hex_common.get_register(tag, reg_type, reg_id)
-        reg.decl_reg_num(f, regno)
+        if has_analyze_func(reg, "read") or has_analyze_func(reg, "write"):
+            reg.decl_reg_num(f, regno)
 
     ## Analyze the register reads
     for regno, register in enumerate(regs):
         reg_type, reg_id = register
         reg = hex_common.get_register(tag, reg_type, reg_id)
-        if reg.is_read():
+        if reg.is_read() and has_analyze_func(reg, "read"):
             reg.analyze_read(f, regno)
 
     ## Analyze the register writes
     for regno, register in enumerate(regs):
         reg_type, reg_id = register
         reg = hex_common.get_register(tag, reg_type, reg_id)
-        if reg.is_written():
+        if reg.is_written() and has_analyze_func(reg, "write"):
             reg.analyze_write(f, tag, regno)
 
+    if ("A_PRIV" in hex_common.attribdict[tag] or
+        "A_GUEST" in hex_common.attribdict[tag]):
+        f.write("#endif /* !CONFIG_USER_ONLY */\n")
+
     f.write("}\n\n")
 
 
diff --git a/target/hexagon/gen_helper_funcs.py b/target/hexagon/gen_helper_funcs.py
index c1f806ac4b25b..32e3bac746251 100755
--- a/target/hexagon/gen_helper_funcs.py
+++ b/target/hexagon/gen_helper_funcs.py
@@ -69,7 +69,7 @@ def gen_helper_function(f, tag, tagregs, tagimms):
     if hex_common.need_slot(tag):
         if "A_LOAD" in hex_common.attribdict[tag]:
             f.write(hex_common.code_fmt(f"""\
-                bool pkt_has_store_s1 = slotval & 0x1;
+                bool pkt_has_scalar_store_s1 = slotval & 0x1;
             """))
         f.write(hex_common.code_fmt(f"""\
             uint32_t slot = slotval >> 1;
@@ -109,26 +109,23 @@ def main():
     tagimms = hex_common.get_tagimms()
 
     with open(args.out, "w") as f:
-        for tag in hex_common.tags:
-            ## Skip the priv instructions
-            if "A_PRIV" in hex_common.attribdict[tag]:
+        for tag in hex_common.get_user_tags():
+            if hex_common.tag_ignore(tag):
                 continue
-            ## Skip the guest instructions
-            if "A_GUEST" in hex_common.attribdict[tag]:
-                continue
-            ## Skip the diag instructions
-            if tag == "Y6_diag":
-                continue
-            if tag == "Y6_diag0":
+            if hex_common.skip_qemu_helper(tag):
                 continue
-            if tag == "Y6_diag1":
+            if hex_common.is_idef_parser_enabled(tag):
                 continue
+            gen_helper_function(f, tag, tagregs, tagimms)
+
+        f.write("#if !defined(CONFIG_USER_ONLY)\n")
+        for tag in hex_common.get_sys_tags():
             if hex_common.skip_qemu_helper(tag):
                 continue
             if hex_common.is_idef_parser_enabled(tag):
                 continue
-
             gen_helper_function(f, tag, tagregs, tagimms)
+        f.write("#endif\n")
 
 
 if __name__ == "__main__":
diff --git a/target/hexagon/gen_helper_protos.py b/target/hexagon/gen_helper_protos.py
index 77f8e0a6a322c..59c8bdd05c0f6 100755
--- a/target/hexagon/gen_helper_protos.py
+++ b/target/hexagon/gen_helper_protos.py
@@ -59,27 +59,28 @@ def main():
     tagimms = hex_common.get_tagimms()
 
     with open(args.out, "w") as f:
-        for tag in hex_common.tags:
-            ## Skip the priv instructions
-            if "A_PRIV" in hex_common.attribdict[tag]:
+        for tag in hex_common.get_user_tags():
+            if hex_common.tag_ignore(tag):
                 continue
-            ## Skip the guest instructions
-            if "A_GUEST" in hex_common.attribdict[tag]:
-                continue
-            ## Skip the diag instructions
-            if tag == "Y6_diag":
-                continue
-            if tag == "Y6_diag0":
+
+            if hex_common.skip_qemu_helper(tag):
                 continue
-            if tag == "Y6_diag1":
+            if hex_common.is_idef_parser_enabled(tag):
                 continue
 
+            gen_helper_prototype(f, tag, tagregs, tagimms)
+
+        f.write("#if !defined(CONFIG_USER_ONLY)\n")
+        for tag in hex_common.get_sys_tags():
+            if hex_common.tag_ignore(tag):
+                continue
             if hex_common.skip_qemu_helper(tag):
                 continue
             if hex_common.is_idef_parser_enabled(tag):
                 continue
 
             gen_helper_prototype(f, tag, tagregs, tagimms)
+        f.write("#endif\n")
 
 
 if __name__ == "__main__":
diff --git a/target/hexagon/gen_idef_parser_funcs.py b/target/hexagon/gen_idef_parser_funcs.py
index 2f6e826f76d60..32bce9b002863 100644
--- a/target/hexagon/gen_idef_parser_funcs.py
+++ b/target/hexagon/gen_idef_parser_funcs.py
@@ -60,6 +60,8 @@ def main():
         f.write('#include "macros.h.inc"\n\n')
 
         for tag in hex_common.tags:
+            if hex_common.tag_ignore(tag):
+                continue
             ## Skip the priv instructions
             if "A_PRIV" in hex_common.attribdict[tag]:
                 continue
diff --git a/target/hexagon/gen_op_attribs.py b/target/hexagon/gen_op_attribs.py
index bbbb02df3a23b..94dd1f876b21c 100755
--- a/target/hexagon/gen_op_attribs.py
+++ b/target/hexagon/gen_op_attribs.py
@@ -38,7 +38,7 @@ def main():
     ##     Generate all the attributes associated with each instruction
     ##
     with open(args.out, "w") as f:
-        for tag in hex_common.tags:
+        for tag in hex_common.get_all_tags():
             f.write(
                 f"OP_ATTRIB({tag},ATTRIBS("
                 f'{",".join(sorted(hex_common.attribdict[tag]))}))\n'
diff --git a/target/hexagon/gen_opcodes_def.py b/target/hexagon/gen_opcodes_def.py
index 94a19ff412e2e..17ba3f9db95e8 100755
--- a/target/hexagon/gen_opcodes_def.py
+++ b/target/hexagon/gen_opcodes_def.py
@@ -37,7 +37,10 @@ def main():
     ##     Generate a list of all the opcodes
     ##
     with open(args.out, "w") as f:
-        for tag in hex_common.tags:
+        for tag in hex_common.get_user_tags():
+            f.write(f"OPCODE({tag}),\n")
+
+        for tag in hex_common.get_sys_tags():
             f.write(f"OPCODE({tag}),\n")
 
 
diff --git a/target/hexagon/gen_semantics.c b/target/hexagon/gen_semantics.c
index 4a2bdd70e9cc7..ed66ae4ec2417 100644
--- a/target/hexagon/gen_semantics.c
+++ b/target/hexagon/gen_semantics.c
@@ -106,7 +106,7 @@ int main(int argc, char *argv[])
 /*
  * Process the macros for HVX
  */
-#define DEF_MACRO(MNAME, BEH, ATTRS) \
+#define DEF_MACRO(MNAME, PARAMS, SDESC, LDESC, BEH, ATTRS) \
     fprintf(outfile, "MACROATTRIB( \\\n" \
                      "    \"%s\", \\\n" \
                      "    \"\"\"%s\"\"\", \\\n" \
diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h
index 8a3b801287c7a..146aadc737643 100644
--- a/target/hexagon/gen_tcg.h
+++ b/target/hexagon/gen_tcg.h
@@ -488,6 +488,7 @@
 
 /* dczeroa clears the 32 byte cache line at the address given */
 #define fGEN_TCG_Y2_dczeroa(SHORTCODE) SHORTCODE
+#define fGEN_TCG_Y2_dczeroa_nt(SHORTCODE) SHORTCODE
 
 /* In linux-user mode, these are not modelled, suppress compiler warning */
 #define fGEN_TCG_Y2_dcinva(SHORTCODE) \
@@ -1133,6 +1134,9 @@
                            RdV, tcg_constant_tl(0)); \
     } while (0)
 
+#define fGEN_TCG_Y2_break(SHORTCODE)
+#define fGEN_TCG_J2_unpause(SHORTCODE)
+
 #define fGEN_TCG_J2_pause(SHORTCODE) \
     do { \
         uiV = uiV; \
@@ -1342,6 +1346,11 @@
         RsV = RsV; \
         uiV = uiV; \
     } while (0)
+#define fGEN_TCG_Y2_dcfetchbo_nt(SHORTCODE) \
+    do { \
+        RsV = RsV; \
+        uiV = uiV; \
+    } while (0)
 
 #define fGEN_TCG_L2_loadw_aq(SHORTCODE)                 SHORTCODE
 #define fGEN_TCG_L4_loadd_aq(SHORTCODE)                 SHORTCODE
@@ -1361,13 +1370,6 @@
 #define fGEN_TCG_S2_storew_rl_st_vi(SHORTCODE)          SHORTCODE
 #define fGEN_TCG_S4_stored_rl_st_vi(SHORTCODE)          SHORTCODE
 
-#define fGEN_TCG_J2_trap0(SHORTCODE) \
-    do { \
-        uiV = uiV; \
-        tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->pkt->pc); \
-        TCGv excp = tcg_constant_tl(HEX_EVENT_TRAP0); \
-        gen_helper_raise_exception(tcg_env, excp); \
-    } while (0)
 #endif
 
 #define fGEN_TCG_A2_nop(SHORTCODE) do { } while (0)
diff --git a/target/hexagon/gen_tcg_func_table.py b/target/hexagon/gen_tcg_func_table.py
index 299a39b1aa02b..70c8db5c44c88 100755
--- a/target/hexagon/gen_tcg_func_table.py
+++ b/target/hexagon/gen_tcg_func_table.py
@@ -41,19 +41,9 @@ def main():
         f.write("#define HEXAGON_FUNC_TABLE_H\n\n")
 
         f.write("const SemanticInsn opcode_genptr[XX_LAST_OPCODE] = {\n")
+
         for tag in hex_common.tags:
-            ## Skip the priv instructions
-            if "A_PRIV" in hex_common.attribdict[tag]:
-                continue
-            ## Skip the guest instructions
-            if "A_GUEST" in hex_common.attribdict[tag]:
-                continue
-            ## Skip the diag instructions
-            if tag == "Y6_diag":
-                continue
-            if tag == "Y6_diag0":
-                continue
-            if tag == "Y6_diag1":
+            if hex_common.tag_ignore(tag):
                 continue
 
             f.write(f"    [{tag}] = generate_{tag},\n")
diff --git a/target/hexagon/gen_tcg_funcs.py b/target/hexagon/gen_tcg_funcs.py
index c2ba91ddc0444..65bfa046b8671 100755
--- a/target/hexagon/gen_tcg_funcs.py
+++ b/target/hexagon/gen_tcg_funcs.py
@@ -21,7 +21,7 @@
 import re
 import string
 import hex_common
-
+from textwrap import dedent
 
 ##
 ## Generate the TCG code to call the helper
@@ -50,6 +50,18 @@ def gen_tcg_func(f, tag, regs, imms):
 
     f.write("    Insn *insn G_GNUC_UNUSED = ctx->insn;\n")
 
+    if "A_PRIV" in hex_common.attribdict[tag]:
+        f.write(dedent("""\
+#ifdef CONFIG_USER_ONLY
+    hex_gen_exception_end_tb(ctx, HEX_CAUSE_PRIV_USER_NO_SINSN);
+#else
+"""))
+    if "A_GUEST" in hex_common.attribdict[tag]:
+        f.write(dedent("""\
+#ifdef CONFIG_USER_ONLY
+    hex_gen_exception_end_tb(ctx, HEX_CAUSE_PRIV_USER_NO_GINSN);
+#else
+"""))
     if hex_common.need_ea(tag):
         f.write("    TCGv EA G_GNUC_UNUSED = tcg_temp_new();\n")
 
@@ -97,6 +109,11 @@ def gen_tcg_func(f, tag, regs, imms):
         if reg.is_written():
             reg.log_write(f, tag)
 
+    if (
+        "A_PRIV" in hex_common.attribdict[tag]
+        or "A_GUEST" in hex_common.attribdict[tag]
+    ):
+        f.write("#endif   /* CONFIG_USER_ONLY */\n")
     f.write("}\n\n")
 
 
@@ -121,18 +138,7 @@ def main():
             f.write('#include "idef-generated-emitter.h.inc"\n\n')
 
         for tag in hex_common.tags:
-            ## Skip the priv instructions
-            if "A_PRIV" in hex_common.attribdict[tag]:
-                continue
-            ## Skip the guest instructions
-            if "A_GUEST" in hex_common.attribdict[tag]:
-                continue
-            ## Skip the diag instructions
-            if tag == "Y6_diag":
-                continue
-            if tag == "Y6_diag0":
-                continue
-            if tag == "Y6_diag1":
+            if hex_common.tag_ignore(tag):
                 continue
 
             gen_def_tcg_func(f, tag, tagregs, tagimms)
diff --git a/target/hexagon/gen_tcg_sys.h b/target/hexagon/gen_tcg_sys.h
new file mode 100644
index 0000000000000..e56553462fb06
--- /dev/null
+++ b/target/hexagon/gen_tcg_sys.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright(c) 2022-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HEXAGON_GEN_TCG_SYS_H
+#define HEXAGON_GEN_TCG_SYS_H
+
+/* System mode instructions */
+#define fGEN_TCG_Y2_swi(SHORTCODE) \
+    gen_helper_swi(tcg_env, RsV)
+
+#define fGEN_TCG_Y2_cswi(SHORTCODE) \
+    gen_helper_cswi(tcg_env, RsV)
+
+#define fGEN_TCG_Y2_ciad(SHORTCODE) \
+    gen_helper_ciad(tcg_env, RsV)
+
+#define fGEN_TCG_Y4_siad(SHORTCODE) \
+    gen_helper_siad(tcg_env, RsV)
+
+#define fGEN_TCG_Y2_iassignw(SHORTCODE) \
+    gen_helper_iassignw(tcg_env, RsV)
+
+#define fGEN_TCG_Y2_iassignr(SHORTCODE) \
+    gen_helper_iassignr(RdV, tcg_env, RsV)
+
+#define fGEN_TCG_Y2_getimask(SHORTCODE) \
+    gen_helper_getimask(RdV, tcg_env, RsV)
+
+#define fGEN_TCG_Y2_setimask(SHORTCODE) \
+    gen_helper_setimask(tcg_env, PtV, RsV)
+
+#define fGEN_TCG_Y2_setprio(SHORTCODE) \
+    gen_helper_setprio(tcg_env, PtV, RsV)
+
+#define fGEN_TCG_Y2_crswap0(SHORTCODE) \
+    do { \
+        TCGv tmp = tcg_temp_new(); \
+        tcg_gen_mov_tl(tmp, RxV); \
+        tcg_gen_mov_tl(RxV, hex_t_sreg[HEX_SREG_SGP0]); \
+        tcg_gen_mov_tl(ctx->t_sreg_new_value[HEX_SREG_SGP0], tmp); \
+    } while (0)
+
+#define fGEN_TCG_Y4_crswap1(SHORTCODE) \
+    do { \
+        TCGv tmp = tcg_temp_new(); \
+        tcg_gen_mov_tl(tmp, RxV); \
+        tcg_gen_mov_tl(RxV, hex_t_sreg[HEX_SREG_SGP1]); \
+        tcg_gen_mov_tl(ctx->t_sreg_new_value[HEX_SREG_SGP1], tmp); \
+    } while (0)
+
+#define fGEN_TCG_Y4_crswap10(SHORTCODE) \
+    do { \
+        g_assert_not_reached(); \
+        TCGv_i64 tmp = tcg_temp_new_i64(); \
+        tcg_gen_mov_i64(tmp, RxxV); \
+        tcg_gen_concat_i32_i64(RxxV, \
+                               hex_t_sreg[HEX_SREG_SGP0], \
+                               hex_t_sreg[HEX_SREG_SGP1]); \
+        tcg_gen_extrl_i64_i32(ctx->t_sreg_new_value[HEX_SREG_SGP0], tmp); \
+        tcg_gen_extrh_i64_i32(ctx->t_sreg_new_value[HEX_SREG_SGP1], tmp); \
+    } while (0)
+
+#define fGEN_TCG_Y2_wait(SHORTCODE) \
+    do { \
+        RsV = RsV; \
+        gen_helper_wait(tcg_env, tcg_constant_tl(ctx->pkt->pc)); \
+    } while (0)
+
+#define fGEN_TCG_Y2_resume(SHORTCODE) \
+    gen_helper_resume(tcg_env, RsV)
+
+#define fGEN_TCG_Y2_start(SHORTCODE) \
+    gen_helper_start(tcg_env, RsV)
+
+#define fGEN_TCG_Y2_stop(SHORTCODE) \
+    do { \
+        RsV = RsV; \
+        gen_helper_stop(tcg_env); \
+    } while (0)
+
+#define fGEN_TCG_Y2_tfrscrr(SHORTCODE) \
+    tcg_gen_mov_tl(RdV, SsV)
+
+#define fGEN_TCG_Y2_tfrsrcr(SHORTCODE) \
+    tcg_gen_mov_tl(SdV, RsV)
+
+#define fGEN_TCG_Y4_tfrscpp(SHORTCODE) \
+    tcg_gen_mov_i64(RddV, SssV)
+
+#define fGEN_TCG_Y4_tfrspcp(SHORTCODE) \
+    tcg_gen_mov_i64(SddV, RssV)
+
+#define fGEN_TCG_G4_tfrgcrr(SHORTCODE) \
+    tcg_gen_mov_tl(RdV, GsV)
+
+#define fGEN_TCG_G4_tfrgrcr(SHORTCODE) \
+    tcg_gen_mov_tl(GdV, RsV)
+
+#define fGEN_TCG_G4_tfrgcpp(SHORTCODE) \
+    tcg_gen_mov_i64(RddV, GssV)
+
+#define fGEN_TCG_G4_tfrgpcp(SHORTCODE) \
+    tcg_gen_mov_i64(GddV, RssV)
+
+
+/*
+ * rte (return from exception)
+ *     Clear the EX bit in SSR
+ *     Jump to ELR
+ */
+#define fGEN_TCG_J2_rte(SHORTCODE) \
+    do { \
+        TCGv new_ssr = tcg_temp_new(); \
+        tcg_gen_deposit_tl(new_ssr, hex_t_sreg[HEX_SREG_SSR], \
+                           tcg_constant_tl(0), \
+                           reg_field_info[SSR_EX].offset, \
+                           reg_field_info[SSR_EX].width); \
+        gen_log_sreg_write(ctx, HEX_SREG_SSR, new_ssr); \
+        gen_jumpr(ctx, hex_t_sreg[HEX_SREG_ELR]); \
+    } while (0)
+
+#define fGEN_TCG_Y4_nmi(SHORTCODE) \
+    gen_helper_nmi(tcg_env, RsV)
+
+#endif
diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
index 2c5e15cfcf6f9..1dde04529bbe6 100644
--- a/target/hexagon/genptr.c
+++ b/target/hexagon/genptr.c
@@ -23,6 +23,7 @@
 #include "exec/helper-gen.h"
 #include "insn.h"
 #include "opcodes.h"
+#include "sys_macros.h"
 #include "translate.h"
 #define QEMU_GENERATE       /* Used internally by macros.h */
 #include "macros.h"
@@ -30,6 +31,10 @@
 #undef QEMU_GENERATE
 #include "gen_tcg.h"
 #include "gen_tcg_hvx.h"
+#ifndef CONFIG_USER_ONLY
+#include "gen_tcg_sys.h"
+#endif
+
 #include "genptr.h"
 
 TCGv gen_read_reg(TCGv result, int num)
@@ -128,6 +133,164 @@ TCGv get_result_pred(DisasContext *ctx, int pnum)
     }
 }
 
+#ifndef CONFIG_USER_ONLY
+G_GNUC_UNUSED
+static bool greg_writable(int rnum, bool pair)
+{
+    if (pair) {
+        if (rnum < HEX_GREG_G3) {
+            return true;
+        }
+        qemu_log_mask(LOG_UNIMP,
+                "Warning: ignoring write to guest register pair G%d:%d\n",
+                rnum + 1, rnum);
+    } else {
+        if (rnum <= HEX_GREG_G3) {
+            return true;
+        }
+        qemu_log_mask(LOG_UNIMP,
+                "Warning: ignoring write to guest register G%d\n", rnum);
+    }
+    return false;
+}
+
+G_GNUC_UNUSED
+static void check_greg_impl(int rnum, bool pair)
+{
+    if (pair && (!greg_implemented(rnum) || !greg_implemented(rnum + 1))) {
+        qemu_log_mask(LOG_UNIMP,
+                "Warning: guest register pair G%d:%d is unimplemented or "
+                "reserved. Read will yield 0.\n",
+                rnum + 1, rnum);
+    } else if (!pair && !greg_implemented(rnum)) {
+        qemu_log_mask(LOG_UNIMP,
+                "Warning: guest register G%d is unimplemented or reserved."
+                " Read will yield 0.\n", rnum);
+    }
+}
+
+G_GNUC_UNUSED
+static inline void gen_log_greg_write(DisasContext *ctx, int rnum, TCGv val)
+{
+    tcg_gen_mov_tl(ctx->greg_new_value[rnum], val);
+}
+
+G_GNUC_UNUSED
+static void gen_log_greg_write_pair(DisasContext *ctx, int rnum, TCGv_i64 val)
+{
+    TCGv val32 = tcg_temp_new();
+
+    /* Low word */
+    tcg_gen_extrl_i64_i32(val32, val);
+    gen_log_greg_write(ctx, rnum, val32);
+
+    /* High word */
+    tcg_gen_extrh_i64_i32(val32, val);
+    gen_log_greg_write(ctx, rnum + 1, val32);
+}
+
+static const target_ulong sreg_immut_masks[NUM_SREGS] = {
+    [HEX_SREG_STID] = 0xff00ff00,
+    [HEX_SREG_ELR] = 0x00000003,
+    [HEX_SREG_SSR] = 0x00008000,
+    [HEX_SREG_CCR] = 0x10e0ff24,
+    [HEX_SREG_HTID] = IMMUTABLE,
+    [HEX_SREG_IMASK] = 0xffff0000,
+    [HEX_SREG_GEVB] = 0x000000ff,
+    [HEX_SREG_EVB] = 0x000000ff,
+    [HEX_SREG_MODECTL] = IMMUTABLE,
+    [HEX_SREG_SYSCFG] = 0x80001c00,
+    [HEX_SREG_IPENDAD] = IMMUTABLE,
+    [HEX_SREG_VID] = 0xfc00fc00,
+    [HEX_SREG_VID1] = 0xfc00fc00,
+    [HEX_SREG_BESTWAIT] = 0xfffffe00,
+    [HEX_SREG_SCHEDCFG] = 0xfffffef0,
+    [HEX_SREG_CFGBASE] = IMMUTABLE,
+    [HEX_SREG_REV] = IMMUTABLE,
+    [HEX_SREG_ISDBST] = IMMUTABLE,
+    [HEX_SREG_ISDBCFG0] = 0xe0000000,
+    [HEX_SREG_BRKPTPC0] = 0x00000003,
+    [HEX_SREG_BRKPTCFG0] = 0xfc007000,
+    [HEX_SREG_BRKPTPC1] = 0x00000003,
+    [HEX_SREG_BRKPTCFG1] = 0xfc007000,
+    [HEX_SREG_ISDBMBXIN] = IMMUTABLE,
+    [HEX_SREG_ISDBEN] = 0xfffffffe,
+    [HEX_SREG_TIMERLO] = IMMUTABLE,
+    [HEX_SREG_TIMERHI] = IMMUTABLE,
+};
+
+G_GNUC_UNUSED
+static void gen_log_sreg_write(DisasContext *ctx, int rnum, TCGv val)
+{
+    const target_ulong reg_mask = sreg_immut_masks[rnum];
+
+    if (reg_mask != IMMUTABLE) {
+        if (rnum < HEX_SREG_GLB_START) {
+            gen_masked_reg_write(val, hex_t_sreg[rnum], reg_mask);
+            tcg_gen_mov_tl(ctx->t_sreg_new_value[rnum], val);
+        } else {
+            gen_masked_reg_write(val, hex_g_sreg[rnum], reg_mask);
+            gen_helper_sreg_write(tcg_env, tcg_constant_i32(rnum), val);
+        }
+    }
+}
+
+G_GNUC_UNUSED
+static void gen_log_sreg_write_pair(DisasContext *ctx, int rnum, TCGv_i64 val)
+{
+    TCGv val32 = tcg_temp_new();
+
+    /* Low word */
+    tcg_gen_extrl_i64_i32(val32, val);
+    gen_log_sreg_write(ctx, rnum, val32);
+
+    /* High word */
+    tcg_gen_extrh_i64_i32(val32, val);
+    gen_log_sreg_write(ctx, rnum + 1, val32);
+}
+
+G_GNUC_UNUSED
+static void gen_read_sreg(TCGv dst, int reg_num)
+{
+    if (reg_num >= HEX_SREG_GLB_START || reg_num == HEX_SREG_BADVA) {
+        gen_helper_sreg_read(dst, tcg_env, tcg_constant_i32(reg_num));
+    } else {
+        tcg_gen_mov_tl(dst, hex_t_sreg[reg_num]);
+    }
+}
+
+G_GNUC_UNUSED
+static void gen_read_sreg_pair(TCGv_i64 dst, int reg_num)
+{
+    if (reg_num < HEX_SREG_GLB_START) {
+        if (reg_num + 1 == HEX_SREG_BADVA) {
+            TCGv badva = tcg_temp_new();
+            gen_helper_sreg_read(badva, tcg_env,
+                                 tcg_constant_tl(HEX_SREG_BADVA));
+            tcg_gen_concat_i32_i64(dst, hex_t_sreg[reg_num], badva);
+        } else {
+            tcg_gen_concat_i32_i64(dst, hex_t_sreg[reg_num],
+                                        hex_t_sreg[reg_num + 1]);
+        }
+    } else {
+        gen_helper_sreg_read_pair(dst, tcg_env, tcg_constant_tl(reg_num));
+    }
+}
+
+G_GNUC_UNUSED
+static void gen_read_greg(TCGv dst, int reg_num)
+{
+    gen_helper_greg_read(dst, tcg_env, tcg_constant_tl(reg_num));
+}
+
+G_GNUC_UNUSED
+static void gen_read_greg_pair(TCGv_i64 dst, int reg_num)
+{
+    gen_helper_greg_read_pair(dst, tcg_env, tcg_constant_tl(reg_num));
+}
+#endif
+
+
 void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val)
 {
     TCGv pred = get_result_pred(ctx, pnum);
@@ -183,6 +346,11 @@ static inline void gen_read_ctrl_reg(DisasContext *ctx, const int reg_num,
     } else if (reg_num == HEX_REG_QEMU_HVX_CNT) {
         tcg_gen_addi_tl(dest, hex_gpr[HEX_REG_QEMU_HVX_CNT],
                         ctx->num_hvx_insns);
+    } else if ((reg_num == HEX_REG_PKTCNTLO)
+            || (reg_num == HEX_REG_PKTCNTHI)
+            || (reg_num == HEX_REG_UTIMERLO)
+            || (reg_num == HEX_REG_UTIMERHI)) {
+        gen_helper_creg_read(dest, tcg_env, tcg_constant_tl(reg_num));
     } else {
         tcg_gen_mov_tl(dest, hex_gpr[reg_num]);
     }
@@ -211,6 +379,10 @@ static inline void gen_read_ctrl_reg_pair(DisasContext *ctx, const int reg_num,
         tcg_gen_addi_tl(hvx_cnt, hex_gpr[HEX_REG_QEMU_HVX_CNT],
                         ctx->num_hvx_insns);
         tcg_gen_concat_i32_i64(dest, hvx_cnt, hex_gpr[reg_num + 1]);
+    } else if ((reg_num == HEX_REG_PKTCNTLO)
+            || (reg_num == HEX_REG_UTIMERLO)
+            || (reg_num == HEX_REG_UPCYCLELO)) {
+        gen_helper_creg_read_pair(dest, tcg_env, tcg_constant_i32(reg_num));
     } else {
         tcg_gen_concat_i32_i64(dest,
             hex_gpr[reg_num],
@@ -395,7 +567,8 @@ static inline void gen_store_conditional8(DisasContext *ctx,
 #ifndef CONFIG_HEXAGON_IDEF_PARSER
 static TCGv gen_slotval(DisasContext *ctx)
 {
-    int slotval = (ctx->pkt->pkt_has_store_s1 & 1) | (ctx->insn->slot << 1);
+    int slotval =
+        (ctx->pkt->pkt_has_scalar_store_s1 & 1) | (ctx->insn->slot << 1);
     return tcg_constant_tl(slotval);
 }
 #endif
@@ -471,14 +644,15 @@ static void gen_write_new_pc_addr(DisasContext *ctx, TCGv addr,
         tcg_gen_brcondi_tl(cond, pred, 0, pred_false);
     }
 
+    TCGv PC_wr = ctx->need_next_pc ? hex_next_PC : hex_gpr[HEX_REG_PC];
     if (ctx->pkt->pkt_has_multi_cof) {
         /* If there are multiple branches in a packet, ignore the second one */
-        tcg_gen_movcond_tl(TCG_COND_NE, hex_gpr[HEX_REG_PC],
+        tcg_gen_movcond_tl(TCG_COND_NE, PC_wr,
                            ctx->branch_taken, tcg_constant_tl(0),
-                           hex_gpr[HEX_REG_PC], addr);
+                           PC_wr, addr);
         tcg_gen_movi_tl(ctx->branch_taken, 1);
     } else {
-        tcg_gen_mov_tl(hex_gpr[HEX_REG_PC], addr);
+        tcg_gen_mov_tl(PC_wr, addr);
     }
 
     if (cond != TCG_COND_ALWAYS) {
diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h
index f8baa599c88cd..b381e0e116b36 100644
--- a/target/hexagon/helper.h
+++ b/target/hexagon/helper.h
@@ -18,7 +18,7 @@
 #include "internal.h"
 #include "helper_protos_generated.h.inc"
 
-DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_RETURN, noreturn, env, i32)
+DEF_HELPER_FLAGS_3(raise_exception, TCG_CALL_NO_RETURN, noreturn, env, i32, i32)
 DEF_HELPER_2(commit_store, void, env, int)
 DEF_HELPER_3(gather_store, void, env, i32, int)
 DEF_HELPER_1(commit_hvx_stores, void, env)
@@ -107,3 +107,31 @@ DEF_HELPER_4(probe_noshuf_load, void, env, i32, int, int)
 DEF_HELPER_2(probe_pkt_scalar_store_s0, void, env, int)
 DEF_HELPER_2(probe_hvx_stores, void, env, int)
 DEF_HELPER_2(probe_pkt_scalar_hvx_stores, void, env, int)
+
+DEF_HELPER_2(creg_read, i32, env, i32)
+DEF_HELPER_2(creg_read_pair, i64, env, i32)
+#if !defined(CONFIG_USER_ONLY)
+DEF_HELPER_2(swi, void, env, i32)
+DEF_HELPER_2(cswi, void, env, i32)
+DEF_HELPER_2(ciad, void, env, i32)
+DEF_HELPER_2(siad, void, env, i32)
+DEF_HELPER_2(iassignw, void, env, i32)
+DEF_HELPER_2(iassignr, i32, env, i32)
+DEF_HELPER_2(getimask, i32, env, i32)
+DEF_HELPER_3(setimask, void, env, i32, i32)
+DEF_HELPER_2(sreg_read, i32, env, i32)
+DEF_HELPER_2(sreg_read_pair, i64, env, i32)
+DEF_HELPER_2(greg_read, i32, env, i32)
+DEF_HELPER_2(greg_read_pair, i64, env, i32)
+DEF_HELPER_3(sreg_write, void, env, i32, i32)
+DEF_HELPER_3(sreg_write_pair, void, env, i32, i64)
+DEF_HELPER_3(setprio, void, env, i32, i32)
+DEF_HELPER_2(start, void, env, i32)
+DEF_HELPER_1(stop, void, env)
+DEF_HELPER_2(wait, void, env, i32)
+DEF_HELPER_2(resume, void, env, i32)
+DEF_HELPER_2(nmi, void, env, i32)
+DEF_HELPER_1(resched, void, env)
+DEF_HELPER_3(modify_ssr, void, env, i32, i32)
+DEF_HELPER_1(pending_interrupt, void, env)
+#endif
diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py
index 758e5fd12dfed..a10f48a093e97 100755
--- a/target/hexagon/hex_common.py
+++ b/target/hexagon/hex_common.py
@@ -33,6 +33,41 @@
 overrides = {}  # tags with helper overrides
 idef_parser_enabled = {}  # tags enabled for idef-parser
 
+
+def is_sysemu_tag(tag):
+    return "A_PRIV" in attribdict[tag] or "A_GUEST" in attribdict[tag]
+
+
+def tag_ignore(tag):
+    tag_skips = (
+        "Y6_diag",
+        "Y6_diag0",
+        "Y6_diag1",
+    )
+    attr_skips = (
+        "A_FAKEINSN",
+        "A_MAPPING",
+    )
+    return tag in tag_skips or \
+        any(attr in attribdict[tag] for attr in attr_skips)
+
+
+def get_sys_tags():
+    return sorted(
+        tag for tag in frozenset(tags) if is_sysemu_tag(tag)
+    )
+
+
+def get_user_tags():
+    return sorted(
+        tag for tag in frozenset(tags) if not is_sysemu_tag(tag)
+    )
+
+
+def get_all_tags():
+    return get_user_tags() + get_sys_tags()
+
+
 # We should do this as a hash for performance,
 # but to keep order let's keep it as a list.
 def uniquify(seq):
@@ -93,8 +128,13 @@ def calculate_attribs():
     add_qemu_macro_attrib("fTRAP", "A_IMPLICIT_READS_PC")
     add_qemu_macro_attrib("fSET_OVERFLOW", "A_IMPLICIT_WRITES_USR")
     add_qemu_macro_attrib("fSET_LPCFG", "A_IMPLICIT_WRITES_USR")
+    add_qemu_macro_attrib("fLOAD_LOCKED", "A_LLSC")
+    add_qemu_macro_attrib("fSTORE_LOCKED", "A_LLSC")
+    add_qemu_macro_attrib("fCLEAR_RTE_EX", "A_IMPLICIT_WRITES_SSR")
     add_qemu_macro_attrib("fLOAD", "A_SCALAR_LOAD")
     add_qemu_macro_attrib("fSTORE", "A_SCALAR_STORE")
+    add_qemu_macro_attrib("fSET_K0_LOCK", "A_IMPLICIT_READS_PC")
+    add_qemu_macro_attrib("fSET_TLB_LOCK", "A_IMPLICIT_READS_PC")
     add_qemu_macro_attrib('fLSBNEW0', 'A_IMPLICIT_READS_P0')
     add_qemu_macro_attrib('fLSBNEW0NOT', 'A_IMPLICIT_READS_P0')
     add_qemu_macro_attrib('fREAD_P0', 'A_IMPLICIT_READS_P0')
@@ -213,9 +253,12 @@ def is_hvx_insn(tag):
 def need_env(tag):
     return ("A_STORE" in attribdict[tag] or
             "A_LOAD" in attribdict[tag] or
+            "A_DMA" in attribdict[tag] or
             "A_CVI_GATHER" in attribdict[tag] or
             "A_CVI_SCATTER" in attribdict[tag] or
-            "A_IMPLICIT_WRITES_USR" in attribdict[tag])
+            "A_IMPLICIT_WRITES_USR" in attribdict[tag] or
+            "A_PRIV" in attribdict[tag] or
+            "J2_trap" in tag)
 
 
 def need_slot(tag):
@@ -224,6 +267,9 @@ def need_slot(tag):
         and "A_CVI_GATHER" not in attribdict[tag]
         and ("A_STORE" in attribdict[tag]
              or "A_LOAD" in attribdict[tag])
+        and tag != "L4_loadw_phys"
+        and tag != "L6_memcpy"
+        and tag != "Y6_dmlink"
     ):
         return 1
     else:
@@ -247,7 +293,11 @@ def need_next_PC(tag):
 
 
 def need_pkt_has_multi_cof(tag):
-    return "A_COF" in attribdict[tag]
+    if "A_JUMP" in attribdict[tag] or "A_CALL" in attribdict[tag]:
+        if tag == "J4_hintjumpr":
+            return False
+        return True
+    return False
 
 
 def need_pkt_need_commit(tag):
@@ -366,12 +416,16 @@ def helper_proto_type(self):
         return "s32"
     def helper_arg_type(self):
         return "int32_t"
+    def is_pair(self):
+        return False
 
 class Pair(Scalar):
     def helper_proto_type(self):
         return "s64"
     def helper_arg_type(self):
         return "int64_t"
+    def is_pair(self):
+        return True
 
 class Hvx:
     def is_scalar_reg(self):
@@ -1009,6 +1063,116 @@ def analyze_write(self, f, tag, regno):
             ctx_log_qreg_write(ctx, {self.reg_num}, insn_has_hvx_helper);
         """))
 
+class GuestRegister(Register):
+    def gen_check_impl(self, f, regno):
+        if self.is_written():
+            f.write(code_fmt(f"""\
+                if (!greg_writable(insn->regno[{regno}],
+                    {str(self.is_pair()).lower()})) {{
+                    return;
+                }}
+            """))
+        else:
+            f.write(code_fmt(f"""\
+                check_greg_impl(insn->regno[{regno}], {str(self.is_pair()).lower()});
+            """))
+
+class GuestDest(GuestRegister, Single, Dest):
+    def decl_tcg(self, f, tag, regno):
+        self.decl_reg_num(f, regno)
+        self.gen_check_impl(f, regno)
+        f.write(code_fmt(f"""\
+            TCGv {self.reg_tcg()} = tcg_temp_new();
+        """))
+    def log_write(self, f, tag):
+        f.write(code_fmt(f"""\
+            gen_log_greg_write(ctx, {self.reg_num}, {self.reg_tcg()});
+        """))
+    def analyze_write(self, f, tag, regno):
+        f.write(code_fmt(f"""\
+            ctx_log_greg_write(ctx, {self.reg_num});
+        """))
+
+class GuestSource(GuestRegister, Single, OldSource):
+    def decl_tcg(self, f, tag, regno):
+        self.decl_reg_num(f, regno);
+        self.gen_check_impl(f, regno)
+        f.write(code_fmt(f"""\
+            TCGv {self.reg_tcg()} = tcg_temp_new();
+            gen_read_greg({self.reg_tcg()}, {self.reg_num});
+        """))
+
+class GuestPairDest(GuestRegister, Pair, Dest):
+    def decl_tcg(self, f, tag, regno):
+        self.decl_reg_num(f, regno)
+        self.gen_check_impl(f, regno)
+        f.write(code_fmt(f"""\
+            TCGv_i64 {self.reg_tcg()} = tcg_temp_new_i64();
+        """))
+    def log_write(self, f, tag):
+        f.write(code_fmt(f"""\
+            gen_log_greg_write_pair(ctx, {self.reg_num}, {self.reg_tcg()});
+        """))
+    def analyze_write(self, f, tag, regno):
+        f.write(code_fmt(f"""\
+            ctx_log_greg_write_pair(ctx, {self.reg_num});
+        """))
+
+class GuestPairSource(GuestRegister, Pair, OldSource):
+    def decl_tcg(self, f, tag, regno):
+        self.decl_reg_num(f, regno)
+        self.gen_check_impl(f, regno)
+        f.write(code_fmt(f"""\
+            TCGv_i64 {self.reg_tcg()} = tcg_temp_new_i64();
+            gen_read_greg_pair({self.reg_tcg()}, {self.reg_num});
+        """))
+
+class SystemDest(Register, Single, Dest):
+    def decl_tcg(self, f, tag, regno):
+        self.decl_reg_num(f, regno)
+        f.write(code_fmt(f"""\
+            TCGv {self.reg_tcg()} = tcg_temp_new();
+        """))
+    def log_write(self, f, tag):
+        f.write(code_fmt(f"""\
+            gen_log_sreg_write(ctx, {self.reg_num}, {self.reg_tcg()});
+        """))
+    def analyze_write(self, f, tag, regno):
+        f.write(code_fmt(f"""\
+            ctx_log_sreg_write(ctx, {self.reg_num});
+        """))
+
+class SystemSource(Register, Single, OldSource):
+    def decl_tcg(self, f, tag, regno):
+        self.decl_reg_num(f, regno);
+        f.write(code_fmt(f"""\
+            TCGv {self.reg_tcg()} = tcg_temp_new();
+            gen_read_sreg({self.reg_tcg()}, {self.reg_num});
+        """))
+
+class SystemPairDest(Register, Pair, Dest):
+    def decl_tcg(self, f, tag, regno):
+        self.decl_reg_num(f, regno)
+        f.write(code_fmt(f"""\
+            TCGv_i64 {self.reg_tcg()} = tcg_temp_new_i64();
+        """))
+    def log_write(self, f, tag):
+        f.write(code_fmt(f"""\
+            gen_log_sreg_write_pair(ctx, {self.reg_num}, {self.reg_tcg()});
+        """))
+    def analyze_write(self, f, tag, regno):
+        f.write(code_fmt(f"""\
+            ctx_log_sreg_write_pair(ctx, {self.reg_num});
+        """))
+
+class SystemPairSource(Register, Pair, OldSource):
+    def decl_tcg(self, f, tag, regno):
+        self.decl_reg_num(f, regno)
+        f.write(code_fmt(f"""\
+            TCGv_i64 {self.reg_tcg()} = tcg_temp_new_i64();
+            gen_read_sreg_pair({self.reg_tcg()}, {self.reg_num});
+        """))
+
 def init_registers():
     regs = {
         GprDest("R", "d"),
@@ -1055,6 +1219,16 @@ def init_registers():
         QRegSource("Q", "u"),
         QRegSource("Q", "v"),
         QRegReadWrite("Q", "x"),
+
+        # system regs
+        GuestDest("G", "d"),
+        GuestSource("G", "s"),
+        GuestPairDest("G", "dd"),
+        GuestPairSource("G", "ss"),
+        SystemDest("S", "d"),
+        SystemSource("S", "s"),
+        SystemPairDest("S", "dd"),
+        SystemPairSource("S", "ss"),
     }
     for reg in regs:
         registers[f"{reg.regtype}{reg.regid}"] = reg
@@ -1070,11 +1244,18 @@ def init_registers():
     for reg in new_regs:
         new_registers[f"{reg.regtype}{reg.regid}"] = reg
 
+def is_new_reg(tag, regid):
+    if regid[0] in "NO":
+        return True
+    return regid[0] == "P" and \
+           f"{regid}N" in semdict[tag] and \
+           f"{regid}V" not in semdict[tag]
+
 def get_register(tag, regtype, regid):
-    if f"{regtype}{regid}V" in semdict[tag]:
-        return registers[f"{regtype}{regid}"]
-    else:
-        return new_registers[f"{regtype}{regid}"]
+    regid = f"{regtype}{regid}"
+    is_new = is_new_reg(tag, regid)
+    reg = new_registers[regid] if is_new else registers[regid]
+    return reg
 
 def helper_ret_type(tag, regs):
     ## If there is a scalar result, it is the return type
@@ -1187,6 +1368,7 @@ def parse_common_args(desc):
     parser.add_argument("semantics", help="semantics file")
     parser.add_argument("overrides", help="overrides file")
     parser.add_argument("overrides_vec", help="vector overrides file")
+    parser.add_argument("overrides_sys", help="system overrides file")
     parser.add_argument("out", help="output file")
     parser.add_argument("--idef-parser",
                         help="file of instructions translated by idef-parser")
@@ -1194,6 +1376,7 @@ def parse_common_args(desc):
     read_semantics_file(args.semantics)
     read_overrides_file(args.overrides)
     read_overrides_file(args.overrides_vec)
+    read_overrides_file(args.overrides_sys)
     if args.idef_parser:
         read_idef_parser_enabled_file(args.idef_parser)
     calculate_attribs()
diff --git a/target/hexagon/hex_interrupts.c b/target/hexagon/hex_interrupts.c
new file mode 100644
index 0000000000000..fd00bcfb9a573
--- /dev/null
+++ b/target/hexagon/hex_interrupts.c
@@ -0,0 +1,324 @@
+/*
+ * Copyright(c) 2022-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/main-loop.h"
+#include "cpu.h"
+#include "hex_interrupts.h"
+#include "macros.h"
+#include "sys_macros.h"
+#include "system/cpus.h"
+
+static bool hex_is_qualified_for_int(CPUHexagonState *env, int int_num);
+
+static bool get_syscfg_gie(CPUHexagonState *env)
+{
+    target_ulong syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG);
+    return GET_SYSCFG_FIELD(SYSCFG_GIE, syscfg);
+}
+
+static bool get_ssr_ex(CPUHexagonState *env)
+{
+    target_ulong ssr = arch_get_system_reg(env, HEX_SREG_SSR);
+    return GET_SSR_FIELD(SSR_EX, ssr);
+}
+
+static bool get_ssr_ie(CPUHexagonState *env)
+{
+    target_ulong ssr = arch_get_system_reg(env, HEX_SREG_SSR);
+    return GET_SSR_FIELD(SSR_IE, ssr);
+}
+
+/* Do these together so we only have to call hexagon_modify_ssr once */
+static void set_ssr_ex_cause(CPUHexagonState *env, int ex, uint32_t cause)
+{
+    target_ulong old = arch_get_system_reg(env, HEX_SREG_SSR);
+    SET_SYSTEM_FIELD(env, HEX_SREG_SSR, SSR_EX, ex);
+    SET_SYSTEM_FIELD(env, HEX_SREG_SSR, SSR_CAUSE, cause);
+    target_ulong new = arch_get_system_reg(env, HEX_SREG_SSR);
+    hexagon_modify_ssr(env, new, old);
+}
+
+static bool get_iad_bit(CPUHexagonState *env, int int_num)
+{
+    target_ulong ipendad = arch_get_system_reg(env, HEX_SREG_IPENDAD);
+    target_ulong iad = GET_FIELD(IPENDAD_IAD, ipendad);
+    return extract32(iad, int_num, 1);
+}
+
+static void set_iad_bit(CPUHexagonState *env, int int_num, int val)
+{
+    target_ulong ipendad = arch_get_system_reg(env, HEX_SREG_IPENDAD);
+    target_ulong iad = GET_FIELD(IPENDAD_IAD, ipendad);
+    iad = deposit32(iad, int_num, 1, val);
+    fSET_FIELD(ipendad, IPENDAD_IAD, iad);
+    arch_set_system_reg(env, HEX_SREG_IPENDAD, ipendad);
+}
+
+static uint32_t get_ipend(CPUHexagonState *env)
+{
+    target_ulong ipendad = arch_get_system_reg(env, HEX_SREG_IPENDAD);
+    return GET_FIELD(IPENDAD_IPEND, ipendad);
+}
+
+static inline bool get_ipend_bit(CPUHexagonState *env, int int_num)
+{
+    target_ulong ipendad = arch_get_system_reg(env, HEX_SREG_IPENDAD);
+    target_ulong ipend = GET_FIELD(IPENDAD_IPEND, ipendad);
+    return extract32(ipend, int_num, 1);
+}
+
+static void clear_ipend(CPUHexagonState *env, uint32_t mask)
+{
+    target_ulong ipendad = arch_get_system_reg(env, HEX_SREG_IPENDAD);
+    target_ulong ipend = GET_FIELD(IPENDAD_IPEND, ipendad);
+    ipend &= ~mask;
+    fSET_FIELD(ipendad, IPENDAD_IPEND, ipend);
+    arch_set_system_reg(env, HEX_SREG_IPENDAD, ipendad);
+}
+
+static void set_ipend(CPUHexagonState *env, uint32_t mask)
+{
+    target_ulong ipendad = arch_get_system_reg(env, HEX_SREG_IPENDAD);
+    target_ulong ipend = GET_FIELD(IPENDAD_IPEND, ipendad);
+    ipend |= mask;
+    fSET_FIELD(ipendad, IPENDAD_IPEND, ipend);
+    arch_set_system_reg(env, HEX_SREG_IPENDAD, ipendad);
+}
+
+static void set_ipend_bit(CPUHexagonState *env, int int_num, int val)
+{
+    target_ulong ipendad = arch_get_system_reg(env, HEX_SREG_IPENDAD);
+    target_ulong ipend = GET_FIELD(IPENDAD_IPEND, ipendad);
+    ipend = deposit32(ipend, int_num, 1, val);
+    fSET_FIELD(ipendad, IPENDAD_IPEND, ipend);
+    arch_set_system_reg(env, HEX_SREG_IPENDAD, ipendad);
+}
+
+static bool get_imask_bit(CPUHexagonState *env, int int_num)
+{
+    target_ulong imask = arch_get_system_reg(env, HEX_SREG_IMASK);
+    return extract32(imask, int_num, 1);
+}
+
+static uint32_t get_prio(CPUHexagonState *env)
+{
+    target_ulong stid = arch_get_system_reg(env, HEX_SREG_STID);
+    return extract32(stid, reg_field_info[STID_PRIO].offset,
+                     reg_field_info[STID_PRIO].width);
+}
+
+static void set_elr(CPUHexagonState *env, target_ulong val)
+{
+    arch_set_system_reg(env, HEX_SREG_ELR, val);
+}
+
+static bool get_schedcfgen(CPUHexagonState *env)
+{
+    target_ulong schedcfg = arch_get_system_reg(env, HEX_SREG_SCHEDCFG);
+    return extract32(schedcfg, reg_field_info[SCHEDCFG_EN].offset,
+                     reg_field_info[SCHEDCFG_EN].width);
+}
+
+static bool is_lowest_prio(CPUHexagonState *env, int int_num)
+{
+    uint32_t my_prio = get_prio(env);
+    CPUState *cs;
+
+    CPU_FOREACH(cs) {
+        CPUHexagonState *hex_env = cpu_env(cs);
+        if (!hex_is_qualified_for_int(hex_env, int_num)) {
+            continue;
+        }
+
+        /* Note that lower values indicate *higher* priority */
+        if (my_prio < get_prio(hex_env)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+static bool hex_is_qualified_for_int(CPUHexagonState *env, int int_num)
+{
+    bool syscfg_gie = get_syscfg_gie(env);
+    bool iad = get_iad_bit(env, int_num);
+    bool ssr_ie = get_ssr_ie(env);
+    bool ssr_ex = get_ssr_ex(env);
+    bool imask = get_imask_bit(env, int_num);
+
+    return syscfg_gie && !iad && ssr_ie && !ssr_ex && !imask;
+}
+
+static void clear_pending_locks(CPUHexagonState *env)
+{
+    g_assert(bql_locked());
+    if (env->k0_lock_state == HEX_LOCK_WAITING) {
+        env->k0_lock_state = HEX_LOCK_UNLOCKED;
+    }
+    if (env->tlb_lock_state == HEX_LOCK_WAITING) {
+        env->tlb_lock_state = HEX_LOCK_UNLOCKED;
+    }
+}
+
+static bool should_not_exec(CPUHexagonState *env)
+{
+    return (get_exe_mode(env) == HEX_EXE_MODE_WAIT);
+}
+
+static void restore_state(CPUHexagonState *env, bool int_accepted)
+{
+    CPUState *cs = env_cpu(env);
+    cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD | CPU_INTERRUPT_SWI);
+    if (!int_accepted && should_not_exec(env)) {
+        cpu_interrupt(cs, CPU_INTERRUPT_HALT);
+    }
+}
+
+static void hex_accept_int(CPUHexagonState *env, int int_num)
+{
+    CPUState *cs = env_cpu(env);
+    target_ulong evb = arch_get_system_reg(env, HEX_SREG_EVB);
+    const int exe_mode = get_exe_mode(env);
+    const bool in_wait_mode = exe_mode == HEX_EXE_MODE_WAIT;
+
+    set_ipend_bit(env, int_num, 0);
+    set_iad_bit(env, int_num, 1);
+    set_ssr_ex_cause(env, 1, HEX_CAUSE_INT0 | int_num);
+    cs->exception_index = HEX_EVENT_INT0 + int_num;
+    env->cause_code = HEX_EVENT_INT0 + int_num;
+    clear_pending_locks(env);
+    if (in_wait_mode) {
+        qemu_log_mask(CPU_LOG_INT,
+            "%s: thread %d resuming, exiting WAIT mode\n",
+            __func__, env->threadId);
+        set_elr(env, env->wait_next_pc);
+        clear_wait_mode(env);
+        cs->halted = false;
+    } else if (env->k0_lock_state == HEX_LOCK_WAITING) {
+        g_assert_not_reached();
+    } else {
+        set_elr(env, env->gpr[HEX_REG_PC]);
+    }
+    env->gpr[HEX_REG_PC] = evb | (cs->exception_index << 2);
+    if (get_ipend(env) == 0) {
+        restore_state(env, true);
+    }
+}
+
+
+bool hex_check_interrupts(CPUHexagonState *env)
+{
+    CPUState *cs = env_cpu(env);
+    bool int_handled = false;
+    bool ssr_ex = get_ssr_ex(env);
+    int max_ints = 32;
+    bool schedcfgen;
+
+    /* Early exit if nothing pending */
+    if (get_ipend(env) == 0) {
+        restore_state(env, false);
+        return false;
+    }
+
+    BQL_LOCK_GUARD();
+    /* Only check priorities when schedcfgen is set */
+    schedcfgen = get_schedcfgen(env);
+    for (int i = 0; i < max_ints; i++) {
+        if (!get_iad_bit(env, i) && get_ipend_bit(env, i)) {
+            qemu_log_mask(CPU_LOG_INT,
+                          "%s: thread[%d] pc = 0x%x found int %d\n", __func__,
+                          env->threadId, env->gpr[HEX_REG_PC], i);
+            if (hex_is_qualified_for_int(env, i) &&
+                (!schedcfgen || is_lowest_prio(env, i))) {
+                qemu_log_mask(CPU_LOG_INT, "%s: thread[%d] int %d handled_\n",
+                    __func__, env->threadId, i);
+                hex_accept_int(env, i);
+                int_handled = true;
+                break;
+            }
+            bool syscfg_gie = get_syscfg_gie(env);
+            bool iad = get_iad_bit(env, i);
+            bool ssr_ie = get_ssr_ie(env);
+            bool imask = get_imask_bit(env, i);
+
+            qemu_log_mask(CPU_LOG_INT,
+                          "%s: thread[%d] int %d not handled, qualified: %d, "
+                          "schedcfg_en: %d, low prio %d\n",
+                          __func__, env->threadId, i,
+                          hex_is_qualified_for_int(env, i), schedcfgen,
+                          is_lowest_prio(env, i));
+
+            qemu_log_mask(CPU_LOG_INT,
+                          "%s: thread[%d] int %d not handled, GIE %d, iad %d, "
+                          "SSR:IE %d, SSR:EX: %d, imask bit %d\n",
+                          __func__, env->threadId, i, syscfg_gie, iad, ssr_ie,
+                          ssr_ex, imask);
+        }
+    }
+
+    /*
+     * If we didn't handle the interrupt and it wasn't
+     * because we were in EX state, then we won't be able
+     * to execute the interrupt on this CPU unless something
+     * changes in the CPU state.  Clear the interrupt_request bits
+     * while preserving the IPEND bits, and we can re-assert the
+     * interrupt_request bit(s) when we execute one of those instructions.
+     */
+    if (!int_handled && !ssr_ex) {
+        restore_state(env, int_handled);
+    } else if (int_handled) {
+        assert(!cs->halted);
+    }
+
+    return int_handled;
+}
+
+void hex_clear_interrupts(CPUHexagonState *env, uint32_t mask, uint32_t type)
+{
+    if (mask == 0) {
+        return;
+    }
+
+    /*
+     * Notify all CPUs that the interrupt has happened
+     */
+    BQL_LOCK_GUARD();
+    clear_ipend(env, mask);
+    hex_interrupt_update(env);
+}
+
+void hex_raise_interrupts(CPUHexagonState *env, uint32_t mask, uint32_t type)
+{
+    g_assert(bql_locked());
+    if (mask == 0) {
+        return;
+    }
+
+    /*
+     * Notify all CPUs that the interrupt has happened
+     */
+    set_ipend(env, mask);
+    hex_interrupt_update(env);
+}
+
+void hex_interrupt_update(CPUHexagonState *env)
+{
+    CPUState *cs;
+
+    g_assert(bql_locked());
+    if (get_ipend(env) != 0) {
+        CPU_FOREACH(cs) {
+            CPUHexagonState *hex_env = cpu_env(cs);
+            const int exe_mode = get_exe_mode(hex_env);
+            if (exe_mode != HEX_EXE_MODE_OFF) {
+                cs->interrupt_request |= CPU_INTERRUPT_SWI;
+                cpu_resume(cs);
+            }
+        }
+    }
+}
diff --git a/target/hexagon/hex_interrupts.h b/target/hexagon/hex_interrupts.h
new file mode 100644
index 0000000000000..17a243946ce2f
--- /dev/null
+++ b/target/hexagon/hex_interrupts.h
@@ -0,0 +1,15 @@
+/*
+ * Copyright(c) 2022-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HEX_INTERRUPTS_H
+#define HEX_INTERRUPTS_H
+
+bool hex_check_interrupts(CPUHexagonState *env);
+void hex_clear_interrupts(CPUHexagonState *env, uint32_t mask, uint32_t type);
+void hex_raise_interrupts(CPUHexagonState *env, uint32_t mask, uint32_t type);
+void hex_interrupt_update(CPUHexagonState *env);
+
+#endif
diff --git a/target/hexagon/hex_mmu.c b/target/hexagon/hex_mmu.c
new file mode 100644
index 0000000000000..923018949b2a8
--- /dev/null
+++ b/target/hexagon/hex_mmu.c
@@ -0,0 +1,604 @@
+/*
+ * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "qemu/qemu-print.h"
+#include "cpu.h"
+#include "system/cpus.h"
+#include "internal.h"
+#include "exec/exec-all.h"
+#include "exec/cputlb.h"
+#include "hex_mmu.h"
+#include "macros.h"
+#include "sys_macros.h"
+#include "reg_fields.h"
+
+#define GET_TLB_FIELD(ENTRY, FIELD)                               \
+    ((uint64_t)fEXTRACTU_BITS(ENTRY, reg_field_info[FIELD].width, \
+                              reg_field_info[FIELD].offset))
+
+/* PPD (physical page descriptor) */
+static inline uint64_t GET_PPD(uint64_t entry)
+{
+    return GET_TLB_FIELD(entry, PTE_PPD) |
+        (GET_TLB_FIELD(entry, PTE_PA35) << reg_field_info[PTE_PPD].width);
+}
+
+#define NO_ASID      (1 << 8)
+
+typedef enum {
+    PGSIZE_4K,
+    PGSIZE_16K,
+    PGSIZE_64K,
+    PGSIZE_256K,
+    PGSIZE_1M,
+    PGSIZE_4M,
+    PGSIZE_16M,
+    PGSIZE_64M,
+    PGSIZE_256M,
+    PGSIZE_1G,
+    NUM_PGSIZE_TYPES
+} tlb_pgsize_t;
+
+static const char *pgsize_str[NUM_PGSIZE_TYPES] = {
+    "4K",
+    "16K",
+    "64K",
+    "256K",
+    "1M",
+    "4M",
+    "16M",
+    "64M",
+    "256M",
+    "1G",
+};
+
+#define INVALID_MASK 0xffffffffLL
+
+static const uint64_t encmask_2_mask[] = {
+    0x0fffLL,                           /* 4k,   0000 */
+    0x3fffLL,                           /* 16k,  0001 */
+    0xffffLL,                           /* 64k,  0010 */
+    0x3ffffLL,                          /* 256k, 0011 */
+    0xfffffLL,                          /* 1m,   0100 */
+    0x3fffffLL,                         /* 4m,   0101 */
+    0xffffffLL,                         /* 16m,  0110 */
+    0x3ffffffLL,                        /* 64m,  0111 */
+    0xfffffffLL,                        /* 256m, 1000 */
+    0x3fffffffLL,                       /* 1g,   1001 */
+    INVALID_MASK,                      /* RSVD, 0111 */
+};
+
+/*
+ * @return the page size type from @a entry.
+ */
+static inline tlb_pgsize_t hex_tlb_pgsize_type(uint64_t entry)
+{
+    if (entry == 0) {
+        qemu_log_mask(CPU_LOG_MMU, "%s: Supplied TLB entry was 0!\n", __func__);
+        return 0;
+    }
+    tlb_pgsize_t size = ctz64(entry);
+    g_assert(size < NUM_PGSIZE_TYPES);
+    return size;
+}
+
+/*
+ * @return the page size of @a entry, in bytes.
+ */
+static inline uint64_t hex_tlb_page_size_bytes(uint64_t entry)
+{
+    return 1ull << (TARGET_PAGE_BITS + 2 * hex_tlb_pgsize_type(entry));
+}
+
+static inline uint64_t hex_tlb_phys_page_num(uint64_t entry)
+{
+    uint32_t ppd = GET_PPD(entry);
+    return ppd >> 1;
+}
+
+static inline uint64_t hex_tlb_phys_addr(uint64_t entry)
+{
+    uint64_t pagemask = encmask_2_mask[hex_tlb_pgsize_type(entry)];
+    uint64_t pagenum = hex_tlb_phys_page_num(entry);
+    uint64_t PA = (pagenum << TARGET_PAGE_BITS) & (~pagemask);
+    return PA;
+}
+
+static inline uint64_t hex_tlb_virt_addr(uint64_t entry)
+{
+    return (uint64_t)GET_TLB_FIELD(entry, PTE_VPN) << TARGET_PAGE_BITS;
+}
+
+static bool hex_dump_mmu_entry(FILE *f, uint64_t entry)
+{
+    if (GET_TLB_FIELD(entry, PTE_V)) {
+        fprintf(f, "0x%016" PRIx64 ": ", entry);
+        uint64_t PA = hex_tlb_phys_addr(entry);
+        uint64_t VA = hex_tlb_virt_addr(entry);
+        fprintf(f, "V:%" PRId64 " G:%" PRId64 " A1:%" PRId64 " A0:%" PRId64,
+                GET_TLB_FIELD(entry, PTE_V), GET_TLB_FIELD(entry, PTE_G),
+                GET_TLB_FIELD(entry, PTE_ATR1), GET_TLB_FIELD(entry, PTE_ATR0));
+        fprintf(f, " ASID:0x%02" PRIx64 " VA:0x%08" PRIx64,
+                GET_TLB_FIELD(entry, PTE_ASID), VA);
+        fprintf(f,
+                " X:%" PRId64 " W:%" PRId64 " R:%" PRId64 " U:%" PRId64
+                " C:%" PRId64,
+                GET_TLB_FIELD(entry, PTE_X), GET_TLB_FIELD(entry, PTE_W),
+                GET_TLB_FIELD(entry, PTE_R), GET_TLB_FIELD(entry, PTE_U),
+                GET_TLB_FIELD(entry, PTE_C));
+        fprintf(f, " PA:0x%09" PRIx64 " SZ:%s (0x%" PRIx64 ")", PA,
+                pgsize_str[hex_tlb_pgsize_type(entry)],
+                hex_tlb_page_size_bytes(entry));
+        fprintf(f, "\n");
+        return true;
+    }
+
+    /* Not valid */
+    return false;
+}
+
+void dump_mmu(CPUHexagonState *env)
+{
+    HexagonCPU *cpu = env_archcpu(env);
+    for (uint32_t i = 0; i < cpu->num_tlbs; i++) {
+        uint64_t entry = env->hex_tlb->entries[i];
+        if (GET_TLB_FIELD(entry, PTE_V)) {
+            qemu_printf("[%03" PRIu32 "] ", i);
+            qemu_printf("0x%016" PRIx64 ": ", entry);
+            uint64_t PA = hex_tlb_phys_addr(entry);
+            uint64_t VA = hex_tlb_virt_addr(entry);
+            qemu_printf(
+                "V:%" PRId64 " G:%" PRId64 " A1:%" PRId64 " A0:%" PRId64,
+                GET_TLB_FIELD(entry, PTE_V), GET_TLB_FIELD(entry, PTE_G),
+                GET_TLB_FIELD(entry, PTE_ATR1), GET_TLB_FIELD(entry, PTE_ATR0));
+            qemu_printf(" ASID:0x%02" PRIx64 " VA:0x%08" PRIx64,
+                        GET_TLB_FIELD(entry, PTE_ASID), VA);
+            qemu_printf(
+                " X:%" PRId64 " W:%" PRId64 " R:%" PRId64 " U:%" PRId64
+                " C:%" PRId64,
+                GET_TLB_FIELD(entry, PTE_X), GET_TLB_FIELD(entry, PTE_W),
+                GET_TLB_FIELD(entry, PTE_R), GET_TLB_FIELD(entry, PTE_U),
+                GET_TLB_FIELD(entry, PTE_C));
+            qemu_printf(" PA:0x%09" PRIx64 " SZ:%s (0x%" PRIx64 ")", PA,
+                        pgsize_str[hex_tlb_pgsize_type(entry)],
+                        hex_tlb_page_size_bytes(entry));
+            qemu_printf("\n");
+        }
+    }
+}
+
+static inline void hex_log_tlbw(uint32_t index, uint64_t entry)
+{
+    if (qemu_loglevel_mask(CPU_LOG_MMU)) {
+        if (qemu_log_enabled()) {
+            FILE *logfile = qemu_log_trylock();
+            if (logfile) {
+                fprintf(logfile, "tlbw[%03d]: ", index);
+                if (!hex_dump_mmu_entry(logfile, entry)) {
+                    fprintf(logfile, "invalid\n");
+                }
+                qemu_log_unlock(logfile);
+            }
+        }
+    }
+}
+
+void hex_tlbw(CPUHexagonState *env, uint32_t index, uint64_t value)
+{
+    uint32_t myidx = fTLB_NONPOW2WRAP(fTLB_IDXMASK(index));
+    bool old_entry_valid = GET_TLB_FIELD(env->hex_tlb->entries[myidx], PTE_V);
+    if (old_entry_valid && hexagon_cpu_mmu_enabled(env)) {
+        CPUState *cs = env_cpu(env);
+
+        tlb_flush(cs);
+    }
+    env->hex_tlb->entries[myidx] = (value);
+    hex_log_tlbw(myidx, value);
+}
+
+void hex_mmu_realize(CPUHexagonState *env)
+{
+    CPUState *cs = env_cpu(env);
+    if (cs->cpu_index == 0) {
+        env->hex_tlb = g_malloc0(sizeof(CPUHexagonTLBContext));
+    } else {
+        CPUState *cpu0_s = NULL;
+        CPUHexagonState *env0 = NULL;
+        CPU_FOREACH(cpu0_s) {
+            assert(cpu0_s->cpu_index == 0);
+            env0 = &(HEXAGON_CPU(cpu0_s)->env);
+            break;
+        }
+        env->hex_tlb = env0->hex_tlb;
+    }
+}
+
+void hex_mmu_on(CPUHexagonState *env)
+{
+    CPUState *cs = env_cpu(env);
+    qemu_log_mask(CPU_LOG_MMU, "Hexagon MMU turned on!\n");
+    tlb_flush(cs);
+}
+
+void hex_mmu_off(CPUHexagonState *env)
+{
+    CPUState *cs = env_cpu(env);
+    qemu_log_mask(CPU_LOG_MMU, "Hexagon MMU turned off!\n");
+    tlb_flush(cs);
+}
+
+void hex_mmu_mode_change(CPUHexagonState *env)
+{
+    qemu_log_mask(CPU_LOG_MMU, "Hexagon mode change!\n");
+    CPUState *cs = env_cpu(env);
+    tlb_flush(cs);
+}
+
+static inline bool hex_tlb_entry_match_noperm(uint64_t entry, uint32_t asid,
+                                              uint64_t VA)
+{
+    if (GET_TLB_FIELD(entry, PTE_V)) {
+        if (GET_TLB_FIELD(entry, PTE_G)) {
+            /* Global entry - ingnore ASID */
+        } else if (asid != NO_ASID) {
+            uint32_t tlb_asid = GET_TLB_FIELD(entry, PTE_ASID);
+            if (tlb_asid != asid) {
+                return false;
+            }
+        }
+
+        uint64_t page_size = hex_tlb_page_size_bytes(entry);
+        uint64_t page_start =
+            ROUND_DOWN(hex_tlb_virt_addr(entry), page_size);
+        if (page_start <= VA && VA < page_start + page_size) {
+            return true;
+        }
+    }
+    return false;
+}
+
+static inline void hex_tlb_entry_get_perm(CPUHexagonState *env, uint64_t entry,
+                                          MMUAccessType access_type,
+                                          int mmu_idx, int *prot,
+                                          int32_t *excp)
+{
+    bool perm_x = GET_TLB_FIELD(entry, PTE_X);
+    bool perm_w = GET_TLB_FIELD(entry, PTE_W);
+    bool perm_r = GET_TLB_FIELD(entry, PTE_R);
+    bool perm_u = GET_TLB_FIELD(entry, PTE_U);
+    bool user_idx = mmu_idx == MMU_USER_IDX;
+
+    if (mmu_idx == MMU_KERNEL_IDX) {
+        *prot = PAGE_VALID | PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+        return;
+    }
+
+    *prot = PAGE_VALID;
+    switch (access_type) {
+    case MMU_INST_FETCH:
+        if (user_idx && !perm_u) {
+            *excp = HEX_EVENT_PRECISE;
+            env->cause_code = HEX_CAUSE_FETCH_NO_UPAGE;
+        } else if (!perm_x) {
+            *excp = HEX_EVENT_PRECISE;
+            env->cause_code = HEX_CAUSE_FETCH_NO_XPAGE;
+        }
+        break;
+    case MMU_DATA_LOAD:
+        if (user_idx && !perm_u) {
+            *excp = HEX_EVENT_PRECISE;
+            env->cause_code = HEX_CAUSE_PRIV_NO_UREAD;
+        } else if (!perm_r) {
+            *excp = HEX_EVENT_PRECISE;
+            env->cause_code = HEX_CAUSE_PRIV_NO_READ;
+        }
+        break;
+    case MMU_DATA_STORE:
+        if (user_idx && !perm_u) {
+            *excp = HEX_EVENT_PRECISE;
+            env->cause_code = HEX_CAUSE_PRIV_NO_UWRITE;
+        } else if (!perm_w) {
+            *excp = HEX_EVENT_PRECISE;
+            env->cause_code = HEX_CAUSE_PRIV_NO_WRITE;
+        }
+        break;
+    }
+
+    if (!user_idx || perm_u) {
+        if (perm_x) {
+            *prot |= PAGE_EXEC;
+        }
+        if (perm_r) {
+            *prot |= PAGE_READ;
+        }
+        if (perm_w) {
+            *prot |= PAGE_WRITE;
+        }
+    }
+}
+
+static inline bool hex_tlb_entry_match(CPUHexagonState *env, uint64_t entry,
+                                       uint8_t asid, target_ulong VA,
+                                       MMUAccessType access_type, hwaddr *PA,
+                                       int *prot, int *size, int32_t *excp,
+                                       int mmu_idx)
+{
+    if (hex_tlb_entry_match_noperm(entry, asid, VA)) {
+        hex_tlb_entry_get_perm(env, entry, access_type, mmu_idx, prot, excp);
+        *PA = hex_tlb_phys_addr(entry);
+        *size = hex_tlb_page_size_bytes(entry);
+        return true;
+    }
+    return false;
+}
+
+bool hex_tlb_find_match(CPUHexagonState *env, target_ulong VA,
+                        MMUAccessType access_type, hwaddr *PA, int *prot,
+                        int *size, int32_t *excp, int mmu_idx)
+{
+    *PA = 0;
+    *prot = 0;
+    *size = 0;
+    *excp = 0;
+    uint32_t ssr = arch_get_system_reg(env, HEX_SREG_SSR);
+    uint8_t asid = GET_SSR_FIELD(SSR_ASID, ssr);
+    int i;
+    HexagonCPU *cpu = env_archcpu(env);
+    for (i = 0; i < cpu->num_tlbs; i++) {
+        uint64_t entry = env->hex_tlb->entries[i];
+        if (hex_tlb_entry_match(env, entry, asid, VA, access_type, PA, prot,
+                                size, excp, mmu_idx)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+static uint32_t hex_tlb_lookup_by_asid(CPUHexagonState *env, uint32_t asid,
+                                       uint32_t VA)
+{
+    uint32_t not_found = 0x80000000;
+    uint32_t idx = not_found;
+    int i;
+
+    HexagonCPU *cpu = env_archcpu(env);
+    for (i = 0; i < cpu->num_tlbs; i++) {
+        uint64_t entry = env->hex_tlb->entries[i];
+        if (hex_tlb_entry_match_noperm(entry, asid, VA)) {
+            if (idx != not_found) {
+                env->cause_code = HEX_CAUSE_IMPRECISE_MULTI_TLB_MATCH;
+                break;
+            }
+            idx = i;
+        }
+    }
+
+    if (idx == not_found) {
+        qemu_log_mask(CPU_LOG_MMU, "%s: 0x%x, 0x%08x => NOT FOUND\n",
+                      __func__, asid, VA);
+    } else {
+        qemu_log_mask(CPU_LOG_MMU, "%s: 0x%x, 0x%08x => %d\n",
+                      __func__, asid, VA, idx);
+    }
+
+    return idx;
+}
+
+/* Called from tlbp instruction */
+uint32_t hex_tlb_lookup(CPUHexagonState *env, uint32_t ssr, uint32_t VA)
+{
+    return hex_tlb_lookup_by_asid(env, GET_SSR_FIELD(SSR_ASID, ssr), VA);
+}
+
+static bool hex_tlb_is_match(CPUHexagonState *env,
+                             uint64_t entry1, uint64_t entry2,
+                             bool consider_gbit)
+{
+    bool valid1 = GET_TLB_FIELD(entry1, PTE_V);
+    bool valid2 = GET_TLB_FIELD(entry2, PTE_V);
+    uint64_t size1 = hex_tlb_page_size_bytes(entry1);
+    uint64_t vaddr1 = ROUND_DOWN(hex_tlb_virt_addr(entry1), size1);
+    uint64_t size2 = hex_tlb_page_size_bytes(entry2);
+    uint64_t vaddr2 = ROUND_DOWN(hex_tlb_virt_addr(entry2), size2);
+    int asid1 = GET_TLB_FIELD(entry1, PTE_ASID);
+    int asid2 = GET_TLB_FIELD(entry2, PTE_ASID);
+    bool gbit1 = GET_TLB_FIELD(entry1, PTE_G);
+    bool gbit2 = GET_TLB_FIELD(entry2, PTE_G);
+
+    if (!valid1 || !valid2) {
+        return false;
+    }
+
+    if (((vaddr1 <= vaddr2) && (vaddr2 < (vaddr1 + size1))) ||
+        ((vaddr2 <= vaddr1) && (vaddr1 < (vaddr2 + size2)))) {
+        if (asid1 == asid2) {
+            return true;
+        }
+        if ((consider_gbit && gbit1) || gbit2) {
+            return true;
+        }
+    }
+    return false;
+}
+
+/*
+ * Return codes:
+ * 0 or positive             index of match
+ * -1                        multiple matches
+ * -2                        no match
+ */
+int hex_tlb_check_overlap(CPUHexagonState *env, uint64_t entry, uint64_t index)
+{
+    int matches = 0;
+    int last_match = 0;
+    int i;
+
+    HexagonCPU *cpu = env_archcpu(env);
+    for (i = 0; i < cpu->num_tlbs; i++) {
+        if (hex_tlb_is_match(env, entry, env->hex_tlb->entries[i], false)) {
+            matches++;
+            last_match = i;
+        }
+    }
+
+    if (matches == 1) {
+        return last_match;
+    }
+    if (matches == 0) {
+        return -2;
+    }
+    return -1;
+}
+
+static inline void print_thread(const char *str, CPUState *cs)
+{
+    g_assert(bql_locked());
+    CPUHexagonState *thread = cpu_env(cs);
+    bool is_stopped = cpu_is_stopped(cs);
+    int exe_mode = get_exe_mode(thread);
+    hex_lock_state_t lock_state = thread->tlb_lock_state;
+    qemu_log_mask(CPU_LOG_MMU,
+           "%s: threadId = %d: %s, exe_mode = %s, tlb_lock_state = %s\n",
+           str,
+           thread->threadId,
+           is_stopped ? "stopped" : "running",
+           exe_mode == HEX_EXE_MODE_OFF ? "off" :
+           exe_mode == HEX_EXE_MODE_RUN ? "run" :
+           exe_mode == HEX_EXE_MODE_WAIT ? "wait" :
+           exe_mode == HEX_EXE_MODE_DEBUG ? "debug" :
+           "unknown",
+           lock_state == HEX_LOCK_UNLOCKED ? "unlocked" :
+           lock_state == HEX_LOCK_WAITING ? "waiting" :
+           lock_state == HEX_LOCK_OWNER ? "owner" :
+           "unknown");
+}
+
+static inline void print_thread_states(const char *str)
+{
+    CPUState *cs;
+    CPU_FOREACH(cs) {
+        print_thread(str, cs);
+    }
+}
+
+void hex_tlb_lock(CPUHexagonState *env)
+{
+    qemu_log_mask(CPU_LOG_MMU, "hex_tlb_lock: %d\n", env->threadId);
+    BQL_LOCK_GUARD();
+    g_assert((env->tlb_lock_count == 0) || (env->tlb_lock_count == 1));
+
+    uint32_t syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG);
+    uint8_t tlb_lock = GET_SYSCFG_FIELD(SYSCFG_TLBLOCK, syscfg);
+    if (tlb_lock) {
+        if (env->tlb_lock_state == HEX_LOCK_QUEUED) {
+            env->next_PC += 4;
+            env->tlb_lock_count++;
+            env->tlb_lock_state = HEX_LOCK_OWNER;
+            SET_SYSCFG_FIELD(env, SYSCFG_TLBLOCK, 1);
+            return;
+        }
+        if (env->tlb_lock_state == HEX_LOCK_OWNER) {
+            qemu_log_mask(CPU_LOG_MMU | LOG_GUEST_ERROR,
+                          "Double tlblock at PC: 0x%x, thread may hang\n",
+                          env->next_PC);
+            env->next_PC += 4;
+            CPUState *cs = env_cpu(env);
+            cpu_interrupt(cs, CPU_INTERRUPT_HALT);
+            return;
+        }
+        env->tlb_lock_state = HEX_LOCK_WAITING;
+        CPUState *cs = env_cpu(env);
+        cpu_interrupt(cs, CPU_INTERRUPT_HALT);
+    } else {
+        env->next_PC += 4;
+        env->tlb_lock_count++;
+        env->tlb_lock_state = HEX_LOCK_OWNER;
+        SET_SYSCFG_FIELD(env, SYSCFG_TLBLOCK, 1);
+    }
+
+    if (qemu_loglevel_mask(CPU_LOG_MMU)) {
+        qemu_log_mask(CPU_LOG_MMU, "Threads after hex_tlb_lock:\n");
+        print_thread_states("\tThread");
+    }
+}
+
+void hex_tlb_unlock(CPUHexagonState *env)
+{
+    BQL_LOCK_GUARD();
+    g_assert((env->tlb_lock_count == 0) || (env->tlb_lock_count == 1));
+
+    /* Nothing to do if the TLB isn't locked by this thread */
+    uint32_t syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG);
+    uint8_t tlb_lock = GET_SYSCFG_FIELD(SYSCFG_TLBLOCK, syscfg);
+    if ((tlb_lock == 0) ||
+        (env->tlb_lock_state != HEX_LOCK_OWNER)) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "thread %d attempted to tlbunlock without having the "
+                      "lock, tlb_lock state = %d\n",
+                      env->threadId, env->tlb_lock_state);
+        g_assert(env->tlb_lock_state != HEX_LOCK_WAITING);
+        return;
+    }
+
+    env->tlb_lock_count--;
+    env->tlb_lock_state = HEX_LOCK_UNLOCKED;
+    SET_SYSCFG_FIELD(env, SYSCFG_TLBLOCK, 0);
+
+    /* Look for a thread to unlock */
+    unsigned int this_threadId = env->threadId;
+    CPUHexagonState *unlock_thread = NULL;
+    CPUState *cs;
+    CPU_FOREACH(cs) {
+        CPUHexagonState *thread = cpu_env(cs);
+
+        /*
+         * The hardware implements round-robin fairness, so we look for threads
+         * starting at env->threadId + 1 and incrementing modulo the number of
+         * threads.
+         *
+         * To implement this, we check if thread is a earlier in the modulo
+         * sequence than unlock_thread.
+         *     if unlock thread is higher than this thread
+         *         thread must be between this thread and unlock_thread
+         *     else
+         *         thread higher than this thread is ahead of unlock_thread
+         *         thread must be lower then unlock thread
+         */
+        if (thread->tlb_lock_state == HEX_LOCK_WAITING) {
+            if (!unlock_thread) {
+                unlock_thread = thread;
+            } else if (unlock_thread->threadId > this_threadId) {
+                if (this_threadId < thread->threadId &&
+                    thread->threadId < unlock_thread->threadId) {
+                    unlock_thread = thread;
+                }
+            } else {
+                if (thread->threadId > this_threadId) {
+                    unlock_thread = thread;
+                }
+                if (thread->threadId < unlock_thread->threadId) {
+                    unlock_thread = thread;
+                }
+            }
+        }
+    }
+    if (unlock_thread) {
+        cs = env_cpu(unlock_thread);
+        print_thread("\tWaiting thread found", cs);
+        unlock_thread->tlb_lock_state = HEX_LOCK_QUEUED;
+        SET_SYSCFG_FIELD(unlock_thread, SYSCFG_TLBLOCK, 1);
+        cpu_interrupt(cs, CPU_INTERRUPT_TLB_UNLOCK);
+    }
+
+    if (qemu_loglevel_mask(CPU_LOG_MMU)) {
+        qemu_log_mask(CPU_LOG_MMU, "Threads after hex_tlb_unlock:\n");
+        print_thread_states("\tThread");
+    }
+
+}
+
diff --git a/target/hexagon/hex_mmu.h b/target/hexagon/hex_mmu.h
new file mode 100644
index 0000000000000..fae8aefcac1db
--- /dev/null
+++ b/target/hexagon/hex_mmu.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HEXAGON_MMU_H
+#define HEXAGON_MMU_H
+
+#include "max.h"
+
+struct CPUHexagonTLBContext {
+    uint64_t entries[MAX_TLB_ENTRIES];
+};
+
+extern void hex_tlbw(CPUHexagonState *env, uint32_t index, uint64_t value);
+extern uint32_t hex_tlb_lookup(CPUHexagonState *env, uint32_t ssr, uint32_t VA);
+extern void hex_mmu_realize(CPUHexagonState *env);
+extern void hex_mmu_on(CPUHexagonState *env);
+extern void hex_mmu_off(CPUHexagonState *env);
+extern void hex_mmu_mode_change(CPUHexagonState *env);
+extern bool hex_tlb_find_match(CPUHexagonState *env, target_ulong VA,
+                               MMUAccessType access_type, hwaddr *PA, int *prot,
+                               int *size, int32_t *excp, int mmu_idx);
+extern int hex_tlb_check_overlap(CPUHexagonState *env, uint64_t entry,
+                                 uint64_t index);
+extern void hex_tlb_lock(CPUHexagonState *env);
+extern void hex_tlb_unlock(CPUHexagonState *env);
+void dump_mmu(CPUHexagonState *env);
+#endif
diff --git a/target/hexagon/hex_regs.h b/target/hexagon/hex_regs.h
index bddfc28021c60..ea8c62eba9ce5 100644
--- a/target/hexagon/hex_regs.h
+++ b/target/hexagon/hex_regs.h
@@ -81,4 +81,119 @@ enum {
     HEX_REG_UTIMERHI          = 63,
 };
 
+#ifndef CONFIG_USER_ONLY
+
+#define HEX_GREG_VALUES \
+  DECL_HEX_GREG(G0,         0) \
+  DECL_HEX_GREG(GELR,       0) \
+  DECL_HEX_GREG(G1,         1) \
+  DECL_HEX_GREG(GSR,        1) \
+  DECL_HEX_GREG(G2,         2) \
+  DECL_HEX_GREG(GOSP,       2) \
+  DECL_HEX_GREG(G3,         3) \
+  DECL_HEX_GREG(GBADVA,     3) \
+  DECL_HEX_GREG(GCYCLE_1T,  10) \
+  DECL_HEX_GREG(GCYCLE_2T,  11) \
+  DECL_HEX_GREG(GCYCLE_3T,  12) \
+  DECL_HEX_GREG(GCYCLE_4T,  13) \
+  DECL_HEX_GREG(GCYCLE_5T,  14) \
+  DECL_HEX_GREG(GCYCLE_6T,  15) \
+  DECL_HEX_GREG(GPMUCNT4,   16) \
+  DECL_HEX_GREG(GPMUCNT5,   17) \
+  DECL_HEX_GREG(GPMUCNT6,   18) \
+  DECL_HEX_GREG(GPMUCNT7,   19) \
+  DECL_HEX_GREG(GPCYCLELO,  24) \
+  DECL_HEX_GREG(GPCYCLEHI,  25) \
+  DECL_HEX_GREG(GPMUCNT0,   26) \
+  DECL_HEX_GREG(GPMUCNT1,   27) \
+  DECL_HEX_GREG(GPMUCNT2,   28) \
+  DECL_HEX_GREG(GPMUCNT3,   29) \
+  DECL_HEX_GREG_DONE
+
+#define DECL_HEX_GREG_DONE
+#define DECL_HEX_GREG(name, val) HEX_GREG_ ##name = val,
+enum hex_greg {
+    HEX_GREG_VALUES
+};
+#undef DECL_HEX_GREG
+#undef DECL_HEX_GREG_DONE
+
+#define DECL_HEX_GREG_DONE 0
+#define DECL_HEX_GREG(_, val) (1 << val) |
+static inline bool greg_implemented(enum hex_greg greg)
+{
+#if NUM_GREGS > 32
+#error "NUM_GREGS too large for greg_implemented(): update `impl_bitmap`"
+#endif
+    static int32_t impl_bitmap = HEX_GREG_VALUES;
+    return impl_bitmap & (1 << greg);
+}
+#undef DECL_HEX_GREG
+#undef DECL_HEX_GREG_DONE
+
+#endif /* CONFIG_USER_ONLY */
+
+enum {
+    HEX_SREG_SGP0 = 0,
+    HEX_SREG_SGP1 = 1,
+    HEX_SREG_STID = 2,
+    HEX_SREG_ELR = 3,
+    HEX_SREG_BADVA0 = 4,
+    HEX_SREG_BADVA1 = 5,
+    HEX_SREG_SSR = 6,
+    HEX_SREG_CCR = 7,
+    HEX_SREG_HTID = 8,
+    HEX_SREG_BADVA = 9,
+    HEX_SREG_IMASK = 10,
+    HEX_SREG_GEVB  = 11,
+    HEX_SREG_GLB_START = 16,
+    HEX_SREG_EVB = 16,
+    HEX_SREG_MODECTL = 17,
+    HEX_SREG_SYSCFG = 18,
+    HEX_SREG_IPENDAD = 20,
+    HEX_SREG_VID = 21,
+    HEX_SREG_VID1 = 22,
+    HEX_SREG_BESTWAIT = 23,
+    HEX_SREG_IEL = 24,
+    HEX_SREG_SCHEDCFG = 25,
+    HEX_SREG_IAHL = 26,
+    HEX_SREG_CFGBASE = 27,
+    HEX_SREG_DIAG = 28,
+    HEX_SREG_REV = 29,
+    HEX_SREG_PCYCLELO = 30,
+    HEX_SREG_PCYCLEHI = 31,
+    HEX_SREG_ISDBST = 32,
+    HEX_SREG_ISDBCFG0 = 33,
+    HEX_SREG_ISDBCFG1 = 34,
+    HEX_SREG_LIVELOCK = 35,
+    HEX_SREG_BRKPTPC0 = 36,
+    HEX_SREG_BRKPTCFG0 = 37,
+    HEX_SREG_BRKPTPC1 = 38,
+    HEX_SREG_BRKPTCFG1 = 39,
+    HEX_SREG_ISDBMBXIN = 40,
+    HEX_SREG_ISDBMBXOUT = 41,
+    HEX_SREG_ISDBEN = 42,
+    HEX_SREG_ISDBGPR = 43,
+    HEX_SREG_PMUCNT4 = 44,
+    HEX_SREG_PMUCNT5 = 45,
+    HEX_SREG_PMUCNT6 = 46,
+    HEX_SREG_PMUCNT7 = 47,
+    HEX_SREG_PMUCNT0 = 48,
+    HEX_SREG_PMUCNT1 = 49,
+    HEX_SREG_PMUCNT2 = 50,
+    HEX_SREG_PMUCNT3 = 51,
+    HEX_SREG_PMUEVTCFG = 52,
+    HEX_SREG_PMUSTID0 = 53,
+    HEX_SREG_PMUEVTCFG1 = 54,
+    HEX_SREG_PMUSTID1 = 55,
+    HEX_SREG_TIMERLO = 56,
+    HEX_SREG_TIMERHI = 57,
+    HEX_SREG_PMUCFG = 58,
+    HEX_SREG_S59 = 59,
+    HEX_SREG_S60 = 60,
+    HEX_SREG_S61 = 61,
+    HEX_SREG_S62 = 62,
+    HEX_SREG_S63 = 63,
+};
+
 #endif
diff --git a/target/hexagon/hexswi.c b/target/hexagon/hexswi.c
new file mode 100644
index 0000000000000..a08d7f68917c1
--- /dev/null
+++ b/target/hexagon/hexswi.c
@@ -0,0 +1,728 @@
+/*
+ * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#ifdef CONFIG_USER_ONLY
+#include "exec/helper-proto.h"
+#include "qemu.h"
+#endif
+#include "exec/cpu_ldst.h"
+#include "exec/exec-all.h"
+#include "qemu/log.h"
+#include "qemu/main-loop.h"
+#include "arch.h"
+#include "internal.h"
+#include "macros.h"
+#include "sys_macros.h"
+#include "tcg/tcg-op.h"
+#ifndef CONFIG_USER_ONLY
+#include "hex_mmu.h"
+#include "hexswi.h"
+#include "semihosting/common-semi.h"
+#include "semihosting/syscalls.h"
+#include "semihosting/guestfd.h"
+#endif
+
+#ifndef CONFIG_USER_ONLY
+
+/* non-arm-compatible semihosting calls */
+#define HEXAGON_SPECIFIC_SWI_FLAGS \
+    DEF_SWI_FLAG(EXCEPTION,        0x18) \
+    DEF_SWI_FLAG(READ_CYCLES,      0x40) \
+    DEF_SWI_FLAG(PROF_ON,          0x41) \
+    DEF_SWI_FLAG(PROF_OFF,         0x42) \
+    DEF_SWI_FLAG(WRITECREG,        0x43) \
+    DEF_SWI_FLAG(READ_TCYCLES,     0x44) \
+    DEF_SWI_FLAG(READ_ICOUNT,      0x47) \
+    DEF_SWI_FLAG(PROF_STATSRESET,  0x48) \
+    DEF_SWI_FLAG(DUMP_PMU_STATS,   0x4a) \
+    DEF_SWI_FLAG(READ_PCYCLES,     0x52) \
+    DEF_SWI_FLAG(COREDUMP,         0xCD) \
+    DEF_SWI_FLAG(FTELL,            0x100) \
+    DEF_SWI_FLAG(FSTAT,            0x101) \
+    DEF_SWI_FLAG(STAT,             0x103) \
+    DEF_SWI_FLAG(GETCWD,           0x104) \
+    DEF_SWI_FLAG(ACCESS,           0x105) \
+    DEF_SWI_FLAG(OPENDIR,          0x180) \
+    DEF_SWI_FLAG(CLOSEDIR,         0x181) \
+    DEF_SWI_FLAG(READDIR,          0x182) \
+    DEF_SWI_FLAG(EXEC,             0x185) \
+    DEF_SWI_FLAG(FTRUNC,           0x186)
+
+#define DEF_SWI_FLAG(name, val) HEX_SYS_ ##name = val,
+enum hex_swi_flag {
+    HEXAGON_SPECIFIC_SWI_FLAGS
+};
+#undef DEF_SWI_FLAG
+
+#define DEF_SWI_FLAG(_, val) case val:
+static inline bool is_hexagon_specific_swi_flag(enum hex_swi_flag what_swi)
+{
+    switch (what_swi) {
+    HEXAGON_SPECIFIC_SWI_FLAGS
+        return true;
+    }
+    return false;
+}
+#undef DEF_SWI_FLAG
+
+/* We start from 1 as 0 is used to signal an error from opendir() */
+static const int DIR_INDEX_OFFSET = 1;
+
+static void common_semi_ftell_cb(CPUState *cs, uint64_t ret, int err)
+{
+    if (err) {
+        ret = -1;
+    }
+    common_semi_cb(cs, ret, err);
+}
+
+static void coredump(CPUHexagonState *env)
+{
+    uint32_t ssr = arch_get_system_reg(env, HEX_SREG_SSR);
+    printf("CRASH!\n");
+    printf("I think the exception was: ");
+    switch (GET_SSR_FIELD(SSR_CAUSE, ssr)) {
+    case 0x43:
+        printf("0x43, NMI");
+        break;
+    case 0x42:
+        printf("0x42, Data abort");
+        break;
+    case 0x44:
+        printf("0x44, Multi TLB match");
+        break;
+    case HEX_CAUSE_BIU_PRECISE:
+        printf("0x%x, Bus Error (Precise BIU error)",
+               HEX_CAUSE_BIU_PRECISE);
+        break;
+    case HEX_CAUSE_DOUBLE_EXCEPT:
+        printf("0x%x, Exception observed when EX = 1 (double exception)",
+               HEX_CAUSE_DOUBLE_EXCEPT);
+        break;
+    case HEX_CAUSE_FETCH_NO_XPAGE:
+        printf("0x%x, Privilege violation: User/Guest mode execute"
+               " to page with no execute permissions",
+               HEX_CAUSE_FETCH_NO_XPAGE);
+        break;
+    case HEX_CAUSE_FETCH_NO_UPAGE:
+        printf("0x%x, Privilege violation: "
+               "User mode exececute to page with no user permissions",
+               HEX_CAUSE_FETCH_NO_UPAGE);
+        break;
+    case HEX_CAUSE_INVALID_PACKET:
+        printf("0x%x, Invalid packet",
+               HEX_CAUSE_INVALID_PACKET);
+        break;
+    case HEX_CAUSE_PRIV_USER_NO_GINSN:
+        printf("0x%x, Privilege violation: guest mode insn in user mode",
+               HEX_CAUSE_PRIV_USER_NO_GINSN);
+        break;
+    case HEX_CAUSE_PRIV_USER_NO_SINSN:
+        printf("0x%x, Privilege violation: "
+               "monitor mode insn ins user/guest mode",
+               HEX_CAUSE_PRIV_USER_NO_SINSN);
+        break;
+    case HEX_CAUSE_REG_WRITE_CONFLICT:
+        printf("0x%x, Multiple writes to same register",
+               HEX_CAUSE_REG_WRITE_CONFLICT);
+        break;
+    case HEX_CAUSE_PC_NOT_ALIGNED:
+        printf("0x%x, PC not aligned",
+               HEX_CAUSE_PC_NOT_ALIGNED);
+        break;
+    case HEX_CAUSE_MISALIGNED_LOAD:
+        printf("0x%x, Misaligned Load @ 0x%x",
+               HEX_CAUSE_MISALIGNED_LOAD,
+               arch_get_system_reg(env, HEX_SREG_BADVA));
+        break;
+    case HEX_CAUSE_MISALIGNED_STORE:
+        printf("0x%x, Misaligned Store @ 0x%x",
+               HEX_CAUSE_MISALIGNED_STORE,
+               arch_get_system_reg(env, HEX_SREG_BADVA));
+        break;
+    case HEX_CAUSE_PRIV_NO_READ:
+        printf("0x%x, Privilege violation: "
+            "user/guest read permission @ 0x%x",
+            HEX_CAUSE_PRIV_NO_READ,
+            arch_get_system_reg(env, HEX_SREG_BADVA));
+        break;
+    case HEX_CAUSE_PRIV_NO_WRITE:
+        printf("0x%x, Privilege violation: "
+            "user/guest write permission @ 0x%x",
+            HEX_CAUSE_PRIV_NO_WRITE,
+            arch_get_system_reg(env, HEX_SREG_BADVA));
+        break;
+    case HEX_CAUSE_PRIV_NO_UREAD:
+        printf("0x%x, Privilege violation: user read permission @ 0x%x",
+               HEX_CAUSE_PRIV_NO_UREAD,
+               arch_get_system_reg(env, HEX_SREG_BADVA));
+        break;
+    case HEX_CAUSE_PRIV_NO_UWRITE:
+        printf("0x%x, Privilege violation: user write permission @ 0x%x",
+               HEX_CAUSE_PRIV_NO_UWRITE,
+               arch_get_system_reg(env, HEX_SREG_BADVA));
+        break;
+    case HEX_CAUSE_COPROC_LDST:
+        printf("0x%x, Coprocessor VMEM address error. @ 0x%x",
+               HEX_CAUSE_COPROC_LDST,
+               arch_get_system_reg(env, HEX_SREG_BADVA));
+        break;
+    case HEX_CAUSE_STACK_LIMIT:
+        printf("0x%x, Stack limit check error", HEX_CAUSE_STACK_LIMIT);
+        break;
+    case HEX_CAUSE_FPTRAP_CAUSE_BADFLOAT:
+        printf("0x%X, Floating-Point: Execution of Floating-Point "
+               "instruction resulted in exception",
+               HEX_CAUSE_FPTRAP_CAUSE_BADFLOAT);
+        break;
+    case HEX_CAUSE_NO_COPROC_ENABLE:
+        printf("0x%x, Illegal Execution of Coprocessor Instruction",
+               HEX_CAUSE_NO_COPROC_ENABLE);
+        break;
+    case HEX_CAUSE_NO_COPROC2_ENABLE:
+        printf("0x%x, "
+               "Illegal Execution of Secondary Coprocessor Instruction",
+               HEX_CAUSE_NO_COPROC2_ENABLE);
+        break;
+    case HEX_CAUSE_UNSUPORTED_HVX_64B:
+        printf("0x%x, "
+               "Unsuported Execution of Coprocessor Instruction with 64bits Mode On",
+               HEX_CAUSE_UNSUPORTED_HVX_64B);
+        break;
+    case HEX_CAUSE_VWCTRL_WINDOW_MISS:
+        printf("0x%x, "
+               "Thread accessing a region outside VWCTRL window",
+               HEX_CAUSE_VWCTRL_WINDOW_MISS);
+        break;
+    default:
+        printf("Don't know");
+        break;
+    }
+    printf("\nRegister Dump:\n");
+    hexagon_dump(env, stdout, 0);
+}
+
+static void sim_handle_trap0(CPUHexagonState *env)
+{
+    g_assert(bql_locked());
+    target_ulong what_swi = arch_get_thread_reg(env, HEX_REG_R00);
+    target_ulong swi_info = arch_get_thread_reg(env, HEX_REG_R01);
+    uintptr_t retaddr = 0;
+    CPUState *cs = env_cpu(env);
+
+    if (!is_hexagon_specific_swi_flag(what_swi)) {
+        do_common_semihosting(cs);
+        return;
+    }
+
+    switch (what_swi) {
+
+    case HEX_SYS_EXCEPTION:
+        arch_set_system_reg(env, HEX_SREG_MODECTL, 0);
+        exit(arch_get_thread_reg(env, HEX_REG_R02));
+        break;
+
+    case HEX_SYS_WRITECREG:
+        fprintf(stdout, "%c", swi_info);
+        fflush(stdout);
+        common_semi_cb(cs, 0, 0);
+        break;
+
+    case HEX_SYS_STAT:
+    case HEX_SYS_FSTAT:
+    {
+        /*
+         * This must match the caller's definition, it would be in the
+         * caller's angel.h or equivalent header.
+         */
+        struct __SYS_STAT {
+            uint64_t dev;
+            uint64_t ino;
+            uint32_t mode;
+            uint32_t nlink;
+            uint64_t rdev;
+            uint32_t size;
+            uint32_t __pad1;
+            uint32_t atime;
+            uint32_t mtime;
+            uint32_t ctime;
+            uint32_t __pad2;
+        } sys_stat;
+        struct stat st_buf;
+        uint8_t *st_bufptr = (uint8_t *)&sys_stat;
+        int rc, err = 0;
+        char filename[BUFSIZ];
+        target_ulong physicalFilenameAddr;
+        target_ulong statBufferAddr;
+        hexagon_read_memory(env, swi_info, 4, &physicalFilenameAddr, retaddr);
+
+        if (what_swi == HEX_SYS_STAT) {
+            int i = 0;
+            do {
+                hexagon_read_memory(env, physicalFilenameAddr + i, 1,
+                                    &filename[i], retaddr);
+                i++;
+            } while ((i < BUFSIZ) && filename[i - 1]);
+            rc = stat(filename, &st_buf);
+            err = errno;
+        } else{
+            int fd = physicalFilenameAddr;
+            GuestFD *gf = get_guestfd(fd);
+            if (gf->type != GuestFDHost) {
+                fprintf(stderr, "fstat semihosting only implemented for native mode.\n");
+                g_assert_not_reached();
+            }
+            rc = fstat(gf->hostfd, &st_buf);
+            err = errno;
+        }
+        if (rc == 0) {
+            sys_stat.dev   = st_buf.st_dev;
+            sys_stat.ino   = st_buf.st_ino;
+            sys_stat.mode  = st_buf.st_mode;
+            sys_stat.nlink = (uint32_t) st_buf.st_nlink;
+            sys_stat.rdev  = st_buf.st_rdev;
+            sys_stat.size  = (uint32_t) st_buf.st_size;
+#if defined(__linux__)
+            sys_stat.atime = (uint32_t) st_buf.st_atim.tv_sec;
+            sys_stat.mtime = (uint32_t) st_buf.st_mtim.tv_sec;
+            sys_stat.ctime = (uint32_t) st_buf.st_ctim.tv_sec;
+#elif defined(_WIN32)
+            sys_stat.atime = st_buf.st_atime;
+            sys_stat.mtime = st_buf.st_mtime;
+            sys_stat.ctime = st_buf.st_ctime;
+#endif
+        }
+        hexagon_read_memory(env, swi_info + 4, 4, &statBufferAddr, retaddr);
+
+        for (int i = 0; i < sizeof(sys_stat); i++) {
+            hexagon_write_memory(env, statBufferAddr + i, 1, st_bufptr[i],
+                                 retaddr);
+        }
+        common_semi_cb(cs, rc, err);
+    }
+    break;
+
+    case HEX_SYS_FTRUNC:
+    {
+        int fd;
+        off_t size_limit;
+        hexagon_read_memory(env, swi_info, 4, &fd, retaddr);
+        hexagon_read_memory(env, swi_info + 4, 8, &size_limit, retaddr);
+        semihost_sys_ftruncate(cs, common_semi_cb, fd, size_limit);
+    }
+    break;
+
+    case HEX_SYS_ACCESS:
+    {
+        char filename[BUFSIZ];
+        uint32_t FileNameAddr;
+        uint32_t BufferMode;
+        int rc, err = 0;
+
+        int i = 0;
+
+        hexagon_read_memory(env, swi_info, 4, &FileNameAddr, retaddr);
+        do {
+            hexagon_read_memory(env, FileNameAddr + i, 1, &filename[i], retaddr);
+            i++;
+        } while ((i < BUFSIZ) && (filename[i - 1]));
+        filename[i] = 0;
+
+        hexagon_read_memory(env, swi_info + 4, 4, &BufferMode, retaddr);
+
+        rc = access(filename, BufferMode);
+        if (rc != 0) {
+            err = errno;
+        }
+        common_semi_cb(cs, rc, err);
+    }
+    break;
+
+    case HEX_SYS_GETCWD:
+    {
+        char cwdPtr[PATH_MAX];
+        uint32_t BufferAddr;
+        uint32_t BufferSize;
+        uint32_t rc = 0, err = 0;
+
+        hexagon_read_memory(env, swi_info, 4, &BufferAddr, retaddr);
+        hexagon_read_memory(env, swi_info + 4, 4, &BufferSize, retaddr);
+
+        if (!getcwd(cwdPtr, PATH_MAX)) {
+            err = errno;
+        } else {
+            size_t cwd_size = strlen(cwdPtr);
+            if (cwd_size > BufferSize) {
+                err = ERANGE;
+            } else {
+                for (int i = 0; i < cwd_size; i++) {
+                    hexagon_write_memory(env, BufferAddr + i, 1,
+                                         (uint64_t)cwdPtr[i], retaddr);
+                }
+                rc = BufferAddr;
+            }
+        }
+        common_semi_cb(cs, rc, err);
+        break;
+    }
+
+    case HEX_SYS_EXEC:
+    {
+        qemu_log_mask(LOG_UNIMP, "SYS_EXEC is deprecated\n");
+        common_semi_cb(cs, -1, ENOSYS);
+    }
+    break;
+
+    case HEX_SYS_OPENDIR:
+    {
+        DIR *dir;
+        char buf[BUFSIZ];
+        int rc = 0, err = 0;
+
+        int i = 0;
+        do {
+            hexagon_read_memory(env, swi_info + i, 1, &buf[i], retaddr);
+            i++;
+        } while (buf[i - 1]);
+
+        dir = opendir(buf);
+        if (dir != NULL) {
+            env->dir_list = g_list_append(env->dir_list, dir);
+            rc = g_list_index(env->dir_list, dir) + DIR_INDEX_OFFSET;
+        } else {
+            err = errno;
+        }
+        common_semi_cb(cs, rc, err);
+        break;
+    }
+
+    case HEX_SYS_READDIR:
+    {
+        struct dirent *host_dir_entry = NULL;
+        int dir_index = swi_info - DIR_INDEX_OFFSET;
+        DIR *dir = g_list_nth_data(env->dir_list, dir_index);
+        uint32_t rc = 0, err = 0;
+
+        if (dir) {
+            errno = 0;
+            host_dir_entry = readdir(dir);
+            if (host_dir_entry == NULL) {
+                err = errno;
+            }
+        } else {
+            err = EBADF;
+        }
+
+        if (host_dir_entry) {
+            uint32_t guest_dir_entry = arch_get_thread_reg(env, HEX_REG_R02);
+            hexagon_write_memory(env, guest_dir_entry, 4, host_dir_entry->d_ino,
+                                 retaddr);
+            for (int i = 0; i < sizeof(host_dir_entry->d_name); i++) {
+                hexagon_write_memory(env, guest_dir_entry + 4 + i, 1,
+                                     host_dir_entry->d_name[i], retaddr);
+                if (!host_dir_entry->d_name[i]) {
+                    break;
+                }
+            }
+            rc = guest_dir_entry;
+        }
+        common_semi_cb(cs, rc, err);
+        break;
+    }
+
+    case HEX_SYS_CLOSEDIR:
+    {
+        DIR *dir;
+        int ret = 0, err = 0;
+
+        dir = g_list_nth_data(env->dir_list, swi_info);
+        if (dir != NULL) {
+            ret = closedir(dir);
+            if (ret != 0) {
+                err = errno;
+            }
+        } else {
+            err = EBADF;
+        }
+        common_semi_cb(cs, ret, err);
+        break;
+    }
+
+    case HEX_SYS_COREDUMP:
+        coredump(env);
+        break;
+
+    case HEX_SYS_FTELL:
+    {
+        int fd;
+        hexagon_read_memory(env, swi_info, 4, &fd, retaddr);
+        semihost_sys_lseek(cs, common_semi_ftell_cb, fd, 0, GDB_SEEK_CUR);
+    }
+    break;
+
+    case HEX_SYS_READ_CYCLES:
+    case HEX_SYS_READ_TCYCLES:
+    case HEX_SYS_READ_ICOUNT:
+    {
+        arch_set_thread_reg(env, HEX_REG_R00, 0);
+        arch_set_thread_reg(env, HEX_REG_R01, 0);
+        break;
+    }
+
+    case HEX_SYS_READ_PCYCLES:
+    {
+        arch_set_thread_reg(env, HEX_REG_R00,
+            arch_get_system_reg(env, HEX_SREG_PCYCLELO));
+        arch_set_thread_reg(env, HEX_REG_R01,
+            arch_get_system_reg(env, HEX_SREG_PCYCLEHI));
+        break;
+    }
+
+    case HEX_SYS_PROF_ON:
+    case HEX_SYS_PROF_OFF:
+    case HEX_SYS_PROF_STATSRESET:
+    case HEX_SYS_DUMP_PMU_STATS:
+        common_semi_cb(cs, -1, ENOSYS);
+        qemu_log_mask(LOG_UNIMP, "SWI call %x is unimplemented in QEMU\n",
+                      what_swi);
+        break;
+
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "error: unknown swi call 0x%x\n", what_swi);
+        cpu_abort(cs, "Hexagon Unsupported swi call 0x%x\n", what_swi);
+    }
+}
+
+static void set_addresses(CPUHexagonState *env, target_ulong pc_offset,
+                          target_ulong exception_index)
+
+{
+    arch_set_system_reg(env, HEX_SREG_ELR,
+                        arch_get_thread_reg(env, HEX_REG_PC) + pc_offset);
+    arch_set_thread_reg(env, HEX_REG_PC,
+                        arch_get_system_reg(env, HEX_SREG_EVB) |
+                            (exception_index << 2));
+}
+
+static const char *event_name[] = {
+    [HEX_EVENT_RESET] = "HEX_EVENT_RESET",
+    [HEX_EVENT_IMPRECISE] = "HEX_EVENT_IMPRECISE",
+    [HEX_EVENT_TLB_MISS_X] = "HEX_EVENT_TLB_MISS_X",
+    [HEX_EVENT_TLB_MISS_RW] = "HEX_EVENT_TLB_MISS_RW",
+    [HEX_EVENT_TRAP0] = "HEX_EVENT_TRAP0",
+    [HEX_EVENT_TRAP1] = "HEX_EVENT_TRAP1",
+    [HEX_EVENT_FPTRAP] = "HEX_EVENT_FPTRAP",
+    [HEX_EVENT_DEBUG] = "HEX_EVENT_DEBUG",
+    [HEX_EVENT_INT0] = "HEX_EVENT_INT0",
+    [HEX_EVENT_INT1] = "HEX_EVENT_INT1",
+    [HEX_EVENT_INT2] = "HEX_EVENT_INT2",
+    [HEX_EVENT_INT3] = "HEX_EVENT_INT3",
+    [HEX_EVENT_INT4] = "HEX_EVENT_INT4",
+    [HEX_EVENT_INT5] = "HEX_EVENT_INT5",
+    [HEX_EVENT_INT6] = "HEX_EVENT_INT6",
+    [HEX_EVENT_INT7] = "HEX_EVENT_INT7",
+    [HEX_EVENT_INT8] = "HEX_EVENT_INT8",
+    [HEX_EVENT_INT9] = "HEX_EVENT_INT9",
+    [HEX_EVENT_INTA] = "HEX_EVENT_INTA",
+    [HEX_EVENT_INTB] = "HEX_EVENT_INTB",
+    [HEX_EVENT_INTC] = "HEX_EVENT_INTC",
+    [HEX_EVENT_INTD] = "HEX_EVENT_INTD",
+    [HEX_EVENT_INTE] = "HEX_EVENT_INTE",
+    [HEX_EVENT_INTF] = "HEX_EVENT_INTF"
+};
+
+void hexagon_cpu_do_interrupt(CPUState *cs)
+
+{
+    CPUHexagonState *env = cpu_env(cs);
+    BQL_LOCK_GUARD();
+
+    qemu_log_mask(CPU_LOG_INT, "\t%s: event 0x%x:%s, cause 0x%x(%d)\n",
+                  __func__, cs->exception_index,
+                  event_name[cs->exception_index], env->cause_code,
+                  env->cause_code);
+
+    env->llsc_addr = ~0;
+
+    uint32_t ssr = arch_get_system_reg(env, HEX_SREG_SSR);
+    if (GET_SSR_FIELD(SSR_EX, ssr) == 1) {
+        arch_set_system_reg(env, HEX_SREG_DIAG, env->cause_code);
+        env->cause_code = HEX_CAUSE_DOUBLE_EXCEPT;
+        cs->exception_index = HEX_EVENT_PRECISE;
+    }
+
+    switch (cs->exception_index) {
+    case HEX_EVENT_TRAP0:
+        if (env->cause_code == 0) {
+            sim_handle_trap0(env);
+        }
+
+        hexagon_ssr_set_cause(env, env->cause_code);
+        set_addresses(env, 4, cs->exception_index);
+        break;
+
+    case HEX_EVENT_TRAP1:
+        hexagon_ssr_set_cause(env, env->cause_code);
+        set_addresses(env, 4, cs->exception_index);
+        break;
+
+    case HEX_EVENT_TLB_MISS_X:
+        switch (env->cause_code) {
+        case HEX_CAUSE_TLBMISSX_CAUSE_NORMAL:
+        case HEX_CAUSE_TLBMISSX_CAUSE_NEXTPAGE:
+            qemu_log_mask(CPU_LOG_MMU,
+                          "TLB miss EX exception (0x%x) caught: "
+                          "Cause code (0x%x) "
+                          "TID = 0x%" PRIx32 ", PC = 0x%" PRIx32
+                          ", BADVA = 0x%" PRIx32 "\n",
+                          cs->exception_index, env->cause_code, env->threadId,
+                          arch_get_thread_reg(env, HEX_REG_PC),
+                          arch_get_system_reg(env, HEX_SREG_BADVA));
+
+            hexagon_ssr_set_cause(env, env->cause_code);
+            set_addresses(env, 0, cs->exception_index);
+            break;
+
+        default:
+            cpu_abort(cs,
+                      "1:Hexagon exception %d/0x%x: "
+                      "Unknown cause code %d/0x%x\n",
+                      cs->exception_index, cs->exception_index, env->cause_code,
+                      env->cause_code);
+            break;
+        }
+        break;
+
+    case HEX_EVENT_TLB_MISS_RW:
+        switch (env->cause_code) {
+        case HEX_CAUSE_TLBMISSRW_CAUSE_READ:
+        case HEX_CAUSE_TLBMISSRW_CAUSE_WRITE:
+            qemu_log_mask(CPU_LOG_MMU,
+                          "TLB miss RW exception (0x%x) caught: "
+                          "Cause code (0x%x) "
+                          "TID = 0x%" PRIx32 ", PC = 0x%" PRIx32
+                          ", BADVA = 0x%" PRIx32 "\n",
+                          cs->exception_index, env->cause_code, env->threadId,
+                          env->gpr[HEX_REG_PC],
+                          arch_get_system_reg(env, HEX_SREG_BADVA));
+
+            hexagon_ssr_set_cause(env, env->cause_code);
+            set_addresses(env, 0, cs->exception_index);
+            /* env->sreg[HEX_SREG_BADVA] is set when the exception is raised */
+            break;
+
+        default:
+            cpu_abort(cs,
+                      "2:Hexagon exception %d/0x%x: "
+                      "Unknown cause code %d/0x%x\n",
+                      cs->exception_index, cs->exception_index, env->cause_code,
+                      env->cause_code);
+            break;
+        }
+        break;
+
+    case HEX_EVENT_FPTRAP:
+        hexagon_ssr_set_cause(env, env->cause_code);
+        arch_set_thread_reg(env, HEX_REG_PC,
+                            arch_get_system_reg(env, HEX_SREG_EVB) |
+                                (cs->exception_index << 2));
+        break;
+
+    case HEX_EVENT_DEBUG:
+        hexagon_ssr_set_cause(env, env->cause_code);
+        set_addresses(env, 0, cs->exception_index);
+        qemu_log_mask(LOG_UNIMP, "single-step exception is not handled\n");
+        break;
+
+    case HEX_EVENT_PRECISE:
+        switch (env->cause_code) {
+        case HEX_CAUSE_FETCH_NO_XPAGE:
+        case HEX_CAUSE_FETCH_NO_UPAGE:
+        case HEX_CAUSE_PRIV_NO_READ:
+        case HEX_CAUSE_PRIV_NO_UREAD:
+        case HEX_CAUSE_PRIV_NO_WRITE:
+        case HEX_CAUSE_PRIV_NO_UWRITE:
+        case HEX_CAUSE_MISALIGNED_LOAD:
+        case HEX_CAUSE_MISALIGNED_STORE:
+        case HEX_CAUSE_PC_NOT_ALIGNED:
+            qemu_log_mask(CPU_LOG_MMU,
+                          "MMU permission exception (0x%x) caught: "
+                          "Cause code (0x%x) "
+                          "TID = 0x%" PRIx32 ", PC = 0x%" PRIx32
+                          ", BADVA = 0x%" PRIx32 "\n",
+                          cs->exception_index, env->cause_code, env->threadId,
+                          env->gpr[HEX_REG_PC],
+                          arch_get_system_reg(env, HEX_SREG_BADVA));
+
+
+            hexagon_ssr_set_cause(env, env->cause_code);
+            set_addresses(env, 0, cs->exception_index);
+            /* env->sreg[HEX_SREG_BADVA] is set when the exception is raised */
+            break;
+
+        case HEX_CAUSE_DOUBLE_EXCEPT:
+        case HEX_CAUSE_PRIV_USER_NO_SINSN:
+        case HEX_CAUSE_PRIV_USER_NO_GINSN:
+        case HEX_CAUSE_INVALID_OPCODE:
+        case HEX_CAUSE_NO_COPROC_ENABLE:
+        case HEX_CAUSE_NO_COPROC2_ENABLE:
+        case HEX_CAUSE_UNSUPORTED_HVX_64B:
+        case HEX_CAUSE_REG_WRITE_CONFLICT:
+        case HEX_CAUSE_VWCTRL_WINDOW_MISS:
+            hexagon_ssr_set_cause(env, env->cause_code);
+            set_addresses(env, 0, cs->exception_index);
+            break;
+
+        case HEX_CAUSE_COPROC_LDST:
+            hexagon_ssr_set_cause(env, env->cause_code);
+            set_addresses(env, 0, cs->exception_index);
+            break;
+
+        case HEX_CAUSE_STACK_LIMIT:
+            hexagon_ssr_set_cause(env, env->cause_code);
+            set_addresses(env, 0, cs->exception_index);
+            break;
+
+        default:
+            cpu_abort(cs,
+                      "3:Hexagon exception %d/0x%x: "
+                      "Unknown cause code %d/0x%x\n",
+                      cs->exception_index, cs->exception_index, env->cause_code,
+                      env->cause_code);
+            break;
+        }
+        break;
+
+    case HEX_EVENT_IMPRECISE:
+        qemu_log_mask(LOG_UNIMP,
+                "Imprecise exception: this case is not yet handled");
+        break;
+
+    default:
+        qemu_log_mask(LOG_UNIMP,
+                "Hexagon Unsupported exception 0x%x/0x%x\n",
+                  cs->exception_index, env->cause_code);
+        break;
+    }
+
+    cs->exception_index = HEX_EVENT_NONE;
+}
+
+void register_trap_exception(CPUHexagonState *env, int traptype, int imm,
+                             target_ulong PC)
+{
+    CPUState *cs = env_cpu(env);
+
+    cs->exception_index = (traptype == 0) ? HEX_EVENT_TRAP0 : HEX_EVENT_TRAP1;
+    ASSERT_DIRECT_TO_GUEST_UNSET(env, cs->exception_index);
+
+    env->cause_code = imm;
+    env->gpr[HEX_REG_PC] = PC;
+    cpu_loop_exit(cs);
+}
+#endif
diff --git a/target/hexagon/hexswi.h b/target/hexagon/hexswi.h
new file mode 100644
index 0000000000000..5d232cb06cb05
--- /dev/null
+++ b/target/hexagon/hexswi.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright(c) 2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HEXSWI_H
+#define HEXSWI_H
+
+
+#include "cpu.h"
+
+void hexagon_cpu_do_interrupt(CPUState *cpu);
+void register_trap_exception(CPUHexagonState *env, int type, int imm,
+                             target_ulong PC);
+
+#endif /* HEXSWI_H */
diff --git a/target/hexagon/idef-parser/README.rst b/target/hexagon/idef-parser/README.rst
index 7199177ee33e6..235e3debee3c3 100644
--- a/target/hexagon/idef-parser/README.rst
+++ b/target/hexagon/idef-parser/README.rst
@@ -637,7 +637,7 @@ tinycode for the Hexagon ``add`` instruction
 ::
 
    ---- 00021094
-   mov_i32 pkt_has_store_s1,$0x0
+   mov_i32 pkt_has_scalar_store_s1,$0x0
    add_i32 tmp0,r2,r2
    mov_i32 loc2,tmp0
    mov_i32 new_r1,loc2
diff --git a/target/hexagon/idef-parser/parser-helpers.c b/target/hexagon/idef-parser/parser-helpers.c
index a7dcd85fe43d9..3316c230f8a5f 100644
--- a/target/hexagon/idef-parser/parser-helpers.c
+++ b/target/hexagon/idef-parser/parser-helpers.c
@@ -1725,7 +1725,7 @@ void gen_cancel(Context *c, YYLTYPE *locp)
 
 void gen_load_cancel(Context *c, YYLTYPE *locp)
 {
-    OUT(c, locp, "if (insn->slot == 0 && pkt->pkt_has_store_s1) {\n");
+    OUT(c, locp, "if (insn->slot == 0 && pkt->pkt_has_scalar_store_s1) {\n");
     OUT(c, locp, "ctx->s1_store_processed = false;\n");
     OUT(c, locp, "process_store(ctx, 1);\n");
     OUT(c, locp, "}\n");
@@ -1750,7 +1750,7 @@ void gen_load(Context *c, YYLTYPE *locp, HexValue *width,
 
     /* Lookup the effective address EA */
     find_variable(c, locp, ea, ea);
-    OUT(c, locp, "if (insn->slot == 0 && pkt->pkt_has_store_s1) {\n");
+    OUT(c, locp, "if (insn->slot == 0 && pkt->pkt_has_scalar_store_s1) {\n");
     OUT(c, locp, "probe_noshuf_load(", ea, ", ", width, ", ctx->mem_idx);\n");
     OUT(c, locp, "process_store(ctx, 1);\n");
     OUT(c, locp, "}\n");
diff --git a/target/hexagon/imported/encode_pp.def b/target/hexagon/imported/encode_pp.def
index 0cd30a5e85755..2c45388ab6290 100644
--- a/target/hexagon/imported/encode_pp.def
+++ b/target/hexagon/imported/encode_pp.def
@@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2020 Qualcomm Innovation Center, Inc. All Rights Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -16,6 +16,7 @@
  */
 
 /*
+ * encode32.def
  * Encodings for 32 bit instructions
  *
  */
@@ -341,6 +342,8 @@ DEF_ENC32(L4_pload##TAG##tnew_abs,ICLASS_LD" 1 11 "OPC"  iiiii  PP110tti  1--ddd
 DEF_ENC32(L4_pload##TAG##fnew_abs,ICLASS_LD" 1 11 "OPC"  iiiii  PP111tti  1--ddddd")
 
 
+
+
 /*               0 000  misc: dealloc,loadw_locked,dcfetch      */
 STD_LD_ENC(bzw4,"0 101")
 STD_LD_ENC(bzw2,"0 011")
@@ -375,6 +378,7 @@ DEF_ANTICLASS32(ICLASS_LD" 1110 000----- PP------ --------",LD_ADDR_POST_REG)
 
 DEF_ENC32(L2_deallocframe,    ICLASS_LD" 000 0 000 sssss PP0----- ---ddddd")
 DEF_ENC32(L4_return,          ICLASS_LD" 011 0 000 sssss PP0000-- ---ddddd")
+
 DEF_ENC32(L4_return_t,        ICLASS_LD" 011 0 000 sssss PP0100vv ---ddddd")
 DEF_ENC32(L4_return_f,        ICLASS_LD" 011 0 000 sssss PP1100vv ---ddddd")
 DEF_ENC32(L4_return_tnew_pt,  ICLASS_LD" 011 0 000 sssss PP0110vv ---ddddd")
@@ -382,15 +386,19 @@ DEF_ENC32(L4_return_fnew_pt,  ICLASS_LD" 011 0 000 sssss PP1110vv ---ddddd")
 DEF_ENC32(L4_return_tnew_pnt, ICLASS_LD" 011 0 000 sssss PP0010vv ---ddddd")
 DEF_ENC32(L4_return_fnew_pnt, ICLASS_LD" 011 0 000 sssss PP1010vv ---ddddd")
 
-DEF_ENC32(L2_loadw_locked,ICLASS_LD" 001 0 000 sssss PP000--- 000ddddd")
-
+/** Load Acquire Store Release Encoding **/
 
+DEF_ENC32(L4_loadw_phys,      ICLASS_LD" 001 0 000 sssss PP1ttttt -00ddddd")
+DEF_ENC32(L2_loadw_locked,    ICLASS_LD" 001 0 000 sssss PP000--- 000ddddd")
+DEF_ENC32(L4_loadd_locked,    ICLASS_LD" 001 0 000 sssss PP010--- 000ddddd")
 
 DEF_ENC32(L2_loadw_aq,        ICLASS_LD" 001 0 000 sssss PP001--- 000ddddd")
 DEF_ENC32(L4_loadd_aq,        ICLASS_LD" 001 0 000 sssss PP011--- 000ddddd")
 
-DEF_ENC32(R6_release_at_vi,    ICLASS_ST" 000 01 11sssss PP0ttttt --0011dd")
-DEF_ENC32(R6_release_st_vi,   ICLASS_ST" 000 01 11sssss PP0ttttt --1011dd")
+
+DEF_ENC32(S2_storew_locked,   ICLASS_ST" 000 01 01sssss PP-ttttt ----00dd")
+DEF_ENC32(S4_stored_locked,   ICLASS_ST" 000 01 11sssss PP0ttttt ----00dd")
+
 
 DEF_ENC32(S2_storew_rl_at_vi,  ICLASS_ST" 000 01 01sssss PP-ttttt --0010dd")
 DEF_ENC32(S2_storew_rl_st_vi, ICLASS_ST" 000 01 01sssss PP-ttttt --1010dd")
@@ -398,13 +406,11 @@ DEF_ENC32(S2_storew_rl_st_vi, ICLASS_ST" 000 01 01sssss PP-ttttt --1010dd")
 DEF_ENC32(S4_stored_rl_at_vi,  ICLASS_ST" 000 01 11sssss PP0ttttt --0010dd")
 DEF_ENC32(S4_stored_rl_st_vi, ICLASS_ST" 000 01 11sssss PP0ttttt --1010dd")
 
-DEF_ENC32(L4_loadd_locked,ICLASS_LD" 001 0 000 sssss PP010--- 000ddddd")
-DEF_EXT_SPACE(EXTRACTW,   ICLASS_LD" 001 0 000 iiiii PP0iiiii -01iiiii")
-DEF_ENC32(Y2_dcfetchbo,   ICLASS_LD" 010 0 000 sssss PP0--iii iiiiiiii")
-
-
-
+DEF_ENC32(R6_release_at_vi,    ICLASS_ST" 000 01 11sssss PP0ttttt --0011dd")
+DEF_ENC32(R6_release_st_vi,   ICLASS_ST" 000 01 11sssss PP0ttttt --1011dd")
 
+DEF_EXT_SPACE(EXTRACTW,   ICLASS_LD" 001 0 000 iiiii PP0iiiii 001iiiii")
+DEF_ENC32(Y2_dcfetchbo,   ICLASS_LD" 010 0 000 sssss PP0--iii iiiiiiii")
 
 
 
@@ -488,13 +494,17 @@ STD_PST_ENC(rinew, "1 101","10ttt")
 /*                               x bus/cache     */
 /*                                    x store/cache     */
 DEF_ENC32(S2_allocframe,   ICLASS_ST" 000 01 00xxxxx PP000iii iiiiiiii")
-DEF_ENC32(S2_storew_locked,ICLASS_ST" 000 01 01sssss PP-ttttt ----00dd")
-DEF_ENC32(S4_stored_locked,ICLASS_ST" 000 01 11sssss PP0ttttt ----00dd")
+DEF_ENC32(Y5_l2locka,      ICLASS_ST" 000 01 11sssss PP1----- ------dd")
 DEF_ENC32(Y2_dczeroa,      ICLASS_ST" 000 01 10sssss PP0----- --------")
 
 
-DEF_ENC32(Y2_barrier,      ICLASS_ST" 100 00 00----- PP------ 000-----")
+DEF_ENC32(Y2_barrier,          ICLASS_ST" 100 00 00----- PP------ 000-----")
 DEF_ENC32(Y2_syncht,       ICLASS_ST" 100 00 10----- PP------ --------")
+DEF_ENC32(Y2_l2kill,       ICLASS_ST" 100 00 01----- PP-000-- --------")
+DEF_ENC32(Y5_l2gunlock,    ICLASS_ST" 100 00 01----- PP-010-- --------")
+DEF_ENC32(Y5_l2gclean,     ICLASS_ST" 100 00 01----- PP-100-- --------")
+DEF_ENC32(Y5_l2gcleaninv,  ICLASS_ST" 100 00 01----- PP-110-- --------")
+DEF_ENC32(Y2_l2cleaninvidx,ICLASS_ST" 100 00 11sssss PP------ --------")
 
 
 
@@ -502,9 +512,34 @@ DEF_ENC32(Y2_dccleana,     ICLASS_ST" 000 00 00sssss PP------ --------")
 DEF_ENC32(Y2_dcinva,       ICLASS_ST" 000 00 01sssss PP------ --------")
 DEF_ENC32(Y2_dccleaninva,  ICLASS_ST" 000 00 10sssss PP------ --------")
 
-DEF_ENC32(Y4_l2fetch,      ICLASS_ST" 011 00 00sssss PP-ttttt 000-----")
+/* Super */
+DEF_ENC32(Y2_dckill,       ICLASS_ST" 001 00 00----- PP------ --------")
+DEF_ENC32(Y2_dccleanidx,   ICLASS_ST" 001 00 01sssss PP------ --------")
+DEF_ENC32(Y2_dcinvidx,     ICLASS_ST" 001 00 10sssss PP------ --------")
+DEF_ENC32(Y2_dccleaninvidx,ICLASS_ST" 001 00 11sssss PP------ --------")
+
+DEF_ENC32(Y2_dctagw       ,ICLASS_ST" 010 00 00sssss PP-ttttt --------")
+DEF_ENC32(Y2_dctagr       ,ICLASS_ST" 010 00 01sssss PP------ ---ddddd")
+
+DEF_ENC32(Y4_l2tagw       ,ICLASS_ST" 010 00 10sssss PP0ttttt --------")
+DEF_ENC32(Y4_l2tagr       ,ICLASS_ST" 010 00 11sssss PP------ ---ddddd")
+
+DEF_ENC32(Y4_l2fetch,          ICLASS_ST" 011 00 00sssss PP-ttttt 000-----")
+DEF_ENC32(Y5_l2cleanidx,   ICLASS_ST" 011 00 01sssss PP------ --------")
+DEF_ENC32(Y5_l2invidx,     ICLASS_ST" 011 00 10sssss PP------ --------")
+DEF_ENC32(Y5_l2unlocka,    ICLASS_ST" 011 00 11sssss PP------ --------")
 DEF_ENC32(Y5_l2fetch,      ICLASS_ST" 011 01 00sssss PP-ttttt --------")
 
+DEF_ENC32(Y6_l2gcleanpa,   ICLASS_ST" 011 01 01----- PP-ttttt --------")
+DEF_ENC32(Y6_l2gcleaninvpa,ICLASS_ST" 011 01 10----- PP-ttttt --------")
+
+
+
+
+
+
+
+
 /*******************************/
 /*                             */
 /*                             */
@@ -547,13 +582,23 @@ DEF_ENC32(J2_jumprfnewpt, ICLASS_J" 0011  011sssss  PP-11-uu  --------")
 
 DEF_FIELDROW_DESC32(ICLASS_J" 0100 -------- PP------ --------","[#4] (#u8) ")
 DEF_ENC32(J2_trap0,     ICLASS_J" 0100  00------  PP-iiiii  ---iii--")
-DEF_ENC32(J2_pause,     ICLASS_J" 0100  01------  PP-iiiii  ---iii--")
+DEF_ENC32(J2_trap1,     ICLASS_J" 0100  10-xxxxx  PP-iiiii  ---iii--")
+DEF_ENC32(J2_pause,     ICLASS_J" 0100  01----ii  PP-iiiii  ---iii--")
+
+DEF_FIELDROW_DESC32(ICLASS_J" 0101 -------- PP------ --------","[#5] Rd=(Rs) ")
+DEF_ENC32(Y2_icdatar,   ICLASS_J" 0101  101sssss  PP------  ---ddddd")
+DEF_ENC32(Y2_ictagr,    ICLASS_J" 0101  111sssss  PP------  ---ddddd")
+DEF_ENC32(Y2_ictagw,    ICLASS_J" 0101  110sssss  PP0ttttt  --------")
+DEF_ENC32(Y2_icdataw,   ICLASS_J" 0101  110sssss  PP1ttttt  --------")
 
 DEF_FIELDROW_DESC32(ICLASS_J" 0110 -------- PP------ --------","[#6] icop(Rs) ")
 DEF_ENC32(Y2_icinva,    ICLASS_J" 0110  110sssss  PP000---  --------")
+DEF_ENC32(Y2_icinvidx,  ICLASS_J" 0110  110sssss  PP001---  --------")
+DEF_ENC32(Y2_ickill,    ICLASS_J" 0110  110-----  PP010---  --------")
 
 DEF_FIELDROW_DESC32(ICLASS_J" 0111 -------- PP------ --------","[#7] () ")
 DEF_ENC32(Y2_isync,     ICLASS_J" 0111  11000000  PP0---00  00000010")
+DEF_ENC32(J2_rte,       ICLASS_J" 0111  111-----  PP00----  000-----")
 
 /* JUMP */
 DEF_FIELDROW_DESC32(ICLASS_J" 100- -------- PP------ --------","[#8,9] PC=(#r22)")
@@ -591,7 +636,6 @@ DEF_ENC32(J2_callf,     ICLASS_J" 1101  ii1iiiii  PPi-0-uu  iiiiiii-")
 /*******************************/
 
 
-/* EJP: this has to match what we have in htmldocs.py... so I will call it CJ, we can change it */
 DEF_CLASS32(ICLASS_CJ" 0--- -------- PP------ --------",CJ)
 
 DEF_FIELDROW_DESC32(ICLASS_CJ" 00-- -------- -------- --------","[#0-3]  pd=cmp.xx(R,#u5) ; if ([!]p0.new) jump:[h] #s9:2 ")
@@ -738,12 +782,30 @@ DEF_ENC32(J2_jumprltezpt,ICLASS_CR" 0001  11isssss  PPi1iiii  iiiiiii-")
 
 DEF_FIELDROW_DESC32(    ICLASS_CR" 0010  --------  PP------  --------","[#2] Cd=Rs ")
 DEF_ENC32(A2_tfrrcr,    ICLASS_CR" 0010  001sssss  PP------  ---ddddd")
+DEF_ENC32(G4_tfrgrcr,   ICLASS_CR" 0010  000sssss  PP------  ---ddddd")
+DEF_ENC32(Y4_trace,     ICLASS_CR" 0010  010sssss  PP------  000-----")
+DEF_ENC32(Y6_diag,      ICLASS_CR" 0010  010sssss  PP------  001-----")
+DEF_ENC32(Y6_diag0,     ICLASS_CR" 0010  010sssss  PP-ttttt  010-----")
+DEF_ENC32(Y6_diag1,     ICLASS_CR" 0010  010sssss  PP-ttttt  011-----")
+
+DEF_ENC32(Y6_dmcfgrd,"10101000000sssssPP------101ddddd")
+DEF_ENC32(Y6_dmcfgwr,"10101000000sssssPP-ttttt110-----")
+DEF_ENC32(Y6_dmlink,"10100110000sssssPP-ttttt010-----")
+DEF_ENC32(Y6_dmpause,"10101000000-----PP------011ddddd")
+DEF_ENC32(Y6_dmpoll,"10101000000-----PP------010ddddd")
+DEF_ENC32(Y6_dmresume,"10100110000sssssPP------100-----")
+DEF_ENC32(Y6_dmstart,"10100110000sssssPP------001-----")
+DEF_ENC32(Y6_dmsyncht,"10101000000-----PP-----0111ddddd")
+DEF_ENC32(Y6_dmtlbsynch,"10101000000-----PP-----1111ddddd")
+DEF_ENC32(Y6_dmwait,"10101000000-----PP------001ddddd")
 
 DEF_FIELDROW_DESC32(    ICLASS_CR" 0011  --------  PP------  --------","[#3] Cdd=Rss ")
 DEF_ENC32(A4_tfrpcp,    ICLASS_CR" 0011  001sssss  PP------  ---ddddd")
+DEF_ENC32(G4_tfrgpcp,   ICLASS_CR" 0011  000sssss  PP------  ---ddddd")
 
 DEF_FIELDROW_DESC32(    ICLASS_CR" 1000  --------  PP------  --------","[#8] Rdd=Css ")
 DEF_ENC32(A4_tfrcpp,    ICLASS_CR" 1000  000sssss  PP------  ---ddddd")
+DEF_ENC32(G4_tfrgcpp,   ICLASS_CR" 1000  001sssss  PP------  ---ddddd")
 
 DEF_FIELDROW_DESC32(    ICLASS_CR" 1001  --------  PP------  --------","[#9] (#r8,#U10)")
 DEF_ENC32(J2_ploop1si,  ICLASS_CR" 1001  101IIIII  PP-iiiii  IIIii-II")
@@ -754,6 +816,7 @@ DEF_ENC32(J2_loop1i,    ICLASS_CR" 1001  001IIIII  PP-iiiii  IIIii-II")
 
 DEF_FIELDROW_DESC32(    ICLASS_CR" 1010  --------  PP------  --------","[#10] Rd=Cs ")
 DEF_ENC32(A2_tfrcrr,    ICLASS_CR" 1010  000sssss  PP------  ---ddddd")
+DEF_ENC32(G4_tfrgcrr,   ICLASS_CR" 1010  001sssss  PP------  ---ddddd")
 DEF_ENC32(C4_addipc,    ICLASS_CR" 1010  01001001  PP-iiiii  i--ddddd")
 
 
@@ -776,8 +839,66 @@ DEF_ENC32(C4_and_orn,   ICLASS_CR" 1011  1011--ss  PP0---tt  uu----dd")
 DEF_ENC32(C4_or_andn,   ICLASS_CR" 1011  1101--ss  PP0---tt  uu----dd")
 DEF_ENC32(C4_or_orn,    ICLASS_CR" 1011  1111--ss  PP0---tt  uu----dd")
 
-DEF_ENC32(C4_fastcorner9,       ICLASS_CR"1011 0000--ss  PP1---tt 1--1--dd")
-DEF_ENC32(C4_fastcorner9_not,   ICLASS_CR"1011 0001--ss  PP1---tt 1--1--dd")
+DEF_ENC32(C4_fastcorner9,	ICLASS_CR"1011 0000--ss  PP1---tt 1--1--dd")
+DEF_ENC32(C4_fastcorner9_not,	ICLASS_CR"1011 0001--ss  PP1---tt 1--1--dd")
+
+
+
+/* Supervisor CR ops */
+/* Interrupts */
+DEF_FIELDROW_DESC32(   ICLASS_CR" 0100 -------- PP------  --------","[#4] (Rs,Pt)")
+DEF_ENC32(Y2_swi,      ICLASS_CR" 0100 000sssss PP------ 000-----")
+DEF_ENC32(Y2_cswi,     ICLASS_CR" 0100 000sssss PP------ 001-----")
+DEF_ENC32(Y2_iassignw, ICLASS_CR" 0100 000sssss PP------ 010-----")
+DEF_ENC32(Y2_ciad,     ICLASS_CR" 0100 000sssss PP------ 011-----")
+DEF_ENC32(Y2_setimask, ICLASS_CR" 0100 100sssss PP----tt 000-----")
+DEF_ENC32(Y2_setprio,  ICLASS_CR" 0100 100sssss PP----tt 001-----")
+DEF_ENC32(Y4_siad,     ICLASS_CR" 0100 100sssss PP------ 011-----")
+
+DEF_ENC32(Y2_wait,     ICLASS_CR" 0100 010sssss PP------ 000-----")
+DEF_ENC32(Y2_resume,   ICLASS_CR" 0100 010sssss PP------ 001-----")
+DEF_ENC32(Y2_stop,     ICLASS_CR" 0100 011sssss PP------ 000-----")
+DEF_ENC32(Y2_start,    ICLASS_CR" 0100 011sssss PP------ 001-----")
+DEF_ENC32(Y4_nmi,      ICLASS_CR" 0100 011sssss PP------ 010-----")
+
+DEF_FIELDROW_DESC32(   ICLASS_CR" 0101 -------- PP------  --------","[#5] Rx ")
+DEF_ENC32(Y2_crswap0,  ICLASS_CR" 0101 000xxxxx PP------ --------")
+DEF_ENC32(Y4_crswap1,  ICLASS_CR" 0101 001xxxxx PP------ --------")
+
+DEF_FIELDROW_DESC32(   ICLASS_CR" 0110 -------- PP------  --------","[#6] Rd=(Rs)")
+DEF_ENC32(Y2_getimask, ICLASS_CR" 0110 000sssss PP------ ---ddddd")
+DEF_ENC32(Y2_iassignr, ICLASS_CR" 0110 011sssss PP------ ---ddddd")
+
+DEF_FIELDROW_DESC32(   ICLASS_CR" 0111 -------- PP------  --------","[#7] cr=Rs ")
+DEF_ENC32(Y2_tfrsrcr,  ICLASS_CR" 0111 00-sssss PP------ -ddddddd")
+
+DEF_FIELDROW_DESC32(   ICLASS_CR" 1100 -------- PP------  --------","[#12] ")
+DEF_ENC32(Y2_break,    ICLASS_CR" 1100 001----- PP------ 000-----")
+DEF_ENC32(Y2_tlblock,  ICLASS_CR" 1100 001----- PP------ 001-----")
+DEF_ENC32(Y2_tlbunlock,ICLASS_CR" 1100 001----- PP------ 010-----")
+DEF_ENC32(Y2_k0lock,   ICLASS_CR" 1100 001----- PP------ 011-----")
+DEF_ENC32(Y2_k0unlock, ICLASS_CR" 1100 001----- PP------ 100-----")
+DEF_ENC32(Y2_tlbp,     ICLASS_CR" 1100 100sssss PP------ ---ddddd")
+DEF_ENC32(Y5_tlboc,    ICLASS_CR" 1100 111sssss PP------ ---ddddd")
+DEF_ENC32(Y5_tlbasidi, ICLASS_CR" 1100 101sssss PP------ --------")
+DEF_ENC32(Y2_tlbr,     ICLASS_CR" 1100 010sssss PP------ ---ddddd")
+DEF_ENC32(Y2_tlbw,     ICLASS_CR" 1100 000sssss PP0ttttt --------")
+DEF_ENC32(Y5_ctlbw,    ICLASS_CR" 1100 110sssss PP0ttttt ---ddddd")
+
+DEF_FIELDROW_DESC32(   ICLASS_CR" 1101 -------- PP------  --------","[#13] Rxx ")
+DEF_ENC32(Y4_crswap10, ICLASS_CR" 1101 10-xxxxx PP------ ---00000")
+DEF_ENC32(Y4_tfrspcp,  ICLASS_CR" 1101 00-sssss PP------ -ddddddd")
+
+DEF_FIELDROW_DESC32(   ICLASS_CR" 1110 -------- PP------  --------","[#14] Rd=cr ")
+DEF_ENC32(Y2_tfrscrr,  ICLASS_CR" 1110 1sssssss PP------ ---ddddd")
+
+DEF_FIELDROW_DESC32(   ICLASS_CR" 1111 -------- PP------  --------","[#15] Rdd=Sss ")
+DEF_ENC32(Y4_tfrscpp,  ICLASS_CR" 1111 0sssssss PP------ ---ddddd")
+
+
+
+
+
 
 
 
@@ -956,9 +1077,9 @@ MPY_ENC(F2_dfmin,            "1000","ddddd","0","0","1","1","11")
 MPY_ENC(F2_dfmax,            "1000","ddddd","0","1","0","0","11")
 MPY_ENC(F2_dfmpyll,          "1000","ddddd","0","1","0","1","11")
 
-MPY_ENC(M7_dcmpyrw,          "1000","ddddd","0","0","0","1","10")
+MPY_ENC(M7_dcmpyrw,       	"1000","ddddd","0","0","0","1","10")
 MPY_ENC(M7_dcmpyrwc,         "1000","ddddd","0","0","1","1","10")
-MPY_ENC(M7_dcmpyiw,          "1000","ddddd","0","1","1","0","10")
+MPY_ENC(M7_dcmpyiw,       	"1000","ddddd","0","1","1","0","10")
 MPY_ENC(M7_dcmpyiwc,         "1000","ddddd","0","1","1","1","10")
 
 
@@ -967,14 +1088,14 @@ DEF_FIELDROW_DESC32(ICLASS_M" 1001 -------- PP------ --------","[#9] Rd=(Rss,Rtt
 MPY_ENC(M2_vdmpyrs_s0,       "1001","ddddd","0","0","0","0","00")
 MPY_ENC(M2_vdmpyrs_s1,       "1001","ddddd","0","0","0","1","00")
 
-MPY_ENC(M7_wcmpyrw,          "1001","ddddd","0","0","1","0","00")
+MPY_ENC(M7_wcmpyrw,      	 "1001","ddddd","0","0","1","0","00")
 MPY_ENC(M7_wcmpyrw_rnd,      "1001","ddddd","0","0","1","1","00")
-MPY_ENC(M7_wcmpyiw,          "1001","ddddd","0","1","0","0","00")
+MPY_ENC(M7_wcmpyiw,       	 "1001","ddddd","0","1","0","0","00")
 MPY_ENC(M7_wcmpyiw_rnd,      "1001","ddddd","0","1","0","1","00")
 
-MPY_ENC(M7_wcmpyrwc,         "1001","ddddd","0","1","1","0","00")
+MPY_ENC(M7_wcmpyrwc,      	 "1001","ddddd","0","1","1","0","00")
 MPY_ENC(M7_wcmpyrwc_rnd,     "1001","ddddd","0","1","1","1","00")
-MPY_ENC(M7_wcmpyiwc,         "1001","ddddd","1","0","0","0","00")
+MPY_ENC(M7_wcmpyiwc,       	 "1001","ddddd","1","0","0","0","00")
 MPY_ENC(M7_wcmpyiwc_rnd,     "1001","ddddd","1","0","0","1","00")
 
 
@@ -1030,10 +1151,10 @@ MPY_ENC(F2_dfmpylh,          "1010","xxxxx","0","0","0","0","11")
 MPY_ENC(F2_dfmpyhh,          "1010","xxxxx","0","0","0","1","11")
 
 
-MPY_ENC(M7_dcmpyrw_acc,      "1010","xxxxx","0","0","0","1","10")
-MPY_ENC(M7_dcmpyrwc_acc,     "1010","xxxxx","0","0","1","1","10")
-MPY_ENC(M7_dcmpyiw_acc,      "1010","xxxxx","0","1","1","0","10")
-MPY_ENC(M7_dcmpyiwc_acc,     "1010","xxxxx","1","0","1","0","10")
+MPY_ENC(M7_dcmpyrw_acc,       	"1010","xxxxx","0","0","0","1","10")
+MPY_ENC(M7_dcmpyrwc_acc,         "1010","xxxxx","0","0","1","1","10")
+MPY_ENC(M7_dcmpyiw_acc,       	"1010","xxxxx","0","1","1","0","10")
+MPY_ENC(M7_dcmpyiwc_acc,         "1010","xxxxx","1","0","1","0","10")
 
 
 
@@ -1063,7 +1184,6 @@ SP_MPY(M2_mpy_sat_rnd,       "1100","ddddd","1","1","0")
 SP_MPY(M2_mpyu,              "1100","ddddd","0","0","1")
 
 DEF_FIELDROW_DESC32(ICLASS_M" 1101 -------- PP------ --------","[#13] Rd=(Rs,Rt)")
-/* EJP: same as mpyi MPY_ENC(M2_mpyui,            "1101","ddddd","0","0","1","0","00") */
 MPY_ENC(M2_mpyi,             "1101","ddddd","0","0","0","0","00")
 MPY_ENC(M2_mpy_up,           "1101","ddddd","0","0","0","0","01")
 MPY_ENC(M2_mpyu_up,          "1101","ddddd","0","0","1","0","01")
@@ -1266,7 +1386,6 @@ DEF_ENC32(C2_cmovenewif,ICLASS_ALU2op" 1110 1uu0iiii PP1iiiii iiiddddd")
 DEF_ENC32(C2_cmoveit,   ICLASS_ALU2op" 1110 0uu0iiii PP0iiiii iiiddddd")
 DEF_ENC32(C2_cmoveif,   ICLASS_ALU2op" 1110 1uu0iiii PP0iiiii iiiddddd")
 
-
 DEF_FIELDROW_DESC32(    ICLASS_ALU2op" 1111 -------- PP------ --------","[#15] nop")
 DEF_ENC32(A2_nop,       ICLASS_ALU2op" 1111 -------- PP------ --------")
 
@@ -1408,9 +1527,6 @@ DEF_FIELDROW_DESC32(ICLASS_ALU3op"  1110 -------- PP------ --------","[#14] Rese
 
 
 
-
-
-
 /*******************************/
 /*                             */
 /*                             */
@@ -1508,7 +1624,6 @@ SH_RRI6_ENC(S6_rol_i_##TAGEND,MAJ4,MIN3,SMOD1 "11",DSTCHARS)
 
 
 DEF_FIELDROW_DESC32(ICLASS_S2op" 0000 -------- PP------ --------","[#0] Rdd=(Rss,#u6)")
-/* EJP: there is actually quite a bit of space here, look at the reserved bits */
 I6SHIFTTYPES(p,                 "0000","000","0","ddddd")
 I5SHIFTTYPES_NOROL(vw,          "0000","010","0","ddddd")
 I4SHIFTTYPES(vh,                "0000","100","0","ddddd")
@@ -1620,8 +1735,8 @@ SH2_RR_ENC(A2_roundsat,           "1000","110","-","001","ddddd")
 SH_RRI5_ENC(S2_asr_i_svw_trun,    "1000","110",    "010","ddddd")
 SH_RRI5_ENC(A4_bitspliti,         "1000","110",    "100","ddddd")
 
-SH_RRI5_ENC(A7_clip,              "1000","110",    "101","ddddd")
-SH_RRI5_ENC(A7_vclip,             "1000","110",    "110","ddddd")
+SH_RRI5_ENC(A7_clip,         	  "1000","110",    "101","ddddd")
+SH_RRI5_ENC(A7_vclip,         	  "1000","110",    "110","ddddd")
 
 
 SH2_RR_ENC(S4_clbpnorm,           "1000","011","-","000","ddddd")
@@ -1743,10 +1858,11 @@ SH_RRR_ENC(S2_shuffob,          "0001","00-","-","10-","ddddd")
 SH_RRR_ENC(S2_shuffeh,          "0001","00-","-","11-","ddddd")
 
 SH_RRR_ENC(S2_shuffoh,          "0001","10-","-","000","ddddd")
+// 001
 SH_RRR_ENC(S2_vtrunewh,         "0001","10-","-","010","ddddd")
-SH_RRR_ENC(S6_vtrunehb_ppp,     "0001","10-","-","011","ddddd")
+SH_RRR_ENC(S6_vtrunehb_ppp,		"0001","10-","-","011","ddddd")
 SH_RRR_ENC(S2_vtrunowh,         "0001","10-","-","100","ddddd")
-SH_RRR_ENC(S6_vtrunohb_ppp,     "0001","10-","-","101","ddddd")
+SH_RRR_ENC(S6_vtrunohb_ppp,		"0001","10-","-","101","ddddd")
 SH_RRR_ENC(S2_lfsp,             "0001","10-","-","110","ddddd")
 
 SH_RRR_ENC(S4_vxaddsubw,        "0001","01-","-","000","ddddd")
@@ -1780,8 +1896,6 @@ SH_RRR_ENC(S4_vrcrotate,        "0011","11-","i","11i","ddddd")
 DEF_FIELDROW_DESC32(ICLASS_S3op" 0100 -------- PP------ --------","[#4] Rd=(Rs,Rt,#u3)")
 DEF_ENC32(S2_addasl_rrri, ICLASS_S3op" 0100   000 sssss PP0ttttt iiiddddd")
 
-
-
 DEF_FIELDROW_DESC32(ICLASS_S3op" 0101 -------- PP------ --------","[#5] Rd=(Rss,Rt)")
 SH_RRR_ENC(S2_asr_r_svw_trun,   "0101","---","-","010","ddddd")
 SH_RRR_ENC(M4_cmpyi_wh,         "0101","---","-","100","ddddd")
@@ -1841,6 +1955,7 @@ DEF_FIELDROW_DESC32(ICLASS_S3op" 1010 -------- PP------ --------","[#10] Rxx=(Rs
 SH_RRR_ENC(S2_insertp_rp,       "1010","0--","0","---","xxxxx")
 SH_RRR_ENC(M4_xor_xacc,         "1010","10-","0","000","xxxxx")
 
+
 DEF_FIELDROW_DESC32(ICLASS_S3op" 1011 -------- PP------ --------","[#11] Rxx=(Rss,Rt)")
 RSHIFTTYPES(p_or,               "1011","000","-","-","xxxxx")
 RSHIFTTYPES(p_and,              "1011","010","-","-","xxxxx")
@@ -1848,19 +1963,19 @@ RSHIFTTYPES(p_nac,              "1011","100","-","-","xxxxx")
 RSHIFTTYPES(p_acc,              "1011","110","-","-","xxxxx")
 RSHIFTTYPES(p_xor,              "1011","011","-","-","xxxxx")
 
-SH_RRR_ENCX(A4_vrmaxh,          "1011","001","0","001","uuuuu")
-SH_RRR_ENCX(A4_vrmaxuh,         "1011","001","1","001","uuuuu")
-SH_RRR_ENCX(A4_vrmaxw,          "1011","001","0","010","uuuuu")
-SH_RRR_ENCX(A4_vrmaxuw,         "1011","001","1","010","uuuuu")
+SH_RRR_ENCX(A4_vrmaxh,		"1011","001","0","001","uuuuu")
+SH_RRR_ENCX(A4_vrmaxuh,		"1011","001","1","001","uuuuu")
+SH_RRR_ENCX(A4_vrmaxw,		"1011","001","0","010","uuuuu")
+SH_RRR_ENCX(A4_vrmaxuw,		"1011","001","1","010","uuuuu")
 
-SH_RRR_ENCX(A4_vrminh,          "1011","001","0","101","uuuuu")
-SH_RRR_ENCX(A4_vrminuh,         "1011","001","1","101","uuuuu")
-SH_RRR_ENCX(A4_vrminw,          "1011","001","0","110","uuuuu")
-SH_RRR_ENCX(A4_vrminuw,         "1011","001","1","110","uuuuu")
+SH_RRR_ENCX(A4_vrminh,		"1011","001","0","101","uuuuu")
+SH_RRR_ENCX(A4_vrminuh,		"1011","001","1","101","uuuuu")
+SH_RRR_ENCX(A4_vrminw,		"1011","001","0","110","uuuuu")
+SH_RRR_ENCX(A4_vrminuw,		"1011","001","1","110","uuuuu")
 
-SH_RRR_ENC(S2_vrcnegh,          "1011","001","1","111","xxxxx")
+SH_RRR_ENC(S2_vrcnegh,		"1011","001","1","111","xxxxx")
 
-SH_RRR_ENC(S4_vrcrotate_acc,    "1011","101","i","--i","xxxxx")
+SH_RRR_ENC(S4_vrcrotate_acc,	"1011","101","i","--i","xxxxx")
 
 
 DEF_FIELDROW_DESC32(ICLASS_S3op" 1100 -------- PP------ --------","[#12] Rx=(Rs,Rt)")
@@ -1874,11 +1989,6 @@ DEF_FIELDROW_DESC32(ICLASS_S3op" 1101 -------- PP------ --------","[#13] Reserve
 DEF_FIELDROW_DESC32(ICLASS_S3op" 1110 -------- PP------ --------","[#14] Reserved")
 
 
-DEF_FIELDROW_DESC32(ICLASS_S3op" 1111 -------- PP------ --------","[#14] User Instruction")
-
-
-
-
 
 
 
@@ -2129,3 +2239,5 @@ OP_OPI_RI(lsr,"1")
 DEF_FIELDROW_DESC32(ICLASS_ALU64" 1111 -------- PP------ --------","[#15] Rd=(Rs,Ru,#u6:2)")
 DEF_ENC32(M4_mpyri_addr_u2, ICLASS_ALU64" 1111   0ii sssss PPiddddd iiiuuuuu")
 DEF_ENC32(M4_mpyri_addr,    ICLASS_ALU64" 1111   1ii sssss PPiddddd iiiuuuuu")
+
+
diff --git a/target/hexagon/imported/ldst.idef b/target/hexagon/imported/ldst.idef
index 53198176a994e..4e1e5d5326ddb 100644
--- a/target/hexagon/imported/ldst.idef
+++ b/target/hexagon/imported/ldst.idef
@@ -203,6 +203,9 @@ Q6INSN(S2_storew_locked,"memw_locked(Rs32,Pd4)=Rt32", ATTRIBS(A_REGWRSIZE_4B,A_M
 Q6INSN(L4_loadd_locked,"Rdd32=memd_locked(Rs32)", ATTRIBS(A_REGWRSIZE_8B,A_MEMSIZE_8B,A_LOAD,A_RESTRICT_SLOT0ONLY,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK), "Load double with lock",
 { fEA_REG(RsV); fLOAD_LOCKED(1,8,u,EA,RddV) })
 
+Q6INSN(L4_loadw_phys,"Rd32=memw_phys(Rs32,Rt32)", ATTRIBS(A_REGWRSIZE_4B,A_PRIV,A_RESTRICT_SLOT0ONLY,A_NOTE_PRIV,A_MEMSIZE_4B,A_LOAD,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET), "Load word from physical address",
+{ fLOAD_PHYS(1,4,u,RsV,RtV,RdV); })
+
 Q6INSN(S4_stored_locked,"memd_locked(Rs32,Pd4)=Rtt32", ATTRIBS(A_REGWRSIZE_8B,A_MEMSIZE_8B,A_STORE,A_RESTRICT_SLOT0ONLY,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK,A_RESTRICT_LATEPRED,A_NOTE_LATEPRED), "Store word with lock",
 { fEA_REG(RsV); fSTORE_LOCKED(1,8,EA,RttV,PdV) })
 
diff --git a/target/hexagon/imported/macros.def b/target/hexagon/imported/macros.def
old mode 100755
new mode 100644
index 4bbcfdd5e194a..f24f89f361263
--- a/target/hexagon/imported/macros.def
+++ b/target/hexagon/imported/macros.def
@@ -353,6 +353,12 @@ DEF_MACRO(
     ()
 )
 
+DEF_MACRO(
+    fREAD_SSR, /* read SSR register */
+    (READ_RREG(REG_SSR)),          /* behavior */
+    ()
+)
+
 DEF_MACRO(
     fWRITE_LR, /* write lr */
     WRITE_RREG(REG_LR,A),          /* behavior */
@@ -371,12 +377,36 @@ DEF_MACRO(
     (A_IMPLICIT_WRITES_SP)
 )
 
+DEF_MACRO(
+    fWRITE_GOSP, /* write gosp */
+    WRITE_RREG(REG_GOSP,A),          /* behavior */
+    (A_IMPLICIT_WRITES_GOSP)
+)
+
 DEF_MACRO(
     fREAD_SP, /* read stack pointer */
     (READ_RREG(REG_SP)),          /* behavior */
     ()
 )
 
+DEF_MACRO(
+    fREAD_GOSP, /* read guest other stack pointer */
+    (READ_RREG(REG_GOSP)),          /* behavior */
+    ()
+)
+
+DEF_MACRO(
+    fREAD_GELR, /* read guest other stack pointer */
+    (READ_RREG(REG_GELR)),          /* behavior */
+    ()
+)
+
+DEF_MACRO(
+    fREAD_GEVB, /* read guest other stack pointer */
+    (READ_RREG(REG_GEVB)),          /* behavior */
+    ()
+)
+
 DEF_MACRO(
     fREAD_CSREG, /* read  CS register */
     (READ_RREG(REG_CSA+N)),          /* behavior */
@@ -570,6 +600,11 @@ DEF_MACRO(
     WRITE_PREG(3,VAL),     /* behavior */
     (A_IMPLICIT_WRITES_P3)
 )
+DEF_MACRO(
+	fWRITE_P3_LATE, /* write Predicate 0 */
+	{WRITE_PREG(3,VAL); fHIDE(MARK_LATE_PRED_WRITE(3))} ,          /* behavior */
+	(A_IMPLICIT_WRITES_P3,A_RESTRICT_LATEPRED)
+)
 
 DEF_MACRO(
     fPART1, /* write Predicate 0 */
@@ -660,6 +695,7 @@ DEF_MACRO(
     ((size8s_t)((size2s_t)(A))),
     /* optional attributes */
 )
+
 DEF_MACRO(
     fCAST2_8u, /* macro name */
     ((size8u_t)((size2u_t)(A))),
@@ -1532,18 +1568,209 @@ DEF_MACRO(fECHO,
 /* OS interface and stop/wait               */
 /********************************************/
 
+DEF_MACRO(RUNNABLE_THREADS_MAX,
+    (thread->processor_ptr->runnable_threads_max),
+    ()
+)
+
+DEF_MACRO(THREAD_IS_ON,
+    ((PROC->arch_proc_options->thread_enable_mask>>TNUM) & 0x1),
+    ()
+)
+
+DEF_MACRO(THREAD_EN_MASK,
+    ((PROC->arch_proc_options->thread_enable_mask)),
+    ()
+)
+
+
+
+DEF_MACRO(READ_IMASK,
+    (((TH) >= (thread->processor_ptr->runnable_threads_max)) ? 0 : (thread->processor_ptr->thread[TH]->Regs[REG_IMASK])),
+    ()
+)
+DEF_MACRO(WRITE_IMASK,
+    if ((TH) < (thread->processor_ptr->runnable_threads_max)) { thread->processor_ptr->thread[TH]->Regs[REG_IMASK]=(VAL & reg_mutability[REG_IMASK] ); },
+    (A_IMPLICIT_WRITES_IMASK_ANYTHREAD)
+)
+
+
+DEF_MACRO(WRITE_PRIO,
+    {
+        if ((TH) < (thread->processor_ptr->runnable_threads_max)) {
+            size4u_t tid_reg = thread->processor_ptr->thread[TH]->Regs[REG_TID];
+            fINSERT_BITS(tid_reg, reg_field_info[STID_PRIO].width, reg_field_info[STID_PRIO].offset, VAL);
+            LOG_OTHER_THREAD_REG_WRITE(thread,REG_TID,tid_reg,TH);
+        }
+    },
+    (A_IMPLICIT_WRITES_STID_PRIO_ANYTHREAD)
+)
+
+
+DEF_MACRO(DO_IASSIGNW,
+    {
+        int i;
+        int intbitpos = ((REG>>16)&0xF);
+        for (i=0;i<RUNNABLE_THREADS_MAX;i++) {
+            if(( (thread->processor_ptr->arch_proc_options->thread_enable_mask>>i) & 0x1)) {
+                fINSERT_BITS(thread->processor_ptr->thread[i]->Regs[REG_IMASK],1, intbitpos, (REG>>i) & 1);
+           }
+        }
+    },
+    (A_IMPLICIT_WRITES_IMASK_ANYTHREAD)
+)
+
+
+
+
+DEF_MACRO(fDO_NMI,
+    {
+        int i;
+        for (i=0;i<RUNNABLE_THREADS_MAX;i++) {
+            if( ( (thread->processor_ptr->arch_proc_options->thread_enable_mask>>i) & 0x1) ) {
+                if (SREG & (1<<i)) {
+                    register_nmi_interrupt(thread->processor_ptr->thread[i]);
+                }
+            }
+        }
+    },
+)
+
+DEF_MACRO(fDO_TRACE,
+    {
+        fHIDE(HEX_CALLBACK(thread->processor_ptr->options->trace_callback,
+            thread->system_ptr,thread->processor_ptr,
+            thread->threadId,SREG);)
+    },
+)
+
+DEF_MACRO(DO_IASSIGNR,
+    {
+        int i;
+        int result=0;
+        int intbitpos = ((SREG>>16)&0xF);
+        for (i=0;i<RUNNABLE_THREADS_MAX;i++) {
+            if(( (thread->processor_ptr->arch_proc_options->thread_enable_mask>>i) & 0x1)) {
+                result |= (((thread->processor_ptr->thread[i]->Regs[REG_IMASK]>>intbitpos)&1)<<i);
+            }
+        }
+        DREG=result;
+    },
+    ()
+)
+
+DEF_MACRO(DO_SWI,
+        {fHIDE(HEX_CALLBACK(thread->processor_ptr->options->swi_callback,
+         thread->system_ptr,thread->processor_ptr,
+         thread->threadId,REG));
+         LOG_GLOBAL_REG_WRITE(REG_IPEND,(GLOBAL_REG_READ(REG_IPEND) | (REG & GLOBAL_REG_READ(REG_IEL))));
+        },
+        (A_EXCEPTION_SWI)
+)
+
+DEF_MACRO(DO_CSWI,
+        LOG_GLOBAL_REG_WRITE(REG_IPEND,GLOBAL_REG_READ(REG_IPEND) & ~((REG) & GLOBAL_REG_READ(REG_IEL)));,
+        ()
+)
+
+DEF_MACRO(DO_CIAD,
+        sys_ciad(thread,VAL); LOG_GLOBAL_REG_WRITE(REG_IAD,GLOBAL_REG_READ(REG_IAD) & ~(VAL));,
+        (A_EXCEPTION_SWI)
+)
+
+DEF_MACRO(DO_SIAD,
+        sys_siad(thread,VAL); LOG_GLOBAL_REG_WRITE(REG_IAD,GLOBAL_REG_READ(REG_IAD) | (VAL));,
+        (A_EXCEPTION_SWI)
+)
+
+DEF_MACRO(fBREAK,
+    {isdb_brkpt_insn(thread->processor_ptr,thread->threadId);},
+    ()
+)
+
 DEF_MACRO(fPAUSE,
     {sys_pause(thread, insn->slot, IMM);},
     ()
 )
 
+
 DEF_MACRO(fTRAP,
     warn("Trap NPC=%x ",fREAD_NPC());
     warn("Trap exception, PCYCLE=%lld TYPE=%d NPC=%x IMM=0x%x",thread->processor_ptr->pstats[pcycles],TRAPTYPE,fREAD_NPC(),IMM);
     register_trap_exception(thread,fREAD_NPC(),TRAPTYPE,IMM);,
+    (A_EXCEPTION_SWI)
+)
+
+DEF_MACRO(fINTERNAL_CLEAR_SAMEPAGE,
+    /* force re-xlate at next fetch, refresh of in_user_mode, etc */
+    /* Permissions change too... */
+    sys_utlb_invalidate(thread->processor_ptr,thread),
+    /* NOTHING */
+)
+
+DEF_MACRO(fCLEAR_RTE_EX,
+      {
+        fLOG_REG_FIELD(SSR,SSR_EX,0);
+        fINTERNAL_CLEAR_SAMEPAGE();
+      },
+      ()
+)
+
+DEF_MACRO(fTLB_LOCK_AVAILABLE,
+    (fREAD_GLOBAL_REG_FIELD(SYSCONF,SYSCFG_TLBLOCK) == 0),
     ()
 )
 
+DEF_MACRO(fK0_LOCK_AVAILABLE,
+    (fREAD_GLOBAL_REG_FIELD(SYSCONF,SYSCFG_K0LOCK) == 0),
+    ()
+)
+
+DEF_MACRO(fSET_TLB_LOCK,
+      {
+      if (fTLB_LOCK_AVAILABLE()) {
+        fLOG_GLOBAL_REG_FIELD(SYSCONF,SYSCFG_TLBLOCK,1);
+      } else {
+        sys_waiting_for_tlb_lock(thread);
+      }
+      },
+      ()
+)
+
+DEF_MACRO(fSET_K0_LOCK,
+      {
+          if (fK0_LOCK_AVAILABLE() && sys_k0lock_queue_ready(thread)) {
+              warn("k0lock: T%d: PC=0x%x: PCycle=%lld",thread->threadId,thread->Regs[REG_PC],thread->processor_ptr->pstats[pcycles]);
+              fLOG_GLOBAL_REG_FIELD(SYSCONF,SYSCFG_K0LOCK,1);
+          } else {
+              warn("k0lock_waiting: T%d: PC=0x%x: PCycle=%lld",thread->threadId,thread->Regs[REG_PC],thread->processor_ptr->pstats[pcycles]);
+                sys_waiting_for_k0_lock(thread);
+          }
+      },
+      ()
+)
+
+DEF_MACRO(fCLEAR_TLB_LOCK,
+      {
+          int i;
+          fLOG_GLOBAL_REG_FIELD(SYSCONF,SYSCFG_TLBLOCK,0);
+          for (i = 0; i < RUNNABLE_THREADS_MAX; i++) {
+              if(( (thread->processor_ptr->arch_proc_options->thread_enable_mask>>i) & 0x1)) {
+                  thread->processor_ptr->thread[i]->cu_tlb_lock_waiting = 0;
+              }
+          }
+      },
+      ()
+)
+
+DEF_MACRO(fCLEAR_K0_LOCK,
+      do {
+      warn("k0unlock: T%d: PC=0x%x: Pcycle=%lld",thread->threadId,thread->Regs[REG_PC], thread->processor_ptr->pstats[pcycles]);
+      sys_initiate_clear_k0_lock(thread);
+      } while (0),
+      ()
+)
+
 DEF_MACRO(fALIGN_REG_FIELD_VALUE,
     ((VAL)<<reg_field_info[FIELD].offset),
     /* */
@@ -1554,6 +1781,26 @@ DEF_MACRO(fGET_REG_FIELD_MASK,
     /* */
 )
 
+DEF_MACRO(fLOG_REG_FIELD,
+    LOG_MASKED_REG_WRITE(thread,REG_##REG,
+    fALIGN_REG_FIELD_VALUE(FIELD,VAL),
+    fGET_REG_FIELD_MASK(FIELD)),
+    ()
+)
+
+DEF_MACRO(fWRITE_GLOBAL_REG_FIELD,
+    fINSERT_BITS(thread->processor_ptr->global_regs[REG_##REG],
+            reg_field_info[FIELD].width,
+            reg_field_info[FIELD].offset,VAL),
+)
+
+DEF_MACRO(fLOG_GLOBAL_REG_FIELD,
+    LOG_MASKED_GLOBAL_REG_WRITE(REG_##REG,
+        fALIGN_REG_FIELD_VALUE(FIELD,VAL),
+        fGET_REG_FIELD_MASK(FIELD)),
+    ()
+)
+
 DEF_MACRO(fREAD_REG_FIELD,
     fEXTRACTU_BITS(thread->Regs[REG_##REG],
         reg_field_info[FIELD].width,
@@ -1561,6 +1808,13 @@ DEF_MACRO(fREAD_REG_FIELD,
     /* ATTRIBS */
 )
 
+DEF_MACRO(fREAD_GLOBAL_REG_FIELD,
+    fEXTRACTU_BITS(thread->processor_ptr->global_regs[REG_##REG],
+        reg_field_info[FIELD].width,
+        reg_field_info[FIELD].offset),
+    /* ATTRIBS */
+)
+
 DEF_MACRO(fGET_FIELD,
     fEXTRACTU_BITS(VAL,
         reg_field_info[FIELD].width,
@@ -1576,6 +1830,185 @@ DEF_MACRO(fSET_FIELD,
     /* ATTRIBS */
 )
 
+DEF_MACRO(fSET_RUN_MODE_NOW,
+        {thread->processor_ptr->global_regs[REG_MODECTL] |= (1<<TNUM);
+         thread->last_commit_cycle = thread->processor_ptr->pcycle_counter;
+         sys_recalc_num_running_threads(thread->processor_ptr);},
+)
+
+DEF_MACRO(fIN_DEBUG_MODE,
+    (thread->debug_mode || (fREAD_GLOBAL_REG_FIELD(ISDBST,ISDBST_DEBUGMODE) & 1<<TNUM)),
+    ()
+)
+DEF_MACRO(fIN_DEBUG_MODE_NO_ISDB,
+    (thread->debug_mode),
+    ()
+)
+
+
+DEF_MACRO(fIN_DEBUG_MODE_WARN,
+    {
+        if (fREAD_GLOBAL_REG_FIELD(ISDBST,ISDBST_DEBUGMODE) & 1<<TNUM)
+            warn("In ISDB debug mode, but TB told me to step normally");
+    },
+    ()
+)
+
+DEF_MACRO(fCLEAR_RUN_MODE,
+    {fLOG_GLOBAL_REG_FIELD(MODECTL,MODECTL_E,
+     fREAD_GLOBAL_REG_FIELD(MODECTL,MODECTL_E) & ~(1<<(TNUM)))},
+    /* NOTHING */
+)
+
+DEF_MACRO(fCLEAR_RUN_MODE_NOW,
+    do {
+        fWRITE_GLOBAL_REG_FIELD(MODECTL,MODECTL_E,
+        fREAD_GLOBAL_REG_FIELD(MODECTL,MODECTL_E) & ~(1<<(TNUM)));
+        sys_recalc_num_running_threads(thread->processor_ptr);
+    } while (0),
+    /* NOTHING */
+)
+
+DEF_MACRO(fGET_RUN_MODE,
+        ((thread->processor_ptr->global_regs[REG_MODECTL]>>TNUM)&0x1),
+)
+
+DEF_MACRO(fSET_WAIT_MODE,
+    {fLOG_GLOBAL_REG_FIELD(MODECTL,MODECTL_W,
+    fREAD_GLOBAL_REG_FIELD(MODECTL,MODECTL_W) | 1<<(TNUM))},
+    /* NOTHING */
+)
+
+DEF_MACRO(fCLEAR_WAIT_MODE,
+        {thread->processor_ptr->global_regs[REG_MODECTL] &= ~(1<<(TNUM+16));
+         thread->last_commit_cycle = thread->processor_ptr->pcycle_counter;
+         sys_recalc_num_running_threads(thread->processor_ptr);},
+)
+
+DEF_MACRO(fGET_WAIT_MODE,
+        ((thread->processor_ptr->global_regs[REG_MODECTL]>>(TNUM+16))&0x1),
+)
+
+
+DEF_MACRO(fRESET_THREAD,
+        register_reset_interrupt(T,NUM),
+)
+
+DEF_MACRO(fREAD_CURRENT_EVB,
+    (GLOBAL_REG_READ(REG_EVB)),
+    /* nothing */
+)
+
+DEF_MACRO(fREAD_ELR,
+    READ_RREG(REG_ELR),
+    ()
+)
+
+DEF_MACRO(fPOW2_HELP_ROUNDUP,
+    ((VAL) | ((VAL) >> 1) | ((VAL) >> 2) | ((VAL) >> 4) | ((VAL) >> 8) | ((VAL) >> 16)),
+    ()
+)
+
+DEF_MACRO(fPOW2_ROUNDUP,
+    fPOW2_HELP_ROUNDUP((VAL)-1)+1,
+    ()
+)
+
+DEF_MACRO(fTLB_IDXMASK,
+    ((INDEX) & (fPOW2_ROUNDUP(fCAST4u(thread->processor_ptr->arch_proc_options->jtlb_size)) - 1)),
+    ()
+)
+
+DEF_MACRO(fTLB_NONPOW2WRAP,
+    (((INDEX) >= thread->processor_ptr->arch_proc_options->jtlb_size) ? ((INDEX) - thread->processor_ptr->arch_proc_options->jtlb_size) : (INDEX)),
+    /* ATTRIBS */
+)
+
+DEF_MACRO(fTLBW,
+    do {size4u_t __myidx = fTLB_NONPOW2WRAP(fTLB_IDXMASK(INDEX));
+        TLB_REG_WRITE(__myidx,VALUE);
+        fHIDE(HEX_CALLBACK(thread->processor_ptr->options->tlbw_callback,thread->system_ptr,thread->processor_ptr,thread->threadId,__myidx);)
+        fHIDE(sys_tlb_write(thread,__myidx,VALUE);)} while (0),
+    /* ATTRIBS */
+)
+
+DEF_MACRO(fTLB_ENTRY_OVERLAP,
+    fHIDE( (sys_check_overlap(thread,VALUE)!=-2) ),
+    /* ATTRIBS */
+)
+
+DEF_MACRO(fTLB_ENTRY_OVERLAP_IDX,
+    fHIDE(sys_check_overlap(thread,VALUE)),
+    /* ATTRIBS */
+)
+
+
+DEF_MACRO(fTLBR,
+    TLB_REG_READ(fTLB_NONPOW2WRAP(fTLB_IDXMASK(INDEX))),
+    /* ATTRIBS */
+)
+
+DEF_MACRO(fTLBP,
+    tlb_lookup(thread,((TLBHI)>>12),((TLBHI)<<12),1),
+    /* attribs */
+)
+
+
+
+DEF_MACRO(READ_SGP0,
+    READ_RREG(REG_SGP),
+    ()
+)
+
+DEF_MACRO(READ_SGP1,
+    READ_RREG(REG_SGP+1),
+    ()
+)
+
+DEF_MACRO(READ_SGP10,
+    READ_RREG_PAIR(REG_SGP),
+    ()
+)
+
+DEF_MACRO(READ_UGP,
+    READ_RREG(REG_UGP),
+)
+
+DEF_MACRO(WRITE_SGP0,
+    WRITE_RREG(REG_SGP,VAL),
+    (A_IMPLICIT_WRITES_SGP0)
+)
+
+DEF_MACRO(WRITE_SGP1,
+    WRITE_RREG(REG_SGP+1,VAL),
+    (A_IMPLICIT_WRITES_SGP1)
+)
+
+DEF_MACRO(WRITE_SGP10,
+    WRITE_RREG_PAIR(REG_SGP,VAL),
+    (A_IMPLICIT_WRITES_SGP0,A_IMPLICIT_WRITES_SGP1)
+)
+
+DEF_MACRO(WRITE_UGP,
+        WRITE_RREG(REG_UGP,VAL),
+)
+
+DEF_MACRO(fSTART,
+    fLOG_GLOBAL_REG_FIELD(MODECTL,MODECTL_E, fREAD_GLOBAL_REG_FIELD(MODECTL,MODECTL_E) | (((REG & ((1<<RUNNABLE_THREADS_MAX)-1))) & THREAD_EN_MASK(thread->processor_ptr))),
+    ()
+)
+
+DEF_MACRO(fRESUME,
+    fLOG_GLOBAL_REG_FIELD(MODECTL,MODECTL_W,
+    fREAD_GLOBAL_REG_FIELD(MODECTL,MODECTL_W) & (~(REG))),
+    ()
+)
+
+DEF_MACRO(fGET_TNUM,
+    thread->threadId,
+    ()
+)
+
 /********************************************/
 /* Cache Management                         */
 /********************************************/
@@ -1602,6 +2035,11 @@ DEF_MACRO(fISYNC,
 )
 
 
+DEF_MACRO(fICFETCH,
+    ,
+    ()
+)
+
 DEF_MACRO(fDCFETCH,
     sys_dcfetch(thread, (REG), insn->slot),
     (A_MEMLIKE)
@@ -1615,6 +2053,34 @@ DEF_MACRO(fICINVA,
     (A_ICINVA)
 )
 
+DEF_MACRO(fDCTAGR,
+    ({DST=sys_dctagr(thread, INDEX, insn->slot,DSTREGNO);})/* FIXME */,
+    ()
+)
+
+DEF_MACRO(fDCTAGW,
+    (sys_dctagw(thread, INDEX, PART2, insn->slot)),
+    ()
+)
+DEF_MACRO(fICTAGR,
+    ({DST=sys_ictagr(thread, INDEX, insn->slot,REGNO);}),
+    ()
+)
+
+DEF_MACRO(fICDATAR,
+    ({DST=sys_icdatar(thread, INDEX, insn->slot);}),
+    ()
+)
+
+DEF_MACRO(fICTAGW,
+    (sys_ictagw(thread, INDEX, PART2, insn->slot)),
+    ()
+)
+DEF_MACRO(fICDATAW,
+    ({ fHIDE(); }),
+    ()
+)
+
 DEF_MACRO(fL2FETCH,
     sys_l2fetch(thread, ADDR,HEIGHT,WIDTH,STRIDE,FLAGS, insn->slot),
     (A_MEMLIKE,A_L2FETCH)
@@ -1635,6 +2101,12 @@ DEF_MACRO(fDCZEROA,
     (A_MEMLIKE)
 )
 
+DEF_MACRO(fDCINVA,
+    sys_dcinva(thread, (REG)),
+    (A_MEMLIKE)
+)
+
+
 DEF_MACRO(fCHECKFORPRIV,
     {sys_check_privs(thread); if (EXCEPTION_DETECTED) return; },
     ()
@@ -1645,6 +2117,16 @@ DEF_MACRO(fCHECKFORGUEST,
     ()
 )
 
+DEF_MACRO(fTAKEN_INTERRUPT_EDGECLEAR,
+        { proc->global_regs[REG_IPEND] &= ~(INT_NUMTOMASK(intnum) & proc->global_regs[REG_IEL]); },
+        ()
+)
+
+DEF_MACRO(fSET_IAD,
+        { sys_siad(thread,INT_NUMTOMASK(intnum)); thread->processor_ptr->global_regs[REG_IAD] |= INT_NUMTOMASK(intnum); },
+        ()
+)
+
 DEF_MACRO(fBRANCH_SPECULATE_STALL,
     {
         sys_speculate_branch_stall(thread, insn->slot, JUMP_COND(JUMP_PRED_SET),
@@ -1664,3 +2146,79 @@ DEF_MACRO(IV1DEAD,
     ,
     ()
 )
+
+DEF_MACRO(fIN_MONITOR_MODE,
+    sys_in_monitor_mode(thread),
+    ()
+)
+
+DEF_MACRO(fIN_USER_MODE,
+    sys_in_user_mode(thread),
+    ()
+)
+
+DEF_MACRO(fIN_GUEST_MODE,
+    sys_in_guest_mode(thread),
+    ()
+)
+
+DEF_MACRO(fGRE_ENABLED,
+    fREAD_REG_FIELD(CCR,CCR_GRE),
+    ()
+)
+
+DEF_MACRO(fGTE_ENABLED,
+    fREAD_REG_FIELD(CCR,CCR_GRE),
+    ()
+)
+
+DEF_MACRO(fTRAP1_VIRTINSN,
+    ((fIN_GUEST_MODE())
+     && (fGRE_ENABLED())
+     && (   ((IMM) == 1)
+         || ((IMM) == 3)
+         || ((IMM) == 4)
+         || ((IMM) == 6))),
+    ()
+)
+
+DEF_MACRO(fVIRTINSN_RTE,
+    do {
+        thread->trap1_info = TRAP1_VIRTINSN_RTE;
+        fLOG_REG_FIELD(SSR,SSR_SS,fREAD_REG_FIELD(GSR,GSR_SS));
+        fLOG_REG_FIELD(CCR,CCR_GIE,fREAD_REG_FIELD(GSR,GSR_IE));
+        fLOG_REG_FIELD(SSR,SSR_GM,!fREAD_REG_FIELD(GSR,GSR_UM));
+        fBRANCH((fREAD_GELR() & -4),COF_TYPE_RTE);
+        fINTERNAL_CLEAR_SAMEPAGE();
+    } while (0),
+    (A_IMPLICIT_WRITES_CCR,A_IMPLICIT_WRITES_SSR)
+)
+
+DEF_MACRO(fVIRTINSN_SETIE,
+    do {
+        fLOG_REG_FIELD(CCR,CCR_GIE,(REG) & 1);
+        REG = fREAD_REG_FIELD(CCR,CCR_GIE);
+        thread->trap1_info = TRAP1_VIRTINSN_SETIE;
+    } while (0),
+    (A_IMPLICIT_WRITES_CCR)
+)
+
+DEF_MACRO(fVIRTINSN_GETIE,
+    {
+        thread->trap1_info = TRAP1_VIRTINSN_GETIE;
+        REG = fREAD_REG_FIELD(CCR,CCR_GIE);
+    },
+    ()
+)
+
+DEF_MACRO(fVIRTINSN_SPSWAP,
+    do {
+        if (fREAD_REG_FIELD(GSR,GSR_UM)) {
+            size4u_t TEMP = REG;
+            REG = fREAD_GOSP();
+            fWRITE_GOSP(TEMP);
+            thread->trap1_info = TRAP1_VIRTINSN_SPSWAP;
+        }
+    } while (0),
+    (A_IMPLICIT_WRITES_GOSP)
+)
diff --git a/target/hexagon/imported/mmvec/encode_ext.def b/target/hexagon/imported/mmvec/encode_ext.def
index 402438f566c17..9df9204764419 100644
--- a/target/hexagon/imported/mmvec/encode_ext.def
+++ b/target/hexagon/imported/mmvec/encode_ext.def
@@ -647,7 +647,7 @@ DEF_ENC(V6_vsubububb_sat,    ICLASS_CJ" 1 110 101 vvvvv PP 0 uuuuu 101 ddddd")
 DEF_ENC(V6_vmpyewuh_64,        ICLASS_CJ" 1 110 101 vvvvv PP 0 uuuuu 110 ddddd")
 
 DEF_FIELDROW_DESC32(        ICLASS_CJ" 1 110 --0 ----- PP 1 ----- ----- ---","Vx32=Vu32")
-DEF_ENC(V6_vunpackob,         ICLASS_CJ" 1 110 --0 ---00 PP 1 uuuuu 000 xxxxx") //
+DEF_ENC(V6_vunpackob,         ICLASS_CJ" 1 110 --0 --000 PP 1 uuuuu 000 xxxxx") //
 DEF_ENC(V6_vunpackoh,         ICLASS_CJ" 1 110 --0 ---00 PP 1 uuuuu 001 xxxxx") //
 //DEF_ENC(V6_vunpackow,     ICLASS_CJ" 1 110 --0 ---00 PP 1 uuuuu 010 xxxxx") //
 
@@ -804,5 +804,31 @@ DEF_ENC(V6_vmpyewuh,    ICLASS_CJ" 1 111 111 vvvvv PP 0 uuuuu 101 ddddd")
 DEF_ENC(V6_vmpyowh,        ICLASS_CJ" 1 111 111 vvvvv PP 0 uuuuu 111 ddddd")
 DEF_ENC(V6_vmpyuhvs,"00011111110vvvvvPP1uuuuu111ddddd")
 
+DEF_ENC(V6_vadd_hf,"00011111011vvvvvPP1uuuuu011ddddd")
+DEF_ENC(V6_vadd_sf,"00011111101vvvvvPP1uuuuu001ddddd")
+DEF_ENC(V6_vadd_qf16,"00011111011vvvvvPP1uuuuu010ddddd")
+DEF_ENC(V6_vadd_qf16_mix,"00011111011vvvvvPP1uuuuu100ddddd")
+DEF_ENC(V6_vadd_qf32,"00011111101vvvvvPP1uuuuu000ddddd")
+DEF_ENC(V6_vadd_qf32_mix,"00011111101vvvvvPP1uuuuu010ddddd")
+
+DEF_ENC(V6_vconv_hf_qf16,"00011110--0--100PP1uuuuu011ddddd")
+DEF_ENC(V6_vconv_hf_qf32,"00011110--0--100PP1uuuuu110ddddd")
+DEF_ENC(V6_vconv_sf_qf32,"00011110--0--100PP1uuuuu000ddddd")
+
+DEF_ENC(V6_vmpy_qf16,"00011111111vvvvvPP1uuuuu011ddddd")
+DEF_ENC(V6_vmpy_qf16_hf,"00011111111vvvvvPP1uuuuu100ddddd")
+DEF_ENC(V6_vmpy_qf16_mix_hf,"00011111111vvvvvPP1uuuuu101ddddd")
+DEF_ENC(V6_vmpy_qf32,"00011111111vvvvvPP1uuuuu000ddddd")
+DEF_ENC(V6_vmpy_qf32_hf,"00011111111vvvvvPP1uuuuu111ddddd")
+DEF_ENC(V6_vmpy_qf32_mix_hf,"00011111100vvvvvPP1uuuuu000ddddd")
+DEF_ENC(V6_vmpy_qf32_qf16,"00011111111vvvvvPP1uuuuu110ddddd")
+DEF_ENC(V6_vmpy_qf32_sf,"00011111111vvvvvPP1uuuuu001ddddd")
+
+DEF_ENC(V6_vsub_hf,"00011111011vvvvvPP1uuuuu110ddddd")
+DEF_ENC(V6_vsub_sf,"00011111101vvvvvPP1uuuuu100ddddd")
+DEF_ENC(V6_vsub_qf32,"00011111101vvvvvPP1uuuuu011ddddd")
+DEF_ENC(V6_vsub_qf32_mix,"00011111101vvvvvPP1uuuuu101ddddd")
+DEF_ENC(V6_vsub_qf16,"00011111011vvvvvPP1uuuuu101ddddd")
+DEF_ENC(V6_vsub_qf16_mix,"00011111011vvvvvPP1uuuuu111ddddd")
 
 #endif /* NO MMVEC */
diff --git a/target/hexagon/imported/mmvec/ext.idef b/target/hexagon/imported/mmvec/ext.idef
index 03d31f6181d75..1b7c5afb42f76 100644
--- a/target/hexagon/imported/mmvec/ext.idef
+++ b/target/hexagon/imported/mmvec/ext.idef
@@ -1400,6 +1400,376 @@ ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vmpyhus_acc, "Vxx32+=vmpyhus(Vu32,Vv32)","
     VxxV.v[1].w[i] += fMPY16SU(fGETHALF(1, VuV.w[i]), fGETUHALF(1, VvV.uw[i])))
 
 
+/* VMPY_QF32 */
+/* multiply qf32 input, produce qf32 output*/
+ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(32,vmpy_qf32,"Vd32.qf32=vmpy(Vu32.qf32,Vv32.qf32)","Vector multiply: qf32 output from qf32 input",
+    fHIDE(unfloat )u = fPARSEQF32(VuV.qf32[i]);
+    fHIDE(unfloat )v = fPARSEQF32(VvV.qf32[i]);
+    VdV.qf32[i] = fRNDSATQF32(u.exp+v.exp, u.sig*v.sig, 0))
+
+/* VMPY_QF32_SF */
+/* multiply ieee sf input, produce qf32 output*/
+ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(32,vmpy_qf32_sf,"Vd32.qf32=vmpy(Vu32.sf,Vv32.sf)","Vector multiply: qf32 output from IEEE sf input",
+    fHIDE(unfloat )u = fPARSESF(VuV.sf[i]);
+    fHIDE(unfloat )v = fPARSESF(VvV.sf[i]);
+    VdV.qf32[i] = fRNDSATQF32(u.exp+v.exp, u.sig*v.sig, 0);
+    if(u.sign^v.sign) VdV.qf32[i] = fNEGQF32(VdV.qf32[i]))
+
+
+/* VMPY_QF16 */
+ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(16,vmpy_qf16,"Vd32.qf16=vmpy(Vu32.qf16,Vv32.qf16)","Vector multiply: qf16 output from qf16 inupt",
+    fHIDE(unfloat )u = fPARSEQF16(VuV.qf16[i]);
+    fHIDE(unfloat )v = fPARSEQF16(VvV.qf16[i]);
+    VdV.qf16[i] = fRNDSATQF16(u.exp+v.exp, u.sig*v.sig, 0))
+
+/* VMPY_QF16_HF */
+ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(16,vmpy_qf16_hf,"Vd32.qf16=vmpy(Vu32.hf,Vv32.hf)","Vector multiply: qf16 output from ieee hf input",
+    fHIDE(unfloat )u = fPARSEHF(VuV.hf[i]);
+    fHIDE(unfloat )v = fPARSEHF(VvV.hf[i]);
+    VdV.qf16[i] = fRNDSATQF16(u.exp+v.exp, u.sig*v.sig, 0);
+    if(u.sign^v.sign) VdV.qf16[i] = fNEGQF16(VdV.qf16[i]))
+
+/* VMPY_QF16_with_QF16_HF */
+/* get the magnitude of qf16 before multiply */
+ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(16,vmpy_qf16_mix_hf,"Vd32.qf16=vmpy(Vu32.qf16,Vv32.hf)","Vector multiply: qf16 output from mixed input of qf16 and ieee hf",
+    fHIDE(unfloat )u = fPARSEQF16(VuV.qf16[i]);
+    fHIDE(unfloat )v = fPARSEHF(VvV.hf[i]);
+    VdV.qf16[i] = fRNDSATQF16(u.exp+v.exp, u.sig*v.sig, 0);
+    if(v.sign) VdV.qf16[i] = fNEGQF16(VdV.qf16[i]))
+
+/* VMPY_QF32_QF16 */
+ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(32,vmpy_qf32_qf16,"Vdd32.qf32=vmpy(Vu32.qf16,Vv32.qf16)","Vector multiply: double qf32 output from qf16 input",
+    fHIDE(unfloat )u0 = fPARSEQF16(VuV.w[i] & 0xFFFF);
+    fHIDE(unfloat )u1 = fPARSEQF16((VuV.w[i]>>16) & 0xFFFF);
+    fHIDE(unfloat )v0 = fPARSEQF16(VvV.w[i] & 0xFFFF);
+    fHIDE(unfloat )v1 = fPARSEQF16((VvV.w[i]>>16) & 0xFFFF);
+    VddV.v[0].qf32[i] = fRNDSATQF32(u0.exp+v0.exp, u0.sig*v0.sig, 0);
+    VddV.v[1].qf32[i] = fRNDSATQF32(u1.exp+v1.exp, u1.sig*v1.sig, 0))
+
+/* VMPY_QF32_HF */
+ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(32,vmpy_qf32_hf,"Vdd32.qf32=vmpy(Vu32.hf,Vv32.hf)","Vector multiply: double qf32 output from ieee hf input",
+    fHIDE(unfloat )u0 = fPARSEHF(VuV.w[i] & 0xFFFF);
+    fHIDE(unfloat )u1 = fPARSEHF((VuV.w[i]>>16) & 0xFFFF);
+    fHIDE(unfloat )v0 = fPARSEHF(VvV.w[i] & 0xFFFF);
+    fHIDE(unfloat )v1 = fPARSEHF((VvV.w[i]>>16) & 0xFFFF);
+    VddV.v[0].qf32[i] = fRNDSATQF32(u0.exp+v0.exp, u0.sig*v0.sig, 0);
+    VddV.v[1].qf32[i] = fRNDSATQF32(u1.exp+v1.exp, u1.sig*v1.sig, 0);
+    if(u0.sign^v0.sign) VddV.v[0].qf32[i] = fNEGQF32(VddV.v[0].qf32[i]);
+    if(u1.sign^v1.sign) VddV.v[1].qf32[i] = fNEGQF32(VddV.v[1].qf32[i]))
+
+/* VMPY_QF32_with_QF16_HF */
+ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(32,vmpy_qf32_mix_hf,"Vdd32.qf32=vmpy(Vu32.qf16,Vv32.hf)","Vector multiply: double qf32 output from mixed input of qf16 and ieee hf",
+    fHIDE(unfloat )u0 = fPARSEQF16(VuV.w[i] & 0xFFFF);
+    fHIDE(unfloat )u1 = fPARSEQF16((VuV.w[i]>>16) & 0xFFFF);
+    fHIDE(unfloat )v0 = fPARSEHF(VvV.w[i] & 0xFFFF);
+    fHIDE(unfloat )v1 = fPARSEHF((VvV.w[i]>>16) & 0xFFFF);
+    VddV.v[0].qf32[i] = fRNDSATQF32(u0.exp+v0.exp, u0.sig*v0.sig, 0);
+    VddV.v[1].qf32[i] = fRNDSATQF32(u1.exp+v1.exp, u1.sig*v1.sig, 0);
+    if(v0.sign) VddV.v[0].qf32[i] = fNEGQF32(VddV.v[0].qf32[i]);
+    if(v1.sign) VddV.v[1].qf32[i] = fNEGQF32(VddV.v[1].qf32[i]))
+
+/* VADD_QF32 */
+ITERATOR_INSN_SHIFT_SLOT(32,vadd_qf32,"Vd32.qf32=vadd(Vu32.qf32,Vv32.qf32)","Vector addition of qf32 input",
+    fHIDE(unfloat )u = fPARSEQF32(VuV.qf32[i]);
+    fHIDE(unfloat )v = fPARSEQF32(VvV.qf32[i]);
+    fHIDE(size2s_t exp=0;)
+    if (u.exp>v.exp) {
+        exp = u.exp+((u.sig==0.0)? (-(FRAC_SF+1)):ilogb(u.sig));
+        if (exp<v.exp) exp = v.exp;
+    } else {
+        exp = v.exp+((v.sig==0.0)? (-(FRAC_SF+1)):ilogb(v.sig));
+        if (exp<u.exp) exp = u.exp;
+    }
+    fHIDE(double ) sig_u = ldexp(u.sig, u.exp-exp);
+    fHIDE(double ) sig_v = ldexp(v.sig, v.exp-exp);
+    fHIDE(double ) sig = sig_u + sig_v;
+    fHIDE(double ) sig_low = (u.exp>v.exp) ? (sig_u-sig)+sig_v : (sig_v-sig)+sig_u;
+    VdV.qf32[i] = fRNDSATQF32(exp, sig, sig_low))
+
+/* VADD_SF */
+ITERATOR_INSN_SHIFT_SLOT(32,vadd_sf,"Vd32.qf32=vadd(Vu32.sf,Vv32.sf)","Vector addition of sf input",
+    fHIDE(unfloat )u = fPARSESF(VuV.sf[i]);
+    fHIDE(unfloat )v = fPARSESF(VvV.sf[i]);
+    fHIDE(size2s_t exp=0;)
+    if (u.exp>v.exp) {
+        exp = u.exp+((u.sig==0.0)? (-(FRAC_SF+1)):ilogb(u.sig));
+        if (exp<v.exp) exp = v.exp;
+    } else {
+        exp = v.exp+((v.sig==0.0)? (-(FRAC_SF+1)):ilogb(v.sig));
+        if (exp<u.exp) exp = u.exp;
+    }
+    //printf("ARCHSIM: u.sign:%d, v.sign:%d, u.sig:%10.30f, v.sig:%10.30f\\n", u.sign, v.sign, u.sig, v.sig);
+
+    fHIDE(double ) sig_u = ldexp(u.sig, u.exp-exp);
+    fHIDE(double ) sig_v = ldexp(v.sig, v.exp-exp);
+
+    //printf("ARCHSIM: u.exp:%d, v.exp:%d, exp:%d, sig_u:%10.30f, sig_v:%10.30f\\n", u.exp, v.exp, exp, sig_u, sig_v);
+
+    fHIDE(double sig;)
+    fHIDE(double sig_low;)
+
+    if((u.sign^v.sign)==0){
+        sig = sig_u + sig_v;
+        sig_low = (u.exp>v.exp) ? (sig_u-sig)+sig_v : (sig_v-sig)+sig_u;
+    }
+    else if((u.sign==0) && (v.sign==1))
+    {
+        sig = sig_u - sig_v;
+        sig_low = (u.exp>v.exp) ? (sig_u-sig)-sig_v : sig_u-(sig_v+sig);
+    }
+    else{
+        sig = sig_v - sig_u;
+        sig_low = (v.exp>u.exp) ? (sig_v-sig)-sig_u : sig_v-(sig_u+sig);
+    }
+    VdV.qf32[i] = fRNDSATQF32(exp, sig, sig_low);
+    //printf("ARCHSIM: output:%x\\n", VdV.qf32[i]);
+    if(u.sign && v.sign)   VdV.qf32[i] = fNEGQF32(VdV.qf32[i]))
+
+/* VADD_QF32_MIX */
+ITERATOR_INSN_SHIFT_SLOT(32,vadd_qf32_mix,"Vd32.qf32=vadd(Vu32.qf32,Vv32.sf)","Vector addition of mixed qf32 and sf",
+    fHIDE(unfloat )u = fPARSEQF32(VuV.qf32[i]);
+    fHIDE(unfloat )v = fPARSESF(VvV.sf[i]);
+    if(v.sign) v.sig = (-1.0)*v.sig;
+    fHIDE(size2s_t exp=0;)
+    if (u.exp>v.exp) {
+        exp = u.exp+((u.sig==0.0)? (-(FRAC_SF+1)):ilogb(u.sig));
+        if (exp<v.exp) exp = v.exp;
+    } else {
+        exp = v.exp+((v.sig==0.0)? (-(FRAC_SF+1)):ilogb(v.sig));
+        if (exp<u.exp) exp = u.exp;
+    }
+    fHIDE(double ) sig_u = ldexp(u.sig, u.exp-exp);
+    fHIDE(double ) sig_v = ldexp(v.sig, v.exp-exp);
+    fHIDE(double) sig = sig_u + sig_v;
+    fHIDE(double) sig_low = (u.exp>v.exp) ? (sig_u-sig)+sig_v : (sig_v-sig)+sig_u;
+    VdV.qf32[i] = fRNDSATQF32(exp, sig, sig_low))
+
+/* VSUB_QF32 */
+ITERATOR_INSN_SHIFT_SLOT(32,vsub_qf32,"Vd32.qf32=vsub(Vu32.qf32,Vv32.qf32)","Vector subtraction of qf32 input",
+    fHIDE(unfloat )u = fPARSEQF32(VuV.qf32[i]);
+    fHIDE(unfloat )v = fPARSEQF32(VvV.qf32[i]);
+    fHIDE(size2s_t exp=0;)
+    if (u.exp>v.exp) {
+        exp = u.exp+((u.sig==0.0)? (-(FRAC_SF+1)):ilogb(u.sig));
+        if (exp<v.exp) exp = v.exp;
+    } else {
+        exp = v.exp+((v.sig==0.0)? (-(FRAC_SF+1)):ilogb(v.sig));
+        if (exp<u.exp) exp = u.exp;
+    }
+    fHIDE(double ) sig_u = ldexp(u.sig, u.exp-exp);
+    fHIDE(double ) sig_v = ldexp(v.sig, v.exp-exp);
+    fHIDE(double) sig = sig_u - sig_v;
+    fHIDE(double) sig_low = (u.exp>v.exp) ? (sig_u-sig)-sig_v : (sig_u-(sig_v+sig));
+    VdV.qf32[i] = fRNDSATQF32(exp, sig, sig_low))
+
+/* VSUB_SF */
+ITERATOR_INSN_SHIFT_SLOT(32,vsub_sf,"Vd32.qf32=vsub(Vu32.sf,Vv32.sf)","Vector subtraction of ieee sf input",
+    fHIDE(unfloat )u = fPARSESF(VuV.sf[i]);
+    fHIDE(unfloat )v = fPARSESF(VvV.sf[i]);
+    fHIDE(size2s_t exp=0;)
+    if (u.exp>v.exp) {
+        exp = u.exp+((u.sig==0.0)? (-(FRAC_SF+1)):ilogb(u.sig));
+        if (exp<v.exp) exp = v.exp;
+    } else {
+        exp = v.exp+((v.sig==0.0)? (-(FRAC_SF+1)):ilogb(v.sig));
+        if (exp<u.exp) exp = u.exp;
+    }
+    fHIDE(double ) sig_u = ldexp(u.sig, u.exp-exp);
+    fHIDE(double ) sig_v = ldexp(v.sig, v.exp-exp);
+    fHIDE(double sig;)
+    fHIDE(double sig_low;)
+    if((u.sign==0) && (v.sign==0)) {
+        sig = sig_u - sig_v;
+        sig_low = (u.exp>v.exp) ? (sig_u-sig)-sig_v : (sig_u-(sig_v+sig));
+    }
+    else if(u.sign ^ v.sign){
+        sig = sig_u + sig_v;
+        sig_low = (u.exp>v.exp) ? (sig_u-sig)+sig_v : (sig_v-sig)+sig_u;
+    }
+    else{
+        sig = sig_v - sig_u;
+        sig_low = (v.exp>u.exp) ? (sig_v-sig)-sig_u : sig_v-(sig_u+sig);
+    }
+    VdV.qf32[i] = fRNDSATQF32(exp, sig, sig_low);
+    if((u.sign==1) && (v.sign==0))   VdV.qf32[i] = fNEGQF32(VdV.qf32[i]))
+
+/* VSUB_QF32_MIX */
+ITERATOR_INSN_SHIFT_SLOT(32,vsub_qf32_mix,"Vd32.qf32=vsub(Vu32.qf32,Vv32.sf)","Vector subtraction of mixed qf32 input and sf",
+    fHIDE(unfloat )u = fPARSEQF32(VuV.qf32[i]);
+    fHIDE(unfloat )v = fPARSESF(VvV.sf[i]);
+    if(v.sign) v.sig = (-1.0)*v.sig;
+    fHIDE(size2s_t exp=0;)
+    if (u.exp>v.exp) {
+        exp = u.exp+((u.sig==0.0)? (-(FRAC_SF+1)):ilogb(u.sig));
+        if (exp<v.exp) exp = v.exp;
+    } else {
+        exp = v.exp+((v.sig==0.0)? (-(FRAC_SF+1)):ilogb(v.sig));
+        if (exp<u.exp) exp = u.exp;
+    }
+    fHIDE(double ) sig_u = ldexp(u.sig, u.exp-exp);
+    fHIDE(double ) sig_v = ldexp(v.sig, v.exp-exp);
+    fHIDE(double ) sig = sig_u - sig_v;
+    fHIDE(double) sig_low = (u.exp>v.exp) ? (sig_u-sig)-sig_v : (sig_u-(sig_v+sig));
+    VdV.qf32[i] = fRNDSATQF32(exp, sig, sig_low))
+
+/* VADD_QF16 */
+ITERATOR_INSN_SHIFT_SLOT(16,vadd_qf16,"Vd32.qf16=vadd(Vu32.qf16,Vv32.qf16)","Vector addition of qf16 input",
+    fHIDE(unfloat )u = fPARSEQF16(VuV.qf16[i]);
+    fHIDE(unfloat )v = fPARSEQF16(VvV.qf16[i]);
+    fHIDE(size2s_t exp=0;)
+    if (u.exp>v.exp) {
+        exp = u.exp+((u.sig==0.0)? (-(FRAC_HF+1)):ilogb(u.sig));
+        if (exp<v.exp) exp = v.exp;
+    } else {
+        exp = v.exp+((v.sig==0.0)? (-(FRAC_HF+1)):ilogb(v.sig));
+        if (exp<u.exp) exp = u.exp;
+    }
+    fHIDE(double ) sig_u = ldexp(u.sig, u.exp-exp);
+    fHIDE(double ) sig_v = ldexp(v.sig, v.exp-exp);
+    fHIDE(double) sig = sig_u + sig_v;
+    fHIDE(double) sig_low = (u.exp>v.exp) ? (sig_u-sig)+sig_v : (sig_v-sig)+sig_u;
+    VdV.qf16[i] = fRNDSATQF16(exp, sig, sig_low))
+
+/* VADD_HF */
+ITERATOR_INSN_SHIFT_SLOT(16,vadd_hf,"Vd32.qf16=vadd(Vu32.hf,Vv32.hf)","Vector addition of hf input",
+    fHIDE(unfloat )u = fPARSEHF(VuV.hf[i]);
+    fHIDE(unfloat )v = fPARSEHF(VvV.hf[i]);
+    fHIDE(size2s_t exp=0;)
+    if (u.exp>v.exp) {
+        exp = u.exp+((u.sig==0.0)? (-(FRAC_HF+1)):ilogb(u.sig));
+        if (exp<v.exp) exp = v.exp;
+    } else {
+        exp = v.exp+((v.sig==0.0)? (-(FRAC_HF+1)):ilogb(v.sig));
+        if (exp<u.exp) exp = u.exp;
+    }
+    fHIDE(double ) sig_u = ldexp(u.sig, u.exp-exp);
+    fHIDE(double ) sig_v = ldexp(v.sig, v.exp-exp);
+
+    fHIDE(double sig;)
+    fHIDE(double sig_low;)
+
+    if((u.sign^v.sign)==0){
+        sig = sig_u + sig_v;
+        sig_low = (u.exp>v.exp) ? (sig_u-sig)+sig_v : (sig_v-sig)+sig_u;
+    }
+    else if((u.sign==0) && (v.sign==1))
+    {
+        sig = sig_u - sig_v;
+        sig_low = (u.exp>v.exp) ? (sig_u-sig)-sig_v : sig_u-(sig_v+sig);
+    }
+    else{
+        sig = sig_v - sig_u;
+        sig_low = (v.exp>u.exp) ? (sig_v-sig)-sig_u : sig_v-(sig_u+sig);
+    }
+    VdV.qf16[i] = fRNDSATQF16(exp, sig, sig_low);
+    if(u.sign && v.sign)
+        VdV.qf16[i] = fNEGQF16(VdV.qf16[i]))
+
+/* VADD_QF16_MIX */
+ITERATOR_INSN_SHIFT_SLOT(16,vadd_qf16_mix,"Vd32.qf16=vadd(Vu32.qf16,Vv32.hf)","Vector addition of mixed qf16 and hf",
+    fHIDE(unfloat )u = fPARSEQF16(VuV.qf16[i]);
+    fHIDE(unfloat )v = fPARSEHF(VvV.hf[i]);
+    if(v.sign)  v.sig = (-1.0)*v.sig;
+    fHIDE(size2s_t exp=0;)
+    if (u.exp>v.exp) {
+        exp = u.exp+((u.sig==0.0)? (-(FRAC_HF+1)):ilogb(u.sig));
+        if (exp<v.exp) exp = v.exp;
+    } else {
+        exp = v.exp+((v.sig==0.0)? (-(FRAC_HF+1)):ilogb(v.sig));
+        if (exp<u.exp) exp = u.exp;
+    }
+    fHIDE(double ) sig_u = ldexp(u.sig, u.exp-exp);
+    fHIDE(double ) sig_v = ldexp(v.sig, v.exp-exp);
+    fHIDE(double) sig = sig_u + sig_v;
+    fHIDE(double) sig_low = (u.exp>v.exp) ? (sig_u-sig)+sig_v : (sig_v-sig)+sig_u;
+    VdV.qf16[i] = fRNDSATQF16(exp, sig, sig_low))
+
+/* VSUB_QF16 */
+ITERATOR_INSN_SHIFT_SLOT(16,vsub_qf16,"Vd32.qf16=vsub(Vu32.qf16,Vv32.qf16)","Vector subtraction of qf16 input",
+    fHIDE(unfloat )u = fPARSEQF16(VuV.qf16[i]);
+    fHIDE(unfloat )v = fPARSEQF16(VvV.qf16[i]);
+    fHIDE(size2s_t exp=0;)
+    if (u.exp>v.exp) {
+        exp = u.exp+((u.sig==0.0)? (-(FRAC_HF+1)):ilogb(u.sig));
+        if (exp<v.exp) exp = v.exp;
+    } else {
+        exp = v.exp+((v.sig==0.0)? (-(FRAC_HF+1)):ilogb(v.sig));
+        if (exp<u.exp) exp = u.exp;
+    }
+    fHIDE(double ) sig_u = ldexp(u.sig, u.exp-exp);
+    fHIDE(double ) sig_v = ldexp(v.sig, v.exp-exp);
+    fHIDE(double) sig = sig_u - sig_v;
+    fHIDE(double) sig_low = (u.exp>v.exp) ? (sig_u-sig)-sig_v : (sig_u-(sig_v+sig));
+    VdV.qf16[i] = fRNDSATQF16(exp, sig, sig_low))
+
+/* VSUB_HF */
+ITERATOR_INSN_SHIFT_SLOT(16,vsub_hf,"Vd32.qf16=vsub(Vu32.hf,Vv32.hf)","Vector subtraction of hf input",
+    fHIDE(unfloat )u = fPARSEHF(VuV.hf[i]);
+    fHIDE(unfloat )v = fPARSEHF(VvV.hf[i]);
+    fHIDE(size2s_t exp=0;)
+    if (u.exp>v.exp) {
+        exp = u.exp+((u.sig==0.0)? (-(FRAC_HF+1)):ilogb(u.sig));
+        if (exp<v.exp) exp = v.exp;
+    } else {
+        exp = v.exp+((v.sig==0.0)? (-(FRAC_HF+1)):ilogb(v.sig));
+        if (exp<u.exp) exp = u.exp;
+    }
+    fHIDE(double ) sig_u = ldexp(u.sig, u.exp-exp);
+    fHIDE(double ) sig_v = ldexp(v.sig, v.exp-exp);
+    fHIDE(double sig;)
+    fHIDE(double sig_low;)
+    if((u.sign==0) && (v.sign==0)) {
+        sig = sig_u - sig_v;
+        sig_low = (u.exp>v.exp) ? (sig_u-sig)-sig_v : (sig_u-(sig_v+sig));
+    }
+    else if(u.sign ^ v.sign){
+        sig = sig_u + sig_v;
+        sig_low = (u.exp>v.exp) ? (sig_u-sig)+sig_v : (sig_v-sig)+sig_u;
+    }
+    else{
+        sig = sig_v - sig_u;
+        sig_low = (v.exp>u.exp) ? (sig_v-sig)-sig_u : sig_v-(sig_u+sig);
+    }
+    VdV.qf16[i] = fRNDSATQF16(exp, sig, sig_low);
+    if((u.sign==1) && (v.sign==0))
+        VdV.qf16[i] = fNEGQF16(VdV.qf16[i]))
+
+
+/* VSUB_QF16_MIXED */
+ITERATOR_INSN_SHIFT_SLOT(16,vsub_qf16_mix,"Vd32.qf16=vsub(Vu32.qf16,Vv32.hf)","Vector subtraction of mixed qf16 and hf",
+    fHIDE(unfloat )u = fPARSEQF16(VuV.qf16[i]);
+    fHIDE(unfloat )v = fPARSEHF(VvV.hf[i]);
+    if(v.sign)  v.sig = (-1.0)*v.sig;
+    fHIDE(size2s_t exp=0;)
+    if (u.exp>v.exp) {
+        exp = u.exp+((u.sig==0.0)? (-(FRAC_HF+1)):ilogb(u.sig));
+        if (exp<v.exp) exp = v.exp;
+    } else {
+        exp = v.exp+((v.sig==0.0)? (-(FRAC_HF+1)):ilogb(v.sig));
+        if (exp<u.exp) exp = u.exp;
+    }
+    fHIDE(double ) sig_u = ldexp(u.sig, u.exp-exp);
+    fHIDE(double ) sig_v = ldexp(v.sig, v.exp-exp);
+    fHIDE(double) sig = sig_u - sig_v;
+    fHIDE(double) sig_low = (u.exp>v.exp) ? (sig_u-sig)-sig_v : (sig_u-(sig_v+sig));
+    VdV.qf16[i] = fRNDSATQF16(exp, sig, sig_low))
+
+// FP Convert QF32/W/UW to ieee SF
+ITERATOR_INSN_SHIFT_SLOT(32,vconv_sf_qf32,"Vd32.sf=Vu32.qf32","Vector conversion of qf32 format to ieee SF",
+    fHIDE(unfloat )u = fPARSEQF32(VuV.qf32[i]);
+    VdV.sf[i] = fRNDSATSF(u.exp, u.sig))
+
+// FP Convert QF16/H/UH to ieee HF
+ITERATOR_INSN_SHIFT_SLOT(16,vconv_hf_qf16,"Vd32.hf=Vu32.qf16","Vector conversion of qf16 format to ieee HF",
+    fHIDE(unfloat )u = fPARSEQF16(VuV.qf16[i]);
+    VdV.hf[i] = fRNDSATHF(u.exp, u.sig))
+
+// FP Convert double QF32 to two packed ieee HF in one vector
+ITERATOR_INSN_SHIFT_SLOT(32,vconv_hf_qf32,"Vd32.hf=Vuu32.qf32","Vector conversion of double qf32 to ieee HF",
+    fHIDE(unfloat )u0 = fPARSEQF32(VuuV.v[0].qf32[i]);
+    fHIDE(unfloat )u1 = fPARSEQF32(VuuV.v[1].qf32[i]);
+    VdV.hf[2*i] = fRNDSATHF(u0.exp, u0.sig);
+    VdV.hf[2*i+1] = fRNDSATHF(u1.exp, u1.sig))
 
 
 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(16,vmpyih,"Vd32=vmpyih(Vu32,Vv32)","Vd32.h=vmpyi(Vu32.h,Vv32.h)",
diff --git a/target/hexagon/imported/mmvec/macros.def b/target/hexagon/imported/mmvec/macros.def
index 7e5438a998021..e9524aa56d1e1 100755
--- a/target/hexagon/imported/mmvec/macros.def
+++ b/target/hexagon/imported/mmvec/macros.def
@@ -15,46 +15,76 @@
  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
-DEF_MACRO(fDUMPQ,
+DEF_MACRO(fDUMPQ,(STR,REG),
+	"dump REG",
+	"dump REG",
 	do {
 		printf(STR ":" #REG ": 0x%016llx\n",REG.ud[0]);
 	} while (0),
 	()
 )
 
-DEF_MACRO(fUSE_LOOKUP_ADDRESS_BY_REV,
-	PROC->arch_proc_options->mmvec_use_full_va_for_lookup,
+DEF_MACRO(fUSE_LOOKUP_ADDRESS_BY_REV,(PROC),
+	"",
+	"Use full VA address for lookup and exception based on REV ",
+	PROC->arch_proc_options->HVX_USE_FULL_VA,
 	()
 )
 
-DEF_MACRO(fUSE_LOOKUP_ADDRESS,
+DEF_MACRO(fUSE_LOOKUP_ADDRESS,(),
+	"",
+	"Use full VA address for lookup and exception",
 	1,
 	()
 )
 
-DEF_MACRO(fNOTQ,
+DEF_MACRO(fRT8NOTE, (),
+	"",
+	"",
+	,
+	(A_NOTE_RT8)
+)
+
+DEF_MACRO(fCVI_VX_NO_TMP_LD, (),
+    "",
+    "",
+    ,
+    (A_CVI_VX_NO_TMP_LD)
+)
+DEF_MACRO(fNOTQ,(VAL),
+	"~VAL",
+	"~VAL",
+	/* Will break Visual Studio? */
 	({mmqreg_t _ret = {0}; int _i_; for (_i_ = 0; _i_ < fVECSIZE()/64; _i_++) _ret.ud[_i_] = ~VAL.ud[_i_]; _ret;}),
 	()
 )
 
-DEF_MACRO(fGETQBITS,
+DEF_MACRO(fGETQBITS,(REG,WIDTH,MASK,BITNO),
+	"REG[BITNO+WIDTH-1:BITNO]",
+	"Get MASK bits at BITNO from REG",
 	((MASK) & (REG.w[(BITNO)>>5] >> ((BITNO) & 0x1f))),
 	()
 )
 
-DEF_MACRO(fGETQBIT,
+DEF_MACRO(fGETQBIT,(REG,BITNO),
+	"REG[BITNO]",
+	"Get bit BITNO from REG",
 	fGETQBITS(REG,1,1,BITNO),
 	()
 )
 
-DEF_MACRO(fGENMASKW,
+DEF_MACRO(fGENMASKW,(QREG,IDX),
+	"maskw(QREG,IDX)",
+	"Generate mask from QREG for word IDX",
 	(((fGETQBIT(QREG,(IDX*4+0)) ? 0xFF : 0x0) << 0)
 	|((fGETQBIT(QREG,(IDX*4+1)) ? 0xFF : 0x0) << 8)
 	|((fGETQBIT(QREG,(IDX*4+2)) ? 0xFF : 0x0) << 16)
 	|((fGETQBIT(QREG,(IDX*4+3)) ? 0xFF : 0x0) << 24)),
 	()
 )
-DEF_MACRO(fGET10BIT,
+DEF_MACRO(fGET10BIT,(COE,VAL,POS),
+	"COE=(((((fGETUBYTE(3,VAL) >> (2 * POS)) & 3) << 8) | fGETUBYTE(POS,VAL)) << 6) >> 6;",
+	"Get 10-bit coefficient from current word value and byte position",
 	{
 		COE = (((((fGETUBYTE(3,VAL) >> (2 * POS)) & 3) << 8) | fGETUBYTE(POS,VAL)) << 6);
 		COE >>= 6;
@@ -62,62 +92,160 @@ DEF_MACRO(fGET10BIT,
 	()
 )
 
-DEF_MACRO(fVMAX,
+DEF_MACRO(fVMAX,(X,Y),
+	"max(X,Y)",
+	"",
 	(X>Y) ? X : Y,
 	()
 )
 
 
-DEF_MACRO(fGETNIBBLE,
+DEF_MACRO(fREAD_VEC,
+    (DST,IDX),
+    "DST=VREG[IDX]",   /* short desc */
+    "Read Vector IDX", /* long desc */
+	(DST = READ_VREG(fMODCIRCU((IDX),5))),
+    ()
+)
+DEF_MACRO(fREAD_ZVEC,
+    (DST,IDX),
+    "DST=ZREG[IDX]",   /* short desc */
+    "Read Vector IDX", /* long desc */
+	(DST = READ_ZREG(fMODCIRCU((IDX),5))),
+    ()
+)
+
+DEF_MACRO(fREAD_ZVEC_WORD,
+    (DST,IDX),
+    "DST=ZReg.uw[IDX]",   /* short desc */
+    "Read Z Vector IDX", /* long desc */
+	{
+		mmvector_t ZReg = READ_ZREG(0);
+		DST = ZReg.uw[IDX];
+
+	},
+    ()
+)
+DEF_MACRO(fREAD_ZVEC_ALL,
+    (DST,N,NZ),
+    "",   /* short desc */
+    "Read Z Vector IDX", /* long desc */
+	{
+		int __idx = 0;
+		for (__idx = 0; __idx < NZ/N; __idx++) {
+			memcpy(&DST[N*__idx], &THREAD2STRUCT->ZRegs[__idx], N);
+		}
+	},
+    ()
+)
+DEF_MACRO(fZREGB,
+    (Z,IDX),
+    "ZREG.b[IDX]",   /* short desc */
+    "Read Z IDX", /* long desc */
+	((size1s_t)Z[IDX]),
+    ()
+)
+DEF_MACRO(fZREGUB,
+    (Z,IDX),
+    "ZREG.ub[IDX]",   /* short desc */
+    "Read Z IDX", /* long desc */
+	((size1u_t)Z[IDX]),
+    ()
+)
+DEF_MACRO(fZREGH,
+    (Z,IDX),
+    "ZREG.h[IDX]",   /* short desc */
+    "Read Z IDX", /* long desc */
+	((size2s_t)Z[IDX]),
+    ()
+)
+DEF_MACRO(fZREGUB,
+    (Z,IDX),
+    "ZREG.ub[IDX]",   /* short desc */
+    "Read Z IDX", /* long desc */
+	((size1u_t)Z[IDX]),
+    ()
+)
+
+DEF_MACRO(fGETNIBBLE,(IDX,SRC),
+    "SRC.s4[IDX]",
+    "Get nibble",
     ( fSXTN(4,8,(SRC >> (4*IDX)) & 0xF) ),
     ()
 )
 
-DEF_MACRO(fGETCRUMB,
+DEF_MACRO(fGETCRUMB,(IDX,SRC),
+    "SRC.s2[IDX]",
+    "Get 2bits",
     ( fSXTN(2,8,(SRC >> (2*IDX)) & 0x3) ),
     ()
 )
 
-DEF_MACRO(fGETCRUMB_SYMMETRIC,
+DEF_MACRO(fGETCRUMB_SYMMETRIC,(IDX,SRC),
+    "SRC.s2[IDX] >= 0 ? (2-SRC.s2[IDX]) : SRC.s2[IDX]",
+    "Get 2bits",
     ( (fGETCRUMB(IDX,SRC)>=0 ? (2-fGETCRUMB(IDX,SRC)) : fGETCRUMB(IDX,SRC) ) ),
     ()
 )
 
+//#define ZERO_OFFSET_2B +(fGETCRUMB(z,VuV.uw[i])>=0)
 #define ZERO_OFFSET_2B +
 
-DEF_MACRO(fGENMASKH,
+DEF_MACRO(fWRITE_VEC,
+    (IDX,VAR),
+    "VREG[IDX]=VAR",   /* short desc */
+    "Write Vector IDX", /* long desc */
+	(WRITE_VREG(fMODCIRCU((IDX),5),VAR)),
+    ()
+)
+
+DEF_MACRO(fGENMASKH,(QREG,IDX),
+	"maskh(QREG,IDX)",
+	"generate mask from QREG for halfword IDX",
 	(((fGETQBIT(QREG,(IDX*2+0)) ? 0xFF : 0x0) << 0)
 	|((fGETQBIT(QREG,(IDX*2+1)) ? 0xFF : 0x0) << 8)),
 	()
 )
 
-DEF_MACRO(fGETMASKW,
+DEF_MACRO(fGETMASKW,(VREG,QREG,IDX),
+	"VREG.w[IDX] & fGENMASKW(QREG,IDX)",
+	"Mask word IDX from VREG using QREG",
 	(VREG.w[IDX] & fGENMASKW((QREG),IDX)),
 	()
 )
 
-DEF_MACRO(fGETMASKH,
+DEF_MACRO(fGETMASKH,(VREG,QREG,IDX),
+	"VREG.h[IDX] & fGENMASKH(QREG,IDX)",
+	"Mask word IDX from VREG using QREG",
 	(VREG.h[IDX] & fGENMASKH((QREG),IDX)),
 	()
 )
 
-DEF_MACRO(fCONDMASK8,
+DEF_MACRO(fCONDMASK8,(QREG,IDX,YESVAL,NOVAL),
+	"QREG.IDX ? YESVAL : NOVAL",
+	"QREG.IDX ? YESVAL : NOVAL",
 	(fGETQBIT(QREG,IDX) ? (YESVAL) : (NOVAL)),
 	()
 )
 
-DEF_MACRO(fCONDMASK16,
+DEF_MACRO(fCONDMASK16,(QREG,IDX,YESVAL,NOVAL),
+	"select_bytes(QREG,IDX,YESVAL,NOVAL)",
+	"select_bytes(QREG,IDX,YESVAL,NOVAL)",
 	((fGENMASKH(QREG,IDX) & (YESVAL)) | (fGENMASKH(fNOTQ(QREG),IDX) & (NOVAL))),
 	()
 )
 
-DEF_MACRO(fCONDMASK32,
+DEF_MACRO(fCONDMASK32,(QREG,IDX,YESVAL,NOVAL),
+	"select_bytes(QREG,IDX,YESVAL,NOVAL)",
+	"select_bytes(QREG,IDX,YESVAL,NOVAL)",
 	((fGENMASKW(QREG,IDX) & (YESVAL)) | (fGENMASKW(fNOTQ(QREG),IDX) & (NOVAL))),
 	()
 )
 
 
-DEF_MACRO(fSETQBITS,
+DEF_MACRO(fSETQBITS,(REG,WIDTH,MASK,BITNO,VAL),
+	"REG[BITNO+WIDTH-1:BITNO] = VAL",
+	"Put bits into REG",
 	do {
 		size4u_t __TMP = (VAL);
 		REG.w[(BITNO)>>5] &= ~((MASK) << ((BITNO) & 0x1f));
@@ -126,58 +254,101 @@ DEF_MACRO(fSETQBITS,
 	()
 )
 
-DEF_MACRO(fSETQBIT,
+DEF_MACRO(fSETQBIT,(REG,BITNO,VAL),
+	"REG[BITNO]=VAL",
+	"Put bit into REG",
 	fSETQBITS(REG,1,1,BITNO,VAL),
 	()
 )
 
-DEF_MACRO(fVBYTES,
+DEF_MACRO(fVBYTES,(),
+	"VWIDTH",
+	"Number of bytes in a vector",
 	(fVECSIZE()),
 	()
 )
 
-DEF_MACRO(fVHALVES,
+DEF_MACRO(fVHALVES,(),
+	"VWIDTH/2",
+	"Number of halves in a vector",
 	(fVECSIZE()/2),
 	()
 )
 
-DEF_MACRO(fVWORDS,
+DEF_MACRO(fVWORDS,(),
+	"VWIDTH/2",
+	"Number of words in a vector",
 	(fVECSIZE()/4),
 	()
 )
 
-DEF_MACRO(fVDWORDS,
+DEF_MACRO(fVDWORDS,(),
+	"VWIDTH/8",
+	"Number of double words in a vector",
 	(fVECSIZE()/8),
 	()
 )
 
-DEF_MACRO(fVALIGN,
+DEF_MACRO(fVALIGN, (ADDR, LOG2_ALIGNMENT),
+    "ADDR = ADDR & ~(LOG2_ALIGNMENT-1)",
+    "Align to Element Size",
     ( ADDR = ADDR & ~(LOG2_ALIGNMENT-1)),
     ()
 )
 
-DEF_MACRO(fVLASTBYTE,
+DEF_MACRO(fVLASTBYTE, (ADDR, LOG2_ALIGNMENT),
+    "ADDR = ADDR | (LOG2_ALIGNMENT-1)",
+    "Set LSB of length to last byte",
     ( ADDR = ADDR | (LOG2_ALIGNMENT-1)),
     ()
 )
 
 
-DEF_MACRO(fVELEM,
+DEF_MACRO(fVELEM, (WIDTH),
+    "VBITS/WIDTH",
+    "Number of WIDTH-bit elements in a vector",
     ((fVECSIZE()*8)/WIDTH),
     ()
 )
 
-DEF_MACRO(fVECLOGSIZE,
+DEF_MACRO(fVECLOGSIZE,(),
+    "log2(VECTOR_SIZE)",
+    "Log base 2 of the number of bytes in a vector",
     (mmvec_current_veclogsize(thread)),
     ()
 )
 
-DEF_MACRO(fVECSIZE,
+DEF_MACRO(fVBUF_IDX,(EA),
+	"(EA >> log2(VECTOR_SIZE)) & 0xFF",
+	"(EA >> log2(VECTOR_SIZE)) & 0xFF",
+	(((EA) >> fVECLOGSIZE()) & 0xFF),
+	(A_FAKEINSN)
+)
+
+DEF_MACRO(fREAD_VBUF,(IDX,WIDX),
+	"vbuf[IDX].w[WIDX]",
+	"vbuf[IDX].w[WIDX]",
+	READ_VBUF(IDX,WIDX),
+	(A_FAKEINSN)
+)
+
+DEF_MACRO(fLOG_VBUF,(IDX,VAL,WIDX),
+	"vbuf[IDX].w[WIDX] = VAL",
+	"vbuf[IDX].w[WIDX] = VAL",
+	LOG_VBUF(IDX,VAL,WIDX),
+	(A_FAKEINSN)
+)
+
+DEF_MACRO(fVECSIZE,(),
+    "VBYTES",
+    "Number of bytes in a vector currently",
     (1<<fVECLOGSIZE()),
     ()
 )
 
-DEF_MACRO(fSWAPB,
+DEF_MACRO(fSWAPB,(A, B),
+    "SWAP(A,B)",
+    "Swap bytes",
     {
 		size1u_t tmp = A;
 		A = B;
@@ -187,41 +358,54 @@ DEF_MACRO(fSWAPB,
 )
 
 DEF_MACRO(
-	fVZERO,
+	fVZERO,(),
+	"0",
+	"0",
 	mmvec_zero_vector(),
 	()
 )
 
 DEF_MACRO(
-    fNEWVREG,
+    fNEWVREG,(VNUM),
+    "VNUM.new",
+    "Register value produced in this packet",
     ((THREAD2STRUCT->VRegs_updated & (((VRegMask)1)<<VNUM)) ? THREAD2STRUCT->future_VRegs[VNUM] : mmvec_zero_vector()),
     (A_DOTNEWVALUE,A_RESTRICT_SLOT0ONLY)
 )
 
 DEF_MACRO(
 	fV_AL_CHECK,
+	(EA,MASK),
+	"",
+	"",
 	if ((EA) & (MASK)) {
 		warn("aligning misaligned vector. PC=%08x EA=%08x",thread->Regs[REG_PC],(EA));
 	},
 	()
 )
-DEF_MACRO(fSCATTER_INIT,
+DEF_MACRO(fSCATTER_INIT, ( REGION_START, LENGTH, ELEMENT_SIZE),
+    "",
+    "",
     {
     mem_vector_scatter_init(thread, insn,   REGION_START, LENGTH, ELEMENT_SIZE);
 	if (EXCEPTION_DETECTED) return;
     },
-    (A_STORE,A_MEMLIKE,A_RESTRICT_SLOT0ONLY)
+    (A_STORE,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST,A_RESTRICT_SLOT0ONLY)
 )
 
-DEF_MACRO(fGATHER_INIT,
+DEF_MACRO(fGATHER_INIT, ( REGION_START, LENGTH, ELEMENT_SIZE),
+    "",
+    "",
     {
     mem_vector_gather_init(thread, insn,   REGION_START, LENGTH, ELEMENT_SIZE);
 	if (EXCEPTION_DETECTED) return;
     },
-    (A_LOAD,A_MEMLIKE,A_RESTRICT_SLOT1ONLY)
+    (A_LOAD,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST)
 )
 
-DEF_MACRO(fSCATTER_FINISH,
+DEF_MACRO(fSCATTER_FINISH, (OP),
+    "",
+    "",
     {
 	if (EXCEPTION_DETECTED) return;
     mem_vector_scatter_finish(thread, insn, OP);
@@ -229,7 +413,9 @@ DEF_MACRO(fSCATTER_FINISH,
     ()
 )
 
-DEF_MACRO(fGATHER_FINISH,
+DEF_MACRO(fGATHER_FINISH, (),
+    "",
+    "",
     {
 	if (EXCEPTION_DETECTED) return;
     mem_vector_gather_finish(thread, insn);
@@ -238,7 +424,9 @@ DEF_MACRO(fGATHER_FINISH,
 )
 
 
-DEF_MACRO(CHECK_VTCM_PAGE,
+DEF_MACRO(CHECK_VTCM_PAGE, (FLAG, BASE, LENGTH, OFFSET, ALIGNMENT),
+    "FLAG=((BASE+OFFSET) < (BASE+LENGTH))",
+    "FLAG=((BASE+OFFSET) < (BASE+LENGTH))",
      {
         int slot = insn->slot;
         paddr_t pa = thread->mem_access[slot].paddr+OFFSET;
@@ -247,7 +435,9 @@ DEF_MACRO(CHECK_VTCM_PAGE,
      },
     ()
 )
-DEF_MACRO(COUNT_OUT_OF_BOUNDS,
+DEF_MACRO(COUNT_OUT_OF_BOUNDS, (FLAG, SIZE),
+    " ",
+    "",
      {
         if (!FLAG)
         {
@@ -258,7 +448,9 @@ DEF_MACRO(COUNT_OUT_OF_BOUNDS,
     ()
 )
 
-DEF_MACRO(fLOG_SCATTER_OP,
+DEF_MACRO(fLOG_SCATTER_OP, (SIZE),
+    "  ",
+    "  ",
     {
         // Log the size and indicate that the extension ext.c file needs to increment right before memory write
         THREAD2STRUCT->vtcm_log.op = 1;
@@ -269,7 +461,9 @@ DEF_MACRO(fLOG_SCATTER_OP,
 
 
 
-DEF_MACRO(fVLOG_VTCM_WORD_INCREMENT,
+DEF_MACRO(fVLOG_VTCM_WORD_INCREMENT, (EA,OFFSET,INC,IDX,ALIGNMENT,LEN),
+    "if (RtV <= EA <= RtV + LEN) *EA += INC.uw[IDX] ",
+    "if (RtV <= EA <= RtV + LEN) *EA += INC.uw[IDX] ",
     {
         int slot = insn->slot;
         int log_bank = 0;
@@ -287,7 +481,9 @@ DEF_MACRO(fVLOG_VTCM_WORD_INCREMENT,
     ()
 )
 
-DEF_MACRO(fVLOG_VTCM_HALFWORD_INCREMENT,
+DEF_MACRO(fVLOG_VTCM_HALFWORD_INCREMENT, (EA,OFFSET,INC,IDX,ALIGNMENT,LEN),
+    "if (RtV <= EA <= RtV + LEN) *EA += INC.uh[IDX] ",
+    "if (RtV <= EA <= RtV + LEN) *EA += INC.uh[IDX] ",
     {
         int slot = insn->slot;
         int log_bank = 0;
@@ -304,7 +500,9 @@ DEF_MACRO(fVLOG_VTCM_HALFWORD_INCREMENT,
     ()
 )
 
-DEF_MACRO(fVLOG_VTCM_HALFWORD_INCREMENT_DV,
+DEF_MACRO(fVLOG_VTCM_HALFWORD_INCREMENT_DV, (EA,OFFSET,INC,IDX,IDX2,IDX_H,ALIGNMENT,LEN),
+    "if (RtV <= EA <= RtV + LEN) *EA += INC.w[IDX2].uh[IDX_H] ",
+    "if (RtV <= EA <= RtV + LEN) *EA += INC.w[IDX2].uh[IDX_H] ",
     {
         int slot = insn->slot;
         int log_bank = 0;
@@ -323,7 +521,9 @@ DEF_MACRO(fVLOG_VTCM_HALFWORD_INCREMENT_DV,
 
 
 
-DEF_MACRO(GATHER_FUNCTION,
+DEF_MACRO(GATHER_FUNCTION, (EA,OFFSET,IDX, LEN, ELEMENT_SIZE, BANK_IDX, QVAL),
+"",
+"",
 {
         int slot = insn->slot;
         int i0;
@@ -336,6 +536,9 @@ DEF_MACRO(GATHER_FUNCTION,
             log_byte =  ((OFFSET>=0)&&((pa+i0)<=pa_high)) && QVAL;
             log_bank |= (log_byte<<i0);
             size1u_t B  = sim_mem_read1(thread->system_ptr, thread->threadId, thread->mem_access[slot].paddr+OFFSET+i0);
+#ifdef VERIFICATION
+			warn("Gather[%d] sim_mem_read1 pa:%llx val: %x", ELEMENT_SIZE*IDX+i0, thread->mem_access[slot].paddr+OFFSET+i0, B);
+#endif
             THREAD2STRUCT->tmp_VRegs[0].ub[ELEMENT_SIZE*IDX+i0] = B;
             LOG_VTCM_BYTE(pa+i0,log_byte,B,ELEMENT_SIZE*IDX+i0);
         }
@@ -346,38 +549,50 @@ DEF_MACRO(GATHER_FUNCTION,
 
 
 
-DEF_MACRO(fVLOG_VTCM_GATHER_WORD,
+DEF_MACRO(fVLOG_VTCM_GATHER_WORD, (EA,OFFSET,IDX, LEN),
+    "if (RtV <= EA <= RtV + LEN) TEMP.uw[IDX] = *EA ",
+    "if (RtV <= EA <= RtV + LEN) TEMP.uw[IDX] = *EA ",
     {
 		GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 4, IDX, 1);
     },
     ()
 )
-DEF_MACRO(fVLOG_VTCM_GATHER_HALFWORD,
+DEF_MACRO(fVLOG_VTCM_GATHER_HALFWORD, (EA,OFFSET,IDX, LEN),
+    " if (RtV <= EA <= RtV + LEN) TEMP.uh[IDX] = *EA ",
+    " if (RtV <= EA <= RtV + LEN) TEMP.uh[IDX] = *EA ",
     {
 		GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 2, IDX, 1);
     },
     ()
 )
-DEF_MACRO(fVLOG_VTCM_GATHER_HALFWORD_DV,
+DEF_MACRO(fVLOG_VTCM_GATHER_HALFWORD_DV, (EA,OFFSET,IDX,IDX2,IDX_H, LEN),
+    "if (RtV <= EA <= RtV + LEN) TEMP.uw[IDX2].uh[IDX_H] = *EA ",
+    "if (RtV <= EA <= RtV + LEN) TEMP.uw[IDX2].uh[IDX_H] = *EA ",
     {
 		GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 2, (2*IDX2+IDX_H), 1);
     },
     ()
 )
-DEF_MACRO(fVLOG_VTCM_GATHER_WORDQ,
+DEF_MACRO(fVLOG_VTCM_GATHER_WORDQ, (EA,OFFSET,IDX, Q, LEN),
+    " if ( (RtV <= EA <= RtV + LEN) & Q) TEMP.uw[IDX] = *EA ",
+    " if ( (RtV <= EA <= RtV + LEN) & Q) TEMP.uw[IDX] = *EA ",
     {
 		GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 4, IDX, fGETQBIT(QsV,4*IDX+i0));
     },
     ()
 )
-DEF_MACRO(fVLOG_VTCM_GATHER_HALFWORDQ,
+DEF_MACRO(fVLOG_VTCM_GATHER_HALFWORDQ, (EA,OFFSET,IDX, Q, LEN),
+    " if ( (RtV <= EA <= RtV + LEN) & Q) TEMP.uh[IDX] = *EA ",
+    " if ( (RtV <= EA <= RtV + LEN) & Q) TEMP.uh[IDX] = *EA ",
     {
 		GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 2, IDX, fGETQBIT(QsV,2*IDX+i0));
     },
     ()
 )
 
-DEF_MACRO(fVLOG_VTCM_GATHER_HALFWORDQ_DV,
+DEF_MACRO(fVLOG_VTCM_GATHER_HALFWORDQ_DV, (EA,OFFSET,IDX,IDX2,IDX_H, Q, LEN),
+    " if ( (RtV <= EA <= RtV + LEN) & Q) TEMP.uw[IDX2].uh[IDX_H] = *EA ",
+    " if ( (RtV <= EA <= RtV + LEN) & Q) TEMP.uw[IDX2].uh[IDX_H] = *EA ",
     {
 		GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 2, (2*IDX2+IDX_H), fGETQBIT(QsV,2*IDX+i0));
     },
@@ -385,7 +600,9 @@ DEF_MACRO(fVLOG_VTCM_GATHER_HALFWORDQ_DV,
 )
 
 
-DEF_MACRO(DEBUG_LOG_ADDR,
+DEF_MACRO(DEBUG_LOG_ADDR, (OFFSET),
+    "  ",
+    "  ",
     {
 
         if (thread->processor_ptr->arch_proc_options->mmvec_network_addr_log2)
@@ -393,6 +610,7 @@ DEF_MACRO(DEBUG_LOG_ADDR,
 
             int slot = insn->slot;
             paddr_t pa = thread->mem_access[slot].paddr+OFFSET;
+           // pa = pa & ~(ALIGNMENT-1);
         }
     },
     ()
@@ -404,7 +622,9 @@ DEF_MACRO(DEBUG_LOG_ADDR,
 
 
 
-DEF_MACRO(SCATTER_OP_WRITE_TO_MEM,
+DEF_MACRO(SCATTER_OP_WRITE_TO_MEM, (TYPE),
+    " Read, accumulate, and write to VTCM",
+    "  ",
     {
         for (int i = 0; i < mmvecx->vtcm_log.size; i+=sizeof(TYPE))
         {
@@ -430,7 +650,9 @@ DEF_MACRO(SCATTER_OP_WRITE_TO_MEM,
     ()
 )
 
-DEF_MACRO(SCATTER_FUNCTION,
+DEF_MACRO(SCATTER_FUNCTION, (EA,OFFSET,IDX, LEN, ELEMENT_SIZE, BANK_IDX, QVAL, IN),
+"",
+"",
 {
         int slot = insn->slot;
         int i0;
@@ -449,26 +671,34 @@ DEF_MACRO(SCATTER_FUNCTION,
 ()
 )
 
-DEF_MACRO(fVLOG_VTCM_HALFWORD,
+DEF_MACRO(fVLOG_VTCM_HALFWORD, (EA,OFFSET,IN,IDX, LEN),
+    "if (RtV <= EA <= RtV + LEN) *EA = IN.uh[IDX] ",
+    "if (RtV <= EA <= RtV + LEN) *EA = IN.uh[IDX] ",
     {
 		SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 2, IDX, 1, IN);
     },
     ()
 )
-DEF_MACRO(fVLOG_VTCM_WORD,
+DEF_MACRO(fVLOG_VTCM_WORD, (EA,OFFSET,IN,IDX,LEN),
+    "if (RtV <= EA <= RtV + LEN) *EA = IN.uw[IDX] ",
+    "if (RtV <= EA <= RtV + LEN) *EA = IN.uw[IDX] ",
     {
 		SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 4, IDX, 1, IN);
     },
     ()
 )
 
-DEF_MACRO(fVLOG_VTCM_HALFWORDQ,
+DEF_MACRO(fVLOG_VTCM_HALFWORDQ, (EA,OFFSET,IN,IDX,Q,LEN),
+    " if ( (RtV <= EA <= RtV + LEN) & Q) *EA = IN.uh[IDX] ",
+    " if ( (RtV <= EA <= RtV + LEN) & Q) *EA = IN.uh[IDX] ",
     {
 		SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 2, IDX, fGETQBIT(QsV,2*IDX+i0), IN);
     },
     ()
 )
-DEF_MACRO(fVLOG_VTCM_WORDQ,
+DEF_MACRO(fVLOG_VTCM_WORDQ, (EA,OFFSET,IN,IDX,Q,LEN),
+    " if ( (RtV <= EA <= RtV + LEN) & Q) *EA = IN.uw[IDX] ",
+    " if ( (RtV <= EA <= RtV + LEN) & Q) *EA = IN.uw[IDX] ",
     {
 		SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 4, IDX, fGETQBIT(QsV,4*IDX+i0), IN);
     },
@@ -479,14 +709,18 @@ DEF_MACRO(fVLOG_VTCM_WORDQ,
 
 
 
-DEF_MACRO(fVLOG_VTCM_HALFWORD_DV,
+DEF_MACRO(fVLOG_VTCM_HALFWORD_DV, (EA,OFFSET,IN,IDX,IDX2,IDX_H, LEN),
+    "if (RtV <= EA <= RtV + LEN) *EA = IN.w[IDX2].uh[IDX_H] ",
+    "if (RtV <= EA <= RtV + LEN) *EA = IN.w[IDX2].uh[IDX_H] ",
     {
 		SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 2, (2*IDX2+IDX_H), 1, IN);
     },
     ()
 )
 
-DEF_MACRO(fVLOG_VTCM_HALFWORDQ_DV,
+DEF_MACRO(fVLOG_VTCM_HALFWORDQ_DV, (EA,OFFSET,IN,IDX,Q,IDX2,IDX_H, LEN),
+    " if ( (RtV <= EA <= RtV + LEN) & Q) *EA = IN.w[IDX2].uh[IDX_H] ",
+    " if ( (RtV <= EA <= RtV + LEN) & Q) *EA = IN.w[IDX2].uh[IDX_H] ",
     {
 		SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 2, (2*IDX2+IDX_H), fGETQBIT(QsV,2*IDX+i0), IN);
     },
@@ -498,39 +732,161 @@ DEF_MACRO(fVLOG_VTCM_HALFWORDQ_DV,
 
 
 
-DEF_MACRO(fSTORERELEASE,
+DEF_MACRO(fSTORERELEASE, (EA,TYPE),
+	"char* addr = EA&~(ALIGNMENT-1); Zero Byte Store Release (Non-blocking Sync)",
+	"Zero Byte Store Release (Sync)",
     {
         fV_AL_CHECK(EA,fVECSIZE()-1);
 
         mem_store_release(thread, insn, fVECSIZE(), EA&~(fVECSIZE()-1), EA, TYPE, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr));
     },
-	(A_STORE,A_MEMLIKE)
+	(A_STORE,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST)
 )
 
-DEF_MACRO(fVFETCH_AL,
+DEF_MACRO(fVFETCH_AL, (EA),
+    "Prefetch vector into L2 cache at EA",
+    "Prefetch vector into L2 cache at EA",
     {
     fV_AL_CHECK(EA,fVECSIZE()-1);
     mem_fetch_vector(thread, insn, EA&~(fVECSIZE()-1), insn->slot, fVECSIZE());
     },
-    (A_LOAD,A_MEMLIKE)
+    (A_LOAD,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST,A_RESTRICT_NOSLOT1_STORE)
 )
 
 
-DEF_MACRO(fLOADMMV_AL,
+DEF_MACRO(fLOADMMV_AL, (EA, ALIGNMENT, LEN, DST),
+    "char* addr = EA&~(ALIGNMENT-1); for (i=0; i<LEN; ++i) DST[i] = addr[i]",
+    "Load LEN bytes from memory at EA (forced alignment) to DST.",
     {
     fV_AL_CHECK(EA,ALIGNMENT-1);
 	thread->last_pkt->double_access_vec = 0;
     mem_load_vector_oddva(thread, insn, EA&~(ALIGNMENT-1), EA, insn->slot, LEN, &DST.ub[0], LEN, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr));
     },
-    (A_LOAD,A_MEMLIKE)
+    (A_LOAD,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST,A_RESTRICT_NOSLOT1_STORE)
 )
 
-DEF_MACRO(fLOADMMV,
+DEF_MACRO(fLOADMMV, (EA, DST),
+	"DST = *(EA&~(ALIGNMENT-1))",
+	"Load vector from memory at EA (forced alignment) to DST.",
 	fLOADMMV_AL(EA,fVECSIZE(),fVECSIZE(),DST),
 	()
 )
 
-DEF_MACRO(fLOADMMVQ,
+DEF_MACRO(fLOADMMZ, (EA,DST),
+	"DST[EA[7]] = *(EA)",
+	"Load splatter register from memory at EA (forced alignment) to DST.",
+	{
+		mmvector_t load_vec;
+		fV_AL_CHECK(EA,fVECSIZE()-1);
+		mem_load_vector_oddva(thread, insn, EA&~(fVECSIZE()-1), EA, insn->slot, fVECSIZE(), &load_vec.ub[0], fVECSIZE(), fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr));
+		int idx = (EA & 0x80)>0;
+		DST.v[idx] = load_vec;
+
+	},
+	()
+)
+DEF_MACRO(fLOADZ_LOAD, (EA,EAU,WIDTH,DST),
+	"",
+	"",
+	{
+		thread->last_pkt->ext_slot_cancelled = 0;
+		thread->last_pkt->double_access_vec = 0;
+		int etm_size = ((EA % width) ==0) ? fVECSIZE() : 0;
+		if (thread->processor_ptr->options->testgen_mode)
+			etm_size = ((EA % width) ==0) ? WIDTH : 0;
+
+		mem_load_vector_oddva(thread, insn, EA, EAU, insn->slot, WIDTH, &DST.ub[0], etm_size, fUSE_LOOKUP_ADDRESS());
+	},
+	(A_LOAD,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST,A_RESTRICT_NOSLOT1_STORE)
+)
+
+DEF_MACRO(fELSE_CANCELZ, (),
+	"",
+	"",
+	else {
+		if (thread->last_pkt) {
+			thread->mem_access[insn->slot].dropped_z = 1;
+			thread->last_pkt->ext_slot_cancelled |= (1<<insn->slot);
+		}
+	},
+	(A_LOAD,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST,A_RESTRICT_NOSLOT1_STORE)
+)
+
+
+
+
+DEF_MACRO(fPOST_INC4, (R),
+	"R+=4",
+	"",
+	R+=4;
+	,
+	(A_CVI_Z_INC_4)
+)
+DEF_MACRO(fPOST_INC8, (R),
+	"R+=8",
+	"",
+	R+=8;
+	,
+	(A_CVI_Z_INC_8)
+)
+DEF_MACRO(fPOST_INC16, (R),
+	"R+=16",
+	"",
+	R+=16;
+	,
+	(A_CVI_Z_INC_16)
+)
+
+DEF_MACRO(fEXTRACTZ,
+    (DST,IDX),
+    "DST=ZREG[IDX]",   /* short desc */
+    "Read Vector IDX", /* long desc */
+	(DST = READ_ZREG(fMODCIRCU((IDX),5))),
+    ()
+)
+
+DEF_MACRO(fLOADZ_UPDATE, (EA,WIDTH,ZN,N,SRC),
+	"for(i = 0; i < width; i++) ZREG.b[(EA+i)%ZN] = *(EA+i)",
+	"Load splatter register from memory at EA (forced alignment) to DST.",
+	{
+		mmvector_t Z[2];
+		Z[0] = READ_ZREG(0);
+		Z[1] = READ_ZREG(1);
+		for(int k = 0; k < WIDTH; k++) {
+			int element_idx = (EA+k)%N;
+			int z_idx = ((EA+k)%ZN)/N;
+			Z[z_idx].ub[element_idx] = SRC.ub[k];
+		}
+
+		WRITE_EXT_ZREG(0,Z[0],0);
+		WRITE_EXT_ZREG(1,Z[1],0);
+	},
+	(A_LOAD,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST,A_RESTRICT_NOSLOT1_STORE)
+)
+DEF_MACRO(fSTOREZ, (EA,WIDTH,ZN,N),
+	"for(i = 0; i < width; i++) *(EA+i) = ZREG.b[(EA+i)%ZN]",
+	"Store splatter register from memory at EA (forced alignment) to DST.",
+	{
+		mmvector_t store_vec;
+		mmvector_t maskvec = {0};
+		mmvector_t Z[2];
+		Z[0] = READ_ZREG(0);
+		Z[1] = READ_ZREG(1);
+
+		for(int k = 0; k < WIDTH; k++) {
+			int element_idx = (EA+k)%N;
+			int z_idx = ((EA+k)%ZN)/N;
+			store_vec.ub[k] = Z[z_idx].ub[element_idx];
+			maskvec.ub[k] = 1;
+		}
+		mem_store_vector_oddva(thread, insn, EA, EA, insn->slot, WIDTH, &store_vec.ub[0], &maskvec.ub[0], 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr));
+	},
+	(A_STORE,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST)
+)
+
+DEF_MACRO(fLOADMMVQ, (EA,DST,QVAL),
+	"DST = vmux(QVAL,*(EA&~(ALIGNMENT-1)),0)",
+	"Load vector from memory at EA (forced alignment) to DST.",
 	do {
 		int __i;
 		fLOADMMV_AL(EA,fVECSIZE(),fVECSIZE(),DST);
@@ -539,7 +895,9 @@ DEF_MACRO(fLOADMMVQ,
 	()
 )
 
-DEF_MACRO(fLOADMMVNQ,
+DEF_MACRO(fLOADMMVNQ, (EA,DST,QVAL),
+	"DST = vmux(QVAL,0,*(EA&~(ALIGNMENT-1)))",
+	"Load vector from memory at EA (forced alignment) to DST.",
 	do {
 		int __i;
 		fLOADMMV_AL(EA,fVECSIZE(),fVECSIZE(),DST);
@@ -548,7 +906,9 @@ DEF_MACRO(fLOADMMVNQ,
 	()
 )
 
-DEF_MACRO(fLOADMMVU_AL,
+DEF_MACRO(fLOADMMVU_AL, (EA, ALIGNMENT, LEN, DST),
+    "char* addr = EA; for (i=0; i<LEN; ++i) DST[i] = addr[i]",
+    "Load LEN bytes from memory at EA (unaligned) to DST.",
     {
     size4u_t size2 = (EA)&(ALIGNMENT-1);
     size4u_t size1 = LEN-size2;
@@ -556,10 +916,12 @@ DEF_MACRO(fLOADMMVU_AL,
     mem_load_vector_oddva(thread, insn, EA+size1, EA+fVECSIZE(), /* slot */ 1, size2, &DST.ub[size1], size2, fUSE_LOOKUP_ADDRESS());
     mem_load_vector_oddva(thread, insn, EA, EA,/* slot */ 0, size1, &DST.ub[0], size1, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr));
     },
-    (A_LOAD,A_MEMLIKE)
+    (A_LOAD,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST,A_RESTRICT_NOSLOT1_STORE)
 )
 
-DEF_MACRO(fLOADMMVU,
+DEF_MACRO(fLOADMMVU, (EA, DST),
+	"DST = *EA",
+	"Load vector from memory at EA (unaligned) to DST.",
 	{
 		/* if address happens to be aligned, only do aligned load */
         thread->last_pkt->pkt_has_vtcm_access = 0;
@@ -579,63 +941,79 @@ DEF_MACRO(fLOADMMVU,
 	()
 )
 
-DEF_MACRO(fSTOREMMV_AL,
+DEF_MACRO(fSTOREMMV_AL, (EA, ALIGNMENT, LEN, SRC),
+    "char* addr = EA&~(ALIGNMENT-1); for (i=0; i<LEN; ++i) addr[i] = SRC[i]",
+    "Store LEN bytes from SRC into memory at EA (forced alignment).",
     {
     fV_AL_CHECK(EA,ALIGNMENT-1);
-    mem_store_vector_oddva(thread, insn, EA&~(ALIGNMENT-1), EA, insn->slot, LEN, &SRC.ub[0], 0, 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr));
+    mem_store_vector_oddva(thread, insn, EA&~(ALIGNMENT-1), EA, insn->slot, LEN, SRC.ub, 0, 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr));
     },
-    (A_STORE,A_MEMLIKE)
+    (A_STORE,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST)
 )
 
-DEF_MACRO(fSTOREMMV,
+DEF_MACRO(fSTOREMMV, (EA, SRC),
+	"*(EA&~(ALIGNMENT-1)) = SRC",
+	"Store vector SRC to memory at EA (unaligned).",
 	fSTOREMMV_AL(EA,fVECSIZE(),fVECSIZE(),SRC),
 	()
 )
 
-DEF_MACRO(fSTOREMMVQ_AL,
+DEF_MACRO(fSTOREMMVQ_AL, (EA, ALIGNMENT, LEN, SRC, MASK),
+    "char* addr = EA&~(ALIGNMENT-1); for (i=0; i<LEN; ++i) if (MASK[i]) addr[i] = SRC[i]",
+    "Store LEN bytes from SRC into memory at EA (forced alignment).",
     do {
 	mmvector_t maskvec;
 	int i;
 	for (i = 0; i < fVECSIZE(); i++) maskvec.ub[i] = fGETQBIT(MASK,i);
-	mem_store_vector_oddva(thread, insn, EA&~(ALIGNMENT-1), EA, insn->slot, LEN, &SRC.ub[0], &maskvec.ub[0], 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr));
+	mem_store_vector_oddva(thread, insn, EA&~(ALIGNMENT-1), EA, insn->slot, LEN, SRC.ub, &maskvec.ub[0], 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr));
     } while (0),
-    (A_STORE,A_MEMLIKE)
+    (A_STORE,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST)
 )
 
-DEF_MACRO(fSTOREMMVQ,
+DEF_MACRO(fSTOREMMVQ, (EA, SRC, MASK),
+	"*(EA&~(ALIGNMENT-1)) = SRC",
+	"Masked store vector SRC to memory at EA (forced alignment).",
 	fSTOREMMVQ_AL(EA,fVECSIZE(),fVECSIZE(),SRC,MASK),
 	()
 )
 
-DEF_MACRO(fSTOREMMVNQ_AL,
+DEF_MACRO(fSTOREMMVNQ_AL, (EA, ALIGNMENT, LEN, SRC, MASK),
+    "char* addr = EA&~(ALIGNMENT-1); for (i=0; i<LEN; ++i) if (!MASK[i]) addr[i] = SRC[i]",
+    "Store LEN bytes from SRC into memory at EA (forced alignment).",
     {
 	mmvector_t maskvec;
 	int i;
 	for (i = 0; i < fVECSIZE(); i++) maskvec.ub[i] = fGETQBIT(MASK,i);
         fV_AL_CHECK(EA,ALIGNMENT-1);
-	mem_store_vector_oddva(thread, insn, EA&~(ALIGNMENT-1), EA, insn->slot, LEN, &SRC.ub[0], &maskvec.ub[0], 1, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr));
+	mem_store_vector_oddva(thread, insn, EA&~(ALIGNMENT-1), EA, insn->slot, LEN, SRC.ub, &maskvec.ub[0], 1, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr));
     },
-    (A_STORE,A_MEMLIKE)
+    (A_STORE,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST)
 )
 
-DEF_MACRO(fSTOREMMVNQ,
+DEF_MACRO(fSTOREMMVNQ, (EA, SRC, MASK),
+	"*(EA&~(ALIGNMENT-1)) = SRC",
+	"Masked negated store vector SRC to memory at EA (forced alignment).",
 	fSTOREMMVNQ_AL(EA,fVECSIZE(),fVECSIZE(),SRC,MASK),
 	()
 )
 
-DEF_MACRO(fSTOREMMVU_AL,
+DEF_MACRO(fSTOREMMVU_AL, (EA, ALIGNMENT, LEN, SRC),
+    "char* addr = EA; for (i=0; i<LEN; ++i) addr[i] = SRC[i]",
+    "Store LEN bytes from SRC into memory at EA (unaligned).",
     {
     size4u_t size1 = ALIGNMENT-((EA)&(ALIGNMENT-1));
     size4u_t size2;
     if (size1>LEN) size1 = LEN;
     size2 = LEN-size1;
     mem_store_vector_oddva(thread, insn, EA+size1, EA+fVECSIZE(), /* slot */ 1, size2, &SRC.ub[size1], 0, 0, fUSE_LOOKUP_ADDRESS());
-    mem_store_vector_oddva(thread, insn, EA, EA, /* slot */ 0, size1, &SRC.ub[0], 0, 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr));
+    mem_store_vector_oddva(thread, insn, EA, EA, /* slot */ 0, size1, SRC.ub, 0, 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr));
     },
-    (A_STORE,A_MEMLIKE)
+    (A_STORE,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST)
 )
 
-DEF_MACRO(fSTOREMMVU,
+DEF_MACRO(fSTOREMMVU, (EA, SRC),
+	"*EA = SRC",
+	"Store vector SRC to memory at EA (unaligned).",
 	{
         thread->last_pkt->pkt_has_vtcm_access = 0;
         thread->last_pkt->pkt_access_count = 0;
@@ -651,7 +1029,9 @@ DEF_MACRO(fSTOREMMVU,
 	()
 )
 
-DEF_MACRO(fSTOREMMVQU_AL,
+DEF_MACRO(fSTOREMMVQU_AL, (EA, ALIGNMENT, LEN, SRC, MASK),
+    "char* addr = EA; for (i=0; i<LEN; ++i) if (MASK[i]) addr[i] = SRC[i]",
+    "Store LEN bytes from SRC into memory at EA (unaligned).",
     {
 	size4u_t size1 = ALIGNMENT-((EA)&(ALIGNMENT-1));
 	size4u_t size2;
@@ -661,12 +1041,14 @@ DEF_MACRO(fSTOREMMVQU_AL,
 	if (size1>LEN) size1 = LEN;
 	size2 = LEN-size1;
 	mem_store_vector_oddva(thread, insn, EA+size1, EA+fVECSIZE(),/* slot */ 1, size2, &SRC.ub[size1], &maskvec.ub[size1], 0, fUSE_LOOKUP_ADDRESS());
-	mem_store_vector_oddva(thread, insn, EA, /* slot */ 0, size1, &SRC.ub[0], &maskvec.ub[0], 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr));
+	mem_store_vector_oddva(thread, insn, EA, /* slot */ 0, size1, SRC.ub, &maskvec.ub[0], 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr));
     },
-    (A_STORE,A_MEMLIKE)
+    (A_STORE,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST)
 )
 
-DEF_MACRO(fSTOREMMVQU,
+DEF_MACRO(fSTOREMMVQU, (EA, SRC, MASK),
+	"*EA = SRC",
+	"Store vector SRC to memory at EA (unaligned).",
 	{
         thread->last_pkt->pkt_has_vtcm_access = 0;
         thread->last_pkt->pkt_access_count = 0;
@@ -682,7 +1064,9 @@ DEF_MACRO(fSTOREMMVQU,
 	()
 )
 
-DEF_MACRO(fSTOREMMVNQU_AL,
+DEF_MACRO(fSTOREMMVNQU_AL, (EA, ALIGNMENT, LEN, SRC, MASK),
+    "char* addr = EA; for (i=0; i<LEN; ++i) if (!MASK[i]) addr[i] = SRC[i]",
+    "Store LEN bytes from SRC into memory at EA (unaligned).",
     {
 	size4u_t size1 = ALIGNMENT-((EA)&(ALIGNMENT-1));
 	size4u_t size2;
@@ -692,12 +1076,14 @@ DEF_MACRO(fSTOREMMVNQU_AL,
 	if (size1>LEN) size1 = LEN;
 	size2 = LEN-size1;
 	mem_store_vector_oddva(thread, insn, EA+size1, EA+fVECSIZE(), /* slot */ 1, size2, &SRC.ub[size1], &maskvec.ub[size1], 1, fUSE_LOOKUP_ADDRESS());
-	mem_store_vector_oddva(thread, insn, EA, EA, /* slot */ 0, size1, &SRC.ub[0], &maskvec.ub[0], 1, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr));
+	mem_store_vector_oddva(thread, insn, EA, EA, /* slot */ 0, size1, SRC.ub, &maskvec.ub[0], 1, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr));
     },
-    (A_STORE,A_MEMLIKE)
+    (A_STORE,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST)
 )
 
-DEF_MACRO(fSTOREMMVNQU,
+DEF_MACRO(fSTOREMMVNQU, (EA, SRC, MASK),
+	"*EA = SRC",
+	"Store vector SRC to memory at EA (unaligned).",
 	{
         thread->last_pkt->pkt_has_vtcm_access = 0;
         thread->last_pkt->pkt_access_count = 0;
@@ -716,127 +1102,446 @@ DEF_MACRO(fSTOREMMVNQU,
 
 
 
-DEF_MACRO(fVFOREACH,
+DEF_MACRO(fVFOREACH,(WIDTH, VAR),
+    "for (VAR = 0; VAR < VELEM(WIDTH); VAR++)",
+    "For VAR in each WIDTH-bit vector index",
     for (VAR = 0; VAR < fVELEM(WIDTH); VAR++),
     /* NOTHING */
 )
 
-DEF_MACRO(fVARRAY_ELEMENT_ACCESS,
+DEF_MACRO(fVARRAY_ELEMENT_ACCESS, (ARRAY, TYPE, INDEX),
+    "ARRAY.TYPE[INDEX]",
+    "Access element of type TYPE at position INDEX of flattened ARRAY",
     ARRAY.v[(INDEX) / (fVECSIZE()/(sizeof(ARRAY.TYPE[0])))].TYPE[(INDEX) % (fVECSIZE()/(sizeof(ARRAY.TYPE[0])))],
     ()
 )
 
-DEF_MACRO(fVNEWCANCEL,
+DEF_MACRO(fVNEWCANCEL,(REGNUM),
+	"Ignore current value for register REGNUM",
+	"Ignore current value for register REGNUM",
 	do { THREAD2STRUCT->VRegs_select &= ~(1<<(REGNUM)); } while (0),
 	()
 )
 
-DEF_MACRO(fTMPVDATA,
+DEF_MACRO(fTMPVDATA,(),
+	"Data from .tmp load",
+	"Data from .tmp load and clear tmp status",
 	mmvec_vtmp_data(thread),
-	(A_CVI)
+	(A_CVI,A_CVI_REQUIRES_TMPLOAD)
 )
 
-DEF_MACRO(fVSATDW,
+DEF_MACRO(fVSATDW, (U,V),
+    "usat_32(U:V)",
+    "Use 32-bits of U as MSW and 32-bits of V as LSW and saturate the resultant 64-bits to 32 bits",
     fVSATW( ( ( ((long long)U)<<32 ) | fZXTN(32,64,V) ) ),
     /* attribs */
 )
 
-DEF_MACRO(fVASL_SATHI,
+DEF_MACRO(fVASL_SATHI, (U,V),
+    "uasl_sathi(U:V)",
+    "Use 32-bits of U as MSW and 32-bits of V as LSW, left shift by 1 and saturate the result and take high word",
     fVSATW(((U)<<1) | ((V)>>31)),
     /* attribs */
 )
 
-DEF_MACRO(fVUADDSAT,
+DEF_MACRO(fVUADDSAT,(WIDTH,U,V),
+	"usat_##WIDTH(U+V)",
+	"Add WIDTH-bit values U and V with saturation",
 	fVSATUN( WIDTH, fZXTN(WIDTH, 2*WIDTH, U)  + fZXTN(WIDTH, 2*WIDTH, V)),
 	/* attribs */
 )
 
-DEF_MACRO(fVSADDSAT,
-	fVSATN(  WIDTH, fSXTN(WIDTH, 2*WIDTH, U)  + fSXTN(WIDTH, 2*WIDTH, V)),
+DEF_MACRO(fVSADDSAT,(WIDTH,U,V),
+	"sat_##WIDTH(U+V)",
+	"Add WIDTH-bit values U and V with saturation",
+	({size8s_t tmp5 = fSXTN(WIDTH, 2*WIDTH, U);
+	  size8s_t tmp6 = fSXTN(WIDTH, 2*WIDTH, V);
+      size8s_t tmp7 = tmp5 + tmp6;
+	  fVSATN(  WIDTH, tmp7);
+	 }),
 	/* attribs */
 )
 
-DEF_MACRO(fVUSUBSAT,
+DEF_MACRO(fVUSUBSAT,(WIDTH,U,V),
+	"usat_##WIDTH(U-V)",
+	"sub WIDTH-bit values U and V with saturation",
 	fVSATUN( WIDTH, fZXTN(WIDTH, 2*WIDTH, U)  - fZXTN(WIDTH, 2*WIDTH, V)),
 	/* attribs */
 )
 
-DEF_MACRO(fVSSUBSAT,
+DEF_MACRO(fVSSUBSAT,(WIDTH,U,V),
+	"sat_##WIDTH(U-V)",
+	"sub WIDTH-bit values U and V with saturation",
 	fVSATN(  WIDTH, fSXTN(WIDTH, 2*WIDTH, U)  - fSXTN(WIDTH, 2*WIDTH, V)),
 	/* attribs */
 )
 
-DEF_MACRO(fVAVGU,
+DEF_MACRO(fVAVGU,(WIDTH,U,V),
+	"(U+V)/2",
+	"average WIDTH-bit values U and V with saturation",
 	((fZXTN(WIDTH, 2*WIDTH, U) + fZXTN(WIDTH, 2*WIDTH, V))>>1),
 	/* attribs */
 )
 
-DEF_MACRO(fVAVGURND,
+DEF_MACRO(fVAVGURND,(WIDTH,U,V),
+	"(U+V+1)/2",
+	"average WIDTH-bit values U and V with saturation",
 	((fZXTN(WIDTH, 2*WIDTH, U) + fZXTN(WIDTH, 2*WIDTH, V)+1)>>1),
 	/* attribs */
 )
 
-DEF_MACRO(fVNAVGU,
+DEF_MACRO(fVNAVGU,(WIDTH,U,V),
+	"(U-V)/2",
+	"average WIDTH-bit values U and V with saturation",
 	((fZXTN(WIDTH, 2*WIDTH, U) - fZXTN(WIDTH, 2*WIDTH, V))>>1),
 	/* attribs */
 )
 
-DEF_MACRO(fVNAVGURNDSAT,
+DEF_MACRO(fVNAVGURNDSAT,(WIDTH,U,V),
+	"(U-V+1)/2",
+	"average WIDTH-bit values U and V with saturation",
 	fVSATUN(WIDTH,((fZXTN(WIDTH, 2*WIDTH, U) - fZXTN(WIDTH, 2*WIDTH, V)+1)>>1)),
 	/* attribs */
 )
 
-DEF_MACRO(fVAVGS,
+DEF_MACRO(fVAVGS,(WIDTH,U,V),
+	"(U+V)/2",
+	"average WIDTH-bit values U and V with saturation",
 	((fSXTN(WIDTH, 2*WIDTH, U) + fSXTN(WIDTH, 2*WIDTH, V))>>1),
 	/* attribs */
 )
 
-DEF_MACRO(fVAVGSRND,
+DEF_MACRO(fVAVGSRND,(WIDTH,U,V),
+	"(U+V+1)/2",
+	"average WIDTH-bit values U and V with saturation",
 	((fSXTN(WIDTH, 2*WIDTH, U) + fSXTN(WIDTH, 2*WIDTH, V)+1)>>1),
 	/* attribs */
 )
 
-DEF_MACRO(fVNAVGS,
+DEF_MACRO(fVNAVGS,(WIDTH,U,V),
+	"(U-V)/2",
+	"average WIDTH-bit values U and V with saturation",
 	((fSXTN(WIDTH, 2*WIDTH, U) - fSXTN(WIDTH, 2*WIDTH, V))>>1),
 	/* attribs */
 )
 
-DEF_MACRO(fVNAVGSRND,
+DEF_MACRO(fVNAVGSRND,(WIDTH,U,V),
+	"(U-V+1)/2",
+	"average WIDTH-bit values U and negative V followed by rounding",
 	((fSXTN(WIDTH, 2*WIDTH, U) - fSXTN(WIDTH, 2*WIDTH, V)+1)>>1),
 	/* attribs */
 )
 
-DEF_MACRO(fVNAVGSRNDSAT,
+DEF_MACRO(fVNAVGSRNDSAT,(WIDTH,U,V),
+	"(U-V+1)/2",
+	"average WIDTH-bit values U and V with saturation",
 	fVSATN(WIDTH,((fSXTN(WIDTH, 2*WIDTH, U) - fSXTN(WIDTH, 2*WIDTH, V)+1)>>1)),
 	/* attribs */
 )
 
 
-DEF_MACRO(fVNOROUND,
+DEF_MACRO(fVNOROUND,(VAL,SHAMT),
+	"VAL",
+	"VAL",
 	VAL,
 	/* NOTHING */
 )
-DEF_MACRO(fVNOSAT,
+DEF_MACRO(fVNOSAT,(VAL),
+	"VAL",
+	"VAL",
 	VAL,
 	/* NOTHING */
 )
 
-DEF_MACRO(fVROUND,
+DEF_MACRO(fVROUND,(VAL,SHAMT),
+	"VAL + (1<<(SHAMT-1))",
+	"VAL + RNDBIT",
 	((VAL) + (((SHAMT)>0)?(1LL<<((SHAMT)-1)):0)),
 	/* NOTHING */
 )
 
-DEF_MACRO(fCARRY_FROM_ADD32,
+DEF_MACRO(fCARRY_FROM_ADD32,(A,B,C),
+	"carry_from(A,B,C)",
+	"carry_from(A,B,C)",
 	(((fZXTN(32,64,A)+fZXTN(32,64,B)+C) >> 32) & 1),
 	/* NOTHING */
 )
 
-DEF_MACRO(fUARCH_NOTE_PUMP_4X,
+DEF_MACRO(fUARCH_NOTE_PUMP_4X,(),
+	"",
+	"",
 	,
-	()
+	(A_CVI_PUMP_4X)
 )
 
-DEF_MACRO(fUARCH_NOTE_PUMP_2X,
+DEF_MACRO(fUARCH_NOTE_PUMP_2X,(),
+	"",
+	"",
 	,
+	(A_CVI_PUMP_2X)
+)
+
+DEF_MACRO(fVDOCHKPAGECROSS,(BASE,SUM),
+	"",
+	"",
+	if (UNLIKELY(thread->timing_on)) {
+		thread->mem_access[slot].check_page_crosses = 1;
+		thread->mem_access[slot].page_cross_base = BASE;
+		thread->mem_access[slot].page_cross_sum = SUM;
+	},
+	(A_EA_PAGECROSS)
+)
+
+/* FP instructions */
+/*Qfloat Macros for muls*/
+DEF_MACRO(fPARSEQF32,(A),
+  "A",
+  "Parsing QF32 to extract exp/sig",
+  parse_qf32(A),
+  (A_HVX_FLT)
+)
+
+DEF_MACRO(fRNDSATQF32,(A,B,C),
+  "rnd_sat(A,B,C)",
+  "Rnd/Sat/Norm of Vector Multiply of two QF32 inputs",
+  rnd_sat_qf32(A,B,C),
+  (A_HVX_FLT)
+)
+
+DEF_MACRO(fPARSEQF16,(A),
+  "A",
+  "Parsing QF16 to extract exp/sig",
+  parse_qf16(A),
+  (A_HVX_FLT)
+)
+
+DEF_MACRO(fRNDSATQF16,(A,B,C),
+  "rnd_sat(A,B,C)",
+  "Rnd/Sat/Norm of Vector Multiply of two QF16 inputs",
+  rnd_sat_qf16(A,B,C),
 	()
 )
+/*Qfloat Macros for others*/
+DEF_MACRO(fPARSESF,(A),
+  "A",
+  "Parsing IEEE SF to extract sign/exp/sig",
+  parse_sf(A),
+  (A_HVX_FLT)
+)
+
+DEF_MACRO(fRNDSATSF,(A,B),
+  "rnd_sat(A,B)",
+  "Rnd/Sat/Norm of Vector Multiply of two IEEE SF inputs",
+  rnd_sat_sf(A,B),
+  (A_HVX_FLT)
+)
+
+DEF_MACRO(fPARSEHF,(A),
+  "A",
+  "Parsing IEEE HF to extract sign/exp/sig",
+  parse_hf(A),
+  (A_HVX_FLT)
+)
+
+DEF_MACRO(fRNDSATHF,(A,B),
+  "rnd_sat(A,B)",
+  "Rnd/Sat/Norm of Vector Multiply of two IEEE HF inputs",
+  rnd_sat_hf(A,B),
+  (A_HVX_FLT)
+)
+
+DEF_MACRO(fRNDSATW,(A,B),
+  "rnd_sat(A,B)",
+  "Rnd/Sat/Norm of Vector convert of W inputs",
+  rnd_sat_w(A,B),
+  (A_HVX_FLT)
+)
+
+DEF_MACRO(fRNDSATUW,(A,B),
+  "rnd_sat(A,B)",
+  "Rnd/Sat/Norm of Vector convert of UW inputs",
+  rnd_sat_uw(A,B),
+  (A_HVX_FLT)
+)
+
+DEF_MACRO(fRNDSATH,(A,B),
+  "rnd_sat(A,B)",
+  "Rnd/Sat/Norm of Vector convert of H inputs",
+  rnd_sat_h(A,B),
+  (A_HVX_FLT)
+)
+
+DEF_MACRO(fRNDSATUH,(A,B),
+  "rnd_sat(A,B)",
+  "Rnd/Sat/Norm of Vector convert of UW inputs",
+  rnd_sat_uh(A,B),
+  (A_HVX_FLT)
+)
+
+DEF_MACRO(fRNDSATB,(A,B),
+  "rnd_sat(A,B)",
+  "Rnd/Sat/Norm of Vector convert of B inputs",
+  rnd_sat_b(A,B),
+  (A_HVX_FLT)
+)
+
+DEF_MACRO(fRNDSATUB,(A,B),
+  "rnd_sat(A,B)",
+  "Rnd/Sat/Norm of Vector convert of UB inputs",
+  rnd_sat_ub(A,B),
+  (A_HVX_FLT)
+)
+
+DEF_MACRO(fNEGQF32,(A),
+  "-(A)",
+  "Take Ones complement",
+  negate32(A),
+  (A_HVX_FLT)
+)
+
+DEF_MACRO(fNEGQF16,(A),
+  "-(A)",
+  "Take Ones complement",
+  negate16(A),
+  (A_HVX_FLT)
+)
+
+DEF_MACRO(fNEGSF,(A),
+  "-(A)",
+  "Change sign",
+  negate_sf(A),
+  (A_HVX_FLT)
+)
+DEF_MACRO(fNEGHF,(A),
+  "-(A)",
+  "Change sign",
+  negate_hf(A),
+  (A_HVX_FLT)
+)
+
+//FP vector compare
+DEF_MACRO(fCMPGT_QF32,(A,B),
+  "(A > B)",
+  "Vector compare of QF32 format",
+  cmpgt_qf32(A,B),
+  (A_HVX_FLT)
+)
+
+DEF_MACRO(fCMPGT_QF16,(A,B),
+  "(A > B)",
+  "Vector compare of QF16 format",
+  cmpgt_qf16(A,B),
+  (A_HVX_FLT)
+)
+
+DEF_MACRO(fCMPGT_SF,(A,B),
+  "(A > B)",
+  "Vector compare of SF format",
+  cmpgt_sf(A,B),
+  (A_HVX_FLT)
+)
+
+DEF_MACRO(fCMPGT_HF,(A,B),
+  "(A > B)",
+  "Vector compare of HF format",
+  cmpgt_hf(A,B),
+	(A_HVX_FLT)
+)
+
+DEF_MACRO(fCMPGT_BF,(A,B),
+	"(A > B)",
+	"Vector compare of BF format",
+	cmpgt_sf(((int)A) << 16,((int)B) << 16),
+  (A_HVX_FLT)
+)
+
+DEF_MACRO(fCMPGT_QF32_SF,(A,B),
+  "(A > B)",
+  "Vector compare of QF32/SF format",
+  cmpgt_qf32_sf(A,B),
+  (A_HVX_FLT)
+)
+
+DEF_MACRO(fCMPGT_QF16_HF,(A,B),
+  "(A > B)",
+  "Vector compare of QF16/HF format",
+  cmpgt_qf16_hf(A,B),
+  (A_HVX_FLT)
+)
+
+//VMAX/VMIN_QF32/QF16
+DEF_MACRO(fMAX_QF32,(X,Y),
+  "max(X,Y)",
+  "",
+  max_qf32(X,Y),
+  (A_HVX_FLT)
+)
+DEF_MACRO(fMIN_QF32,(X,Y),
+  "min(X,Y)",
+  "",
+  min_qf32(X,Y),
+  (A_HVX_FLT)
+)
+DEF_MACRO(fMAX_QF32_SF,(X,Y),
+  "max(X,Y)",
+  "",
+  max_qf32_sf(X,Y),
+  (A_HVX_FLT)
+)
+DEF_MACRO(fMIN_QF32_SF,(X,Y),
+  "min(X,Y)",
+  "",
+  min_qf32_sf(X,Y),
+  (A_HVX_FLT)
+)
+DEF_MACRO(fMAX_QF16,(X,Y),
+  "max(X,Y)",
+  "",
+  max_qf16(X,Y),
+  (A_HVX_FLT)
+)
+DEF_MACRO(fMIN_QF16,(X,Y),
+  "min(X,Y)",
+  "",
+  min_qf16(X,Y),
+  (A_HVX_FLT)
+)
+DEF_MACRO(fMAX_QF16_HF,(X,Y),
+  "max(X,Y)",
+  "",
+  max_qf16_hf(X,Y),
+  (A_HVX_FLT)
+)
+DEF_MACRO(fMIN_QF16_HF,(X,Y),
+  "min(X,Y)",
+  "",
+  min_qf16_hf(X,Y),
+  (A_HVX_FLT)
+)
+
+//MAX/MIN_SF/HF
+DEF_MACRO(fMAX_SF,(X,Y),
+  "max(X,Y)",
+  "",
+  max_sf(X,Y),
+  (A_HVX_FLT)
+)
+DEF_MACRO(fMIN_SF,(X,Y),
+  "min(X,Y)",
+  "",
+  min_sf(X,Y),
+  (A_HVX_FLT)
+)
+DEF_MACRO(fMAX_HF,(X,Y),
+  "max(X,Y)",
+  "",
+  max_hf(X,Y),
+  (A_HVX_FLT)
+)
+DEF_MACRO(fMIN_HF,(X,Y),
+  "min(X,Y)",
+  "",
+  min_hf(X,Y),
+  (A_HVX_FLT)
+)
+
diff --git a/target/hexagon/imported/system.idef b/target/hexagon/imported/system.idef
index 7c6568e75e429..aa57149a1ceb2 100644
--- a/target/hexagon/imported/system.idef
+++ b/target/hexagon/imported/system.idef
@@ -25,44 +25,301 @@
 /* User->OS interface                       */
 /********************************************/
 
-Q6INSN(J2_trap0,"trap0(#u8)",ATTRIBS(A_COF),
+Q6INSN(J2_trap0,"trap0(#u8)",ATTRIBS(A_COF,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET),
 "Trap to Operating System",
-    fTRAP(0,uiV);
+	fTRAP(0,uiV);
 )
 
-Q6INSN(J2_pause,"pause(#u8)",ATTRIBS(A_COF),
+Q6INSN(J2_trap1,"trap1(Rx32,#u8)",ATTRIBS(A_COF,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET),
+"Trap to Operating System",
+	/*
+	 * Note: if RxV is not written, we get the same as the input.
+	 * Since trap1 is SOLO, this means the register will effectively not be updated
+	 */
+	if (!fTRAP1_VIRTINSN(uiV)) {
+		fTRAP(1,uiV);
+	} else if (uiV == 1) {
+		fVIRTINSN_RTE(uiV,RxV);
+	} else if (uiV == 3) {
+		fVIRTINSN_SETIE(uiV,RxV);
+	} else if (uiV == 4) {
+		fVIRTINSN_GETIE(uiV,RxV);
+	} else if (uiV == 6) {
+		fVIRTINSN_SPSWAP(uiV,RxV);
+	})
+
+Q6INSN(J2_pause,"pause(#u8)",ATTRIBS(A_COF,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET),
 "Enter low-power state for #u8 cycles",{fPAUSE(uiV);})
 
-Q6INSN(Y2_icinva,"icinva(Rs32)",ATTRIBS(A_ICOP,A_ICFLUSHOP),"Instruction Cache Invalidate Address",{fEA_REG(RsV); fICINVA(EA);})
+Q6INSN(J2_rte,  "rte", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NO_TIMING_LOG),
+"Return from Exception",
+{
+fHIDE(if((thread->timing_on) && (thread->status & EXEC_STATUS_REPLAY)) { return; })
+fHIDE(CALLBACK(thread->processor_ptr->options->rte_callback,
+      thread->system_ptr,thread->processor_ptr,
+      thread->threadId,0);)
+fCLEAR_RTE_EX();
+fBRANCH(fREAD_ELR(),COF_TYPE_RTE);})
+
+
+/********************************************/
+/* Interrupt Management                     */
+/********************************************/
+
+Q6INSN(Y2_swi,"swi(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK),"Software Interrupt",{DO_SWI(RsV);})
+Q6INSN(Y2_cswi,"cswi(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK),"Cancel Software Interrupt",{DO_CSWI(RsV);})
+Q6INSN(Y2_ciad,"ciad(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK),"Re-enable interrupt in IAD",{DO_CIAD(RsV);})
+Q6INSN(Y4_siad,"siad(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK),"Disable interrupt in IAD",{DO_SIAD(RsV);})
+Q6INSN(Y2_iassignr,"Rd32=iassignr(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK),"Read interrupt to thread assignments",{DO_IASSIGNR(RsV,RdV);})
+Q6INSN(Y2_iassignw,"iassignw(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK),"Write interrupt to thread assignments",{DO_IASSIGNW(RsV);})
+
+
+Q6INSN(Y2_getimask,"Rd32=getimask(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK),"Read imask register of another thread",
+{RdV = READ_IMASK(RsV & thread->processor_ptr->thread_system_mask); })
+
+Q6INSN(Y2_setimask,"setimask(Pt4,Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK),"Change imask register of another thread",
+{fPREDUSE_TIMING();WRITE_IMASK(PtV & thread->processor_ptr->thread_system_mask,RsV); })
+
+
+
+/********************************************/
+/* TLB management                           */
+/********************************************/
+
+Q6INSN(Y2_tlbw,"tlbw(Rss32,Rt32)", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET),
+"Write TLB entry", {fTLBW(RtV,RssV);})
+
+Q6INSN(Y5_ctlbw,"Rd32=ctlbw(Rss32,Rt32)", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET),
+"Conditional Write TLB entry",
+{
+  if (fTLB_ENTRY_OVERLAP( (1LL<<63) | RssV )) {
+        RdV=fTLB_ENTRY_OVERLAP_IDX( (1LL<<63) | RssV);
+     } else {
+        fTLBW(RtV,RssV);
+        RdV=0x80000000;
+     }
+})
+
+Q6INSN(Y5_tlboc,"Rd32=tlboc(Rss32)", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET),
+"TLB overlap check",
+{
+  if (fTLB_ENTRY_OVERLAP( (1LL<<63) | RssV )) {
+        RdV=fTLB_ENTRY_OVERLAP_IDX( (1LL<<63) | RssV);
+     } else {
+        RdV=0x80000000;
+     }
+})
+
+Q6INSN(Y2_tlbr,"Rdd32=tlbr(Rs32)", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET), "Read TLB entry",
+{RddV = fTLBR(RsV);})
+
+Q6INSN(Y2_tlbp,"Rd32=tlbp(Rs32)", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET), "Probe TLB", {RdV=fTLBP(RsV);})
+
+Q6INSN(Y5_tlbasidi,"tlbinvasid(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET), "Invalidate ASID",
+{
+	fHIDE(int i;)
+    fHIDE(unsigned int NUM_TLB_ENTRIES = NUM_TLB_REGS(thread->processor_ptr);)
+	for (i = 0; i < NUM_TLB_ENTRIES; i++) {
+		if ((fGET_FIELD(fTLBR(i),PTE_G) == 0) &&
+			(fGET_FIELD(fTLBR(i),PTE_ASID) == fEXTRACTU_RANGE(RsV,26,20))) {
+			fTLBW(i,fTLBR(i) & ~(1ULL << 63));
+		}
+	}
+})
+
+Q6INSN(Y2_tlblock,"tlblock", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_NO_TIMING_LOG), "Lock TLB",
+{fSET_TLB_LOCK();})
+
+Q6INSN(Y2_tlbunlock,"tlbunlock", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET), "Unlock TLB",
+{fCLEAR_TLB_LOCK();})
+
+Q6INSN(Y2_k0lock,"k0lock", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_NO_TIMING_LOG), "Lock K0",
+{fSET_K0_LOCK();})
+
+Q6INSN(Y2_k0unlock,"k0unlock", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET), "Unlock K0",
+{fCLEAR_K0_LOCK();})
+
+/********************************************/
+/* Supervisor Reg Management                */
+/********************************************/
+
+Q6INSN(Y2_crswap0,"crswap(Rx32,sgp0)",ATTRIBS(A_PRIV,A_NOTE_PRIV), "Swap system general pointer 0 with GPR",
+{fHIDE(size4s_t tmp;) tmp = RxV; RxV = READ_SGP0(); WRITE_SGP0(tmp);})
+Q6INSN(Y4_crswap1,"crswap(Rx32,sgp1)",ATTRIBS(A_PRIV,A_NOTE_PRIV), "Swap system general pointer 1 with GPR",
+{fHIDE(size4s_t tmp;) tmp = RxV; RxV = READ_SGP1(); WRITE_SGP1(tmp);})
+
+Q6INSN(Y4_crswap10,"crswap(Rxx32,sgp1:0)",ATTRIBS(A_PRIV,A_NOTE_PRIV), "Swap system general purpose 0/1 with GPR Pair",
+{fHIDE(size8s_t tmp;) tmp = RxxV; RxxV=READ_SGP10(); WRITE_SGP10(tmp);})
+
+Q6INSN(Y2_tfrscrr,"Rd32=Ss128",ATTRIBS(A_PRIV,A_NOTE_PRIV),"Transfer Supervisor Reg to GPR", {RdV=SsV;})
+Q6INSN(Y2_tfrsrcr,"Sd128=Rs32",ATTRIBS(A_PRIV,A_NOTE_PRIV),"Transfer GPR to Supervisor Reg", {SdV=RsV;})
+Q6INSN(Y4_tfrscpp,"Rdd32=Sss128",ATTRIBS(A_PRIV,A_NOTE_PRIV),"Transfer Supervisor Reg to GPR", {RddV=SssV;})
+Q6INSN(Y4_tfrspcp,"Sdd128=Rss32",ATTRIBS(A_PRIV,A_NOTE_PRIV),"Transfer GPR to Supervisor Reg", {SddV=RssV;})
+
+Q6INSN(G4_tfrgcrr,"Rd32=Gs32",ATTRIBS(A_GUEST,A_NOTE_GUEST),"Transfer Guest Reg to GPR", {RdV=GsV;})
+Q6INSN(G4_tfrgrcr,"Gd32=Rs32",ATTRIBS(A_GUEST,A_NOTE_GUEST),"Transfer GPR to Guest Reg", {GdV=RsV;})
+Q6INSN(G4_tfrgcpp,"Rdd32=Gss32",ATTRIBS(A_GUEST,A_NOTE_GUEST),"Transfer Guest Reg to GPR", {RddV=GssV;})
+Q6INSN(G4_tfrgpcp,"Gdd32=Rss32",ATTRIBS(A_GUEST,A_NOTE_GUEST),"Transfer GPR to Guest Reg", {GddV=RssV;})
+
+
+
+Q6INSN(Y2_setprio,"setprio(Pt4,Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV),"Change TID Prio of another thread",
+{fPREDUSE_TIMING();WRITE_PRIO(PtV & thread->processor_ptr->thread_system_mask,RsV); })
+
+
+
+
+/********************************************/
+/* Power Management / Thread on/off         */
+/********************************************/
+Q6INSN(Y6_diag,"diag(Rs32)",ATTRIBS(),"Send value to Diag trace module",{
+})
+Q6INSN(Y6_diag0,"diag0(Rss32,Rtt32)",ATTRIBS(),"Send values of two register to DIAG Trace. Set X=0",{
+})
+Q6INSN(Y6_diag1,"diag1(Rss32,Rtt32)",ATTRIBS(),"Send values of two register to DIAG Trace.  Set X=1",{
+})
+
 
-Q6INSN(Y2_isync,"isync",ATTRIBS(),"Memory Synchronization",{fISYNC();})
-Q6INSN(Y2_barrier,"barrier",ATTRIBS(A_RESTRICT_SLOT0ONLY),"Memory Barrier",{fBARRIER();})
-Q6INSN(Y2_syncht,"syncht",ATTRIBS(A_RESTRICT_SLOT0ONLY),"Memory Synchronization",{fSYNCH();})
+Q6INSN(Y4_trace,"trace(Rs32)",ATTRIBS(A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK),"Send value to ETM trace",{
+    fDO_TRACE(RsV);
+})
+
+Q6INSN(Y2_stop,"stop(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET),"Stop thread(s)",{
+    fHIDE(RsV=RsV;)
+    if (!fIN_DEBUG_MODE_NO_ISDB(fGET_TNUM())) fCLEAR_RUN_MODE(fGET_TNUM());
+})
+
+Q6INSN(Y4_nmi,"nmi(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_NO_TIMING_LOG),"Raise NMI on thread(s)",{
+    fDO_NMI(RsV);
+})
+
+Q6INSN(Y2_start,"start(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET),"Start thread(s)",fSTART(RsV);)
+
+Q6INSN(Y2_wait,"wait(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_NO_TIMING_LOG),"Make thread(s) wait",{
+    fHIDE(RsV=RsV;)
+    if (!fIN_DEBUG_MODE(fGET_TNUM())) fSET_WAIT_MODE(fGET_TNUM());
+	fIN_DEBUG_MODE_WARN(fGET_TNUM());
+})
+
+Q6INSN(Y2_resume,"resume(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET),"Make thread(s) stop waiting",fRESUME(RsV);)
+
+Q6INSN(Y2_break,"brkpt",ATTRIBS(A_NOTE_NOPACKET,A_RESTRICT_NOPACKET),"Breakpoint",{fBREAK();})
+
+
+/********************************************/
+/* Cache Management                         */
+/********************************************/
+
+Q6INSN(Y2_ictagr,"Rd32=ictagr(Rs32)",ATTRIBS(A_ICOP,A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_CACHEOP,A_COPBYIDX,A_ICTAGOP),"Instruction Cache Tag Read",{fICTAGR(RsV,RdV,RdN);})
+Q6INSN(Y2_ictagw,"ictagw(Rs32,Rt32)",ATTRIBS(A_ICOP,A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_CACHEOP,A_COPBYIDX,A_ICTAGOP),"Instruction Cache Tag Write",{fICTAGW(RsV,RtV);})
+Q6INSN(Y2_icdataw,"icdataw(Rs32,Rt32)",ATTRIBS(A_ICOP,A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_CACHEOP,A_COPBYIDX,A_ICTAGOP),"Instruction Cache Data Write",{fICDATAW(RsV,RtV);})
+Q6INSN(Y2_icdatar,"Rd32=icdatar(Rs32)",ATTRIBS(A_ICOP,A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_CACHEOP,A_COPBYIDX,A_ICTAGOP),"Instruction Cache Data Read",{fICDATAR(RsV, RdV);})
+Q6INSN(Y2_icinva,"icinva(Rs32)",ATTRIBS(A_ICOP,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_CACHEOP,A_COPBYADDRESS,A_ICFLUSHOP),"Instruction Cache Invalidate Address",{fEA_REG(RsV); fICINVA(EA);})
+Q6INSN(Y2_icinvidx,"icinvidx(Rs32)",ATTRIBS(A_ICOP,A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_CACHEOP,A_COPBYIDX,A_ICFLUSHOP),"Instruction Cache Invalidate Index",{fICINVIDX(RsV);})
+Q6INSN(Y2_ickill,"ickill",ATTRIBS(A_ICOP,A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_CACHEOP,A_ICFLUSHOP),"Instruction Cache Invalidate",{fICKILL();})
+
+Q6INSN(Y2_isync,"isync",ATTRIBS(A_NOTE_NOPACKET,A_RESTRICT_NOPACKET),"Memory Synchronization",{fISYNC();})
+Q6INSN(Y2_barrier,"barrier",ATTRIBS(A_NOTE_NOPACKET,A_RESTRICT_SLOT0ONLY,A_RESTRICT_PACKET_AXOK),"Memory Barrier",{fBARRIER();})
+Q6INSN(Y2_syncht,"syncht",ATTRIBS(A_NOTE_NOPACKET,A_RESTRICT_SLOT0ONLY,A_RESTRICT_NOPACKET),"Memory Synchronization",{fSYNCH();})
+
+
+Q6INSN(Y2_dcfetchbo,"dcfetch(Rs32+#u11:3)",ATTRIBS(A_RESTRICT_PREFERSLOT0,A_DCFETCH,A_RESTRICT_NOSLOT1_STORE),"Data Cache Prefetch",{fEA_RI(RsV,uiV); fDCFETCH(EA);})
+Q6INSN(Y2_dckill,"dckill",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_SLOT0ONLY,A_RESTRICT_NOPACKET,A_CACHEOP,A_DCFLUSHOP),"Data Cache Invalidate",{fDCKILL();})
 
 
-Q6INSN(Y2_dcfetchbo,"dcfetch(Rs32+#u11:3)",ATTRIBS(A_RESTRICT_PREFERSLOT0,A_DCFETCH),"Data Cache Prefetch",{fEA_RI(RsV,uiV); fDCFETCH(EA);})
+Q6INSN(Y2_dczeroa,"dczeroa(Rs32)",ATTRIBS(A_STORE,A_RESTRICT_SLOT1_AOK,A_NOTE_SLOT1_AOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYADDRESS,A_DCZEROA),"Zero an aligned 32-byte cacheline",{fEA_REG(RsV); fDCZEROA(EA);})
+Q6INSN(Y2_dccleana,"dccleana(Rs32)",ATTRIBS(A_RESTRICT_SLOT1_AOK,A_NOTE_SLOT1_AOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYADDRESS,A_DCFLUSHOP),"Data Cache Clean Address",{fEA_REG(RsV); fDCCLEANA(EA);})
+Q6INSN(Y2_dccleanidx,"dccleanidx(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYIDX,A_DCFLUSHOP),"Data Cache Clean Index",{fDCCLEANIDX(RsV);})
+Q6INSN(Y2_dccleaninva,"dccleaninva(Rs32)",ATTRIBS(A_RESTRICT_SLOT1_AOK,A_NOTE_SLOT1_AOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYADDRESS,A_DCFLUSHOP),"Data Cache Clean and Invalidate Address",{fEA_REG(RsV); fDCCLEANINVA(EA);})
+Q6INSN(Y2_dccleaninvidx,"dccleaninvidx(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYIDX,A_DCFLUSHOP),"Data Cache Clean and Invalidate Index",{fDCCLEANINVIDX(RsV);})
+Q6INSN(Y2_dcinva,"dcinva(Rs32)",ATTRIBS(A_RESTRICT_SLOT1_AOK,A_NOTE_SLOT1_AOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYADDRESS,A_DCFLUSHOP),"Data Cache Invalidate Address",{fEA_REG(RsV); fDCCLEANINVA(EA);})
+Q6INSN(Y2_dcinvidx,"dcinvidx(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYIDX,A_DCFLUSHOP),"Data Cache Invalidate Index",{fDCINVIDX(RsV);})
+Q6INSN(Y2_dctagr,"Rd32=dctagr(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYIDX,A_DCTAGOP),"Data Cache Tag Read",{fDCTAGR(RsV,RdV,RdN);})
+Q6INSN(Y2_dctagw,"dctagw(Rs32,Rt32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_RESTRICT_SLOT0ONLY,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_CACHEOP,A_COPBYIDX,A_DCTAGOP),"Data Cache Tag Write",{fDCTAGW(RsV,RtV);})
 
 
-Q6INSN(Y2_dczeroa,"dczeroa(Rs32)",ATTRIBS(A_STORE,A_RESTRICT_SLOT0ONLY,A_DCZEROA),"Zero an aligned 32-byte cacheline",{fEA_REG(RsV); fDCZEROA(EA);})
-Q6INSN(Y2_dccleana,"dccleana(Rs32)",ATTRIBS(A_RESTRICT_SLOT0ONLY,A_DCFLUSHOP),"Data Cache Clean Address",{fEA_REG(RsV); fDCCLEANA(EA);})
-Q6INSN(Y2_dccleaninva,"dccleaninva(Rs32)",ATTRIBS(A_RESTRICT_SLOT0ONLY,A_DCFLUSHOP),"Data Cache Clean and Invalidate Address",{fEA_REG(RsV); fDCCLEANINVA(EA);})
-Q6INSN(Y2_dcinva,"dcinva(Rs32)",ATTRIBS(A_RESTRICT_SLOT0ONLY,A_DCFLUSHOP),"Data Cache Invalidate Address",{fEA_REG(RsV); fDCCLEANINVA(EA);})
+Q6INSN(Y2_l2kill,"l2kill",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_SLOT0ONLY,A_RESTRICT_NOPACKET,A_CACHEOP,A_L2FLUSHOP),"L2 Cache Invalidate",{fL2KILL();})
+Q6INSN(Y4_l2tagw,"l2tagw(Rs32,Rt32)",ATTRIBS(A_PRIV,A_NOTE_BADTAG_UNDEF,A_NOTE_PRIV,A_RESTRICT_SLOT0ONLY,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_CACHEOP,A_COPBYIDX,A_L2TAGOP),"L2 Cache Tag Write",{fL2TAGW(RsV,RtV);})
+Q6INSN(Y4_l2tagr,"Rd32=l2tagr(Rs32)",ATTRIBS(A_PRIV,A_NOTE_BADTAG_UNDEF,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYIDX,A_L2TAGOP),"L2 Cache Tag Read",{fL2TAGR(RsV,RdV,RdN);})
 
+Q6INSN(Y2_l2cleaninvidx,"l2cleaninvidx(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYIDX,A_L2FLUSHOP),"L2 Cache Clean and Invalidate Index",{fL2CLEANINVIDX(RsV); })
+Q6INSN(Y5_l2cleanidx,"l2cleanidx(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYIDX,A_L2FLUSHOP),"L2 Cache Clean by Index",{fL2CLEANIDX(RsV); })
+Q6INSN(Y5_l2invidx,"l2invidx(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYIDX,A_L2FLUSHOP),"L2 Cache Invalidate by Index",{fL2INVIDX(RsV); })
 
-Q6INSN(Y4_l2fetch,"l2fetch(Rs32,Rt32)",ATTRIBS(A_RESTRICT_SLOT0ONLY),"L2 Cache Prefetch",
+
+
+Q6INSN(Y4_l2fetch,"l2fetch(Rs32,Rt32)",ATTRIBS(A_RESTRICT_SLOT0ONLY,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK),"L2 Cache Prefetch",
 { fL2FETCH(RsV,
-           (RtV&0xff), /*height*/
-           ((RtV>>8)&0xff), /*width*/
-           ((RtV>>16)&0xffff), /*stride*/
-           0); /*extra attrib flags*/
+	(RtV&0xff), /*height*/
+	((RtV>>8)&0xff), /*width*/
+	((RtV>>16)&0xffff), /*stride*/
+	0); /*extra attrib flags*/
+})
+
+Q6INSN(Y6_dmstart,"dmstart(Rs32)",ATTRIBS(A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_DMA,A_RESTRICT_SLOT0ONLY,A_SYNC_MARKER,A_NO_TIMING_LOG),"DMA Start", {
+	fUNIMP();
+})
+
+Q6INSN(Y6_dmlink,"dmlink(Rs32,Rt32)",ATTRIBS(A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_DMA,A_RESTRICT_SLOT0ONLY,A_SYNC_MARKER,A_NO_TIMING_LOG),"DMA Link", {
+	fUNIMP();
 })
 
+Q6INSN(Y6_dmpoll,"Rd32=dmpoll",ATTRIBS(A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_DMA,A_RESTRICT_SLOT0ONLY,A_NO_TIMING_LOG),"DMA Poll", {
+	fUNIMP();
+})
+
+Q6INSN(Y6_dmwait,"Rd32=dmwait",ATTRIBS(A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_DMA,A_RESTRICT_SLOT0ONLY,A_NO_TIMING_LOG),"DMA Wait", {
+	fUNIMP();
+})
+
+Q6INSN(Y6_dmsyncht,"Rd32=dmsyncht",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_DMA,A_RESTRICT_SLOT0ONLY,A_NO_TIMING_LOG),"DMA SynchT",{
+	fUNIMP();
+})
+Q6INSN(Y6_dmtlbsynch,"Rd32=dmtlbsynch",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_DMA,A_RESTRICT_SLOT0ONLY,A_NO_TIMING_LOG),"DMA TLB Synch",{
+	fUNIMP();
+})
+
+Q6INSN(Y6_dmcfgrd,"Rd32=dmcfgrd(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_DMA,A_RESTRICT_SLOT0ONLY,A_NO_TIMING_LOG),
+  "DMA Config Read", {
+	fUNIMP();
+})
+
+Q6INSN(Y6_dmcfgwr,"dmcfgwr(Rs32,Rt32)",ATTRIBS(A_NOTE_PRIV,A_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_DMA,A_RESTRICT_SLOT0ONLY,A_NO_TIMING_LOG),
+  "DMA Config Write", {
+	fUNIMP();
+})
+
+Q6INSN(Y6_dmpause,"Rd32=dmpause",ATTRIBS(A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_DMA,A_RESTRICT_SLOT0ONLY,A_NO_TIMING_LOG),"DMA Pause",{
+	fUNIMP();
+})
+
+Q6INSN(Y6_dmresume,"dmresume(Rs32)",ATTRIBS(A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_DMA,A_RESTRICT_SLOT0ONLY,A_SYNC_MARKER,A_NO_TIMING_LOG),"DMA Resume",{
+	fUNIMP();
+})
 
 
-Q6INSN(Y5_l2fetch,"l2fetch(Rs32,Rtt32)",ATTRIBS(A_RESTRICT_SLOT0ONLY),"L2 Cache Prefetch",
+Q6INSN(Y5_l2fetch,"l2fetch(Rs32,Rtt32)",ATTRIBS(A_RESTRICT_SLOT0ONLY,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK),"L2 Cache Prefetch",
 { fL2FETCH(RsV,
-           fGETUHALF(0,RttV), /*height*/
-           fGETUHALF(1,RttV), /*width*/
-           fGETUHALF(2,RttV), /*stride*/
-           fGETUHALF(3,RttV)); /*flags*/
+	fGETUHALF(0,RttV), /*height*/
+	fGETUHALF(1,RttV), /*width*/
+	fGETUHALF(2,RttV), /*stride*/
+	fGETUHALF(3,RttV)); /*flags*/
 })
+
+Q6INSN(Y5_l2locka,"Pd4=l2locka(Rs32)", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_CACHEOP,A_COPBYADDRESS,A_RESTRICT_SLOT0ONLY,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK,A_RESTRICT_LATEPRED,A_NOTE_LATEPRED),
+"Lock L2 cache line by address", { fEA_REG(RsV); fL2LOCKA(EA,PdV,PdN); fHIDE(MARK_LATE_PRED_WRITE(PdN)) })
+
+
+Q6INSN(Y5_l2unlocka,"l2unlocka(Rs32)", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_CACHEOP,A_COPBYADDRESS,A_RESTRICT_SLOT0ONLY,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK), "UnLock L2 cache line by address", { fEA_REG(RsV); fL2UNLOCKA(EA); })
+
+
+
+Q6INSN(Y5_l2gunlock,"l2gunlock",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_SLOT0ONLY,A_RESTRICT_NOPACKET,A_CACHEOP,A_L2FLUSHOP),"L2 Global Unlock",{fL2UNLOCK();})
+
+Q6INSN(Y5_l2gclean,"l2gclean",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_SLOT0ONLY,A_RESTRICT_NOPACKET,A_CACHEOP,A_L2FLUSHOP),"L2 Global Clean",{fL2CLEAN();})
+
+Q6INSN(Y5_l2gcleaninv,"l2gcleaninv",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_SLOT0ONLY,A_RESTRICT_NOPACKET,A_CACHEOP,A_L2FLUSHOP),"L2 Global Clean and Invalidate",{fL2CLEANINV();})
+
+Q6INSN(Y6_l2gcleanpa,"l2gclean(Rtt32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_SLOT0ONLY,A_RESTRICT_NOPACKET,A_CACHEOP,A_L2FLUSHOP),"L2 Global Clean by PA Range",{fL2CLEANPA(RttV);})
+
+Q6INSN(Y6_l2gcleaninvpa,"l2gcleaninv(Rtt32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_SLOT0ONLY,A_RESTRICT_NOPACKET,A_CACHEOP,A_L2FLUSHOP),"L2 Global Clean and Invalidate by PA Range",{fL2CLEANINVPA(RttV);})
diff --git a/target/hexagon/insn.h b/target/hexagon/insn.h
index 24dcf7fe9f385..5d59430da9e12 100644
--- a/target/hexagon/insn.h
+++ b/target/hexagon/insn.h
@@ -66,8 +66,8 @@ struct Packet {
 
     bool pkt_has_dczeroa;
 
-    bool pkt_has_store_s0;
-    bool pkt_has_store_s1;
+    bool pkt_has_scalar_store_s0;
+    bool pkt_has_scalar_store_s1;
 
     bool pkt_has_hvx;
     Insn *vhist_insn;
diff --git a/target/hexagon/internal.h b/target/hexagon/internal.h
index 32e96f00d97a1..ff89c9cda43ff 100644
--- a/target/hexagon/internal.h
+++ b/target/hexagon/internal.h
@@ -22,13 +22,32 @@
 
 int hexagon_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
 int hexagon_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
+#ifndef CONFIG_USER_ONLY
+int hexagon_sys_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n);
+int hexagon_sys_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n);
+#endif
 int hexagon_hvx_gdb_read_register(CPUState *env, GByteArray *mem_buf, int n);
 int hexagon_hvx_gdb_write_register(CPUState *env, uint8_t *mem_buf, int n);
 
 void hexagon_debug_vreg(CPUHexagonState *env, int regnum);
 void hexagon_debug_qreg(CPUHexagonState *env, int regnum);
 void hexagon_debug(CPUHexagonState *env);
+void hexagon_dump(CPUHexagonState *env, FILE *f, int flags);
 
 extern const char * const hexagon_regnames[TOTAL_PER_THREAD_REGS];
+extern const char * const hexagon_sregnames[];
+extern const char * const hexagon_gregnames[];
+
+void G_NORETURN do_raise_exception(CPUHexagonState *env,
+        uint32_t exception,
+        target_ulong PC,
+        uintptr_t retaddr);
+
+#define hexagon_cpu_mmu_enabled(env) \
+    GET_SYSCFG_FIELD(SYSCFG_MMUEN, arch_get_system_reg(env, HEX_SREG_SYSCFG))
+
+#ifndef CONFIG_USER_ONLY
+extern const VMStateDescription vmstate_hexagon_cpu;
+#endif
 
 #endif
diff --git a/target/hexagon/machine.c b/target/hexagon/machine.c
new file mode 100644
index 0000000000000..79e9b7effa5e7
--- /dev/null
+++ b/target/hexagon/machine.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright(c) 2023-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "migration/cpu.h"
+#include "cpu.h"
+#include "hex_mmu.h"
+
+static int get_u64_ptr(QEMUFile *f, void *pv, size_t size,
+                       const VMStateField *field)
+{
+    uint64_t *p = pv;
+    *p = qemu_get_be64(f);
+    return 0;
+}
+
+static int put_u64_ptr(QEMUFile *f, void *pv, size_t size,
+                      const VMStateField *field, JSONWriter *vmdesc)
+{
+    qemu_put_be64(f, *((uint64_t *)pv));
+    return 0;
+}
+
+const VMStateInfo vmstate_info_uint64_ptr = {
+    .name = "uint64_t_pointer",
+    .get  = get_u64_ptr,
+    .put  = put_u64_ptr,
+};
+
+static int get_hex_tlb_ptr(QEMUFile *f, void *pv, size_t size,
+                       const VMStateField *field)
+{
+    CPUHexagonTLBContext *tlb = pv;
+    for (int i = 0; i < ARRAY_SIZE(tlb->entries); i++) {
+        tlb->entries[i] = qemu_get_be64(f);
+    }
+    return 0;
+}
+
+static int put_hex_tlb_ptr(QEMUFile *f, void *pv, size_t size,
+                      const VMStateField *field, JSONWriter *vmdesc)
+{
+    CPUHexagonTLBContext *tlb = pv;
+    for (int i = 0; i < ARRAY_SIZE(tlb->entries); i++) {
+        qemu_put_be64(f,  tlb->entries[i]);
+    }
+    return 0;
+}
+
+const VMStateInfo vmstate_info_hex_tlb_ptr = {
+    .name = "hex_tlb_pointer",
+    .get  = get_hex_tlb_ptr,
+    .put  = put_hex_tlb_ptr,
+};
+
+const VMStateDescription vmstate_hexagon_cpu = {
+    .name = "cpu",
+    .version_id = 0,
+    .minimum_version_id = 0,
+    .fields = (VMStateField[]) {
+        VMSTATE_CPU(),
+        VMSTATE_UINTTL_ARRAY(env.gpr, HexagonCPU, TOTAL_PER_THREAD_REGS),
+        VMSTATE_UINTTL_ARRAY(env.pred, HexagonCPU, NUM_PREGS),
+        VMSTATE_UINTTL_ARRAY(env.t_sreg, HexagonCPU, NUM_SREGS),
+        VMSTATE_UINTTL_ARRAY(env.greg, HexagonCPU, NUM_GREGS),
+        VMSTATE_UINTTL(env.next_PC, HexagonCPU),
+        VMSTATE_UINTTL(env.tlb_lock_state, HexagonCPU),
+        VMSTATE_UINTTL(env.k0_lock_state, HexagonCPU),
+        VMSTATE_UINTTL(env.tlb_lock_count, HexagonCPU),
+        VMSTATE_UINTTL(env.k0_lock_count, HexagonCPU),
+        VMSTATE_UINTTL(env.threadId, HexagonCPU),
+        VMSTATE_UINTTL(env.cause_code, HexagonCPU),
+        VMSTATE_UINTTL(env.wait_next_pc, HexagonCPU),
+        VMSTATE_POINTER(env.hex_tlb, HexagonCPU, 0,
+                        vmstate_info_hex_tlb_ptr, CPUHexagonTLBContext *),
+        VMSTATE_UINT64(env.t_cycle_count, HexagonCPU),
+        VMSTATE_POINTER(env.g_pcycle_base, HexagonCPU, 0,
+                        vmstate_info_uint64_ptr, uint64_t *),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h
index ee3d4c88e7bdf..01469a28a0cce 100644
--- a/target/hexagon/macros.h
+++ b/target/hexagon/macros.h
@@ -82,7 +82,7 @@
  */
 #define CHECK_NOSHUF(VA, SIZE) \
     do { \
-        if (insn->slot == 0 && ctx->pkt->pkt_has_store_s1) { \
+        if (insn->slot == 0 && ctx->pkt->pkt_has_scalar_store_s1) { \
             probe_noshuf_load(VA, SIZE, ctx->mem_idx); \
             process_store(ctx, 1); \
         } \
@@ -93,11 +93,11 @@
         TCGLabel *noshuf_label = gen_new_label(); \
         tcg_gen_brcondi_tl(TCG_COND_EQ, PRED, 0, noshuf_label); \
         GET_EA; \
-        if (insn->slot == 0 && ctx->pkt->pkt_has_store_s1) { \
+        if (insn->slot == 0 && ctx->pkt->pkt_has_scalar_store_s1) { \
             probe_noshuf_load(EA, SIZE, ctx->mem_idx); \
         } \
         gen_set_label(noshuf_label); \
-        if (insn->slot == 0 && ctx->pkt->pkt_has_store_s1) { \
+        if (insn->slot == 0 && ctx->pkt->pkt_has_scalar_store_s1) { \
             process_store(ctx, 1); \
         } \
     } while (0)
@@ -524,7 +524,7 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift)
 
 #define fLOAD(NUM, SIZE, SIGN, EA, DST) \
     do { \
-        check_noshuf(env, pkt_has_store_s1, slot, EA, SIZE, GETPC()); \
+        check_noshuf(env, pkt_has_scalar_store_s1, slot, EA, SIZE, GETPC()); \
         DST = (size##SIZE##SIGN##_t)MEM_LOAD##SIZE(env, EA, GETPC()); \
     } while (0)
 #endif
@@ -537,9 +537,6 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift)
 
 #ifdef CONFIG_USER_ONLY
 #define fFRAMECHECK(ADDR, EA) do { } while (0) /* Not modelled in linux-user */
-#else
-/* System mode not implemented yet */
-#define fFRAMECHECK(ADDR, EA)  g_assert_not_reached();
 #endif
 
 #ifdef QEMU_GENERATE
@@ -630,8 +627,18 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift)
 #define fCONSTLL(A) A##LL
 #define fECHO(A) (A)
 
-#define fTRAP(TRAPTYPE, IMM) helper_raise_exception(env, HEX_EXCP_TRAP0)
+#ifdef CONFIG_USER_ONLY
+#define fTRAP(TRAPTYPE, IMM) \
+    do { \
+        hexagon_raise_exception_err(env, HEX_EVENT_TRAP0, PC); \
+    } while (0)
+#endif
+
+#define fDO_TRACE(SREG)
+#define fBREAK()
+#define fUNPAUSE()
 #define fPAUSE(IMM)
+#define fDCFETCH(REG)
 
 #define fALIGN_REG_FIELD_VALUE(FIELD, VAL) \
     ((VAL) << reg_field_info[FIELD].offset)
@@ -642,16 +649,43 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift)
                    reg_field_info[FIELD].width, \
                    reg_field_info[FIELD].offset)
 
+#define fGET_FIELD(VAL, FIELD) \
+    fEXTRACTU_BITS(VAL, \
+                   reg_field_info[FIELD].width, \
+                   reg_field_info[FIELD].offset)
+#define fSET_FIELD(VAL, FIELD, NEWVAL) \
+    fINSERT_BITS(VAL, \
+                 reg_field_info[FIELD].width, \
+                 reg_field_info[FIELD].offset, \
+                 (NEWVAL))
+
 #ifdef QEMU_GENERATE
 #define fDCZEROA(REG) \
     do { \
         ctx->dczero_addr = tcg_temp_new(); \
         tcg_gen_mov_tl(ctx->dczero_addr, (REG)); \
     } while (0)
+#else
+#define fDCZEROA(REG) ((void) REG)
 #endif
 
 #define fBRANCH_SPECULATE_STALL(DOTNEWVAL, JUMP_COND, SPEC_DIR, HINTBITNUM, \
                                 STRBITNUM) /* Nothing */
 
+#ifdef CONFIG_USER_ONLY
+/*
+ * This macro can only be true in guest mode.
+ * In user mode, the 4 VIRTINSN's can't be reached
+ */
+#define fTRAP1_VIRTINSN(IMM)       (false)
+#define fVIRTINSN_SPSWAP(IMM, REG) g_assert_not_reached()
+#define fVIRTINSN_GETIE(IMM, REG)  g_assert_not_reached()
+#define fVIRTINSN_SETIE(IMM, REG)  g_assert_not_reached()
+#define fVIRTINSN_RTE(IMM, REG)    g_assert_not_reached()
+#endif
 
 #endif
+
+#define fPREDUSE_TIMING()
+
+#define fUNIMP() qemu_log_mask(LOG_UNIMP, "Unimplemented instruction\n")
diff --git a/target/hexagon/max.h b/target/hexagon/max.h
new file mode 100644
index 0000000000000..0f595bcb736d7
--- /dev/null
+++ b/target/hexagon/max.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HEXAGON_MAX_H
+#define HEXAGON_MAX_H
+
+#define MAX_EXT_CONTEXTS 8
+#define MAX_L2_INTERLEAVES 2
+#define MAX_VFIFO_COUNT 4
+
+#define SLOTS_MAX 4
+
+#define REG_WRITES_MAX 32
+#define PRED_WRITES_MAX 5
+#define STORES_MAX 2
+#define LOADS_MAX 2
+#define MAX_PRED 4
+
+#define PACKET_BYTES_MAX 16
+#define MAX_TLB_ENTRIES 1024
+#define DTLB_ENTRIES 16
+#define ITLB_ENTRIES 16
+
+#endif /* HEXAGON_MAX_H */
diff --git a/target/hexagon/meson.build b/target/hexagon/meson.build
index bb4ebaae816ea..280b5dc58ac5b 100644
--- a/target/hexagon/meson.build
+++ b/target/hexagon/meson.build
@@ -20,6 +20,7 @@ hexagon_ss = ss.source_set()
 hex_common_py = 'hex_common.py'
 gen_tcg_h = meson.current_source_dir() / 'gen_tcg.h'
 gen_tcg_hvx_h = meson.current_source_dir() / 'gen_tcg_hvx.h'
+gen_tcg_sys_h = meson.current_source_dir() / 'gen_tcg_sys.h'
 idef_parser_dir = meson.current_source_dir() / 'idef-parser'
 
 #
@@ -244,11 +245,13 @@ decodetree_trans_funcs_generated = custom_target(
     command: [python, files('gen_trans_funcs.py'), semantics_generated, '@OUTPUT@'],
 )
 hexagon_ss.add(decodetree_trans_funcs_generated)
+hexagon_softmmu_ss = ss.source_set()
 
 hexagon_ss.add(files(
     'cpu.c',
     'translate.c',
     'op_helper.c',
+    'cpu_helper.c',
     'gdbstub.c',
     'genptr.c',
     'reg_fields.c',
@@ -260,6 +263,16 @@ hexagon_ss.add(files(
     'fma_emu.c',
     'mmvec/decode_ext_mmvec.c',
     'mmvec/system_ext_mmvec.c',
+    'mmvec/mmvec_qfloat.c',
+    'mmvec/kvx_ieee.c',
+))
+
+hexagon_softmmu_ss.add(files(
+    'hex_mmu.c',
+    'hex_interrupts.c',
+    'hexswi.c',
+    'machine.c',
+    'monitor.c',
 ))
 
 #
@@ -271,7 +284,8 @@ hexagon_ss.add(files(
 #     idef-generated-enabled-instructions
 #
 idef_parser_enabled = get_option('hexagon_idef_parser')
-if idef_parser_enabled and 'hexagon-linux-user' in target_dirs
+if idef_parser_enabled and ('hexagon-linux-user' in target_dirs or
+                            'hexagon-softmmu' in target_dirs)
     idef_parser_input_generated = custom_target(
         'idef_parser_input.h.inc',
         output: 'idef_parser_input.h.inc',
@@ -346,12 +360,12 @@ if idef_parser_enabled and 'hexagon-linux-user' in target_dirs
     # Setup input and dependencies for the next step, this depends on whether or
     # not idef-parser is enabled
     helper_dep = [semantics_generated, idef_generated_tcg_c, idef_generated_tcg]
-    helper_in = [semantics_generated, gen_tcg_h, gen_tcg_hvx_h, '--idef-parser', idef_generated_list]
+    helper_in = [semantics_generated, gen_tcg_h, gen_tcg_hvx_h, gen_tcg_sys_h, '--idef-parser', idef_generated_list]
 else
     # Setup input and dependencies for the next step, this depends on whether or
     # not idef-parser is enabled
     helper_dep = [semantics_generated]
-    helper_in = [semantics_generated, gen_tcg_h, gen_tcg_hvx_h]
+    helper_in = [semantics_generated, gen_tcg_h, gen_tcg_hvx_h, gen_tcg_sys_h]
 endif
 
 #
@@ -365,7 +379,7 @@ helper_protos_generated = custom_target(
     'helper_protos_generated.h.inc',
     output: 'helper_protos_generated.h.inc',
     depends: helper_dep,
-    depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h],
+    depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h, gen_tcg_sys_h],
     command: [python, files('gen_helper_protos.py'), helper_in, '@OUTPUT@'],
 )
 hexagon_ss.add(helper_protos_generated)
@@ -374,7 +388,7 @@ helper_funcs_generated = custom_target(
     'helper_funcs_generated.c.inc',
     output: 'helper_funcs_generated.c.inc',
     depends: helper_dep,
-    depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h],
+    depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h, gen_tcg_sys_h],
     command: [python, files('gen_helper_funcs.py'), helper_in, '@OUTPUT@'],
 )
 hexagon_ss.add(helper_funcs_generated)
@@ -383,7 +397,7 @@ tcg_funcs_generated = custom_target(
     'tcg_funcs_generated.c.inc',
     output: 'tcg_funcs_generated.c.inc',
     depends: helper_dep,
-    depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h],
+    depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h, gen_tcg_sys_h],
     command: [python, files('gen_tcg_funcs.py'), helper_in, '@OUTPUT@'],
 )
 hexagon_ss.add(tcg_funcs_generated)
@@ -392,9 +406,10 @@ analyze_funcs_generated = custom_target(
     'analyze_funcs_generated.c.inc',
     output: 'analyze_funcs_generated.c.inc',
     depends: helper_dep,
-    depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h],
+    depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h, gen_tcg_sys_h],
     command: [python, files('gen_analyze_funcs.py'), helper_in, '@OUTPUT@'],
 )
 hexagon_ss.add(analyze_funcs_generated)
 
 target_arch += {'hexagon': hexagon_ss}
+target_system_arch += {'hexagon': hexagon_softmmu_ss}
diff --git a/target/hexagon/mmvec/kvx_ieee.c b/target/hexagon/mmvec/kvx_ieee.c
new file mode 100644
index 0000000000000..3e67230f62e47
--- /dev/null
+++ b/target/hexagon/mmvec/kvx_ieee.c
@@ -0,0 +1,1460 @@
+/*
+ *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "kvx_ieee.h"
+#include "kvx_mac_reduce.c"
+#include "qemu/host-utils.h"
+
+uint32_t shiftRightJam32( uint32_t a, uint_fast16_t dist )
+{
+    return
+        (dist < 31) ? a>>dist | ((uint32_t) (a<<(-dist & 31)) != 0) : (a != 0);
+}
+
+uint_fast8_t countLeadingZeros16( uint16_t a )
+{
+    return clz16(a);
+}
+
+struct exp8_sig16 normSubnormalF16Sig( uint_fast16_t sig )
+{
+    int_fast8_t shiftDist;
+    struct exp8_sig16 z;
+
+    shiftDist = countLeadingZeros16( sig ) - 5;
+    z.exp = 1 - shiftDist;
+    z.sig = sig<<shiftDist;
+    return z;
+
+}
+
+uint16_t roundPackToF16( bool sign, int_fast16_t exp, uint_fast16_t sig )
+{
+    bool roundNearEven;
+    uint_fast8_t roundIncrement, roundBits;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    roundNearEven  = 1;
+    roundIncrement = 0x8;
+    roundBits      = sig & 0xF;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( 0x1D <= (unsigned int) exp ) {
+        if ( exp < 0 ) {
+            /*----------------------------------------------------------------
+            *----------------------------------------------------------------*/
+            sig = shiftRightJam32( sig, -exp );
+            exp = 0;
+            roundBits = sig & 0xF;
+            //if ( isTiny && roundBits ) {
+            //    softfloat_raiseFlags( softfloat_flag_underflow );
+            //}
+        } else if ( (0x1D < exp) || (0x8000 <= sig + roundIncrement) ) {
+            /*----------------------------------------------------------------
+            *----------------------------------------------------------------*/
+            return packToF16UI( sign, 0x1F, 0 ) - ! roundIncrement;
+        }
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    sig = (sig + roundIncrement)>>4;
+    sig &= ~(uint_fast16_t) (! (roundBits ^ 8) & roundNearEven);
+    if ( ! sig ) exp = 0;
+
+    return packToF16UI( sign, exp, sig );
+
+}
+
+
+uint32_t fp_mult_sf_sf (uint32_t op1, uint32_t op2)
+{
+
+    union ui32_f32 u_op1;
+    union ui32_f32 u_op2;
+    union ui32_f32 u_rslt;
+
+    float a,b,rslt;
+    uint32_t result;
+
+    #ifdef DEBUG
+    printf("fp_mult_sf_sf");
+    printf("Debug : op1 =0x%08x\n",op1);
+    printf("Debug : op2 =0x%08x\n",op2);
+    #endif
+
+    if(isNaNF32UI(op1) || isNaNF32UI(op2))
+       return FP32_DEF_NAN;
+
+    u_op1.ui = op1;
+    u_op2.ui = op2;
+    a = u_op1.f;
+    b = u_op2.f;
+    rslt = a*b;
+    u_rslt.f = rslt;
+    result = u_rslt.ui;
+
+    result = isNaNF32UI(result) ? FP32_DEF_NAN : result;
+
+    #ifdef DEBUG
+    printf("Debug : a = %f\n",a);
+    printf("Debug : b = %f\n",b);
+    printf("Debug : rslt = %f\n",rslt);
+    printf("Debug : result =0x%08x\n",result);
+    #endif
+
+    return result;
+}
+
+uint32_t fp_add_sf_sf (uint32_t op1, uint32_t op2)
+{
+    union ui32_f32 u_op1;
+    union ui32_f32 u_op2;
+    union ui32_f32 u_rslt;
+
+    float a,b,rslt;
+    uint32_t result;
+
+    #ifdef DEBUG
+    printf("fp_add_sf_sf");
+    printf("Debug : op1 =0x%08x\n",op1);
+    printf("Debug : op2 =0x%08x\n",op2);
+    #endif
+
+    if(isNaNF32UI(op1) || isNaNF32UI(op2))
+       return FP32_DEF_NAN;
+
+    u_op1.ui = op1;
+    u_op2.ui = op2;
+    a = u_op1.f;
+    b = u_op2.f;
+    rslt = a+b;
+    u_rslt.f = rslt;
+    result = u_rslt.ui;
+    result = isNaNF32UI(result) ? FP32_DEF_NAN : result;
+
+    #ifdef DEBUG
+    printf("Debug : a = %f\n",a);
+    printf("Debug : b = %f\n",b);
+    printf("Debug : rslt = %f\n",rslt);
+    printf("Debug : result =0x%08x\n",result);
+    #endif
+
+    return result;
+}
+
+uint32_t fp_sub_sf_sf (uint32_t op1, uint32_t op2)
+{
+    union ui32_f32 u_op1;
+    union ui32_f32 u_op2;
+    union ui32_f32 u_rslt;
+
+    float a,b,rslt;
+    uint32_t result;
+
+    #ifdef DEBUG
+    printf("Debug : op1 =0x%08x\n",op1);
+    printf("Debug : op2 =0x%08x\n",op2);
+    #endif
+
+    if(isNaNF32UI(op1) || isNaNF32UI(op2))
+       return FP32_DEF_NAN;
+
+    u_op1.ui = op1;
+    u_op2.ui = op2;
+    a = u_op1.f;
+    b = u_op2.f;
+    rslt = a-b;
+    u_rslt.f = rslt;
+    result = u_rslt.ui;
+    result = isNaNF32UI(result) ? FP32_DEF_NAN : result;
+
+    #ifdef DEBUG
+    printf("Debug : a = %f\n",a);
+    printf("Debug : b = %f\n",b);
+    printf("Debug : rslt = %f\n",rslt);
+    printf("Debug : result =0x%08x\n",result);
+    #endif
+
+    return result;
+}
+
+//--------------------------------------------------------------
+//Function to convert FP32 to FP16
+//--------------------------------------------------------------
+
+uint16_t f32_to_f16 ( uint32_t a)
+{
+    bool sign;
+    int_fast16_t exp;
+    uint_fast32_t frac;
+    uint_fast16_t frac16;
+
+    sign = signF32UI( a );
+    exp  = expF32UI ( a );
+    frac = fracF32UI( a );
+
+    // Inf and NaN case
+    if ( exp == 0xFF ) {
+        if ( frac ) {
+           return FP16_DEF_NAN;
+        } else {
+           return packToF16UI( sign, 0x1F, 0 );
+        }
+    }
+
+    /*------------------------------------------------------------------------
+    frac>>9              : keeping 14 bit of precision out ot 23 bits in FP32
+    (frac & 0x1FF) != 0) : setting the sticky bit required for rounding
+    *------------------------------------------------------------------------*/
+    frac16 = frac>>9 | ((frac & 0x1FF) != 0);
+
+    //If input was a Zero
+    if ( ! (exp | frac16) ) {
+        return packToF16UI( sign, 0, 0 );
+    }
+
+    return roundPackToF16( sign, exp - 0x71, frac16 | 0x4000 );
+
+}
+
+//--------------------------------------------------------------
+//Function to convert FP16 to FP32
+//--------------------------------------------------------------
+
+uint32_t f16_to_f32( uint16_t a )
+{
+    bool sign;
+    int_fast8_t exp;
+    uint_fast16_t frac;
+    struct exp8_sig16 normExpSig;
+
+    sign = signF16UI( a );
+    exp  = expF16UI ( a );
+    frac = fracF16UI( a );
+
+
+    if ( exp == 0x1F ) {
+        if ( frac ) {
+           return FP32_DEF_NAN;
+        } else {
+            return packToF32UI( sign, 0xFF, 0 );
+        }
+    }
+
+
+    if ( ! exp ) {
+        if ( ! frac ) {
+            return packToF32UI( sign, 0, 0 );
+        }
+        normExpSig = normSubnormalF16Sig( frac );
+        exp = normExpSig.exp - 1;
+        frac = normExpSig.sig;
+    }
+
+
+    return packToF32UI( sign, exp + 0x70, (uint_fast32_t) frac<<13 );
+
+}
+
+uint16_t fp_mult_hf_hf (uint16_t op1, uint16_t op2)
+{
+
+    union ui32_f32 u_op1;
+    union ui32_f32 u_op2;
+    union ui32_f32 u_rslt;
+
+    uint32_t op1_f32;
+    uint32_t op2_f32;
+
+    float a,b,rslt;
+    uint32_t result_f32;
+    uint16_t result;
+
+    #ifdef DEBUG
+    printf("Debug : op1 =0x%08x\n",op1);
+    printf("Debug : op2 =0x%08x\n",op2);
+    #endif
+
+    if(isNaNF16UI(op1) || isNaNF16UI(op2))
+       return FP16_DEF_NAN;
+
+    op1_f32 = f16_to_f32(op1);
+    op2_f32 = f16_to_f32(op2);
+
+    u_op1.ui = op1_f32;
+    u_op2.ui = op2_f32;
+    a = u_op1.f;
+    b = u_op2.f;
+    rslt = a*b;
+    u_rslt.f = rslt;
+    result_f32 = u_rslt.ui;
+
+    result = f32_to_f16(result_f32);
+
+    #ifdef DEBUG
+    printf("Debug : a = %f\n",a);
+    printf("Debug : b = %f\n",b);
+    printf("Debug : rslt = %f\n",rslt);
+    printf("Debug : result =0x%08x\n",result);
+    #endif
+
+    return result;
+}
+
+uint16_t fp_add_hf_hf (uint16_t op1, uint16_t op2)
+{
+
+    union ui32_f32 u_op1;
+    union ui32_f32 u_op2;
+    union ui32_f32 u_rslt;
+
+    uint32_t op1_f32;
+    uint32_t op2_f32;
+
+    float a,b,rslt;
+    uint32_t result_f32;
+    uint16_t result;
+
+    #ifdef DEBUG
+    printf("Debug : op1 =0x%08x\n",op1);
+    printf("Debug : op2 =0x%08x\n",op2);
+    #endif
+
+    if(isNaNF16UI(op1) || isNaNF16UI(op2))
+       return FP16_DEF_NAN;
+
+    op1_f32 = f16_to_f32(op1);
+    op2_f32 = f16_to_f32(op2);
+
+    u_op1.ui = op1_f32;
+    u_op2.ui = op2_f32;
+    a = u_op1.f;
+    b = u_op2.f;
+    rslt = a+b;
+    u_rslt.f = rslt;
+    result_f32 = u_rslt.ui;
+
+    result = f32_to_f16(result_f32);
+
+    #ifdef DEBUG
+    printf("Debug : a = %f\n",a);
+    printf("Debug : b = %f\n",b);
+    printf("Debug : rslt = %f\n",rslt);
+    printf("Debug : result =0x%08x\n",result);
+    #endif
+
+    return result;
+}
+
+uint16_t fp_sub_hf_hf (uint16_t op1, uint16_t op2)
+{
+
+    union ui32_f32 u_op1;
+    union ui32_f32 u_op2;
+    union ui32_f32 u_rslt;
+
+    uint32_t op1_f32;
+    uint32_t op2_f32;
+
+    float a,b,rslt;
+    uint32_t result_f32;
+    uint16_t result;
+
+    #ifdef DEBUG
+    printf("Debug : op1 =0x%08x\n",op1);
+    printf("Debug : op2 =0x%08x\n",op2);
+    #endif
+
+    if(isNaNF16UI(op1) || isNaNF16UI(op2))
+       return FP16_DEF_NAN;
+
+    op1_f32 = f16_to_f32(op1);
+    op2_f32 = f16_to_f32(op2);
+
+    u_op1.ui = op1_f32;
+    u_op2.ui = op2_f32;
+    a = u_op1.f;
+    b = u_op2.f;
+    rslt = a-b;
+    u_rslt.f = rslt;
+    result_f32 = u_rslt.ui;
+
+    result = f32_to_f16(result_f32);
+
+    #ifdef DEBUG
+    printf("Debug : a = %f\n",a);
+    printf("Debug : b = %f\n",b);
+    printf("Debug : rslt = %f\n",rslt);
+    printf("Debug : result =0x%08x\n",result);
+    #endif
+
+    return result;
+}
+
+uint32_t fp_mult_sf_hf (uint16_t op1, uint16_t op2)
+{
+
+    union ui32_f32 u_op1;
+    union ui32_f32 u_op2;
+    union ui32_f32 u_rslt;
+
+    uint32_t op1_f32;
+    uint32_t op2_f32;
+
+    float a,b,rslt;
+    uint32_t result;
+
+    #ifdef DEBUG
+    printf("Debug : op1 =0x%08x\n",op1);
+    printf("Debug : op2 =0x%08x\n",op2);
+    #endif
+
+    if(isNaNF16UI(op1) || isNaNF16UI(op2))
+       return FP32_DEF_NAN;
+
+    op1_f32 = f16_to_f32(op1);
+    op2_f32 = f16_to_f32(op2);
+
+    u_op1.ui = op1_f32;
+    u_op2.ui = op2_f32;
+    a = u_op1.f;
+    b = u_op2.f;
+    rslt = a*b;
+    u_rslt.f = rslt;
+    result = u_rslt.ui;
+    result = isNaNF32UI(result) ? FP32_DEF_NAN : result;
+
+    #ifdef DEBUG
+    printf("Debug : a = %f\n",a);
+    printf("Debug : b = %f\n",b);
+    printf("Debug : rslt = %f\n",rslt);
+    printf("Debug : result =0x%08x\n",result);
+    #endif
+
+    return result;
+}
+
+uint32_t fp_add_sf_hf (uint16_t op1, uint16_t op2)
+{
+
+    union ui32_f32 u_op1;
+    union ui32_f32 u_op2;
+    union ui32_f32 u_rslt;
+
+    uint32_t op1_f32;
+    uint32_t op2_f32;
+
+    float a,b,rslt;
+    uint32_t result;
+
+    #ifdef DEBUG
+    printf("Debug : op1 =0x%08x\n",op1);
+    printf("Debug : op2 =0x%08x\n",op2);
+    #endif
+
+    if(isNaNF16UI(op1) || isNaNF16UI(op2))
+       return FP32_DEF_NAN;
+
+    op1_f32 = f16_to_f32(op1);
+    op2_f32 = f16_to_f32(op2);
+
+    u_op1.ui = op1_f32;
+    u_op2.ui = op2_f32;
+    a = u_op1.f;
+    b = u_op2.f;
+    rslt = a+b;
+    u_rslt.f = rslt;
+    result = u_rslt.ui;
+    result = isNaNF32UI(result) ? FP32_DEF_NAN : result;
+
+    #ifdef DEBUG
+    printf("Debug : a = %f\n",a);
+    printf("Debug : b = %f\n",b);
+    printf("Debug : rslt = %f\n",rslt);
+    printf("Debug : result =0x%08x\n",result);
+    #endif
+
+    return result;
+}
+
+uint32_t fp_sub_sf_hf (uint16_t op1, uint16_t op2)
+{
+
+    union ui32_f32 u_op1;
+    union ui32_f32 u_op2;
+    union ui32_f32 u_rslt;
+
+    uint32_t op1_f32;
+    uint32_t op2_f32;
+
+    float a,b,rslt;
+    uint32_t result;
+
+    #ifdef DEBUG
+    printf("Debug : op1 =0x%08x\n",op1);
+    printf("Debug : op2 =0x%08x\n",op2);
+    #endif
+
+    if(isNaNF16UI(op1) || isNaNF16UI(op2))
+       return FP32_DEF_NAN;
+
+    op1_f32 = f16_to_f32(op1);
+    op2_f32 = f16_to_f32(op2);
+
+    u_op1.ui = op1_f32;
+    u_op2.ui = op2_f32;
+    a = u_op1.f;
+    b = u_op2.f;
+    rslt = a-b;
+    u_rslt.f = rslt;
+    result = u_rslt.ui;
+    result = isNaNF32UI(result) ? FP32_DEF_NAN : result;
+
+    #ifdef DEBUG
+    printf("Debug : a = %f\n",a);
+    printf("Debug : b = %f\n",b);
+    printf("Debug : rslt = %f\n",rslt);
+    printf("Debug : result =0x%08x\n",result);
+    #endif
+
+    return result;
+}
+
+uint32_t fp_mult_sf_bf_acc (uint16_t op1, uint16_t op2, uint32_t acc)
+{
+    union ui32_f32 u_op1;
+    union ui32_f32 u_op2;
+    union ui32_f32 u_acc;
+    union ui32_f32 u_rslt;
+
+    uint32_t op1_f32;
+    uint32_t op2_f32;
+
+    double a,b,facc,rslt;
+    uint32_t result;
+
+    #ifdef DEBUG
+    printf("Debug : op1 =0x%04x\n",op1);
+    printf("Debug : op2 =0x%04x\n",op2);
+    printf("Debug : acc =0x%08x\n",acc);
+    #endif
+
+    op1_f32 = ((uint32_t)op1) << 16;
+    op2_f32 = ((uint32_t)op2) << 16;
+
+    if(isNaNF32UI(op1_f32) || isNaNF32UI(op2_f32) || isNaNF32UI(acc))
+       return FP32_DEF_NAN;
+
+    u_op1.ui = op1_f32;
+    u_op2.ui = op2_f32;
+    u_acc.ui = acc;
+    a = u_op1.f;
+    b = u_op2.f;
+    facc = u_acc.f;
+    //rslt = fma(a,b,facc);
+    rslt = (a * b) + facc;
+    u_rslt.f = rslt;
+    result = u_rslt.ui;
+    result = isNaNF32UI(result) ? FP32_DEF_NAN : result;
+
+    #ifdef DEBUG
+    printf("Debug : a = %f\n",a);
+    printf("Debug : b = %f\n",b);
+    printf("Debug : facc = %f\n",facc);
+    printf("Debug : rslt = %f\n",rslt);
+    printf("Debug : result =0x%04x\n",result);
+    #endif
+
+    return result;
+}
+
+uint32_t fp_mult_sf_bf (uint16_t op1, uint16_t op2)
+{
+    uint32_t op1_f32;
+    uint32_t op2_f32;
+    op1_f32 = ((uint32_t)op1) << 16;
+    op2_f32 = ((uint32_t)op2) << 16;
+    return fp_mult_sf_sf(op1_f32, op2_f32);
+}
+
+uint32_t fp_add_sf_bf (uint16_t op1, uint16_t op2)
+{
+    uint32_t op1_f32;
+    uint32_t op2_f32;
+    op1_f32 = ((uint32_t)op1) << 16;
+    op2_f32 = ((uint32_t)op2) << 16;
+    return fp_add_sf_sf(op1_f32, op2_f32);
+}
+
+uint32_t fp_sub_sf_bf (uint16_t op1, uint16_t op2)
+{
+    uint32_t op1_f32;
+    uint32_t op2_f32;
+    op1_f32 = ((uint32_t)op1) << 16;
+    op2_f32 = ((uint32_t)op2) << 16;
+    return fp_sub_sf_sf(op1_f32, op2_f32);
+}
+
+uint16_t f16_to_uh( uint16_t op1)
+{
+    union ui32_f32 u_op1;
+
+    float a,frac;
+    uint32_t op1_f32;
+    uint16_t result;
+
+    //converting a NaN to an integral ----> Vx4Rslt is +MAX_INT
+    if(isNaNF16UI(op1))
+    {
+       result = UHW_MAX;
+       goto end;
+    }
+    //converting a negative floating-point value to
+    //unsigned integer U(h|b) ----> (Vx4Rslt is 0)
+    if(signF16UI(op1))
+    {
+       result = 0x0;
+       goto end;
+    }
+    //converting ±Inf to an integral ----> Vx4Rslt is ±MAX_INT
+    if(isInfF16UI(op1))
+    {
+       result = UHW_MAX;
+       goto end;
+    }
+    //out of range FP to integer ------> Vx4Rslt is ±MAX_INT
+
+    //The default float-to-integer conversion in C does not
+    //round to the nearest integer, but instead truncates toward zero.
+    op1_f32 = f16_to_f32(op1);
+    u_op1.ui = op1_f32;
+    a = u_op1.f;
+    frac = a - (float)((uint16_t) a);
+    //round to the nearest
+    result = (uint16_t) (a + 0.5);
+    //Ties to Even
+    if(frac == 0.5)
+    {
+       if((result % 2)) result--;
+    }
+    #ifdef DEBUG
+    printf("Debug : a      = %f\n",a);
+    printf("Debug : a frac = %f\n",frac);
+    #endif
+
+ end:
+    #ifdef DEBUG
+    printf("Debug : result =0x%x\n",result);
+    #endif
+    return result;
+}
+
+int16_t f16_to_h( uint16_t op1)
+{
+    union ui32_f32 u_op1;
+
+    float a,frac;
+    uint32_t op1_f32;
+    int16_t  result;
+
+    //converting a NaN to an integral ----> Vx4Rslt is +MAX_INT
+    if(isNaNF16UI(op1))
+    {
+       result = HW_MAX;
+       goto end;
+    }
+    //converting ±Inf to an integral ----> Vx4Rslt is ±MAX_INT
+    if(isInfF16UI(op1))
+    {
+       result = signF16UI(op1) ? HW_MIN : HW_MAX;
+       goto end;
+    }
+
+    //The default float-to-integer conversion in C does not round
+    //to the nearest integer, but instead truncates toward zero.
+    op1_f32 = f16_to_f32(op1);
+    u_op1.ui = op1_f32;
+    a = u_op1.f;
+
+    //out of range FP to integer ------> Vx4Rslt is ±MAX_INT
+    if(a > (float)(HW_MAX))
+    {
+       result = HW_MAX;
+       goto end;
+    }
+    if(a < (float)(HW_MIN))
+    {
+       result = HW_MIN;
+       goto end;
+    }
+
+    frac = fabs(a - (float)((int16_t) a));
+    //round to the nearest
+    result = (a > 0) ? ((int16_t) (a + 0.5)) : ((int16_t) (a - 0.5));
+    //Ties to Even
+    if(frac == 0.5)
+    {
+       if((result % 2))
+       {
+          if(a > 0) result--;
+          if(a < 0) result++;
+       }
+    }
+    #ifdef DEBUG
+    printf("Debug : a      = %f\n",a);
+    printf("Debug : a frac = %f\n",frac);
+    #endif
+
+ end:
+    #ifdef DEBUG
+    printf("Debug : result =0x%04x\n",result);
+    #endif
+    return result;
+}
+
+uint8_t f16_to_ub( uint16_t op1)
+{
+    union ui32_f32 u_op1;
+
+    float a,frac;
+    uint32_t op1_f32;
+    uint8_t result;
+
+    //converting a NaN to an integral ----> Vx4Rslt is +MAX_INT
+    if(isNaNF16UI(op1))
+    {
+       result = UBYTE_MAX;
+       goto end;
+    }
+    //converting a negative floating-point value to
+    //unsigned integer U(h|b) ----> (Vx4Rslt is 0)
+    if(signF16UI(op1))
+    {
+       result = 0x0;
+       goto end;
+    }
+    //converting ±Inf to an integral ----> Vx4Rslt is ±MAX_INT
+    if(isInfF16UI(op1))
+    {
+       result = UBYTE_MAX;
+       goto end;
+    }
+
+    //The default float-to-integer conversion in C does
+    //not round to the nearest integer, but instead truncates toward zero.
+    op1_f32 = f16_to_f32(op1);
+    u_op1.ui = op1_f32;
+    a = u_op1.f;
+
+    //out of range FP to integer ------> Vx4Rslt is ±MAX_INT
+    if( a  > (float)(UBYTE_MAX))
+    {
+       result = UBYTE_MAX;
+       goto end;
+    }
+
+    frac = a - (float)((uint16_t) a);
+    //round to the nearest
+    result = (uint8_t) (a + 0.5);
+    //Ties to Even
+    if(frac == 0.5)
+    {
+       if((result % 2))
+       {
+          if(a > 0) result--;
+          if(a < 0) result++;
+       }
+    }
+    #ifdef DEBUG
+    printf("Debug : a      = %f\n",a);
+    printf("Debug : a frac = %f\n",frac);
+    #endif
+
+ end:
+    #ifdef DEBUG
+    printf("Debug : result =0x%x\n",result);
+    #endif
+    return result;
+}
+
+int8_t f16_to_b( uint16_t op1)
+{
+    union ui32_f32 u_op1;
+
+    float a,frac;
+    uint32_t op1_f32;
+    int16_t  result;
+
+    //converting a NaN to an integral ----> Vx4Rslt is +MAX_INT
+    if(isNaNF16UI(op1))
+    {
+       result = BYTE_MAX;
+       goto end;
+    }
+    //converting ±Inf to an integral ----> Vx4Rslt is ±MAX_INT
+    if(isInfF16UI(op1))
+    {
+       result = signF16UI(op1) ? BYTE_MIN : BYTE_MAX;
+       goto end;
+    }
+
+    //The default float-to-integer conversion in C does not
+    //round to the nearest integer, but instead truncates toward zero.
+    op1_f32 = f16_to_f32(op1);
+    u_op1.ui = op1_f32;
+    a = u_op1.f;
+
+    //out of range FP to integer ------> Vx4Rslt is ±MAX_INT
+    if(a > (float)(BYTE_MAX))
+    {
+       result = BYTE_MAX;
+       goto end;
+    }
+    if(a < (float)(BYTE_MIN))
+    {
+       result = BYTE_MIN;
+       goto end;
+    }
+
+    frac = fabs(a - (float)((int16_t) a));
+    //round to the nearest
+    result = (a > 0) ? ((int16_t) (a + 0.5)) : ((int16_t) (a - 0.5));
+    //Ties to Even
+    if(frac == 0.5)
+    {
+       if((result % 2))
+       {
+          if(a > 0) result--;
+          if(a < 0) result++;
+       }
+    }
+    #ifdef DEBUG
+    printf("Debug : a      = %f\n",a);
+    printf("Debug : a frac = %f\n",frac);
+    #endif
+
+ end:
+    #ifdef DEBUG
+    printf("Debug : result =0x%04x\n",result);
+    #endif
+    return result;
+}
+
+uint16_t uh_to_f16(uint16_t op1)
+{
+    union ui32_f32 u_op1;
+
+    float a;
+    uint32_t rslt;
+    uint16_t result;
+
+    #ifdef DEBUG
+    printf("Debug : op1 =0x%08x\n",op1);
+    #endif
+
+    a = (float) op1;
+    u_op1.f = a;
+    rslt = u_op1.ui;
+    result = f32_to_f16(rslt);
+
+    #ifdef DEBUG
+    printf("Debug : a = %f\n",a);
+    printf("Debug : rslt = 0x%08x\n",rslt);
+    printf("Debug : result =0x%04x\n",result);
+    #endif
+
+    return result;
+}
+
+uint16_t h_to_f16 (int16_t op1)
+{
+    union ui32_f32 u_op1;
+
+    float a;
+    uint32_t rslt;
+    uint16_t result;
+
+    #ifdef DEBUG
+    printf("Debug : op1 =0x%08x\n",op1);
+    #endif
+
+    a = (float) op1;
+    u_op1.f = a;
+    rslt = u_op1.ui;
+    result = f32_to_f16(rslt);
+
+    #ifdef DEBUG
+    printf("Debug : a = %f\n",a);
+    printf("Debug : rslt = 0x%08x\n",rslt);
+    printf("Debug : result =0x%04x\n",result);
+    #endif
+
+    return result;
+}
+
+uint16_t ub_to_f16(uint8_t op1)
+{
+    union ui32_f32 u_op1;
+
+    float a;
+    uint32_t rslt;
+    uint16_t result;
+
+    #ifdef DEBUG
+    printf("Debug : op1 =0x%08x\n",op1);
+    #endif
+
+    a = (float) op1;
+    u_op1.f = a;
+    rslt = u_op1.ui;
+    result = f32_to_f16(rslt);
+
+    #ifdef DEBUG
+    printf("Debug : a = %f\n",a);
+    printf("Debug : rslt = 0x%08x\n",rslt);
+    printf("Debug : result =0x%04x\n",result);
+    #endif
+
+    return result;
+}
+
+uint16_t b_to_f16 (int8_t op1)
+{
+    union ui32_f32 u_op1;
+
+    float a;
+    uint32_t rslt;
+    uint16_t result;
+
+    #ifdef DEBUG
+    printf("Debug : op1 =0x%08x\n",op1);
+    #endif
+
+    a = (float) op1;
+    u_op1.f = a;
+    rslt = u_op1.ui;
+    result = f32_to_f16(rslt);
+
+    #ifdef DEBUG
+    printf("Debug : a = %f\n",a);
+    printf("Debug : rslt = 0x%08x\n",rslt);
+    printf("Debug : result =0x%04x\n",result);
+    #endif
+
+    return result;
+}
+
+uint16_t sf_to_bf (int32_t op1)
+{
+    uint32_t rslt = op1;
+    if((rslt & 0x1FFFF) == 0x08000){
+        //break; // do not round up if exactly .5 and even already
+    }
+    else if ((rslt & 0x8000) == 0x8000){
+        rslt += 0x8000; //rounding to nearest number
+    }
+    rslt = isNaNF32UI(op1) ? FP32_DEF_NAN : rslt;
+    uint16_t result = (rslt >> 16);
+    return result;
+}
+
+uint32_t fp_vdmpy (uint16_t op1_u,uint16_t op1_l,uint16_t op2_u,uint16_t op2_l)
+{
+    union ui32_f32 u_op;
+    union ui32_f32 u_rslt;
+
+    uint32_t op1_u_f32, op1_l_f32, op2_u_f32, op2_l_f32;
+    float f_op1_u, f_op1_l, f_op2_u, f_op2_l;
+    double f_prod_l, f_prod_u, rslt;
+    uint32_t result;
+
+    #ifdef DEBUG
+    printf("Debug : op1_u =0x%04x\n",op1_u);
+    printf("Debug : op1_l =0x%04x\n",op1_l);
+    printf("Debug : op2_u =0x%04x\n",op2_u);
+    printf("Debug : op2_l =0x%04x\n",op2_l);
+    #endif
+
+    if(isNaNF16UI(op1_u) || isNaNF16UI(op1_l) || isNaNF16UI(op2_u) ||
+       isNaNF16UI(op2_l))
+    {   result = FP32_DEF_NAN;
+        goto end;
+    }
+
+    op1_u_f32 = f16_to_f32(op1_u);
+    op1_l_f32 = f16_to_f32(op1_l);
+    op2_u_f32 = f16_to_f32(op2_u);
+    op2_l_f32 = f16_to_f32(op2_l);
+
+    u_op.ui = op1_u_f32;
+    f_op1_u = u_op.f;
+
+    u_op.ui = op1_l_f32;
+    f_op1_l = u_op.f;
+
+    u_op.ui = op2_l_f32;
+    f_op2_l = u_op.f;
+
+    u_op.ui = op2_u_f32;
+    f_op2_u = u_op.f;
+
+    f_prod_l = f_op1_l * f_op2_l;
+    f_prod_u = f_op1_u * f_op2_u;
+    rslt     = f_prod_u + f_prod_l;
+
+    u_rslt.f = rslt;
+    result = u_rslt.ui;
+    result = isNaNF32UI(result) ? FP32_DEF_NAN : result;
+
+    #ifdef DEBUG
+    printf("Debug : f_op1_u = %f\n",f_op1_u);
+    printf("Debug : f_op1_l = %f\n",f_op1_l);
+    printf("Debug : f_op2_u = %f\n",f_op2_u);
+    printf("Debug : f_op2_l = %f\n",f_op2_l);
+    printf("Debug : f_prod_l = %f\n",f_prod_l);
+    printf("Debug : f_prod_u = %f\n",f_prod_u);
+    printf("Debug : rslt = %f\n",rslt);
+    #endif
+
+end:
+    #ifdef DEBUG
+    printf("Debug : result =0x%08x\n",result);
+    #endif
+    return result;
+}
+
+uint32_t fp_vdmpy_acc_dumb  (uint32_t acc,uint16_t op1_u,uint16_t op1_l,
+    uint16_t op2_u,uint16_t op2_l)
+{
+    union ui32_f32 u_op;
+    union ui32_f32 u_acc;
+    union ui32_f32 u_rslt;
+
+    uint32_t op1_u_f32, op1_l_f32, op2_u_f32, op2_l_f32;
+    float f_op1_u, f_op1_l, f_op2_u, f_op2_l, f_acc;
+    long double f_prod_l, f_prod_u, rslt;
+    uint32_t result;
+
+    #ifdef DEBUG
+    printf("Debug : op1_u =0x%04x\n",op1_u);
+    printf("Debug : op1_l =0x%04x\n",op1_l);
+    printf("Debug : op2_u =0x%04x\n",op2_u);
+    printf("Debug : op2_l =0x%04x\n",op2_l);
+    printf("Debug : acc   =0x%08x\n",acc);
+    #endif
+
+    op1_u_f32 = f16_to_f32(op1_u);
+    op1_l_f32 = f16_to_f32(op1_l);
+    op2_u_f32 = f16_to_f32(op2_u);
+    op2_l_f32 = f16_to_f32(op2_l);
+
+    u_op.ui = op1_u_f32;
+    f_op1_u = u_op.f;
+
+    u_op.ui = op1_l_f32;
+    f_op1_l = u_op.f;
+
+    u_op.ui = op2_l_f32;
+    f_op2_l = u_op.f;
+
+    u_op.ui = op2_u_f32;
+    f_op2_u = u_op.f;
+
+    u_acc.ui = acc;
+    f_acc   = u_acc.f;
+
+    f_prod_l =  (long double)(f_op1_l * f_op2_l);
+    f_prod_u =  (long double)(f_op1_u * f_op2_u);
+    rslt     =  (long double)((long double)f_acc + f_prod_u + f_prod_l);
+
+    u_rslt.f = rslt;
+    result = u_rslt.ui;
+    result = isNaNF32UI(result) ? FP32_DEF_NAN : result;
+
+    #ifdef DEBUG
+    printf("Debug : f_op1_u = %f\n",f_op1_u);
+    printf("Debug : f_op1_l = %f\n",f_op1_l);
+    printf("Debug : f_op2_u = %f\n",f_op2_u);
+    printf("Debug : f_op2_l = %f\n",f_op2_l);
+    printf("Debug : f_acc   = %f\n",f_acc);
+    printf("Debug : f_prod_l = %Lf\n",f_prod_l);
+    printf("Debug : f_prod_u = %Lf\n",f_prod_u);
+    printf("Debug : rslt = %Lf\n",rslt);
+    printf("Debug : result =0x%08x\n",result);
+    #endif
+
+    return result;
+}
+
+uint16_t fp_min_hf(uint16_t op1,uint16_t op2)
+{
+    union ui32_f32 u_op1;
+    union ui32_f32 u_op2;
+    union ui32_f32 u_rslt;
+
+    uint32_t op1_f32;
+    uint32_t op2_f32;
+
+    float a,b,rslt;
+    uint32_t result_f32;
+    uint16_t result;
+
+    #ifdef DEBUG
+    printf("Debug : op1 =0x%08x\n",op1);
+    printf("Debug : op2 =0x%08x\n",op2);
+    #endif
+
+    if(isNaNF16UI(op1) || isNaNF16UI(op2))
+       return FP16_DEF_NAN;
+
+    op1_f32 = f16_to_f32(op1);
+    op2_f32 = f16_to_f32(op2);
+
+    u_op1.ui = op1_f32;
+    u_op2.ui = op2_f32;
+    a = u_op1.f;
+    b = u_op2.f;
+
+    rslt = (a>b) ? b : a;
+    // +0 is evaluated equal to -0 in C. Handeling that case separatly
+    if( (fabs(a) == 0.0f) && (fabs(b) == 0.0f) && (signF16UI(op1) !=
+        signF16UI(op2)) )
+    {
+       rslt = signF16UI(op1) ? a : b;
+    }
+    u_rslt.f = rslt;
+    result_f32 = u_rslt.ui;
+
+    result = f32_to_f16(result_f32);
+
+    #ifdef DEBUG
+    printf("Debug : a = %f\n",a);
+    printf("Debug : b = %f\n",b);
+    printf("Debug : rslt = %f\n",rslt);
+    printf("Debug : result =0x%08x\n",result);
+    #endif
+
+    return result;
+
+}
+
+uint32_t fp_min_sf(uint32_t op1,uint32_t op2)
+{
+    union ui32_f32 u_op1;
+    union ui32_f32 u_op2;
+    union ui32_f32 u_rslt;
+
+    float a,b,rslt;
+    uint32_t result;
+
+    #ifdef DEBUG
+    printf("Debug : op1 =0x%08x\n",op1);
+    printf("Debug : op2 =0x%08x\n",op2);
+    #endif
+
+    if(isNaNF32UI(op1) || isNaNF32UI(op2))
+       return FP32_DEF_NAN;
+
+    u_op1.ui = op1;
+    u_op2.ui = op2;
+    a = u_op1.f;
+    b = u_op2.f;
+    rslt = (a>b) ? b : a;
+    // +0 is evaluated equal to -0 in C. Handeling that case separatly
+    if( (fabs(a) == 0.0f) && (fabs(b) == 0.0f) &&
+         (signF32UI(op1) != signF32UI(op2)) )
+    {
+       rslt = signF32UI(op1) ? a : b;
+    }
+    u_rslt.f = rslt;
+    result = u_rslt.ui;
+
+    #ifdef DEBUG
+    printf("Debug : a = %f\n",a);
+    printf("Debug : b = %f\n",b);
+    printf("Debug : rslt = %f\n",rslt);
+    printf("Debug : result =0x%08x\n",result);
+    #endif
+
+    return result;
+}
+
+uint16_t fp_min_bf(uint16_t op1,uint16_t op2)
+{
+    uint32_t op1_f32;
+    uint32_t op2_f32;
+
+    uint32_t result_f32;
+    uint16_t result;
+
+    op1_f32 = ((uint32_t)op1) << 16;
+    op2_f32 = ((uint32_t)op2) << 16;
+
+    result_f32 = fp_min_sf(op1_f32, op2_f32);
+    result_f32 = result_f32 >> 16;
+    result = result_f32 & 0xFFFF;
+    return result;
+}
+
+
+uint16_t fp_max_hf(uint16_t op1,uint16_t op2)
+{
+    union ui32_f32 u_op1;
+    union ui32_f32 u_op2;
+    union ui32_f32 u_rslt;
+
+    uint32_t op1_f32;
+    uint32_t op2_f32;
+
+    float a,b,rslt;
+    uint32_t result_f32;
+    uint16_t result;
+
+    #ifdef DEBUG
+    printf("Debug : op1 =0x%08x\n",op1);
+    printf("Debug : op2 =0x%08x\n",op2);
+    #endif
+
+    if(isNaNF16UI(op1) || isNaNF16UI(op2))
+       return FP16_DEF_NAN;
+
+    op1_f32 = f16_to_f32(op1);
+    op2_f32 = f16_to_f32(op2);
+
+    u_op1.ui = op1_f32;
+    u_op2.ui = op2_f32;
+    a = u_op1.f;
+    b = u_op2.f;
+
+    rslt = (a>b) ? a : b;
+    // +0 is evaluated equal to -0 in C. Handeling that case separatly
+    if( (fabs(a) == 0.0f) &&
+        (fabs(b) == 0.0f) && (signF16UI(op1) != signF16UI(op2)) )
+    {
+       rslt = signF16UI(op1) ? b : a;
+    }
+    u_rslt.f = rslt;
+    result_f32 = u_rslt.ui;
+
+    result = f32_to_f16(result_f32);
+
+    #ifdef DEBUG
+    printf("Debug : a = %f\n",a);
+    printf("Debug : b = %f\n",b);
+    printf("Debug : rslt = %f\n",rslt);
+    printf("Debug : result =0x%08x\n",result);
+    #endif
+
+    return result;
+
+}
+
+uint32_t fp_max_sf(uint32_t op1,uint32_t op2)
+{
+    union ui32_f32 u_op1;
+    union ui32_f32 u_op2;
+    union ui32_f32 u_rslt;
+
+    float a,b,rslt;
+    uint32_t result;
+
+    #ifdef DEBUG
+    printf("Debug : op1 =0x%08x\n",op1);
+    printf("Debug : op2 =0x%08x\n",op2);
+    #endif
+
+    if(isNaNF32UI(op1) || isNaNF32UI(op2))
+       return FP32_DEF_NAN;
+
+    u_op1.ui = op1;
+    u_op2.ui = op2;
+    a = u_op1.f;
+    b = u_op2.f;
+    rslt = (a>b) ? a : b;
+    // +0 is evaluated equal to -0 in C. Handeling that case separatly
+    if( (fabs(a) == 0.0f) && (fabs(b) == 0.0f) &&
+         (signF32UI(op1) != signF32UI(op2)) )
+    {
+       rslt = signF32UI(op1) ? b : a;
+    }
+    u_rslt.f = rslt;
+    result = u_rslt.ui;
+
+    #ifdef DEBUG
+    printf("Debug : a = %f\n",a);
+    printf("Debug : b = %f\n",b);
+    printf("Debug : rslt = %f\n",rslt);
+    printf("Debug : result =0x%08x\n",result);
+    #endif
+
+    return result;
+}
+
+uint16_t fp_max_bf(uint16_t op1,uint16_t op2)
+{
+    uint32_t op1_f32;
+    uint32_t op2_f32;
+
+    uint32_t result_f32;
+    uint16_t result;
+
+    op1_f32 = ((uint32_t)op1) << 16;
+    op2_f32 = ((uint32_t)op2) << 16;
+
+    result_f32 = fp_max_sf(op1_f32, op2_f32);
+    result_f32 = result_f32 >> 16;
+    result = result_f32 & 0xFFFF;
+    return result;
+}
+
+uint16_t fp_abs_bf(uint16_t op1)
+{
+    union ui32_f32 u_op1;
+
+    float result_f;
+    uint32_t result_f32;
+    uint16_t result;
+
+    u_op1.ui = ((uint32_t)op1) << 16;
+
+    result_f = fabs(u_op1.f);
+    u_op1.f = result_f;
+    result_f32 = u_op1.ui >> 16;
+    result = result_f32 & 0xFFFF;
+    return result;
+}
+
+uint16_t fp_neg_bf(uint16_t op1)
+{
+    union ui32_f32 u_op1;
+
+    float result_f;
+    uint32_t result_f32;
+    uint16_t result;
+
+    u_op1.ui = ((uint32_t)op1) << 16;
+
+    result_f = -(u_op1.f);
+    u_op1.f = result_f;
+    result_f32 = u_op1.ui >> 16;
+    result = result_f32 & 0xFFFF;
+    return result;
+}
+
+//float fmaf( float x, float y, float z );
+uint16_t fp_mult_hf_hf_acc_dumb (uint16_t op1, uint16_t op2, uint16_t acc)
+{
+    union ui32_f32 u_op1;
+    union ui32_f32 u_op2;
+    union ui32_f32 u_acc;
+    union ui32_f32 u_rslt;
+
+    uint32_t op1_f32;
+    uint32_t op2_f32;
+    uint32_t acc_f32;
+
+    float a,b,facc,rslt;
+    uint32_t result_f32;
+    uint16_t result;
+
+    #ifdef DEBUG
+    printf("Debug : op1 =0x%04x\n",op1);
+    printf("Debug : op2 =0x%04x\n",op2);
+    printf("Debug : acc =0x%04x\n",acc);
+    #endif
+
+    if(isNaNF16UI(op1) || isNaNF16UI(op2) || isNaNF16UI(acc))
+       return FP16_DEF_NAN;
+
+    op1_f32 = f16_to_f32(op1);
+    op2_f32 = f16_to_f32(op2);
+    acc_f32 = f16_to_f32(acc);
+
+    u_op1.ui = op1_f32;
+    u_op2.ui = op2_f32;
+    u_acc.ui = acc_f32;
+    a = u_op1.f;
+    b = u_op2.f;
+    facc = u_acc.f;
+    //rslt = fma(a,b,facc);
+    rslt = (a * b) + facc;
+    u_rslt.f = rslt;
+    result_f32 = u_rslt.ui;
+
+    result = f32_to_f16(result_f32);
+
+    #ifdef DEBUG
+    printf("Debug : a = %f\n",a);
+    printf("Debug : b = %f\n",b);
+    printf("Debug : facc = %f\n",facc);
+    printf("Debug : rslt = %f\n",rslt);
+    printf("Debug : result =0x%04x\n",result);
+    #endif
+
+    return result;
+}
+
+uint32_t fp_mult_sf_hf_acc (uint16_t op1, uint16_t op2, uint32_t acc)
+{
+    union ui32_f32 u_op1;
+    union ui32_f32 u_op2;
+    union ui32_f32 u_acc;
+    union ui32_f32 u_rslt;
+
+    uint32_t op1_f32;
+    uint32_t op2_f32;
+
+    float a,b,facc,rslt;
+    uint32_t result;
+
+    #ifdef DEBUG
+    printf("Debug : op1 =0x%04x\n",op1);
+    printf("Debug : op2 =0x%04x\n",op2);
+    printf("Debug : acc =0x%08x\n",acc);
+    #endif
+
+    if(isNaNF16UI(op1) || isNaNF16UI(op2) || isNaNF32UI(acc))
+       return FP32_DEF_NAN;
+
+    op1_f32 = f16_to_f32(op1);
+    op2_f32 = f16_to_f32(op2);
+
+    u_op1.ui = op1_f32;
+    u_op2.ui = op2_f32;
+    u_acc.ui = acc;
+    a = u_op1.f;
+    b = u_op2.f;
+    facc = u_acc.f;
+    //rslt = fma(a,b,facc);
+    rslt = (a * b) + facc;
+    u_rslt.f = rslt;
+    result = u_rslt.ui;
+    result = isNaNF32UI(result) ? FP32_DEF_NAN : result;
+
+    #ifdef DEBUG
+    printf("Debug : a = %f\n",a);
+    printf("Debug : b = %f\n",b);
+    printf("Debug : facc = %f\n",facc);
+    printf("Debug : rslt = %f\n",rslt);
+    printf("Debug : result =0x%04x\n",result);
+    #endif
+
+    return result;
+}
diff --git a/target/hexagon/mmvec/kvx_ieee.h b/target/hexagon/mmvec/kvx_ieee.h
new file mode 100644
index 0000000000000..ad80b70239254
--- /dev/null
+++ b/target/hexagon/mmvec/kvx_ieee.h
@@ -0,0 +1,141 @@
+/*
+ *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef KVX_COMPACT_H
+#define KVX_COMPACT_H 1
+
+#include <math.h>
+#include "hex_arch_types.h"
+
+//Double precision
+#define signF64UI( a ) ((bool) ((uint64_t) (a)>>63))
+#define expF64UI( a ) ((int_fast16_t) ((a)>>52) & 0x7FF)
+#define fracF64UI( a ) ((a) & UINT64_C( 0x000FFFFFFFFFFFFF ))
+#define packToF64UI( sign, exp, sig ) ((uint64_t) (((uint_fast64_t) (sign)<<63) + ((uint_fast64_t) (exp)<<52) + (sig)))
+#define isNaNF64UI( a ) (((~(a) & UINT64_C( 0x7FF0000000000000 )) == 0) && ((a) & UINT64_C( 0x000FFFFFFFFFFFFF )))
+
+//SF defines
+#define FP32_DEF_NAN      0x7FFFFFFF
+#define isNaNF32UI( a ) (((~(a) & 0x7F800000) == 0) && ((a) & 0x007FFFFF))
+#define isInfF32UI( a ) (((~(a) & 0x7F800000) == 0) && (((a) & 0x007FFFFF) == 0))
+#define signF32UI( a ) ((bool) ((uint32_t) (a)>>31))
+#define expF32UI( a ) ((int_fast16_t) ((a)>>23) & 0xFF)
+#define fracF32UI( a ) ((a) & 0x007FFFFF)
+#define packToF32UI( sign, exp, sig ) (((uint32_t) (sign)<<31) + ((uint32_t) (exp)<<23) + (sig))
+
+//HF defines
+#define FP16_DEF_NAN      0x7FFF
+#define isNaNF16UI( a ) (((~(a) & 0x7C00) == 0) && ((a) & 0x03FF))
+#define isInfF16UI( a ) (((~(a) & 0x7C00) == 0) && (((a) & 0x03FF) == 0))
+#define signF16UI( a ) ((bool) ((uint16_t) (a)>>15))
+#define expF16UI( a ) ((int_fast8_t) ((a)>>10) & 0x1F)
+#define fracF16UI( a ) ((a) & 0x03FF)
+#define packToF16UI( sign, exp, sig ) (((uint16_t) (sign)<<15) + ((uint16_t) (exp)<<10) + (sig))
+
+#define UHW_MIN           0
+#define UHW_MAX           65535
+#define HW_MIN            -32768
+#define HW_MAX            32767
+
+#define UBYTE_MIN         0
+#define UBYTE_MAX         255
+#define BYTE_MIN          -128
+#define BYTE_MAX          127
+
+//union ui16_f16 { uint16_t ui; float16_t f; };
+union ui32_f32 { uint32_t ui; float  f; };
+union ui64_f64 { uint64_t ui; double f; };
+struct exp8_sig16 { int_fast8_t exp; uint_fast16_t sig; };
+
+uint32_t shiftRightJam32( uint32_t a, uint_fast16_t dist );
+uint_fast8_t countLeadingZeros16( uint16_t a );
+struct exp8_sig16 normSubnormalF16Sig( uint_fast16_t sig );
+uint16_t roundPackToF16( bool sign, int_fast16_t exp, uint_fast16_t sig );
+
+//--------------------------------------------------------------------------
+// IEEE - FP Convert instructions
+//--------------------------------------------------------------------------
+uint16_t f32_to_f16 ( uint32_t a);
+uint32_t f16_to_f32( uint16_t a );
+
+uint16_t f16_to_uh( uint16_t op1);
+int16_t  f16_to_h ( uint16_t op1);
+uint8_t  f16_to_ub( uint16_t op1);
+int8_t   f16_to_b ( uint16_t op1);
+
+uint16_t uh_to_f16(uint16_t op1);
+uint16_t h_to_f16 (int16_t op1);
+uint16_t ub_to_f16(uint8_t op1);
+uint16_t b_to_f16 (int8_t op1);
+
+uint16_t sf_to_bf (int32_t op1);
+
+//--------------------------------------------------------------------------
+// IEEE - FP ADD/SUB/MPY instructions
+//--------------------------------------------------------------------------
+
+//size4s_t fp_mult(size4s_t input_1, size4s_t input_2);
+uint32_t fp_mult_sf_sf (uint32_t op1, uint32_t op2);
+uint32_t fp_add_sf_sf  (uint32_t op1, uint32_t op2);
+uint32_t fp_sub_sf_sf  (uint32_t op1, uint32_t op2);
+
+uint16_t fp_mult_hf_hf (uint16_t op1, uint16_t op2);
+uint16_t fp_add_hf_hf  (uint16_t op1, uint16_t op2);
+uint16_t fp_sub_hf_hf  (uint16_t op1, uint16_t op2);
+
+uint32_t fp_mult_sf_hf (uint16_t op1, uint16_t op2);
+uint32_t fp_add_sf_hf  (uint16_t op1, uint16_t op2);
+uint32_t fp_sub_sf_hf  (uint16_t op1, uint16_t op2);
+
+uint32_t fp_mult_sf_bf (uint16_t op1, uint16_t op2);
+uint32_t fp_add_sf_bf  (uint16_t op1, uint16_t op2);
+uint32_t fp_sub_sf_bf  (uint16_t op1, uint16_t op2);
+
+//--------------------------------------------------------------------------
+// IEEE - FP Accumulate instructions
+//--------------------------------------------------------------------------
+
+uint16_t fp_mult_hf_hf_acc (uint16_t op1, uint16_t op2, uint16_t acc);
+uint32_t fp_mult_sf_bf_acc (uint16_t op1, uint16_t op2, uint32_t acc);
+uint32_t fp_mult_sf_hf_acc (uint16_t op1, uint16_t op2, uint32_t acc);
+
+//--------------------------------------------------------------------------
+// IEEE - FP Reduce instructions
+//--------------------------------------------------------------------------
+
+uint32_t fp_vdmpy      (uint16_t op1_u,uint16_t op1_l,uint16_t op2_u,uint16_t op2_l);
+uint32_t fp_vdmpy_acc  (uint32_t acc,uint16_t op1_u,uint16_t op1_l,uint16_t op2_u,uint16_t op2_l);
+
+//--------------------------------------------------------------------------
+// IEEE - FP Select instructions
+//--------------------------------------------------------------------------
+
+uint16_t fp_min_hf(uint16_t op1,uint16_t op2);
+uint16_t fp_max_hf(uint16_t op1,uint16_t op2);
+uint32_t fp_min_sf(uint32_t op1,uint32_t op2);
+uint32_t fp_max_sf(uint32_t op1,uint32_t op2);
+uint16_t fp_min_bf(uint16_t op1,uint16_t op2);
+uint16_t fp_max_bf(uint16_t op1,uint16_t op2);
+uint16_t fp_abs_bf(uint16_t op1);
+uint16_t fp_neg_bf(uint16_t op1);
+
+//--------------------------------------------------------------------------
+// IEEE - FP Experiment Implementations
+//--------------------------------------------------------------------------
+uint16_t fp_mult_hf_hf_acc_dumb (uint16_t op1, uint16_t op2, uint16_t acc);
+uint32_t fp_vdmpy_acc_dumb  (uint32_t acc,uint16_t op1_u,uint16_t op1_l,uint16_t op2_u,uint16_t op2_l);
+#endif
diff --git a/target/hexagon/mmvec/kvx_mac_reduce.c b/target/hexagon/mmvec/kvx_mac_reduce.c
new file mode 100644
index 0000000000000..e11e41ae5891a
--- /dev/null
+++ b/target/hexagon/mmvec/kvx_mac_reduce.c
@@ -0,0 +1,1156 @@
+/*
+ *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "kvx_ieee.h"
+
+#define DF_MANTBITS() 52
+#define SF_MANTBITS() 23
+#define HF_MANTBITS() 10
+
+#define DF_INF_EXP 0x7ff
+#define DF_BIAS 1023
+
+#define SF_INF_EXP 0xff
+#define SF_BIAS 127
+
+#define HF_INF_EXP 0x1f
+#define HF_BIAS 15
+
+#define WAY_BIG_EXP 4096
+
+#define isz(X) (fabs(X) == 0.0f)
+
+
+typedef union {
+	double f;
+	size8u_t i;
+#ifndef SLOWLARIS
+	struct {
+		size8u_t mant:52;
+		size8u_t exp:11;
+		size8u_t sign:1;
+	} x;
+#else
+	struct {
+		size8u_t sign:1;
+		size8u_t exp:11;
+		size8u_t mant:52;
+	} x;
+#endif
+} df_t;
+
+typedef union {
+	float f;
+	size4u_t i;
+#ifndef SLOWLARIS
+	struct {
+		size4u_t mant:23;
+		size4u_t exp:8;
+		size4u_t sign:1;
+	} x;
+#else
+	struct {
+		size4u_t sign:1;
+		size4u_t exp:8;
+		size4u_t mant:23;
+	} x;
+#endif
+} sf_t;
+
+typedef struct {
+	union {
+		size8u_t low;
+		struct {
+#ifndef SLOWLARIS
+			size4u_t w0;
+			size4u_t w1;
+#else
+			size4u_t w1;
+			size4u_t w0;
+#endif
+		};
+	};
+	union {
+		size8u_t high;
+		struct {
+#ifndef SLOWLARIS
+			size4u_t w2;
+			size4u_t w3;
+#else
+			size4u_t w3;
+			size4u_t w2;
+#endif
+		};
+	};
+} int128_t;
+
+typedef struct {
+	int128_t mant;
+	size4s_t exp;
+	size1u_t sign;
+	size1u_t guard;
+	size1u_t round;
+	size1u_t sticky;
+} xf_t;
+
+static inline void xf_init(xf_t * p)
+{
+	p->mant.low = 0;
+	p->mant.high = 0;
+	p->exp = 0;
+	p->sign = 0;
+	p->guard = 0;
+	p->round = 0;
+	p->sticky = 0;
+}
+
+size8u_t df_getmant_kvx(df_t a);
+size8u_t df_getmant_kvx(df_t a)
+{
+	//int class = fpclassify(a.f);
+	//switch (class) {
+	//case FP_NORMAL:
+		return (a.x.mant | 1ULL << 52);
+	//case FP_ZERO:
+	//	return 0;
+	//case FP_SUBNORMAL:
+	//	return a.x.mant;
+	//default:
+	//	return -1;
+	//};
+}
+
+size4s_t df_getexp_kvx(df_t a);
+size4s_t df_getexp_kvx(df_t a)
+{
+	//int class = fpclassify(a.f);
+	//switch (class) {
+	//case FP_NORMAL:
+		return a.x.exp;
+	//case FP_SUBNORMAL:
+	//	return a.x.exp + 1;
+	//default:
+	//	return -1;
+	//};
+}
+
+size8u_t sf_getmant_kvx(sf_t a);
+size8u_t sf_getmant_kvx(sf_t a)
+{
+	//case FP_ZERO:
+	if((a.x.mant == 0) && (a.x.exp == 0))
+		return 0;
+	//case FP_SUBNORMAL:
+        else if((a.x.mant != 0) && (a.x.exp == 0))
+		return a.x.mant;
+	//case FP_NORMAL:
+        else if((a.x.exp != 0xFF) && (a.x.exp != 0))
+		return (a.x.mant | 1ULL << 23);
+	//default:
+        else
+		return -1;
+}
+
+size4s_t sf_getexp_kvx(sf_t a);
+size4s_t sf_getexp_kvx(sf_t a)
+{
+	//case FP_SUBNORMAL:
+        if((a.x.mant != 0) && (a.x.exp == 0))
+		return a.x.exp + 1;
+	//case FP_NORMAL:
+        else if((a.x.exp != 0xFF) && (a.x.exp != 0))
+		return a.x.exp;
+	//default:
+	else
+		return -1;
+}
+
+static inline void xf_debug(const char *msg, xf_t a)
+{
+#ifdef DEBUG
+	printf("%s %c0x%016llx_%016llx /%d/%d/%d p%d\n", msg,
+				  a.sign ? '-' : '+', a.mant.high, a.mant.low, a.guard,
+				  a.round, a.sticky, a.exp);
+#endif
+}
+
+static inline int128_t int128_shl(int128_t a, size4u_t amt)
+{
+	int128_t ret;
+	if (amt == 0)
+		return a;
+	if (amt > 128) {
+		ret.high = 0;
+		ret.low = 0;
+		return ret;
+	}
+	if (amt >= 64) {
+		amt -= 64;
+		a.high = a.low;
+		a.low = 0;
+	}
+	ret.high = a.high << amt;
+	ret.high |= (a.low >> (64 - amt));
+	ret.low = a.low << amt;
+	return ret;
+}
+
+static inline int128_t int128_shr(int128_t a, size4u_t amt)
+{
+	int128_t ret;
+	if (amt == 0)
+		return a;
+	if (amt > 128) {
+		ret.high = 0;
+		ret.low = 0;
+		return ret;
+	}
+	if (amt >= 64) {
+		amt -= 64;
+		a.low = a.high;
+		a.high = 0;
+	}
+	ret.low = a.low >> amt;
+	ret.low |= (a.high << (64 - amt));
+	ret.high = a.high >> amt;
+	return ret;
+}
+
+
+#define int128_gt kvx_int128_gt
+static inline int kvx_int128_gt(int128_t a, int128_t b)
+{
+	if (a.high == b.high)
+		return (a.low > b.low);
+	return (a.high > b.high);
+}
+
+static inline xf_t xf_norm_left(xf_t a)
+{
+	a.exp--;
+	a.mant = int128_shl(a.mant, 1);
+	a.mant.low |= a.guard;
+	a.guard = a.round;
+	a.round = a.sticky;
+	return a;
+}
+
+static inline xf_t xf_norm_right(xf_t a, int amt)
+{
+	if (amt > 130) {
+		a.sticky |=
+			a.round | a.guard | (a.mant.low != 0) | (a.mant.high != 0);
+		a.guard = a.round = a.mant.high = a.mant.low = 0;
+		a.exp += amt;
+		return a;
+
+	}
+	while (amt >= 64) {
+		a.sticky |= a.round | a.guard | (a.mant.low != 0);
+		a.guard = (a.mant.low >> 63) & 1;
+		a.round = (a.mant.low >> 62) & 1;
+		a.mant.low = a.mant.high;
+		a.mant.high = 0;
+		a.exp += 64;
+		amt -= 64;
+	}
+	while (amt > 0) {
+		a.exp++;
+		a.sticky |= a.round;
+		a.round = a.guard;
+		a.guard = a.mant.low & 1;
+		a.mant = int128_shr(a.mant, 1);
+		amt--;
+	}
+	return a;
+}
+
+#define int128_add  kvx_int128_add
+static inline int128_t kvx_int128_add(int128_t a, int128_t b)
+{
+	int128_t ret;
+	ret.low = a.low + b.low;
+	if ((ret.low < a.low) || (ret.low < b.low)) {
+		/* carry into high part */
+		a.high += 1;
+	}
+	ret.high = a.high + b.high;
+	return ret;
+}
+
+#define int128_sub kvx_int128_sub
+static inline int128_t kvx_int128_sub(int128_t a, int128_t b, int borrow)
+{
+	int128_t ret;
+	ret.low = a.low - b.low;
+	if (ret.low > a.low) {
+		/* borrow into high part */
+		a.high -= 1;
+	}
+	ret.high = a.high - b.high;
+	if (borrow == 0) {
+		return ret;
+	} else {
+		a.high = 0;
+		a.low = 1;
+		return int128_sub(ret, a, 0);
+	}
+}
+
+/* Return an infinity with the same sign as a */
+static inline df_t infinite_df_t(xf_t a)
+{
+	df_t ret;
+	ret.x.sign = a.sign;
+	ret.x.exp = DF_INF_EXP;
+	ret.x.mant = 0ULL;
+	return ret;
+}
+
+/* Return a maximum finite value with the same sign as a */
+static inline df_t maxfinite_df_t(xf_t a)
+{
+	df_t ret;
+	ret.x.sign = a.sign;
+	ret.x.exp = DF_INF_EXP - 1;
+	ret.x.mant = 0x000fffffffffffffULL;
+	return ret;
+}
+
+static inline df_t f2df_t(double in)
+{
+	df_t ret;
+	ret.f = in;
+	return ret;
+}
+
+/* Return an infinity with the same sign as a */
+static inline sf_t infinite_sf_t(xf_t a)
+{
+	sf_t ret;
+	ret.x.sign = a.sign;
+	ret.x.exp = SF_INF_EXP;
+	ret.x.mant = 0ULL;
+	return ret;
+}
+
+/* Return a maximum finite value with the same sign as a */
+static inline sf_t maxfinite_sf_t(xf_t a)
+{
+	sf_t ret;
+	ret.x.sign = a.sign;
+	ret.x.exp = SF_INF_EXP - 1;
+	ret.x.mant = 0x007fffffUL;
+	return ret;
+}
+
+static inline sf_t f2sf_t(float in)
+{
+	sf_t ret;
+	ret.f = in;
+	return ret;
+}
+
+#define GEN_XF_ROUND(TYPE,MANTBITS,INF_EXP) \
+TYPE xf_round_kvx_##TYPE(xf_t a); \
+TYPE xf_round_kvx_##TYPE(xf_t a) \
+{ \
+	TYPE ret; \
+	ret.i = 0; \
+	ret.x.sign = a.sign; \
+	if ((a.mant.high == 0) && (a.mant.low == 0) \
+		&& ((a.guard | a.round | a.sticky) == 0)) { \
+		/* result zero */ \
+		/*switch (fegetround()) { */\
+		/*case FE_DOWNWARD: */\
+		/*	return f2##TYPE(-0.0); */\
+		/*default: */\
+		if(a.sign) return f2##TYPE(-0.0); \
+		else return f2##TYPE(0.0); \
+		/*} */\
+	} \
+	/* Normalize right */ \
+	/* We want MANTBITS bits of mantissa plus the leading one. */ \
+	/* That means that we want MANTBITS+1 bits, or 0x000000000000FF_FFFF */ \
+	/* So we need to normalize right while the high word is non-zero and \
+	 * while the low word is nonzero when masked with 0xffe0_0000_0000_0000 */ \
+	xf_debug("input: ", a); \
+	while ((a.mant.high != 0) || ((a.mant.low >> (MANTBITS+1)) != 0)) { \
+		a = xf_norm_right(a, 1); \
+	} \
+	xf_debug("norm_right: ", a); \
+	/* OK, now normalize left */ \
+	/* We want to normalize left until we have a leading one in bit 24 */ \
+	/* Theoretically, we only need to shift a maximum of one to the left if we \
+	 * shifted out lots of bits from B, or if we had no shift / 1 shift sticky shoudl be 0  \
+	 */ \
+	while ((a.mant.low & (1ULL << MANTBITS)) == 0) { \
+		a = xf_norm_left(a); \
+	} \
+	xf_debug("norm_left: ", a); \
+	/* OK, now we might need to denormalize because of potential underflow.  We need \
+	 * to do this before rounding, and rounding might make us normal again */ \
+	while (a.exp <= 0) { \
+		a = xf_norm_right(a, 1 - a.exp); \
+		/* Do we have underflow?  That's when we get an inexact answer because we \
+		 * ran out of bits in a denormal. */ \
+		if (a.guard || a.round || a.sticky) { \
+			/*feraiseexcept(FE_UNDERFLOW);*/ \
+		} \
+	} \
+	xf_debug("norm_denorm: ", a); \
+	/* OK, we're relatively canonical... now we need to round */ \
+	if (a.guard || a.round || a.sticky) { \
+		/*feraiseexcept(FE_INEXACT);*/ \
+		/*switch (fegetround()) { */\
+		/*case FE_TOWARDZERO: */\
+		/*	 Chop and we're done */ \
+		/*	break; */\
+		/*case FE_UPWARD: */\
+		/*	if (a.sign == 0) a.mant.low += 1; */\
+		/*	break; */\
+		/*case FE_DOWNWARD: */\
+		/*	if (a.sign != 0) a.mant.low += 1; */\
+		/*	break; */\
+		/*default: */\
+			if (a.round || a.sticky) { \
+				/* round up if guard is 1, down if guard is zero */ \
+				a.mant.low += a.guard; \
+			} else if (a.guard) { \
+				/* exactly .5, round up if odd */ \
+				a.mant.low += (a.mant.low & 1); \
+			} \
+			/*break; */\
+		/*}*/ \
+	} \
+	xf_debug("post_round: ", a); \
+	/* OK, now we might have carried all the way up.  So we might need to shr once */ \
+	/* at least we know that the lsb should be zero if we rounded and got a carry out... */ \
+	if ((a.mant.low >> (MANTBITS+1)) != 0) { \
+		a = xf_norm_right(a, 1); \
+	} \
+	xf_debug("once_norm_right: ", a); \
+	/* Overflow? */ \
+	if (a.exp >= INF_EXP) { \
+		/* Yep, inf result */ \
+		xf_debug("inf: ", a); \
+		/*feraiseexcept(FE_OVERFLOW);*/ \
+		/*feraiseexcept(FE_INEXACT);*/ \
+		/*switch (fegetround()) { */\
+		/*case FE_TOWARDZERO: */\
+		/*	return maxfinite_##TYPE(a); */\
+		/*case FE_UPWARD: */\
+		/*	if (a.sign == 0) */\
+		/*		return infinite_##TYPE(a); */\
+		/*	else */\
+		/*		return maxfinite_##TYPE(a); */\
+		/*case FE_DOWNWARD: */\
+		/*	if (a.sign != 0) */\
+		/*		return infinite_##TYPE(a); */\
+		/*	else */\
+		/*		return maxfinite_##TYPE(a); */\
+		/*default: */\
+			return infinite_##TYPE(a); \
+		/*} */\
+	} \
+	/* Underflow? */ \
+	if (a.mant.low & (1ULL << MANTBITS)) { \
+		/* Leading one means: No, we're normal. So, we should be done... */ \
+		xf_debug("norm: ", a); \
+		ret.x.exp = a.exp; \
+		ret.x.mant = a.mant.low; \
+		return ret; \
+	} \
+	xf_debug("denorm: ", a); \
+	if (a.exp != 1) \
+		/*printf("a.exp == %d\n", a.exp);*/ \
+	assert(a.exp == 1); \
+	ret.x.exp = 0; \
+	ret.x.mant = a.mant.low; \
+	return ret; \
+}
+
+#define GEN_HF_ROUND(TYPE,MANTBITS,INF_EXP) \
+TYPE hf_round_##TYPE(xf_t a); \
+TYPE hf_round_##TYPE(xf_t a) \
+{ \
+	TYPE ret; \
+	ret.i = 0; \
+	ret.x.sign = a.sign; \
+	if ((a.mant.high == 0) && (a.mant.low == 0) \
+		&& ((a.guard | a.round | a.sticky) == 0)) { \
+		/* result zero */ \
+		/*switch (fegetround()) { */\
+		/*case FE_DOWNWARD: */\
+		/*	return f2##TYPE(-0.0); */\
+		/*default: */\
+		if(a.sign) return f2##TYPE(-0.0); \
+		else return f2##TYPE(0.0); \
+		/*} */\
+	} \
+	/* Normalize right */ \
+	/* We want MANTBITS bits of mantissa plus the leading one. */ \
+	/* That means that we want MANTBITS+1 bits, or 0x000000000000FF_FFFF */ \
+	/* So we need to normalize right while the high word is non-zero and \
+	 * while the low word is nonzero when masked with 0xffe0_0000_0000_0000 */ \
+	xf_debug("input: ", a); \
+	while ((a.mant.high != 0) || ((a.mant.low >> (MANTBITS+1)) != 0)) { \
+		a = xf_norm_right(a, 1); \
+	} \
+	xf_debug("norm_right: ", a); \
+	/* OK, now normalize left */ \
+	/* We want to normalize left until we have a leading one in bit 24 */ \
+	/* Theoretically, we only need to shift a maximum of one to the left if we \
+	 * shifted out lots of bits from B, or if we had no shift / 1 shift sticky shoudl be 0  \
+	 */ \
+	while ((a.mant.low & (1ULL << MANTBITS)) == 0) { \
+		a = xf_norm_left(a); \
+	} \
+	xf_debug("norm_left: ", a); \
+	/* OK, now we might need to denormalize because of potential underflow.  We need \
+	 * to do this before rounding, and rounding might make us normal again */ \
+	while (a.exp <= 0) { \
+		a = xf_norm_right(a, 1 - a.exp); \
+		/* Do we have underflow?  That's when we get an inexact answer because we \
+		 * ran out of bits in a denormal. */ \
+		if (a.guard || a.round || a.sticky) { \
+			/*feraiseexcept(FE_UNDERFLOW);*/ \
+		} \
+	} \
+	xf_debug("norm_denorm: ", a); \
+	/* OK, we're relatively canonical... now we need to round */ \
+	/*if (a.guard || a.round || a.sticky) { */\
+		/*feraiseexcept(FE_INEXACT);*/ \
+		/*switch (fegetround()) { */\
+		/*case FE_TOWARDZERO: */\
+		/*	   Chop and we're done */ \
+		/*	break; */\
+		/*case FE_UPWARD: */\
+		/*	if (a.sign == 0) a.mant.low += 1; */\
+		/*	break; */\
+		/*case FE_DOWNWARD: */\
+		/*	if (a.sign != 0) a.mant.low += 1; */\
+		/*	break; */\
+		/*default: */\
+			if (a.round || a.sticky || a.guard) { \
+				/* round up if guard is 1, down if guard is zero */ \
+				if ((a.mant.low & 0xFFF) == 0) a.mant.low += 1;  \
+	/*		} else if (a.guard) {*/ \
+				/* exactly .5, round up if odd */ \
+	/*			a.mant.low += (a.mant.low & 1); */\
+			} \
+			/*break; */\
+		/*}*/ \
+	/*} */\
+	xf_debug("post_round: ", a); \
+	/* OK, now we might have carried all the way up.  So we might need to shr once */ \
+	/* at least we know that the lsb should be zero if we rounded and got a carry out... */ \
+	if ((a.mant.low >> (MANTBITS+1)) != 0) { \
+		a = xf_norm_right(a, 1); \
+	} \
+	xf_debug("once_norm_right: ", a); \
+	/* Overflow? */ \
+	if (a.exp >= INF_EXP) { \
+		/* Yep, inf result */ \
+		xf_debug("inf: ", a); \
+		/*feraiseexcept(FE_OVERFLOW);*/ \
+		/*feraiseexcept(FE_INEXACT);*/ \
+		/*switch (fegetround()) { */\
+		/*case FE_TOWARDZERO: */\
+		/*	return maxfinite_##TYPE(a); */\
+		/*case FE_UPWARD: */\
+		/*	if (a.sign == 0) */\
+		/*		return infinite_##TYPE(a); */\
+		/*	else */\
+		/*		return maxfinite_##TYPE(a); */\
+		/*case FE_DOWNWARD: */\
+		/*	if (a.sign != 0) */\
+		/*		return infinite_##TYPE(a); */\
+		/*	else */\
+		/*		return maxfinite_##TYPE(a); */\
+		/*default: */\
+			return infinite_##TYPE(a); \
+		/*} */\
+	} \
+	/* Underflow? */ \
+	if (a.mant.low & (1ULL << MANTBITS)) { \
+		/* Leading one means: No, we're normal. So, we should be done... */ \
+		xf_debug("norm: ", a); \
+		ret.x.exp = a.exp; \
+		ret.x.mant = a.mant.low; \
+		return ret; \
+	} \
+	xf_debug("denorm: ", a); \
+	if (a.exp != 1) \
+		/*printf("a.exp == %d\n", a.exp);*/ \
+	assert(a.exp == 1); \
+	ret.x.exp = 0; \
+	ret.x.mant = a.mant.low; \
+	return ret; \
+}
+
+
+GEN_XF_ROUND(df_t,DF_MANTBITS(),DF_INF_EXP)
+GEN_XF_ROUND(sf_t,SF_MANTBITS(),SF_INF_EXP)
+GEN_HF_ROUND(sf_t,SF_MANTBITS(),SF_INF_EXP)
+
+#define int128_mult_6464 kvx_int128_mult_6464
+static inline int128_t kvx_int128_mult_6464(size8u_t ai, size8u_t bi)
+{
+	int128_t ret;
+	int128_t a, b;
+	size8u_t pp0, pp1a, pp1b, pp1s, pp2;
+
+#ifdef DEBUG
+	printf("ai/bi: 0x%016llx/0x%016llx\n", ai, bi);
+#endif
+	a.high = b.high = 0;
+	a.low = ai;
+	b.low = bi;
+	pp0 = (size8u_t) a.w0 * (size8u_t) b.w0;
+	pp1a = (size8u_t) a.w1 * (size8u_t) b.w0;
+	pp1b = (size8u_t) b.w1 * (size8u_t) a.w0;
+	pp2 = (size8u_t) a.w1 * (size8u_t) b.w1;
+#ifdef DEBUG
+	printf("pp2/1b/1a/0: 0x%016llx/0x%016llx/0x%016llx/0x%016llx\n",
+				  pp2, pp1b, pp1a, pp0);
+#endif
+	pp1s = pp1a + pp1b;
+	if ((pp1s < pp1a) || (pp1s < pp1b)) {
+		pp2 += (1ULL << 32);
+	}
+	ret.low = pp0 + (pp1s << 32);
+	if ((ret.low < pp0) || (ret.low < (pp1s << 32)))
+		pp2 += 1;
+	ret.high = pp2 + (pp1s >> 32);
+#ifdef DEBUG
+	printf("pp1s/rethi/retlo: 0x%016llx/0x%016llx/0x%016llx\n",
+				  pp1s, ret.high, ret.low);
+#endif
+	return ret;
+}
+
+xf_t xf_add_kvx(xf_t a, xf_t b);
+
+xf_t xf_sub_kvx(xf_t a, xf_t b, int negate);
+xf_t xf_sub_kvx(xf_t a, xf_t b, int negate)
+{
+	xf_t ret;
+	xf_init(&ret);
+	int borrow;
+	xf_debug("-->Sub/a: ", a);
+	xf_debug("-->Sub/b: ", b);
+	if (a.sign != b.sign) {
+		b.sign = !b.sign;
+		return xf_add_kvx(a, b);
+	}
+	if (b.exp > a.exp) {
+		/* small - big == - (big - small) */
+		return xf_sub_kvx(b, a, !negate);
+	}
+	if ((b.exp == a.exp) && (int128_gt(b.mant, a.mant))) {
+		/* small - big == - (big - small) */
+		return xf_sub_kvx(b, a, !negate);
+	}
+	xf_debug("OK: Sub/a: ", a);
+	xf_debug("OK: Sub/b: ", b);
+	while (a.exp > b.exp) {
+		/* Try to normalize exponents: shrink a exponent and grow mantissa */
+		if (a.mant.high & (1ULL << 62)) {
+			/* Can't grow a any more */
+			break;
+		} else {
+			a = xf_norm_left(a);
+		}
+	}
+	xf_debug("norm_l: Sub/a: ", a);
+	xf_debug("norm_l: Sub/b: ", b);
+	while (a.exp > b.exp) {
+		/* Try to normalize exponents: grow b exponent and shrink mantissa */
+		/* Keep around shifted out bits... we might need those later */
+		b = xf_norm_right(b, a.exp - b.exp);
+	}
+	xf_debug("norm_r: Sub/a: ", a);
+	xf_debug("norm_r: Sub/b: ", b);
+	if ((int128_gt(b.mant, a.mant))) {
+		xf_debug("retry: Sub/a: ", a);
+		xf_debug("retry: Sub/b: ", b);
+		return xf_sub_kvx(b, a, !negate);
+	}
+	/* OK, now things should be normalized! */
+	ret.sign = a.sign;
+	ret.exp = a.exp;
+	assert(!int128_gt(b.mant, a.mant));
+	borrow = (b.round << 2) | (b.guard << 1) | b.sticky;
+	ret.mant = int128_sub(a.mant, b.mant, (borrow != 0));
+	borrow = 0 - borrow;
+	ret.guard = (borrow >> 2) & 1;
+	ret.round = (borrow >> 1) & 1;
+	ret.sticky = (borrow >> 0) & 1;
+	if (negate)
+		ret.sign = !ret.sign;
+        //According to the IEEE standard, Zero result in a subtraction should always be positive
+	if ((ret.sign) && ((ret.mant.high == 0) && (ret.mant.low == 0) && ((ret.guard | ret.round | ret.sticky) == 0)))
+		ret.sign = !ret.sign;
+        xf_debug("ret: Sub ", ret);
+	return ret;
+}
+
+
+xf_t xf_add_kvx(xf_t a, xf_t b)
+{
+	xf_t ret;
+	xf_init(&ret);
+	xf_debug("-->Add/a: ", a);
+	xf_debug("-->Add/b: ", b);
+	if (a.sign != b.sign) {
+		b.sign = !b.sign;
+		return xf_sub_kvx(a, b, 0);
+	}
+	if (b.exp > a.exp) {
+		/* small + big ==  (big + small) */
+		return xf_add_kvx(b, a);
+	}
+	if ((b.exp == a.exp) && int128_gt(b.mant, a.mant)) {
+		/* small + big ==  (big + small) */
+		return xf_add_kvx(b, a);
+	}
+	xf_debug("OK? Add/a: ", a);
+	xf_debug("OK? Add/b: ", b);
+	while (a.exp > b.exp) {
+		/* Try to normalize exponents: shrink a exponent and grow mantissa */
+		if (a.mant.high & (1ULL << 62)) {
+			/* Can't grow a any more */
+			break;
+		} else {
+			a = xf_norm_left(a);
+		}
+	}
+	xf_debug("norm_l: Add/a: ", a);
+	xf_debug("norm_l: Add/b: ", b);
+	while (a.exp > b.exp) {
+		/* Try to normalize exponents: grow b exponent and shrink mantissa */
+		/* Keep around shifted out bits... we might need those later */
+		b = xf_norm_right(b, a.exp - b.exp);
+	}
+	xf_debug("norm_r: Add/a: ", a);
+	xf_debug("norm_r: Add/b: ", b);
+	/* OK, now things should be normalized! */
+	if (int128_gt(b.mant, a.mant)) {
+		xf_debug("retry: Add/a: ", a);
+		xf_debug("retry: Add/b: ", b);
+		return xf_add_kvx(b, a);
+	};
+	ret.sign = a.sign;
+	ret.exp = a.exp;
+	assert(!int128_gt(b.mant, a.mant));
+	ret.mant = int128_add(a.mant, b.mant);
+	ret.guard = b.guard;
+	ret.round = b.round;
+	ret.sticky = b.sticky;
+        xf_debug("ret: Add ", ret);
+	return ret;
+}
+
+
+float internal_fma_kvx(float a_in, float b_in, float c_in, int scale);
+float internal_fma_kvx(float a_in, float b_in, float c_in, int scale)
+{
+	sf_t a, b, c;
+	xf_t prod;
+	xf_t acc;
+	xf_t result;
+#if 0
+	df_t t;
+	fexcept_t flags_tmp;
+#endif
+	xf_init(&prod);
+	xf_init(&acc);
+	xf_init(&result);
+	a.f = a_in;
+	b.f = b_in;
+	c.f = c_in;
+//	printf("internal_fma_kvxx: 0x%016x * 0x%016x + 0x%016x sc: %d\n",
+//				  fUNFLOAT(a_in), fUNFLOAT(b_in), fUNFLOAT(c_in), scale);
+//	if (isinf(a.f) || isinf(b.f) || isinf(c.f))
+//		return special_fmaf(a, b, c);
+//	if (isnan(a.f) || isnan(b.f) || isnan(c.f))
+//		return special_fmaf(a, b, c);
+	if ((scale == 0) && (isz(a.f) || isz(b.f)))
+		return (a.f * b.f + c.f);
+	/* Is a*b exact?  If so, we don't have to go the slow way */
+	/* EJP: axe this for simplicity? */
+#if 0
+	fegetexceptflag(&flags_tmp, FE_ALL_EXCEPT);
+	feclearexcept(FE_ALL_EXCEPT);
+	t.f = a.f * b.f;
+	if (0 && (scale == 0) && isfinite(t.f)
+		&& fetestexcept(FE_ALL_EXCEPT) == 0) {
+		/* It's exactly correct, we can just do the add and return */
+		fesetexceptflag(&flags_tmp, FE_ALL_EXCEPT);
+		asm volatile ("");
+		t.f = (t.f + c.f);
+		return t.f;
+	}
+	fesetexceptflag(&flags_tmp, FE_ALL_EXCEPT);
+#endif
+	/* (a * 2**b) * (c * 2**d) == a*c * 2**(b+d) */
+	prod.mant = int128_mult_6464(sf_getmant_kvx(a), sf_getmant_kvx(b));
+	/* Note: extracting the mantissa into an int is multiplying by 2**23, so adjust here: */
+	prod.exp = sf_getexp_kvx(a) + sf_getexp_kvx(b) - SF_BIAS - 23;
+	prod.sign = a.x.sign ^ b.x.sign;
+	if (isz(a.f) || isz(b.f)) prod.exp = -2*WAY_BIG_EXP;
+	xf_debug("prod: ", prod);
+	if ((scale > 0) /*&& (fpclassify(c.f) == FP_SUBNORMAL)*/) {
+		acc.mant = int128_mult_6464(0,0);
+		acc.exp = -WAY_BIG_EXP;
+		acc.sign = c.x.sign;
+		acc.sticky = 1;
+		xf_debug("special denorm acc: ",acc);
+		result = xf_add_kvx(prod,acc);
+	} else if (!isz(c.f)) {
+		acc.mant = int128_mult_6464(sf_getmant_kvx(c), 1);
+		acc.exp = sf_getexp_kvx(c);
+		acc.sign = c.x.sign;
+		xf_debug("acc: ", acc);
+		result = xf_add_kvx(prod, acc);
+	} else {
+		result = prod;
+	}
+	xf_debug("sum: ", result);
+#ifdef DEBUG
+	printf("Scaling: %d\n", scale);
+#endif
+	result.exp += scale;
+	xf_debug("post-scale: ", result);
+	return hf_round_sf_t(result).f;
+}
+
+// result = (a*c) + (b*d) + acc
+float internal_vdmpy_acc(float a_in, float b_in, float c_in, float d_in, float acc_in, int scale);
+float internal_vdmpy_acc(float a_in, float b_in, float c_in, float d_in, float acc_in, int scale)
+{
+	sf_t a, b, c, d, accm;
+	xf_t prod1; //a*c
+	xf_t prod2; //b*d
+	xf_t acc;
+	xf_t result_temp;
+	xf_t result;
+
+        xf_init(&prod1);
+        xf_init(&prod2);
+	xf_init(&acc);
+	xf_init(&result_temp);
+	xf_init(&result);
+
+	a.f = a_in;
+	b.f = b_in;
+	c.f = c_in;
+	d.f = d_in;
+        accm.f = acc_in;
+
+        /* (a * 2**b) * (c * 2**d) == a*c * 2**(b+d) */
+	prod1.mant = int128_mult_6464(sf_getmant_kvx(a), sf_getmant_kvx(c));
+	/* Note: extracting the mantissa into an int is multiplying by 2**23, so adjust here: */
+	prod1.exp = sf_getexp_kvx(a) + sf_getexp_kvx(c) - SF_BIAS - 23;
+	prod1.sign = a.x.sign ^ c.x.sign;
+
+        /* (a * 2**b) * (c * 2**d) == a*c * 2**(b+d) */
+	prod2.mant = int128_mult_6464(sf_getmant_kvx(b), sf_getmant_kvx(d));
+	/* Note: extracting the mantissa into an int is multiplying by 2**23, so adjust here: */
+	prod2.exp = sf_getexp_kvx(b) + sf_getexp_kvx(d) - SF_BIAS - 23;
+	prod2.sign = b.x.sign ^ d.x.sign;
+
+
+	if (isz(a.f) || isz(c.f)) prod1.exp = -2*WAY_BIG_EXP;
+	if (isz(b.f) || isz(d.f)) prod2.exp = -2*WAY_BIG_EXP;
+
+	xf_debug("prod1: ", prod1);
+	xf_debug("prod2: ", prod2);
+
+	if ((scale > 0) /*&& (fpclassify(c.f) == FP_SUBNORMAL)*/) {
+		acc.mant = int128_mult_6464(0,0);
+		acc.exp = -WAY_BIG_EXP;
+		acc.sign = c.x.sign;
+		acc.sticky = 1;
+		xf_debug("special denorm acc: ",acc);
+		//result = xf_add_kvx(prod,acc);
+	} else /*if (!isz(accm.f)) */{
+		acc.mant = int128_mult_6464(sf_getmant_kvx(accm), 1);
+		acc.exp  = sf_getexp_kvx(accm);
+		acc.sign = accm.x.sign;
+		xf_debug("acc: ", acc);
+		//result = xf_add_kvx(prod, acc);
+	} /*else {
+		result = xf_add_kvx(prod1, prod2);
+	}*/
+
+        //Add the 3 numbers: prod1 prod2 acc
+        //result_temp = xf_add_kvx(prod1,prod2);
+        //result = xf_add_kvx(result_temp,acc);
+        result_temp = xf_add_kvx(prod1,prod2);
+        result = xf_add_kvx(result_temp,acc);
+
+	xf_debug("sum: ", result);
+#ifdef DEBUG
+	printf("Scaling: %d\n", scale);
+#endif
+	result.exp += scale;
+	xf_debug("post-scale: ", result);
+	return xf_round_kvx_sf_t(result).f;
+}
+
+
+uint32_t fp_vdmpy_acc  (uint32_t acc,uint16_t op1_u,uint16_t op1_l,uint16_t op2_u,uint16_t op2_l)
+{
+    union ui32_f32 u_op;
+    union ui32_f32 u_acc;
+    union ui32_f32 u_rslt;
+
+    uint32_t op1_u_f32, op1_l_f32, op2_u_f32, op2_l_f32;
+    float f_op1_u, f_op1_l, f_op2_u, f_op2_l, f_acc;
+    float f_prod_l = 0, f_prod_u = 0, rslt;
+    uint32_t result;
+
+#ifdef DEBUG
+    printf("Debug : op1_u =0x%04x\n",op1_u);
+    printf("Debug : op1_l =0x%04x\n",op1_l);
+    printf("Debug : op2_u =0x%04x\n",op2_u);
+    printf("Debug : op2_l =0x%04x\n",op2_l);
+    printf("Debug : acc   =0x%08x\n",acc);
+#endif
+
+    if(isNaNF16UI(op1_u) || isNaNF16UI(op1_l) || isNaNF16UI(op2_u) || isNaNF16UI(op2_l) || isNaNF32UI(acc))
+       return FP32_DEF_NAN;
+
+    op1_u_f32 = f16_to_f32(op1_u);
+    op1_l_f32 = f16_to_f32(op1_l);
+    op2_u_f32 = f16_to_f32(op2_u);
+    op2_l_f32 = f16_to_f32(op2_l);
+
+#ifdef DEBUG
+    printf("Debug : op1_u_f32 =0x%08x\n",op1_u_f32);
+    printf("Debug : op1_l_f32 =0x%08x\n",op1_l_f32);
+    printf("Debug : op2_u_f32 =0x%08x\n",op2_u_f32);
+    printf("Debug : op2_l_f32 =0x%08x\n",op2_l_f32);
+#endif
+
+    u_op.ui = op1_u_f32;
+    f_op1_u = u_op.f;
+
+    u_op.ui = op1_l_f32;
+    f_op1_l = u_op.f;
+
+    u_op.ui = op2_l_f32;
+    f_op2_l = u_op.f;
+
+    u_op.ui = op2_u_f32;
+    f_op2_u = u_op.f;
+
+    u_acc.ui = acc;
+    f_acc   = u_acc.f;
+
+#ifdef DEBUG
+    printf("Debug_0 : f_op1_u = %f\n",f_op1_u);
+    printf("Debug_0 : f_op1_l   = %f\n",f_op1_l);
+    printf("Debug_0 : f_op2_u = %f\n",f_op2_u);
+    printf("Debug_0 : f_op2_l   = %f\n",f_op2_l);
+    printf("Debug_0 : f_acc   = %f\n",f_acc);
+#endif
+
+    f_prod_l =  (f_op1_l * f_op2_l);
+    f_prod_u =  (f_op1_u * f_op2_u);
+
+    if(isInfF16UI(op1_u) || isInfF16UI(op1_l) || isInfF16UI(op2_u) || isInfF16UI(op2_l) || isInfF32UI(acc))
+    {
+       rslt     =  (f_prod_u + f_prod_l + f_acc);
+#ifdef DEBUG
+       printf("Debug_inf : rslt = %f\n",rslt);
+#endif
+       u_rslt.f = rslt;
+       result = u_rslt.ui;
+#ifdef DEBUG
+       printf("Debug_inf : result =0x%08x\n",result);
+#endif
+       result = isNaNF32UI(result) ? FP32_DEF_NAN : result;
+#ifdef DEBUG
+       printf("Debug_inf : result final =0x%08x\n",result);
+#endif
+       return result;
+    }
+
+    //If any of the below is a zero, we can use easy approach
+    if(isz(f_prod_l) || isz(f_prod_u) || isz(f_acc))
+    {
+       rslt     =  (f_prod_u + f_prod_l + f_acc);
+#ifdef DEBUG
+       printf("Debug_inf : rslt = %f\n",rslt);
+#endif
+       u_rslt.f = rslt;
+       result = u_rslt.ui;
+#ifdef DEBUG
+       printf("Debug_inf : result =0x%08x\n",result);
+#endif
+       result = isNaNF32UI(result) ? FP32_DEF_NAN : result;
+#ifdef DEBUG
+       printf("Debug_inf : result final =0x%08x\n",result);
+#endif
+       return result;
+    }
+
+
+////----------------------------------------------------------------------------------------------------
+//    f_prod_l =  (f_op1_l * f_op2_l);
+//    f_prod_u =  (f_op1_u * f_op2_u);
+//
+//    printf("Debug_1 : f_prod_l = %f\n",f_prod_l);
+//    printf("Debug_1 : f_prod_u = %f\n",f_prod_u);
+//
+//    rslt     =  (f_prod_u + f_prod_l + f_acc);
+//    printf("Debug_1 : rslt = %f\n",rslt);
+//    u_rslt.f = rslt;
+//    result = u_rslt.ui;
+//    printf("Debug_1 : result =0x%08x\n",result);
+////----------------------------------------------------------------------------------------------------
+
+    rslt     =  internal_vdmpy_acc(f_op1_u, f_op1_l,f_op2_u,f_op2_l,f_acc,0);
+    u_rslt.f = rslt;
+    result = u_rslt.ui;
+#ifdef DEBUG
+    printf("Debug_2 : rslt = %f\n",rslt);
+    printf("Debug_2 : result =0x%08x\n",result);
+#endif
+
+    result = isNaNF32UI(result) ? FP32_DEF_NAN : result;
+
+#ifdef DEBUG
+    printf("Debug : f_op1_u = %f\n",f_op1_u);
+    printf("Debug : f_op1_l = %f\n",f_op1_l);
+    printf("Debug : f_op2_u = %f\n",f_op2_u);
+    printf("Debug : f_op2_l = %f\n",f_op2_l);
+    printf("Debug : f_acc   = %f\n",f_acc);
+    printf("Debug : f_prod_l = %f\n",f_prod_l);
+    printf("Debug : f_prod_u = %f\n",f_prod_u);
+    printf("Debug : rslt = %f\n",rslt);
+    printf("Debug : result =0x%08x\n",result);
+#endif
+
+    return result;
+}
+
+
+uint16_t fp_mult_hf_hf_acc (uint16_t op1, uint16_t op2, uint16_t acc)
+{
+    union ui32_f32 u_op1;
+    union ui32_f32 u_op2;
+    union ui32_f32 u_acc;
+    union ui32_f32 u_rslt;
+
+    uint32_t op1_f32;
+    uint32_t op2_f32;
+    uint32_t acc_f32;
+
+    float a,b,facc,rslt;
+    uint32_t result_f32;
+    uint16_t result;
+
+#ifdef DEBUG
+    printf("Debug : op1 =0x%04x\n",op1);
+    printf("Debug : op2 =0x%04x\n",op2);
+    printf("Debug : acc =0x%04x\n",acc);
+#endif
+
+    if(isNaNF16UI(op1) || isNaNF16UI(op2) || isNaNF16UI(acc))
+       return FP16_DEF_NAN;
+
+    op1_f32 = f16_to_f32(op1);
+    op2_f32 = f16_to_f32(op2);
+    acc_f32 = f16_to_f32(acc);
+
+#ifdef DEBUG
+    printf("Debug : op1_f32 = 0x%08x\n",op1_f32);
+    printf("Debug : op2_f32 = 0x%08x\n",op2_f32);
+    printf("Debug : acc_f32 = 0x%08x\n",acc_f32);
+#endif
+
+    u_op1.ui = op1_f32;
+    u_op2.ui = op2_f32;
+    u_acc.ui = acc_f32;
+    a = u_op1.f;
+    b = u_op2.f;
+    facc = u_acc.f;
+
+#ifdef DEBUG
+    printf("Debug_1 : a = %f\n",a);
+    printf("Debug_1 : b = %f\n",b);
+    printf("Debug_1 : facc = %f\n",facc);
+#endif
+
+    if(isInfF16UI(op1) || isInfF16UI(op2) || isInfF16UI(acc))
+    {
+       rslt = (a * b) + facc;
+#ifdef DEBUG
+       printf("Debug_inf : rslt = %f\n",rslt);
+#endif
+       u_rslt.f = rslt;
+       result_f32 = u_rslt.ui;
+       result = f32_to_f16(result_f32);
+#ifdef DEBUG
+       printf("Debug_inf : result_f32 =0x%08x\n",result_f32);
+       printf("Debug_inf : result =0x%04x\n",result);
+#endif
+       result = isNaNF16UI(result) ? FP16_DEF_NAN : result;
+#ifdef DEBUG
+       printf("Debug_inf : result final =0x%04x\n",result);
+#endif
+       return result;
+    }
+
+//    //----------------------------------------------------------------------------------------------------
+//    rslt = (a * b) + facc;
+//    u_rslt.f = rslt;
+//    result_f32 = u_rslt.ui;
+//    printf("Debug_3 : result_f32 =0x%08x\n",result_f32);
+//    result = f32_to_f16(result_f32);
+//    printf("Debug_3 : result =0x%04x\n",result);
+//    //----------------------------------------------------------------------------------------------------
+
+    //rslt = fma(a,b,facc);
+    rslt = internal_fma_kvx(a, b, facc, 0);
+    u_rslt.f = rslt;
+    result_f32 = u_rslt.ui;
+#ifdef DEBUG
+    printf("Debug_2 : rslt = %f\n",rslt);
+    printf("Debug_2 : result_f32 =0x%08x\n",result_f32);
+#endif
+
+    result = f32_to_f16(result_f32);
+
+#ifdef DEBUG
+    printf("Debug_2 : result =0x%04x\n",result);
+#endif
+
+    result = isNaNF16UI(result) ? FP16_DEF_NAN : result;
+
+#ifdef DEBUG
+    printf("Debug_2 : result final =0x%04x\n",result);
+#endif
+
+    return result;
+}
+
diff --git a/target/hexagon/mmvec/macros.h b/target/hexagon/mmvec/macros.h
index bcd4a1e8973c3..645ec9280972a 100644
--- a/target/hexagon/mmvec/macros.h
+++ b/target/hexagon/mmvec/macros.h
@@ -354,3 +354,16 @@
     } while (0);
 
 #endif
+
+#define fPARSEHF(A) parse_hf(A)
+#define fPARSESF(A) parse_sf(A)
+#define fPARSEQF16(A) parse_qf16(A)
+#define fPARSEQF32(A) parse_qf32(A)
+
+#define fRNDSATHF(A,B) rnd_sat_hf(A,B)
+#define fRNDSATSF(A,B) rnd_sat_sf(A,B)
+#define fRNDSATQF16(A,B,C) rnd_sat_qf16(A,B,C)
+#define fRNDSATQF32(A,B,C) rnd_sat_qf32(A,B,C)
+
+#define fNEGQF16(A) negate16(A)
+#define fNEGQF32(A) negate32(A)
diff --git a/target/hexagon/mmvec/macros_auto.h b/target/hexagon/mmvec/macros_auto.h
new file mode 100644
index 0000000000000..479cb225c70c3
--- /dev/null
+++ b/target/hexagon/mmvec/macros_auto.h
@@ -0,0 +1,221 @@
+/*
+ *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HEXAGON_MMVEC_MACROS_AUTO_H
+#define HEXAGON_MMVEC_MACROS_AUTO_H
+
+
+#include "mmvec/macros.h"
+
+#include "q6v_defines.h"
+#pragma GCC diagnostic ignored "-Wtype-limits"
+#define fDUMPQ(STR,REG) do { printf(STR ":" #REG ": 0x%016llx\n",REG.ud[0]); } while (0)
+#define fRT8NOTE()
+#define fEXPERIMENTAL()
+#define fBFLOAT()
+#define fCVI_VX_NO_TMP_LD()
+#define fNOTQ(VAL) ({mmqreg_t _ret ={0}; int _i_; for (_i_ = 0; _i_ < fVECSIZE()/64; _i_++) _ret.ud[_i_] = ~VAL.ud[_i_]; _ret;})
+#define fGETQBITS(REG,WIDTH,MASK,BITNO) ((MASK) & (REG.w[(BITNO)>>5] >> ((BITNO) & 0x1f)))
+#define fGETQBIT(REG,BITNO) fGETQBITS(REG,1,1,BITNO)
+#define fGENMASKW(QREG,IDX) (((fGETQBIT(QREG,(IDX*4+0)) ? 0xFF : 0x0) << 0) |((fGETQBIT(QREG,(IDX*4+1)) ? 0xFF : 0x0) << 8) |((fGETQBIT(QREG,(IDX*4+2)) ? 0xFF : 0x0) << 16) |((fGETQBIT(QREG,(IDX*4+3)) ? 0xFF : 0x0) << 24))
+#define fGET10BIT(COE,VAL,POS) { COE = (((((fGETUBYTE(3,VAL) >> (2 * POS)) & 3) << 8) | fGETUBYTE(POS,VAL)) << 6); COE >>= 6; }
+#define fVMAX(X,Y) (X>Y) ? X : Y
+#define fREAD_VEC(DST,IDX) (DST = READ_VREG(fMODCIRCU((IDX),5)))
+#define fREAD_ZVEC(DST,IDX) (DST = READ_ZREG(fMODCIRCU((IDX),5)))
+#define fREAD_ZVEC_WORD(DST,IDX) { mmvector_t ZReg = READ_ZREG(0); DST = ZReg.uw[IDX]; }
+#define fREAD_ZVEC_ALL(DST,N,NZ) { int __idx = 0; for (__idx = 0; __idx < NZ/N; __idx++) { memcpy(&DST[N*__idx], &THREAD2STRUCT->ZRegs[__idx], N); } }
+#define fZREGB(Z,IDX) ((size1s_t)Z[IDX])
+#define fZREGUB(Z,IDX) ((size1u_t)Z[IDX])
+#define fZREGH(Z,IDX) ((size2s_t)Z[IDX])
+#define fZREGUB(Z,IDX) ((size1u_t)Z[IDX])
+#define fGETNIBBLE(IDX,SRC) ( fSXTN(4,8,(SRC >> (4*IDX)) & 0xF) )
+#define fGETCRUMB(IDX,SRC) ( fSXTN(2,8,(SRC >> (2*IDX)) & 0x3) )
+#define fGETCRUMB_SYMMETRIC(IDX,SRC) ( (fGETCRUMB(IDX,SRC)>=0 ? (2-fGETCRUMB(IDX,SRC)) : fGETCRUMB(IDX,SRC) ) )
+#define fWRITE_VEC(IDX,VAR) (WRITE_VREG(fMODCIRCU((IDX),5),VAR))
+#define fGENMASKH(QREG,IDX) (((fGETQBIT(QREG,(IDX*2+0)) ? 0xFF : 0x0) << 0) |((fGETQBIT(QREG,(IDX*2+1)) ? 0xFF : 0x0) << 8))
+#define fGETMASKW(VREG,QREG,IDX) (VREG.w[IDX] & fGENMASKW((QREG),IDX))
+#define fGETMASKH(VREG,QREG,IDX) (VREG.h[IDX] & fGENMASKH((QREG),IDX))
+#define fCONDMASK8(QREG,IDX,YESVAL,NOVAL) (fGETQBIT(QREG,IDX) ? (YESVAL) : (NOVAL))
+#define fCONDMASK16(QREG,IDX,YESVAL,NOVAL) ((fGENMASKH(QREG,IDX) & (YESVAL)) | (fGENMASKH(fNOTQ(QREG),IDX) & (NOVAL)))
+#define fCONDMASK32(QREG,IDX,YESVAL,NOVAL) ((fGENMASKW(QREG,IDX) & (YESVAL)) | (fGENMASKW(fNOTQ(QREG),IDX) & (NOVAL)))
+#define fSETQBITS(REG,WIDTH,MASK,BITNO,VAL) do { size4u_t __TMP = (VAL); REG.w[(BITNO)>>5] &= ~((MASK) << ((BITNO) & 0x1f)); REG.w[(BITNO)>>5] |= (((__TMP) & (MASK)) << ((BITNO) & 0x1f)); } while (0)
+#define fSETQBIT(REG,BITNO,VAL) fSETQBITS(REG,1,1,BITNO,VAL)
+#define fVBYTES() (fVECSIZE())
+#define fVHALVES() (fVECSIZE()/2)
+#define fVWORDS() (fVECSIZE()/4)
+#define fVDWORDS() (fVECSIZE()/8)
+#define fVALIGN(ADDR, LOG2_ALIGNMENT) ( ADDR = ADDR & ~(LOG2_ALIGNMENT-1))
+#define fVLASTBYTE(ADDR, LOG2_ALIGNMENT) ( ADDR = ADDR | (LOG2_ALIGNMENT-1))
+#define fVELEM(WIDTH) ((fVECSIZE()*8)/WIDTH)
+#define fVECLOGSIZE() (MAX_VEC_SIZE_LOGBYTES)
+#define fVBUF_IDX(EA) (((EA) >> fVECLOGSIZE()) & 0xFF)
+#define fREAD_VBUF(IDX,WIDX) READ_VBUF(IDX,WIDX)
+#define fLOG_VBUF(IDX,VAL,WIDX) LOG_VBUF(IDX,VAL,WIDX)
+#define fVECSIZE() (1<<fVECLOGSIZE())
+#define fSWAPB(A, B) { size1u_t tmp = A; A = B; B = tmp; }
+#define fVZERO() mmvec_zero_vector()
+#define fNEWVREG(VNUM) ((THREAD2STRUCT->VRegs_updated & (((VRegMask)1)<<VNUM)) ? THREAD2STRUCT->future_VRegs[VNUM] : mmvec_zero_vector())
+#define fV_AL_CHECK(EA,MASK) if ((EA) & (MASK)) { warn("aligning misaligned vector. PC=%08x EA=%08x",thread->Regs[REG_PC],(EA)); }
+#define fSCATTER_INIT( REGION_START, LENGTH, ELEMENT_SIZE) { mem_vector_scatter_init(thread, insn, REGION_START, LENGTH, ELEMENT_SIZE); if (EXCEPTION_DETECTED) return; }
+#define fGATHER_INIT( REGION_START, LENGTH, ELEMENT_SIZE) { mem_vector_gather_init(thread, insn, REGION_START, LENGTH, ELEMENT_SIZE); if (EXCEPTION_DETECTED) return; }
+#ifdef CONFIG_USER_ONLY
+#define fSCATTER_FINISH(OP)
+#define fGATHER_FINISH()
+#else
+#define fSCATTER_FINISH(OP) { if (EXCEPTION_DETECTED) return; mem_vector_scatter_finish(thread, insn, OP); }
+#define fGATHER_FINISH() { if (EXCEPTION_DETECTED) return; mem_vector_gather_finish(thread, insn); }
+#endif
+#define CHECK_VTCM_PAGE(FLAG, BASE, LENGTH, OFFSET, ALIGNMENT) { int slot = insn->slot; paddr_t pa = thread->mem_access[slot].paddr+OFFSET; pa = pa & ~(ALIGNMENT-1); FLAG = (pa < (thread->mem_access[slot].paddr+LENGTH)); }
+#define COUNT_OUT_OF_BOUNDS(FLAG, SIZE) { if (!FLAG) { THREAD2STRUCT->vtcm_log.oob_access += SIZE; warn("Scatter/Gather out of bounds of region"); } }
+#define fLOG_SCATTER_OP(SIZE) { thread->vtcm_log.op = 1; thread->vtcm_log.op_size = SIZE; }
+#define fVLOG_VTCM_GATHER_WORD(EA,OFFSET,IDX, LEN) { GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 4, IDX, 1); }
+#define fVLOG_VTCM_GATHER_HALFWORD(EA,OFFSET,IDX, LEN) { GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 2, IDX, 1); }
+#define fVLOG_VTCM_GATHER_HALFWORD_DV(EA,OFFSET,IDX,IDX2,IDX_H, LEN) { GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 2, (2*IDX2+IDX_H), 1); }
+#define fVLOG_VTCM_GATHER_WORDQ(EA,OFFSET,IDX, Q, LEN) { GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 4, IDX, fGETQBIT(QsV,4*IDX+i0)); }
+#define fVLOG_VTCM_GATHER_HALFWORDQ(EA,OFFSET,IDX, Q, LEN) { GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 2, IDX, fGETQBIT(QsV,2*IDX+i0)); }
+#define fVLOG_VTCM_GATHER_HALFWORDQ_DV(EA,OFFSET,IDX,IDX2,IDX_H, Q, LEN) { GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 2, (2*IDX2+IDX_H), fGETQBIT(QsV,2*IDX+i0)); }
+#define DEBUG_LOG_ADDR(OFFSET) { if (thread->processor_ptr->arch_proc_options->mmvec_network_addr_log2) { int slot = insn->slot; paddr_t pa = thread->mem_access[slot].paddr+OFFSET; } }
+//#define SCATTER_OP_WRITE_TO_MEM(TYPE) { for (int i = 0; i < mmvecx->vtcm_log.size; i+=sizeof(TYPE)) { if ( mmvecx->vtcm_log.mask.ub[i] != 0) { TYPE dst = 0; TYPE inc = 0; for(int j = 0; j < sizeof(TYPE); j++) { dst |= (sim_mem_read1(thread->system_ptr, thread->threadId, mmvecx->vtcm_log.pa[i+j]) << (8*j)); inc |= mmvecx->vtcm_log.data.ub[j+i] << (8*j); mmvecx->vtcm_log.mask.ub[j+i] = 0; mmvecx->vtcm_log.data.ub[j+i] = 0; mmvecx->vtcm_log.offsets.ub[j+i] = 0; } dst += inc; for(int j = 0; j < sizeof(TYPE); j++) { sim_mem_write1(thread->system_ptr,thread->threadId, mmvecx->vtcm_log.pa[i+j], (dst >> (8*j))& 0xFF ); } } } }
+#define fVLOG_VTCM_HALFWORD(EA,OFFSET,IN,IDX, LEN) { SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 2, IDX, 1, IN); }
+#define fVLOG_VTCM_WORD(EA,OFFSET,IN,IDX,LEN) { SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 4, IDX, 1, IN); }
+#define fVLOG_VTCM_HALFWORDQ(EA,OFFSET,IN,IDX,Q,LEN) { SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 2, IDX, fGETQBIT(QsV,2*IDX+i0), IN); }
+#define fVLOG_VTCM_WORDQ(EA,OFFSET,IN,IDX,Q,LEN) { SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 4, IDX, fGETQBIT(QsV,4*IDX+i0), IN); }
+#define fVLOG_VTCM_HALFWORD_DV(EA,OFFSET,IN,IDX,IDX2,IDX_H, LEN) { SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 2, (2*IDX2+IDX_H), 1, IN); }
+#define fVLOG_VTCM_HALFWORDQ_DV(EA,OFFSET,IN,IDX,Q,IDX2,IDX_H, LEN) { SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 2, (2*IDX2+IDX_H), fGETQBIT(QsV,2*IDX+i0), IN); }
+#define fSTORERELEASE(EA,TYPE) { fV_AL_CHECK(EA,fVECSIZE()-1); mem_store_release(thread, insn, fVECSIZE(), EA&~(fVECSIZE()-1), EA, TYPE, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); }
+#define fVFETCH_AL(EA) { fV_AL_CHECK(EA,fVECSIZE()-1); mem_fetch_vector(thread, insn, EA&~(fVECSIZE()-1), slot, fVECSIZE()); }
+#define fLOADMMV_AL(EA, ALIGNMENT, LEN, DST) { fV_AL_CHECK(EA,ALIGNMENT-1); /*thread->last_pkt->double_access_vec = 0;*/ mem_load_vector_oddva(thread, 0, EA&~(ALIGNMENT-1), EA, slot, LEN, &DST.ub[0], LEN, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); }
+#ifdef QEMU_GENERATE
+#define fLOADMMV(EA, DST) gen_vreg_load(ctx, DST##_off, EA, true)
+#else
+#define fLOADMMV(EA, DST) fLOADMMV_AL(EA,fVECSIZE(),fVECSIZE(),DST)
+#endif
+#define fLOADMMZ(EA,DST) { mmvector_t load_vec; fV_AL_CHECK(EA,fVECSIZE()-1); mem_load_vector_oddva(thread, 0, EA&~(fVECSIZE()-1), EA, slot, fVECSIZE(), &load_vec.ub[0], fVECSIZE(), fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); int idx = (EA & 0x80)>0; DST.v[idx] = load_vec; }
+#define fLOADZ_LOAD(EA,EAU,WIDTH,DST) {/* thread->last_pkt->ext_slot_cancelled = 0; thread->last_pkt->double_access_vec = 0;*/ int etm_size = ((EA % width) ==0) ? fVECSIZE() : 0; if (thread->processor_ptr->options->testgen_mode) etm_size = ((EA % width) ==0) ? WIDTH : 0; mem_load_vector_oddva(thread, 0, EA, EAU, slot, WIDTH, &DST.ub[0], etm_size, fUSE_LOOKUP_ADDRESS()); }
+#define fELSE_CANCELZ() else { /*if (thread->last_pkt) { thread->mem_access[slot].dropped_z = 1; thread->last_pkt->ext_slot_cancelled |= (1<<slot); } */ }
+#define fPOST_INC4(R) R+=4;
+#define fPOST_INC8(R) R+=8;
+#define fPOST_INC16(R) R+=16;
+#define fEXTRACTZ(DST,IDX) (DST = READ_ZREG(fMODCIRCU((IDX),5)))
+#define fLOADZ_UPDATE(EA,WIDTH,ZN,N,SRC) { mmvector_t Z[2]; Z[0] = READ_ZREG(0); Z[1] = READ_ZREG(1); for(int k = 0; k < WIDTH; k++) { int element_idx = (EA+k)%N; int z_idx = ((EA+k)%ZN)/N; Z[z_idx].ub[element_idx] = SRC.ub[k]; } WRITE_EXT_ZREG(0,Z[0],0); WRITE_EXT_ZREG(1,Z[1],0); }
+#define fSTOREZ(EA,WIDTH,ZN,N) { mmvector_t store_vec; mmvector_t maskvec = {0}; mmvector_t Z[2]; Z[0] = READ_ZREG(0); Z[1] = READ_ZREG(1); for(int k = 0; k < WIDTH; k++) { int element_idx = (EA+k)%N; int z_idx = ((EA+k)%ZN)/N; store_vec.ub[k] = Z[z_idx].ub[element_idx]; maskvec.ub[k] = 1; } mem_store_vector_oddva(thread, 0, EA, EA, slot, WIDTH, &store_vec.ub[0], &maskvec.ub[0], 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); }
+#define fLOADMMVQ(EA,DST,QVAL) do { int __i; fLOADMMV_AL(EA,fVECSIZE(),fVECSIZE(),DST); fVFOREACH(8,__i) if (!fGETQBIT(QVAL,__i)) DST.b[__i] = 0; } while (0)
+#define fLOADMMVNQ(EA,DST,QVAL) do { int __i; fLOADMMV_AL(EA,fVECSIZE(),fVECSIZE(),DST); fVFOREACH(8,__i) if (fGETQBIT(QVAL,__i)) DST.b[__i] = 0; } while (0)
+#define fLOADMMVU_AL(EA, ALIGNMENT, LEN, DST) { size4u_t size2 = (EA)&(ALIGNMENT-1); size4u_t size1 = LEN-size2; /*thread->last_pkt->double_access_vec = 1;*/ mem_load_vector_oddva(thread, 0, EA+size1, EA+fVECSIZE(), 1, size2, &DST.ub[size1], size2, fUSE_LOOKUP_ADDRESS()); mem_load_vector_oddva(thread, 0, EA, EA, 0, size1, &DST.ub[0], size1, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); }
+#ifdef QEMU_GENERATE
+#define fLOADMMVU(EA, DST) gen_vreg_load(ctx, DST##_off, EA, false)
+#else
+#define fLOADMMVU(EA, DST) { /*thread->last_pkt->pkt_has_vtcm_access = 0; thread->last_pkt->pkt_access_count = 0;*/ if ( (EA & (fVECSIZE()-1)) == 0) { /*thread->last_pkt->pkt_has_vmemu_access = 0; thread->last_pkt->double_access = 0;*/ fLOADMMV_AL(EA,fVECSIZE(),fVECSIZE(),DST); } else { /*thread->last_pkt->pkt_has_vmemu_access = 1; thread->last_pkt->double_access = 1;*/ fLOADMMVU_AL(EA,fVECSIZE(),fVECSIZE(),DST); } }
+#endif
+#define fSTOREMMV_AL(EA, ALIGNMENT, LEN, SRC) { fV_AL_CHECK(EA,ALIGNMENT-1); mem_store_vector_oddva(thread, 0, EA&~(ALIGNMENT-1), EA, slot, LEN, &SRC.ub[0], 0, 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); }
+#ifdef QEMU_GENERATE
+#define fSTOREMMV(EA, SRC) gen_vreg_store(ctx, EA, SRC##_off, insn->slot, true)
+#else
+#define fSTOREMMV(EA, SRC) fSTOREMMV_AL(EA,fVECSIZE(),fVECSIZE(),SRC)
+#endif
+#define fSTOREMMVQ_AL(EA, ALIGNMENT, LEN, SRC, MASK) do { mmvector_t maskvec; int i; for (i = 0; i < fVECSIZE(); i++) maskvec.ub[i] = fGETQBIT(MASK,i); mem_store_vector_oddva(thread, 0, EA&~(ALIGNMENT-1), EA, slot, LEN, &SRC.ub[0], &maskvec.ub[0], 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); } while (0)
+#ifdef QEMU_GENERATE
+#define fSTOREMMVQ(EA, SRC, MASK) \
+    gen_vreg_masked_store(ctx, EA, SRC##_off, MASK##_off, insn->slot, false)
+#else
+#define fSTOREMMVQ(EA, SRC, MASK) fSTOREMMVQ_AL(EA,fVECSIZE(),fVECSIZE(),SRC,MASK)
+#endif
+#define fSTOREMMVNQ_AL(EA, ALIGNMENT, LEN, SRC, MASK) { mmvector_t maskvec; int i; for (i = 0; i < fVECSIZE(); i++) maskvec.ub[i] = fGETQBIT(MASK,i); fV_AL_CHECK(EA,ALIGNMENT-1); mem_store_vector_oddva(thread, 0, EA&~(ALIGNMENT-1), EA, slot, LEN, &SRC.ub[0], &maskvec.ub[0], 1, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); }
+#ifdef QEMU_GENERATE
+#define fSTOREMMVNQ(EA, SRC, MASK) \
+    gen_vreg_masked_store(ctx, EA, SRC##_off, MASK##_off, insn->slot, true)
+#else
+#define fSTOREMMVNQ(EA, SRC, MASK) fSTOREMMVNQ_AL(EA,fVECSIZE(),fVECSIZE(),SRC,MASK)
+#endif
+#define fSTOREMMVU_AL(EA, ALIGNMENT, LEN, SRC) { size4u_t size1 = ALIGNMENT-((EA)&(ALIGNMENT-1)); size4u_t size2; if (size1>LEN) size1 = LEN; size2 = LEN-size1; mem_store_vector_oddva(thread, 0, EA+size1, EA+fVECSIZE(), 1, size2, &SRC.ub[size1], 0, 0, fUSE_LOOKUP_ADDRESS()); mem_store_vector_oddva(thread, 0, EA, EA, 0, size1, &SRC.ub[0], 0, 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); }
+#ifdef QEMU_GENERATE
+#define fSTOREMMVU(EA, SRC) \
+    gen_vreg_store(ctx, EA, SRC##_off, insn->slot, false)
+#else
+#define fSTOREMMVU(EA, SRC) { /*thread->last_pkt->pkt_has_vtcm_access = 0; thread->last_pkt->pkt_access_count = 0;*/ if ( (EA & (fVECSIZE()-1)) == 0) { /*thread->last_pkt->double_access = 0;*/ fSTOREMMV_AL(EA,fVECSIZE(),fVECSIZE(),SRC); } else { /*thread->last_pkt->double_access = 1; thread->last_pkt->pkt_has_vmemu_access = 1;*/ fSTOREMMVU_AL(EA,fVECSIZE(),fVECSIZE(),SRC); } }
+#endif
+#define fSTOREMMVQU_AL(EA, ALIGNMENT, LEN, SRC, MASK) { size4u_t size1 = ALIGNMENT-((EA)&(ALIGNMENT-1)); size4u_t size2; mmvector_t maskvec; int i; for (i = 0; i < fVECSIZE(); i++) maskvec.ub[i] = fGETQBIT(MASK,i); if (size1>LEN) size1 = LEN; size2 = LEN-size1; mem_store_vector_oddva(thread, 0, EA+size1, EA+fVECSIZE(), 1, size2, &SRC.ub[size1], &maskvec.ub[size1], 0, fUSE_LOOKUP_ADDRESS()); mem_store_vector_oddva(thread, 0, EA, 0, size1, &SRC.ub[0], &maskvec.ub[0], 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); }
+#define fSTOREMMVQU(EA, SRC, MASK) { /*thread->last_pkt->pkt_has_vtcm_access = 0; thread->last_pkt->pkt_access_count = 0;*/ if ( (EA & (fVECSIZE()-1)) == 0) { /*thread->last_pkt->double_access = 0;*/ fSTOREMMVQ_AL(EA,fVECSIZE(),fVECSIZE(),SRC,MASK); } else { /*thread->last_pkt->double_access = 1; thread->last_pkt->pkt_has_vmemu_access = 1;*/ fSTOREMMVQU_AL(EA,fVECSIZE(),fVECSIZE(),SRC,MASK); } }
+#define fSTOREMMVNQU_AL(EA, ALIGNMENT, LEN, SRC, MASK) { size4u_t size1 = ALIGNMENT-((EA)&(ALIGNMENT-1)); size4u_t size2; mmvector_t maskvec; int i; for (i = 0; i < fVECSIZE(); i++) maskvec.ub[i] = fGETQBIT(MASK,i); if (size1>LEN) size1 = LEN; size2 = LEN-size1; mem_store_vector_oddva(thread, 0, EA+size1, EA+fVECSIZE(), 1, size2, &SRC.ub[size1], &maskvec.ub[size1], 1, fUSE_LOOKUP_ADDRESS()); mem_store_vector_oddva(thread, 0, EA, EA, 0, size1, &SRC.ub[0], &maskvec.ub[0], 1, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); }
+#define fSTOREMMVNQU(EA, SRC, MASK) { /*thread->last_pkt->pkt_has_vtcm_access = 0; thread->last_pkt->pkt_access_count = 0;*/ if ( (EA & (fVECSIZE()-1)) == 0) { /*thread->last_pkt->double_access = 0;*/ fSTOREMMVNQ_AL(EA,fVECSIZE(),fVECSIZE(),SRC,MASK); } else { /*thread->last_pkt->double_access = 1; thread->last_pkt->pkt_has_vmemu_access = 1;*/ fSTOREMMVNQU_AL(EA,fVECSIZE(),fVECSIZE(),SRC,MASK); } }
+#define fVFOREACH(WIDTH, VAR) for (VAR = 0; VAR < fVELEM(WIDTH); VAR++)
+#define fVARRAY_ELEMENT_ACCESS(ARRAY, TYPE, INDEX) ARRAY.v[(INDEX) / (fVECSIZE()/(sizeof(ARRAY.TYPE[0])))].TYPE[(INDEX) % (fVECSIZE()/(sizeof(ARRAY.TYPE[0])))]
+#define fVNEWCANCEL(REGNUM) do { THREAD2STRUCT->VRegs_select &= ~(1<<(REGNUM)); } while (0)
+#define fTMPVDATA() mmvec_vtmp_data(thread)
+#define fVSATDW(U,V) fVSATW( ( ( ((long long)U)<<32 ) | fZXTN(32,64,V) ) )
+#define fVASL_SATHI(U,V) fVSATW(((U)<<1) | ((V)>>31))
+#define fVUADDSAT(WIDTH,U,V) fVSATUN( WIDTH, fZXTN(WIDTH, 2*WIDTH, U) + fZXTN(WIDTH, 2*WIDTH, V))
+#define fVSADDSAT(WIDTH,U,V) ({size8s_t tmp5 = fSXTN(WIDTH, 2*WIDTH, U); size8s_t tmp6 = fSXTN(WIDTH, 2*WIDTH, V); size8s_t tmp7 = tmp5 + tmp6; fVSATN( WIDTH, tmp7); })
+#define fVUSUBSAT(WIDTH,U,V) fVSATUN( WIDTH, fZXTN(WIDTH, 2*WIDTH, U) - fZXTN(WIDTH, 2*WIDTH, V))
+#define fVSSUBSAT(WIDTH,U,V) fVSATN( WIDTH, fSXTN(WIDTH, 2*WIDTH, U) - fSXTN(WIDTH, 2*WIDTH, V))
+#define fVAVGU(WIDTH,U,V) ((fZXTN(WIDTH, 2*WIDTH, U) + fZXTN(WIDTH, 2*WIDTH, V))>>1)
+#define fVAVGURND(WIDTH,U,V) ((fZXTN(WIDTH, 2*WIDTH, U) + fZXTN(WIDTH, 2*WIDTH, V)+1)>>1)
+#define fVNAVGU(WIDTH,U,V) ((fZXTN(WIDTH, 2*WIDTH, U) - fZXTN(WIDTH, 2*WIDTH, V))>>1)
+#define fVNAVGURNDSAT(WIDTH,U,V) fVSATUN(WIDTH,((fZXTN(WIDTH, 2*WIDTH, U) - fZXTN(WIDTH, 2*WIDTH, V)+1)>>1))
+#define fVAVGS(WIDTH,U,V) ((fSXTN(WIDTH, 2*WIDTH, U) + fSXTN(WIDTH, 2*WIDTH, V))>>1)
+#define fVAVGSRND(WIDTH,U,V) ((fSXTN(WIDTH, 2*WIDTH, U) + fSXTN(WIDTH, 2*WIDTH, V)+1)>>1)
+#define fVNAVGS(WIDTH,U,V) ((fSXTN(WIDTH, 2*WIDTH, U) - fSXTN(WIDTH, 2*WIDTH, V))>>1)
+#define fVNAVGSRND(WIDTH,U,V) ((fSXTN(WIDTH, 2*WIDTH, U) - fSXTN(WIDTH, 2*WIDTH, V)+1)>>1)
+#define fVNAVGSRNDSAT(WIDTH,U,V) fVSATN(WIDTH,((fSXTN(WIDTH, 2*WIDTH, U) - fSXTN(WIDTH, 2*WIDTH, V)+1)>>1))
+#define fVNOROUND(VAL,SHAMT) VAL
+#define fVNOSAT(VAL) VAL
+#define fVROUND(VAL,SHAMT) ((VAL) + (((SHAMT)>0)?(1LL<<((SHAMT)-1)):0))
+#define fCARRY_FROM_ADD32(A,B,C) (((fZXTN(32,64,A)+fZXTN(32,64,B)+C) >> 32) & 1)
+#define fUARCH_NOTE_PUMP_4X()
+#define fUARCH_NOTE_PUMP_2X()
+#define UNLIKELY(X) __builtin_expect((X), 0)
+#define fVDOCHKPAGECROSS(BASE,SUM) if (UNLIKELY(thread->timing_on)) { thread->mem_access[slot].check_page_crosses = 1; thread->mem_access[slot].page_cross_base = BASE; thread->mem_access[slot].page_cross_sum = SUM; }
+#define fPARSEQF32(A) parse_qf32(A)
+#define fRNDSATQF32(A,B,C) rnd_sat_qf32(A,B,C)
+#define fPARSEQF16(A) parse_qf16(A)
+#define fRNDSATQF16(A,B,C) rnd_sat_qf16(A,B,C)
+#define fPARSESF(A) parse_sf(A)
+#define fRNDSATSF(A,B) rnd_sat_sf(A,B)
+#define fPARSEHF(A) parse_hf(A)
+#define fRNDSATHF(A,B) rnd_sat_hf(A,B)
+#define fRNDSATW(A,B) rnd_sat_w(A,B)
+#define fRNDSATUW(A,B) rnd_sat_uw(A,B)
+#define fRNDSATH(A,B) rnd_sat_h(A,B)
+#define fRNDSATUH(A,B) rnd_sat_uh(A,B)
+#define fRNDSATB(A,B) rnd_sat_b(A,B)
+#define fRNDSATUB(A,B) rnd_sat_ub(A,B)
+#define fNEGQF32(A) negate32(A)
+#define fNEGQF16(A) negate16(A)
+#define fNEGSF(A) negate_sf(A)
+#define fNEGHF(A) negate_hf(A)
+#define fCMPGT_QF32(A,B) cmpgt_qf32(A,B)
+#define fCMPGT_QF16(A,B) cmpgt_qf16(A,B)
+#define fCMPGT_SF(A,B) cmpgt_sf(A,B)
+#define fCMPGT_HF(A,B) cmpgt_hf(A,B)
+#define fCMPGT_BF(A,B) cmpgt_sf(((int)A) << 16,((int)B) << 16)
+#define fCMPGT_QF32_SF(A,B) cmpgt_qf32_sf(A,B)
+#define fCMPGT_QF16_HF(A,B) cmpgt_qf16_hf(A,B)
+#define fMAX_QF32(X,Y) max_qf32(X,Y)
+#define fMIN_QF32(X,Y) min_qf32(X,Y)
+#define fMAX_QF32_SF(X,Y) max_qf32_sf(X,Y)
+#define fMIN_QF32_SF(X,Y) min_qf32_sf(X,Y)
+#define fMAX_QF16(X,Y) max_qf16(X,Y)
+#define fMIN_QF16(X,Y) min_qf16(X,Y)
+#define fMAX_QF16_HF(X,Y) max_qf16_hf(X,Y)
+#define fMIN_QF16_HF(X,Y) min_qf16_hf(X,Y)
+#define fMAX_SF(X,Y) max_sf(X,Y)
+#define fMIN_SF(X,Y) min_sf(X,Y)
+#define fMAX_HF(X,Y) max_hf(X,Y)
+#define fMIN_HF(X,Y) min_hf(X,Y)
+
+#define fSTOREDOUBLEMMV(EA, SRC) fSTOREMMV_AL(EA,fVECSIZE(),2*fVECSIZE(),SRC)
+#endif
diff --git a/target/hexagon/mmvec/mmvec.h b/target/hexagon/mmvec/mmvec.h
index 52d470709c02d..906bf16d8258a 100644
--- a/target/hexagon/mmvec/mmvec.h
+++ b/target/hexagon/mmvec/mmvec.h
@@ -38,6 +38,11 @@ typedef union {
     int16_t   h[MAX_VEC_SIZE_BYTES / 2];
     uint8_t  ub[MAX_VEC_SIZE_BYTES / 1];
     int8_t    b[MAX_VEC_SIZE_BYTES / 1];
+    int32_t qf32[MAX_VEC_SIZE_BYTES / 4];
+    int16_t qf16[MAX_VEC_SIZE_BYTES / 2];
+    int32_t   sf[MAX_VEC_SIZE_BYTES / 4];
+    int16_t   hf[MAX_VEC_SIZE_BYTES / 2];
+    int16_t   bf[MAX_VEC_SIZE_BYTES / 2];
 } MMVector;
 
 typedef union {
diff --git a/target/hexagon/mmvec/mmvec_qfloat.c b/target/hexagon/mmvec/mmvec_qfloat.c
new file mode 100644
index 0000000000000..060ac4b14d8f1
--- /dev/null
+++ b/target/hexagon/mmvec/mmvec_qfloat.c
@@ -0,0 +1,2563 @@
+/*
+ *  Copyright(c) 2019-2020 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#if !defined(__clang__)
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#endif
+
+#include "qemu/osdep.h"
+#include "mmvec_qfloat.h"
+#include <math.h>
+
+#define UNUSED(var) do { (void)var; } while (0)
+
+//Take one's complement of the mantissa for QF32
+size4s_t negate32(size4s_t in)
+{
+  size4s_t out;
+  out = in>>8;
+  out = ~out;
+  out = (out<<8) | (in & 0xFF);
+  return out;
+}
+//Take one's complement of the mantissa for QF16
+size2s_t negate16(size2s_t in)
+{
+  size2s_t out;
+  out = in>>5;
+  out = ~out;
+  out = (out<<5) | (in & 0x1F);
+  return out;
+}
+//Change sign for SF
+size4s_t negate_sf(size4s_t in)
+{
+  size4s_t out;
+  int sign;
+  sign = (in>>31) & 1;
+  sign = ~sign;
+  out = (sign<<31) | (in & 0x7FFFFFFF);
+  return out;
+}
+//Change sign for SF
+size2s_t negate_hf(size2s_t in)
+{
+  size2s_t out;
+  int sign;
+  sign = (in>>15) & 1;
+  sign = ~sign;
+  out = (sign<<15) | (in & 0x7FFF);
+  return out;
+}
+unfloat parse_qf16(size2s_t in)
+{
+    unfloat out;
+
+	out.sign = (in>>15) & 0x1;
+
+    out.exp = (size1s_t)(0x00 | (in & 0x1F));
+    out.exp = out.exp - BIAS_QF16;
+
+    /*implied LSB=1*/
+    size2s_t signif;
+    /*take signif and sign extend, add LSB=1*/
+    signif= ((size4s_t)in >> 4) | 1;
+
+    out.sig = (double)signif * epsilon_hf;
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[ARCH_QF16_parse]in=%x, exp=%d, sig=%10.20f\n", in,out.exp,out.sig);
+    printf("[ARCH_QF16_parse]exp_d=%d, sig_d=%10.20f\n", ilogb(out.sig),ldexp(out.sig, -ilogb(out.sig)));
+#endif
+    return out;
+}
+//Take signed int and generate sign, exp and ***signed sig
+unfloat parse_qf32(size4s_t in)
+{
+    unfloat out;
+
+	out.sign = (in>>31) & 0x1;
+
+    out.exp = (size2s_t)(0x0000 | (in & 0xFF));
+    out.exp = out.exp - BIAS_QF32;
+
+    /*implied LSB=1*/
+    size4s_t signif;
+    /*take signif and sign extend, add LSB=1*/
+    signif= ((size8s_t)in >> 7) | 1;
+
+    out.sig = (double)signif * epsilon;
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[ARCH_QF32_parse]in=%x, exp=%d, sig=%10.20f\n", in,out.exp,out.sig);
+    printf("[ARCH_QF32_parse]exp_d=%d, sig_d=%10.20f\n", ilogb(out.sig),ldexp(out.sig, -ilogb(out.sig)));
+#endif
+    return out;
+}
+
+unfloat parse_hf(size2s_t in)
+{
+    unfloat out;
+
+	out.sign = (in>>15) & 0x1;
+    out.exp = (size1s_t)( (0x00 | (in>>10)) & 0x1F);
+
+    size2u_t sig;
+    //take signif and sign extend
+    sig = (size2u_t)(in & 0x3FF);
+
+    /*implied MSB=1*/
+    if(out.exp>0)
+        sig = (1<<10) | sig;
+
+    out.exp = out.exp - BIAS_HF;
+    if(out.exp<E_MIN_HF)
+        out.exp = E_MIN_HF;
+
+    //if(in == 0)
+    //    out.exp = E_MIN_QF16;
+
+    out.sig = (double)sig * epsilon_hf;
+
+    //if(out.sign)
+    //    out.sig = (-1.0)*out.sig;
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[ARCH_HF_parse] in=%x, sign=%d, exp=%d, sig=%10.20f\n", in,out.sign,out.exp,out.sig);
+    printf("[ARCH_HF_parse]exp_d=%d, sig_d=%10.20f\n", ilogb(out.sig),ldexp(out.sig, -ilogb(out.sig)));
+#endif
+    return out;
+}
+//Take the magnitude and generate ******positive sig
+unfloat parse_sf(size4s_t in)
+{
+    unfloat out;
+
+	out.sign = (in>>31) & 0x1;
+    out.exp = (size2s_t)( (0x0000 | (in>>23)) & 0xFF);
+
+    size4u_t sig;
+    //take signif and sign extend
+    sig = (size4u_t)(in & 0x7FFFFF);
+
+    /*implied MSB=1*/
+    if(out.exp>0)
+        sig = (1<<23) | sig;
+
+    out.exp = out.exp - BIAS_SF;
+
+    if(out.exp<E_MIN_SF)
+        out.exp = E_MIN_SF;
+
+    //if(in == 0)
+    //    out.exp = E_MIN_QF32;
+
+    out.sig = (double)sig * epsilon;
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[ARCH_SF_parse] in=%x, sign=%d, exp=%d, sig=%x(%10.30f)\n", in,out.sign,out.exp,sig,out.sig);
+    printf("[ARCH_SF_parse]exp_d=%d, sig_d=%10.20f\n", ilogb(out.sig),ldexp(out.sig, -ilogb(out.sig)));
+#endif
+    return out;
+}
+
+
+size4s_t rnd_sat_qf_sig(int* exp_in, double sig, double sig_low, f_type ft);
+size4s_t rnd_sat_qf_sig(int* exp_in, double sig, double sig_low, f_type ft)
+{
+    double scale;
+    double sig_s;
+    double sig_f=0.0;
+    double R1, R2, R3, R_low;
+    int exp_ovf=0;
+    int exp_adj=0;
+    int exp_undf=0;
+    int exp = *exp_in;
+    int sign = (sig>=0.0)? 0:1;
+
+#ifndef DEBUG_MMVEC_QF
+    UNUSED(R_low);
+#endif
+
+    int prod_ovf=0;
+    if(fabs(sig)>=2.0L && sig != -2.0L)
+        prod_ovf = 1;
+
+    int E_MIN=E_MIN_QF32;
+    int E_MAX=E_MAX_QF32;
+    int BIAS=BIAS_QF32;
+    double _epsilon=epsilon;
+    double _units=units;
+    if(ft==QF32)
+    {
+        E_MIN = E_MIN_QF32;
+        E_MAX = E_MAX_QF32;
+        BIAS = BIAS_QF32;
+        _epsilon = epsilon;
+        _units= units;
+    }
+    else if(ft==QF16)
+    {
+        E_MIN = E_MIN_QF16;
+        E_MAX = E_MAX_QF16;
+        BIAS = BIAS_QF16;
+        _epsilon = epsilon_hf;
+        _units= units_hf;
+    }
+    else if(ft==SF)
+    {
+        E_MIN = E_MIN_SF;
+        E_MAX = E_MAX_SF;
+        BIAS = BIAS_SF;
+        _epsilon = epsilon;
+        _units= units;
+    }
+    else if(ft==HF)
+    {
+        E_MIN = E_MIN_HF;
+        E_MAX = E_MAX_HF;
+        BIAS = BIAS_HF;
+        _epsilon = epsilon_hf;
+        _units= units_hf;
+    }
+
+    //Set scale factor
+    if((exp == (E_MIN-1)) || (prod_ovf && (exp<E_MAX)))
+        scale = 2.0;
+    else
+        scale =1.0;
+
+    //Scale the significand
+    sig_s = sig/scale;
+
+    //Get remainder from the scaled significand
+    R1 = sig_s*_units;
+    if(sig_low>0.0)
+      R_low = 0.25;
+    else if(sig_low<0.0)
+      R_low = -0.25;
+    else
+      R_low = 0;
+
+    //R2 = floor((R1+R_low)/4.0)*4.0;
+    //R3 = (R1+R_low) - R2;
+    R2 = floor(R1/4.0)*4.0;
+    R3 = R1 - R2;
+
+    //Check for exp overflow/underflow
+    if(exp>=(E_MAX+1) || (prod_ovf && exp==E_MAX))
+    {
+        exp_ovf=1;
+    }
+    else if(exp<=(E_MIN-2))
+    {
+        exp_undf=1;
+    }
+    else if(exp == E_MAX)//exp=E_MAX
+    {
+        //if(R3-2.0)+sig_low<=0.0
+        if((R3==0.0) && (sig_low<0.0))
+        {
+            sig_f = sig_s + (3.0-R3-4.0)*_epsilon;
+        }
+        else if((R3<2.0) || (R3==2.0 && sig_low<=0.0))
+        //if(R3<=2.0)
+        {
+            sig_f = sig_s + (1.0-R3)*_epsilon;
+        }
+        else
+        {
+            sig_f = sig_s + (3.0-R3)*_epsilon;
+        }
+    }
+    else if(exp == (E_MIN-1))
+    {
+        exp_adj = 1;
+        if((R3==0.0) && (sig_low<0.0))
+        {
+            sig_f = sig_s + (3.0-R3-4.0)*_epsilon;
+        }
+        else if((R3<2.0) || (R3==2.0 && sig_low<=0.0))
+        //if(R3<=2.0)
+        {
+            sig_f = sig_s + (1.0-R3)*_epsilon;
+        }
+        else
+        {
+            sig_f = sig_s + (3.0-R3)*_epsilon;
+        }
+    }
+    else if(prod_ovf && (exp < E_MAX))
+    {
+        exp_adj = 1;
+        if((R3==0.0) && (sig_low<0.0))
+        {
+            sig_f = sig_s + (3.0-R3-4.0)*_epsilon;
+        }
+        else if((R3<2.0) || (R3==2.0 && sig_low<=0.0))
+        //if(R3<=2.0)
+        {
+            sig_f = sig_s + (1.0-R3)*_epsilon;
+        }
+        else
+        {
+            sig_f = sig_s + (3.0-R3)*_epsilon;
+        }
+    }
+    else if(!prod_ovf)
+    {
+        if((R3==0.0) && (sig_low<0.0))
+        {
+            sig_f = sig_s + (3.0-R3-4.0)*_epsilon;
+        }
+        else if((R3<1.5) || (R3==1.5 && sig_low<=0.0))
+        //if(R3<=1.5)
+        {
+            sig_f = sig_s + (1.0-R3)*_epsilon;
+        }
+        //else if(R3<=2.5)
+        else if((R3<2.5) || (R3==2.5 && sig_low<=0.0))
+        {
+            sig_f = (sig + (2.0-R3)*_epsilon)*0.5;
+            exp_adj=1;
+        }
+        else
+        {
+            sig_f = sig_s + (3.0-R3)*_epsilon;
+        }
+    }
+    //get the binary bits from the double-precision significand
+    //Either sig is positive or negative, IEEE double sig has magnitude
+    //Check for sign at the last stage and take 2's complement if negative
+    uint64_t sig_64_org, sig_64;
+    sig_64_org = *(uint64_t *)&sig_f;
+    sig_64 = sig_64_org;
+    uint32_t sig_32=0;
+    int32_t sig_32_out=0;
+
+    int exp_df;
+
+    exp_df = (sig_64_org >> 52) & 0x7FF;
+    exp_df = exp_df - BIAS_DF;
+
+    if(exp_ovf)
+    {
+        exp=E_MAX+BIAS;
+        if(ft==QF32 || ft==SF)
+            sig_32 = (sign-1) & 0x7FFFFF;
+        else if(ft==QF16 || ft==HF)
+            sig_32 = (sign-1) & 0x3FF;
+    }
+    else if(exp_undf)
+    {
+        exp=E_MIN+BIAS;
+        if(ft==QF32 || ft==SF)
+            sig_32 = ((-1)*sign) & 0x7FFFFF;
+        else if(ft==QF16 || ft==HF)
+            sig_32 = ((-1)*sign) & 0x3FF;
+    }
+    else
+    {
+        exp += BIAS+exp_adj;
+        //Add MSB, generates 53bits (52+1)
+        sig_64 = (sig_64_org & 0xFFFFFFFFFFFFF) | 0x10000000000000;
+        //Shift out exponent 11 bits
+        sig_64 = sig_64<<11;
+        sig_64 = (exp_df>=0)? (sig_64 << exp_df):(sig_64>>abs(exp_df));
+        if(ft==QF32)
+        {
+        sig_64 = sig_64 >> 41;
+        sig_32 = sig_64 & 0x7FFFFF;
+        }
+        else if(ft==QF16)
+        {
+        sig_64 = sig_64 >> 54;
+        sig_32 = sig_64 & 0x3FF;
+        }
+
+        if(sign)
+            sig_32 = ~sig_32;
+    }
+
+    sig_32_out = (sign<<23) | sig_32;
+
+    if(ft==QF16 ||ft==HF)
+        sig_32_out = (sign<<10) | sig_32;
+
+
+    if( (ft ==QF16) ||  (ft==QF32)) {
+        if ((sig == 0.0) && (sig_low == 0.0)) {
+            exp = 0;
+            //printf("Squash to zero!\n");
+        }
+
+    }
+
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[ARCH_QF_rnd_sat]sign=%d exp_in=%d sig=%10.30f sig_low=%10.30f\n",sign, *exp_in, sig, sig_low);
+    printf("[ARCH_QF_rnd_sat]sig_s=%10.30f, sig_f=%10.30f\n",sig_s, sig_f);
+    printf("[ARCH_QF_rnd_sat]prod_ovf=%d exp_adj=%d exp_ovf=%d exp_undf=%d\n",prod_ovf,exp_adj, exp_ovf, exp_undf);
+    printf("[ARCH_QF_rnd_sat]sig_64_org=%lx sig_64=%lx sig_32=%x exp_df=%d exp=%d\n",sig_64_org, sig_64, sig_32, exp_df, exp);
+    printf("[ARCH_QF_rnd_sat]R1=%10.30f R_low=%1.128f R2=%10.30f R3=%10.30f eps=%10.30f\n",R1,R_low,R2,R3,_epsilon);
+
+    double final = ldexp(sig_f, (exp-BIAS));
+    printf("[ARCH_QF_norm] sig_f:%10.30f, exp-BIAS:%d, ldexp:%10.128f \n",sig_f, exp-BIAS, final);
+    printf("[ARCH_QF_norm] sig_32_out:%x, exp:%x \n",sig_32_out, exp);
+#endif
+
+    *exp_in = exp;
+    return sig_32_out;
+}
+
+//size4s_t rnd_sat_qf32(int sign, int exp, double sig, double sig_low)
+size4s_t rnd_sat_qf32(int exp, double sig, double sig_low)
+{
+
+    //size4u_t sig_32=rnd_sat_qf_sig(sign, &exp, sig, sig_low, QF32);
+    //size4u_t sig_32=rnd_sat_qf_sig(&exp, sig, sig_low, QF32);
+    size4s_t sig_32=rnd_sat_qf_sig(&exp, sig, sig_low, QF32);
+
+    size4s_t result;
+    //result = (sign<<31) | (sig_32 <<8) | (exp & 0xFF);
+    result = (sig_32 <<8) | (exp & 0xFF);
+
+    return result;
+}
+
+
+size4u_t get_ieee_sig(int *exp, double sig, f_type ft);
+size4u_t get_ieee_sig(int *exp, double sig, f_type ft)
+{
+    //Extract bits from double precision significand
+    uint64_t sig_64_org=0, sig_52=0, sig_53=0;
+    double value = 0.0;
+    int exp_d=0, exp_org=*exp;
+    int E_MIN;
+    E_MIN = (ft==SF)?  E_MIN_SF: E_MIN_HF;
+    double _epsilon;
+    _epsilon = (ft==SF)?  epsilon: epsilon_hf;
+    uint32_t sig_32=0;
+    size4s_t signif=0;
+    //int sign = (sig>=0.0)? 0:1;
+
+    value = ldexp(sig, exp_org);
+
+    sig_64_org = *(uint64_t *)&value;
+    exp_d = (sig_64_org >> 52) & 0x7FF;
+    exp_d = exp_d - BIAS_DF;
+    sig_52 = (sig_64_org & 0xFFFFFFFFFFFFF);
+    sig_53 = sig_52     | 0x10000000000000;
+
+    //Check if exp is one less than the MIN
+    //shifting right the excess amount of bits from E_MIN
+    int shift = E_MIN - exp_d;
+
+    int lsb =0;
+    int rem =0;
+    int sticky =0;
+    int sig_f =0;
+#ifndef DEBUG_MMVEC_QF
+    UNUSED(lsb);
+    UNUSED(rem);
+    UNUSED(sticky);
+    UNUSED(sig_f);
+    UNUSED(_epsilon);
+#endif
+
+    if(exp_d <= (E_MIN-1))
+    {
+        sig_53 = sig_53 >> shift;
+    }
+
+    if(shift >=53)
+        sig_53=0;
+
+    double R1, R2, R3;
+    if(ft==SF)
+    {
+        signif = sig_53 >> 29;
+        sig_32 = signif & 0x7FFFFF;
+
+        lsb = signif & 1;
+        rem = (sig_53 >>28) & 1;
+        sticky = (sig_53 & 0xFFFFFFF)? 1:0;
+
+        R1 = sig_53/pow(2,29);
+        R2 = floor(R1/2.0)*2;
+        R3 = R1 - R2;
+
+        if(fabs(value) >= SF_MAX)
+        {
+            //sig_32 = (1-sign)*0x7FFFFF;
+            sig_32 = 0x7FFFFF;
+        }
+        else if((R3>0.5 && R3<1.0) || (R3>=1.5))
+        {
+            if(sig_32 == 0x7FFFFF)
+            {
+                sig_32 = 0;
+                exp_d = exp_d +1;
+            }
+            else
+                sig_32 = sig_32 +1;
+        }
+        sig_f = 0x800000 | (sig_32 & 0x7FFFFF);
+    }
+    else
+    {
+        signif = sig_53 >> 42;
+        sig_32 = signif & 0x3FF;
+
+        lsb = signif & 1;
+        rem = (sig_53 >> 41) & 1;
+        sticky = (sig_53 & 0x1FFFFFFFFFF)? 1:0;
+
+        R1 = sig_53/pow(2,42);
+        R2 = floor(R1/2.0)*2;
+        R3 = R1 - R2;
+
+        //if((rem==1 && sticky==1) || (lsb==1 && rem==1))
+        if(fabs(value) >= HF_MAX)
+        {
+            //sig_32 = (1-sign)*0x3FF;
+            sig_32 = 0x3FF;
+        }
+        else if((R3>0.5 && R3<1.0) || (R3>=1.5))
+        {
+            if(sig_32 == 0x3FF)
+            {
+                sig_32 = 0;
+                exp_d = exp_d +1;
+            }
+            else
+                sig_32 = sig_32 +1;
+        }
+        sig_f = 0x400 | (sig_32 & 0x3FF);
+
+    }
+
+    if(sig ==0.0 && exp_org == (E_MIN-1))
+    {
+        sig_64_org = 0;
+        exp_d = 0;
+        sig_32=0;
+        sig_f =0;
+    }
+    *exp = exp_d;
+
+
+
+#ifdef DEBUG_MMVEC_QF
+    int sign = (sig>=0.0)? 0: 1;
+    double param = (double)sig_f*_epsilon;
+    if(sign) param = (-1.0)*param;
+    int exp_f = (exp_d<=E_MIN-1)? E_MIN: exp_d;
+    double final = ldexp(param, exp_f);
+    int exp_1 = (value != 0.0)? ilogb(value): 0;
+    int exp_2 = (exp_1 > E_MIN)? exp_1: E_MIN;
+    double sig_1 = ldexp(value, exp_1-exp_2);
+
+    printf("[IEEE_sig]exp_1=%d, exp_2=%d, sig_1=%10.20f\n",exp_1,exp_2,sig_1);
+    printf("[IEEE_sig]exp_org=%d, sig=%10.20f, value=%10.20f, shift=%d\n",exp_org, sig, value, shift);
+    printf("[IEEE_sig]sign=%d exp_d=%d sig_64_org=%lx sig_52=%lx sig_53=%lx sig_32=%x signif=%x sig_f=%x\n",sign, exp_d, sig_64_org, sig_52, sig_53, sig_32, signif, sig_f);
+    printf("[IEEE_sig]lsb=%d, rem=%d, sticky=%d\n",lsb, rem, sticky);
+    printf("[IEEE_sig] param:%10.20f, exp_d:%d, exp_f:%d, ldexp:%10.20f \n",param, exp_d, exp_f, final);
+    printf("[IEEE_sig]R1=%lf, R2=%lf, R3=%lf\n",R1, R2, R3);
+#endif
+
+    return sig_32;
+}
+
+size2s_t rnd_sat_hf_rint(int exp_in, double sig_in);
+size2s_t rnd_sat_hf_rint(int exp_in, double sig_in)
+{
+    // normalize and decompose again limiting to EMIN of target
+    double val=0.0;
+    double den=0.0;
+    double sig=0.0;
+    double mant=0.0;
+    int exp=0, exp_d=0, exp_ub=0;
+    size2s_t result=0;
+
+    val = ldexp(sig_in, exp); // normalize - convert to simple float (double)
+    exp_d = (val != 0.0)? ilogb(val): 0;
+    exp_ub = (exp_d> E_MIN_HF)? exp_d: E_MIN_HF; // EMIN=-14 for fp16
+    den = ldexp(val, -exp_ub); // denormalized if we hit EMIN
+    int sign = (sig<0)? 1:0;
+    sig = fabs(den);
+    // round to final mantissa
+    mant = rint(ldexp(sig, FRAC_HF)); // FRAC=10 for fp16; RNE
+    // post-round exponent adjust
+    exp = exp_ub + BIAS_HF; // BIAS=15 for fp16
+    // -1 for -1.0 (denorm) or +1 for >=2.0 (round up to next exponent)
+    int exp_mant = (mant != 0.0)? ilogb(mant): 0;
+    int exp_adj = (exp_mant-FRAC_HF > -1)? (exp_mant - FRAC_HF): -1;
+    exp = exp - exp_adj;
+    // overflow
+    if (exp>E_MAX_HF) { // +16 for fp16 w/o inf/nan
+        exp = E_MAX_HF;
+        mant = -1;
+    }
+    // final result// better to use a struct for fp16 instead
+//    result = (mant&((1<<FRAC_HF)-1)) | (exp<<FRAC_HF) | (sign<<15));
+    result = (sign<<15)| (exp<<FRAC_HF) | ((int)mant & 0x3FF);
+
+    printf("[RND_SAT_HF]sign=%d, exp_in=%d, exp_d=%d, exp_ub=%d, exp=%d\n",sign, exp_in, exp_d, exp_ub,exp);
+    printf("[RND_SAT_HF]sig_in=%10.20f, val=%10.20f, den=%10.20f, sig=%10.20f\n",sig_in, val, den, sig);
+    printf("[RND_SAT_HF]mant=%lf, result=%x\n",mant, result);
+
+    return result;
+}
+
+
+size2s_t rnd_sat_hf(int exp, double sig)
+{
+
+    int sign = (sig>=0.0)? 0:1;
+    //size4u_t sig_32=0;//rnd_sat_ieee_sig(&exp, sig, sig_low, SF);
+    size4u_t sig_32 = get_ieee_sig(&exp, sig, HF);
+
+    //exp is unbiased
+    size2s_t result;
+    if(exp==(E_MIN_HF-1) && sig==0.0)
+    {
+        result = 0;
+    }
+    else if(exp > E_MAX_HF)
+    {
+        result = (sign<<15) | (0x1F << 10) | 0x3FF;
+    }
+    //else if((exp < E_MIN_HF-11) ||((exp == E_MIN_HF-11) && (sig_32 ==0)))
+    //{
+    //    result = (sign<<15);
+    //}
+    else
+    {
+        exp = exp + BIAS_HF;
+        if(exp < 0)
+            exp = 0;
+        else if(exp > 31)
+            exp = 31;
+        result = (sign<<15) | ((exp & 0x1F) << 10) | sig_32;
+    }
+
+
+    return result;
+}
+
+
+//Take signed sig, produce normalized ieee sf output
+size4s_t rnd_sat_sf(int exp, double sig)
+{
+
+    int sign = (sig>=0.0)? 0: 1;
+    size4u_t sig_32 = get_ieee_sig(&exp, sig, SF);
+
+    size4s_t result;
+
+    if(exp==0 && sig==0.0)
+    {
+        result = 0;
+    }
+    else
+    {
+        exp = exp + BIAS_SF;
+        if(exp < 0)
+            exp = 0;
+        else if(exp > 255)
+            exp = 255;
+        result = (sign<<31) | ((exp & 0xFF)<< 23) | (sig_32 & 0x7FFFFF);
+    }
+
+    return result;
+}
+
+//size2s_t rnd_sat_qf16(int sign, int exp_ab, double sig, double sig_low)
+size2s_t rnd_sat_qf16(int exp_ab, double sig, double sig_low)
+{
+    int exp=exp_ab;
+
+
+    //size4u_t sig_32=rnd_sat_qf_sig(&exp, sig, sig_low, QF16);
+    //printf("sig low=%f sig=%f\n", sig, sig_low);
+    size4s_t sig_32=rnd_sat_qf_sig(&exp, sig, sig_low, QF16);
+
+    size2s_t result;
+    result = (sig_32<<5) | (exp & 0x1F);
+    //result = (sign_ab<<15) | (sig_16<<5) | (exp_ab & 0x1F);
+
+    return result;
+}
+
+size4s_t mpy_qf32(size4s_t in_a, size4s_t in_b ) {
+	size2s_t exp;
+    double sig;
+
+    unfloat a, b;
+
+    //Get double precision significands and unbiased exp
+    a = parse_qf32(in_a);
+    b = parse_qf32(in_b);
+
+    //Unbiased: after removing bias
+    exp = a.exp + b.exp;
+    sig = a.sig * b.sig;
+
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[ARCH_QF32_pre_rnd] a.sig:%10.20f, b.sig:%10.20f, sig:%10.20f, ilogb(sig):%d, exp:%d\n", a.sig, b.sig, sig, ilogb(sig), exp);
+#endif
+
+    size4s_t result;
+    //result = rnd_sat_qf32(sign, exp_ab, sig_ab, 0.0);
+    result = rnd_sat_qf32(exp, sig, 0.0);
+
+    return result;
+}
+
+size4s_t mpy_qf32_sf(size4s_t in_a, size4s_t in_b ) {
+    int sign;
+	size2s_t exp;
+    double sig;
+    unfloat a, b;
+
+    //Get double precision significands and unbiased exp
+    a = parse_sf(in_a);
+    b = parse_sf(in_b);
+
+    //Unbiased: after removing bias
+    sign = a.sign ^ b.sign;
+    exp = a.exp + b.exp;
+    sig = a.sig * b.sig;
+
+    size4s_t result;
+    result = rnd_sat_qf32(exp, sig, 0.0);
+    if(sign) result = negate32(result);
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[ARCH_SF_parse]sign:%d, a.sig:%10.20f, b.sig:%10.20f, sig:%10.20f exp:%d\n",sign, a.sig, b.sig, sig, exp);
+#endif
+    return result;
+}
+
+size4s_t mpy_qf32_mix_sf(size4s_t in_a, size4s_t in_b ) {
+	size2s_t exp;
+    double sig;
+    unfloat a, b;
+
+    //Get double precision significands and unbiased exp
+    a = parse_qf32(in_a);
+    b = parse_sf(in_b);
+
+    //Unbiased: after removing bias
+    exp = a.exp + b.exp;
+    sig = a.sig * b.sig;
+
+    size4s_t result;
+    result = rnd_sat_qf32(exp, sig, 0.0);
+    if(b.sign) result = negate32(result);
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[ARCH_SF_parse]a.sign:%d, a.sig:%10.20f, b.sign:%d, b.sig:%10.20f, sig:%10.20f exp:%d\n",a.sign, a.sig, b.sign, b.sig, sig, exp);
+#endif
+    return result;
+}
+
+//QF32 output out of two QF16 muls
+size8s_t mpy_qf32_qf16(size4s_t in_a, size4s_t in_b ) {
+
+    double sig_0, sig_1;
+    int exp_0, exp_1;
+
+    unfloat u0,u1,v0,v1;
+
+    u0 = parse_qf16((in_a & 0xFFFF));
+    u1 = parse_qf16(((in_a>>16) & 0xFFFF));
+    v0 = parse_qf16((in_b & 0xFFFF));
+    v1 = parse_qf16(((in_b>>16) & 0xFFFF));
+
+    //Unbiased: after removing bias
+    exp_0 = u0.exp + v0.exp;
+    exp_1 = u1.exp + v1.exp;
+    sig_0 = u0.sig * v0.sig;
+    sig_1 = u1.sig * v1.sig;
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[ARCH_QF32_QF16_parse]u0.exp:%d, u0.sig:%10.20f, v0.exp:%d, v0.sig:%10.20f, sig_0:%10.20f exp_0:%d\n", u0.exp, u0.sig, v0.exp, v0.sig, sig_0, exp_0);
+    printf("[ARCH_QF32_QF16_parse]u1.exp:%d, u1.sig:%10.20f, v1.exp:%d, v1.sig:%10.20f, sig_1:%10.20f exp_1:%d\n", u1.exp, u1.sig, v1.exp, v1.sig, sig_1, exp_1);
+#endif
+
+    size4s_t result_0, result_1;
+    size8s_t result;
+    result_0 = rnd_sat_qf32(exp_0, sig_0, 0.0);
+    result_1 = rnd_sat_qf32(exp_1, sig_1, 0.0);
+
+    result = ((size8s_t)result_1 <<32) | (result_0 &0xFFFFFFFF);
+#ifdef DEBUG_MMVEC_QF
+    printf("[ARCH_QF32_QF16_norm]result_1:%x, result_0:%x, result:%llx\n",result_1, result_0, result);
+#endif
+
+    return result;
+}
+
+//QF32 output out of two HF muls
+size8s_t mpy_qf32_hf(size4s_t in_a, size4s_t in_b ) {
+
+    double sig_0, sig_1;
+    int exp_0, exp_1;
+
+    unfloat u0,u1,v0,v1;
+
+    u0 = parse_hf((in_a & 0xFFFF));
+    u1 = parse_hf(((in_a>>16) & 0xFFFF));
+    v0 = parse_hf((in_b & 0xFFFF));
+    v1 = parse_hf(((in_b>>16) & 0xFFFF));
+
+    //Unbiased: after removing bias
+    exp_0 = u0.exp + v0.exp;
+    exp_1 = u1.exp + v1.exp;
+    sig_0 = u0.sig * v0.sig;
+    sig_1 = u1.sig * v1.sig;
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[ARCH_QF32_HF_parse]u0.exp:%d, u0.sig:%10.20f, v0.exp:%d, v0.sig:%10.20f, sig_0:%10.20f exp_0:%d\n", u0.exp, u0.sig, v0.exp, v0.sig, sig_0, exp_0);
+    printf("[ARCH_QF32_HF_parse]u1.exp:%d, u1.sig:%10.20f, v1.exp:%d, v1.sig:%10.20f, sig_1:%10.20f exp_1:%d\n", u1.exp, u1.sig, v1.exp, v1.sig, sig_1, exp_1);
+#endif
+    size4s_t result_0, result_1;
+    size8s_t result;
+    result_0 = rnd_sat_qf32(exp_0, sig_0, 0.0);
+    result_1 = rnd_sat_qf32(exp_1, sig_1, 0.0);
+
+    if(u0.sign ^ v0.sign)
+      result_0 = negate32(result_0);
+
+    if(u1.sign ^ v1.sign)
+      result_1 = negate32(result_1);
+
+    result = ((size8s_t)result_1 <<32) | (result_0 & 0xFFFFFFFF);
+#ifdef DEBUG_MMVEC_QF
+    printf("[ARCH_QF32_HF_norm]result_1:%x, result_0:%x, result:%llx\n",result_1, result_0, result);
+#endif
+
+    return result;
+}
+
+//QF32 output out of mix of QF16 and HF muls
+size8s_t mpy_qf32_mix_hf(size4s_t in_a, size4s_t in_b ) {
+
+    double sig_0, sig_1;
+    int exp_0, exp_1;
+
+    unfloat u0,u1,v0,v1;
+
+    u0 = parse_qf16((in_a & 0xFFFF));
+    u1 = parse_qf16(((in_a>>16) & 0xFFFF));
+    v0 = parse_hf((in_b & 0xFFFF));
+    v1 = parse_hf(((in_b>>16) & 0xFFFF));
+
+    //Unbiased: after removing bias
+    exp_0 = u0.exp + v0.exp;
+    exp_1 = u1.exp + v1.exp;
+    sig_0 = u0.sig * v0.sig;
+    sig_1 = u1.sig * v1.sig;
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[ARCH_QF32_mix_hf_parse]u0.exp:%d, u0.sig:%10.20f, v0.exp:%d, v0.sig:%10.20f, sig_0:%10.20f exp_0:%d\n", u0.exp, u0.sig, v0.exp, v0.sig, sig_0, exp_0);
+    printf("[ARCH_QF32_mix_hf_parse]u1.exp:%d, u1.sig:%10.20f, v1.exp:%d, v1.sig:%10.20f, sig_1:%10.20f exp_1:%d\n", u1.exp, u1.sig, v1.exp, v1.sig, sig_1, exp_1);
+#endif
+
+    size4s_t result_0, result_1;
+    size8s_t result;
+    result_0 = rnd_sat_qf32(exp_0, sig_0, 0.0);
+    result_1 = rnd_sat_qf32(exp_1, sig_1, 0.0);
+
+    if(v0.sign)
+      result_0 = negate32(result_0);
+    if(v1.sign)
+      result_1 = negate32(result_1);
+
+    result = ((size8s_t)result_1 <<32) | (result_0 & 0xFFFFFFFF);
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[ARCH_QF32_mix_hf_norm]result_1:%x, result_0:%x, result:%llx\n",result_1, result_0, result);
+#endif
+
+    return result;
+}
+
+/* VMPY_QF16 */
+//ITERATOR_INSN_MPY_SLOT(16,vmpy_qf16,"Vd32.qf16=vmpy(Vu32.qf16,Vv32.qf16)",
+//"Vector multiply of qf16 format",
+size2s_t mpy_qf16(size2s_t in_a, size2s_t in_b ) {
+	size1s_t exp;
+    double sig;
+
+    unfloat a, b;
+
+    //Get double precision significands and unbiased exp
+    a = parse_qf16(in_a);
+    b = parse_qf16(in_b);
+
+    //Unbiased: after removing bias
+    exp = a.exp + b.exp;
+    sig = a.sig * b.sig;
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[ARCH_QF16_parse] a.exp:%d, a.sig:%10.20f, b.exp:%d, b.sig:%10.20f, sig:%10.20f exp:%d\n", a.exp, a.sig, b.exp, b.sig, sig, exp);
+#endif
+
+    size2s_t result;
+    result = rnd_sat_qf16(exp, sig, 0.0);
+
+    return result;
+}
+
+size2s_t mpy_qf16_hf(size2s_t in_a, size2s_t in_b ) {
+    int sign;
+	size2s_t exp;
+    double sig;
+
+    unfloat a, b;
+
+    //Get double precision significands and unbiased exp
+    a = parse_hf(in_a);
+    b = parse_hf(in_b);
+
+    //Unbiased: after removing bias
+    exp = a.exp + b.exp;
+    sig = a.sig * b.sig;
+    sign = a.sign^b.sign;
+
+    size2s_t result;
+    result = rnd_sat_qf16(exp, sig, 0.0);
+    if(sign) result = negate16(result);
+#ifdef DEBUG_MMVEC_QF
+    printf("[ARCH_HF_parse]a.exp:%d, a.sig:%10.20f, b.exp:%d, b.sig:%10.20f, sig:%10.20f exp:%d\n",a.exp, a.sig, b.exp, b.sig, sig, exp);
+#endif
+
+    return result;
+}
+
+size2s_t mpy_qf16_mix_hf(size2s_t in_a, size2s_t in_b ) {
+	size2s_t exp;
+    double sig;
+    unfloat a, b;
+
+    //Get double precision significands and unbiased exp
+    a = parse_qf16(in_a);
+    b = parse_hf(in_b);
+
+    //Unbiased: after removing bias
+    exp = a.exp + b.exp;
+    sig = a.sig * b.sig;
+
+    size2s_t result;
+    result = rnd_sat_qf16(exp, sig, 0.0);
+    if(b.sign) result = negate16(result);
+#ifdef DEBUG_MMVEC_QF
+    printf("[ARCH_HF_parse]a.exp:%d, a.sig:%10.20f, b.exp:%d, b.sig:%10.20f, sig:%10.20f exp:%d\n",a.exp, a.sig, b.exp, b.sig, sig, exp);
+#endif
+
+    return result;
+}
+
+size4s_t add_qf32(size4s_t in_a, size4s_t in_b ) {
+	size2s_t exp_ab;
+
+    unfloat a, b;
+
+    //Get double precision significands
+    a = parse_qf32(in_a);
+    b = parse_qf32(in_b);
+
+    if(a.exp>b.exp){
+        exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_SF+1)):ilogb(a.sig));
+        if(exp_ab<b.exp)
+          exp_ab= b.exp;
+    }
+    else{
+        exp_ab = b.exp+((b.sig==0.0)? (-(FRAC_SF+1)):ilogb(b.sig));
+        if(exp_ab<a.exp)
+          exp_ab= a.exp;
+    }
+
+    double sig_ab;
+
+    //Scale sig to the bigger exp
+    double sig_a, sig_b;
+    sig_a = ldexp(a.sig, a.exp-exp_ab);
+    sig_b = ldexp(b.sig, b.exp-exp_ab);
+
+    sig_ab = sig_a + sig_b;
+    double sig_low;
+	sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)+sig_b) : ((sig_b-sig_ab)+sig_a);
+	//sig_low = (b.sign)? (-1.0*epsilon): epsilon;
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[ARCH_add_qf32] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig));
+    printf("[ARCH_add_qf32] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_a-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_a-sig_ab,sig_low);
+#endif
+
+    size4s_t result;
+
+    result = rnd_sat_qf32(exp_ab, sig_ab, sig_low);
+
+    return result;
+}
+
+
+size4s_t add_sf(size4s_t in_a, size4s_t in_b ) {
+	size2s_t exp_ab;
+
+    unfloat a, b;
+
+    //Get double precision significands
+    a = parse_sf(in_a);
+    b = parse_sf(in_b);
+
+    if(a.exp>b.exp){
+        exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_SF+1)):ilogb(a.sig));
+        if(exp_ab<b.exp)
+          exp_ab= b.exp;
+    }
+    else{
+        exp_ab = b.exp+((b.sig==0.0)? (-(FRAC_SF+1)):ilogb(b.sig));
+        if(exp_ab<a.exp)
+          exp_ab= a.exp;
+    }
+    //Scale sig to the bigger exp
+    double sig_a, sig_b;
+    sig_a = ldexp(a.sig, a.exp-exp_ab);
+    sig_b = ldexp(b.sig, b.exp-exp_ab);
+
+    double sig_ab;
+    double sig_low;
+    if((a.sign ^ b.sign) == 0)
+    {
+        sig_ab = sig_a + sig_b;
+	    sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)+sig_b) : ((sig_b-sig_ab)+sig_a);
+    }
+    else if(a.sign==0 && b.sign==1)
+    {
+        sig_ab = sig_a - sig_b;
+	    sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)-sig_b) : (sig_a -(sig_b+sig_ab));
+    }
+    else// if(a.sign==1 && b.sign==0)
+    {
+        sig_ab = sig_b - sig_a;
+	    sig_low = (b.exp>a.exp) ? ((sig_b-sig_ab)-sig_a) : (sig_b -(sig_a+sig_ab));
+    }
+
+    size4s_t result;
+    result = rnd_sat_qf32(exp_ab, sig_ab, sig_low);
+
+    if((a.sign==1) && (b.sign== 1))
+        result = negate32(result);
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[ARCH_add_sf] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig));
+    printf("[ARCH_add_sf] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_b-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_b-sig_ab,sig_low);
+    printf("[ARCH_add_sf] result:%x \n\n", result);
+#endif
+
+
+    return result;
+}
+
+size4s_t add_qf32_mix(size4s_t in_a, size4s_t in_b ) {
+	int exp_ab;
+
+    unfloat a, b;
+
+    //Get double precision significands
+    a = parse_qf32(in_a);
+    b = parse_sf(in_b);
+
+    if(b.sign) b.sig = (-1.0)*b.sig;
+
+    if(a.exp>b.exp){
+        exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_SF+1)):ilogb(a.sig));
+        if(exp_ab<b.exp)
+          exp_ab= b.exp;
+    }
+    else{
+        exp_ab = b.exp+((b.sig==0.0)? (-(FRAC_SF+1)):ilogb(b.sig));
+        //exp_ab = b.exp+ilogb(b.sig);
+        if(exp_ab<a.exp)
+          exp_ab= a.exp;
+    }
+
+    double sig_ab;
+
+    //Scale sig to the bigger exp
+    double sig_a, sig_b;
+    sig_a = ldexp(a.sig, a.exp-exp_ab);
+    sig_b = ldexp(b.sig, b.exp-exp_ab);
+
+    sig_ab = sig_a + sig_b;
+    double sig_low;
+	sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)+sig_b) : ((sig_b-sig_ab)+sig_a);
+	//sig_low = (b.sign)? (-1.0*epsilon): epsilon;
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[ARCH_add_qf32_mix] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig));
+    printf("[ARCH_add_qf32_mix] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_a-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_a-sig_ab,sig_low);
+#endif
+
+    size4s_t result;
+
+    result = rnd_sat_qf32(exp_ab, sig_ab, sig_low);
+
+    return result;
+}
+
+size4s_t sub_qf32(size4s_t in_a, size4s_t in_b ) {
+	size2s_t exp_ab;
+
+    unfloat a, b;
+
+    //Get double precision significands
+    a = parse_qf32(in_a);
+    b = parse_qf32(in_b);
+
+    if(a.exp>b.exp){
+        exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_SF+1)):ilogb(a.sig));
+        if(exp_ab<b.exp)
+          exp_ab= b.exp;
+    }
+    else{
+        exp_ab = b.exp+((b.sig==0.0)? (-(FRAC_SF+1)):ilogb(b.sig));
+        if(exp_ab<a.exp)
+          exp_ab= a.exp;
+    }
+
+    double sig_ab;
+
+    //Scale sig to the bigger exp
+    double sig_a, sig_b;
+    sig_a = ldexp(a.sig, a.exp-exp_ab);
+    sig_b = ldexp(b.sig, b.exp-exp_ab);
+
+    sig_ab = sig_a - sig_b;
+    double sig_low;
+	sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)-sig_b) : (sig_a -(sig_b+sig_ab));
+	//sig_low = (b.sign)? (-1.0*epsilon): epsilon;
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[ARCH_sub_qf32] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig));
+    printf("[ARCH_sub_qf32] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_a-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_a-sig_ab,sig_low);
+    printf("[ARCH_sub_qf32] a:%10.30f, a_adj:%10.30f, fabs(sig_b):%f\n", ldexp(a.sig, a.exp), ldexp(sig_a, exp_ab), fabs(sig_b));
+#endif
+
+    size4s_t result;
+
+    result = rnd_sat_qf32(exp_ab, sig_ab, sig_low);
+
+    return result;
+}
+
+size4s_t sub_sf(size4s_t in_a, size4s_t in_b ) {
+	size2s_t exp_ab;
+    unfloat a, b;
+
+    //Get double precision significands
+    a = parse_sf(in_a);
+    b = parse_sf(in_b);
+
+    if(a.exp>b.exp){
+        exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_SF+1)):ilogb(a.sig));
+        if(exp_ab<b.exp)
+          exp_ab= b.exp;
+    }
+    else{
+        exp_ab = b.exp+((b.sig==0.0)? (-(FRAC_SF+1)):ilogb(b.sig));
+        if(exp_ab<a.exp)
+          exp_ab= a.exp;
+    }
+    //Scale sig to the bigger exp
+    double sig_a, sig_b;
+    sig_a = ldexp(a.sig, a.exp-exp_ab);
+    sig_b = ldexp(b.sig, b.exp-exp_ab);
+
+    double sig_ab;
+    double sig_low;
+    if((a.sign==0) && (b.sign==0))
+    {
+        sig_ab = sig_a - sig_b;
+	    sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)-sig_b) : (sig_a -(sig_b+sig_ab));
+    }
+    else if(a.sign ^ b.sign)
+    {
+        sig_ab = sig_a + sig_b;
+	    sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)+sig_b) : ((sig_b-sig_ab)+sig_a);
+    }
+    else// if(a.sign && b.sign)
+    {
+        sig_ab = sig_b - sig_a;
+	    sig_low = (b.exp>a.exp) ? ((sig_b-sig_ab)-sig_a) : (sig_b -(sig_a+sig_ab));
+    }
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[ARCH_sub_sf] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig));
+    printf("[ARCH_sub_sf] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_b-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_b-sig_ab,sig_low);
+#endif
+
+    size4s_t result;
+
+    result = rnd_sat_qf32(exp_ab, sig_ab, sig_low);
+
+    if((a.sign==1) && (b.sign==0))
+        result = negate32(result);
+
+    return result;
+}
+
+size4s_t sub_qf32_mix(size4s_t in_a, size4s_t in_b ) {
+	size2s_t exp_ab;
+
+    unfloat a, b;
+
+    //Get double precision significands
+    a = parse_qf32(in_a);
+    b = parse_sf(in_b);
+
+    if(b.sign) b.sig = (-1.0)*b.sig;
+
+    if(a.exp>b.exp){
+        exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_SF+1)):ilogb(a.sig));
+        if(exp_ab<b.exp)
+          exp_ab= b.exp;
+    }
+    else{
+        exp_ab = b.exp+((b.sig==0.0)? (-(FRAC_SF+1)):ilogb(b.sig));
+        if(exp_ab<a.exp)
+          exp_ab= a.exp;
+    }
+
+    double sig_ab;
+
+    //Scale sig to the bigger exp
+    double sig_a, sig_b;
+    sig_a = ldexp(a.sig, a.exp-exp_ab);
+    sig_b = ldexp(b.sig, b.exp-exp_ab);
+
+    sig_ab = sig_a - sig_b;
+    double sig_low;
+	//sig_low = (a.exp>b.exp) ? ((sig_ab-sig_a)-sig_b) : ((sig_ab-sig_b)-sig_a);
+	//sig_low = (a.exp>b.exp) ? ((sig_ab-sig_a)+sig_b) : (sig_a-(sig_b+sig_ab));
+	sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)-sig_b) : (sig_a -(sig_b+sig_ab));
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[ARCH_sub_qf32_mix] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig));
+    printf("[ARCH_sub_qf32_mix] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_a-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_a-sig_ab,sig_low);
+#endif
+
+    size4s_t result;
+
+    result = rnd_sat_qf32(exp_ab, sig_ab, sig_low);
+
+    return result;
+}
+//add_qf16
+size2s_t add_qf16(size2s_t in_a, size2s_t in_b ) {
+	size1s_t exp_ab;
+    unfloat a, b;
+
+    //Get double precision significands
+    a = parse_qf16(in_a);
+    b = parse_qf16(in_b);
+
+    if(a.exp>b.exp){
+        exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_HF+1)):ilogb(a.sig));
+        if(exp_ab<b.exp)
+          exp_ab= b.exp;
+    }
+    else{
+        exp_ab = b.exp+((b.sig==0.0)? (-(FRAC_HF+1)):ilogb(b.sig));
+        if(exp_ab<a.exp)
+          exp_ab= a.exp;
+    }
+
+    double sig_ab;
+
+    //Scale sig to the bigger exp
+    double sig_a, sig_b;
+    sig_a = ldexp(a.sig, a.exp-exp_ab);
+    sig_b = ldexp(b.sig, b.exp-exp_ab);
+
+    sig_ab = sig_a + sig_b;
+    double sig_low;
+	sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)+sig_b) : ((sig_b-sig_ab)+sig_a);
+	//sig_low = (b.sign)? (-1.0*epsilon): epsilon;
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[ARCH_add_qf16] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig));
+    printf("[ARCH_add_qf16] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_a-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_a-sig_ab,sig_low);
+#endif
+
+    size2s_t result;
+
+    result = rnd_sat_qf16(exp_ab, sig_ab, sig_low);
+
+    return result;
+}
+
+size2s_t add_hf(size2s_t in_a, size2s_t in_b ) {
+	size1s_t exp_ab;
+    unfloat a, b;
+
+    //Get double precision significands
+    a = parse_hf(in_a);
+    b = parse_hf(in_b);
+
+    if(a.exp>b.exp){
+        exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_HF+1)):ilogb(a.sig));
+        if(exp_ab<b.exp)
+          exp_ab= b.exp;
+    }
+    else{
+        exp_ab = b.exp+((b.sig==0.0)? (-(FRAC_HF+1)):ilogb(b.sig));
+        if(exp_ab<a.exp)
+          exp_ab= a.exp;
+    }
+    //Scale sig to the bigger exp
+    double sig_a, sig_b;
+    sig_a = ldexp(a.sig, a.exp-exp_ab);
+    sig_b = ldexp(b.sig, b.exp-exp_ab);
+
+    double sig_ab;
+    double sig_low;
+    if((a.sign ^ b.sign) == 0)
+    {
+        sig_ab = sig_a + sig_b;
+	    sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)+sig_b) : ((sig_b-sig_ab)+sig_a);
+    }
+    else if(a.sign==0 && b.sign==1)
+    {
+        sig_ab = sig_a - sig_b;
+	    sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)-sig_b) : (sig_a -(sig_b+sig_ab));
+    }
+    else// if(a.sign==1 && b.sign==0)
+    {
+        sig_ab = sig_b - sig_a;
+	    sig_low = (b.exp>a.exp) ? ((sig_b-sig_ab)-sig_a) : (sig_b -(sig_a+sig_ab));
+    }
+
+    size2s_t result;
+
+    result = rnd_sat_qf16(exp_ab, sig_ab, sig_low);
+    if((a.sign==1) && (b.sign== 1))
+        result = negate16(result);
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[ARCH_add_hf] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig));
+    printf("[ARCH_add_hf] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_b-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_b-sig_ab,sig_low);
+    printf("[ARCH_add_sf] result:%x \n\n", result);
+#endif
+
+
+    return result;
+}
+
+size2s_t add_qf16_mix(size2s_t in_a, size2s_t in_b ) {
+	size1s_t exp_ab;
+    unfloat a, b;
+
+    //Get double precision significands
+    a = parse_qf16(in_a);
+    b = parse_hf(in_b);
+
+    if(b.sign) b.sig = (-1.0)*b.sig;
+
+    if(a.exp>b.exp){
+        exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_HF+1)):ilogb(a.sig));
+        if(exp_ab<b.exp)
+          exp_ab= b.exp;
+    }
+    else{
+        exp_ab = b.exp+((b.sig==0.0)? (-(FRAC_HF+1)):ilogb(b.sig));
+        if(exp_ab<a.exp)
+          exp_ab= a.exp;
+    }
+
+    double sig_ab;
+
+    //Scale sig to the bigger exp
+    double sig_a, sig_b;
+    sig_a = ldexp(a.sig, a.exp-exp_ab);
+    sig_b = ldexp(b.sig, b.exp-exp_ab);
+
+    sig_ab = sig_a + sig_b;
+    double sig_low;
+	sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)+sig_b) : ((sig_b-sig_ab)+sig_a);
+	//sig_low = (b.sign)? (-1.0*epsilon): epsilon;
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[ARCH_add_qf16_mix] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig));
+    printf("[ARCH_add_qf16_mix] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_a-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_a-sig_ab,sig_low);
+#endif
+
+    size2s_t result;
+
+    result = rnd_sat_qf16(exp_ab, sig_ab, sig_low);
+
+    return result;
+}
+
+size2s_t sub_qf16(size2s_t in_a, size2s_t in_b ) {
+	size1s_t exp_ab;
+
+    unfloat a, b;
+
+    //Get double precision significands
+    a = parse_qf16(in_a);
+    b = parse_qf16(in_b);
+
+    if(a.exp>b.exp){
+        exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_HF+1)):ilogb(a.sig));
+        if(exp_ab<b.exp)
+          exp_ab= b.exp;
+    }
+    else{
+        exp_ab = b.exp+((b.sig==0.0)? (-(FRAC_HF+1)):ilogb(b.sig));
+        if(exp_ab<a.exp)
+          exp_ab= a.exp;
+    }
+
+    double sig_ab;
+
+    //Scale sig to the bigger exp
+    double sig_a, sig_b;
+    sig_a = ldexp(a.sig, a.exp-exp_ab);
+    sig_b = ldexp(b.sig, b.exp-exp_ab);
+
+    sig_ab = sig_a - sig_b;
+    double sig_low;
+	//sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)-sig_b) : (sig_a -(sig_b+sig_ab));
+	//sig_low = (a.exp>b.exp) ? ((sig_ab-sig_a)+sig_b) : (sig_a-(sig_b+sig_ab));
+	sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)-sig_b) : (sig_a -(sig_b+sig_ab));
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[ARCH_sub_qf16] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig));
+    printf("[ARCH_sub_qf16] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_a-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_a-sig_ab,sig_low);
+    printf("[ARCH_sub_qf32] a:%10.30f, a_adj:%10.30f, fabs(sig_b):%f\n", ldexp(a.sig, a.exp), ldexp(sig_a, exp_ab), fabs(sig_b));
+#endif
+
+    size2s_t result;
+
+    result = rnd_sat_qf16(exp_ab, sig_ab, sig_low);
+
+    return result;
+}
+
+
+size2s_t sub_hf(size2s_t in_a, size2s_t in_b ) {
+	size1s_t exp_ab;
+
+    unfloat a, b;
+
+    //Get double precision significands
+    a = parse_hf(in_a);
+    b = parse_hf(in_b);
+
+    if(a.exp>b.exp){
+        exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_HF+1)):ilogb(a.sig));
+        if(exp_ab<b.exp)
+          exp_ab= b.exp;
+    }
+    else{
+        exp_ab = b.exp+((b.sig==0.0)? (-(FRAC_HF+1)):ilogb(b.sig));
+        if(exp_ab<a.exp)
+          exp_ab= a.exp;
+    }
+    //Scale sig to the bigger exp
+    double sig_a, sig_b;
+    sig_a = ldexp(a.sig, a.exp-exp_ab);
+    sig_b = ldexp(b.sig, b.exp-exp_ab);
+
+    double sig_ab;
+    double sig_low;
+    if((a.sign==0) && (b.sign==0))
+    {
+        sig_ab = sig_a - sig_b;
+	    sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)-sig_b) : (sig_a -(sig_b+sig_ab));
+    }
+    else if(a.sign ^ b.sign)
+    {
+        sig_ab = sig_a + sig_b;
+	    sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)+sig_b) : ((sig_b-sig_ab)+sig_a);
+    }
+    else// if(a.sign && b.sign)
+    {
+        sig_ab = sig_b - sig_a;
+	    sig_low = (b.exp>a.exp) ? ((sig_b-sig_ab)-sig_a) : (sig_b -(sig_a+sig_ab));
+    }
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[ARCH_sub_hf] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig));
+    printf("[ARCH_sub_hf] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.30f, sig_ab:%1.30f, sig_ab-sig_a:%1.30f, sig_low:%1.30f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_ab-sig_a,sig_low);
+#endif
+
+    size2s_t result;
+
+    result = rnd_sat_qf16(exp_ab, sig_ab, sig_low);
+    if((a.sign==1) && (b.sign==0))
+        result = negate16(result);
+
+    return result;
+}
+
+size2s_t sub_qf16_mix(size2s_t in_a, size2s_t in_b ) {
+	size1s_t exp_ab;
+
+    unfloat a, b;
+
+    //Get double precision significands
+    a = parse_qf16(in_a);
+    b = parse_hf(in_b);
+
+    if(b.sign) b.sig = (-1.0)*b.sig;
+
+    if(a.exp>b.exp){
+        exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_HF+1)):ilogb(a.sig));
+        if(exp_ab<b.exp)
+          exp_ab= b.exp;
+    }
+    else{
+        exp_ab = b.exp+((b.sig==0.0)? (-(FRAC_HF+1)):ilogb(b.sig));
+        if(exp_ab<a.exp)
+          exp_ab= a.exp;
+    }
+
+    double sig_ab;
+
+    //Scale sig to the bigger exp
+    double sig_a, sig_b;
+    sig_a = ldexp(a.sig, a.exp-exp_ab);
+    sig_b = ldexp(b.sig, b.exp-exp_ab);
+
+    sig_ab = sig_a - sig_b;
+    double sig_low;
+	//sig_low = (a.exp>b.exp) ? ((sig_ab-sig_a)-sig_b) : ((sig_ab-sig_b)-sig_a);
+	//sig_low = (a.exp>b.exp) ? ((sig_ab-sig_a)+sig_b) : (sig_a-(sig_b+sig_ab));
+	sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)-sig_b) : (sig_a -(sig_b+sig_ab));
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[ARCH_sub_qf16_mix] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig));
+    printf("[ARCH_sub_qf16_mix] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_a-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_a-sig_ab,sig_low);
+#endif
+
+    size2s_t result;
+
+    result = rnd_sat_qf16(exp_ab, sig_ab, sig_low);
+
+    return result;
+}
+
+//FP conversion QF32 to IEEE SF
+size4s_t conv_sf_qf32(size4s_t a)
+{
+
+    size4s_t result;
+    unfloat u = parse_qf32(a);
+
+    result = rnd_sat_sf(u.exp, u.sig);
+
+#ifdef DEBUG_MMVEC_QF
+    double final = ldexp(u.sig, u.exp);
+    printf("[SF_parse_conv_sf_qf32] u.sig:%lf, u.exp:%d, ldexp:%10.20f \n",u.sig, u.exp, final);
+#endif
+
+    return result;
+}
+
+//FP conversion W to IEEE SF
+size4s_t conv_sf_w(size4s_t a)
+{
+
+    size4s_t result;
+    int exp=0;
+    double sig=0.0;
+    if(a !=0)
+    {
+        exp = ilogb(a);
+        sig = (double)a/scalbn(1.0, exp);
+    }
+    result = rnd_sat_sf(exp, sig);
+
+#ifdef DEBUG_MMVEC_QF
+    double final = ldexp(sig, exp);
+    printf("[SF_parse_conv_sf_w] sig:%lf, exp:%d, ldexp:%10.20f \n",sig, exp, final);
+#endif
+
+    return result;
+}
+
+//FP conversion UW to IEEE SF
+size4s_t conv_sf_uw(size4u_t a)
+{
+
+    size4s_t result;
+    int exp=0;
+    double sig=0.0;
+    if(a !=0)
+    {
+        exp = ilogb(a);
+        sig = (double)(unsigned)a/scalbn(1.0, exp);
+    }
+    result = rnd_sat_sf(exp, sig);
+
+//#ifdef DEBUG_MMVEC_QF
+//    double final = ldexp(sig, exp);
+//    printf("[SF_parse_conv_sf_uw] sig:%lf, exp:%d, ldexp:%10.20f \n",sig, exp, final);
+//#endif
+
+    return result;
+}
+
+//FP conversion QF16 to IEEE HF
+size2s_t conv_hf_qf16(size2s_t a)
+{
+
+    size2s_t result;
+    unfloat u = parse_qf16(a);
+
+    result = rnd_sat_hf(u.exp, u.sig);
+
+//#ifdef DEBUG_MMVEC_QF
+//    double final = ldexp(u.sig, u.exp);
+//    printf("[HF_parse_conv_hf_qf16] u.sig:%lf, u.exp:%d, ldexp:%10.20f \n",u.sig, u.exp, final);
+//#endif
+
+    return result;
+}
+
+//FP conversion H to IEEE HF
+size2s_t conv_hf_h(size2s_t a)
+{
+    size2s_t result;
+    int exp=0;
+    double sig=0.0;
+    if(a !=0)
+    {
+        exp = ilogb(a);
+        sig = (double)a/scalbn(1.0, exp);
+    }
+    result = rnd_sat_hf(exp, sig);
+
+#ifdef DEBUG_MMVEC_QF
+    double final = ldexp(sig, exp);
+    double f_rint = rint(final);
+    printf("[HF_parse_conv_hf_h] sig:%lf, exp:%d, ldexp:%10.20f, rint:%lf \n",sig, exp, final, f_rint);
+#endif
+    return result;
+}
+
+//FP conversion UH to IEEE HF
+size2s_t conv_hf_uh(size2u_t a)
+{
+
+    size2s_t result;
+    int exp=0;
+    double sig=0.0;
+    if(a !=0)
+    {
+        exp = ilogb(a);
+        sig = (double)(unsigned)a/scalbn(1.0, exp);
+    }
+    result = rnd_sat_hf(exp, sig);
+
+//#ifdef DEBUG_MMVEC_QF
+//    double final = ldexp(sig, exp);
+//    printf("[SF_parse_conv_hf_uh] sig:%lf, exp:%d, ldexp:%10.20f \n",sig, exp, final);
+//#endif
+
+    return result;
+}
+
+//FP conversion two QF32 to two QF16
+size4s_t conv_hf_qf32(size8s_t a)
+{
+
+    size2s_t result0, result1;
+    size4s_t result;
+    size4s_t a0, a1;
+    a0 = a & 0xFFFFFFFF;
+    a1 = (a>>32) & 0xFFFFFFFF;
+
+    unfloat u0 = parse_qf32(a0);
+    unfloat u1 = parse_qf32(a1);
+
+    result0 = rnd_sat_hf(u0.exp, u0.sig);
+    result1 = rnd_sat_hf(u1.exp, u1.sig);
+
+    result = ((size4s_t)result1 << 16) | (result0 & 0xFFFF);
+
+/*
+#ifdef DEBUG_MMVEC_QF
+    double final0 = ldexp(u0.sig, u0.exp);
+    double final1 = ldexp(u1.sig, u1.exp);
+
+    printf("[HF_parse_conv_hf_qf32] u0.sig:%lf, u0.exp:%d, ldexp0:%10.20f \n",u0.sig, u0.exp, final0);
+    printf("[HF_parse_conv_hf_qf32] u1.sig:%lf, u1.exp:%d, ldexp1:%10.20f \n",u1.sig, u1.exp, final1);
+#endif
+*/
+
+    return result;
+}
+
+//FP conversion two W to two IEEE HF
+size4s_t conv_hf_w(size8s_t a)
+{
+    size2s_t result0, result1;
+    size4s_t result;
+    size4s_t a0, a1;
+    a0 = a & 0xFFFFFFFF;
+    a1 = (a>>32) & 0xFFFFFFFF;
+
+    int exp0=0, exp1=0;
+    double sig0=0.0, sig1=0.0;
+    if(a0 !=0)
+    {
+        exp0 = ilogb(a0);
+        sig0 = (double)a0/scalbn(1.0, exp0);
+    }
+    if(a1 !=0)
+    {
+        exp1 = ilogb(a1);
+        sig1 = (double)a1/scalbn(1.0, exp1);
+    }
+    result0 = rnd_sat_hf(exp0, sig0);
+    result1 = rnd_sat_hf(exp1, sig1);
+
+    result = ((size4s_t)result1 << 16) | (result0 & 0xFFFF);
+
+/*
+#ifdef DEBUG_MMVEC_QF
+    double final0 = ldexp(sig0, exp0);
+    double final1 = ldexp(sig1, exp1);
+
+    printf("[HF_parse_conv_hf_w] sig0:%lf, exp0:%d, ldexp0:%10.20f \n",sig0, exp0, final0);
+    printf("[HF_parse_conv_hf_w] sig1:%lf, exp1:%d, ldexp1:%10.20f \n",sig1, exp1, final1);
+#endif
+*/
+    return result;
+}
+
+//FP conversion two UW to two IEEE HF
+size4s_t conv_hf_uw(size8u_t a)
+{
+    size2s_t result0, result1;
+    size4s_t result;
+    size4u_t a0, a1;
+    a0 = a & 0xFFFFFFFF;
+    a1 = (a>>32) & 0xFFFFFFFF;
+
+    int exp0=0, exp1=0;
+    double sig0=0.0, sig1=0.0;
+    if(a0 !=0)
+    {
+        exp0 = ilogb(a0);
+        sig0 = (double)(unsigned)a0/scalbn(1.0, exp0);
+    }
+    if(a1 !=0)
+    {
+        exp1 = ilogb(a1);
+        sig1 = (double)(unsigned)a1/scalbn(1.0, exp1);
+    }
+    result0 = rnd_sat_hf(exp0, sig0);
+    result1 = rnd_sat_hf(exp1, sig1);
+
+    result = ((size4s_t)result1 << 16) | (result0 & 0xFFFF);
+/*
+#ifdef DEBUG_MMVEC_QF
+    double final0 = ldexp(sig0, exp0);
+    double final1 = ldexp(sig1, exp1);
+
+    printf("[HF_parse_conv_hf_uw] sig0:%lf, exp0:%d, ldexp0:%10.20f \n",sig0, exp0, final0);
+    printf("[HF_parse_conv_hf_uw] sig1:%lf, exp1:%d, ldexp1:%10.20f \n",sig1, exp1, final1);
+#endif
+*/
+    return result;
+}
+
+size4s_t rnd_sat_w(int exp, double sig)
+{
+    size4s_t result=0;
+    size4s_t W_MAX = 0x7fffffff;
+    size4s_t W_MIN = 0x80000000;
+
+    int sign = (sig>=0.0)? 0: 1;
+
+    double R1=0.0;
+    double R2=0.0;
+    double R3=0.0;
+    if(exp > 30)
+    {
+        result = (sign)? W_MIN:W_MAX;
+        result = (sign <<31) | result;
+    }
+    else
+    {
+        R1 = ldexp(sig, exp);
+        R2 = floor(R1/2.0)*2;
+        R3 = R1 - R2;
+        if(sign==0)
+        {
+            if(R3<=0.5)
+                result = (size4s_t) R1;
+            else if(R3>0.5 && R3<1.5)
+                result =  (size4s_t) round(R1);
+            else if(R3>=1.5)
+                result =  (size4s_t) R1+1;
+        }
+        else
+            result = (size4s_t)round(R1);
+    }
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[RND_conv_w_qf32] sig:%lf, exp:%d, R1:%10.20f, R2:%10.20f, R3:%10.20f, result:%x(%d)\n",sig, exp, R1, R2, R3, result, result);
+#endif
+
+    return result;
+}
+
+size4u_t rnd_sat_uw(int exp, double sig)
+{
+    size4u_t result=0;
+    size4u_t W_MAX = 0xffffffff;
+
+    double R1=0.0;
+    double R2=0.0;
+    double R3=0.0;
+    if(sig<0.0)
+        result = 0;
+    else if(exp > 31)
+    {
+        result = W_MAX;
+    }
+    else
+    {
+        R1 = ldexp(sig, exp);
+        R2 = floor(R1/2.0)*2;
+        R3 = R1 - R2;
+        if(R3<=0.5)
+            result = (size4s_t) R1;
+        else if(R3>0.5 && R3<1.5)
+            result =  (size4s_t) round(R1);
+        else if(R3>=1.5)
+            result =  (size4s_t) R1+1;
+    }
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[RND_conv_uw_qf32] sig:%lf, exp:%d, R1:%10.20f, R2:%10.20f, R3:%10.20f, result:%x(%d)\n",sig, exp, R1, R2, R3, result, result);
+#endif
+
+    return result;
+}
+
+size2s_t rnd_sat_h(int exp, double sig)
+{
+    size2s_t result=0;
+    size2s_t W_MAX = 0x7fff;
+    size2s_t W_MIN = 0x8000;
+
+    int sign = (sig>=0.0)? 0: 1;
+
+    double R1=0.0;
+    double R2=0.0;
+    double R3=0.0;
+    if(exp > 14)
+    {
+        result = (sign)? W_MIN:W_MAX;
+        result = (sign <<15) | result;
+    }
+    else
+    {
+        R1 = ldexp(sig, exp);
+        R2 = floor(R1/2.0)*2;
+        R3 = R1 - R2;
+        if(sign==0)
+        {
+            if(R3<=0.5)
+                result = (size2s_t) R1;
+            else if(R3>0.5 && R3<1.5)
+                result =  (size2s_t) round(R1);
+            else if(R3>=1.5)
+                result =  (size2s_t) R1+1;
+        }
+        else
+        {
+            if(R3<=0.5 && R3 !=0.0)
+                result = (size2s_t)R1 -1;
+            else if(R3>0.5 && R3<1.5)
+                result = (size2s_t)round(R1);
+            else// if(R3>=1.5)
+                result = (size2s_t)R1;
+        }
+    }
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[RND_conv_h_qf16] sig:%lf, exp:%d, R1:%10.20f, R2:%10.20f, R3:%10.20f, result:%x(%d)\n",sig, exp, R1, R2, R3, result, result);
+#endif
+
+    return result;
+}
+
+size2u_t rnd_sat_uh(int exp, double sig)
+{
+    size2u_t result=0;
+    size2u_t W_MAX = 0xffff;
+
+    double R1=0.0;
+    double R2=0.0;
+    double R3=0.0;
+    if(sig<0.0)
+        result = 0;
+    else if(exp > 15)
+    {
+        result = W_MAX;
+    }
+    else
+    {
+        R1 = ldexp(sig, exp);
+        R2 = floor(R1/2.0)*2;
+        R3 = R1 - R2;
+        if(R3<=0.5)
+            result = (size2s_t) R1;
+        else if(R3>0.5 && R3<1.5)
+            result =  (size2s_t) round(R1);
+        else if(R3>=1.5)
+            result =  (size2s_t) R1+1;
+    }
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[RND_conv_uh_qf16] sig:%lf, exp:%d, R1:%10.20f, R2:%10.20f, R3:%10.20f, result:%x(%d)\n",sig, exp, R1, R2, R3, result, result);
+#endif
+
+    return result;
+}
+
+size1s_t rnd_sat_b(int exp, double sig)
+{
+    size1s_t result=0;
+    size1s_t W_MAX = 0x7f;
+    size1s_t W_MIN = 0x80;
+
+    int sign = (sig>=0.0)? 0: 1;
+
+    double R1=0.0;
+    double R2=0.0;
+    double R3=0.0;
+    if(exp > 6)
+    {
+        result = (sign)? W_MIN:W_MAX;
+        result = (sign <<7) | result;
+    }
+    else
+    {
+        R1 = ldexp(sig, exp);
+        R2 = floor(R1/2.0)*2;
+        R3 = R1 - R2;
+        if(sign==0)
+        {
+            if(R3<=0.5)
+                result = (size1s_t) R1;
+            else if(R3>0.5 && R3<1.5)
+                result =  (size1s_t) round(R1);
+            else if(R3>=1.5)
+                result =  (size1s_t) R1+1;
+        }
+        else
+        {
+            if(R3<=0.5 && R3 !=0.0)
+                result = (size1s_t)R1 -1;
+            else if(R3>0.5 && R3<1.5)
+                result = (size1s_t)round(R1);
+            else// if(R3>=1.5)
+                result = (size1s_t)R1;
+        }
+    }
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[RND_conv_b_qf16] sig:%lf, exp:%d, R1:%10.20f, R2:%10.20f, R3:%10.20f, result:%x(%d)\n",sig, exp, R1, R2, R3, result, result);
+#endif
+
+    return result;
+}
+
+size1u_t rnd_sat_ub(int exp, double sig)
+{
+    size1u_t result=0;
+    size1u_t W_MAX = 0xff;
+
+    double R1=0.0;
+    double R2=0.0;
+    double R3=0.0;
+    if(sig<0.0)
+        result = 0;
+    else if(exp > 7)
+    {
+        result = W_MAX;
+    }
+    else
+    {
+        R1 = ldexp(sig, exp);
+        R2 = floor(R1/2.0)*2;
+        R3 = R1 - R2;
+
+        if(R3<=0.5)
+            result = (size1s_t) R1;
+        else if(R3>0.5 && R3<1.5)
+            result =  (size1s_t) round(R1);
+        else if(R3>=1.5)
+            result =  (size1s_t) R1+1;
+    }
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[RND_conv_ub_qf16] sig:%lf, exp:%d, R1:%10.20f, R2:%10.20f, R3:%10.20f, result:%x(%d)\n",sig, exp, R1, R2, R3, result, result);
+#endif
+
+    return result;
+}
+
+//FP conversion QF32 to 32bit W
+size4s_t conv_w_qf32(size4s_t a)
+{
+
+    size4s_t result;
+    unfloat u = parse_qf32(a);
+
+    result = rnd_sat_w(u.exp, u.sig);
+
+    return result;
+}
+
+size4s_t conv_w_sf(size4s_t op1)
+{
+    sf_union input;
+    size4s_t W_MAX = 0x7fffffff;
+    size4s_t W_MIN = 0x80000000;
+    input.i = op1;
+    size4s_t  result;
+
+    if(isNaNF32(op1) || isInfF32(op1) || (input.f >= (float)W_MAX) || (input.f <= (float)W_MIN))
+    {
+        if(input.x.sign == 1){
+            result = W_MIN;
+        }
+        else{
+            result = W_MAX;
+        }
+    }
+    else{
+        //convert and round to the zero
+        result = (int)input.f;
+    }
+
+#ifdef DEBUG_MMVEC_QF
+    printf("Debug : result =0x%08x\n",result);
+#endif
+    return result;
+}
+
+size2s_t conv_h_hf(size2s_t op1)
+{
+    sf_union input;
+    size4s_t op1_ext = op1;
+    size2s_t HW_MAX = 0x7fff;
+    size2s_t HW_MIN = 0x8000;
+    input.i = ((op1_ext & 0x8000) << 16) + (((op1_ext & 0x7c00) + 0x1c000) << 13) + ((op1_ext & 0x03ff) << 13); //grabbing sign, exp, and significand and ocnverting to sf32 format
+    size2s_t  result;
+
+    if(isNaNF16(op1) || isInfF16(op1) || (input.f >= (float)HW_MAX) || (input.f <= (float)HW_MIN))
+    {
+        if(input.x.sign == 1){
+            result = HW_MIN;
+        }
+        else{
+            result = HW_MAX;
+        }
+    }
+    else{
+        //convert and round to the zero
+        result = (short)input.f;
+    }
+
+#ifdef DEBUG_MMVEC_QF
+    printf("Debug : result =0x%08x\n",result);
+#endif
+    return result;
+}
+
+//FP conversion QF32 to 32bit UW
+size4u_t conv_uw_qf32(size4s_t a)
+{
+
+    size4u_t result;
+    unfloat u = parse_qf32(a);
+
+    result = rnd_sat_uw(u.exp, u.sig);
+
+    return result;
+}
+
+//FP conversion QF16 to 16bit H
+size2s_t conv_h_qf16(size2s_t a)
+{
+
+    size2s_t result;
+    unfloat u = parse_qf16(a);
+
+    result = rnd_sat_h(u.exp, u.sig);
+
+    return result;
+}
+
+//FP conversion QF32 to 32bit UW
+size2u_t conv_uh_qf16(size2s_t a)
+{
+
+    size2u_t result;
+    unfloat u = parse_qf16(a);
+
+    result = rnd_sat_uh(u.exp, u.sig);
+
+    return result;
+}
+
+//FP conversion double QF32 to double H
+size4s_t conv_h_qf32(size8s_t a)
+{
+    size2s_t result0, result1;
+    size4s_t result;
+    size4s_t a0, a1;
+    a0 = a & 0xFFFFFFFF;
+    a1 = (a>>32) & 0xFFFFFFFF;
+
+    unfloat u0 = parse_qf32(a0);
+    unfloat u1 = parse_qf32(a1);
+
+    result0 = rnd_sat_h(u0.exp, u0.sig);
+    result1 = rnd_sat_h(u1.exp, u1.sig);
+
+    result = ((size4s_t)result1 << 16) | (result0 & 0xFFFF);
+
+#ifdef DEBUG_MMVEC_QF
+    double final0 = ldexp(u0.sig, u0.exp);
+    double final1 = ldexp(u1.sig, u1.exp);
+
+    printf("[H_parse_conv_h_qf32] u0.sig:%lf, u0.exp:%d, ldexp0:%10.20f \n",u0.sig, u0.exp, final0);
+    printf("[H_parse_conv_h_qf32] u1.sig:%lf, u1.exp:%d, ldexp1:%10.20f \n",u1.sig, u1.exp, final1);
+#endif
+
+    return result;
+}
+
+//FP conversion QF32 to 32bit UW
+size4u_t conv_uh_qf32(size8s_t a)
+{
+    size2u_t result0, result1;
+    size4u_t result;
+    size4s_t a0, a1;
+    a0 = a & 0xFFFFFFFF;
+    a1 = (a>>32) & 0xFFFFFFFF;
+
+    unfloat u0 = parse_qf32(a0);
+    unfloat u1 = parse_qf32(a1);
+
+    result0 = rnd_sat_uh(u0.exp, u0.sig);
+    result1 = rnd_sat_uh(u1.exp, u1.sig);
+
+    result = ((size4u_t)result1 << 16) | (result0 & 0xFFFF);
+
+#ifdef DEBUG_MMVEC_QF
+    double final0 = ldexp(u0.sig, u0.exp);
+    double final1 = ldexp(u1.sig, u1.exp);
+
+    printf("[UH_parse_conv_uh_qf32] u0.sig:%lf, u0.exp:%d, ldexp0:%10.20f \n",u0.sig, u0.exp, final0);
+    printf("[UH_parse_conv_uh_qf32] u1.sig:%lf, u1.exp:%d, ldexp1:%10.20f \n",u1.sig, u1.exp, final1);
+#endif
+
+    return result;
+}
+
+//FP conversion double QF16 to double B
+size2s_t conv_b_qf16(size4s_t a)
+{
+    size1s_t result0, result1;
+    size2s_t result;
+    size2s_t a0, a1;
+    a0 = a & 0xFFFF;
+    a1 = (a>>16) & 0xFFFF;
+
+    unfloat u0 = parse_qf16(a0);
+    unfloat u1 = parse_qf16(a1);
+
+    result0 = rnd_sat_b(u0.exp, u0.sig);
+    result1 = rnd_sat_b(u1.exp, u1.sig);
+
+    result = ((size2s_t)result1 << 8) | (result0 & 0xFF);
+
+#ifdef DEBUG_MMVEC_QF
+    double final0 = ldexp(u0.sig, u0.exp);
+    double final1 = ldexp(u1.sig, u1.exp);
+
+    printf("[B_parse_conv_b_qf16] u0.sig:%lf, u0.exp:%d, ldexp0:%10.20f \n",u0.sig, u0.exp, final0);
+    printf("[B_parse_conv_b_qf16] u1.sig:%lf, u1.exp:%d, ldexp1:%10.20f \n",u1.sig, u1.exp, final1);
+#endif
+
+    return result;
+}
+
+//FP conversion QF32 to 32bit UW
+size2u_t conv_ub_qf16(size4s_t a)
+{
+    size1u_t result0, result1;
+    size2u_t result;
+    size2s_t a0, a1;
+    a0 = a & 0xFFFF;
+    a1 = (a>>16) & 0xFFFF;
+
+    unfloat u0 = parse_qf16(a0);
+    unfloat u1 = parse_qf16(a1);
+
+    result0 = rnd_sat_ub(u0.exp, u0.sig);
+    result1 = rnd_sat_ub(u1.exp, u1.sig);
+
+    result = ((size2u_t)result1 << 8) | (result0 & 0xFF);
+
+#ifdef DEBUG_MMVEC_QF
+    double final0 = ldexp(u0.sig, u0.exp);
+    double final1 = ldexp(u1.sig, u1.exp);
+
+    printf("[UB_parse_conv_ub_qf16] u0.sig:%lf, u0.exp:%d, ldexp0:%10.20f \n",u0.sig, u0.exp, final0);
+    printf("[UB_parse_conv_ub_qf16] u1.sig:%lf, u1.exp:%d, ldexp1:%10.20f \n",u1.sig, u1.exp, final1);
+#endif
+
+    return result;
+}
+
+//Neg/Abs
+size4s_t neg_qf32(size4s_t a)
+{
+    size4s_t result;
+    result = negate32(a);
+    return result;
+}
+size4s_t abs_qf32(size4s_t a)
+{
+    size4s_t result;
+    if((a>>31) & 1)
+        result = negate32(a);
+    else
+        result = a;
+    return result;
+}
+size2s_t neg_qf16(size2s_t a)
+{
+    size2s_t result;
+    result = negate16(a);
+    return result;
+}
+size2s_t abs_qf16(size2s_t a)
+{
+    size2s_t result;
+    if((a>>15) & 1)
+        result = negate16(a);
+    else
+        result = a;
+    return result;
+}
+size4s_t neg_sf(size4s_t a)
+{
+    size4s_t result;
+    result = negate_sf(a);
+    return result;
+}
+size4s_t abs_sf(size4s_t a)
+{
+    size4s_t result;
+    if((a>>31) & 1)
+        result = negate_sf(a);
+    else
+        result = a;
+    return result;
+}
+size2s_t neg_hf(size2s_t a)
+{
+    size2s_t result;
+    result = negate_hf(a);
+    return result;
+}
+size2s_t abs_hf(size2s_t a)
+{
+    size2s_t result;
+    if((a>>15) & 1)
+        result = negate_hf(a);
+    else
+        result = a;
+    return result;
+}
+
+//FP Compare
+int cmpgt_fp(unfloat a, unfloat b)
+{
+    int result=0;
+    double a_d, b_d;
+    a_d = ldexp(a.sig, a.exp);
+    b_d = ldexp(b.sig, b.exp);
+
+    //Filter out +0/-0 by checking the sign
+    if(a_d > b_d)
+        result=1;
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[CMPGT]a:%10.30f, b:%10.30f\n",a_d, b_d);
+#endif
+
+    return result;
+}
+
+int cmpgt_qf32(size4s_t in_a, size4s_t in_b)
+{
+    unfloat a, b;
+    a= parse_qf32(in_a);
+    b= parse_qf32(in_b);
+
+    int result=0;
+
+    result = cmpgt_fp(a,b);
+
+    return result;
+}
+
+int cmpgt_qf16(size2s_t in_a, size2s_t in_b)
+{
+
+    unfloat a, b;
+    a= parse_qf16(in_a);
+    b= parse_qf16(in_b);
+
+    int result=0;
+    result = cmpgt_fp(a,b);
+
+    return result;
+}
+
+int cmpgt_sf(size4s_t in_a, size4s_t in_b)
+{
+
+    unfloat a, b;
+    a= parse_sf(in_a);
+    b= parse_sf(in_b);
+
+    if(a.sign)
+        a.sig = (-1.0)*a.sig;
+    if(b.sign)
+        b.sig = (-1.0)*b.sig;
+
+    int result=0;
+    result = cmpgt_fp(a,b);
+
+    return result;
+}
+
+int cmpgt_hf(size2s_t in_a, size2s_t in_b)
+{
+
+    unfloat a, b;
+    a= parse_hf(in_a);
+    b= parse_hf(in_b);
+
+    if(a.sign)
+        a.sig = (-1.0)*a.sig;
+    if(b.sign)
+        b.sig = (-1.0)*b.sig;
+
+    int result=0;
+    result = cmpgt_fp(a,b);
+
+    return result;
+}
+
+int cmpgt_qf32_sf(size4s_t in_a, size4s_t in_b)
+{
+    unfloat a = parse_qf32(in_a);
+    unfloat b = parse_sf(in_b);
+    if(b.sign)
+        b.sig = (-1.0)*b.sig;
+
+    int result=0;
+    result = cmpgt_fp(a,b);
+
+    return result;
+}
+
+int cmpgt_qf16_hf(size2s_t in_a, size2s_t in_b)
+{
+    unfloat a = parse_qf16(in_a);
+    unfloat b = parse_hf(in_b);
+    if(b.sign)
+        b.sig = (-1.0)*b.sig;
+
+    int result=0;
+    result = cmpgt_fp(a,b);
+    return result;
+}
+//max/min
+ //if a==b, a is returned
+size4s_t max_qf32(    size4s_t in_a, size4s_t in_b) { return cmpgt_qf32(    in_b, in_a) ? in_b : in_a; }
+size2s_t max_qf16(    size2s_t in_a, size2s_t in_b) { return cmpgt_qf16(    in_b, in_a) ? in_b : in_a; }
+
+
+
+size4s_t is_check_zero_sf(size4s_t in_a);
+size4s_t is_check_zero_sf(size4s_t in_a) {
+    return (in_a == 0) || ((in_a & 0xFFFFFFFF) == 0x80000000);
+}
+size2s_t is_check_zero_hf(size2s_t in_a);
+size2s_t is_check_zero_hf(size2s_t in_a) {
+    return (in_a == 0) || ((in_a & 0xFFFF) == 0x8000);
+}
+
+size4s_t max_sf(      size4s_t in_a, size4s_t in_b) {
+    if (is_check_zero_sf(in_a) && is_check_zero_sf(in_b) ) {
+        return (in_a == 0) ? in_a : in_b;       // Return in_a if it's positive 0, otherwise return the other one
+    }
+    return cmpgt_sf(      in_b, in_a) ? in_b : in_a;
+
+}
+size2s_t max_hf(      size2s_t in_a, size2s_t in_b)
+{
+    if (is_check_zero_hf(in_a) && is_check_zero_hf(in_b) ) {
+        return (in_a == 0) ? in_a : in_b;
+    }
+    return cmpgt_hf(      in_b, in_a) ? in_b : in_a;
+}
+
+
+//size2s_t max_qf16_hf( size2s_t in_a, size2s_t in_b) { return cmpgt_qf16_hf( in_b, in_a) ? in_b : in_a; }
+//size4s_t max_qf32_sf( size4s_t in_a, size4s_t in_b) { return cmpgt_qf32_sf( in_b, in_a) ? in_b : in_a; }
+
+size4s_t min_qf32(    size4s_t in_a, size4s_t in_b) { return cmpgt_qf32(    in_a, in_b) ? in_b : in_a; }
+size2s_t min_qf16(    size2s_t in_a, size2s_t in_b) { return cmpgt_qf16(    in_a, in_b) ? in_b : in_a; }
+
+size4s_t min_sf(      size4s_t in_a, size4s_t in_b) {
+    if (is_check_zero_sf(in_a) && is_check_zero_sf(in_b) ) {
+        return (in_a == 0) ? in_b : in_a;
+    }
+    return cmpgt_sf(      in_a, in_b) ? in_b : in_a;
+}
+size2s_t min_hf(      size2s_t in_a, size2s_t in_b) {
+    if (is_check_zero_hf(in_a) && is_check_zero_hf(in_b) ) {
+        return (in_a == 0) ? in_b : in_a;
+    }
+    return cmpgt_hf(      in_a, in_b) ? in_b : in_a;
+}
+//size2s_t min_qf16_hf( size2s_t in_a, size2s_t in_b) { return cmpgt_qf16_hf( in_a, in_b) ? in_b : in_a; }
+//size4s_t min_qf32_sf( size4s_t in_a, size4s_t in_b) { return cmpgt_qf32_sf( in_a, in_b) ? in_b : in_a; }
+
+
+size4s_t max_qf32_sf(size4s_t in_a, size4s_t in_b)
+{
+    size4s_t result=0;
+    unfloat a,b;
+    a= parse_qf32(in_a);
+    b= parse_sf(in_b);
+    if(b.sign)
+        b.sig = (-1)*b.sig;
+
+    double a_d, b_d;
+    a_d = ldexp(a.sig, a.exp);
+    b_d = ldexp(b.sig, b.exp);
+
+    if(a_d >= b_d)
+        result = in_a;
+    else
+        result = in_b;
+
+#ifdef DEBUG_MMVEC_QF
+    printf("[max_qf32_sf]a:%10.30f, b:%10.30f\n",a_d, b_d);
+#endif
+
+    return result;
+}
+size4s_t min_qf32_sf(size4s_t in_a, size4s_t in_b)
+{
+    size4s_t result=0;
+    unfloat a,b;
+    a= parse_qf32(in_a);
+    b= parse_sf(in_b);
+    if(b.sign)
+        b.sig = (-1)*b.sig;
+    double a_d, b_d;
+    a_d = ldexp(a.sig, a.exp);
+    b_d = ldexp(b.sig, b.exp);
+    if(a_d <= b_d)
+      result = in_a;
+    else
+      result = in_b;
+#ifdef DEBUG_MMVEC_QF
+    printf("[min_qf32_sf]a:%10.30f, b:%10.30f\n",a_d, b_d);
+#endif
+    return result;
+}
+
+size2s_t max_qf16_hf(size2s_t in_a, size2s_t in_b)
+{
+    size2s_t result=0;
+    unfloat a,b;
+    a= parse_qf16(in_a);
+    b= parse_hf(in_b);
+    if(b.sign)
+        b.sig = (-1)*b.sig;
+    double a_d, b_d;
+    a_d = ldexp(a.sig, a.exp);
+    b_d = ldexp(b.sig, b.exp);
+    if(a_d >= b_d)
+      result = in_a;
+    else
+      result = in_b;
+#ifdef DEBUG_MMVEC_QF
+    printf("[max_qf16_hf]a:%10.30f, b:%10.30f\n",a_d, b_d);
+#endif
+    return result;
+}
+size2s_t min_qf16_hf(size2s_t in_a, size2s_t in_b)
+{
+    size2s_t result=0;
+    unfloat a,b;
+    a= parse_qf16(in_a);
+    b= parse_hf(in_b);
+    if(b.sign)
+        b.sig = (-1)*b.sig;
+    double a_d, b_d;
+    a_d = ldexp(a.sig, a.exp);
+    b_d = ldexp(b.sig, b.exp);
+    if(a_d <= b_d)
+      result = in_a;
+    else
+      result = in_b;
+#ifdef DEBUG_MMVEC_QF
+    printf("[min_qf16_hf]a:%10.30f, b:%10.30f\n",a_d, b_d);
+#endif
+    return result;
+}
diff --git a/target/hexagon/mmvec/mmvec_qfloat.h b/target/hexagon/mmvec/mmvec_qfloat.h
new file mode 100644
index 0000000000000..dc15cd17408b0
--- /dev/null
+++ b/target/hexagon/mmvec/mmvec_qfloat.h
@@ -0,0 +1,199 @@
+/*
+ *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MMVEC_QFLOAT_H
+#define MMVEC_QFLOAT_H 1
+
+#define HF_MAX 131008  //pow(2,17)-pow(2,6) =(2-1.0/pow(2,10))*pow(2,16)
+#define HF_MIN 1.0/pow(2,24)
+#define SF_MAX pow(2,129)-pow(2,105) //(2-1.0/pow(2,23))*pow(2,128)
+#define SF_MIN 1.0/pow(2,149)
+
+#define E_MAX_QF32 128
+#define E_MIN_QF32 -127
+#define E_MAX_QF16 16
+#define E_MIN_QF16 -15
+#define E_MAX_SF 128
+#define E_MIN_SF -126
+#define E_MAX_HF 16
+#define E_MIN_HF -14
+#define BIAS_QF32 127
+#define BIAS_QF16 15
+#define BIAS_DF 1023
+#define BIAS_SF 127
+#define BIAS_HF 15
+#define FRAC_HF 10
+#define FRAC_SF 23
+#define isNaNF32( a ) (((~(a) & 0x7F800000) == 0) && ((a) & 0x007FFFFF))
+#define isInfF32( a ) (((~(a) & 0x7F800000) == 0) && (((a) & 0x007FFFFF) == 0))
+#define isNaNF16( a ) (((~(a) & 0x7C00) == 0) && ((a) & 0x03FF))
+#define isInfF16( a ) (((~(a) & 0x7C00) == 0) && (((a) & 0x03FF) == 0))
+
+//#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
+//#define MAX(X, Y) (((X) > (Y)) ? (X) : (Y))
+
+#include "cpu.h"
+#include "hex_arch_types.h"
+
+#define epsilon 1.0/pow(2,23)
+#define units 1.0*pow(2,23)
+#define epsilon_hf 1.0/pow(2,10)
+#define units_hf 1.0*pow(2,10)
+
+typedef struct{
+  int sign;
+  int exp;
+  double sig;
+} unfloat; //Un-Normalized Float
+
+typedef struct{
+  int sign;
+  int sig;
+  int exp;
+} qf_t;
+
+typedef struct{
+  int32_t sig : 24;
+  uint32_t exp : 8;
+} qf32_t;
+
+typedef struct{
+  int32_t sig : 11;
+  uint32_t exp : 5;
+} qf16_t;
+
+typedef enum float_type{
+  QF32,
+  QF16,
+  SF,
+  HF
+} f_type;
+
+typedef union {
+	float f;
+	size4u_t i;
+	struct {
+		size4u_t mant:23;
+		size4u_t exp:8;
+		size4u_t sign:1;
+	} x;
+} sf_union;
+
+//MPY
+size4s_t mpy_qf32(size4s_t a, size4s_t b);
+size4s_t mpy_qf32_sf(size4s_t a, size4s_t b);
+size4s_t mpy_qf32_mix_sf(size4s_t a, size4s_t b);
+size2s_t mpy_qf16(size2s_t a, size2s_t b);
+size2s_t mpy_qf16_hf(size2s_t a, size2s_t b);
+size2s_t mpy_qf16_mix_hf(size2s_t a, size2s_t b);
+size8s_t mpy_qf32_qf16(size4s_t a, size4s_t b);
+size8s_t mpy_qf32_hf(size4s_t a, size4s_t b);
+size8s_t mpy_qf32_mix_hf(size4s_t a, size4s_t b);
+
+unfloat parse_qf32(size4s_t a);
+unfloat parse_qf16(size2s_t a);
+unfloat parse_sf(size4s_t a);
+unfloat parse_hf(size2s_t a);
+size4s_t rnd_sat_qf32(int exp, double sig, double sig_low);
+size2s_t rnd_sat_qf16(int exp, double sig, double sig_low);
+size4s_t rnd_sat_sf(int exp, double sig);
+size2s_t rnd_sat_hf(int exp, double sig);
+size4s_t rnd_sat_w(int exp, double sig);
+size4u_t rnd_sat_uw(int exp, double sig);
+size2s_t rnd_sat_h(int exp, double sig);
+size2u_t rnd_sat_uh(int exp, double sig);
+size1s_t rnd_sat_b(int exp, double sig);
+size1u_t rnd_sat_ub(int exp, double sig);
+size4s_t negate32(size4s_t);
+size2s_t negate16(size2s_t);
+size4s_t negate_sf(size4s_t);
+size2s_t negate_hf(size2s_t);
+
+//ADD
+size4s_t add_qf32(size4s_t a, size4s_t b);
+size4s_t add_sf(size4s_t a, size4s_t b);
+size4s_t add_qf32_mix(size4s_t a, size4s_t b);
+size2s_t add_qf16(size2s_t a, size2s_t b);
+size2s_t add_hf(size2s_t a, size2s_t b);
+size2s_t add_qf16_mix(size2s_t a, size2s_t b);
+
+//SUB
+size4s_t sub_qf32(size4s_t a, size4s_t b);
+size4s_t sub_sf(size4s_t a, size4s_t b);
+size4s_t sub_qf32_mix(size4s_t a, size4s_t b);
+size2s_t sub_qf16(size2s_t a, size2s_t b);
+size2s_t sub_hf(size2s_t a, size2s_t b);
+size2s_t sub_qf16_mix(size2s_t a, size2s_t b);
+
+//Convert
+size4s_t conv_sf_qf32(size4s_t a);
+size4s_t conv_sf_w(size4s_t a);
+size4s_t conv_sf_uw(size4u_t a);
+size2s_t conv_hf_qf16(size2s_t a);
+size2s_t conv_hf_h(size2s_t a);
+size2s_t conv_hf_uh(size2u_t a);
+size4s_t conv_hf_qf32(size8s_t a);
+size4s_t conv_hf_w(size8s_t a);
+size4s_t conv_hf_uw(size8u_t a);
+
+size4s_t conv_w_qf32(size4s_t a);
+size4u_t conv_uw_qf32(size4s_t a);
+size2s_t conv_h_qf16(size2s_t a);
+size2u_t conv_uh_qf16(size2s_t a);
+size4s_t conv_h_qf32(size8s_t a);
+size4u_t conv_uh_qf32(size8s_t a);
+size2s_t conv_b_qf16(size4s_t a);
+size2u_t conv_ub_qf16(size4s_t a);
+
+size4s_t conv_w_sf(size4s_t a);
+// size4u_t conv_uw_sf(size4s_t a);
+size2s_t conv_h_hf(size2s_t a);
+// size2u_t conv_uh_sf(size2s_t a);
+
+//Neg/Abs
+size4s_t neg_qf32(size4s_t a);
+size4s_t abs_qf32(size4s_t a);
+size2s_t neg_qf16(size2s_t a);
+size2s_t abs_qf16(size2s_t a);
+size4s_t neg_sf(size4s_t a);
+size4s_t abs_sf(size4s_t a);
+size2s_t neg_hf(size2s_t a);
+size2s_t abs_hf(size2s_t a);
+
+//Compare
+int cmpgt_fp(unfloat a,  unfloat b);
+int cmpgt_qf32(size4s_t a,  size4s_t b);
+int cmpgt_qf16(size2s_t a,  size2s_t b);
+int cmpgt_sf(size4s_t a,  size4s_t b);
+int cmpgt_hf(size2s_t a,  size2s_t b);
+int cmpgt_qf32_sf(size4s_t a,  size4s_t b);
+int cmpgt_qf16_hf(size2s_t a,  size2s_t b);
+
+//max/min
+size4s_t max_qf32(size4s_t a, size4s_t b);
+size4s_t min_qf32(size4s_t a, size4s_t b);
+size4s_t max_qf32_sf(size4s_t a, size4s_t b);
+size4s_t min_qf32_sf(size4s_t a, size4s_t b);
+size4s_t max_sf(size4s_t a, size4s_t b);
+size4s_t min_sf(size4s_t a, size4s_t b);
+size2s_t max_qf16(size2s_t a, size2s_t b);
+size2s_t min_qf16(size2s_t a, size2s_t b);
+size2s_t max_qf16_hf(size2s_t a, size2s_t b);
+size2s_t min_qf16_hf(size2s_t a, size2s_t b);
+size2s_t max_hf(size2s_t a, size2s_t b);
+size2s_t min_hf(size2s_t a, size2s_t b);
+#endif
diff --git a/target/hexagon/monitor.c b/target/hexagon/monitor.c
new file mode 100644
index 0000000000000..534ca2abe63a8
--- /dev/null
+++ b/target/hexagon/monitor.c
@@ -0,0 +1,36 @@
+/*
+ *  Copyright(c) 2022-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "cpu_bits.h"
+#include "monitor/monitor.h"
+#include "monitor/hmp-target.h"
+#include "monitor/hmp.h"
+#include "hex_mmu.h"
+
+const MonitorDef monitor_defs[] = {
+    { NULL },
+};
+
+const MonitorDef *target_monitor_defs(void)
+{
+    return monitor_defs;
+}
+
+void hmp_info_tlb(Monitor *mon, const QDict *qdict)
+{
+#if !defined(CONFIG_USER_ONLY)
+    CPUArchState *env = mon_get_cpu_env(mon);
+    if (!env) {
+        monitor_printf(mon, "No CPU available\n");
+        return;
+    }
+
+    dump_mmu(env);
+#endif
+}
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
index 6da8db8ea5c59..983f0f0c3b0ce 100644
--- a/target/hexagon/op_helper.c
+++ b/target/hexagon/op_helper.c
@@ -17,6 +17,8 @@
 
 #include "qemu/osdep.h"
 #include "qemu/log.h"
+#include "qemu/main-loop.h"
+#include "qemu/timer.h"
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
 #include "exec/helper-proto.h"
@@ -24,31 +26,59 @@
 #include "cpu.h"
 #include "internal.h"
 #include "macros.h"
+#include "sys_macros.h"
 #include "arch.h"
 #include "hex_arch_types.h"
 #include "fma_emu.h"
 #include "mmvec/mmvec.h"
 #include "mmvec/macros.h"
+#include "mmvec/mmvec_qfloat.h"
 #include "op_helper.h"
+#include "cpu_helper.h"
 #include "translate.h"
+#ifndef CONFIG_USER_ONLY
+#include "hex_mmu.h"
+#include "hw/intc/l2vic.h"
+#include "hw/timer/qct-qtimer.h"
+#include "hex_interrupts.h"
+#include "hexswi.h"
+#endif
 
 #define SF_BIAS        127
 #define SF_MANTBITS    23
 
 /* Exceptions processing helpers */
+G_NORETURN
+void do_raise_exception(CPUHexagonState *env, uint32_t exception,
+                        target_ulong PC, uintptr_t retaddr)
+{
+    CPUState *cs = env_cpu(env);
+#ifdef CONFIG_USER_ONLY
+    qemu_log_mask(CPU_LOG_INT, "%s: 0x%08x\n", __func__, exception);
+#else
+    qemu_log_mask(CPU_LOG_INT, "%s: 0x%08x, @ %08" PRIx32 "\n",
+                  __func__, exception, PC);
+
+    ASSERT_DIRECT_TO_GUEST_UNSET(env, exception);
+#endif
+
+    env->gpr[HEX_REG_PC] = PC;
+    cs->exception_index = exception;
+    cpu_loop_exit_restore(cs, retaddr);
+    cs->halted = false;
+}
+
 G_NORETURN void hexagon_raise_exception_err(CPUHexagonState *env,
                                             uint32_t exception,
                                             uintptr_t pc)
 {
-    CPUState *cs = env_cpu(env);
-    qemu_log_mask(CPU_LOG_INT, "%s: %d\n", __func__, exception);
-    cs->exception_index = exception;
-    cpu_loop_exit_restore(cs, pc);
+    do_raise_exception(env, exception, pc, 0);
 }
 
-G_NORETURN void HELPER(raise_exception)(CPUHexagonState *env, uint32_t excp)
+G_NORETURN void HELPER(raise_exception)(CPUHexagonState *env, uint32_t excp,
+                                        target_ulong PC)
 {
-    hexagon_raise_exception_err(env, excp, 0);
+    hexagon_raise_exception_err(env, excp, PC);
 }
 
 void log_store32(CPUHexagonState *env, target_ulong addr,
@@ -463,11 +493,11 @@ void HELPER(probe_pkt_scalar_hvx_stores)(CPUHexagonState *env, int mask)
  * If the load is in slot 0 and there is a store in slot1 (that
  * wasn't cancelled), we have to do the store first.
  */
-static void check_noshuf(CPUHexagonState *env, bool pkt_has_store_s1,
+static void check_noshuf(CPUHexagonState *env, bool pkt_has_scalar_store_s1,
                          uint32_t slot, target_ulong vaddr, int size,
                          uintptr_t ra)
 {
-    if (slot == 0 && pkt_has_store_s1 &&
+    if (slot == 0 && pkt_has_scalar_store_s1 &&
         ((env->slot_cancelled & (1 << 1)) == 0)) {
         probe_read(env, vaddr, size, MMU_USER_IDX, ra);
         commit_store(env, 1, ra);
@@ -1149,6 +1179,119 @@ float64 HELPER(dfmpyhh)(CPUHexagonState *env, float64 RxxV,
     return RxxV;
 }
 
+#ifndef CONFIG_USER_ONLY
+void HELPER(modify_ssr)(CPUHexagonState *env, uint32_t new, uint32_t old)
+{
+    BQL_LOCK_GUARD();
+    hexagon_modify_ssr(env, new, old);
+}
+
+static void hex_k0_lock(CPUHexagonState *env)
+{
+    BQL_LOCK_GUARD();
+    g_assert((env->k0_lock_count == 0) || (env->k0_lock_count == 1));
+
+    uint32_t syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG);
+    if (GET_SYSCFG_FIELD(SYSCFG_K0LOCK, syscfg)) {
+        if (env->k0_lock_state == HEX_LOCK_QUEUED) {
+            env->next_PC += 4;
+            env->k0_lock_count++;
+            env->k0_lock_state = HEX_LOCK_OWNER;
+            SET_SYSCFG_FIELD(env, SYSCFG_K0LOCK, 1);
+            return;
+        }
+        if (env->k0_lock_state == HEX_LOCK_OWNER) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "Double k0lock at PC: 0x%x, thread may hang\n",
+                          env->next_PC);
+            env->next_PC += 4;
+            CPUState *cs = env_cpu(env);
+            cpu_interrupt(cs, CPU_INTERRUPT_HALT);
+            return;
+        }
+        env->k0_lock_state = HEX_LOCK_WAITING;
+        CPUState *cs = env_cpu(env);
+        cpu_interrupt(cs, CPU_INTERRUPT_HALT);
+    } else {
+        env->next_PC += 4;
+        env->k0_lock_count++;
+        env->k0_lock_state = HEX_LOCK_OWNER;
+        SET_SYSCFG_FIELD(env, SYSCFG_K0LOCK, 1);
+    }
+
+}
+
+static void hex_k0_unlock(CPUHexagonState *env)
+{
+    BQL_LOCK_GUARD();
+    g_assert((env->k0_lock_count == 0) || (env->k0_lock_count == 1));
+
+    /* Nothing to do if the k0 isn't locked by this thread */
+    uint32_t syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG);
+    if ((GET_SYSCFG_FIELD(SYSCFG_K0LOCK, syscfg) == 0) ||
+        (env->k0_lock_state != HEX_LOCK_OWNER)) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "thread %d attempted to unlock k0 without having the "
+                      "lock, k0_lock state = %d, syscfg:k0 = %d\n",
+                      env->threadId, env->k0_lock_state,
+                      GET_SYSCFG_FIELD(SYSCFG_K0LOCK, syscfg));
+        g_assert(env->k0_lock_state != HEX_LOCK_WAITING);
+        return;
+    }
+
+    env->k0_lock_count--;
+    env->k0_lock_state = HEX_LOCK_UNLOCKED;
+    SET_SYSCFG_FIELD(env, SYSCFG_K0LOCK, 0);
+
+    /* Look for a thread to unlock */
+    unsigned int this_threadId = env->threadId;
+    CPUHexagonState *unlock_thread = NULL;
+    CPUState *cs;
+    CPU_FOREACH(cs) {
+        CPUHexagonState *thread = cpu_env(cs);
+
+        /*
+         * The hardware implements round-robin fairness, so we look for threads
+         * starting at env->threadId + 1 and incrementing modulo the number of
+         * threads.
+         *
+         * To implement this, we check if thread is a earlier in the modulo
+         * sequence than unlock_thread.
+         *     if unlock thread is higher than this thread
+         *         thread must be between this thread and unlock_thread
+         *     else
+         *         thread higher than this thread is ahead of unlock_thread
+         *         thread must be lower then unlock thread
+         */
+        if (thread->k0_lock_state == HEX_LOCK_WAITING) {
+            if (!unlock_thread) {
+                unlock_thread = thread;
+            } else if (unlock_thread->threadId > this_threadId) {
+                if (this_threadId < thread->threadId &&
+                    thread->threadId < unlock_thread->threadId) {
+                    unlock_thread = thread;
+                }
+            } else {
+                if (thread->threadId > this_threadId) {
+                    unlock_thread = thread;
+                }
+                if (thread->threadId < unlock_thread->threadId) {
+                    unlock_thread = thread;
+                }
+            }
+        }
+    }
+    if (unlock_thread) {
+        cs = env_cpu(unlock_thread);
+        unlock_thread->k0_lock_state = HEX_LOCK_QUEUED;
+        SET_SYSCFG_FIELD(unlock_thread, SYSCFG_K0LOCK, 1);
+        cpu_interrupt(cs, CPU_INTERRUPT_K0_UNLOCK);
+    }
+
+}
+#endif
+
+
 /* Histogram instructions */
 
 void HELPER(vhist)(CPUHexagonState *env)
@@ -1314,6 +1457,560 @@ void HELPER(vwhist128qm)(CPUHexagonState *env, int32_t uiV)
     }
 }
 
+#ifndef CONFIG_USER_ONLY
+static void hexagon_set_vid(CPUHexagonState *env, uint32_t offset, uint32_t val)
+{
+    g_assert((offset == L2VIC_VID_0) || (offset == L2VIC_VID_1));
+    CPUState *cs = env_cpu(env);
+    HexagonCPU *cpu = HEXAGON_CPU(cs);
+    const hwaddr pend_mem = cpu->l2vic_base_addr + offset;
+    cpu_physical_memory_write(pend_mem, &val, sizeof(val));
+}
+
+static void hexagon_clear_last_irq(CPUHexagonState *env, uint32_t offset)
+{
+    hexagon_set_vid(env, offset, L2VIC_CIAD_INSTRUCTION);
+}
+
+/*
+ * ciad - clear interrupt auto disable
+ *  - When taking an interrupt the hardware will set ipend.iad to
+ *    prevent another thread from servicing the interrupt.  At the completion
+ *    of service software uses ciad to clear the bit indicating the
+ *    interrupt can be accepted again.
+ *  - ciad also handshakes with the l2vic allowing a new vid on the vector
+ *    port.
+ */
+void HELPER(ciad)(CPUHexagonState *env, uint32_t mask)
+{
+    uint32_t ipendad;
+    uint32_t iad;
+
+    BQL_LOCK_GUARD();
+    ipendad = READ_SREG(HEX_SREG_IPENDAD);
+    iad = fGET_FIELD(ipendad, IPENDAD_IAD);
+    fSET_FIELD(ipendad, IPENDAD_IAD, iad & ~(mask));
+    arch_set_system_reg(env, HEX_SREG_IPENDAD, ipendad);
+    hexagon_clear_last_irq(env, L2VIC_VID_0);
+    hex_interrupt_update(env);
+}
+
+void HELPER(siad)(CPUHexagonState *env, uint32_t mask)
+{
+    uint32_t ipendad;
+    uint32_t iad;
+
+    BQL_LOCK_GUARD();
+    ipendad = READ_SREG(HEX_SREG_IPENDAD);
+    iad = fGET_FIELD(ipendad, IPENDAD_IAD);
+    fSET_FIELD(ipendad, IPENDAD_IAD, iad | mask);
+    arch_set_system_reg(env, HEX_SREG_IPENDAD, ipendad);
+    hex_interrupt_update(env);
+}
+
+void HELPER(swi)(CPUHexagonState *env, uint32_t mask)
+{
+    BQL_LOCK_GUARD();
+    hex_raise_interrupts(env, mask, CPU_INTERRUPT_SWI);
+}
+
+void HELPER(cswi)(CPUHexagonState *env, uint32_t mask)
+{
+    BQL_LOCK_GUARD();
+    hex_clear_interrupts(env, mask, CPU_INTERRUPT_SWI);
+}
+
+void HELPER(iassignw)(CPUHexagonState *env, uint32_t src)
+{
+    uint32_t modectl;
+    uint32_t thread_enabled_mask;
+    CPUState *cpu;
+
+    BQL_LOCK_GUARD();
+    modectl = arch_get_system_reg(env, HEX_SREG_MODECTL);
+    thread_enabled_mask = GET_FIELD(MODECTL_E, modectl);
+
+    CPU_FOREACH(cpu) {
+        CPUHexagonState *thread_env = &(HEXAGON_CPU(cpu)->env);
+        uint32_t thread_id_mask = 0x1 << thread_env->threadId;
+        if (thread_enabled_mask & thread_id_mask) {
+            uint32_t imask = arch_get_system_reg(thread_env, HEX_SREG_IMASK);
+            uint32_t intbitpos = (src >> 16) & 0xF;
+            uint32_t val = (src >> thread_env->threadId) & 0x1;
+            imask = deposit32(imask, intbitpos, 1, val);
+            arch_set_system_reg(thread_env, HEX_SREG_IMASK, imask);
+
+            qemu_log_mask(CPU_LOG_INT, "%s: thread " TARGET_FMT_ld
+               ", new imask 0x%" PRIx32 "\n", __func__,
+               thread_env->threadId, imask);
+        }
+    }
+    hex_interrupt_update(env);
+}
+
+uint32_t HELPER(iassignr)(CPUHexagonState *env, uint32_t src)
+{
+    uint32_t modectl;
+    uint32_t thread_enabled_mask;
+    uint32_t intbitpos;
+    uint32_t dest_reg;
+    CPUState *cpu;
+
+    BQL_LOCK_GUARD();
+    modectl = arch_get_system_reg(env, HEX_SREG_MODECTL);
+    thread_enabled_mask = GET_FIELD(MODECTL_E, modectl);
+    /* src fields are in same position as modectl, but mean different things */
+    intbitpos = GET_FIELD(MODECTL_W, src);
+    dest_reg = 0;
+    CPU_FOREACH(cpu) {
+        CPUHexagonState *thread_env = &(HEXAGON_CPU(cpu)->env);
+        uint32_t thread_id_mask = 0x1 << thread_env->threadId;
+        if (thread_enabled_mask & thread_id_mask) {
+            uint32_t imask = arch_get_system_reg(thread_env, HEX_SREG_IMASK);
+            dest_reg |= ((imask >> intbitpos) & 0x1) << thread_env->threadId;
+        }
+    }
+
+    return dest_reg;
+}
+
+void HELPER(start)(CPUHexagonState *env, uint32_t imask)
+{
+    hexagon_start_threads(env, imask);
+}
+
+void HELPER(stop)(CPUHexagonState *env)
+{
+    hexagon_stop_thread(env);
+}
+
+static inline QEMU_ALWAYS_INLINE void resched(CPUHexagonState *env)
+{
+    uint32_t schedcfg;
+    uint32_t schedcfg_en;
+    int int_number;
+    CPUState *cs;
+    uint32_t lowest_th_prio = 0; /* 0 is highest prio */
+    uint32_t bestwait_reg;
+    uint32_t best_prio;
+
+    BQL_LOCK_GUARD();
+    qemu_log_mask(CPU_LOG_INT, "%s: check resched\n", __func__);
+    schedcfg = arch_get_system_reg(env, HEX_SREG_SCHEDCFG);
+    schedcfg_en = GET_FIELD(SCHEDCFG_EN, schedcfg);
+    int_number = GET_FIELD(SCHEDCFG_INTNO, schedcfg);
+
+    if (!schedcfg_en) {
+        return;
+    }
+
+    CPU_FOREACH(cs) {
+        HexagonCPU *thread = HEXAGON_CPU(cs);
+        CPUHexagonState *thread_env = &(thread->env);
+        uint32_t th_prio = GET_FIELD(
+            STID_PRIO, arch_get_system_reg(thread_env, HEX_SREG_STID));
+        if (!hexagon_thread_is_enabled(thread_env)) {
+            continue;
+        }
+
+        lowest_th_prio = (lowest_th_prio > th_prio)
+            ? lowest_th_prio
+            : th_prio;
+    }
+
+    bestwait_reg = arch_get_system_reg(env, HEX_SREG_BESTWAIT);
+    best_prio = GET_FIELD(BESTWAIT_PRIO, bestwait_reg);
+
+    /*
+     * If the lowest priority thread is lower priority than the
+     * value in the BESTWAIT register, we must raise the reschedule
+     * interrupt on the lowest priority thread.
+     */
+    if (lowest_th_prio > best_prio) {
+        qemu_log_mask(CPU_LOG_INT,
+                "%s: raising resched int %d, cur PC 0x" TARGET_FMT_lx "\n",
+                __func__, int_number, arch_get_thread_reg(env, HEX_REG_PC));
+        SET_SYSTEM_FIELD(env, HEX_SREG_BESTWAIT, BESTWAIT_PRIO, 0x1ff);
+        hex_raise_interrupts(env, 1 << int_number, CPU_INTERRUPT_SWI);
+    }
+}
+
+void HELPER(resched)(CPUHexagonState *env)
+{
+    resched(env);
+}
+
+void HELPER(wait)(CPUHexagonState *env, target_ulong PC)
+{
+    BQL_LOCK_GUARD();
+
+    if (!fIN_DEBUG_MODE(env->threadId)) {
+        hexagon_wait_thread(env, PC);
+    }
+}
+
+void HELPER(resume)(CPUHexagonState *env, uint32_t mask)
+{
+    BQL_LOCK_GUARD();
+    hexagon_resume_threads(env, mask);
+}
+
+uint32_t HELPER(getimask)(CPUHexagonState *env, uint32_t tid)
+{
+    CPUState *cs;
+    CPU_FOREACH(cs) {
+        HexagonCPU *found_cpu = HEXAGON_CPU(cs);
+        CPUHexagonState *found_env = &found_cpu->env;
+        if (found_env->threadId == tid) {
+            target_ulong imask = arch_get_system_reg(found_env, HEX_SREG_IMASK);
+            qemu_log_mask(CPU_LOG_INT, "%s: tid %d imask = 0x%x\n",
+                          __func__, env->threadId,
+                          (unsigned)GET_FIELD(IMASK_MASK, imask));
+            return GET_FIELD(IMASK_MASK, imask);
+        }
+    }
+    return 0;
+}
+
+void HELPER(setimask)(CPUHexagonState *env, uint32_t pred, uint32_t imask)
+{
+    CPUState *cs;
+
+    BQL_LOCK_GUARD();
+    CPU_FOREACH(cs) {
+        HexagonCPU *found_cpu = HEXAGON_CPU(cs);
+        CPUHexagonState *found_env = &found_cpu->env;
+
+        if (pred == found_env->threadId) {
+            SET_SYSTEM_FIELD(found_env, HEX_SREG_IMASK, IMASK_MASK, imask);
+            qemu_log_mask(CPU_LOG_INT, "%s: tid %d imask 0x%x\n",
+                          __func__, found_env->threadId, imask);
+            hex_interrupt_update(env);
+            return;
+        }
+    }
+    hex_interrupt_update(env);
+}
+
+static bool handle_pmu_sreg_write(CPUHexagonState *env, uint32_t reg,
+                                  uint32_t val)
+{
+    if (reg == HEX_SREG_PMUSTID0 || reg == HEX_SREG_PMUSTID1
+        || reg == HEX_SREG_PMUCFG || reg == HEX_SREG_PMUEVTCFG
+        || reg == HEX_SREG_PMUEVTCFG1
+        || (reg >= HEX_SREG_PMUCNT4 && reg <= HEX_SREG_PMUCNT3)) {
+        qemu_log_mask(LOG_UNIMP, "PMU registers not yet implemented");
+        return true;
+    }
+    return false;
+}
+
+static void modify_syscfg(CPUHexagonState *env, uint32_t val)
+{
+    g_assert(bql_locked());
+
+    uint32_t old;
+    uint32_t syscfg_read_only_mask = 0x80001c00;
+    uint32_t syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG);
+
+    /* clear read-only bits if they are set in the new value. */
+    val &= ~syscfg_read_only_mask;
+    /* if read-only are currently set in syscfg keep them set. */
+    val |= (syscfg & syscfg_read_only_mask);
+
+    uint32_t tmp = val;
+    old = arch_get_system_reg(env, HEX_SREG_SYSCFG);
+    arch_set_system_reg(env, HEX_SREG_SYSCFG, tmp);
+
+    /* Check for change in MMU enable */
+    target_ulong old_mmu_enable = GET_SYSCFG_FIELD(SYSCFG_MMUEN, old);
+    uint8_t old_en = GET_SYSCFG_FIELD(SYSCFG_PCYCLEEN, old);
+    uint8_t old_gie = GET_SYSCFG_FIELD(SYSCFG_GIE, old);
+    target_ulong new_mmu_enable =
+        GET_SYSCFG_FIELD(SYSCFG_MMUEN, val);
+    if (new_mmu_enable && !old_mmu_enable) {
+        hex_mmu_on(env);
+    } else if (!new_mmu_enable && old_mmu_enable) {
+        hex_mmu_off(env);
+    }
+
+    /* Changing pcycle enable from 0 to 1 resets the counters */
+    uint8_t new_en = GET_SYSCFG_FIELD(SYSCFG_PCYCLEEN, val);
+    CPUState *cs;
+    if (old_en == 0 && new_en == 1) {
+        CPU_FOREACH(cs) {
+            CPUHexagonState *_env = cpu_env(cs);
+            _env->t_cycle_count = 0;
+        }
+    }
+
+    /* See if global interrupts are turned on */
+    uint8_t new_gie = GET_SYSCFG_FIELD(SYSCFG_GIE, val);
+    if (!old_gie && new_gie) {
+        qemu_log_mask(CPU_LOG_INT, "%s: global interrupts enabled\n", __func__);
+        hex_interrupt_update(env);
+    }
+
+    if (qemu_loglevel_mask(LOG_UNIMP)) {
+        int new_v2x = GET_SYSCFG_FIELD(SYSCFG_V2X, val);
+        if (!new_v2x) {
+            qemu_log("HVX: 64 byte vector length is unsupported\n");
+        }
+    }
+}
+
+static uint32_t hexagon_find_last_irq(CPUHexagonState *env, uint32_t vid)
+{
+    int offset = (vid ==  HEX_SREG_VID) ? L2VIC_VID_0 : L2VIC_VID_1;
+    CPUState *cs = env_cpu(env);
+    HexagonCPU *cpu = HEXAGON_CPU(cs);
+    const hwaddr pend_mem = cpu->l2vic_base_addr + offset;
+    uint32_t irq;
+    cpu_physical_memory_read(pend_mem, &irq, sizeof(irq));
+    return irq;
+}
+
+static void hexagon_read_timer(CPUHexagonState *env, uint32_t *low,
+                               uint32_t *high)
+{
+    CPUState *cs = env_cpu(env);
+    HexagonCPU *cpu = HEXAGON_CPU(cs);
+    const hwaddr low_addr  = cpu->qtimer_base_addr + QCT_QTIMER_CNTPCT_LO;
+    const hwaddr high_addr = cpu->qtimer_base_addr + QCT_QTIMER_CNTPCT_HI;
+
+    cpu_physical_memory_read(low_addr, low, sizeof(*low));
+    cpu_physical_memory_read(high_addr, high, sizeof(*high));
+}
+
+static inline bool ssr_ce_enabled(CPUHexagonState *env)
+{
+    target_ulong ssr = arch_get_system_reg(env, HEX_SREG_SSR);
+    return GET_SSR_FIELD(SSR_CE, ssr);
+}
+
+static uint32_t creg_read(CPUHexagonState *env, uint32_t reg)
+{
+    uint32_t low, high;
+    switch (reg) {
+    case HEX_REG_UPCYCLELO:
+        return ssr_ce_enabled(env) ? hexagon_get_sys_pcycle_count_low(env) : 0;
+    case HEX_REG_UPCYCLEHI:
+        return ssr_ce_enabled(env) ? hexagon_get_sys_pcycle_count_high(env) : 0;
+    case HEX_REG_UTIMERLO:
+        hexagon_read_timer(env, &low, &high);
+        return low;
+    case HEX_REG_UTIMERHI:
+        hexagon_read_timer(env, &low, &high);
+        return high;
+    default:
+        return env->gpr[reg];
+    }
+}
+
+uint32_t HELPER(creg_read)(CPUHexagonState *env, uint32_t reg)
+{
+    return creg_read(env, reg);
+}
+
+uint64_t HELPER(creg_read_pair)(CPUHexagonState *env, uint32_t reg)
+{
+    return  (uint64_t)creg_read(env, reg) |
+           (((uint64_t)creg_read(env, reg + 1)) << 32);
+}
+
+
+static inline QEMU_ALWAYS_INLINE void sreg_write(CPUHexagonState *env,
+                                                 uint32_t reg, uint32_t val)
+
+{
+    g_assert(bql_locked());
+    if ((reg == HEX_SREG_VID) || (reg == HEX_SREG_VID1)) {
+        hexagon_set_vid(env, (reg == HEX_SREG_VID) ? L2VIC_VID_0 : L2VIC_VID_1,
+                        val);
+        arch_set_system_reg(env, reg, val);
+    } else if (reg == HEX_SREG_SYSCFG) {
+        modify_syscfg(env, val);
+    } else if (reg == HEX_SREG_IMASK) {
+        val = GET_FIELD(IMASK_MASK, val);
+        arch_set_system_reg(env, reg, val);
+    } else if (reg == HEX_SREG_PCYCLELO) {
+        hexagon_set_sys_pcycle_count_low(env, val);
+    } else if (reg == HEX_SREG_PCYCLEHI) {
+        hexagon_set_sys_pcycle_count_high(env, val);
+    } else if (!handle_pmu_sreg_write(env, reg, val)) {
+        if (reg >= HEX_SREG_GLB_START) {
+            arch_set_system_reg(env, reg, val);
+        } else {
+            arch_set_system_reg(env, reg, val);
+        }
+    }
+}
+
+void HELPER(sreg_write)(CPUHexagonState *env, uint32_t reg, uint32_t val)
+{
+    BQL_LOCK_GUARD();
+    sreg_write(env, reg, val);
+}
+
+void hexagon_gdb_sreg_write(CPUHexagonState *env, uint32_t reg, uint32_t val)
+{
+    BQL_LOCK_GUARD();
+    sreg_write(env, reg, val);
+    /*
+     * The above is needed to run special logic for regs like syscfg, but it
+     * won't set read-only bits. This will:
+     */
+    arch_set_system_reg(env, reg, val);
+}
+
+void HELPER(sreg_write_pair)(CPUHexagonState *env, uint32_t reg, uint64_t val)
+{
+    BQL_LOCK_GUARD();
+    sreg_write(env, reg, val & 0xFFFFFFFF);
+    sreg_write(env, reg + 1, val >> 32);
+}
+
+static inline QEMU_ALWAYS_INLINE uint32_t sreg_read(CPUHexagonState *env,
+                                                    uint32_t reg)
+{
+    g_assert(bql_locked());
+    if (reg == HEX_SREG_PMUSTID0 || reg == HEX_SREG_PMUSTID1
+        || reg == HEX_SREG_PMUCFG || reg == HEX_SREG_PMUEVTCFG
+        || reg == HEX_SREG_PMUEVTCFG1
+        || (reg >= HEX_SREG_PMUCNT4 && reg <= HEX_SREG_PMUCNT3)) {
+        qemu_log_mask(LOG_UNIMP, "PMU registers not yet implemented");
+        return 0;
+    }
+    if ((reg == HEX_SREG_VID) || (reg == HEX_SREG_VID1)) {
+        const uint32_t vid = hexagon_find_last_irq(env, reg);
+        arch_set_system_reg(env, reg, vid);
+    } else if ((reg == HEX_SREG_TIMERLO) || (reg == HEX_SREG_TIMERHI)) {
+        uint32_t low = 0;
+        uint32_t high = 0;
+        hexagon_read_timer(env, &low, &high);
+        arch_set_system_reg(env, HEX_SREG_TIMERLO, low);
+        arch_set_system_reg(env, HEX_SREG_TIMERHI, high);
+    } else if (reg == HEX_SREG_BADVA) {
+        target_ulong ssr = arch_get_system_reg(env, HEX_SREG_SSR);
+        if (GET_SSR_FIELD(SSR_BVS, ssr)) {
+            return arch_get_system_reg(env, HEX_SREG_BADVA1);
+        }
+        return arch_get_system_reg(env, HEX_SREG_BADVA0);
+    }
+    return arch_get_system_reg(env, reg);
+}
+
+uint32_t HELPER(sreg_read)(CPUHexagonState *env, uint32_t reg)
+{
+    BQL_LOCK_GUARD();
+    return sreg_read(env, reg);
+}
+
+uint32_t hexagon_sreg_read(CPUHexagonState *env, uint32_t reg)
+{
+    return sreg_read(env, reg);
+}
+
+uint64_t HELPER(sreg_read_pair)(CPUHexagonState *env, uint32_t reg)
+{
+    BQL_LOCK_GUARD();
+    if (reg == HEX_SREG_TIMERLO) {
+        uint32_t low = 0;
+        uint32_t high = 0;
+        hexagon_read_timer(env, &low, &high);
+        arch_set_system_reg(env, HEX_SREG_TIMERLO, low);
+        arch_set_system_reg(env, HEX_SREG_TIMERHI, high);
+    } else if (reg == HEX_SREG_PCYCLELO) {
+        return hexagon_get_sys_pcycle_count(env);
+    }
+    return   (uint64_t)sreg_read(env, reg) |
+           (((uint64_t)sreg_read(env, reg + 1)) << 32);
+}
+
+uint32_t HELPER(greg_read)(CPUHexagonState *env, uint32_t reg)
+
+{
+    return hexagon_greg_read(env, reg);
+}
+
+uint64_t HELPER(greg_read_pair)(CPUHexagonState *env, uint32_t reg)
+
+{
+    if (reg == HEX_GREG_G0 || reg == HEX_GREG_G2) {
+        return (uint64_t)(env->greg[reg]) |
+               (((uint64_t)(env->greg[reg + 1])) << 32);
+    }
+    switch (reg) {
+    case HEX_GREG_GPCYCLELO: {
+        target_ulong ssr = arch_get_system_reg(env, HEX_SREG_SSR);
+        int ssr_ce = GET_SSR_FIELD(SSR_CE, ssr);
+        return ssr_ce ? hexagon_get_sys_pcycle_count(env) : 0;
+    }
+    default:
+        return (uint64_t)hexagon_greg_read(env, reg) |
+               ((uint64_t)(hexagon_greg_read(env, reg + 1)) << 32);
+    }
+}
+
+void HELPER(setprio)(CPUHexagonState *env, uint32_t thread, uint32_t prio)
+{
+    CPUState *cs;
+
+    BQL_LOCK_GUARD();
+    CPU_FOREACH(cs) {
+        HexagonCPU *found_cpu = HEXAGON_CPU(cs);
+        CPUHexagonState *found_env = &found_cpu->env;
+        if (thread == found_env->threadId) {
+            SET_SYSTEM_FIELD(found_env, HEX_SREG_STID, STID_PRIO, prio);
+            qemu_log_mask(CPU_LOG_INT, "%s: tid %d prio = 0x%x\n",
+                          __func__, found_env->threadId, prio);
+            resched(env);
+            return;
+        }
+    }
+    g_assert_not_reached();
+}
+
+void HELPER(nmi)(CPUHexagonState *env, uint32_t thread_mask)
+{
+    g_assert_not_reached();
+}
+
+void HELPER(pending_interrupt)(CPUHexagonState *env)
+{
+    BQL_LOCK_GUARD();
+    hex_interrupt_update(env);
+}
+#endif
+
+#ifdef CONFIG_USER_ONLY
+uint32_t HELPER(creg_read)(CPUHexagonState *env, uint32_t reg)
+{
+    /* These are handled directly by gen_read_ctrl_reg(). */
+    g_assert(reg != HEX_REG_UPCYCLELO && reg != HEX_REG_UPCYCLEHI);
+
+    if (reg == HEX_REG_UTIMERHI) {
+        return cpu_get_host_ticks() >> 32;
+    } else if (reg == HEX_REG_UTIMERLO) {
+        return extract32(cpu_get_host_ticks(), 0, 32);
+    }
+    return 0;
+}
+
+uint64_t HELPER(creg_read_pair)(CPUHexagonState *env, uint32_t reg)
+{
+    if (reg == HEX_REG_UPCYCLELO) {
+        /* Pretend SSR[CE] is always set. */
+        return hexagon_get_sys_pcycle_count(env);
+    }
+    if (reg == HEX_REG_UTIMERLO) {
+        return cpu_get_host_ticks();
+    }
+    return 0;
+}
+#endif
+
+
 /* These macros can be referenced in the generated helper functions */
 #define warn(...) /* Nothing */
 #define fatal(...) g_assert_not_reached();
@@ -1321,4 +2018,5 @@ void HELPER(vwhist128qm)(CPUHexagonState *env, int32_t uiV)
 #define BOGUS_HELPER(tag) \
     printf("ERROR: bogus helper: " #tag "\n")
 
+#include "mmvec/kvx_ieee.h"
 #include "helper_funcs_generated.c.inc"
diff --git a/target/hexagon/printinsn.c b/target/hexagon/printinsn.c
index 4865cdd133b5b..f780092586cf6 100644
--- a/target/hexagon/printinsn.c
+++ b/target/hexagon/printinsn.c
@@ -24,16 +24,17 @@
 
 static const char *sreg2str(unsigned int reg)
 {
-    if (reg < TOTAL_PER_THREAD_REGS) {
-        return hexagon_regnames[reg];
-    } else {
-        return "???";
+#ifndef CONFIG_USER_ONLY
+    if (reg < NUM_SREGS) {
+        return hexagon_sregnames[reg];
     }
+#endif
+    return "???";
 }
 
 static const char *creg2str(unsigned int reg)
 {
-    return sreg2str(reg + HEX_REG_SA0);
+    return hexagon_regnames[reg + HEX_REG_SA0];
 }
 
 static void snprintinsn(GString *buf, Insn *insn)
diff --git a/target/hexagon/reg_fields_def.h.inc b/target/hexagon/reg_fields_def.h.inc
index f2a58d486c55e..d2c706d56b554 100644
--- a/target/hexagon/reg_fields_def.h.inc
+++ b/target/hexagon/reg_fields_def.h.inc
@@ -39,3 +39,110 @@ DEF_REG_FIELD(USR_FPDBZE,        26, 1)
 DEF_REG_FIELD(USR_FPOVFE,        27, 1)
 DEF_REG_FIELD(USR_FPUNFE,        28, 1)
 DEF_REG_FIELD(USR_FPINPE,        29, 1)
+
+DEF_REG_FIELD(IPENDAD_IAD, 16, 16)
+DEF_REG_FIELD(IPENDAD_IPEND, 0, 16)
+
+DEF_REG_FIELD(SCHEDCFG_EN, 8, 1)
+DEF_REG_FIELD(SCHEDCFG_INTNO, 0, 4)
+DEF_REG_FIELD(BESTWAIT_PRIO, 0, 9)
+
+
+/* PTE (aka TLB entry) fields */
+DEF_REG_FIELD(PTE_PPD, 0, 24)
+DEF_REG_FIELD(PTE_C, 24, 4)
+DEF_REG_FIELD(PTE_U, 28, 1)
+DEF_REG_FIELD(PTE_R, 29, 1)
+DEF_REG_FIELD(PTE_W, 30, 1)
+DEF_REG_FIELD(PTE_X, 31, 1)
+DEF_REG_FIELD(PTE_VPN, 32, 20)
+DEF_REG_FIELD(PTE_ASID, 52, 7)
+DEF_REG_FIELD(PTE_ATR0, 59, 1)
+DEF_REG_FIELD(PTE_ATR1, 60, 1)
+DEF_REG_FIELD(PTE_PA35, 61, 1)
+DEF_REG_FIELD(PTE_G, 62, 1)
+DEF_REG_FIELD(PTE_V, 63, 1)
+
+/* SYSCFG fields */
+DEF_REG_FIELD(SYSCFG_MMUEN, 0, 1)
+DEF_REG_FIELD(SYSCFG_ICEN, 1, 1)
+DEF_REG_FIELD(SYSCFG_DCEN, 2, 1)
+DEF_REG_FIELD(SYSCFG_ISDBTRUSTED, 3, 1)
+DEF_REG_FIELD(SYSCFG_GIE, 4, 1)
+DEF_REG_FIELD(SYSCFG_ISDBREADY, 5, 1)
+DEF_REG_FIELD(SYSCFG_PCYCLEEN, 6, 1)
+DEF_REG_FIELD(SYSCFG_V2X, 7, 1)
+DEF_REG_FIELD(SYSCFG_IGNOREDABORT, 8, 1)
+DEF_REG_FIELD(SYSCFG_PM, 9, 1)
+DEF_REG_FIELD(SYSCFG_TLBLOCK, 11, 1)
+DEF_REG_FIELD(SYSCFG_K0LOCK, 12, 1)
+DEF_REG_FIELD(SYSCFG_BQ, 13, 1)
+DEF_REG_FIELD(SYSCFG_PRIO, 14, 1)
+DEF_REG_FIELD(SYSCFG_DMT, 15, 1)
+DEF_REG_FIELD(SYSCFG_L2CFG, 16, 3)
+DEF_REG_FIELD(SYSCFG_ITCM, 19, 1)
+DEF_REG_FIELD(SYSCFG_L2NWA, 21, 1)
+DEF_REG_FIELD(SYSCFG_L2NRA, 22, 1)
+DEF_REG_FIELD(SYSCFG_L2WB, 23, 1)
+DEF_REG_FIELD(SYSCFG_L2P, 24, 1)
+DEF_REG_FIELD(SYSCFG_SLVCTL0, 25, 2)
+DEF_REG_FIELD(SYSCFG_SLVCTL1, 27, 2)
+DEF_REG_FIELD(SYSCFG_L2PARTSIZE, 29, 2)
+DEF_REG_FIELD(SYSCFG_L2GCA, 31, 1)
+
+/* SSR fields */
+DEF_REG_FIELD(SSR_CAUSE, 0, 8)
+DEF_REG_FIELD(SSR_ASID, 8, 7)
+DEF_REG_FIELD(SSR_UM, 16, 1)
+DEF_REG_FIELD(SSR_EX, 17, 1)
+DEF_REG_FIELD(SSR_IE, 18, 1)
+DEF_REG_FIELD(SSR_GM, 19, 1)
+DEF_REG_FIELD(SSR_V0, 20, 1)
+DEF_REG_FIELD(SSR_V1, 21, 1)
+DEF_REG_FIELD(SSR_BVS, 22, 1)
+DEF_REG_FIELD(SSR_CE, 23, 1)
+DEF_REG_FIELD(SSR_PE, 24, 1)
+DEF_REG_FIELD(SSR_BP, 25, 1)
+DEF_REG_FIELD(SSR_XE2, 26, 1)
+DEF_REG_FIELD(SSR_XA, 27, 3)
+DEF_REG_FIELD(SSR_SS, 30, 1)
+DEF_REG_FIELD(SSR_XE, 31, 1)
+
+/* misc registers */
+DEF_REG_FIELD(IMASK_MASK, 0, 16)
+
+DEF_REG_FIELD(STID_PRIO, 16, 8)
+DEF_REG_FIELD(STID_STID, 0, 8)
+
+/* MODECTL fields */
+DEF_REG_FIELD(MODECTL_E, 0, 8)
+DEF_REG_FIELD(MODECTL_W, 16, 8)
+
+DEF_REG_FIELD(CCR_L1ICP, 0, 2)
+DEF_REG_FIELD(CCR_L1DCP, 3, 2)
+DEF_REG_FIELD(CCR_L2CP, 6, 2)
+
+DEF_REG_FIELD(CCR_HFI, 16, 1)
+DEF_REG_FIELD(CCR_HFD, 17, 1)
+DEF_REG_FIELD(CCR_HFIL2, 18, 1)
+DEF_REG_FIELD(CCR_HFDL2, 19, 1)
+DEF_REG_FIELD(CCR_SFD, 20, 1)
+
+DEF_REG_FIELD(CCR_GIE, 24, 1)
+DEF_REG_FIELD(CCR_GTE, 25, 1)
+DEF_REG_FIELD(CCR_GEE, 26, 1)
+DEF_REG_FIELD(CCR_GRE, 27, 1)
+DEF_REG_FIELD(CCR_VV1, 29, 1)
+DEF_REG_FIELD(CCR_VV2, 30, 1)
+DEF_REG_FIELD(CCR_VV3, 31, 1)
+
+/* ISDB ST fields */
+DEF_REG_FIELD(ISDBST_WAITRUN, 24, 8)
+DEF_REG_FIELD(ISDBST_ONOFF, 16, 8)
+DEF_REG_FIELD(ISDBST_DEBUGMODE, 8, 8)
+DEF_REG_FIELD(ISDBST_STUFFSTATUS, 5, 1)
+DEF_REG_FIELD(ISDBST_CMDSTATUS, 4, 1)
+DEF_REG_FIELD(ISDBST_PROCMODE, 3, 1)
+DEF_REG_FIELD(ISDBST_MBXINSTATUS, 2, 1)
+DEF_REG_FIELD(ISDBST_MBXOUTSTATUS, 1, 1)
+DEF_REG_FIELD(ISDBST_READY, 0, 1)
diff --git a/target/hexagon/sys_macros.h b/target/hexagon/sys_macros.h
new file mode 100644
index 0000000000000..e5dc1ce0ab9fd
--- /dev/null
+++ b/target/hexagon/sys_macros.h
@@ -0,0 +1,238 @@
+/*
+ * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HEXAGON_SYS_MACROS_H
+#define HEXAGON_SYS_MACROS_H
+
+/*
+ * Macro definitions for Hexagon system mode
+ */
+
+#ifndef CONFIG_USER_ONLY
+
+#define READ_SREG(NUM) arch_get_system_reg(env, NUM)
+#define READ_SGP0()    arch_get_system_reg(env, HEX_SREG_SGP0)
+#define READ_SGP1()    arch_get_system_reg(env, HEX_SREG_SGP1)
+#define READ_SGP10()   ((uint64_t)arch_get_system_reg(env, HEX_SREG_SGP0) | \
+    ((uint64_t)arch_get_system_reg(env, HEX_SREG_SGP1) << 32))
+
+#define WRITE_SREG(NUM, VAL)      log_sreg_write(env, NUM, VAL, slot)
+#define WRITE_SGP0(VAL)           log_sreg_write(env, HEX_SREG_SGP0, VAL, slot)
+#define WRITE_SGP1(VAL)           log_sreg_write(env, HEX_SREG_SGP1, VAL, slot)
+#define WRITE_SGP10(VAL) \
+    do { \
+        log_sreg_write(env, HEX_SREG_SGP0, (VAL) & 0xFFFFFFFF, slot); \
+        log_sreg_write(env, HEX_SREG_SGP1, (VAL) >> 32, slot); \
+    } while (0)
+
+#ifdef QEMU_GENERATE
+#define GET_SSR_FIELD(RES, FIELD) \
+    GET_FIELD(RES, FIELD, hex_t_sreg[HEX_SREG_SSR])
+#else
+
+#define GET_SSR_FIELD(FIELD, REGIN) \
+    (uint32_t)GET_FIELD(FIELD, REGIN)
+#define GET_SYSCFG_FIELD(FIELD, REGIN) \
+    (uint32_t)GET_FIELD(FIELD, REGIN)
+#define SET_SYSTEM_FIELD(ENV, REG, FIELD, VAL) \
+    do { \
+        uint32_t regval = arch_get_system_reg(ENV, REG); \
+        fINSERT_BITS(regval, reg_field_info[FIELD].width, \
+                     reg_field_info[FIELD].offset, (VAL)); \
+        arch_set_system_reg(ENV, REG, regval); \
+    } while (0)
+#define SET_SSR_FIELD(ENV, FIELD, VAL) \
+    SET_SYSTEM_FIELD(ENV, HEX_SREG_SSR, FIELD, VAL)
+#define SET_SYSCFG_FIELD(ENV, FIELD, VAL) \
+    SET_SYSTEM_FIELD(ENV, HEX_SREG_SYSCFG, FIELD, VAL)
+
+#define CCR_FIELD_SET(ENV, FIELD) \
+    (!!GET_FIELD(FIELD, arch_get_system_reg(ENV, HEX_SREG_CCR)))
+
+/*
+ * Direct-to-guest is not implemented yet, continuing would cause unexpected
+ * behavior, so we abort.
+ */
+#define ASSERT_DIRECT_TO_GUEST_UNSET(ENV, EXCP) \
+    do { \
+        switch (EXCP) { \
+        case HEX_EVENT_TRAP0: \
+            g_assert(!CCR_FIELD_SET(ENV, CCR_GTE)); \
+            break; \
+        case HEX_EVENT_IMPRECISE: \
+        case HEX_EVENT_PRECISE: \
+        case HEX_EVENT_FPTRAP: \
+            g_assert(!CCR_FIELD_SET(ENV, CCR_GEE)); \
+            break; \
+        default: \
+            if ((EXCP) >= HEX_EVENT_INT0) { \
+                g_assert(!CCR_FIELD_SET(ENV, CCR_GIE)); \
+            } \
+            break; \
+        } \
+    } while (0)
+#endif
+
+#define fREAD_ELR() (READ_SREG(HEX_SREG_ELR))
+
+#define fLOAD_PHYS(NUM, SIZE, SIGN, SRC1, SRC2, DST) { \
+  const uintptr_t rs = ((unsigned long)(unsigned)(SRC1)) & 0x7ff; \
+  const uintptr_t rt = ((unsigned long)(unsigned)(SRC2)) << 11; \
+  const uintptr_t addr = rs + rt;         \
+  cpu_physical_memory_read(addr, &DST, sizeof(uint32_t)); \
+}
+
+#define fPOW2_HELP_ROUNDUP(VAL) \
+    ((VAL) | \
+     ((VAL) >> 1) | \
+     ((VAL) >> 2) | \
+     ((VAL) >> 4) | \
+     ((VAL) >> 8) | \
+     ((VAL) >> 16))
+#define fPOW2_ROUNDUP(VAL) (fPOW2_HELP_ROUNDUP((VAL) - 1) + 1)
+
+#define fFRAMECHECK(ADDR, EA)  g_assert_not_reached();
+
+#define fTRAP(TRAPTYPE, IMM) \
+    register_trap_exception(env, TRAPTYPE, IMM, PC)
+
+#define fVIRTINSN_SPSWAP(IMM, REG)
+#define fVIRTINSN_GETIE(IMM, REG) { REG = 0xdeafbeef; }
+#define fVIRTINSN_SETIE(IMM, REG)
+#define fVIRTINSN_RTE(IMM, REG)
+#define fGRE_ENABLED() GET_FIELD(CCR_GRE, READ_SREG(HEX_SREG_CCR))
+#define fTRAP1_VIRTINSN(IMM) \
+    (fGRE_ENABLED() && \
+        (((IMM) == 1) || ((IMM) == 3) || ((IMM) == 4) || ((IMM) == 6)))
+
+/* Not modeled in qemu */
+
+#define MARK_LATE_PRED_WRITE(RNUM)
+#define fICINVIDX(REG)
+#define fICKILL()
+#define fDCKILL()
+#define fL2KILL()
+#define fL2UNLOCK()
+#define fL2CLEAN()
+#define fL2CLEANINV()
+#define fL2CLEANPA(REG)
+#define fL2CLEANINVPA(REG)
+#define fL2CLEANINVIDX(REG)
+#define fL2CLEANIDX(REG)
+#define fL2INVIDX(REG)
+#define fL2TAGR(INDEX, DST, DSTREG)
+#define fL2UNLOCKA(VA) ((void) VA)
+#define fL2TAGW(INDEX, PART2)
+#define fDCCLEANIDX(REG)
+#define fDCCLEANINVIDX(REG)
+
+/* Always succeed: */
+#define fL2LOCKA(EA, PDV, PDN) ((void) EA, PDV = 0xFF)
+#define fCLEAR_RTE_EX() \
+    do { \
+        uint32_t tmp = 0; \
+        tmp = arch_get_system_reg(env, HEX_SREG_SSR); \
+        fINSERT_BITS(tmp, reg_field_info[SSR_EX].width, \
+                     reg_field_info[SSR_EX].offset, 0); \
+        log_sreg_write(env, HEX_SREG_SSR, tmp, slot); \
+    } while (0)
+
+#define fDCINVIDX(REG)
+#define fDCINVA(REG) do { REG = REG; } while (0) /* Nothing to do in qemu */
+
+#define fSET_TLB_LOCK()       hex_tlb_lock(env);
+#define fCLEAR_TLB_LOCK()     hex_tlb_unlock(env);
+
+#define fSET_K0_LOCK()        hex_k0_lock(env);
+#define fCLEAR_K0_LOCK()      hex_k0_unlock(env);
+
+#define fTLB_IDXMASK(INDEX) \
+    ((INDEX) & (fPOW2_ROUNDUP(fCAST4u(env_archcpu(env)->num_tlbs)) - 1))
+
+#define fTLB_NONPOW2WRAP(INDEX)                 \
+    (((INDEX) >= env_archcpu(env)->num_tlbs) ?  \
+         ((INDEX) - env_archcpu(env)->num_tlbs) : \
+         (INDEX))
+
+
+#define fTLBW(INDEX, VALUE) \
+    hex_tlbw(env, (INDEX), (VALUE))
+#define fTLBW_EXTENDED(INDEX, VALUE) \
+    hex_tlbw(env, (INDEX), (VALUE))
+#define fTLB_ENTRY_OVERLAP(VALUE) \
+    (hex_tlb_check_overlap(env, VALUE, -1) != -2)
+#define fTLB_ENTRY_OVERLAP_IDX(VALUE) \
+    hex_tlb_check_overlap(env, VALUE, -1)
+#define fTLBR(INDEX) \
+    (env->hex_tlb->entries[fTLB_NONPOW2WRAP(fTLB_IDXMASK(INDEX))])
+#define fTLBR_EXTENDED(INDEX) \
+    (env->hex_tlb->entries[fTLB_NONPOW2WRAP(fTLB_IDXMASK(INDEX))])
+#define fTLBP(TLBHI) \
+    hex_tlb_lookup(env, ((TLBHI) >> 12), ((TLBHI) << 12))
+#define iic_flush_cache(p)
+
+#define fIN_DEBUG_MODE(TNUM) \
+    ((GET_FIELD(ISDBST_DEBUGMODE, arch_get_system_reg(env, HEX_SREG_ISDBST)) \
+        & (0x1 << (TNUM))) != 0)
+
+#define fIN_DEBUG_MODE_NO_ISDB(TNUM) false
+#define fIN_DEBUG_MODE_WARN(TNUM) false
+
+#ifdef QEMU_GENERATE
+
+/*
+ * Read tags back as zero for now:
+ *
+ * tag value in RD[31:10] for 32k, RD[31:9] for 16k
+ */
+#define fICTAGR(RS, RD, RD2) \
+    do { \
+        RD = ctx->zero; \
+    } while (0)
+#define fICTAGW(RS, RD)
+#define fICDATAR(RS, RD) \
+    do { \
+        RD = ctx->zero; \
+    } while (0)
+#define fICDATAW(RS, RD)
+
+#define fDCTAGW(RS, RT)
+/* tag: RD[23:0], state: RD[30:29] */
+#define fDCTAGR(INDEX, DST, DST_REG_NUM) \
+    do { \
+        DST = ctx->zero; \
+    } while (0)
+#else
+
+/*
+ * Read tags back as zero for now:
+ *
+ * tag value in RD[31:10] for 32k, RD[31:9] for 16k
+ */
+#define fICTAGR(RS, RD, RD2) \
+    do { \
+        RD = 0x00; \
+    } while (0)
+#define fICTAGW(RS, RD)
+#define fICDATAR(RS, RD) \
+    do { \
+        RD = 0x00; \
+    } while (0)
+#define fICDATAW(RS, RD)
+
+#define fDCTAGW(RS, RT)
+/* tag: RD[23:0], state: RD[30:29] */
+#define fDCTAGR(INDEX, DST, DST_REG_NUM) \
+    do { \
+        DST = HEX_DC_STATE_INVALID | 0x00; \
+    } while (0)
+#endif
+
+#endif
+
+#define NUM_TLB_REGS(x) (env_archcpu(env)->num_tlbs)
+
+#endif
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index fe7858703c8cb..35765d48ba11a 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -49,6 +49,7 @@ static const AnalyzeInsn opcode_analyze[XX_LAST_OPCODE] = {
 TCGv hex_gpr[TOTAL_PER_THREAD_REGS];
 TCGv hex_pred[NUM_PREGS];
 TCGv hex_slot_cancelled;
+TCGv hex_next_PC;
 TCGv hex_new_value_usr;
 TCGv hex_store_addr[STORES_MAX];
 TCGv hex_store_width[STORES_MAX];
@@ -57,9 +58,19 @@ TCGv_i64 hex_store_val64[STORES_MAX];
 TCGv hex_llsc_addr;
 TCGv hex_llsc_val;
 TCGv_i64 hex_llsc_val_i64;
+TCGv_i64 hex_cycle_count;
 TCGv hex_vstore_addr[VSTORES_MAX];
 TCGv hex_vstore_size[VSTORES_MAX];
 TCGv hex_vstore_pending[VSTORES_MAX];
+static bool need_next_PC(DisasContext *ctx);
+
+#ifndef CONFIG_USER_ONLY
+TCGv hex_greg[NUM_GREGS];
+TCGv hex_t_sreg[NUM_SREGS];
+TCGv_ptr hex_g_sreg_ptr;
+TCGv hex_g_sreg[NUM_SREGS];
+TCGv hex_cause_code;
+#endif
 
 static const char * const hexagon_prednames[] = {
   "p0", "p1", "p2", "p3"
@@ -113,11 +124,28 @@ intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum,
     return offset;
 }
 
-static void gen_exception_raw(int excp)
+static void gen_exception(int excp, target_ulong PC)
+{
+    gen_helper_raise_exception(tcg_env, tcg_constant_i32(excp),
+                               tcg_constant_tl(PC));
+}
+
+#ifndef CONFIG_USER_ONLY
+static inline void gen_precise_exception(int excp, target_ulong PC)
 {
-    gen_helper_raise_exception(tcg_env, tcg_constant_i32(excp));
+    tcg_gen_movi_tl(hex_cause_code, excp);
+    gen_exception(HEX_EVENT_PRECISE, PC);
 }
 
+static inline void gen_pcycle_counters(DisasContext *ctx)
+{
+    if (ctx->pcycle_enabled) {
+        tcg_gen_addi_i64(hex_cycle_count, hex_cycle_count, ctx->num_cycles);
+        ctx->num_cycles = 0;
+    }
+}
+#endif
+
 static void gen_exec_counters(DisasContext *ctx)
 {
     tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_PKT_CNT],
@@ -126,6 +154,10 @@ static void gen_exec_counters(DisasContext *ctx)
                     hex_gpr[HEX_REG_QEMU_INSN_CNT], ctx->num_insns);
     tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_HVX_CNT],
                     hex_gpr[HEX_REG_QEMU_HVX_CNT], ctx->num_hvx_insns);
+
+#ifndef CONFIG_USER_ONLY
+   gen_pcycle_counters(ctx);
+#endif
 }
 
 static bool use_goto_tb(DisasContext *ctx, target_ulong dest)
@@ -156,6 +188,9 @@ static void gen_end_tb(DisasContext *ctx)
 
     gen_exec_counters(ctx);
 
+    if (ctx->need_next_pc) {
+        tcg_gen_mov_tl(hex_gpr[HEX_REG_PC], hex_next_PC);
+    }
     if (ctx->branch_cond != TCG_COND_NEVER) {
         if (ctx->branch_cond != TCG_COND_ALWAYS) {
             TCGLabel *skip = gen_new_label();
@@ -185,13 +220,14 @@ static void gen_end_tb(DisasContext *ctx)
     ctx->base.is_jmp = DISAS_NORETURN;
 }
 
-static void gen_exception_end_tb(DisasContext *ctx, int excp)
+void hex_gen_exception_end_tb(DisasContext *ctx, int excp)
 {
-    gen_exec_counters(ctx);
-    tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->next_PC);
-    gen_exception_raw(excp);
+#ifdef CONFIG_USER_ONLY
+    gen_exception(excp, ctx->pkt->pc);
+#else
+    gen_precise_exception(excp, ctx->pkt->pc);
+#endif
     ctx->base.is_jmp = DISAS_NORETURN;
-
 }
 
 static int read_packet_words(CPUHexagonState *env, DisasContext *ctx,
@@ -232,6 +268,18 @@ static bool check_for_attrib(Packet *pkt, int attrib)
     return false;
 }
 
+#ifndef CONFIG_USER_ONLY
+static bool check_for_opcode(Packet *pkt, uint16_t opcode)
+{
+    for (int i = 0; i < pkt->num_insns; i++) {
+        if (pkt->insn[i].opcode == opcode) {
+            return true;
+        }
+    }
+    return false;
+}
+#endif
+
 static bool need_slot_cancelled(Packet *pkt)
 {
     /* We only need slot_cancelled for conditional store instructions */
@@ -245,21 +293,111 @@ static bool need_slot_cancelled(Packet *pkt)
     return false;
 }
 
-static bool need_next_PC(DisasContext *ctx)
+#ifndef CONFIG_USER_ONLY
+static bool sreg_write_to_global(int reg_num)
 {
-    Packet *pkt = ctx->pkt;
+    return reg_num == HEX_SREG_SSR ||
+           reg_num == HEX_SREG_STID ||
+           reg_num == HEX_SREG_IMASK ||
+           reg_num == HEX_SREG_IPENDAD ||
+           reg_num == HEX_SREG_BESTWAIT ||
+           reg_num == HEX_SREG_SCHEDCFG;
+}
 
-    /* Check for conditional control flow or HW loop end */
+static bool has_sreg_write_to_global(Packet const *pkt)
+{
     for (int i = 0; i < pkt->num_insns; i++) {
-        uint16_t opcode = pkt->insn[i].opcode;
-        if (GET_ATTRIB(opcode, A_CONDEXEC) && GET_ATTRIB(opcode, A_COF)) {
-            return true;
+        Insn const *insn = &pkt->insn[i];
+        uint16_t opcode = insn->opcode;
+        if (opcode == Y2_tfrsrcr) {
+            /* Write to a single sreg */
+            int reg_num = insn->regno[0];
+            if (sreg_write_to_global(reg_num)) {
+                return true;
+            }
+        } else if (opcode == Y4_tfrspcp) {
+            /* Write to a sreg pair */
+            int reg_num = insn->regno[0];
+            if (sreg_write_to_global(reg_num)) {
+                return true;
+            }
+            if (sreg_write_to_global(reg_num + 1)) {
+                return true;
+            }
         }
-        if (GET_ATTRIB(opcode, A_HWLOOP0_END) ||
-            GET_ATTRIB(opcode, A_HWLOOP1_END)) {
-            return true;
+    }
+    return false;
+}
+#endif
+
+static bool pkt_ends_tb(Packet *pkt)
+{
+    if (pkt->pkt_has_cof) {
+        return true;
+    }
+#ifndef CONFIG_USER_ONLY
+    /* System mode instructions that end TLB */
+    if (check_for_opcode(pkt, Y2_swi) ||
+        check_for_opcode(pkt, Y2_cswi) ||
+        check_for_opcode(pkt, Y2_ciad) ||
+        check_for_opcode(pkt, Y4_siad) ||
+        check_for_opcode(pkt, Y2_wait) ||
+        check_for_opcode(pkt, Y2_resume) ||
+        check_for_opcode(pkt, Y2_iassignw) ||
+        check_for_opcode(pkt, Y2_setimask) ||
+        check_for_opcode(pkt, Y4_nmi) ||
+        check_for_opcode(pkt, Y2_setprio) ||
+        check_for_opcode(pkt, Y2_start) ||
+        check_for_opcode(pkt, Y2_stop) ||
+        check_for_opcode(pkt, Y2_k0lock) ||
+        check_for_opcode(pkt, Y2_k0unlock) ||
+        check_for_opcode(pkt, Y2_tlblock) ||
+        check_for_opcode(pkt, Y2_tlbunlock) ||
+        check_for_opcode(pkt, Y2_break) ||
+        check_for_opcode(pkt, Y2_isync) ||
+        check_for_opcode(pkt, Y2_syncht) ||
+        check_for_opcode(pkt, Y2_tlbp) ||
+        check_for_opcode(pkt, Y2_tlbw) ||
+        check_for_opcode(pkt, Y5_ctlbw) ||
+        check_for_opcode(pkt, Y5_tlbasidi)) {
+        return true;
+    }
+
+    /*
+     * Check for sreg writes that would end the TB
+     */
+    if (check_for_attrib(pkt, A_IMPLICIT_WRITES_SSR)) {
+        return true;
+    }
+    if (has_sreg_write_to_global(pkt)) {
+        return true;
+    }
+#endif
+    return false;
+}
+
+
+static bool need_next_PC(DisasContext *ctx)
+{
+    Packet *pkt = ctx->pkt;
+    if (pkt->pkt_has_cof || ctx->pkt_ends_tb) {
+        for (int i = 0; i < pkt->num_insns; i++) {
+            uint16_t opcode = pkt->insn[i].opcode;
+            if ((GET_ATTRIB(opcode, A_CONDEXEC) && GET_ATTRIB(opcode, A_COF)) ||
+                GET_ATTRIB(opcode, A_HWLOOP0_END) ||
+                GET_ATTRIB(opcode, A_HWLOOP1_END)) {
+                return true;
+            }
         }
     }
+    /*
+     * We end the TB on some instructions that do not change the flow (for
+     * other reasons). In these cases, we must set pc too, as the insn won't
+     * do it themselves.
+     */
+    if (ctx->pkt_ends_tb && !check_for_attrib(pkt, A_COF)) {
+        return true;
+    }
     return false;
 }
 
@@ -291,6 +429,16 @@ static void mark_implicit_reg_write(DisasContext *ctx, int attrib, int rnum)
     }
 }
 
+#ifndef CONFIG_USER_ONLY
+static void mark_implicit_sreg_write(DisasContext *ctx, int attrib, int snum)
+{
+    uint16_t opcode = ctx->insn->opcode;
+    if (GET_ATTRIB(opcode, attrib)) {
+        ctx_log_sreg_write(ctx, snum);
+    }
+}
+#endif
+
 static void mark_implicit_reg_writes(DisasContext *ctx)
 {
     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_FP,  HEX_REG_FP);
@@ -302,6 +450,12 @@ static void mark_implicit_reg_writes(DisasContext *ctx)
     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA1, HEX_REG_SA1);
     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_USR, HEX_REG_USR);
     mark_implicit_reg_write(ctx, A_FPOP, HEX_REG_USR);
+
+#ifndef CONFIG_USER_ONLY
+    mark_implicit_sreg_write(ctx, A_IMPLICIT_WRITES_SGP0, HEX_SREG_SGP0);
+    mark_implicit_sreg_write(ctx, A_IMPLICIT_WRITES_SGP1, HEX_SREG_SGP1);
+    mark_implicit_sreg_write(ctx, A_IMPLICIT_WRITES_SSR, HEX_SREG_SSR);
+#endif
 }
 
 static void mark_implicit_pred_write(DisasContext *ctx, int attrib, int pnum)
@@ -400,7 +554,14 @@ static void analyze_packet(DisasContext *ctx)
 static void gen_start_packet(DisasContext *ctx)
 {
     Packet *pkt = ctx->pkt;
+#ifndef CONFIG_USER_ONLY
+    target_ulong next_PC = (check_for_opcode(pkt, Y2_k0lock) ||
+                            check_for_opcode(pkt, Y2_tlblock)) ?
+                               ctx->base.pc_next :
+                               ctx->base.pc_next + pkt->encod_pkt_size_in_bytes;
+#else
     target_ulong next_PC = ctx->base.pc_next + pkt->encod_pkt_size_in_bytes;
+#endif
     int i;
 
     /* Clear out the disassembly context */
@@ -408,6 +569,10 @@ static void gen_start_packet(DisasContext *ctx)
     ctx->reg_log_idx = 0;
     bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS);
     bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
+#ifndef CONFIG_USER_ONLY
+    ctx->greg_log_idx = 0;
+    ctx->sreg_log_idx = 0;
+#endif
     ctx->preg_log_idx = 0;
     bitmap_zero(ctx->pregs_written, NUM_PREGS);
     ctx->future_vregs_idx = 0;
@@ -440,6 +605,25 @@ static void gen_start_packet(DisasContext *ctx)
      * gen phase, so clear it again.
      */
     bitmap_zero(ctx->pregs_written, NUM_PREGS);
+#ifndef CONFIG_USER_ONLY
+    for (i = 0; i < NUM_SREGS; i++) {
+        ctx->t_sreg_new_value[i] = NULL;
+    }
+    for (i = 0; i < ctx->sreg_log_idx; i++) {
+        int reg_num = ctx->sreg_log[i];
+        if (reg_num < HEX_SREG_GLB_START) {
+            ctx->t_sreg_new_value[reg_num] = tcg_temp_new();
+            tcg_gen_mov_tl(ctx->t_sreg_new_value[reg_num], hex_t_sreg[reg_num]);
+        }
+    }
+    for (i = 0; i < NUM_GREGS; i++) {
+        ctx->greg_new_value[i] = NULL;
+    }
+    for (i = 0; i < ctx->greg_log_idx; i++) {
+        int reg_num = ctx->greg_log[i];
+        ctx->greg_new_value[reg_num] = tcg_temp_new();
+    }
+#endif
 
     /* Initialize the runtime state for packet semantics */
     if (need_slot_cancelled(pkt)) {
@@ -448,12 +632,14 @@ static void gen_start_packet(DisasContext *ctx)
     ctx->branch_taken = NULL;
     if (pkt->pkt_has_cof) {
         ctx->branch_taken = tcg_temp_new();
-        if (pkt->pkt_has_multi_cof) {
-            tcg_gen_movi_tl(ctx->branch_taken, 0);
-        }
-        if (need_next_PC(ctx)) {
-            tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], next_PC);
-        }
+    }
+    if (pkt->pkt_has_multi_cof) {
+        tcg_gen_movi_tl(ctx->branch_taken, 0);
+    }
+    ctx->pkt_ends_tb = pkt_ends_tb(pkt);
+    ctx->need_next_pc = need_next_PC(ctx);
+    if (ctx->need_next_pc) {
+        tcg_gen_movi_tl(hex_next_PC, next_PC);
     }
 
     /* Preload the predicated registers into get_result_gpr(ctx, i) */
@@ -558,7 +744,7 @@ static void gen_insn(DisasContext *ctx)
         ctx->insn->generate(ctx);
         mark_store_width(ctx);
     } else {
-        gen_exception_end_tb(ctx, HEX_CAUSE_INVALID_OPCODE);
+        hex_gen_exception_end_tb(ctx, HEX_CAUSE_INVALID_OPCODE);
     }
 }
 
@@ -589,6 +775,59 @@ static void gen_reg_writes(DisasContext *ctx)
     }
 }
 
+#ifndef CONFIG_USER_ONLY
+static void gen_greg_writes(DisasContext *ctx)
+{
+    int i;
+
+    for (i = 0; i < ctx->greg_log_idx; i++) {
+        int reg_num = ctx->greg_log[i];
+
+        tcg_gen_mov_tl(hex_greg[reg_num], ctx->greg_new_value[reg_num]);
+    }
+}
+
+
+static void gen_sreg_writes(DisasContext *ctx)
+{
+    int i;
+
+    TCGv old_reg = tcg_temp_new();
+    for (i = 0; i < ctx->sreg_log_idx; i++) {
+        int reg_num = ctx->sreg_log[i];
+
+        if (reg_num == HEX_SREG_SSR) {
+            tcg_gen_mov_tl(old_reg, hex_t_sreg[reg_num]);
+            tcg_gen_mov_tl(hex_t_sreg[reg_num], ctx->t_sreg_new_value[reg_num]);
+            gen_helper_modify_ssr(tcg_env, ctx->t_sreg_new_value[reg_num],
+                                  old_reg);
+            /* This can change processor state, so end the TB */
+            ctx->base.is_jmp = DISAS_NORETURN;
+        } else if ((reg_num == HEX_SREG_STID) ||
+                   (reg_num == HEX_SREG_IMASK) ||
+                   (reg_num == HEX_SREG_IPENDAD)) {
+            if (reg_num < HEX_SREG_GLB_START) {
+                tcg_gen_mov_tl(old_reg, hex_t_sreg[reg_num]);
+                tcg_gen_mov_tl(hex_t_sreg[reg_num],
+                               ctx->t_sreg_new_value[reg_num]);
+            }
+            /* This can change the interrupt state, so end the TB */
+            gen_helper_pending_interrupt(tcg_env);
+            ctx->base.is_jmp = DISAS_NORETURN;
+        } else if ((reg_num == HEX_SREG_BESTWAIT) ||
+                   (reg_num == HEX_SREG_SCHEDCFG)) {
+            /* This can trigger resched interrupt, so end the TB */
+            gen_helper_resched(tcg_env);
+            ctx->base.is_jmp = DISAS_NORETURN;
+        }
+
+        if (reg_num < HEX_SREG_GLB_START) {
+            tcg_gen_mov_tl(hex_t_sreg[reg_num], ctx->t_sreg_new_value[reg_num]);
+        }
+    }
+}
+#endif
+
 static void gen_pred_writes(DisasContext *ctx)
 {
     /* Early exit if not needed or the log is empty */
@@ -693,11 +932,11 @@ static void process_store_log(DisasContext *ctx)
      *  the memory accesses overlap.
      */
     Packet *pkt = ctx->pkt;
-    if (pkt->pkt_has_store_s1) {
+    if (pkt->pkt_has_scalar_store_s1) {
         g_assert(!pkt->pkt_has_dczeroa);
         process_store(ctx, 1);
     }
-    if (pkt->pkt_has_store_s0) {
+    if (pkt->pkt_has_scalar_store_s0) {
         g_assert(!pkt->pkt_has_dczeroa);
         process_store(ctx, 0);
     }
@@ -779,6 +1018,7 @@ static void gen_commit_hvx(DisasContext *ctx)
     }
 }
 
+static const int PCYCLES_PER_PACKET = 3;
 static void update_exec_counters(DisasContext *ctx)
 {
     Packet *pkt = ctx->pkt;
@@ -798,6 +1038,7 @@ static void update_exec_counters(DisasContext *ctx)
     }
 
     ctx->num_packets++;
+    ctx->num_cycles += PCYCLES_PER_PACKET;
     ctx->num_insns += num_real_insns;
     ctx->num_hvx_insns += num_hvx_insns;
 }
@@ -822,8 +1063,9 @@ static void gen_commit_packet(DisasContext *ctx)
      * involved in committing the packet.
      */
     Packet *pkt = ctx->pkt;
-    bool has_store_s0 = pkt->pkt_has_store_s0;
-    bool has_store_s1 = (pkt->pkt_has_store_s1 && !ctx->s1_store_processed);
+    bool has_store_s0 = pkt->pkt_has_scalar_store_s0;
+    bool has_store_s1 =
+        (pkt->pkt_has_scalar_store_s1 && !ctx->s1_store_processed);
     bool has_hvx_store = pkt_has_hvx_store(pkt);
     if (pkt->pkt_has_dczeroa) {
         /*
@@ -886,6 +1128,10 @@ static void gen_commit_packet(DisasContext *ctx)
     process_store_log(ctx);
 
     gen_reg_writes(ctx);
+#if !defined(CONFIG_USER_ONLY)
+    gen_greg_writes(ctx);
+    gen_sreg_writes(ctx);
+#endif
     gen_pred_writes(ctx);
     if (pkt->pkt_has_hvx) {
         gen_commit_hvx(ctx);
@@ -898,7 +1144,7 @@ static void gen_commit_packet(DisasContext *ctx)
         pkt->vhist_insn->generate(ctx);
     }
 
-    if (pkt->pkt_has_cof) {
+    if (ctx->pkt_ends_tb || ctx->base.is_jmp == DISAS_NORETURN) {
         gen_end_tb(ctx);
     }
 }
@@ -912,7 +1158,7 @@ static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx)
 
     nwords = read_packet_words(env, ctx, words);
     if (!nwords) {
-        gen_exception_end_tb(ctx, HEX_CAUSE_INVALID_PACKET);
+        hex_gen_exception_end_tb(ctx, HEX_CAUSE_INVALID_PACKET);
         return;
     }
 
@@ -927,7 +1173,7 @@ static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx)
         gen_commit_packet(ctx);
         ctx->base.pc_next += pkt.encod_pkt_size_in_bytes;
     } else {
-        gen_exception_end_tb(ctx, HEX_CAUSE_INVALID_PACKET);
+        hex_gen_exception_end_tb(ctx, HEX_CAUSE_INVALID_PACKET);
     }
 }
 
@@ -938,13 +1184,16 @@ static void hexagon_tr_init_disas_context(DisasContextBase *dcbase,
     HexagonCPU *hex_cpu = env_archcpu(cpu_env(cs));
     uint32_t hex_flags = dcbase->tb->flags;
 
-    ctx->mem_idx = MMU_USER_IDX;
+    ctx->mem_idx = FIELD_EX32(hex_flags, TB_FLAGS, MMU_INDEX);
     ctx->num_packets = 0;
+    ctx->num_cycles = 0;
     ctx->num_insns = 0;
     ctx->num_hvx_insns = 0;
     ctx->branch_cond = TCG_COND_NEVER;
     ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP);
     ctx->short_circuit = hex_cpu->short_circuit;
+    ctx->pcycle_enabled = FIELD_EX32(hex_flags, TB_FLAGS, PCYCLE_ENABLED);
+    ctx->need_next_pc = false;
 }
 
 static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu)
@@ -1050,6 +1299,26 @@ void hexagon_translate_init(void)
 
     opcode_init();
 
+#ifndef CONFIG_USER_ONLY
+    for (i = 0; i < NUM_GREGS; i++) {
+            hex_greg[i] = tcg_global_mem_new(tcg_env,
+                offsetof(CPUHexagonState, greg[i]),
+                hexagon_gregnames[i]);
+    }
+    hex_g_sreg_ptr = tcg_global_mem_new_ptr(tcg_env,
+            offsetof(CPUHexagonState, g_sreg), "hex_g_sreg_ptr");
+    for (i = 0; i < NUM_SREGS; i++) {
+        if (i < HEX_SREG_GLB_START) {
+            hex_t_sreg[i] = tcg_global_mem_new(tcg_env,
+                offsetof(CPUHexagonState, t_sreg[i]),
+                hexagon_sregnames[i]);
+        } else {
+            hex_g_sreg[i] = tcg_global_mem_new(hex_g_sreg_ptr,
+                i * sizeof(target_ulong),
+                hexagon_sregnames[i]);
+        }
+    }
+#endif
     for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
         hex_gpr[i] = tcg_global_mem_new(tcg_env,
             offsetof(CPUHexagonState, gpr[i]),
@@ -1071,6 +1340,15 @@ void hexagon_translate_init(void)
         offsetof(CPUHexagonState, llsc_val), "llsc_val");
     hex_llsc_val_i64 = tcg_global_mem_new_i64(tcg_env,
         offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64");
+    hex_cycle_count = tcg_global_mem_new_i64(tcg_env,
+            offsetof(CPUHexagonState, t_cycle_count), "t_cycle_count");
+#ifndef CONFIG_USER_ONLY
+    hex_cause_code = tcg_global_mem_new(tcg_env,
+        offsetof(CPUHexagonState, cause_code), "cause_code");
+#endif
+    hex_next_PC = tcg_global_mem_new(tcg_env,
+        offsetof(CPUHexagonState, next_PC), "next_PC");
+
     for (i = 0; i < STORES_MAX; i++) {
         snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i);
         hex_store_addr[i] = tcg_global_mem_new(tcg_env,
diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index d251e2233fda7..ad1a2f4045347 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -39,6 +39,14 @@ typedef struct DisasContext {
     int reg_log_idx;
     DECLARE_BITMAP(regs_written, TOTAL_PER_THREAD_REGS);
     DECLARE_BITMAP(predicated_regs, TOTAL_PER_THREAD_REGS);
+#ifndef CONFIG_USER_ONLY
+    int greg_log[GREG_WRITES_MAX];
+    int greg_log_idx;
+    int sreg_log[SREG_WRITES_MAX];
+    int sreg_log_idx;
+    TCGv t_sreg_new_value[NUM_SREGS];
+    TCGv greg_new_value[NUM_GREGS];
+#endif
     int preg_log[PRED_WRITES_MAX];
     int preg_log_idx;
     DECLARE_BITMAP(pregs_written, NUM_PREGS);
@@ -75,10 +83,42 @@ typedef struct DisasContext {
     TCGv new_pred_value[NUM_PREGS];
     TCGv branch_taken;
     TCGv dczero_addr;
+    bool pcycle_enabled;
+    bool pkt_ends_tb;
+    bool need_next_pc;
+    uint32_t num_cycles;
 } DisasContext;
 
 bool is_gather_store_insn(DisasContext *ctx);
 
+#ifndef CONFIG_USER_ONLY
+static inline void ctx_log_greg_write(DisasContext *ctx, int rnum)
+{
+    if (rnum <= HEX_GREG_G3) {
+        ctx->greg_log[ctx->greg_log_idx] = rnum;
+        ctx->greg_log_idx++;
+    }
+}
+
+static inline void ctx_log_greg_write_pair(DisasContext *ctx, int rnum)
+{
+    ctx_log_greg_write(ctx, rnum);
+    ctx_log_greg_write(ctx, rnum + 1);
+}
+
+static inline void ctx_log_sreg_write(DisasContext *ctx, int rnum)
+{
+    ctx->sreg_log[ctx->sreg_log_idx] = rnum;
+    ctx->sreg_log_idx++;
+}
+
+static inline void ctx_log_sreg_write_pair(DisasContext *ctx, int rnum)
+{
+    ctx_log_sreg_write(ctx, rnum);
+    ctx_log_sreg_write(ctx, rnum + 1);
+}
+#endif
+
 static inline void ctx_log_pred_write(DisasContext *ctx, int pnum)
 {
     if (!test_bit(pnum, ctx->pregs_written)) {
@@ -267,6 +307,7 @@ static inline void ctx_log_qreg_read(DisasContext *ctx,
 }
 
 extern TCGv hex_gpr[TOTAL_PER_THREAD_REGS];
+extern TCGv hex_next_PC;
 extern TCGv hex_pred[NUM_PREGS];
 extern TCGv hex_slot_cancelled;
 extern TCGv hex_new_value_usr;
@@ -280,6 +321,15 @@ extern TCGv_i64 hex_llsc_val_i64;
 extern TCGv hex_vstore_addr[VSTORES_MAX];
 extern TCGv hex_vstore_size[VSTORES_MAX];
 extern TCGv hex_vstore_pending[VSTORES_MAX];
+#ifndef CONFIG_USER_ONLY
+extern TCGv hex_greg[NUM_GREGS];
+extern TCGv hex_t_sreg[NUM_SREGS];
+extern TCGv_ptr hex_g_sreg_ptr;
+extern TCGv hex_g_sreg[NUM_SREGS];
+#endif
+
+
+void hex_gen_exception_end_tb(DisasContext *ctx, int excp);
 
 void process_store(DisasContext *ctx, int slot_num);
 
diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c
index 5655677431c68..2a85495d02f9e 100644
--- a/target/hppa/cpu.c
+++ b/target/hppa/cpu.c
@@ -131,10 +131,12 @@ static void hppa_restore_state_to_opc(CPUState *cs,
     env->psw_n = 0;
 }
 
+#ifndef CONFIG_USER_ONLY
 static bool hppa_cpu_has_work(CPUState *cs)
 {
     return cs->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI);
 }
+#endif /* !CONFIG_USER_ONLY */
 
 static int hppa_cpu_mmu_index(CPUState *cs, bool ifetch)
 {
@@ -150,6 +152,7 @@ static int hppa_cpu_mmu_index(CPUState *cs, bool ifetch)
 static void hppa_cpu_disas_set_info(CPUState *cs, disassemble_info *info)
 {
     info->mach = bfd_mach_hppa20;
+    info->endian = BFD_ENDIAN_BIG;
     info->print_insn = print_insn_hppa;
 }
 
@@ -241,11 +244,12 @@ static ObjectClass *hppa_cpu_class_by_name(const char *cpu_model)
 #include "hw/core/sysemu-cpu-ops.h"
 
 static const struct SysemuCPUOps hppa_sysemu_ops = {
+    .has_work = hppa_cpu_has_work,
     .get_phys_page_debug = hppa_cpu_get_phys_page_debug,
 };
 #endif
 
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 
 static const TCGCPUOps hppa_tcg_ops = {
     .initialize = hppa_translate_init,
@@ -277,7 +281,6 @@ static void hppa_cpu_class_init(ObjectClass *oc, void *data)
                                        &acc->parent_phases);
 
     cc->class_by_name = hppa_cpu_class_by_name;
-    cc->has_work = hppa_cpu_has_work;
     cc->mmu_index = hppa_cpu_mmu_index;
     cc->dump_state = hppa_cpu_dump_state;
     cc->set_pc = hppa_cpu_set_pc;
diff --git a/target/hppa/fpu_helper.c b/target/hppa/fpu_helper.c
index 8ff4b4480490c..a62d9d30831a3 100644
--- a/target/hppa/fpu_helper.c
+++ b/target/hppa/fpu_helper.c
@@ -67,6 +67,7 @@ void HELPER(loaded_fr0)(CPUHPPAState *env)
     set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->fp_status);
     /* Default NaN: sign bit clear, msb-1 frac bit set */
     set_float_default_nan_pattern(0b00100000, &env->fp_status);
+    set_snan_bit_is_one(true, &env->fp_status);
     /*
      * "PA-RISC 2.0 Architecture" says it is IMPDEF whether the flushing
      * enabled by FPSR.D happens before or after rounding. We pick "before"
diff --git a/target/hppa/mem_helper.c b/target/hppa/mem_helper.c
index 304f0b61e256b..fb1d93ef1f194 100644
--- a/target/hppa/mem_helper.c
+++ b/target/hppa/mem_helper.c
@@ -21,6 +21,7 @@
 #include "qemu/log.h"
 #include "cpu.h"
 #include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "exec/page-protection.h"
 #include "exec/helper-proto.h"
 #include "hw/core/cpu.h"
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index b5dd60d2812e0..1b64ceaaba467 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -247,6 +247,9 @@ static uint32_t max_thread_ids_for_cache(X86CPUTopoInfo *topo_info,
     case CPU_TOPOLOGY_LEVEL_CORE:
         num_ids = 1 << apicid_core_offset(topo_info);
         break;
+    case CPU_TOPOLOGY_LEVEL_MODULE:
+        num_ids = 1 << apicid_module_offset(topo_info);
+        break;
     case CPU_TOPOLOGY_LEVEL_DIE:
         num_ids = 1 << apicid_die_offset(topo_info);
         break;
@@ -255,7 +258,7 @@ static uint32_t max_thread_ids_for_cache(X86CPUTopoInfo *topo_info,
         break;
     default:
         /*
-         * Currently there is no use case for THREAD and MODULE, so use
+         * Currently there is no use case for THREAD, so use
          * assert directly to facilitate debugging.
          */
         g_assert_not_reached();
@@ -5495,6 +5498,130 @@ static const X86CPUDefinition builtin_x86_defs[] = {
         .model_id = "AMD EPYC-Genoa Processor",
         .cache_info = &epyc_genoa_cache_info,
     },
+    {
+        .name = "YongFeng",
+        .level = 0x1F,
+        .vendor = CPUID_VENDOR_ZHAOXIN1,
+        .family = 7,
+        .model = 11,
+        .stepping = 3,
+        /* missing: CPUID_HT, CPUID_TM, CPUID_PBE */
+        .features[FEAT_1_EDX] =
+            CPUID_SS | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX |
+            CPUID_ACPI | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV |
+            CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC |
+            CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC |
+            CPUID_PSE | CPUID_DE | CPUID_VME | CPUID_FP87,
+        /*
+         * missing: CPUID_EXT_OSXSAVE, CPUID_EXT_XTPR, CPUID_EXT_TM2,
+         * CPUID_EXT_EST, CPUID_EXT_SMX, CPUID_EXT_VMX
+         */
+        .features[FEAT_1_ECX] =
+            CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX |
+            CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_TSC_DEADLINE_TIMER |
+            CPUID_EXT_POPCNT | CPUID_EXT_MOVBE | CPUID_EXT_X2APIC |
+            CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | CPUID_EXT_PCID |
+            CPUID_EXT_CX16 | CPUID_EXT_FMA | CPUID_EXT_SSSE3 |
+            CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3,
+        .features[FEAT_7_0_EBX] =
+            CPUID_7_0_EBX_SHA_NI | CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_ADX |
+            CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_BMI2 |
+            CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_BMI1 |
+            CPUID_7_0_EBX_FSGSBASE,
+        /* missing: CPUID_7_0_ECX_OSPKE */
+        .features[FEAT_7_0_ECX] =
+            CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_UMIP,
+        .features[FEAT_7_0_EDX] =
+            CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_SPEC_CTRL,
+        .features[FEAT_8000_0001_EDX] =
+            CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB |
+            CPUID_EXT2_NX | CPUID_EXT2_SYSCALL,
+        .features[FEAT_8000_0001_ECX] =
+            CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM,
+        .features[FEAT_8000_0007_EDX] = CPUID_APM_INVTSC,
+        /*
+         * TODO: When the Linux kernel introduces other existing definitions
+         * for this leaf, remember to update the definitions here.
+         */
+        .features[FEAT_C000_0001_EDX] =
+            CPUID_C000_0001_EDX_PMM_EN | CPUID_C000_0001_EDX_PMM |
+            CPUID_C000_0001_EDX_PHE_EN | CPUID_C000_0001_EDX_PHE |
+            CPUID_C000_0001_EDX_ACE2 |
+            CPUID_C000_0001_EDX_XCRYPT_EN | CPUID_C000_0001_EDX_XCRYPT |
+            CPUID_C000_0001_EDX_XSTORE_EN | CPUID_C000_0001_EDX_XSTORE,
+        .features[FEAT_XSAVE] =
+            CPUID_XSAVE_XSAVEOPT,
+        .features[FEAT_ARCH_CAPABILITIES] =
+            MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY |
+            MSR_ARCH_CAP_MDS_NO | MSR_ARCH_CAP_PSCHANGE_MC_NO |
+            MSR_ARCH_CAP_SSB_NO,
+        .features[FEAT_VMX_PROCBASED_CTLS] =
+            VMX_CPU_BASED_VIRTUAL_INTR_PENDING | VMX_CPU_BASED_HLT_EXITING |
+            VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_INVLPG_EXITING |
+            VMX_CPU_BASED_MWAIT_EXITING | VMX_CPU_BASED_RDPMC_EXITING |
+            VMX_CPU_BASED_RDTSC_EXITING | VMX_CPU_BASED_CR3_LOAD_EXITING |
+            VMX_CPU_BASED_CR3_STORE_EXITING | VMX_CPU_BASED_CR8_LOAD_EXITING |
+            VMX_CPU_BASED_CR8_STORE_EXITING | VMX_CPU_BASED_TPR_SHADOW |
+            VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_MOV_DR_EXITING |
+            VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS |
+            VMX_CPU_BASED_MONITOR_TRAP_FLAG | VMX_CPU_BASED_USE_MSR_BITMAPS |
+            VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING |
+            VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS,
+        /*
+         * missing: VMX_SECONDARY_EXEC_PAUSE_LOOP_EXITING,
+         * VMX_SECONDARY_EXEC_TSC_SCALING
+         */
+        .features[FEAT_VMX_SECONDARY_CTLS] =
+            VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+            VMX_SECONDARY_EXEC_ENABLE_EPT | VMX_SECONDARY_EXEC_DESC |
+            VMX_SECONDARY_EXEC_RDTSCP | VMX_SECONDARY_EXEC_ENABLE_VPID |
+            VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
+            VMX_SECONDARY_EXEC_WBINVD_EXITING |
+            VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST |
+            VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT |
+            VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
+            VMX_SECONDARY_EXEC_RDRAND_EXITING |
+            VMX_SECONDARY_EXEC_ENABLE_INVPCID |
+            VMX_SECONDARY_EXEC_ENABLE_VMFUNC |
+            VMX_SECONDARY_EXEC_SHADOW_VMCS |
+            VMX_SECONDARY_EXEC_ENABLE_PML,
+        .features[FEAT_VMX_PINBASED_CTLS] =
+            VMX_PIN_BASED_EXT_INTR_MASK | VMX_PIN_BASED_NMI_EXITING |
+            VMX_PIN_BASED_VIRTUAL_NMIS | VMX_PIN_BASED_VMX_PREEMPTION_TIMER |
+            VMX_PIN_BASED_POSTED_INTR,
+        .features[FEAT_VMX_EXIT_CTLS] =
+            VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | VMX_VM_EXIT_HOST_ADDR_SPACE_SIZE |
+            VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
+            VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_IA32_PAT |
+            VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER |
+            VMX_VM_EXIT_LOAD_IA32_EFER | VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER,
+        /* missing: VMX_VM_ENTRY_SMM, VMX_VM_ENTRY_DEACT_DUAL_MONITOR */
+        .features[FEAT_VMX_ENTRY_CTLS] =
+            VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_IA32E_MODE |
+            VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL |
+            VMX_VM_ENTRY_LOAD_IA32_PAT | VMX_VM_ENTRY_LOAD_IA32_EFER,
+        /*
+         * missing: MSR_VMX_MISC_ACTIVITY_SHUTDOWN,
+         * MSR_VMX_MISC_ACTIVITY_WAIT_SIPI
+         */
+        .features[FEAT_VMX_MISC] =
+            MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_ACTIVITY_HLT |
+            MSR_VMX_MISC_VMWRITE_VMEXIT,
+        /* missing: MSR_VMX_EPT_UC */
+        .features[FEAT_VMX_EPT_VPID_CAPS] =
+            MSR_VMX_EPT_EXECONLY | MSR_VMX_EPT_PAGE_WALK_LENGTH_4 |
+            MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | MSR_VMX_EPT_1GB |
+            MSR_VMX_EPT_INVEPT | MSR_VMX_EPT_AD_BITS |
+            MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT |
+            MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID |
+            MSR_VMX_EPT_INVVPID_ALL_CONTEXT | MSR_VMX_EPT_INVVPID_SINGLE_ADDR |
+            MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS,
+        .features[FEAT_VMX_BASIC] =
+            MSR_VMX_BASIC_INS_OUTS | MSR_VMX_BASIC_TRUE_CTLS,
+        .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING,
+        .xlevel = 0x80000008,
+        .model_id = "Zhaoxin YongFeng Processor",
+    },
 };
 
 /*
@@ -6166,7 +6293,7 @@ static void x86_cpu_list_entry(gpointer data, gpointer user_data)
         desc = g_strdup_printf("%s [%s]", model_id, cc->model->note);
     }
     if (!desc) {
-        desc = g_strdup_printf("%s", model_id);
+        desc = g_strdup(model_id);
     }
 
     if (cc->model && cc->model->cpudef->deprecation_note) {
@@ -6436,7 +6563,7 @@ void x86_cpu_apply_props(X86CPU *cpu, PropValue *props)
  * Only for builtin_x86_defs models initialized with x86_register_cpudef_types.
  */
 
-static void x86_cpu_apply_version_props(X86CPU *cpu, X86CPUModel *model)
+static void x86_cpu_apply_version_props(X86CPU *cpu, const X86CPUModel *model)
 {
     const X86CPUVersionDefinition *vdef;
     X86CPUVersion version = x86_cpu_model_resolve_version(model);
@@ -6465,7 +6592,7 @@ static void x86_cpu_apply_version_props(X86CPU *cpu, X86CPUModel *model)
 }
 
 static const CPUCaches *x86_cpu_get_versioned_cache_info(X86CPU *cpu,
-                                                         X86CPUModel *model)
+                                                       const X86CPUModel *model)
 {
     const X86CPUVersionDefinition *vdef;
     X86CPUVersion version = x86_cpu_model_resolve_version(model);
@@ -6493,7 +6620,7 @@ static const CPUCaches *x86_cpu_get_versioned_cache_info(X86CPU *cpu,
  * Load data from X86CPUDefinition into a X86CPU object.
  * Only for builtin_x86_defs models initialized with x86_register_cpudef_types.
  */
-static void x86_cpu_load_model(X86CPU *cpu, X86CPUModel *model)
+static void x86_cpu_load_model(X86CPU *cpu, const X86CPUModel *model)
 {
     const X86CPUDefinition *def = model->cpudef;
     CPUX86State *env = &cpu->env;
@@ -6563,7 +6690,7 @@ static const gchar *x86_gdb_arch_name(CPUState *cs)
 
 static void x86_cpu_cpudef_class_init(ObjectClass *oc, void *data)
 {
-    X86CPUModel *model = data;
+    const X86CPUModel *model = data;
     X86CPUClass *xcc = X86_CPU_CLASS(oc);
     CPUClass *cc = CPU_CLASS(oc);
 
@@ -7677,9 +7804,10 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
         /*
          * The Linux kernel checks for the CMPLegacy bit and
          * discards multiple thread information if it is set.
-         * So don't set it here for Intel to make Linux guests happy.
+         * So don't set it here for Intel (and other processors
+         * following Intel's behavior) to make Linux guests happy.
          */
-        if (!IS_INTEL_CPU(env)) {
+        if (!IS_INTEL_CPU(env) && !IS_ZHAOXIN_CPU(env)) {
             env->features[FEAT_8000_0001_ECX] |= CPUID_EXT3_CMP_LEG;
         }
     }
@@ -7903,6 +8031,64 @@ static void x86_cpu_hyperv_realize(X86CPU *cpu)
     cpu->hyperv_limits[2] = 0;
 }
 
+#ifndef CONFIG_USER_ONLY
+static bool x86_cpu_update_smp_cache_topo(MachineState *ms, X86CPU *cpu,
+                                          Error **errp)
+{
+    CPUX86State *env = &cpu->env;
+    CpuTopologyLevel level;
+
+    level = machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1D);
+    if (level != CPU_TOPOLOGY_LEVEL_DEFAULT) {
+        env->cache_info_cpuid4.l1d_cache->share_level = level;
+        env->cache_info_amd.l1d_cache->share_level = level;
+    } else {
+        machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1D,
+            env->cache_info_cpuid4.l1d_cache->share_level);
+        machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1D,
+            env->cache_info_amd.l1d_cache->share_level);
+    }
+
+    level = machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1I);
+    if (level != CPU_TOPOLOGY_LEVEL_DEFAULT) {
+        env->cache_info_cpuid4.l1i_cache->share_level = level;
+        env->cache_info_amd.l1i_cache->share_level = level;
+    } else {
+        machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1I,
+            env->cache_info_cpuid4.l1i_cache->share_level);
+        machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1I,
+            env->cache_info_amd.l1i_cache->share_level);
+    }
+
+    level = machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L2);
+    if (level != CPU_TOPOLOGY_LEVEL_DEFAULT) {
+        env->cache_info_cpuid4.l2_cache->share_level = level;
+        env->cache_info_amd.l2_cache->share_level = level;
+    } else {
+        machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L2,
+            env->cache_info_cpuid4.l2_cache->share_level);
+        machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L2,
+            env->cache_info_amd.l2_cache->share_level);
+    }
+
+    level = machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L3);
+    if (level != CPU_TOPOLOGY_LEVEL_DEFAULT) {
+        env->cache_info_cpuid4.l3_cache->share_level = level;
+        env->cache_info_amd.l3_cache->share_level = level;
+    } else {
+        machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L3,
+            env->cache_info_cpuid4.l3_cache->share_level);
+        machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L3,
+            env->cache_info_amd.l3_cache->share_level);
+    }
+
+    if (!machine_check_smp_cache(ms, errp)) {
+        return false;
+    }
+    return true;
+}
+#endif
+
 static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
 {
     CPUState *cs = CPU(dev);
@@ -8142,6 +8328,14 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
 
 #ifndef CONFIG_USER_ONLY
     MachineState *ms = MACHINE(qdev_get_machine());
+    MachineClass *mc = MACHINE_GET_CLASS(ms);
+
+    if (mc->smp_props.has_caches) {
+        if (!x86_cpu_update_smp_cache_topo(ms, cpu, errp)) {
+            return;
+        }
+    }
+
     qemu_register_reset(x86_cpu_machine_reset_cb, cpu);
 
     if (cpu->env.features[FEAT_1_EDX] & CPUID_APIC || ms->smp.cpus > 1) {
@@ -8410,16 +8604,15 @@ static vaddr x86_cpu_get_pc(CPUState *cs)
     return cpu->env.eip + cpu->env.segs[R_CS].base;
 }
 
+#if !defined(CONFIG_USER_ONLY)
 int x86_cpu_pending_interrupt(CPUState *cs, int interrupt_request)
 {
     X86CPU *cpu = X86_CPU(cs);
     CPUX86State *env = &cpu->env;
 
-#if !defined(CONFIG_USER_ONLY)
     if (interrupt_request & CPU_INTERRUPT_POLL) {
         return CPU_INTERRUPT_POLL;
     }
-#endif
     if (interrupt_request & CPU_INTERRUPT_SIPI) {
         return CPU_INTERRUPT_SIPI;
     }
@@ -8440,14 +8633,12 @@ int x86_cpu_pending_interrupt(CPUState *cs, int interrupt_request)
                      (env->eflags & IF_MASK &&
                       !(env->hflags & HF_INHIBIT_IRQ_MASK))))) {
             return CPU_INTERRUPT_HARD;
-#if !defined(CONFIG_USER_ONLY)
         } else if (env->hflags2 & HF2_VGIF_MASK) {
             if((interrupt_request & CPU_INTERRUPT_VIRQ) &&
                    (env->eflags & IF_MASK) &&
                    !(env->hflags & HF_INHIBIT_IRQ_MASK)) {
                         return CPU_INTERRUPT_VIRQ;
             }
-#endif
         }
     }
 
@@ -8458,6 +8649,7 @@ static bool x86_cpu_has_work(CPUState *cs)
 {
     return x86_cpu_pending_interrupt(cs, cs->interrupt_request) != 0;
 }
+#endif /* !CONFIG_USER_ONLY */
 
 int x86_mmu_index_pl(CPUX86State *env, unsigned pl)
 {
@@ -8497,6 +8689,7 @@ static void x86_disas_set_info(CPUState *cs, disassemble_info *info)
     X86CPU *cpu = X86_CPU(cs);
     CPUX86State *env = &cpu->env;
 
+    info->endian = BFD_ENDIAN_LITTLE;
     info->mach = (env->hflags & HF_CS64_MASK ? bfd_mach_x86_64
                   : env->hflags & HF_CS32_MASK ? bfd_mach_i386_i386
                   : bfd_mach_i386_i8086);
@@ -8698,6 +8891,7 @@ static const Property x86_cpu_properties[] = {
 #include "hw/core/sysemu-cpu-ops.h"
 
 static const struct SysemuCPUOps i386_sysemu_ops = {
+    .has_work = x86_cpu_has_work,
     .get_memory_mapping = x86_cpu_get_memory_mapping,
     .get_paging_enabled = x86_cpu_get_paging_enabled,
     .get_phys_page_attrs_debug = x86_cpu_get_phys_page_attrs_debug,
@@ -8731,7 +8925,6 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data)
 
     cc->class_by_name = x86_cpu_class_by_name;
     cc->parse_features = x86_cpu_parse_featurestr;
-    cc->has_work = x86_cpu_has_work;
     cc->mmu_index = x86_cpu_mmu_index;
     cc->dump_state = x86_cpu_dump_state;
     cc->set_pc = x86_cpu_set_pc;
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index c67b42d34fc68..76f24446a55d9 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1110,6 +1110,27 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
 /* CPUID[0x80000007].EDX flags: */
 #define CPUID_APM_INVTSC       (1U << 8)
 
+/* "rng" RNG present (xstore) */
+#define CPUID_C000_0001_EDX_XSTORE             (1U << 2)
+/* "rng_en" RNG enabled */
+#define CPUID_C000_0001_EDX_XSTORE_EN          (1U << 3)
+/* "ace" on-CPU crypto (xcrypt) */
+#define CPUID_C000_0001_EDX_XCRYPT             (1U << 6)
+/* "ace_en" on-CPU crypto enabled */
+#define CPUID_C000_0001_EDX_XCRYPT_EN          (1U << 7)
+/* Advanced Cryptography Engine v2 */
+#define CPUID_C000_0001_EDX_ACE2               (1U << 8)
+/* ACE v2 enabled */
+#define CPUID_C000_0001_EDX_ACE2_EN            (1U << 9)
+/* PadLock Hash Engine */
+#define CPUID_C000_0001_EDX_PHE                (1U << 10)
+/* PHE enabled */
+#define CPUID_C000_0001_EDX_PHE_EN             (1U << 11)
+/* PadLock Montgomery Multiplier */
+#define CPUID_C000_0001_EDX_PMM                (1U << 12)
+/* PMM enabled */
+#define CPUID_C000_0001_EDX_PMM_EN             (1U << 13)
+
 #define CPUID_VENDOR_SZ      12
 
 #define CPUID_VENDOR_INTEL_1 0x756e6547 /* "Genu" */
@@ -1122,7 +1143,16 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
 #define CPUID_VENDOR_AMD_3   0x444d4163 /* "cAMD" */
 #define CPUID_VENDOR_AMD   "AuthenticAMD"
 
-#define CPUID_VENDOR_VIA   "CentaurHauls"
+#define CPUID_VENDOR_ZHAOXIN1_1 0x746E6543 /* "Cent" */
+#define CPUID_VENDOR_ZHAOXIN1_2 0x48727561 /* "aurH" */
+#define CPUID_VENDOR_ZHAOXIN1_3 0x736C7561 /* "auls" */
+
+#define CPUID_VENDOR_ZHAOXIN2_1 0x68532020 /* "  Sh" */
+#define CPUID_VENDOR_ZHAOXIN2_2 0x68676E61 /* "angh" */
+#define CPUID_VENDOR_ZHAOXIN2_3 0x20206961 /* "ai  " */
+
+#define CPUID_VENDOR_ZHAOXIN1   "CentaurHauls"
+#define CPUID_VENDOR_ZHAOXIN2   "  Shanghai  "
 
 #define CPUID_VENDOR_HYGON    "HygonGenuine"
 
@@ -1132,6 +1162,15 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
 #define IS_AMD_CPU(env) ((env)->cpuid_vendor1 == CPUID_VENDOR_AMD_1 && \
                          (env)->cpuid_vendor2 == CPUID_VENDOR_AMD_2 && \
                          (env)->cpuid_vendor3 == CPUID_VENDOR_AMD_3)
+#define IS_ZHAOXIN1_CPU(env) \
+    ((env)->cpuid_vendor1 == CPUID_VENDOR_ZHAOXIN1_1 && \
+     (env)->cpuid_vendor2 == CPUID_VENDOR_ZHAOXIN1_2 && \
+     (env)->cpuid_vendor3 == CPUID_VENDOR_ZHAOXIN1_3)
+#define IS_ZHAOXIN2_CPU(env) \
+    ((env)->cpuid_vendor1 == CPUID_VENDOR_ZHAOXIN2_1 && \
+     (env)->cpuid_vendor2 == CPUID_VENDOR_ZHAOXIN2_2 && \
+     (env)->cpuid_vendor3 == CPUID_VENDOR_ZHAOXIN2_3)
+#define IS_ZHAOXIN_CPU(env) (IS_ZHAOXIN1_CPU(env) || IS_ZHAOXIN2_CPU(env))
 
 #define CPUID_MWAIT_IBE     (1U << 1) /* Interrupts can exit capability */
 #define CPUID_MWAIT_EMX     (1U << 0) /* enumeration supported */
@@ -2288,7 +2327,7 @@ struct X86CPUClass {
      * CPU definition, automatically loaded by instance_init if not NULL.
      * Should be eventually replaced by subclass-specific property defaults.
      */
-    X86CPUModel *model;
+    const X86CPUModel *model;
 
     bool host_cpuid_required;
     int ordering;
@@ -2310,8 +2349,6 @@ struct X86CPUClass {
 extern const VMStateDescription vmstate_x86_cpu;
 #endif
 
-int x86_cpu_pending_interrupt(CPUState *cs, int interrupt_request);
-
 int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cpu,
                              int cpuid, DumpState *s);
 int x86_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cpu,
@@ -2334,6 +2371,8 @@ void x86_cpu_list(void);
 int cpu_x86_support_mca_broadcast(CPUX86State *env);
 
 #ifndef CONFIG_USER_ONLY
+int x86_cpu_pending_interrupt(CPUState *cs, int interrupt_request);
+
 hwaddr x86_cpu_get_phys_page_attrs_debug(CPUState *cpu, vaddr addr,
                                          MemTxAttrs *attrs);
 int cpu_get_pic_interrupt(CPUX86State *s);
diff --git a/target/i386/helper.c b/target/i386/helper.c
index 3bc15fba6ee89..c07b1b16ea171 100644
--- a/target/i386/helper.c
+++ b/target/i386/helper.c
@@ -20,7 +20,7 @@
 #include "qemu/osdep.h"
 #include "qapi/qapi-events-run-state.h"
 #include "cpu.h"
-#include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "exec/translation-block.h"
 #include "system/runstate.h"
 #ifndef CONFIG_USER_ONLY
diff --git a/target/i386/hvf/hvf-cpu.c b/target/i386/hvf/hvf-cpu.c
index 560b5a0594020..b5f4c80028fce 100644
--- a/target/i386/hvf/hvf-cpu.c
+++ b/target/i386/hvf/hvf-cpu.c
@@ -14,7 +14,7 @@
 #include "system/system.h"
 #include "hw/boards.h"
 #include "system/hvf.h"
-#include "hw/core/accel-cpu.h"
+#include "accel/accel-cpu-target.h"
 #include "hvf-i386.h"
 
 static void hvf_cpu_max_instance_init(X86CPU *cpu)
diff --git a/target/i386/hvf/hvf-i386.h b/target/i386/hvf/hvf-i386.h
index e99c02cd4bfe1..044ad236ae804 100644
--- a/target/i386/hvf/hvf-i386.h
+++ b/target/i386/hvf/hvf-i386.h
@@ -18,7 +18,9 @@
 
 uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx, int reg);
 
-void hvf_handle_io(CPUArchState *, uint16_t, void *, int, int, int);
+void hvf_handle_io(CPUState *, uint16_t, void *, int, int, int);
+void hvf_simulate_rdmsr(CPUX86State *env);
+void hvf_simulate_wrmsr(CPUX86State *env);
 
 /* Host specific functions */
 int hvf_inject_interrupt(CPUArchState *env, int vector);
diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index ca08f0753f0dd..9ba0e04ac7569 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -61,6 +61,7 @@
 #include "vmx.h"
 #include "x86.h"
 #include "x86_descr.h"
+#include "x86_flags.h"
 #include "x86_mmu.h"
 #include "x86_decode.h"
 #include "x86_emu.h"
@@ -103,7 +104,7 @@ static void update_apic_tpr(CPUState *cpu)
 
 #define VECTORING_INFO_VECTOR_MASK     0xff
 
-void hvf_handle_io(CPUArchState *env, uint16_t port, void *buffer,
+void hvf_handle_io(CPUState *env, uint16_t port, void *buffer,
                   int direction, int size, int count)
 {
     int i;
@@ -434,6 +435,264 @@ static void hvf_cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
     }
 }
 
+void hvf_load_regs(CPUState *cs)
+{
+    X86CPU *cpu = X86_CPU(cs);
+    CPUX86State *env = &cpu->env;
+
+    int i = 0;
+    RRX(env, R_EAX) = rreg(cs->accel->fd, HV_X86_RAX);
+    RRX(env, R_EBX) = rreg(cs->accel->fd, HV_X86_RBX);
+    RRX(env, R_ECX) = rreg(cs->accel->fd, HV_X86_RCX);
+    RRX(env, R_EDX) = rreg(cs->accel->fd, HV_X86_RDX);
+    RRX(env, R_ESI) = rreg(cs->accel->fd, HV_X86_RSI);
+    RRX(env, R_EDI) = rreg(cs->accel->fd, HV_X86_RDI);
+    RRX(env, R_ESP) = rreg(cs->accel->fd, HV_X86_RSP);
+    RRX(env, R_EBP) = rreg(cs->accel->fd, HV_X86_RBP);
+    for (i = 8; i < 16; i++) {
+        RRX(env, i) = rreg(cs->accel->fd, HV_X86_RAX + i);
+    }
+
+    env->eflags = rreg(cs->accel->fd, HV_X86_RFLAGS);
+    rflags_to_lflags(env);
+    env->eip = rreg(cs->accel->fd, HV_X86_RIP);
+}
+
+void hvf_store_regs(CPUState *cs)
+{
+    X86CPU *cpu = X86_CPU(cs);
+    CPUX86State *env = &cpu->env;
+
+    int i = 0;
+    wreg(cs->accel->fd, HV_X86_RAX, RAX(env));
+    wreg(cs->accel->fd, HV_X86_RBX, RBX(env));
+    wreg(cs->accel->fd, HV_X86_RCX, RCX(env));
+    wreg(cs->accel->fd, HV_X86_RDX, RDX(env));
+    wreg(cs->accel->fd, HV_X86_RSI, RSI(env));
+    wreg(cs->accel->fd, HV_X86_RDI, RDI(env));
+    wreg(cs->accel->fd, HV_X86_RBP, RBP(env));
+    wreg(cs->accel->fd, HV_X86_RSP, RSP(env));
+    for (i = 8; i < 16; i++) {
+        wreg(cs->accel->fd, HV_X86_RAX + i, RRX(env, i));
+    }
+
+    lflags_to_rflags(env);
+    wreg(cs->accel->fd, HV_X86_RFLAGS, env->eflags);
+    macvm_set_rip(cs, env->eip);
+}
+
+void hvf_simulate_rdmsr(CPUX86State *env)
+{
+    X86CPU *cpu = env_archcpu(env);
+    CPUState *cs = env_cpu(env);
+    uint32_t msr = ECX(env);
+    uint64_t val = 0;
+
+    switch (msr) {
+    case MSR_IA32_TSC:
+        val = rdtscp() + rvmcs(cs->accel->fd, VMCS_TSC_OFFSET);
+        break;
+    case MSR_IA32_APICBASE:
+        val = cpu_get_apic_base(cpu->apic_state);
+        break;
+    case MSR_APIC_START ... MSR_APIC_END: {
+        int ret;
+        int index = (uint32_t)env->regs[R_ECX] - MSR_APIC_START;
+
+        ret = apic_msr_read(index, &val);
+        if (ret < 0) {
+            x86_emul_raise_exception(env, EXCP0D_GPF, 0);
+        }
+
+        break;
+    }
+    case MSR_IA32_UCODE_REV:
+        val = cpu->ucode_rev;
+        break;
+    case MSR_EFER:
+        val = rvmcs(cs->accel->fd, VMCS_GUEST_IA32_EFER);
+        break;
+    case MSR_FSBASE:
+        val = rvmcs(cs->accel->fd, VMCS_GUEST_FS_BASE);
+        break;
+    case MSR_GSBASE:
+        val = rvmcs(cs->accel->fd, VMCS_GUEST_GS_BASE);
+        break;
+    case MSR_KERNELGSBASE:
+        val = rvmcs(cs->accel->fd, VMCS_HOST_FS_BASE);
+        break;
+    case MSR_STAR:
+        abort();
+        break;
+    case MSR_LSTAR:
+        abort();
+        break;
+    case MSR_CSTAR:
+        abort();
+        break;
+    case MSR_IA32_MISC_ENABLE:
+        val = env->msr_ia32_misc_enable;
+        break;
+    case MSR_MTRRphysBase(0):
+    case MSR_MTRRphysBase(1):
+    case MSR_MTRRphysBase(2):
+    case MSR_MTRRphysBase(3):
+    case MSR_MTRRphysBase(4):
+    case MSR_MTRRphysBase(5):
+    case MSR_MTRRphysBase(6):
+    case MSR_MTRRphysBase(7):
+        val = env->mtrr_var[(ECX(env) - MSR_MTRRphysBase(0)) / 2].base;
+        break;
+    case MSR_MTRRphysMask(0):
+    case MSR_MTRRphysMask(1):
+    case MSR_MTRRphysMask(2):
+    case MSR_MTRRphysMask(3):
+    case MSR_MTRRphysMask(4):
+    case MSR_MTRRphysMask(5):
+    case MSR_MTRRphysMask(6):
+    case MSR_MTRRphysMask(7):
+        val = env->mtrr_var[(ECX(env) - MSR_MTRRphysMask(0)) / 2].mask;
+        break;
+    case MSR_MTRRfix64K_00000:
+        val = env->mtrr_fixed[0];
+        break;
+    case MSR_MTRRfix16K_80000:
+    case MSR_MTRRfix16K_A0000:
+        val = env->mtrr_fixed[ECX(env) - MSR_MTRRfix16K_80000 + 1];
+        break;
+    case MSR_MTRRfix4K_C0000:
+    case MSR_MTRRfix4K_C8000:
+    case MSR_MTRRfix4K_D0000:
+    case MSR_MTRRfix4K_D8000:
+    case MSR_MTRRfix4K_E0000:
+    case MSR_MTRRfix4K_E8000:
+    case MSR_MTRRfix4K_F0000:
+    case MSR_MTRRfix4K_F8000:
+        val = env->mtrr_fixed[ECX(env) - MSR_MTRRfix4K_C0000 + 3];
+        break;
+    case MSR_MTRRdefType:
+        val = env->mtrr_deftype;
+        break;
+    case MSR_CORE_THREAD_COUNT:
+        val = cpu_x86_get_msr_core_thread_count(cpu);
+        break;
+    default:
+        /* fprintf(stderr, "%s: unknown msr 0x%x\n", __func__, msr); */
+        val = 0;
+        break;
+    }
+
+    RAX(env) = (uint32_t)val;
+    RDX(env) = (uint32_t)(val >> 32);
+}
+
+void hvf_simulate_wrmsr(CPUX86State *env)
+{
+    X86CPU *cpu = env_archcpu(env);
+    CPUState *cs = env_cpu(env);
+    uint32_t msr = ECX(env);
+    uint64_t data = ((uint64_t)EDX(env) << 32) | EAX(env);
+
+    switch (msr) {
+    case MSR_IA32_TSC:
+        break;
+    case MSR_IA32_APICBASE: {
+        int r;
+
+        r = cpu_set_apic_base(cpu->apic_state, data);
+        if (r < 0) {
+            x86_emul_raise_exception(env, EXCP0D_GPF, 0);
+        }
+
+        break;
+    }
+    case MSR_APIC_START ... MSR_APIC_END: {
+        int ret;
+        int index = (uint32_t)env->regs[R_ECX] - MSR_APIC_START;
+
+        ret = apic_msr_write(index, data);
+        if (ret < 0) {
+            x86_emul_raise_exception(env, EXCP0D_GPF, 0);
+        }
+
+        break;
+    }
+    case MSR_FSBASE:
+        wvmcs(cs->accel->fd, VMCS_GUEST_FS_BASE, data);
+        break;
+    case MSR_GSBASE:
+        wvmcs(cs->accel->fd, VMCS_GUEST_GS_BASE, data);
+        break;
+    case MSR_KERNELGSBASE:
+        wvmcs(cs->accel->fd, VMCS_HOST_FS_BASE, data);
+        break;
+    case MSR_STAR:
+        abort();
+        break;
+    case MSR_LSTAR:
+        abort();
+        break;
+    case MSR_CSTAR:
+        abort();
+        break;
+    case MSR_EFER:
+        /*printf("new efer %llx\n", EFER(cs));*/
+        wvmcs(cs->accel->fd, VMCS_GUEST_IA32_EFER, data);
+        if (data & MSR_EFER_NXE) {
+            hv_vcpu_invalidate_tlb(cs->accel->fd);
+        }
+        break;
+    case MSR_MTRRphysBase(0):
+    case MSR_MTRRphysBase(1):
+    case MSR_MTRRphysBase(2):
+    case MSR_MTRRphysBase(3):
+    case MSR_MTRRphysBase(4):
+    case MSR_MTRRphysBase(5):
+    case MSR_MTRRphysBase(6):
+    case MSR_MTRRphysBase(7):
+        env->mtrr_var[(ECX(env) - MSR_MTRRphysBase(0)) / 2].base = data;
+        break;
+    case MSR_MTRRphysMask(0):
+    case MSR_MTRRphysMask(1):
+    case MSR_MTRRphysMask(2):
+    case MSR_MTRRphysMask(3):
+    case MSR_MTRRphysMask(4):
+    case MSR_MTRRphysMask(5):
+    case MSR_MTRRphysMask(6):
+    case MSR_MTRRphysMask(7):
+        env->mtrr_var[(ECX(env) - MSR_MTRRphysMask(0)) / 2].mask = data;
+        break;
+    case MSR_MTRRfix64K_00000:
+        env->mtrr_fixed[ECX(env) - MSR_MTRRfix64K_00000] = data;
+        break;
+    case MSR_MTRRfix16K_80000:
+    case MSR_MTRRfix16K_A0000:
+        env->mtrr_fixed[ECX(env) - MSR_MTRRfix16K_80000 + 1] = data;
+        break;
+    case MSR_MTRRfix4K_C0000:
+    case MSR_MTRRfix4K_C8000:
+    case MSR_MTRRfix4K_D0000:
+    case MSR_MTRRfix4K_D8000:
+    case MSR_MTRRfix4K_E0000:
+    case MSR_MTRRfix4K_E8000:
+    case MSR_MTRRfix4K_F0000:
+    case MSR_MTRRfix4K_F8000:
+        env->mtrr_fixed[ECX(env) - MSR_MTRRfix4K_C0000 + 3] = data;
+        break;
+    case MSR_MTRRdefType:
+        env->mtrr_deftype = data;
+        break;
+    default:
+        break;
+    }
+
+    /* Related to support known hypervisor interface */
+    /* if (g_hypervisor_iface)
+         g_hypervisor_iface->wrmsr_handler(cs, msr, data);
+
+    printf("write msr %llx\n", RCX(cs));*/
+}
+
 int hvf_vcpu_exec(CPUState *cpu)
 {
     X86CPU *x86_cpu = X86_CPU(cpu);
@@ -517,10 +776,10 @@ int hvf_vcpu_exec(CPUState *cpu)
             if (ept_emulation_fault(slot, gpa, exit_qual)) {
                 struct x86_decode decode;
 
-                load_regs(cpu);
+                hvf_load_regs(cpu);
                 decode_instruction(env, &decode);
                 exec_instruction(env, &decode);
-                store_regs(cpu);
+                hvf_store_regs(cpu);
                 break;
             }
             break;
@@ -535,8 +794,8 @@ int hvf_vcpu_exec(CPUState *cpu)
 
             if (!string && in) {
                 uint64_t val = 0;
-                load_regs(cpu);
-                hvf_handle_io(env, port, &val, 0, size, 1);
+                hvf_load_regs(cpu);
+                hvf_handle_io(env_cpu(env), port, &val, 0, size, 1);
                 if (size == 1) {
                     AL(env) = val;
                 } else if (size == 2) {
@@ -547,21 +806,21 @@ int hvf_vcpu_exec(CPUState *cpu)
                     RAX(env) = (uint64_t)val;
                 }
                 env->eip += ins_len;
-                store_regs(cpu);
+                hvf_store_regs(cpu);
                 break;
             } else if (!string && !in) {
                 RAX(env) = rreg(cpu->accel->fd, HV_X86_RAX);
-                hvf_handle_io(env, port, &RAX(env), 1, size, 1);
+                hvf_handle_io(env_cpu(env), port, &RAX(env), 1, size, 1);
                 macvm_set_rip(cpu, rip + ins_len);
                 break;
             }
             struct x86_decode decode;
 
-            load_regs(cpu);
+            hvf_load_regs(cpu);
             decode_instruction(env, &decode);
             assert(ins_len == decode.len);
             exec_instruction(env, &decode);
-            store_regs(cpu);
+            hvf_store_regs(cpu);
 
             break;
         }
@@ -614,21 +873,21 @@ int hvf_vcpu_exec(CPUState *cpu)
         case EXIT_REASON_RDMSR:
         case EXIT_REASON_WRMSR:
         {
-            load_regs(cpu);
+            hvf_load_regs(cpu);
             if (exit_reason == EXIT_REASON_RDMSR) {
-                simulate_rdmsr(env);
+                hvf_simulate_rdmsr(env);
             } else {
-                simulate_wrmsr(env);
+                hvf_simulate_wrmsr(env);
             }
             env->eip += ins_len;
-            store_regs(cpu);
+            hvf_store_regs(cpu);
             break;
         }
         case EXIT_REASON_CR_ACCESS: {
             int cr;
             int reg;
 
-            load_regs(cpu);
+            hvf_load_regs(cpu);
             cr = exit_qual & 15;
             reg = (exit_qual >> 8) & 15;
 
@@ -656,16 +915,16 @@ int hvf_vcpu_exec(CPUState *cpu)
                 abort();
             }
             env->eip += ins_len;
-            store_regs(cpu);
+            hvf_store_regs(cpu);
             break;
         }
         case EXIT_REASON_APIC_ACCESS: { /* TODO */
             struct x86_decode decode;
 
-            load_regs(cpu);
+            hvf_load_regs(cpu);
             decode_instruction(env, &decode);
             exec_instruction(env, &decode);
-            store_regs(cpu);
+            hvf_store_regs(cpu);
             break;
         }
         case EXIT_REASON_TPR: {
@@ -674,7 +933,7 @@ int hvf_vcpu_exec(CPUState *cpu)
         }
         case EXIT_REASON_TASK_SWITCH: {
             uint64_t vinfo = rvmcs(cpu->accel->fd, VMCS_IDT_VECTORING_INFO);
-            x68_segment_selector sel = {.sel = exit_qual & 0xffff};
+            x86_segment_selector sel = {.sel = exit_qual & 0xffff};
             vmx_handle_task_switch(cpu, sel, (exit_qual >> 30) & 0x3,
              vinfo & VMCS_INTR_VALID, vinfo & VECTORING_INFO_VECTOR_MASK, vinfo
              & VMCS_INTR_T_MASK);
diff --git a/target/i386/hvf/x86.c b/target/i386/hvf/x86.c
index 80e36136d045b..a0ede138865eb 100644
--- a/target/i386/hvf/x86.c
+++ b/target/i386/hvf/x86.c
@@ -48,7 +48,7 @@
 
 bool x86_read_segment_descriptor(CPUState *cpu,
                                  struct x86_segment_descriptor *desc,
-                                 x68_segment_selector sel)
+                                 x86_segment_selector sel)
 {
     target_ulong base;
     uint32_t limit;
@@ -78,7 +78,7 @@ bool x86_read_segment_descriptor(CPUState *cpu,
 
 bool x86_write_segment_descriptor(CPUState *cpu,
                                   struct x86_segment_descriptor *desc,
-                                  x68_segment_selector sel)
+                                  x86_segment_selector sel)
 {
     target_ulong base;
     uint32_t limit;
diff --git a/target/i386/hvf/x86.h b/target/i386/hvf/x86.h
index 3570f29aa9d2a..063cd0b83ec91 100644
--- a/target/i386/hvf/x86.h
+++ b/target/i386/hvf/x86.h
@@ -183,7 +183,7 @@ static inline uint32_t x86_call_gate_offset(x86_call_gate *gate)
 #define GDT_SEL     0
 #define LDT_SEL     1
 
-typedef struct x68_segment_selector {
+typedef struct x86_segment_selector {
     union {
         uint16_t sel;
         struct {
@@ -192,7 +192,7 @@ typedef struct x68_segment_selector {
             uint16_t index:13;
         };
     };
-} __attribute__ ((__packed__)) x68_segment_selector;
+} __attribute__ ((__packed__)) x86_segment_selector;
 
 /* useful register access  macros */
 #define x86_reg(cpu, reg) ((x86_register *) &cpu->regs[reg])
@@ -250,10 +250,10 @@ typedef struct x68_segment_selector {
 /* deal with GDT/LDT descriptors in memory */
 bool x86_read_segment_descriptor(CPUState *cpu,
                                  struct x86_segment_descriptor *desc,
-                                 x68_segment_selector sel);
+                                 x86_segment_selector sel);
 bool x86_write_segment_descriptor(CPUState *cpu,
                                   struct x86_segment_descriptor *desc,
-                                  x68_segment_selector sel);
+                                  x86_segment_selector sel);
 
 bool x86_read_call_gate(CPUState *cpu, struct x86_call_gate *idt_desc,
                         int gate);
diff --git a/target/i386/hvf/x86_decode.c b/target/i386/hvf/x86_decode.c
index a4a28f113fdcc..5fea2dd3cc036 100644
--- a/target/i386/hvf/x86_decode.c
+++ b/target/i386/hvf/x86_decode.c
@@ -61,8 +61,8 @@ uint64_t sign(uint64_t val, int size)
 static inline uint64_t decode_bytes(CPUX86State *env, struct x86_decode *decode,
                                     int size)
 {
-    target_ulong val = 0;
-    
+    uint64_t val = 0;
+
     switch (size) {
     case 1:
     case 2:
@@ -1893,6 +1893,16 @@ static void decode_prefix(CPUX86State *env, struct x86_decode *decode)
     }
 }
 
+static struct x86_segment_descriptor get_cs_descriptor(CPUState *s)
+{
+    struct vmx_segment vmx_cs;
+    x86_segment_descriptor cs;
+    vmx_read_segment_descriptor(s, &vmx_cs, R_CS);
+    vmx_segment_to_x86_descriptor(s, &vmx_cs, &cs);
+
+    return cs;
+}
+
 void set_addressing_size(CPUX86State *env, struct x86_decode *decode)
 {
     decode->addressing_size = -1;
@@ -1904,10 +1914,9 @@ void set_addressing_size(CPUX86State *env, struct x86_decode *decode)
         }
     } else if (!x86_is_long_mode(env_cpu(env))) {
         /* protected */
-        struct vmx_segment cs;
-        vmx_read_segment_descriptor(env_cpu(env), &cs, R_CS);
+        x86_segment_descriptor cs = get_cs_descriptor(env_cpu(env));
         /* check db */
-        if ((cs.ar >> 14) & 1) {
+        if (cs.db) {
             if (decode->addr_size_override) {
                 decode->addressing_size = 2;
             } else {
@@ -1941,10 +1950,9 @@ void set_operand_size(CPUX86State *env, struct x86_decode *decode)
         }
     } else if (!x86_is_long_mode(env_cpu(env))) {
         /* protected */
-        struct vmx_segment cs;
-        vmx_read_segment_descriptor(env_cpu(env), &cs, R_CS);
+        x86_segment_descriptor cs = get_cs_descriptor(env_cpu(env));
         /* check db */
-        if ((cs.ar >> 14) & 1) {
+        if (cs.db) {
             if (decode->op_size_override) {
                 decode->operand_size = 2;
             } else{
diff --git a/target/i386/hvf/x86_descr.c b/target/i386/hvf/x86_descr.c
index f33836d6cbaa5..7b599c9037778 100644
--- a/target/i386/hvf/x86_descr.c
+++ b/target/i386/hvf/x86_descr.c
@@ -60,14 +60,14 @@ uint64_t vmx_read_segment_base(CPUState *cpu, X86Seg seg)
     return rvmcs(cpu->accel->fd, vmx_segment_fields[seg].base);
 }
 
-x68_segment_selector vmx_read_segment_selector(CPUState *cpu, X86Seg seg)
+x86_segment_selector vmx_read_segment_selector(CPUState *cpu, X86Seg seg)
 {
-    x68_segment_selector sel;
+    x86_segment_selector sel;
     sel.sel = rvmcs(cpu->accel->fd, vmx_segment_fields[seg].selector);
     return sel;
 }
 
-void vmx_write_segment_selector(CPUState *cpu, x68_segment_selector selector, X86Seg seg)
+void vmx_write_segment_selector(CPUState *cpu, x86_segment_selector selector, X86Seg seg)
 {
     wvmcs(cpu->accel->fd, vmx_segment_fields[seg].selector, selector.sel);
 }
@@ -90,7 +90,7 @@ void vmx_write_segment_descriptor(CPUState *cpu, struct vmx_segment *desc, X86Se
     wvmcs(cpu->accel->fd, sf->ar_bytes, desc->ar);
 }
 
-void x86_segment_descriptor_to_vmx(CPUState *cpu, x68_segment_selector selector,
+void x86_segment_descriptor_to_vmx(CPUState *cpu, x86_segment_selector selector,
                                    struct x86_segment_descriptor *desc,
                                    struct vmx_segment *vmx_desc)
 {
diff --git a/target/i386/hvf/x86_descr.h b/target/i386/hvf/x86_descr.h
index 9f06014b56a54..ce5de9834973e 100644
--- a/target/i386/hvf/x86_descr.h
+++ b/target/i386/hvf/x86_descr.h
@@ -34,10 +34,10 @@ void vmx_read_segment_descriptor(CPUState *cpu,
 void vmx_write_segment_descriptor(CPUState *cpu, struct vmx_segment *desc,
                                   enum X86Seg seg);
 
-x68_segment_selector vmx_read_segment_selector(CPUState *cpu,
+x86_segment_selector vmx_read_segment_selector(CPUState *cpu,
                                                enum X86Seg seg);
 void vmx_write_segment_selector(CPUState *cpu,
-                                x68_segment_selector selector,
+                                x86_segment_selector selector,
                                 enum X86Seg seg);
 
 uint64_t vmx_read_segment_base(CPUState *cpu, enum X86Seg seg);
@@ -45,7 +45,7 @@ void vmx_write_segment_base(CPUState *cpu, enum X86Seg seg,
                             uint64_t base);
 
 void x86_segment_descriptor_to_vmx(CPUState *cpu,
-                                   x68_segment_selector selector,
+                                   x86_segment_selector selector,
                                    struct x86_segment_descriptor *desc,
                                    struct vmx_segment *vmx_desc);
 
diff --git a/target/i386/hvf/x86_emu.c b/target/i386/hvf/x86_emu.c
index 69c61c9c0737c..ebba80a36b506 100644
--- a/target/i386/hvf/x86_emu.c
+++ b/target/i386/hvf/x86_emu.c
@@ -44,9 +44,7 @@
 #include "x86_flags.h"
 #include "vmcs.h"
 #include "vmx.h"
-
-void hvf_handle_io(CPUState *cs, uint16_t port, void *data,
-                   int direction, int size, uint32_t count);
+#include "hvf-i386.h"
 
 #define EXEC_2OP_FLAGS_CMD(env, decode, cmd, FLAGS_FUNC, save_res) \
 {                                                       \
@@ -663,8 +661,7 @@ static void exec_lods(CPUX86State *env, struct x86_decode *decode)
     env->eip += decode->len;
 }
 
-static void raise_exception(CPUX86State *env, int exception_index,
-                            int error_code)
+void x86_emul_raise_exception(CPUX86State *env, int exception_index, int error_code)
 {
     env->exception_nr = exception_index;
     env->error_code = error_code;
@@ -672,227 +669,15 @@ static void raise_exception(CPUX86State *env, int exception_index,
     env->exception_injected = 1;
 }
 
-void simulate_rdmsr(CPUX86State *env)
-{
-    X86CPU *cpu = env_archcpu(env);
-    CPUState *cs = env_cpu(env);
-    uint32_t msr = ECX(env);
-    uint64_t val = 0;
-
-    switch (msr) {
-    case MSR_IA32_TSC:
-        val = rdtscp() + rvmcs(cs->accel->fd, VMCS_TSC_OFFSET);
-        break;
-    case MSR_IA32_APICBASE:
-        val = cpu_get_apic_base(cpu->apic_state);
-        break;
-    case MSR_APIC_START ... MSR_APIC_END: {
-        int ret;
-        int index = (uint32_t)env->regs[R_ECX] - MSR_APIC_START;
-
-        ret = apic_msr_read(index, &val);
-        if (ret < 0) {
-            raise_exception(env, EXCP0D_GPF, 0);
-        }
-
-        break;
-    }
-    case MSR_IA32_UCODE_REV:
-        val = cpu->ucode_rev;
-        break;
-    case MSR_EFER:
-        val = rvmcs(cs->accel->fd, VMCS_GUEST_IA32_EFER);
-        break;
-    case MSR_FSBASE:
-        val = rvmcs(cs->accel->fd, VMCS_GUEST_FS_BASE);
-        break;
-    case MSR_GSBASE:
-        val = rvmcs(cs->accel->fd, VMCS_GUEST_GS_BASE);
-        break;
-    case MSR_KERNELGSBASE:
-        val = rvmcs(cs->accel->fd, VMCS_HOST_FS_BASE);
-        break;
-    case MSR_STAR:
-        abort();
-        break;
-    case MSR_LSTAR:
-        abort();
-        break;
-    case MSR_CSTAR:
-        abort();
-        break;
-    case MSR_IA32_MISC_ENABLE:
-        val = env->msr_ia32_misc_enable;
-        break;
-    case MSR_MTRRphysBase(0):
-    case MSR_MTRRphysBase(1):
-    case MSR_MTRRphysBase(2):
-    case MSR_MTRRphysBase(3):
-    case MSR_MTRRphysBase(4):
-    case MSR_MTRRphysBase(5):
-    case MSR_MTRRphysBase(6):
-    case MSR_MTRRphysBase(7):
-        val = env->mtrr_var[(ECX(env) - MSR_MTRRphysBase(0)) / 2].base;
-        break;
-    case MSR_MTRRphysMask(0):
-    case MSR_MTRRphysMask(1):
-    case MSR_MTRRphysMask(2):
-    case MSR_MTRRphysMask(3):
-    case MSR_MTRRphysMask(4):
-    case MSR_MTRRphysMask(5):
-    case MSR_MTRRphysMask(6):
-    case MSR_MTRRphysMask(7):
-        val = env->mtrr_var[(ECX(env) - MSR_MTRRphysMask(0)) / 2].mask;
-        break;
-    case MSR_MTRRfix64K_00000:
-        val = env->mtrr_fixed[0];
-        break;
-    case MSR_MTRRfix16K_80000:
-    case MSR_MTRRfix16K_A0000:
-        val = env->mtrr_fixed[ECX(env) - MSR_MTRRfix16K_80000 + 1];
-        break;
-    case MSR_MTRRfix4K_C0000:
-    case MSR_MTRRfix4K_C8000:
-    case MSR_MTRRfix4K_D0000:
-    case MSR_MTRRfix4K_D8000:
-    case MSR_MTRRfix4K_E0000:
-    case MSR_MTRRfix4K_E8000:
-    case MSR_MTRRfix4K_F0000:
-    case MSR_MTRRfix4K_F8000:
-        val = env->mtrr_fixed[ECX(env) - MSR_MTRRfix4K_C0000 + 3];
-        break;
-    case MSR_MTRRdefType:
-        val = env->mtrr_deftype;
-        break;
-    case MSR_CORE_THREAD_COUNT:
-        val = cpu_x86_get_msr_core_thread_count(cpu);
-        break;
-    default:
-        /* fprintf(stderr, "%s: unknown msr 0x%x\n", __func__, msr); */
-        val = 0;
-        break;
-    }
-
-    RAX(env) = (uint32_t)val;
-    RDX(env) = (uint32_t)(val >> 32);
-}
-
 static void exec_rdmsr(CPUX86State *env, struct x86_decode *decode)
 {
-    simulate_rdmsr(env);
+    hvf_simulate_rdmsr(env);
     env->eip += decode->len;
 }
 
-void simulate_wrmsr(CPUX86State *env)
-{
-    X86CPU *cpu = env_archcpu(env);
-    CPUState *cs = env_cpu(env);
-    uint32_t msr = ECX(env);
-    uint64_t data = ((uint64_t)EDX(env) << 32) | EAX(env);
-
-    switch (msr) {
-    case MSR_IA32_TSC:
-        break;
-    case MSR_IA32_APICBASE: {
-        int r;
-
-        r = cpu_set_apic_base(cpu->apic_state, data);
-        if (r < 0) {
-            raise_exception(env, EXCP0D_GPF, 0);
-        }
-
-        break;
-    }
-    case MSR_APIC_START ... MSR_APIC_END: {
-        int ret;
-        int index = (uint32_t)env->regs[R_ECX] - MSR_APIC_START;
-
-        ret = apic_msr_write(index, data);
-        if (ret < 0) {
-            raise_exception(env, EXCP0D_GPF, 0);
-        }
-
-        break;
-    }
-    case MSR_FSBASE:
-        wvmcs(cs->accel->fd, VMCS_GUEST_FS_BASE, data);
-        break;
-    case MSR_GSBASE:
-        wvmcs(cs->accel->fd, VMCS_GUEST_GS_BASE, data);
-        break;
-    case MSR_KERNELGSBASE:
-        wvmcs(cs->accel->fd, VMCS_HOST_FS_BASE, data);
-        break;
-    case MSR_STAR:
-        abort();
-        break;
-    case MSR_LSTAR:
-        abort();
-        break;
-    case MSR_CSTAR:
-        abort();
-        break;
-    case MSR_EFER:
-        /*printf("new efer %llx\n", EFER(cs));*/
-        wvmcs(cs->accel->fd, VMCS_GUEST_IA32_EFER, data);
-        if (data & MSR_EFER_NXE) {
-            hv_vcpu_invalidate_tlb(cs->accel->fd);
-        }
-        break;
-    case MSR_MTRRphysBase(0):
-    case MSR_MTRRphysBase(1):
-    case MSR_MTRRphysBase(2):
-    case MSR_MTRRphysBase(3):
-    case MSR_MTRRphysBase(4):
-    case MSR_MTRRphysBase(5):
-    case MSR_MTRRphysBase(6):
-    case MSR_MTRRphysBase(7):
-        env->mtrr_var[(ECX(env) - MSR_MTRRphysBase(0)) / 2].base = data;
-        break;
-    case MSR_MTRRphysMask(0):
-    case MSR_MTRRphysMask(1):
-    case MSR_MTRRphysMask(2):
-    case MSR_MTRRphysMask(3):
-    case MSR_MTRRphysMask(4):
-    case MSR_MTRRphysMask(5):
-    case MSR_MTRRphysMask(6):
-    case MSR_MTRRphysMask(7):
-        env->mtrr_var[(ECX(env) - MSR_MTRRphysMask(0)) / 2].mask = data;
-        break;
-    case MSR_MTRRfix64K_00000:
-        env->mtrr_fixed[ECX(env) - MSR_MTRRfix64K_00000] = data;
-        break;
-    case MSR_MTRRfix16K_80000:
-    case MSR_MTRRfix16K_A0000:
-        env->mtrr_fixed[ECX(env) - MSR_MTRRfix16K_80000 + 1] = data;
-        break;
-    case MSR_MTRRfix4K_C0000:
-    case MSR_MTRRfix4K_C8000:
-    case MSR_MTRRfix4K_D0000:
-    case MSR_MTRRfix4K_D8000:
-    case MSR_MTRRfix4K_E0000:
-    case MSR_MTRRfix4K_E8000:
-    case MSR_MTRRfix4K_F0000:
-    case MSR_MTRRfix4K_F8000:
-        env->mtrr_fixed[ECX(env) - MSR_MTRRfix4K_C0000 + 3] = data;
-        break;
-    case MSR_MTRRdefType:
-        env->mtrr_deftype = data;
-        break;
-    default:
-        break;
-    }
-
-    /* Related to support known hypervisor interface */
-    /* if (g_hypervisor_iface)
-         g_hypervisor_iface->wrmsr_handler(cs, msr, data);
-
-    printf("write msr %llx\n", RCX(cs));*/
-}
-
 static void exec_wrmsr(CPUX86State *env, struct x86_decode *decode)
 {
-    simulate_wrmsr(env);
+    hvf_simulate_wrmsr(env);
     env->eip += decode->len;
 }
 
@@ -1454,58 +1239,8 @@ static void init_cmd_handler(void)
     }
 }
 
-void load_regs(CPUState *cs)
-{
-    X86CPU *cpu = X86_CPU(cs);
-    CPUX86State *env = &cpu->env;
-
-    int i = 0;
-    RRX(env, R_EAX) = rreg(cs->accel->fd, HV_X86_RAX);
-    RRX(env, R_EBX) = rreg(cs->accel->fd, HV_X86_RBX);
-    RRX(env, R_ECX) = rreg(cs->accel->fd, HV_X86_RCX);
-    RRX(env, R_EDX) = rreg(cs->accel->fd, HV_X86_RDX);
-    RRX(env, R_ESI) = rreg(cs->accel->fd, HV_X86_RSI);
-    RRX(env, R_EDI) = rreg(cs->accel->fd, HV_X86_RDI);
-    RRX(env, R_ESP) = rreg(cs->accel->fd, HV_X86_RSP);
-    RRX(env, R_EBP) = rreg(cs->accel->fd, HV_X86_RBP);
-    for (i = 8; i < 16; i++) {
-        RRX(env, i) = rreg(cs->accel->fd, HV_X86_RAX + i);
-    }
-
-    env->eflags = rreg(cs->accel->fd, HV_X86_RFLAGS);
-    rflags_to_lflags(env);
-    env->eip = rreg(cs->accel->fd, HV_X86_RIP);
-}
-
-void store_regs(CPUState *cs)
-{
-    X86CPU *cpu = X86_CPU(cs);
-    CPUX86State *env = &cpu->env;
-
-    int i = 0;
-    wreg(cs->accel->fd, HV_X86_RAX, RAX(env));
-    wreg(cs->accel->fd, HV_X86_RBX, RBX(env));
-    wreg(cs->accel->fd, HV_X86_RCX, RCX(env));
-    wreg(cs->accel->fd, HV_X86_RDX, RDX(env));
-    wreg(cs->accel->fd, HV_X86_RSI, RSI(env));
-    wreg(cs->accel->fd, HV_X86_RDI, RDI(env));
-    wreg(cs->accel->fd, HV_X86_RBP, RBP(env));
-    wreg(cs->accel->fd, HV_X86_RSP, RSP(env));
-    for (i = 8; i < 16; i++) {
-        wreg(cs->accel->fd, HV_X86_RAX + i, RRX(env, i));
-    }
-
-    lflags_to_rflags(env);
-    wreg(cs->accel->fd, HV_X86_RFLAGS, env->eflags);
-    macvm_set_rip(cs, env->eip);
-}
-
 bool exec_instruction(CPUX86State *env, struct x86_decode *ins)
 {
-    /*if (hvf_vcpu_id(cs))
-    printf("%d, %llx: exec_instruction %s\n", hvf_vcpu_id(cs),  env->eip,
-          decode_cmd_to_string(ins->cmd));*/
-
     if (!_cmd_handler[ins->cmd].handler) {
         printf("Unimplemented handler (%llx) for %d (%x %x) \n", env->eip,
                 ins->cmd, ins->opcode[0],
diff --git a/target/i386/hvf/x86_emu.h b/target/i386/hvf/x86_emu.h
index 8bd97608c42e8..bc0fc72c761b6 100644
--- a/target/i386/hvf/x86_emu.h
+++ b/target/i386/hvf/x86_emu.h
@@ -25,12 +25,7 @@
 
 void init_emu(void);
 bool exec_instruction(CPUX86State *env, struct x86_decode *ins);
-
-void load_regs(CPUState *cpu);
-void store_regs(CPUState *cpu);
-
-void simulate_rdmsr(CPUX86State *env);
-void simulate_wrmsr(CPUX86State *env);
+void x86_emul_raise_exception(CPUX86State *env, int exception_index, int error_code);
 
 target_ulong read_reg(CPUX86State *env, int reg, int size);
 void write_reg(CPUX86State *env, int reg, target_ulong val, int size);
diff --git a/target/i386/hvf/x86_task.c b/target/i386/hvf/x86_task.c
index bcd844cff6060..161217991fc0d 100644
--- a/target/i386/hvf/x86_task.c
+++ b/target/i386/hvf/x86_task.c
@@ -76,16 +76,16 @@ static void load_state_from_tss32(CPUState *cpu, struct x86_tss_segment32 *tss)
     RSI(env) = tss->esi;
     RDI(env) = tss->edi;
 
-    vmx_write_segment_selector(cpu, (x68_segment_selector){{tss->ldt}}, R_LDTR);
-    vmx_write_segment_selector(cpu, (x68_segment_selector){{tss->es}}, R_ES);
-    vmx_write_segment_selector(cpu, (x68_segment_selector){{tss->cs}}, R_CS);
-    vmx_write_segment_selector(cpu, (x68_segment_selector){{tss->ss}}, R_SS);
-    vmx_write_segment_selector(cpu, (x68_segment_selector){{tss->ds}}, R_DS);
-    vmx_write_segment_selector(cpu, (x68_segment_selector){{tss->fs}}, R_FS);
-    vmx_write_segment_selector(cpu, (x68_segment_selector){{tss->gs}}, R_GS);
+    vmx_write_segment_selector(cpu, (x86_segment_selector){{tss->ldt}}, R_LDTR);
+    vmx_write_segment_selector(cpu, (x86_segment_selector){{tss->es}}, R_ES);
+    vmx_write_segment_selector(cpu, (x86_segment_selector){{tss->cs}}, R_CS);
+    vmx_write_segment_selector(cpu, (x86_segment_selector){{tss->ss}}, R_SS);
+    vmx_write_segment_selector(cpu, (x86_segment_selector){{tss->ds}}, R_DS);
+    vmx_write_segment_selector(cpu, (x86_segment_selector){{tss->fs}}, R_FS);
+    vmx_write_segment_selector(cpu, (x86_segment_selector){{tss->gs}}, R_GS);
 }
 
-static int task_switch_32(CPUState *cpu, x68_segment_selector tss_sel, x68_segment_selector old_tss_sel,
+static int task_switch_32(CPUState *cpu, x86_segment_selector tss_sel, x86_segment_selector old_tss_sel,
                           uint64_t old_tss_base, struct x86_segment_descriptor *new_desc)
 {
     struct x86_tss_segment32 tss_seg;
@@ -108,7 +108,7 @@ static int task_switch_32(CPUState *cpu, x68_segment_selector tss_sel, x68_segme
     return 0;
 }
 
-void vmx_handle_task_switch(CPUState *cpu, x68_segment_selector tss_sel, int reason, bool gate_valid, uint8_t gate, uint64_t gate_type)
+void vmx_handle_task_switch(CPUState *cpu, x86_segment_selector tss_sel, int reason, bool gate_valid, uint8_t gate, uint64_t gate_type)
 {
     uint64_t rip = rreg(cpu->accel->fd, HV_X86_RIP);
     if (!gate_valid || (gate_type != VMCS_INTR_T_HWEXCEPTION &&
@@ -119,10 +119,10 @@ void vmx_handle_task_switch(CPUState *cpu, x68_segment_selector tss_sel, int rea
         return;
     }
 
-    load_regs(cpu);
+    hvf_load_regs(cpu);
 
     struct x86_segment_descriptor curr_tss_desc, next_tss_desc;
-    x68_segment_selector old_tss_sel = vmx_read_segment_selector(cpu, R_TR);
+    x86_segment_selector old_tss_sel = vmx_read_segment_selector(cpu, R_TR);
     uint64_t old_tss_base = vmx_read_segment_base(cpu, R_TR);
     uint32_t desc_limit;
     struct x86_call_gate task_gate_desc;
@@ -140,7 +140,7 @@ void vmx_handle_task_switch(CPUState *cpu, x68_segment_selector tss_sel, int rea
         x86_read_call_gate(cpu, &task_gate_desc, gate);
 
         dpl = task_gate_desc.dpl;
-        x68_segment_selector cs = vmx_read_segment_selector(cpu, R_CS);
+        x86_segment_selector cs = vmx_read_segment_selector(cpu, R_CS);
         if (tss_sel.rpl > dpl || cs.rpl > dpl)
             ;//DPRINTF("emulate_gp");
     }
@@ -178,7 +178,7 @@ void vmx_handle_task_switch(CPUState *cpu, x68_segment_selector tss_sel, int rea
     x86_segment_descriptor_to_vmx(cpu, tss_sel, &next_tss_desc, &vmx_seg);
     vmx_write_segment_descriptor(cpu, &vmx_seg, R_TR);
 
-    store_regs(cpu);
+    hvf_store_regs(cpu);
 
     hv_vcpu_invalidate_tlb(cpu->accel->fd);
 }
diff --git a/target/i386/hvf/x86_task.h b/target/i386/hvf/x86_task.h
index 4eaa61a7dee69..b9afac6a47bb3 100644
--- a/target/i386/hvf/x86_task.h
+++ b/target/i386/hvf/x86_task.h
@@ -15,6 +15,6 @@
 #ifndef HVF_X86_TASK_H
 #define HVF_X86_TASK_H
 
-void vmx_handle_task_switch(CPUState *cpu, x68_segment_selector tss_sel,
+void vmx_handle_task_switch(CPUState *cpu, x86_segment_selector tss_sel,
         int reason, bool gate_valid, uint8_t gate, uint64_t gate_type);
 #endif
diff --git a/target/i386/hvf/x86hvf.h b/target/i386/hvf/x86hvf.h
index 423a89b6ad30e..8c46ce8ad02fd 100644
--- a/target/i386/hvf/x86hvf.h
+++ b/target/i386/hvf/x86hvf.h
@@ -31,4 +31,7 @@ void hvf_get_xsave(CPUState *cs);
 void hvf_get_msrs(CPUState *cs);
 void vmx_clear_int_window_exiting(CPUState *cs);
 void vmx_update_tpr(CPUState *cs);
+
+void hvf_load_regs(CPUState *cpu);
+void hvf_store_regs(CPUState *cpu);
 #endif
diff --git a/target/i386/kvm/hyperv-proto.h b/target/i386/kvm/hyperv-proto.h
index 464fbf09e35ab..a9f056f2f3eee 100644
--- a/target/i386/kvm/hyperv-proto.h
+++ b/target/i386/kvm/hyperv-proto.h
@@ -151,18 +151,6 @@
 #define HV_X64_MSR_STIMER3_CONFIG               0x400000B6
 #define HV_X64_MSR_STIMER3_COUNT                0x400000B7
 
-/*
- * Hyper-V Synthetic debug options MSR
- */
-#define HV_X64_MSR_SYNDBG_CONTROL               0x400000F1
-#define HV_X64_MSR_SYNDBG_STATUS                0x400000F2
-#define HV_X64_MSR_SYNDBG_SEND_BUFFER           0x400000F3
-#define HV_X64_MSR_SYNDBG_RECV_BUFFER           0x400000F4
-#define HV_X64_MSR_SYNDBG_PENDING_BUFFER        0x400000F5
-#define HV_X64_MSR_SYNDBG_OPTIONS               0x400000FF
-
-#define HV_X64_SYNDBG_OPTION_USE_HCALLS         BIT(2)
-
 /*
  * Guest crash notification MSRs
  */
diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
index 1bda403f88b41..6269fa80452cd 100644
--- a/target/i386/kvm/kvm-cpu.c
+++ b/target/i386/kvm/kvm-cpu.c
@@ -15,7 +15,7 @@
 #include "hw/boards.h"
 
 #include "kvm_i386.h"
-#include "hw/core/accel-cpu.h"
+#include "accel/accel-cpu-target.h"
 
 static void kvm_set_guest_phys_bits(CPUState *cs)
 {
diff --git a/target/i386/machine.c b/target/i386/machine.c
index d9d4f25d1a2e5..70f632a36fdda 100644
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -1,6 +1,6 @@
 #include "qemu/osdep.h"
 #include "cpu.h"
-#include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "hw/isa/isa.h"
 #include "migration/cpu.h"
 #include "kvm/hyperv.h"
diff --git a/target/i386/nvmm/nvmm-accel-ops.c b/target/i386/nvmm/nvmm-accel-ops.c
index e7b56662feec8..4e4e63de78e99 100644
--- a/target/i386/nvmm/nvmm-accel-ops.c
+++ b/target/i386/nvmm/nvmm-accel-ops.c
@@ -10,6 +10,7 @@
 #include "qemu/osdep.h"
 #include "system/kvm_int.h"
 #include "qemu/main-loop.h"
+#include "system/accel-ops.h"
 #include "system/cpus.h"
 #include "qemu/guest-random.h"
 
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index f112c6c673768..c1184ca2198ff 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -21,7 +21,7 @@
 #include <math.h>
 #include "cpu.h"
 #include "tcg-cpu.h"
-#include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "exec/cpu_ldst.h"
 #include "exec/helper-proto.h"
 #include "fpu/softfloat.h"
@@ -1141,7 +1141,7 @@ void helper_f2xm1(CPUX86State *env)
     int32_t exp = extractFloatx80Exp(ST0);
     bool sign = extractFloatx80Sign(ST0);
 
-    if (floatx80_invalid_encoding(ST0)) {
+    if (floatx80_invalid_encoding(ST0, &env->fp_status)) {
         float_raise(float_flag_invalid, &env->fp_status);
         ST0 = floatx80_default_nan(&env->fp_status);
     } else if (floatx80_is_any_nan(ST0)) {
@@ -1383,8 +1383,8 @@ void helper_fpatan(CPUX86State *env)
     } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
         float_raise(float_flag_invalid, &env->fp_status);
         ST1 = floatx80_silence_nan(ST1, &env->fp_status);
-    } else if (floatx80_invalid_encoding(ST0) ||
-               floatx80_invalid_encoding(ST1)) {
+    } else if (floatx80_invalid_encoding(ST0, &env->fp_status) ||
+               floatx80_invalid_encoding(ST1, &env->fp_status)) {
         float_raise(float_flag_invalid, &env->fp_status);
         ST1 = floatx80_default_nan(&env->fp_status);
     } else if (floatx80_is_any_nan(ST0)) {
@@ -1393,7 +1393,8 @@ void helper_fpatan(CPUX86State *env)
         /* Pass this NaN through.  */
     } else if (floatx80_is_zero(ST1) && !arg0_sign) {
         /* Pass this zero through.  */
-    } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) ||
+    } else if (((floatx80_is_infinity(ST0, &env->fp_status) &&
+                 !floatx80_is_infinity(ST1, &env->fp_status)) ||
                  arg0_exp - arg1_exp >= 80) &&
                !arg0_sign) {
         /*
@@ -1442,8 +1443,8 @@ void helper_fpatan(CPUX86State *env)
             rexp = pi_exp;
             rsig0 = pi_sig_high;
             rsig1 = pi_sig_low;
-        } else if (floatx80_is_infinity(ST1)) {
-            if (floatx80_is_infinity(ST0)) {
+        } else if (floatx80_is_infinity(ST1, &env->fp_status)) {
+            if (floatx80_is_infinity(ST0, &env->fp_status)) {
                 if (arg0_sign) {
                     rexp = pi_34_exp;
                     rsig0 = pi_34_sig_high;
@@ -1462,7 +1463,8 @@ void helper_fpatan(CPUX86State *env)
             rexp = pi_2_exp;
             rsig0 = pi_2_sig_high;
             rsig1 = pi_2_sig_low;
-        } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) {
+        } else if (floatx80_is_infinity(ST0, &env->fp_status) ||
+                   arg0_exp - arg1_exp >= 80) {
             /* ST0 is negative.  */
             rexp = pi_exp;
             rsig0 = pi_sig_high;
@@ -1817,7 +1819,7 @@ void helper_fxtract(CPUX86State *env)
                            &env->fp_status);
         fpush(env);
         ST0 = temp.d;
-    } else if (floatx80_invalid_encoding(ST0)) {
+    } else if (floatx80_invalid_encoding(ST0, &env->fp_status)) {
         float_raise(float_flag_invalid, &env->fp_status);
         ST0 = floatx80_default_nan(&env->fp_status);
         fpush(env);
@@ -1829,10 +1831,10 @@ void helper_fxtract(CPUX86State *env)
         }
         fpush(env);
         ST0 = ST1;
-    } else if (floatx80_is_infinity(ST0)) {
+    } else if (floatx80_is_infinity(ST0, &env->fp_status)) {
         fpush(env);
         ST0 = ST1;
-        ST1 = floatx80_infinity;
+        ST1 = floatx80_default_inf(0, &env->fp_status);
     } else {
         int expdif;
 
@@ -1868,7 +1870,8 @@ static void helper_fprem_common(CPUX86State *env, bool mod)
     env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
     if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
         exp0 == 0x7fff || exp1 == 0x7fff ||
-        floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) {
+        floatx80_invalid_encoding(ST0, &env->fp_status) ||
+        floatx80_invalid_encoding(ST1, &env->fp_status)) {
         ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
     } else {
         if (exp0 == 0) {
@@ -2064,8 +2067,8 @@ void helper_fyl2xp1(CPUX86State *env)
     } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
         float_raise(float_flag_invalid, &env->fp_status);
         ST1 = floatx80_silence_nan(ST1, &env->fp_status);
-    } else if (floatx80_invalid_encoding(ST0) ||
-               floatx80_invalid_encoding(ST1)) {
+    } else if (floatx80_invalid_encoding(ST0, &env->fp_status) ||
+               floatx80_invalid_encoding(ST1, &env->fp_status)) {
         float_raise(float_flag_invalid, &env->fp_status);
         ST1 = floatx80_default_nan(&env->fp_status);
     } else if (floatx80_is_any_nan(ST0)) {
@@ -2162,8 +2165,8 @@ void helper_fyl2x(CPUX86State *env)
     } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
         float_raise(float_flag_invalid, &env->fp_status);
         ST1 = floatx80_silence_nan(ST1, &env->fp_status);
-    } else if (floatx80_invalid_encoding(ST0) ||
-               floatx80_invalid_encoding(ST1)) {
+    } else if (floatx80_invalid_encoding(ST0, &env->fp_status) ||
+               floatx80_invalid_encoding(ST1, &env->fp_status)) {
         float_raise(float_flag_invalid, &env->fp_status);
         ST1 = floatx80_default_nan(&env->fp_status);
     } else if (floatx80_is_any_nan(ST0)) {
@@ -2173,7 +2176,7 @@ void helper_fyl2x(CPUX86State *env)
     } else if (arg0_sign && !floatx80_is_zero(ST0)) {
         float_raise(float_flag_invalid, &env->fp_status);
         ST1 = floatx80_default_nan(&env->fp_status);
-    } else if (floatx80_is_infinity(ST1)) {
+    } else if (floatx80_is_infinity(ST1, &env->fp_status)) {
         FloatRelation cmp = floatx80_compare(ST0, floatx80_one,
                                              &env->fp_status);
         switch (cmp) {
@@ -2188,7 +2191,7 @@ void helper_fyl2x(CPUX86State *env)
             ST1 = floatx80_default_nan(&env->fp_status);
             break;
         }
-    } else if (floatx80_is_infinity(ST0)) {
+    } else if (floatx80_is_infinity(ST0, &env->fp_status)) {
         if (floatx80_is_zero(ST1)) {
             float_raise(float_flag_invalid, &env->fp_status);
             ST1 = floatx80_default_nan(&env->fp_status);
@@ -2329,7 +2332,8 @@ void helper_frndint(CPUX86State *env)
 void helper_fscale(CPUX86State *env)
 {
     uint8_t old_flags = save_exception_flags(env);
-    if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) {
+    if (floatx80_invalid_encoding(ST1, &env->fp_status) ||
+        floatx80_invalid_encoding(ST0, &env->fp_status)) {
         float_raise(float_flag_invalid, &env->fp_status);
         ST0 = floatx80_default_nan(&env->fp_status);
     } else if (floatx80_is_any_nan(ST1)) {
@@ -2341,11 +2345,11 @@ void helper_fscale(CPUX86State *env)
             float_raise(float_flag_invalid, &env->fp_status);
             ST0 = floatx80_silence_nan(ST0, &env->fp_status);
         }
-    } else if (floatx80_is_infinity(ST1) &&
-               !floatx80_invalid_encoding(ST0) &&
+    } else if (floatx80_is_infinity(ST1, &env->fp_status) &&
+               !floatx80_invalid_encoding(ST0, &env->fp_status) &&
                !floatx80_is_any_nan(ST0)) {
         if (floatx80_is_neg(ST1)) {
-            if (floatx80_is_infinity(ST0)) {
+            if (floatx80_is_infinity(ST0, &env->fp_status)) {
                 float_raise(float_flag_invalid, &env->fp_status);
                 ST0 = floatx80_default_nan(&env->fp_status);
             } else {
@@ -2358,9 +2362,8 @@ void helper_fscale(CPUX86State *env)
                 float_raise(float_flag_invalid, &env->fp_status);
                 ST0 = floatx80_default_nan(&env->fp_status);
             } else {
-                ST0 = (floatx80_is_neg(ST0) ?
-                       floatx80_chs(floatx80_infinity) :
-                       floatx80_infinity);
+                ST0 = floatx80_default_inf(floatx80_is_neg(ST0),
+                                           &env->fp_status);
             }
         }
     } else {
diff --git a/target/i386/tcg/misc_helper.c b/target/i386/tcg/misc_helper.c
index ed4cda8001e55..2b5f092a23f01 100644
--- a/target/i386/tcg/misc_helper.c
+++ b/target/i386/tcg/misc_helper.c
@@ -21,7 +21,7 @@
 #include "qemu/log.h"
 #include "cpu.h"
 #include "exec/helper-proto.h"
-#include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "helper-tcg.h"
 
 /*
diff --git a/target/i386/tcg/system/excp_helper.c b/target/i386/tcg/system/excp_helper.c
index 864e3140e3de7..6876329de21e0 100644
--- a/target/i386/tcg/system/excp_helper.c
+++ b/target/i386/tcg/system/excp_helper.c
@@ -20,7 +20,7 @@
 #include "qemu/osdep.h"
 #include "cpu.h"
 #include "exec/cpu_ldst.h"
-#include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "exec/page-protection.h"
 #include "tcg/helper-tcg.h"
 
diff --git a/target/i386/tcg/system/misc_helper.c b/target/i386/tcg/system/misc_helper.c
index c9c4d42f8449c..ce18c75b9f3bf 100644
--- a/target/i386/tcg/system/misc_helper.c
+++ b/target/i386/tcg/system/misc_helper.c
@@ -23,7 +23,7 @@
 #include "exec/helper-proto.h"
 #include "exec/cpu_ldst.h"
 #include "exec/address-spaces.h"
-#include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "tcg/helper-tcg.h"
 #include "hw/i386/apic.h"
 
diff --git a/target/i386/tcg/system/svm_helper.c b/target/i386/tcg/system/svm_helper.c
index 5f95b5227b76a..f9982b72d17ec 100644
--- a/target/i386/tcg/system/svm_helper.c
+++ b/target/i386/tcg/system/svm_helper.c
@@ -21,7 +21,7 @@
 #include "qemu/log.h"
 #include "cpu.h"
 #include "exec/helper-proto.h"
-#include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "exec/cpu_ldst.h"
 #include "tcg/helper-tcg.h"
 
diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c
index 14ee038079a7d..b8aff825eec79 100644
--- a/target/i386/tcg/tcg-cpu.c
+++ b/target/i386/tcg/tcg-cpu.c
@@ -21,7 +21,7 @@
 #include "cpu.h"
 #include "helper-tcg.h"
 #include "qemu/accel.h"
-#include "hw/core/accel-cpu.h"
+#include "accel/accel-cpu-target.h"
 #include "exec/translation-block.h"
 
 #include "tcg-cpu.h"
@@ -105,7 +105,7 @@ static bool x86_debug_check_breakpoint(CPUState *cs)
 }
 #endif
 
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 
 static const TCGCPUOps x86_tcg_ops = {
     .initialize = tcg_x86_init,
diff --git a/target/i386/whpx/whpx-accel-ops.c b/target/i386/whpx/whpx-accel-ops.c
index ab2e014c9eaa2..81fdd06e487ae 100644
--- a/target/i386/whpx/whpx-accel-ops.c
+++ b/target/i386/whpx/whpx-accel-ops.c
@@ -11,6 +11,7 @@
 #include "qemu/osdep.h"
 #include "system/kvm_int.h"
 #include "qemu/main-loop.h"
+#include "system/accel-ops.h"
 #include "system/cpus.h"
 #include "qemu/guest-random.h"
 
diff --git a/target/i386/whpx/whpx-apic.c b/target/i386/whpx/whpx-apic.c
index 4245ab68a273d..630a9616d71d3 100644
--- a/target/i386/whpx/whpx-apic.c
+++ b/target/i386/whpx/whpx-apic.c
@@ -231,7 +231,7 @@ static void whpx_apic_mem_write(void *opaque, hwaddr addr,
 static const MemoryRegionOps whpx_apic_io_ops = {
     .read = whpx_apic_mem_read,
     .write = whpx_apic_mem_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
 static void whpx_apic_reset(APICCommonState *s)
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index e91f4a5239c76..ea1665e270531 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -14,6 +14,7 @@
 #include "system/tcg.h"
 #include "system/kvm.h"
 #include "kvm/kvm_loongarch.h"
+#include "hw/qdev-properties.h"
 #include "exec/exec-all.h"
 #include "exec/translation-block.h"
 #include "cpu.h"
@@ -349,11 +350,9 @@ static void loongarch_restore_state_to_opc(CPUState *cs,
 }
 #endif /* CONFIG_TCG */
 
+#ifndef CONFIG_USER_ONLY
 static bool loongarch_cpu_has_work(CPUState *cs)
 {
-#ifdef CONFIG_USER_ONLY
-    return true;
-#else
     bool has_work = false;
 
     if ((cs->interrupt_request & CPU_INTERRUPT_HARD) &&
@@ -362,8 +361,8 @@ static bool loongarch_cpu_has_work(CPUState *cs)
     }
 
     return has_work;
-#endif
 }
+#endif /* !CONFIG_USER_ONLY */
 
 static int loongarch_cpu_mmu_index(CPUState *cs, bool ifetch)
 {
@@ -406,7 +405,7 @@ static void loongarch_la464_initfn(Object *obj)
 {
     LoongArchCPU *cpu = LOONGARCH_CPU(obj);
     CPULoongArchState *env = &cpu->env;
-    uint32_t data = 0;
+    uint32_t data = 0, field;
     int i;
 
     for (i = 0; i < 21; i++) {
@@ -419,7 +418,13 @@ static void loongarch_la464_initfn(Object *obj)
     data = FIELD_DP32(data, CPUCFG1, ARCH, 2);
     data = FIELD_DP32(data, CPUCFG1, PGMMU, 1);
     data = FIELD_DP32(data, CPUCFG1, IOCSR, 1);
-    data = FIELD_DP32(data, CPUCFG1, PALEN, 0x2f);
+    if (kvm_enabled()) {
+        /* GPA address width of VM is 47, field value is 47 - 1 */
+        field = 0x2e;
+    } else {
+        field = 0x2f; /* 48 bit - 1 */
+    }
+    data = FIELD_DP32(data, CPUCFG1, PALEN, field);
     data = FIELD_DP32(data, CPUCFG1, VALEN, 0x2f);
     data = FIELD_DP32(data, CPUCFG1, UAL, 1);
     data = FIELD_DP32(data, CPUCFG1, RI, 1);
@@ -537,6 +542,7 @@ static void loongarch_max_initfn(Object *obj)
 
 static void loongarch_cpu_reset_hold(Object *obj, ResetType type)
 {
+    uint8_t tlb_ps;
     CPUState *cs = CPU(obj);
     LoongArchCPUClass *lacc = LOONGARCH_CPU_GET_CLASS(obj);
     CPULoongArchState *env = cpu_env(cs);
@@ -585,13 +591,17 @@ static void loongarch_cpu_reset_hold(Object *obj, ResetType type)
      */
     env->CSR_PGDH = 0;
     env->CSR_PGDL = 0;
-    env->CSR_PWCL = 0;
     env->CSR_PWCH = 0;
-    env->CSR_STLBPS = 0;
     env->CSR_EENTRY = 0;
     env->CSR_TLBRENTRY = 0;
     env->CSR_MERRENTRY = 0;
-
+    /* set CSR_PWCL.PTBASE and CSR_STLBPS.PS bits from CSR_PRCFG2 */
+    if (env->CSR_PRCFG2 == 0) {
+        env->CSR_PRCFG2 = 0x3fffff000;
+    }
+    tlb_ps = ctz32(env->CSR_PRCFG2);
+    env->CSR_STLBPS = FIELD_DP64(env->CSR_STLBPS, CSR_STLBPS, PS, tlb_ps);
+    env->CSR_PWCL = FIELD_DP64(env->CSR_PWCL, CSR_PWCL, PTBASE, tlb_ps);
     for (n = 0; n < 4; n++) {
         env->CSR_DMW[n] = FIELD_DP64(env->CSR_DMW[n], CSR_DMW, PLV0, 0);
         env->CSR_DMW[n] = FIELD_DP64(env->CSR_DMW[n], CSR_DMW, PLV1, 0);
@@ -617,6 +627,7 @@ static void loongarch_cpu_reset_hold(Object *obj, ResetType type)
 
 static void loongarch_cpu_disas_set_info(CPUState *s, disassemble_info *info)
 {
+    info->endian = BFD_ENDIAN_LITTLE;
     info->print_insn = print_insn_loongarch;
 }
 
@@ -634,12 +645,23 @@ static void loongarch_cpu_realizefn(DeviceState *dev, Error **errp)
 
     loongarch_cpu_register_gdb_regs_for_features(cs);
 
-    cpu_reset(cs);
     qemu_init_vcpu(cs);
+    cpu_reset(cs);
 
     lacc->parent_realize(dev, errp);
 }
 
+static void loongarch_cpu_unrealizefn(DeviceState *dev)
+{
+    LoongArchCPUClass *lacc = LOONGARCH_CPU_GET_CLASS(dev);
+
+#ifndef CONFIG_USER_ONLY
+    cpu_remove_sync(CPU(dev));
+#endif
+
+    lacc->parent_unrealize(dev);
+}
+
 static bool loongarch_get_lsx(Object *obj, Error **errp)
 {
     return LOONGARCH_CPU(obj)->lsx != ON_OFF_AUTO_OFF;
@@ -712,34 +734,12 @@ static void loongarch_set_lasx(Object *obj, bool value, Error **errp)
     cpu->env.cpucfg[2] = FIELD_DP32(val, CPUCFG2, LASX, value);
 }
 
-static bool loongarch_get_lbt(Object *obj, Error **errp)
-{
-    return LOONGARCH_CPU(obj)->lbt != ON_OFF_AUTO_OFF;
-}
-
-static void loongarch_set_lbt(Object *obj, bool value, Error **errp)
-{
-    LoongArchCPU *cpu = LOONGARCH_CPU(obj);
-
-    cpu->lbt = value ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
-}
-
-static bool loongarch_get_pmu(Object *obj, Error **errp)
-{
-    return LOONGARCH_CPU(obj)->pmu != ON_OFF_AUTO_OFF;
-}
-
-static void loongarch_set_pmu(Object *obj, bool value, Error **errp)
-{
-    LoongArchCPU *cpu = LOONGARCH_CPU(obj);
-
-    cpu->pmu = value ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
-}
-
 void loongarch_cpu_post_init(Object *obj)
 {
     LoongArchCPU *cpu = LOONGARCH_CPU(obj);
 
+    cpu->lbt = ON_OFF_AUTO_OFF;
+    cpu->pmu = ON_OFF_AUTO_OFF;
     cpu->lsx = ON_OFF_AUTO_AUTO;
     cpu->lasx = ON_OFF_AUTO_AUTO;
     object_property_add_bool(obj, "lsx", loongarch_get_lsx,
@@ -748,21 +748,7 @@ void loongarch_cpu_post_init(Object *obj)
                              loongarch_set_lasx);
     /* lbt is enabled only in kvm mode, not supported in tcg mode */
     if (kvm_enabled()) {
-        cpu->lbt = ON_OFF_AUTO_AUTO;
-        object_property_add_bool(obj, "lbt", loongarch_get_lbt,
-                                 loongarch_set_lbt);
-        object_property_set_description(obj, "lbt",
-                                   "Set off to disable Binary Tranlation.");
-
-        cpu->pmu = ON_OFF_AUTO_AUTO;
-        object_property_add_bool(obj, "pmu", loongarch_get_pmu,
-                                 loongarch_set_pmu);
-        object_property_set_description(obj, "pmu",
-                                   "Set off to performance monitor unit.");
-
-    } else {
-        cpu->lbt = ON_OFF_AUTO_OFF;
-        cpu->pmu = ON_OFF_AUTO_OFF;
+        kvm_loongarch_cpu_post_init(cpu);
     }
 }
 
@@ -875,7 +861,7 @@ static void loongarch_cpu_dump_state(CPUState *cs, FILE *f, int flags)
 }
 
 #ifdef CONFIG_TCG
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 
 static const TCGCPUOps loongarch_tcg_ops = {
     .initialize = loongarch_translate_init,
@@ -897,6 +883,7 @@ static const TCGCPUOps loongarch_tcg_ops = {
 #include "hw/core/sysemu-cpu-ops.h"
 
 static const struct SysemuCPUOps loongarch_sysemu_ops = {
+    .has_work = loongarch_cpu_has_work,
     .write_elf64_note = loongarch_cpu_write_elf64_note,
     .get_phys_page_debug = loongarch_cpu_get_phys_page_debug,
 };
@@ -909,6 +896,13 @@ static int64_t loongarch_cpu_get_arch_id(CPUState *cs)
 }
 #endif
 
+static const Property loongarch_cpu_properties[] = {
+    DEFINE_PROP_INT32("socket-id", LoongArchCPU, socket_id, 0),
+    DEFINE_PROP_INT32("core-id", LoongArchCPU, core_id, 0),
+    DEFINE_PROP_INT32("thread-id", LoongArchCPU, thread_id, 0),
+    DEFINE_PROP_INT32("node-id", LoongArchCPU, node_id, CPU_UNSET_NUMA_NODE_ID),
+};
+
 static void loongarch_cpu_class_init(ObjectClass *c, void *data)
 {
     LoongArchCPUClass *lacc = LOONGARCH_CPU_CLASS(c);
@@ -916,13 +910,15 @@ static void loongarch_cpu_class_init(ObjectClass *c, void *data)
     DeviceClass *dc = DEVICE_CLASS(c);
     ResettableClass *rc = RESETTABLE_CLASS(c);
 
+    device_class_set_props(dc, loongarch_cpu_properties);
     device_class_set_parent_realize(dc, loongarch_cpu_realizefn,
                                     &lacc->parent_realize);
+    device_class_set_parent_unrealize(dc, loongarch_cpu_unrealizefn,
+                                      &lacc->parent_unrealize);
     resettable_class_set_parent_phases(rc, NULL, loongarch_cpu_reset_hold, NULL,
                                        &lacc->parent_phases);
 
     cc->class_by_name = loongarch_cpu_class_by_name;
-    cc->has_work = loongarch_cpu_has_work;
     cc->mmu_index = loongarch_cpu_mmu_index;
     cc->dump_state = loongarch_cpu_dump_state;
     cc->set_pc = loongarch_cpu_set_pc;
@@ -940,6 +936,7 @@ static void loongarch_cpu_class_init(ObjectClass *c, void *data)
 #ifdef CONFIG_TCG
     cc->tcg_ops = &loongarch_tcg_ops;
 #endif
+    dc->user_creatable = true;
 }
 
 static const gchar *loongarch32_gdb_arch_name(CPUState *cs)
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index f2a23b7a43436..eae874c67bd68 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -287,6 +287,8 @@ enum loongarch_features {
     LOONGARCH_FEATURE_LASX,
     LOONGARCH_FEATURE_LBT, /* loongson binary translation extension */
     LOONGARCH_FEATURE_PMU,
+    LOONGARCH_FEATURE_PV_IPI,
+    LOONGARCH_FEATURE_STEALTIME,
 };
 
 typedef struct  LoongArchBT {
@@ -310,6 +312,7 @@ typedef struct CPUArchState {
     lbt_t  lbt;
 
     uint32_t cpucfg[21];
+    uint32_t pv_features;
 
     /* LoongArch CSRs */
     uint64_t CSR_CRMD;
@@ -390,6 +393,12 @@ typedef struct CPUArchState {
 #endif
 } CPULoongArchState;
 
+typedef struct LoongArchCPUTopo {
+    int32_t socket_id;  /* socket-id of this VCPU */
+    int32_t core_id;    /* core-id of this VCPU */
+    int32_t thread_id;  /* thread-id of this VCPU */
+} LoongArchCPUTopo;
+
 /**
  * LoongArchCPU:
  * @env: #CPULoongArchState
@@ -406,6 +415,12 @@ struct ArchCPU {
     OnOffAuto pmu;
     OnOffAuto lsx;
     OnOffAuto lasx;
+    OnOffAuto kvm_pv_ipi;
+    OnOffAuto kvm_steal_time;
+    int32_t socket_id;  /* socket-id of this CPU */
+    int32_t core_id;    /* core-id of this CPU */
+    int32_t thread_id;  /* thread-id of this CPU */
+    int32_t node_id;    /* NUMA node of this CPU */
 
     /* 'compatible' string for this CPU for Linux device trees */
     const char *dtb_compatible;
@@ -424,6 +439,7 @@ struct LoongArchCPUClass {
     CPUClass parent_class;
 
     DeviceRealize parent_realize;
+    DeviceUnrealize parent_unrealize;
     ResettablePhases parent_phases;
 };
 
@@ -491,4 +507,12 @@ static inline void cpu_get_tb_cpu_state(CPULoongArchState *env, vaddr *pc,
 
 void loongarch_cpu_post_init(Object *obj);
 
+#ifdef CONFIG_KVM
+void kvm_loongarch_cpu_post_init(LoongArchCPU *cpu);
+#else
+static inline void kvm_loongarch_cpu_post_init(LoongArchCPU *cpu)
+{
+}
+#endif
+
 #endif /* LOONGARCH_CPU_H */
diff --git a/target/loongarch/gdbstub.c b/target/loongarch/gdbstub.c
index dafa4feb75d91..471eda28c7307 100644
--- a/target/loongarch/gdbstub.c
+++ b/target/loongarch/gdbstub.c
@@ -63,23 +63,24 @@ int loongarch_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
 {
     CPULoongArchState *env = cpu_env(cs);
     target_ulong tmp;
-    int read_length;
     int length = 0;
 
+    if (n < 0 || n > 34) {
+        return 0;
+    }
+
     if (is_la64(env)) {
         tmp = ldq_le_p(mem_buf);
-        read_length = 8;
+        length = 8;
     } else {
         tmp = ldl_le_p(mem_buf);
-        read_length = 4;
+        length = 4;
     }
 
     if (0 <= n && n < 32) {
         env->gpr[n] = tmp;
-        length = read_length;
     } else if (n == 33) {
         set_pc(env, tmp);
-        length = read_length;
     }
     return length;
 }
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 943517b5f22c0..1d5cb0198c971 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -100,6 +100,7 @@ DEF_HELPER_1(rdtime_d, i64, env)
 DEF_HELPER_1(csrrd_pgd, i64, env)
 DEF_HELPER_1(csrrd_cpuid, i64, env)
 DEF_HELPER_1(csrrd_tval, i64, env)
+DEF_HELPER_2(csrwr_stlbps, i64, env, tl)
 DEF_HELPER_2(csrwr_estat, i64, env, tl)
 DEF_HELPER_2(csrwr_asid, i64, env, tl)
 DEF_HELPER_2(csrwr_tcfg, i64, env, tl)
diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h
index 7b254c5f49cf6..1cd959a766773 100644
--- a/target/loongarch/internals.h
+++ b/target/loongarch/internals.h
@@ -43,6 +43,8 @@ enum {
     TLBRET_PE = 7,
 };
 
+bool check_ps(CPULoongArchState *ent, int ps);
+
 extern const VMStateDescription vmstate_loongarch_cpu;
 
 void loongarch_cpu_set_irq(void *opaque, int irq, int level);
diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c
index a3f55155b0301..28735c80be08b 100644
--- a/target/loongarch/kvm/kvm.c
+++ b/target/loongarch/kvm/kvm.c
@@ -8,7 +8,7 @@
 #include "qemu/osdep.h"
 #include <sys/ioctl.h>
 #include <linux/kvm.h>
-
+#include "asm-loongarch/kvm_para.h"
 #include "qapi/error.h"
 #include "qemu/timer.h"
 #include "qemu/error-report.h"
@@ -21,6 +21,7 @@
 #include "exec/address-spaces.h"
 #include "hw/boards.h"
 #include "hw/irq.h"
+#include "hw/loongarch/virt.h"
 #include "qemu/log.h"
 #include "hw/loader.h"
 #include "system/runstate.h"
@@ -83,6 +84,33 @@ static int kvm_set_stealtime(CPUState *cs)
     return 0;
 }
 
+static int kvm_set_pv_features(CPUState *cs)
+{
+    CPULoongArchState *env = cpu_env(cs);
+    int err;
+    uint64_t val;
+    struct kvm_device_attr attr = {
+        .group = KVM_LOONGARCH_VCPU_CPUCFG,
+        .attr = CPUCFG_KVM_FEATURE,
+        .addr = (uint64_t)&val,
+    };
+
+    err = kvm_vcpu_ioctl(cs, KVM_HAS_DEVICE_ATTR, attr);
+    if (err) {
+        return 0;
+    }
+
+    val = env->pv_features;
+    err = kvm_vcpu_ioctl(cs, KVM_SET_DEVICE_ATTR, attr);
+    if (err) {
+        error_report("Fail to set pv feature "TARGET_FMT_lx " with error %s",
+                      val, strerror(errno));
+        return err;
+    }
+
+    return 0;
+}
+
 static int kvm_loongarch_get_regs_core(CPUState *cs)
 {
     int ret = 0;
@@ -581,9 +609,16 @@ static int kvm_loongarch_get_lbt(CPUState *cs)
 void kvm_arch_reset_vcpu(CPUState *cs)
 {
     CPULoongArchState *env = cpu_env(cs);
+    int ret = 0;
+    uint64_t unused = 0;
 
     env->mp_state = KVM_MP_STATE_RUNNABLE;
-    kvm_set_one_reg(cs, KVM_REG_LOONGARCH_VCPU_RESET, 0);
+    ret = kvm_set_one_reg(cs, KVM_REG_LOONGARCH_VCPU_RESET, &unused);
+    if (ret) {
+        error_report("Failed to set KVM_REG_LOONGARCH_VCPU_RESET: %s",
+                     strerror(errno));
+        exit(EXIT_FAILURE);
+    }
 }
 
 static int kvm_loongarch_get_mpstate(CPUState *cs)
@@ -731,6 +766,7 @@ int kvm_arch_get_registers(CPUState *cs, Error **errp)
 int kvm_arch_put_registers(CPUState *cs, int level, Error **errp)
 {
     int ret;
+    static int once;
 
     ret = kvm_loongarch_put_regs_core(cs);
     if (ret) {
@@ -757,6 +793,14 @@ int kvm_arch_put_registers(CPUState *cs, int level, Error **errp)
         return ret;
     }
 
+    if (!once) {
+        ret = kvm_set_pv_features(cs);
+        if (ret) {
+            return ret;
+        }
+        once = 1;
+    }
+
     if (level >= KVM_PUT_FULL_STATE) {
         /*
          * only KVM_PUT_FULL_STATE is required, kvm kernel will clear
@@ -875,6 +919,18 @@ static bool kvm_feature_supported(CPUState *cs, enum loongarch_features feature)
         ret = kvm_vm_ioctl(kvm_state, KVM_HAS_DEVICE_ATTR, &attr);
         return (ret == 0);
 
+    case LOONGARCH_FEATURE_PV_IPI:
+        attr.group = KVM_LOONGARCH_VM_FEAT_CTRL;
+        attr.attr = KVM_LOONGARCH_VM_FEAT_PV_IPI;
+        ret = kvm_vm_ioctl(kvm_state, KVM_HAS_DEVICE_ATTR, &attr);
+        return (ret == 0);
+
+    case LOONGARCH_FEATURE_STEALTIME:
+        attr.group = KVM_LOONGARCH_VM_FEAT_CTRL;
+        attr.attr = KVM_LOONGARCH_VM_FEAT_PV_STEALTIME;
+        ret = kvm_vm_ioctl(kvm_state, KVM_HAS_DEVICE_ATTR, &attr);
+        return (ret == 0);
+
     default:
         return false;
     }
@@ -973,6 +1029,52 @@ static int kvm_cpu_check_pmu(CPUState *cs, Error **errp)
     return 0;
 }
 
+static int kvm_cpu_check_pv_features(CPUState *cs, Error **errp)
+{
+    MachineState *ms = MACHINE(qdev_get_machine());
+    LoongArchCPU *cpu = LOONGARCH_CPU(cs);
+    CPULoongArchState *env = cpu_env(cs);
+    bool kvm_supported;
+
+    kvm_supported = kvm_feature_supported(cs, LOONGARCH_FEATURE_PV_IPI);
+    if (cpu->kvm_pv_ipi == ON_OFF_AUTO_ON) {
+        if (!kvm_supported) {
+            error_setg(errp, "'pv_ipi' feature not supported by KVM host");
+            return -ENOTSUP;
+        }
+    } else if (cpu->kvm_pv_ipi != ON_OFF_AUTO_AUTO) {
+        kvm_supported = false;
+    }
+
+    if (kvm_supported) {
+        env->pv_features |= BIT(KVM_FEATURE_IPI);
+    }
+
+    kvm_supported = kvm_feature_supported(cs, LOONGARCH_FEATURE_STEALTIME);
+    if (cpu->kvm_steal_time == ON_OFF_AUTO_ON) {
+        if (!kvm_supported) {
+            error_setg(errp, "'kvm stealtime' feature not supported by KVM host");
+            return -ENOTSUP;
+        }
+    } else if (cpu->kvm_steal_time != ON_OFF_AUTO_AUTO) {
+        kvm_supported = false;
+    }
+
+    if (kvm_supported) {
+        env->pv_features |= BIT(KVM_FEATURE_STEAL_TIME);
+    }
+
+    if (object_dynamic_cast(OBJECT(ms), TYPE_LOONGARCH_VIRT_MACHINE)) {
+        LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(ms);
+
+        if (virt_is_veiointc_enabled(lvms)) {
+            env->pv_features |= BIT(KVM_FEATURE_VIRT_EXTIOI);
+        }
+    }
+
+    return 0;
+}
+
 int kvm_arch_init_vcpu(CPUState *cs)
 {
     uint64_t val;
@@ -1006,9 +1108,89 @@ int kvm_arch_init_vcpu(CPUState *cs)
         error_report_err(local_err);
     }
 
+    ret = kvm_cpu_check_pv_features(cs, &local_err);
+    if (ret < 0) {
+        error_report_err(local_err);
+    }
+
     return ret;
 }
 
+static bool loongarch_get_lbt(Object *obj, Error **errp)
+{
+    return LOONGARCH_CPU(obj)->lbt != ON_OFF_AUTO_OFF;
+}
+
+static void loongarch_set_lbt(Object *obj, bool value, Error **errp)
+{
+    LoongArchCPU *cpu = LOONGARCH_CPU(obj);
+
+    cpu->lbt = value ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
+}
+
+static bool loongarch_get_pmu(Object *obj, Error **errp)
+{
+    return LOONGARCH_CPU(obj)->pmu != ON_OFF_AUTO_OFF;
+}
+
+static void loongarch_set_pmu(Object *obj, bool value, Error **errp)
+{
+    LoongArchCPU *cpu = LOONGARCH_CPU(obj);
+
+    cpu->pmu = value ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
+}
+
+static bool kvm_pv_ipi_get(Object *obj, Error **errp)
+{
+    return LOONGARCH_CPU(obj)->kvm_pv_ipi != ON_OFF_AUTO_OFF;
+}
+
+static void kvm_pv_ipi_set(Object *obj, bool value, Error **errp)
+{
+    LoongArchCPU *cpu = LOONGARCH_CPU(obj);
+
+    cpu->kvm_pv_ipi = value ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
+}
+
+static bool kvm_steal_time_get(Object *obj, Error **errp)
+{
+    return LOONGARCH_CPU(obj)->kvm_steal_time != ON_OFF_AUTO_OFF;
+}
+
+static void kvm_steal_time_set(Object *obj, bool value, Error **errp)
+{
+    LoongArchCPU *cpu = LOONGARCH_CPU(obj);
+
+    cpu->kvm_steal_time = value ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
+}
+
+void kvm_loongarch_cpu_post_init(LoongArchCPU *cpu)
+{
+    cpu->lbt = ON_OFF_AUTO_AUTO;
+    object_property_add_bool(OBJECT(cpu), "lbt", loongarch_get_lbt,
+                             loongarch_set_lbt);
+    object_property_set_description(OBJECT(cpu), "lbt",
+                                   "Set off to disable Binary Tranlation.");
+
+    cpu->pmu = ON_OFF_AUTO_AUTO;
+    object_property_add_bool(OBJECT(cpu), "pmu", loongarch_get_pmu,
+                             loongarch_set_pmu);
+    object_property_set_description(OBJECT(cpu), "pmu",
+                               "Set off to disable performance monitor unit.");
+
+    cpu->kvm_pv_ipi = ON_OFF_AUTO_AUTO;
+    object_property_add_bool(OBJECT(cpu), "kvm-pv-ipi", kvm_pv_ipi_get,
+                             kvm_pv_ipi_set);
+    object_property_set_description(OBJECT(cpu), "kvm-pv-ipi",
+                                    "Set off to disable KVM paravirt IPI.");
+
+    cpu->kvm_steal_time = ON_OFF_AUTO_AUTO;
+    object_property_add_bool(OBJECT(cpu), "kvm-steal-time", kvm_steal_time_get,
+                             kvm_steal_time_set);
+    object_property_set_description(OBJECT(cpu), "kvm-steal-time",
+                                    "Set off to disable KVM steal time.");
+}
+
 int kvm_arch_destroy_vcpu(CPUState *cs)
 {
     return 0;
diff --git a/target/loongarch/loongarch-qmp-cmds.c b/target/loongarch/loongarch-qmp-cmds.c
index 3fde5a5a20807..6f732d80f3f89 100644
--- a/target/loongarch/loongarch-qmp-cmds.c
+++ b/target/loongarch/loongarch-qmp-cmds.c
@@ -40,7 +40,7 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp)
 }
 
 static const char *cpu_model_advertised_features[] = {
-    "lsx", "lasx", "lbt", "pmu", NULL
+    "lsx", "lasx", "lbt", "pmu", "kvm-pv-ipi", "kvm-steal-time", NULL
 };
 
 CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type,
diff --git a/target/loongarch/tcg/csr_helper.c b/target/loongarch/tcg/csr_helper.c
index 6c95be991083e..379c71e741343 100644
--- a/target/loongarch/tcg/csr_helper.c
+++ b/target/loongarch/tcg/csr_helper.c
@@ -12,11 +12,27 @@
 #include "internals.h"
 #include "qemu/host-utils.h"
 #include "exec/helper-proto.h"
-#include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "exec/cpu_ldst.h"
 #include "hw/irq.h"
 #include "cpu-csr.h"
 
+target_ulong helper_csrwr_stlbps(CPULoongArchState *env, target_ulong val)
+{
+    int64_t old_v = env->CSR_STLBPS;
+
+    /*
+     * The real hardware only supports the min tlb_ps is 12
+     * tlb_ps=0 may cause undefined-behavior.
+     */
+    uint8_t tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS);
+    if (!check_ps(env, tlb_ps)) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "Attempted set ps %d\n", tlb_ps);
+    }
+    return old_v;
+}
+
 target_ulong helper_csrrd_pgd(CPULoongArchState *env)
 {
     int64_t v;
@@ -99,7 +115,7 @@ target_ulong helper_csrwr_ticlr(CPULoongArchState *env, target_ulong val)
 
 target_ulong helper_csrwr_pwcl(CPULoongArchState *env, target_ulong val)
 {
-    int shift;
+    int shift, ptbase;
     int64_t old_v = env->CSR_PWCL;
 
     /*
@@ -107,12 +123,16 @@ target_ulong helper_csrwr_pwcl(CPULoongArchState *env, target_ulong val)
      * treated as illegal.
      */
     shift = FIELD_EX64(val, CSR_PWCL, PTEWIDTH);
+    ptbase = FIELD_EX64(val, CSR_PWCL, PTBASE);
     if (shift) {
         qemu_log_mask(LOG_GUEST_ERROR,
                       "Attempted set pte width with %d bit\n", 64 << shift);
         val = FIELD_DP64(val, CSR_PWCL, PTEWIDTH, 0);
     }
-
-    env->CSR_PWCL = val;
+    if (!check_ps(env, ptbase)) {
+         qemu_log_mask(LOG_GUEST_ERROR,
+                      "Attrmpted set ptbase 2^%d\n", ptbase);
+    }
+    env->CSR_PWCL =val;
     return old_v;
 }
diff --git a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
index 974bc2a70fedd..3d70d75941715 100644
--- a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
+++ b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
@@ -56,7 +56,7 @@ static bool gen_am(DisasContext *ctx, arg_rrr *a,
     if (a->rd != 0 && (a->rj == a->rd || a->rk == a->rd)) {
         qemu_log_mask(LOG_GUEST_ERROR,
                       "Warning: source register overlaps destination register"
-                      "in atomic insn at pc=0x" TARGET_FMT_lx "\n",
+                      "in atomic insn at pc=0x%" VADDR_PRIx "\n",
                       ctx->base.pc_next - 4);
         return false;
     }
diff --git a/target/loongarch/tcg/insn_trans/trans_privileged.c.inc b/target/loongarch/tcg/insn_trans/trans_privileged.c.inc
index 3afa23af79303..ecbfe23b6362b 100644
--- a/target/loongarch/tcg/insn_trans/trans_privileged.c.inc
+++ b/target/loongarch/tcg/insn_trans/trans_privileged.c.inc
@@ -74,6 +74,7 @@ static bool set_csr_trans_func(unsigned int csr_num, GenCSRRead readfn,
 
 void loongarch_csr_translate_init(void)
 {
+    SET_CSR_FUNC(STLBPS, NULL, gen_helper_csrwr_stlbps);
     SET_CSR_FUNC(ESTAT, NULL, gen_helper_csrwr_estat);
     SET_CSR_FUNC(ASID,  NULL, gen_helper_csrwr_asid);
     SET_CSR_FUNC(PGD,   gen_helper_csrrd_pgd, NULL);
diff --git a/target/loongarch/tcg/tlb_helper.c b/target/loongarch/tcg/tlb_helper.c
index a323606e5a9e3..646dbf59de5cc 100644
--- a/target/loongarch/tcg/tlb_helper.c
+++ b/target/loongarch/tcg/tlb_helper.c
@@ -12,12 +12,21 @@
 #include "cpu.h"
 #include "internals.h"
 #include "exec/helper-proto.h"
+#include "exec/cputlb.h"
 #include "exec/exec-all.h"
 #include "exec/page-protection.h"
 #include "exec/cpu_ldst.h"
 #include "exec/log.h"
 #include "cpu-csr.h"
 
+bool check_ps(CPULoongArchState *env, int tlb_ps)
+{
+     if (tlb_ps > 64) {
+         return false;
+     }
+     return BIT_ULL(tlb_ps) & (env->CSR_PRCFG2);
+}
+
 void get_dir_base_width(CPULoongArchState *env, uint64_t *dir_base,
                                uint64_t *dir_width, target_ulong level)
 {
@@ -123,7 +132,11 @@ static void invalidate_tlb_entry(CPULoongArchState *env, int index)
     uint8_t tlb_v0 = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, V);
     uint8_t tlb_v1 = FIELD_EX64(tlb->tlb_entry1, TLBENTRY, V);
     uint64_t tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN);
+    uint8_t tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E);
 
+    if (!tlb_e) {
+        return;
+    }
     if (index >= LOONGARCH_STLB) {
         tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS);
     } else {
@@ -187,8 +200,10 @@ static void fill_tlb_entry(CPULoongArchState *env, int index)
         lo1 = env->CSR_TLBELO1;
     }
 
-    if (csr_ps == 0) {
-        qemu_log_mask(CPU_LOG_MMU, "page size is 0\n");
+    /*check csr_ps */
+    if (!check_ps(env, csr_ps)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "csr_ps %d is illegal\n", csr_ps);
+        return;
     }
 
     /* Only MTLB has the ps fields */
@@ -298,7 +313,16 @@ void helper_tlbfill(CPULoongArchState *env)
         pagesize = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, PS);
     }
 
+    if (!check_ps(env, pagesize)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "pagesize %d is illegal\n", pagesize);
+        return;
+    }
+
     stlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS);
+    if (!check_ps(env, stlb_ps)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "stlb_ps %d is illegal\n", stlb_ps);
+        return;
+    }
 
     if (pagesize == stlb_ps) {
         /* Only write into STLB bits [47:13] */
@@ -427,7 +451,11 @@ void helper_invtlb_page_asid(CPULoongArchState *env, target_ulong info,
         uint16_t tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID);
         uint64_t vpn, tlb_vppn;
         uint8_t tlb_ps, compare_shift;
+        uint8_t tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E);
 
+        if (!tlb_e) {
+            continue;
+        }
         if (i >= LOONGARCH_STLB) {
             tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS);
         } else {
@@ -456,7 +484,11 @@ void helper_invtlb_page_asid_or_g(CPULoongArchState *env,
         uint16_t tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID);
         uint64_t vpn, tlb_vppn;
         uint8_t tlb_ps, compare_shift;
+        uint8_t tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E);
 
+        if (!tlb_e) {
+            continue;
+        }
         if (i >= LOONGARCH_STLB) {
             tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS);
         } else {
diff --git a/target/loongarch/tcg/translate.c b/target/loongarch/tcg/translate.c
index 3480f54c71097..e59e4ed25b13f 100644
--- a/target/loongarch/tcg/translate.c
+++ b/target/loongarch/tcg/translate.c
@@ -289,7 +289,7 @@ static void loongarch_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
 
     if (!decode(ctx, ctx->opcode)) {
         qemu_log_mask(LOG_UNIMP, "Error: unknown opcode. "
-                      TARGET_FMT_lx ": 0x%x\n",
+                      "0x%" VADDR_PRIx ": 0x%x\n",
                       ctx->base.pc_next, ctx->opcode);
         generate_exception(ctx, EXCCODE_INE);
     }
diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c
index 41dfdf5804514..0065e1c1ca526 100644
--- a/target/m68k/cpu.c
+++ b/target/m68k/cpu.c
@@ -51,10 +51,12 @@ static void m68k_restore_state_to_opc(CPUState *cs,
     }
 }
 
+#ifndef CONFIG_USER_ONLY
 static bool m68k_cpu_has_work(CPUState *cs)
 {
     return cs->interrupt_request & CPU_INTERRUPT_HARD;
 }
+#endif /* !CONFIG_USER_ONLY */
 
 static int m68k_cpu_mmu_index(CPUState *cs, bool ifetch)
 {
@@ -107,6 +109,41 @@ static void m68k_cpu_reset_hold(Object *obj, ResetType type)
     set_float_2nan_prop_rule(float_2nan_prop_ab, &env->fp_status);
     /* Default NaN: sign bit clear, all frac bits set */
     set_float_default_nan_pattern(0b01111111, &env->fp_status);
+    /*
+     * m68k-specific floatx80 behaviour:
+     *  * default Infinity values have a zero Integer bit
+     *  * input Infinities may have the Integer bit either 0 or 1
+     *  * pseudo-denormals supported for input and output
+     *  * don't raise Invalid for pseudo-NaN/pseudo-Inf/Unnormal
+     *
+     * With m68k, the explicit integer bit can be zero in the case of:
+     * - zeros                (exp == 0, mantissa == 0)
+     * - denormalized numbers (exp == 0, mantissa != 0)
+     * - unnormalized numbers (exp != 0, exp < 0x7FFF)
+     * - infinities           (exp == 0x7FFF, mantissa == 0)
+     * - not-a-numbers        (exp == 0x7FFF, mantissa != 0)
+     *
+     * For infinities and NaNs, the explicit integer bit can be either one or
+     * zero.
+     *
+     * The IEEE 754 standard does not define a zero integer bit. Such a number
+     * is an unnormalized number. Hardware does not directly support
+     * denormalized and unnormalized numbers, but implicitly supports them by
+     * trapping them as unimplemented data types, allowing efficient conversion
+     * in software.
+     *
+     * See "M68000 FAMILY PROGRAMMER’S REFERENCE MANUAL",
+     *     "1.6 FLOATING-POINT DATA TYPES"
+     *
+     * Note though that QEMU's fp emulation does directly handle both
+     * denormal and unnormal values, and does not trap to guest software.
+     */
+    set_floatx80_behaviour(floatx80_default_inf_int_bit_is_zero |
+                           floatx80_pseudo_inf_valid |
+                           floatx80_pseudo_nan_valid |
+                           floatx80_unnormal_valid |
+                           floatx80_pseudo_denormal_valid,
+                           &env->fp_status);
 
     nan = floatx80_default_nan(&env->fp_status);
     for (i = 0; i < 8; i++) {
@@ -122,6 +159,7 @@ static void m68k_cpu_reset_hold(Object *obj, ResetType type)
 static void m68k_cpu_disas_set_info(CPUState *s, disassemble_info *info)
 {
     info->print_insn = print_insn_m68k;
+    info->endian = BFD_ENDIAN_BIG;
     info->mach = 0;
 }
 
@@ -543,11 +581,12 @@ static const VMStateDescription vmstate_m68k_cpu = {
 #include "hw/core/sysemu-cpu-ops.h"
 
 static const struct SysemuCPUOps m68k_sysemu_ops = {
+    .has_work = m68k_cpu_has_work,
     .get_phys_page_debug = m68k_cpu_get_phys_page_debug,
 };
 #endif /* !CONFIG_USER_ONLY */
 
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 
 static const TCGCPUOps m68k_tcg_ops = {
     .initialize = m68k_tcg_init,
@@ -576,7 +615,6 @@ static void m68k_cpu_class_init(ObjectClass *c, void *data)
                                        &mcc->parent_phases);
 
     cc->class_by_name = m68k_cpu_class_by_name;
-    cc->has_work = m68k_cpu_has_work;
     cc->mmu_index = m68k_cpu_mmu_index;
     cc->dump_state = m68k_cpu_dump_state;
     cc->set_pc = m68k_cpu_set_pc;
diff --git a/target/m68k/fpu_helper.c b/target/m68k/fpu_helper.c
index 339b73ad7dc1d..eb1cb8c6872c3 100644
--- a/target/m68k/fpu_helper.c
+++ b/target/m68k/fpu_helper.c
@@ -455,7 +455,7 @@ void HELPER(ftst)(CPUM68KState *env, FPReg *val)
 
     if (floatx80_is_any_nan(val->d)) {
         cc |= FPSR_CC_A;
-    } else if (floatx80_is_infinity(val->d)) {
+    } else if (floatx80_is_infinity(val->d, &env->fp_status)) {
         cc |= FPSR_CC_I;
     } else if (floatx80_is_zero(val->d)) {
         cc |= FPSR_CC_Z;
diff --git a/target/m68k/helper.c b/target/m68k/helper.c
index beefeb7069c91..0bf574830f9e7 100644
--- a/target/m68k/helper.c
+++ b/target/m68k/helper.c
@@ -20,6 +20,7 @@
 
 #include "qemu/osdep.h"
 #include "cpu.h"
+#include "exec/cputlb.h"
 #include "exec/exec-all.h"
 #include "exec/page-protection.h"
 #include "exec/gdbstub.h"
diff --git a/target/m68k/softfloat.c b/target/m68k/softfloat.c
index 02dcc03d15d8a..d1f150e641fda 100644
--- a/target/m68k/softfloat.c
+++ b/target/m68k/softfloat.c
@@ -142,8 +142,7 @@ floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status)
         if ((uint64_t) (aSig << 1)) {
             return propagateFloatx80NaN(a, b, status);
         }
-        return packFloatx80(aSign, floatx80_infinity.high,
-                            floatx80_infinity.low);
+        return floatx80_default_inf(aSign, status);
     }
     if (aExp == 0) {
         if (aSig == 0) {
@@ -245,7 +244,7 @@ floatx80 floatx80_lognp1(floatx80 a, float_status *status)
             float_raise(float_flag_invalid, status);
             return floatx80_default_nan(status);
         }
-        return packFloatx80(0, floatx80_infinity.high, floatx80_infinity.low);
+        return floatx80_default_inf(0, status);
     }
 
     if (aExp == 0 && aSig == 0) {
@@ -255,8 +254,7 @@ floatx80 floatx80_lognp1(floatx80 a, float_status *status)
     if (aSign && aExp >= one_exp) {
         if (aExp == one_exp && aSig == one_sig) {
             float_raise(float_flag_divbyzero, status);
-            return packFloatx80(aSign, floatx80_infinity.high,
-                                floatx80_infinity.low);
+            return floatx80_default_inf(aSign, status);
         }
         float_raise(float_flag_invalid, status);
         return floatx80_default_nan(status);
@@ -442,8 +440,7 @@ floatx80 floatx80_logn(floatx80 a, float_status *status)
             propagateFloatx80NaNOneArg(a, status);
         }
         if (aSign == 0) {
-            return packFloatx80(0, floatx80_infinity.high,
-                                floatx80_infinity.low);
+            return floatx80_default_inf(0, status);
         }
     }
 
@@ -452,8 +449,7 @@ floatx80 floatx80_logn(floatx80 a, float_status *status)
     if (aExp == 0) {
         if (aSig == 0) { /* zero */
             float_raise(float_flag_divbyzero, status);
-            return packFloatx80(1, floatx80_infinity.high,
-                                floatx80_infinity.low);
+            return floatx80_default_inf(1, status);
         }
         if ((aSig & one_sig) == 0) { /* denormal */
             normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
@@ -610,15 +606,13 @@ floatx80 floatx80_log10(floatx80 a, float_status *status)
             propagateFloatx80NaNOneArg(a, status);
         }
         if (aSign == 0) {
-            return packFloatx80(0, floatx80_infinity.high,
-                                floatx80_infinity.low);
+            return floatx80_default_inf(0, status);
         }
     }
 
     if (aExp == 0 && aSig == 0) {
         float_raise(float_flag_divbyzero, status);
-        return packFloatx80(1, floatx80_infinity.high,
-                            floatx80_infinity.low);
+        return floatx80_default_inf(1, status);
     }
 
     if (aSign) {
@@ -668,16 +662,14 @@ floatx80 floatx80_log2(floatx80 a, float_status *status)
             propagateFloatx80NaNOneArg(a, status);
         }
         if (aSign == 0) {
-            return packFloatx80(0, floatx80_infinity.high,
-                                floatx80_infinity.low);
+            return floatx80_default_inf(0, status);
         }
     }
 
     if (aExp == 0) {
         if (aSig == 0) {
             float_raise(float_flag_divbyzero, status);
-            return packFloatx80(1, floatx80_infinity.high,
-                                floatx80_infinity.low);
+            return floatx80_default_inf(1, status);
         }
         normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
     }
@@ -740,8 +732,7 @@ floatx80 floatx80_etox(floatx80 a, float_status *status)
         if (aSign) {
             return packFloatx80(0, 0, 0);
         }
-        return packFloatx80(0, floatx80_infinity.high,
-                            floatx80_infinity.low);
+        return floatx80_default_inf(0, status);
     }
 
     if (aExp == 0 && aSig == 0) {
@@ -924,8 +915,7 @@ floatx80 floatx80_twotox(floatx80 a, float_status *status)
         if (aSign) {
             return packFloatx80(0, 0, 0);
         }
-        return packFloatx80(0, floatx80_infinity.high,
-                            floatx80_infinity.low);
+        return floatx80_default_inf(0, status);
     }
 
     if (aExp == 0 && aSig == 0) {
@@ -1075,8 +1065,7 @@ floatx80 floatx80_tentox(floatx80 a, float_status *status)
         if (aSign) {
             return packFloatx80(0, 0, 0);
         }
-        return packFloatx80(0, floatx80_infinity.high,
-                            floatx80_infinity.low);
+        return floatx80_default_inf(0, status);
     }
 
     if (aExp == 0 && aSig == 0) {
@@ -2260,8 +2249,7 @@ floatx80 floatx80_atanh(floatx80 a, float_status *status)
     if (compact >= 0x3FFF8000) { /* |X| >= 1 */
         if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */
             float_raise(float_flag_divbyzero, status);
-            return packFloatx80(aSign, floatx80_infinity.high,
-                                floatx80_infinity.low);
+            return floatx80_default_inf(aSign, status);
         } else { /* |X| > 1 */
             float_raise(float_flag_invalid, status);
             return floatx80_default_nan(status);
@@ -2320,8 +2308,7 @@ floatx80 floatx80_etoxm1(floatx80 a, float_status *status)
         if (aSign) {
             return packFloatx80(aSign, one_exp, one_sig);
         }
-        return packFloatx80(0, floatx80_infinity.high,
-                            floatx80_infinity.low);
+        return floatx80_default_inf(0, status);
     }
 
     if (aExp == 0 && aSig == 0) {
@@ -2687,8 +2674,7 @@ floatx80 floatx80_sinh(floatx80 a, float_status *status)
         if ((uint64_t) (aSig << 1)) {
             return propagateFloatx80NaNOneArg(a, status);
         }
-        return packFloatx80(aSign, floatx80_infinity.high,
-                            floatx80_infinity.low);
+        return floatx80_default_inf(aSign, status);
     }
 
     if (aExp == 0 && aSig == 0) {
@@ -2774,8 +2760,7 @@ floatx80 floatx80_cosh(floatx80 a, float_status *status)
         if ((uint64_t) (aSig << 1)) {
             return propagateFloatx80NaNOneArg(a, status);
         }
-        return packFloatx80(0, floatx80_infinity.high,
-                            floatx80_infinity.low);
+        return floatx80_default_inf(0, status);
     }
 
     if (aExp == 0 && aSig == 0) {
diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c
index f114789abd824..f3bebea856ee7 100644
--- a/target/microblaze/cpu.c
+++ b/target/microblaze/cpu.c
@@ -115,10 +115,12 @@ static void mb_restore_state_to_opc(CPUState *cs,
     cpu->env.iflags = data[1];
 }
 
+#ifndef CONFIG_USER_ONLY
 static bool mb_cpu_has_work(CPUState *cs)
 {
     return cs->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI);
 }
+#endif /* !CONFIG_USER_ONLY */
 
 static int mb_cpu_mmu_index(CPUState *cs, bool ifetch)
 {
@@ -224,6 +226,8 @@ static void mb_disas_set_info(CPUState *cpu, disassemble_info *info)
 {
     info->mach = bfd_arch_microblaze;
     info->print_insn = print_insn_microblaze;
+    info->endian = TARGET_BIG_ENDIAN ? BFD_ENDIAN_BIG
+                                     : BFD_ENDIAN_LITTLE;
 }
 
 static void mb_cpu_realizefn(DeviceState *dev, Error **errp)
@@ -415,11 +419,12 @@ static ObjectClass *mb_cpu_class_by_name(const char *cpu_model)
 #include "hw/core/sysemu-cpu-ops.h"
 
 static const struct SysemuCPUOps mb_sysemu_ops = {
+    .has_work = mb_cpu_has_work,
     .get_phys_page_attrs_debug = mb_cpu_get_phys_page_attrs_debug,
 };
 #endif
 
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 
 static const TCGCPUOps mb_tcg_ops = {
     .initialize = mb_tcg_init,
@@ -450,7 +455,6 @@ static void mb_cpu_class_init(ObjectClass *oc, void *data)
                                        &mcc->parent_phases);
 
     cc->class_by_name = mb_cpu_class_by_name;
-    cc->has_work = mb_cpu_has_work;
     cc->mmu_index = mb_cpu_mmu_index;
     cc->dump_state = mb_cpu_dump_state;
     cc->set_pc = mb_cpu_set_pc;
diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h
index f6879eee352e2..e44ddd5307893 100644
--- a/target/microblaze/cpu.h
+++ b/target/microblaze/cpu.h
@@ -414,6 +414,13 @@ void mb_translate_code(CPUState *cs, TranslationBlock *tb,
 /* Ensure there is no overlap between the two masks. */
 QEMU_BUILD_BUG_ON(MSR_TB_MASK & IFLAGS_TB_MASK);
 
+static inline bool mb_cpu_is_big_endian(CPUState *cs)
+{
+    MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs);
+
+    return !cpu->cfg.endi;
+}
+
 static inline void cpu_get_tb_cpu_state(CPUMBState *env, vaddr *pc,
                                         uint64_t *cs_base, uint32_t *flags)
 {
diff --git a/target/microblaze/gdbstub.c b/target/microblaze/gdbstub.c
index 09d74e164d0d4..d493681d38d36 100644
--- a/target/microblaze/gdbstub.c
+++ b/target/microblaze/gdbstub.c
@@ -110,14 +110,9 @@ int mb_cpu_gdb_read_stack_protect(CPUState *cs, GByteArray *mem_buf, int n)
 
 int mb_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
 {
-    CPUClass *cc = CPU_GET_CLASS(cs);
     CPUMBState *env = cpu_env(cs);
     uint32_t tmp;
 
-    if (n > cc->gdb_num_core_regs) {
-        return 0;
-    }
-
     tmp = ldl_p(mem_buf);
 
     switch (n) {
diff --git a/target/microblaze/helper.c b/target/microblaze/helper.c
index 5d3259ce316da..27fc929bee403 100644
--- a/target/microblaze/helper.c
+++ b/target/microblaze/helper.c
@@ -20,7 +20,7 @@
 
 #include "qemu/osdep.h"
 #include "cpu.h"
-#include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "exec/page-protection.h"
 #include "qemu/host-utils.h"
 #include "exec/log.h"
diff --git a/target/microblaze/mmu.c b/target/microblaze/mmu.c
index 2423ac6172d38..f8587d5ac4da6 100644
--- a/target/microblaze/mmu.c
+++ b/target/microblaze/mmu.c
@@ -21,7 +21,7 @@
 #include "qemu/osdep.h"
 #include "qemu/log.h"
 #include "cpu.h"
-#include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "exec/page-protection.h"
 
 static unsigned int tlb_decode_size(unsigned int f)
diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c
index 24005f05b21f2..b54e5ac4b2f7f 100644
--- a/target/microblaze/translate.c
+++ b/target/microblaze/translate.c
@@ -708,11 +708,18 @@ static void record_unaligned_ess(DisasContext *dc, int rd,
 }
 #endif
 
+static inline MemOp mo_endian(DisasContext *dc)
+{
+    return dc->cfg->endi ? MO_LE : MO_BE;
+}
+
 static bool do_load(DisasContext *dc, int rd, TCGv addr, MemOp mop,
                     int mem_index, bool rev)
 {
     MemOp size = mop & MO_SIZE;
 
+    mop |= mo_endian(dc);
+
     /*
      * When doing reverse accesses we need to do two things.
      *
@@ -780,13 +787,13 @@ static bool trans_lbui(DisasContext *dc, arg_typeb *arg)
 static bool trans_lhu(DisasContext *dc, arg_typea *arg)
 {
     TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb);
-    return do_load(dc, arg->rd, addr, MO_TEUW, dc->mem_index, false);
+    return do_load(dc, arg->rd, addr, MO_UW, dc->mem_index, false);
 }
 
 static bool trans_lhur(DisasContext *dc, arg_typea *arg)
 {
     TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb);
-    return do_load(dc, arg->rd, addr, MO_TEUW, dc->mem_index, true);
+    return do_load(dc, arg->rd, addr, MO_UW, dc->mem_index, true);
 }
 
 static bool trans_lhuea(DisasContext *dc, arg_typea *arg)
@@ -798,26 +805,26 @@ static bool trans_lhuea(DisasContext *dc, arg_typea *arg)
     return true;
 #else
     TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb);
-    return do_load(dc, arg->rd, addr, MO_TEUW, MMU_NOMMU_IDX, false);
+    return do_load(dc, arg->rd, addr, MO_UW, MMU_NOMMU_IDX, false);
 #endif
 }
 
 static bool trans_lhui(DisasContext *dc, arg_typeb *arg)
 {
     TCGv addr = compute_ldst_addr_typeb(dc, arg->ra, arg->imm);
-    return do_load(dc, arg->rd, addr, MO_TEUW, dc->mem_index, false);
+    return do_load(dc, arg->rd, addr, MO_UW, dc->mem_index, false);
 }
 
 static bool trans_lw(DisasContext *dc, arg_typea *arg)
 {
     TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb);
-    return do_load(dc, arg->rd, addr, MO_TEUL, dc->mem_index, false);
+    return do_load(dc, arg->rd, addr, MO_UL, dc->mem_index, false);
 }
 
 static bool trans_lwr(DisasContext *dc, arg_typea *arg)
 {
     TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb);
-    return do_load(dc, arg->rd, addr, MO_TEUL, dc->mem_index, true);
+    return do_load(dc, arg->rd, addr, MO_UL, dc->mem_index, true);
 }
 
 static bool trans_lwea(DisasContext *dc, arg_typea *arg)
@@ -829,14 +836,14 @@ static bool trans_lwea(DisasContext *dc, arg_typea *arg)
     return true;
 #else
     TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb);
-    return do_load(dc, arg->rd, addr, MO_TEUL, MMU_NOMMU_IDX, false);
+    return do_load(dc, arg->rd, addr, MO_UL, MMU_NOMMU_IDX, false);
 #endif
 }
 
 static bool trans_lwi(DisasContext *dc, arg_typeb *arg)
 {
     TCGv addr = compute_ldst_addr_typeb(dc, arg->ra, arg->imm);
-    return do_load(dc, arg->rd, addr, MO_TEUL, dc->mem_index, false);
+    return do_load(dc, arg->rd, addr, MO_UL, dc->mem_index, false);
 }
 
 static bool trans_lwx(DisasContext *dc, arg_typea *arg)
@@ -846,7 +853,8 @@ static bool trans_lwx(DisasContext *dc, arg_typea *arg)
     /* lwx does not throw unaligned access errors, so force alignment */
     tcg_gen_andi_tl(addr, addr, ~3);
 
-    tcg_gen_qemu_ld_i32(cpu_res_val, addr, dc->mem_index, MO_TEUL);
+    tcg_gen_qemu_ld_i32(cpu_res_val, addr, dc->mem_index,
+                        mo_endian(dc) | MO_UL);
     tcg_gen_mov_tl(cpu_res_addr, addr);
 
     if (arg->rd) {
@@ -863,6 +871,8 @@ static bool do_store(DisasContext *dc, int rd, TCGv addr, MemOp mop,
 {
     MemOp size = mop & MO_SIZE;
 
+    mop |= mo_endian(dc);
+
     /*
      * When doing reverse accesses we need to do two things.
      *
@@ -930,13 +940,13 @@ static bool trans_sbi(DisasContext *dc, arg_typeb *arg)
 static bool trans_sh(DisasContext *dc, arg_typea *arg)
 {
     TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb);
-    return do_store(dc, arg->rd, addr, MO_TEUW, dc->mem_index, false);
+    return do_store(dc, arg->rd, addr, MO_UW, dc->mem_index, false);
 }
 
 static bool trans_shr(DisasContext *dc, arg_typea *arg)
 {
     TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb);
-    return do_store(dc, arg->rd, addr, MO_TEUW, dc->mem_index, true);
+    return do_store(dc, arg->rd, addr, MO_UW, dc->mem_index, true);
 }
 
 static bool trans_shea(DisasContext *dc, arg_typea *arg)
@@ -948,26 +958,26 @@ static bool trans_shea(DisasContext *dc, arg_typea *arg)
     return true;
 #else
     TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb);
-    return do_store(dc, arg->rd, addr, MO_TEUW, MMU_NOMMU_IDX, false);
+    return do_store(dc, arg->rd, addr, MO_UW, MMU_NOMMU_IDX, false);
 #endif
 }
 
 static bool trans_shi(DisasContext *dc, arg_typeb *arg)
 {
     TCGv addr = compute_ldst_addr_typeb(dc, arg->ra, arg->imm);
-    return do_store(dc, arg->rd, addr, MO_TEUW, dc->mem_index, false);
+    return do_store(dc, arg->rd, addr, MO_UW, dc->mem_index, false);
 }
 
 static bool trans_sw(DisasContext *dc, arg_typea *arg)
 {
     TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb);
-    return do_store(dc, arg->rd, addr, MO_TEUL, dc->mem_index, false);
+    return do_store(dc, arg->rd, addr, MO_UL, dc->mem_index, false);
 }
 
 static bool trans_swr(DisasContext *dc, arg_typea *arg)
 {
     TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb);
-    return do_store(dc, arg->rd, addr, MO_TEUL, dc->mem_index, true);
+    return do_store(dc, arg->rd, addr, MO_UL, dc->mem_index, true);
 }
 
 static bool trans_swea(DisasContext *dc, arg_typea *arg)
@@ -979,14 +989,14 @@ static bool trans_swea(DisasContext *dc, arg_typea *arg)
     return true;
 #else
     TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb);
-    return do_store(dc, arg->rd, addr, MO_TEUL, MMU_NOMMU_IDX, false);
+    return do_store(dc, arg->rd, addr, MO_UL, MMU_NOMMU_IDX, false);
 #endif
 }
 
 static bool trans_swi(DisasContext *dc, arg_typeb *arg)
 {
     TCGv addr = compute_ldst_addr_typeb(dc, arg->ra, arg->imm);
-    return do_store(dc, arg->rd, addr, MO_TEUL, dc->mem_index, false);
+    return do_store(dc, arg->rd, addr, MO_UL, dc->mem_index, false);
 }
 
 static bool trans_swx(DisasContext *dc, arg_typea *arg)
@@ -1015,7 +1025,7 @@ static bool trans_swx(DisasContext *dc, arg_typea *arg)
 
     tcg_gen_atomic_cmpxchg_i32(tval, cpu_res_addr, cpu_res_val,
                                reg_for_write(dc, arg->rd),
-                               dc->mem_index, MO_TEUL);
+                               dc->mem_index, mo_endian(dc) | MO_UL);
 
     tcg_gen_brcond_i32(TCG_COND_NE, cpu_res_val, tval, swx_fail);
 
@@ -1637,7 +1647,8 @@ static void mb_tr_translate_insn(DisasContextBase *dcb, CPUState *cs)
 
     dc->tb_flags_to_set = 0;
 
-    ir = translator_ldl(cpu_env(cs), &dc->base, dc->base.pc_next);
+    ir = translator_ldl_swap(cpu_env(cs), &dc->base, dc->base.pc_next,
+                             mb_cpu_is_big_endian(cs) != TARGET_BIG_ENDIAN);
     if (!decode(dc, ir)) {
         trap_illegal(dc, true);
     }
diff --git a/target/mips/cpu.c b/target/mips/cpu.c
index 47cd7cfdcef50..b207106dd7936 100644
--- a/target/mips/cpu.c
+++ b/target/mips/cpu.c
@@ -132,6 +132,7 @@ static vaddr mips_cpu_get_pc(CPUState *cs)
     return cpu->env.active_tc.PC;
 }
 
+#if !defined(CONFIG_USER_ONLY)
 static bool mips_cpu_has_work(CPUState *cs)
 {
     CPUMIPSState *env = cpu_env(cs);
@@ -177,6 +178,7 @@ static bool mips_cpu_has_work(CPUState *cs)
     }
     return has_work;
 }
+#endif /* !CONFIG_USER_ONLY */
 
 static int mips_cpu_mmu_index(CPUState *cs, bool ifunc)
 {
@@ -428,13 +430,13 @@ static void mips_cpu_reset_hold(Object *obj, ResetType type)
 static void mips_cpu_disas_set_info(CPUState *s, disassemble_info *info)
 {
     if (!(cpu_env(s)->insn_flags & ISA_NANOMIPS32)) {
-#if TARGET_BIG_ENDIAN
-        info->print_insn = print_insn_big_mips;
-#else
-        info->print_insn = print_insn_little_mips;
-#endif
+        info->endian = TARGET_BIG_ENDIAN ? BFD_ENDIAN_BIG
+                                         : BFD_ENDIAN_LITTLE;
+        info->print_insn = TARGET_BIG_ENDIAN ? print_insn_big_mips
+                                             : print_insn_little_mips;
     } else {
         info->print_insn = print_insn_nanomips;
+        info->endian = BFD_ENDIAN_LITTLE;
     }
 }
 
@@ -534,6 +536,7 @@ static ObjectClass *mips_cpu_class_by_name(const char *cpu_model)
 #include "hw/core/sysemu-cpu-ops.h"
 
 static const struct SysemuCPUOps mips_sysemu_ops = {
+    .has_work = mips_cpu_has_work,
     .get_phys_page_debug = mips_cpu_get_phys_page_debug,
     .legacy_vmsd = &vmstate_mips_cpu,
 };
@@ -544,7 +547,7 @@ static const Property mips_cpu_properties[] = {
 };
 
 #ifdef CONFIG_TCG
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 static const TCGCPUOps mips_tcg_ops = {
     .initialize = mips_tcg_init,
     .translate_code = mips_translate_code,
@@ -577,7 +580,6 @@ static void mips_cpu_class_init(ObjectClass *c, void *data)
                                        &mcc->parent_phases);
 
     cc->class_by_name = mips_cpu_class_by_name;
-    cc->has_work = mips_cpu_has_work;
     cc->mmu_index = mips_cpu_mmu_index;
     cc->dump_state = mips_cpu_dump_state;
     cc->set_pc = mips_cpu_set_pc;
diff --git a/target/mips/internal.h b/target/mips/internal.h
index 91c786cff8ac9..28eb28936ba0e 100644
--- a/target/mips/internal.h
+++ b/target/mips/internal.h
@@ -162,8 +162,6 @@ void cpu_mips_store_cause(CPUMIPSState *env, target_ulong val);
 
 extern const VMStateDescription vmstate_mips_cpu;
 
-#endif /* !CONFIG_USER_ONLY */
-
 static inline bool cpu_mips_hw_interrupts_enabled(CPUMIPSState *env)
 {
     return (env->CP0_Status & (1 << CP0St_IE)) &&
@@ -206,6 +204,8 @@ static inline bool cpu_mips_hw_interrupts_pending(CPUMIPSState *env)
     return r;
 }
 
+#endif /* !CONFIG_USER_ONLY */
+
 void msa_reset(CPUMIPSState *env);
 
 /* cp0_timer.c */
diff --git a/target/mips/system/cp0.c b/target/mips/system/cp0.c
index bae37f515bf8a..ff7d3db00c73a 100644
--- a/target/mips/system/cp0.c
+++ b/target/mips/system/cp0.c
@@ -21,7 +21,7 @@
 #include "qemu/osdep.h"
 #include "cpu.h"
 #include "internal.h"
-#include "exec/exec-all.h"
+#include "exec/cputlb.h"
 
 /* Called for updates to CP0_Status.  */
 void sync_c0_status(CPUMIPSState *env, CPUMIPSState *cpu, int tc)
diff --git a/target/mips/tcg/msa_helper.c b/target/mips/tcg/msa_helper.c
index ec38d9fde5ed4..74fb80cc256b0 100644
--- a/target/mips/tcg/msa_helper.c
+++ b/target/mips/tcg/msa_helper.c
@@ -5577,7 +5577,7 @@ static inline int64_t msa_mulr_q_df(uint32_t df, int64_t arg1, int64_t arg2)
 {
     int64_t q_min = DF_MIN_INT(df);
     int64_t q_max = DF_MAX_INT(df);
-    int64_t r_bit = 1 << (DF_BITS(df) - 2);
+    int64_t r_bit = 1LL << (DF_BITS(df) - 2);
 
     if (arg1 == q_min && arg2 == q_min) {
         return q_max;
@@ -5685,7 +5685,7 @@ static inline int64_t msa_maddr_q_df(uint32_t df, int64_t dest, int64_t arg1,
 
     int64_t q_max = DF_MAX_INT(df);
     int64_t q_min = DF_MIN_INT(df);
-    int64_t r_bit = 1 << (DF_BITS(df) - 2);
+    int64_t r_bit = 1LL << (DF_BITS(df) - 2);
 
     q_prod = arg1 * arg2;
     q_ret = ((dest << (DF_BITS(df) - 1)) + q_prod + r_bit) >> (DF_BITS(df) - 1);
@@ -5700,7 +5700,7 @@ static inline int64_t msa_msubr_q_df(uint32_t df, int64_t dest, int64_t arg1,
 
     int64_t q_max = DF_MAX_INT(df);
     int64_t q_min = DF_MIN_INT(df);
-    int64_t r_bit = 1 << (DF_BITS(df) - 2);
+    int64_t r_bit = 1LL << (DF_BITS(df) - 2);
 
     q_prod = arg1 * arg2;
     q_ret = ((dest << (DF_BITS(df) - 1)) - q_prod + r_bit) >> (DF_BITS(df) - 1);
diff --git a/target/mips/tcg/octeon_translate.c b/target/mips/tcg/octeon_translate.c
index e25c4cbaa0647..d9eb43716e2a5 100644
--- a/target/mips/tcg/octeon_translate.c
+++ b/target/mips/tcg/octeon_translate.c
@@ -18,8 +18,8 @@ static bool trans_BBIT(DisasContext *ctx, arg_BBIT *a)
     TCGv p;
 
     if (ctx->hflags & MIPS_HFLAG_BMASK) {
-        LOG_DISAS("Branch in delay / forbidden slot at PC 0x"
-                  TARGET_FMT_lx "\n", ctx->base.pc_next);
+        LOG_DISAS("Branch in delay / forbidden slot at PC 0x%" VADDR_PRIx "\n",
+                  ctx->base.pc_next);
         generate_exception_end(ctx, EXCP_RI);
         return true;
     }
diff --git a/target/mips/tcg/system/cp0_helper.c b/target/mips/tcg/system/cp0_helper.c
index 79a5c833ceecc..01a07a169f6de 100644
--- a/target/mips/tcg/system/cp0_helper.c
+++ b/target/mips/tcg/system/cp0_helper.c
@@ -27,7 +27,7 @@
 #include "internal.h"
 #include "qemu/host-utils.h"
 #include "exec/helper-proto.h"
-#include "exec/exec-all.h"
+#include "exec/cputlb.h"
 
 
 /* SMP helpers.  */
diff --git a/target/mips/tcg/system/tlb_helper.c b/target/mips/tcg/system/tlb_helper.c
index e98bb9595178e..ca4d6b27bc94e 100644
--- a/target/mips/tcg/system/tlb_helper.c
+++ b/target/mips/tcg/system/tlb_helper.c
@@ -21,6 +21,7 @@
 
 #include "cpu.h"
 #include "internal.h"
+#include "exec/cputlb.h"
 #include "exec/exec-all.h"
 #include "exec/page-protection.h"
 #include "exec/cpu_ldst.h"
diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
index b7bab0d7abf8d..e8abf1f8b5cd4 100644
--- a/target/openrisc/cpu.c
+++ b/target/openrisc/cpu.c
@@ -63,11 +63,13 @@ static void openrisc_restore_state_to_opc(CPUState *cs,
     }
 }
 
+#ifndef CONFIG_USER_ONLY
 static bool openrisc_cpu_has_work(CPUState *cs)
 {
     return cs->interrupt_request & (CPU_INTERRUPT_HARD |
                                     CPU_INTERRUPT_TIMER);
 }
+#endif /* !CONFIG_USER_ONLY */
 
 static int openrisc_cpu_mmu_index(CPUState *cs, bool ifetch)
 {
@@ -83,6 +85,7 @@ static int openrisc_cpu_mmu_index(CPUState *cs, bool ifetch)
 
 static void openrisc_disas_set_info(CPUState *cpu, disassemble_info *info)
 {
+    info->endian = BFD_ENDIAN_BIG;
     info->print_insn = print_insn_or1k;
 }
 
@@ -165,6 +168,10 @@ static void openrisc_cpu_realizefn(DeviceState *dev, Error **errp)
     qemu_init_vcpu(cs);
     cpu_reset(cs);
 
+#ifndef CONFIG_USER_ONLY
+    cpu_openrisc_clock_init(OPENRISC_CPU(dev));
+#endif
+
     occ->parent_realize(dev, errp);
 }
 
@@ -228,11 +235,12 @@ static void openrisc_any_initfn(Object *obj)
 #include "hw/core/sysemu-cpu-ops.h"
 
 static const struct SysemuCPUOps openrisc_sysemu_ops = {
+    .has_work = openrisc_cpu_has_work,
     .get_phys_page_debug = openrisc_cpu_get_phys_page_debug,
 };
 #endif
 
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 
 static const TCGCPUOps openrisc_tcg_ops = {
     .initialize = openrisc_translate_init,
@@ -261,7 +269,6 @@ static void openrisc_cpu_class_init(ObjectClass *oc, void *data)
                                        &occ->parent_phases);
 
     cc->class_by_name = openrisc_cpu_class_by_name;
-    cc->has_work = openrisc_cpu_has_work;
     cc->mmu_index = openrisc_cpu_mmu_index;
     cc->dump_state = openrisc_cpu_dump_state;
     cc->set_pc = openrisc_cpu_set_pc;
diff --git a/target/openrisc/gdbstub.c b/target/openrisc/gdbstub.c
index c2a77d5d4d521..45bba80d8789d 100644
--- a/target/openrisc/gdbstub.c
+++ b/target/openrisc/gdbstub.c
@@ -47,14 +47,9 @@ int openrisc_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n)
 
 int openrisc_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
 {
-    CPUClass *cc = CPU_GET_CLASS(cs);
     CPUOpenRISCState *env = cpu_env(cs);
     uint32_t tmp;
 
-    if (n > cc->gdb_num_core_regs) {
-        return 0;
-    }
-
     tmp = ldl_p(mem_buf);
 
     if (n < 32) {
diff --git a/target/openrisc/mmu.c b/target/openrisc/mmu.c
index c632d5230b29e..47ac783c5251f 100644
--- a/target/openrisc/mmu.c
+++ b/target/openrisc/mmu.c
@@ -21,7 +21,7 @@
 #include "qemu/osdep.h"
 #include "qemu/log.h"
 #include "cpu.h"
-#include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "exec/page-protection.h"
 #include "gdbstub/helpers.h"
 #include "qemu/host-utils.h"
diff --git a/target/openrisc/sys_helper.c b/target/openrisc/sys_helper.c
index 77567afba47f7..21bc137cccaf9 100644
--- a/target/openrisc/sys_helper.c
+++ b/target/openrisc/sys_helper.c
@@ -21,6 +21,7 @@
 #include "qemu/osdep.h"
 #include "cpu.h"
 #include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "exec/helper-proto.h"
 #include "exception.h"
 #ifndef CONFIG_USER_ONLY
diff --git a/target/ppc/cpu.c b/target/ppc/cpu.c
index d148cd76b4792..bfcc695de7688 100644
--- a/target/ppc/cpu.c
+++ b/target/ppc/cpu.c
@@ -130,11 +130,13 @@ void ppc_store_ciabr(CPUPPCState *env, target_ulong val)
     ppc_update_ciabr(env);
 }
 
-void ppc_update_daw0(CPUPPCState *env)
+void ppc_update_daw(CPUPPCState *env, int rid)
 {
     CPUState *cs = env_cpu(env);
-    target_ulong deaw = env->spr[SPR_DAWR0] & PPC_BITMASK(0, 60);
-    uint32_t dawrx = env->spr[SPR_DAWRX0];
+    int spr_dawr = rid ? SPR_DAWR1 : SPR_DAWR0;
+    int spr_dawrx = rid ? SPR_DAWRX1 : SPR_DAWRX0;
+    target_ulong deaw = env->spr[spr_dawr] & PPC_BITMASK(0, 60);
+    uint32_t dawrx = env->spr[spr_dawrx];
     int mrd = extract32(dawrx, PPC_BIT_NR(48), 54 - 48);
     bool dw = extract32(dawrx, PPC_BIT_NR(57), 1);
     bool dr = extract32(dawrx, PPC_BIT_NR(58), 1);
@@ -144,9 +146,9 @@ void ppc_update_daw0(CPUPPCState *env)
     vaddr len;
     int flags;
 
-    if (env->dawr0_watchpoint) {
-        cpu_watchpoint_remove_by_ref(cs, env->dawr0_watchpoint);
-        env->dawr0_watchpoint = NULL;
+    if (env->dawr_watchpoint[rid]) {
+        cpu_watchpoint_remove_by_ref(cs, env->dawr_watchpoint[rid]);
+        env->dawr_watchpoint[rid] = NULL;
     }
 
     if (!dr && !dw) {
@@ -166,28 +168,45 @@ void ppc_update_daw0(CPUPPCState *env)
         flags |= BP_MEM_WRITE;
     }
 
-    cpu_watchpoint_insert(cs, deaw, len, flags, &env->dawr0_watchpoint);
+    cpu_watchpoint_insert(cs, deaw, len, flags, &env->dawr_watchpoint[rid]);
 }
 
 void ppc_store_dawr0(CPUPPCState *env, target_ulong val)
 {
     env->spr[SPR_DAWR0] = val;
-    ppc_update_daw0(env);
+    ppc_update_daw(env, 0);
 }
 
-void ppc_store_dawrx0(CPUPPCState *env, uint32_t val)
+static void ppc_store_dawrx(CPUPPCState *env, uint32_t val, int rid)
 {
     int hrammc = extract32(val, PPC_BIT_NR(56), 1);
 
     if (hrammc) {
         /* This might be done with a second watchpoint at the xor of DEAW[0] */
-        qemu_log_mask(LOG_UNIMP, "%s: DAWRX0[HRAMMC] is unimplemented\n",
-                      __func__);
+        qemu_log_mask(LOG_UNIMP, "%s: DAWRX%d[HRAMMC] is unimplemented\n",
+                      __func__, rid);
     }
 
-    env->spr[SPR_DAWRX0] = val;
-    ppc_update_daw0(env);
+    env->spr[rid ? SPR_DAWRX1 : SPR_DAWRX0] = val;
+    ppc_update_daw(env, rid);
+}
+
+void ppc_store_dawrx0(CPUPPCState *env, uint32_t val)
+{
+    ppc_store_dawrx(env, val, 0);
+}
+
+void ppc_store_dawr1(CPUPPCState *env, target_ulong val)
+{
+    env->spr[SPR_DAWR1] = val;
+    ppc_update_daw(env, 1);
+}
+
+void ppc_store_dawrx1(CPUPPCState *env, uint32_t val)
+{
+    ppc_store_dawrx(env, val, 1);
 }
+
 #endif
 #endif
 
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 0b8b4c0517246..efab54a068301 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1260,7 +1260,7 @@ struct CPUArchState {
 #if defined(TARGET_PPC64)
     ppc_slb_t slb[MAX_SLB_ENTRIES]; /* PowerPC 64 SLB area */
     struct CPUBreakpoint *ciabr_breakpoint;
-    struct CPUWatchpoint *dawr0_watchpoint;
+    struct CPUWatchpoint *dawr_watchpoint[2];
 #endif
     target_ulong sr[32];   /* segment registers */
     uint32_t nb_BATs;      /* number of BATs */
@@ -1589,9 +1589,11 @@ void ppc_store_sdr1(CPUPPCState *env, target_ulong value);
 void ppc_store_lpcr(PowerPCCPU *cpu, target_ulong val);
 void ppc_update_ciabr(CPUPPCState *env);
 void ppc_store_ciabr(CPUPPCState *env, target_ulong value);
-void ppc_update_daw0(CPUPPCState *env);
+void ppc_update_daw(CPUPPCState *env, int rid);
 void ppc_store_dawr0(CPUPPCState *env, target_ulong value);
 void ppc_store_dawrx0(CPUPPCState *env, uint32_t value);
+void ppc_store_dawr1(CPUPPCState *env, target_ulong value);
+void ppc_store_dawrx1(CPUPPCState *env, uint32_t value);
 #endif /* !defined(CONFIG_USER_ONLY) */
 void ppc_store_msr(CPUPPCState *env, target_ulong value);
 
@@ -2091,6 +2093,7 @@ void ppc_compat_add_property(Object *obj, const char *name,
 #define SPR_VTB               (0x351)
 #define SPR_LDBAR             (0x352)
 #define SPR_MMCRC             (0x353)
+#define SPR_PMSR              (0x355)
 #define SPR_PSSCR             (0x357)
 #define SPR_440_INV0          (0x370)
 #define SPR_440_INV1          (0x371)
@@ -2098,8 +2101,10 @@ void ppc_compat_add_property(Object *obj, const char *name,
 #define SPR_440_INV2          (0x372)
 #define SPR_TRIG2             (0x372)
 #define SPR_440_INV3          (0x373)
+#define SPR_PMCR              (0x374)
 #define SPR_440_ITV0          (0x374)
 #define SPR_440_ITV1          (0x375)
+#define SPR_RWMR              (0x375)
 #define SPR_440_ITV2          (0x376)
 #define SPR_440_ITV3          (0x377)
 #define SPR_440_CCR1          (0x378)
@@ -2752,11 +2757,6 @@ static inline void cpu_get_tb_cpu_state(CPUPPCState *env, vaddr *pc,
 }
 #endif
 
-G_NORETURN void raise_exception(CPUPPCState *env, uint32_t exception);
-G_NORETURN void raise_exception_ra(CPUPPCState *env, uint32_t exception,
-                                   uintptr_t raddr);
-G_NORETURN void raise_exception_err(CPUPPCState *env, uint32_t exception,
-                                    uint32_t error_code);
 G_NORETURN void raise_exception_err_ra(CPUPPCState *env, uint32_t exception,
                                        uint32_t error_code, uintptr_t raddr);
 
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 062a6e85fbaec..8b590e7f17c17 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -922,6 +922,18 @@ static void register_BookE206_sprs(CPUPPCState *env, uint32_t mas_mask,
 #endif
 }
 
+static void register_atb_sprs(CPUPPCState *env)
+{
+    spr_register(env, SPR_ATBL, "ATBL",
+                 &spr_read_atbl, SPR_NOACCESS,
+                 &spr_read_atbl, SPR_NOACCESS,
+                 0x00000000);
+    spr_register(env, SPR_ATBU, "ATBU",
+                 &spr_read_atbu, SPR_NOACCESS,
+                 &spr_read_atbu, SPR_NOACCESS,
+                 0x00000000);
+}
+
 /* SPR specific to PowerPC 440 implementation */
 static void register_440_sprs(CPUPPCState *env)
 {
@@ -2911,6 +2923,11 @@ static void init_proc_e500(CPUPPCState *env, int version)
     register_BookE206_sprs(env, 0x000000DF, tlbncfg, mmucfg);
     register_usprgh_sprs(env);
 
+    if (version != fsl_e500v1) {
+        /* e500v1 has no support for alternate timebase */
+        register_atb_sprs(env);
+    }
+
     spr_register(env, SPR_HID0, "HID0",
                  SPR_NOACCESS, SPR_NOACCESS,
                  &spr_read_generic, &spr_write_generic,
@@ -5172,6 +5189,20 @@ static void register_book3s_207_dbg_sprs(CPUPPCState *env)
                         KVM_REG_PPC_CIABR, 0x00000000);
 }
 
+static void register_book3s_310_dbg_sprs(CPUPPCState *env)
+{
+    spr_register_kvm_hv(env, SPR_DAWR1, "DAWR1",
+                        SPR_NOACCESS, SPR_NOACCESS,
+                        SPR_NOACCESS, SPR_NOACCESS,
+                        &spr_read_generic, &spr_write_dawr1,
+                        KVM_REG_PPC_DAWR1, 0x00000000);
+    spr_register_kvm_hv(env, SPR_DAWRX1, "DAWRX1",
+                        SPR_NOACCESS, SPR_NOACCESS,
+                        SPR_NOACCESS, SPR_NOACCESS,
+                        &spr_read_generic, &spr_write_dawrx1,
+                        KVM_REG_PPC_DAWRX1, 0x00000000);
+}
+
 static void register_970_dbg_sprs(CPUPPCState *env)
 {
     /* Breakpoints */
@@ -5773,6 +5804,11 @@ static void register_power9_book4_sprs(CPUPPCState *env)
                      SPR_NOACCESS, SPR_NOACCESS,
                      &spr_read_generic, &spr_write_generic,
                      KVM_REG_PPC_WORT, 0);
+    spr_register_hv(env, SPR_RWMR, "RWMR",
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 &spr_read_generic, &spr_write_generic,
+                 &spr_read_generic, &spr_write_generic,
+                 0x00000000);
 #endif
 }
 
@@ -6451,6 +6487,17 @@ static void register_power9_common_sprs(CPUPPCState *env)
                         spr_read_generic, spr_write_generic,
                         KVM_REG_PPC_PSSCR, 0);
 
+    spr_register_hv(env, SPR_PMSR, "PMSR",
+                    SPR_NOACCESS, SPR_NOACCESS,
+                    SPR_NOACCESS, SPR_NOACCESS,
+                    &spr_read_pmsr, SPR_NOACCESS,
+                    0);
+    spr_register_hv(env, SPR_PMCR, "PMCR",
+                    SPR_NOACCESS, SPR_NOACCESS,
+                    SPR_NOACCESS, SPR_NOACCESS,
+                    &spr_read_generic, &spr_write_pmcr,
+                    PPC_BIT(63)); /* Version 1 (POWER9/10) */
+
 }
 
 static void init_proc_POWER9(CPUPPCState *env)
@@ -6568,6 +6615,7 @@ static void init_proc_POWER10(CPUPPCState *env)
 {
     register_power9_common_sprs(env);
     register_HEIR64_spr(env);
+    register_book3s_310_dbg_sprs(env);
     register_power10_hash_sprs(env);
     register_power10_dexcr_sprs(env);
     register_power10_pmu_sup_sprs(env);
@@ -7177,10 +7225,12 @@ static void ppc_restore_state_to_opc(CPUState *cs,
 }
 #endif /* CONFIG_TCG */
 
+#ifndef CONFIG_USER_ONLY
 static bool ppc_cpu_has_work(CPUState *cs)
 {
     return cs->interrupt_request & CPU_INTERRUPT_HARD;
 }
+#endif /* !CONFIG_USER_ONLY */
 
 static int ppc_cpu_mmu_index(CPUState *cs, bool ifetch)
 {
@@ -7401,6 +7451,8 @@ static void ppc_disas_set_info(CPUState *cs, disassemble_info *info)
 
     if ((env->hflags >> MSR_LE) & 1) {
         info->endian = BFD_ENDIAN_LITTLE;
+    } else {
+        info->endian = BFD_ENDIAN_BIG;
     }
     info->mach = env->bfd_mach;
     if (!env->bfd_mach) {
@@ -7421,6 +7473,7 @@ static void ppc_disas_set_info(CPUState *cs, disassemble_info *info)
 #include "hw/core/sysemu-cpu-ops.h"
 
 static const struct SysemuCPUOps ppc_sysemu_ops = {
+    .has_work = ppc_cpu_has_work,
     .get_phys_page_debug = ppc_cpu_get_phys_page_debug,
     .write_elf32_note = ppc32_cpu_write_elf32_note,
     .write_elf64_note = ppc64_cpu_write_elf64_note,
@@ -7430,7 +7483,7 @@ static const struct SysemuCPUOps ppc_sysemu_ops = {
 #endif
 
 #ifdef CONFIG_TCG
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 
 static const TCGCPUOps ppc_tcg_ops = {
   .initialize = ppc_translate_init,
@@ -7472,7 +7525,6 @@ static void ppc_cpu_class_init(ObjectClass *oc, void *data)
                                        &pcc->parent_phases);
 
     cc->class_by_name = ppc_cpu_class_by_name;
-    cc->has_work = ppc_cpu_has_work;
     cc->mmu_index = ppc_cpu_mmu_index;
     cc->dump_state = ppc_cpu_dump_state;
     cc->set_pc = ppc_cpu_set_pc;
diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index fde9912230e92..44e19aacd8d88 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -19,6 +19,7 @@
 #include "qemu/osdep.h"
 #include "qemu/main-loop.h"
 #include "qemu/log.h"
+#include "system/tcg.h"
 #include "system/system.h"
 #include "system/runstate.h"
 #include "cpu.h"
@@ -29,12 +30,6 @@
 
 #include "trace.h"
 
-#ifdef CONFIG_TCG
-#include "system/tcg.h"
-#include "exec/helper-proto.h"
-#include "exec/cpu_ldst.h"
-#endif
-
 /*****************************************************************************/
 /* Exception processing */
 #ifndef CONFIG_USER_ONLY
@@ -136,27 +131,6 @@ static void dump_hcall(CPUPPCState *env)
                   env->nip);
 }
 
-#ifdef CONFIG_TCG
-/* Return true iff byteswap is needed to load instruction */
-static inline bool insn_need_byteswap(CPUArchState *env)
-{
-    /* SYSTEM builds TARGET_BIG_ENDIAN. Need to swap when MSR[LE] is set */
-    return !!(env->msr & ((target_ulong)1 << MSR_LE));
-}
-
-static uint32_t ppc_ldl_code(CPUArchState *env, target_ulong addr)
-{
-    uint32_t insn = cpu_ldl_code(env, addr);
-
-    if (insn_need_byteswap(env)) {
-        insn = bswap32(insn);
-    }
-
-    return insn;
-}
-
-#endif
-
 static void ppc_excp_debug_sw_tlb(CPUPPCState *env, int excp)
 {
     const char *es;
@@ -420,57 +394,14 @@ static void powerpc_set_excp_state(PowerPCCPU *cpu, target_ulong vector,
     env->reserve_addr = -1;
 }
 
-#ifdef CONFIG_TCG
-/*
- * This stops the machine and logs CPU state without killing QEMU (like
- * cpu_abort()) because it is often a guest error as opposed to a QEMU error,
- * so the machine can still be debugged.
- */
-static G_NORETURN void powerpc_checkstop(CPUPPCState *env, const char *reason)
-{
-    CPUState *cs = env_cpu(env);
-    FILE *f;
-
-    f = qemu_log_trylock();
-    if (f) {
-        fprintf(f, "Entering checkstop state: %s\n", reason);
-        cpu_dump_state(cs, f, CPU_DUMP_FPU | CPU_DUMP_CCOP);
-        qemu_log_unlock(f);
-    }
-
-    /*
-     * This stops the machine and logs CPU state without killing QEMU
-     * (like cpu_abort()) so the machine can still be debugged (because
-     * it is often a guest error).
-     */
-    qemu_system_guest_panicked(NULL);
-    cpu_loop_exit_noexc(cs);
-}
-
-#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY)
-void helper_attn(CPUPPCState *env)
-{
-    /* POWER attn is unprivileged when enabled by HID, otherwise illegal */
-    if ((*env->check_attn)(env)) {
-        powerpc_checkstop(env, "host executed attn");
-    } else {
-        raise_exception_err(env, POWERPC_EXCP_HV_EMU,
-                            POWERPC_EXCP_INVAL | POWERPC_EXCP_INVAL_INVAL);
-    }
-}
-#endif
-#endif /* CONFIG_TCG */
-
 static void powerpc_mcheck_checkstop(CPUPPCState *env)
 {
     /* KVM guests always have MSR[ME] enabled */
-#ifdef CONFIG_TCG
     if (FIELD_EX64(env->msr, MSR, ME)) {
         return;
     }
-
+    assert(tcg_enabled());
     powerpc_checkstop(env, "machine check with MSR[ME]=0");
-#endif
 }
 
 static void powerpc_excp_40x(PowerPCCPU *cpu, int excp)
@@ -1620,7 +1551,7 @@ static inline void powerpc_excp_books(PowerPCCPU *cpu, int excp)
 }
 #endif /* TARGET_PPC64 */
 
-static void powerpc_excp(PowerPCCPU *cpu, int excp)
+void powerpc_excp(PowerPCCPU *cpu, int excp)
 {
     CPUPPCState *env = &cpu->env;
 
@@ -2552,770 +2483,3 @@ bool ppc_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
 }
 
 #endif /* !CONFIG_USER_ONLY */
-
-/*****************************************************************************/
-/* Exceptions processing helpers */
-
-void raise_exception_err_ra(CPUPPCState *env, uint32_t exception,
-                            uint32_t error_code, uintptr_t raddr)
-{
-    CPUState *cs = env_cpu(env);
-
-    cs->exception_index = exception;
-    env->error_code = error_code;
-    cpu_loop_exit_restore(cs, raddr);
-}
-
-void raise_exception_err(CPUPPCState *env, uint32_t exception,
-                         uint32_t error_code)
-{
-    raise_exception_err_ra(env, exception, error_code, 0);
-}
-
-void raise_exception(CPUPPCState *env, uint32_t exception)
-{
-    raise_exception_err_ra(env, exception, 0, 0);
-}
-
-void raise_exception_ra(CPUPPCState *env, uint32_t exception,
-                        uintptr_t raddr)
-{
-    raise_exception_err_ra(env, exception, 0, raddr);
-}
-
-#ifdef CONFIG_TCG
-void helper_raise_exception_err(CPUPPCState *env, uint32_t exception,
-                                uint32_t error_code)
-{
-    raise_exception_err_ra(env, exception, error_code, 0);
-}
-
-void helper_raise_exception(CPUPPCState *env, uint32_t exception)
-{
-    raise_exception_err_ra(env, exception, 0, 0);
-}
-
-#ifndef CONFIG_USER_ONLY
-void helper_store_msr(CPUPPCState *env, target_ulong val)
-{
-    uint32_t excp = hreg_store_msr(env, val, 0);
-
-    if (excp != 0) {
-        cpu_interrupt_exittb(env_cpu(env));
-        raise_exception(env, excp);
-    }
-}
-
-void helper_ppc_maybe_interrupt(CPUPPCState *env)
-{
-    ppc_maybe_interrupt(env);
-}
-
-#ifdef TARGET_PPC64
-void helper_scv(CPUPPCState *env, uint32_t lev)
-{
-    if (env->spr[SPR_FSCR] & (1ull << FSCR_SCV)) {
-        raise_exception_err(env, POWERPC_EXCP_SYSCALL_VECTORED, lev);
-    } else {
-        raise_exception_err(env, POWERPC_EXCP_FU, FSCR_IC_SCV);
-    }
-}
-
-void helper_pminsn(CPUPPCState *env, uint32_t insn)
-{
-    CPUState *cs = env_cpu(env);
-
-    cs->halted = 1;
-
-    /* Condition for waking up at 0x100 */
-    env->resume_as_sreset = (insn != PPC_PM_STOP) ||
-        (env->spr[SPR_PSSCR] & PSSCR_EC);
-
-    /* HDECR is not to wake from PM state, it may have already fired */
-    if (env->resume_as_sreset) {
-        PowerPCCPU *cpu = env_archcpu(env);
-        ppc_set_irq(cpu, PPC_INTERRUPT_HDECR, 0);
-    }
-
-    ppc_maybe_interrupt(env);
-}
-#endif /* TARGET_PPC64 */
-
-static void do_rfi(CPUPPCState *env, target_ulong nip, target_ulong msr)
-{
-    /* MSR:POW cannot be set by any form of rfi */
-    msr &= ~(1ULL << MSR_POW);
-
-    /* MSR:TGPR cannot be set by any form of rfi */
-    if (env->flags & POWERPC_FLAG_TGPR)
-        msr &= ~(1ULL << MSR_TGPR);
-
-#ifdef TARGET_PPC64
-    /* Switching to 32-bit ? Crop the nip */
-    if (!msr_is_64bit(env, msr)) {
-        nip = (uint32_t)nip;
-    }
-#else
-    nip = (uint32_t)nip;
-#endif
-    /* XXX: beware: this is false if VLE is supported */
-    env->nip = nip & ~((target_ulong)0x00000003);
-    hreg_store_msr(env, msr, 1);
-    trace_ppc_excp_rfi(env->nip, env->msr);
-    /*
-     * No need to raise an exception here, as rfi is always the last
-     * insn of a TB
-     */
-    cpu_interrupt_exittb(env_cpu(env));
-    /* Reset the reservation */
-    env->reserve_addr = -1;
-
-    /* Context synchronizing: check if TCG TLB needs flush */
-    check_tlb_flush(env, false);
-}
-
-void helper_rfi(CPUPPCState *env)
-{
-    do_rfi(env, env->spr[SPR_SRR0], env->spr[SPR_SRR1] & 0xfffffffful);
-}
-
-#ifdef TARGET_PPC64
-void helper_rfid(CPUPPCState *env)
-{
-    /*
-     * The architecture defines a number of rules for which bits can
-     * change but in practice, we handle this in hreg_store_msr()
-     * which will be called by do_rfi(), so there is no need to filter
-     * here
-     */
-    do_rfi(env, env->spr[SPR_SRR0], env->spr[SPR_SRR1]);
-}
-
-void helper_rfscv(CPUPPCState *env)
-{
-    do_rfi(env, env->lr, env->ctr);
-}
-
-void helper_hrfid(CPUPPCState *env)
-{
-    do_rfi(env, env->spr[SPR_HSRR0], env->spr[SPR_HSRR1]);
-}
-
-void helper_rfebb(CPUPPCState *env, target_ulong s)
-{
-    target_ulong msr = env->msr;
-
-    /*
-     * Handling of BESCR bits 32:33 according to PowerISA v3.1:
-     *
-     * "If BESCR 32:33 != 0b00 the instruction is treated as if
-     *  the instruction form were invalid."
-     */
-    if (env->spr[SPR_BESCR] & BESCR_INVALID) {
-        raise_exception_err(env, POWERPC_EXCP_PROGRAM,
-                            POWERPC_EXCP_INVAL | POWERPC_EXCP_INVAL_INVAL);
-    }
-
-    env->nip = env->spr[SPR_EBBRR];
-
-    /* Switching to 32-bit ? Crop the nip */
-    if (!msr_is_64bit(env, msr)) {
-        env->nip = (uint32_t)env->spr[SPR_EBBRR];
-    }
-
-    if (s) {
-        env->spr[SPR_BESCR] |= BESCR_GE;
-    } else {
-        env->spr[SPR_BESCR] &= ~BESCR_GE;
-    }
-}
-
-/*
- * Triggers or queues an 'ebb_excp' EBB exception. All checks
- * but FSCR, HFSCR and msr_pr must be done beforehand.
- *
- * PowerISA v3.1 isn't clear about whether an EBB should be
- * postponed or cancelled if the EBB facility is unavailable.
- * Our assumption here is that the EBB is cancelled if both
- * FSCR and HFSCR EBB facilities aren't available.
- */
-static void do_ebb(CPUPPCState *env, int ebb_excp)
-{
-    PowerPCCPU *cpu = env_archcpu(env);
-
-    /*
-     * FSCR_EBB and FSCR_IC_EBB are the same bits used with
-     * HFSCR.
-     */
-    helper_fscr_facility_check(env, FSCR_EBB, 0, FSCR_IC_EBB);
-    helper_hfscr_facility_check(env, FSCR_EBB, "EBB", FSCR_IC_EBB);
-
-    if (ebb_excp == POWERPC_EXCP_PERFM_EBB) {
-        env->spr[SPR_BESCR] |= BESCR_PMEO;
-    } else if (ebb_excp == POWERPC_EXCP_EXTERNAL_EBB) {
-        env->spr[SPR_BESCR] |= BESCR_EEO;
-    }
-
-    if (FIELD_EX64(env->msr, MSR, PR)) {
-        powerpc_excp(cpu, ebb_excp);
-    } else {
-        ppc_set_irq(cpu, PPC_INTERRUPT_EBB, 1);
-    }
-}
-
-void raise_ebb_perfm_exception(CPUPPCState *env)
-{
-    bool perfm_ebb_enabled = env->spr[SPR_POWER_MMCR0] & MMCR0_EBE &&
-                             env->spr[SPR_BESCR] & BESCR_PME &&
-                             env->spr[SPR_BESCR] & BESCR_GE;
-
-    if (!perfm_ebb_enabled) {
-        return;
-    }
-
-    do_ebb(env, POWERPC_EXCP_PERFM_EBB);
-}
-#endif /* TARGET_PPC64 */
-
-/*****************************************************************************/
-/* Embedded PowerPC specific helpers */
-void helper_40x_rfci(CPUPPCState *env)
-{
-    do_rfi(env, env->spr[SPR_40x_SRR2], env->spr[SPR_40x_SRR3]);
-}
-
-void helper_rfci(CPUPPCState *env)
-{
-    do_rfi(env, env->spr[SPR_BOOKE_CSRR0], env->spr[SPR_BOOKE_CSRR1]);
-}
-
-void helper_rfdi(CPUPPCState *env)
-{
-    /* FIXME: choose CSRR1 or DSRR1 based on cpu type */
-    do_rfi(env, env->spr[SPR_BOOKE_DSRR0], env->spr[SPR_BOOKE_DSRR1]);
-}
-
-void helper_rfmci(CPUPPCState *env)
-{
-    /* FIXME: choose CSRR1 or MCSRR1 based on cpu type */
-    do_rfi(env, env->spr[SPR_BOOKE_MCSRR0], env->spr[SPR_BOOKE_MCSRR1]);
-}
-#endif /* !CONFIG_USER_ONLY */
-
-void helper_TW(CPUPPCState *env, target_ulong arg1, target_ulong arg2,
-               uint32_t flags)
-{
-    if (!likely(!(((int32_t)arg1 < (int32_t)arg2 && (flags & 0x10)) ||
-                  ((int32_t)arg1 > (int32_t)arg2 && (flags & 0x08)) ||
-                  ((int32_t)arg1 == (int32_t)arg2 && (flags & 0x04)) ||
-                  ((uint32_t)arg1 < (uint32_t)arg2 && (flags & 0x02)) ||
-                  ((uint32_t)arg1 > (uint32_t)arg2 && (flags & 0x01))))) {
-        raise_exception_err_ra(env, POWERPC_EXCP_PROGRAM,
-                               POWERPC_EXCP_TRAP, GETPC());
-    }
-}
-
-#ifdef TARGET_PPC64
-void helper_TD(CPUPPCState *env, target_ulong arg1, target_ulong arg2,
-               uint32_t flags)
-{
-    if (!likely(!(((int64_t)arg1 < (int64_t)arg2 && (flags & 0x10)) ||
-                  ((int64_t)arg1 > (int64_t)arg2 && (flags & 0x08)) ||
-                  ((int64_t)arg1 == (int64_t)arg2 && (flags & 0x04)) ||
-                  ((uint64_t)arg1 < (uint64_t)arg2 && (flags & 0x02)) ||
-                  ((uint64_t)arg1 > (uint64_t)arg2 && (flags & 0x01))))) {
-        raise_exception_err_ra(env, POWERPC_EXCP_PROGRAM,
-                               POWERPC_EXCP_TRAP, GETPC());
-    }
-}
-#endif /* TARGET_PPC64 */
-
-static uint32_t helper_SIMON_LIKE_32_64(uint32_t x, uint64_t key, uint32_t lane)
-{
-    const uint16_t c = 0xfffc;
-    const uint64_t z0 = 0xfa2561cdf44ac398ULL;
-    uint16_t z = 0, temp;
-    uint16_t k[32], eff_k[32], xleft[33], xright[33], fxleft[32];
-
-    for (int i = 3; i >= 0; i--) {
-        k[i] = key & 0xffff;
-        key >>= 16;
-    }
-    xleft[0] = x & 0xffff;
-    xright[0] = (x >> 16) & 0xffff;
-
-    for (int i = 0; i < 28; i++) {
-        z = (z0 >> (63 - i)) & 1;
-        temp = ror16(k[i + 3], 3) ^ k[i + 1];
-        k[i + 4] = c ^ z ^ k[i] ^ temp ^ ror16(temp, 1);
-    }
-
-    for (int i = 0; i < 8; i++) {
-        eff_k[4 * i + 0] = k[4 * i + ((0 + lane) % 4)];
-        eff_k[4 * i + 1] = k[4 * i + ((1 + lane) % 4)];
-        eff_k[4 * i + 2] = k[4 * i + ((2 + lane) % 4)];
-        eff_k[4 * i + 3] = k[4 * i + ((3 + lane) % 4)];
-    }
-
-    for (int i = 0; i < 32; i++) {
-        fxleft[i] = (rol16(xleft[i], 1) &
-            rol16(xleft[i], 8)) ^ rol16(xleft[i], 2);
-        xleft[i + 1] = xright[i] ^ fxleft[i] ^ eff_k[i];
-        xright[i + 1] = xleft[i];
-    }
-
-    return (((uint32_t)xright[32]) << 16) | xleft[32];
-}
-
-static uint64_t hash_digest(uint64_t ra, uint64_t rb, uint64_t key)
-{
-    uint64_t stage0_h = 0ULL, stage0_l = 0ULL;
-    uint64_t stage1_h, stage1_l;
-
-    for (int i = 0; i < 4; i++) {
-        stage0_h |= ror64(rb & 0xff, 8 * (2 * i + 1));
-        stage0_h |= ((ra >> 32) & 0xff) << (8 * 2 * i);
-        stage0_l |= ror64((rb >> 32) & 0xff, 8 * (2 * i + 1));
-        stage0_l |= (ra & 0xff) << (8 * 2 * i);
-        rb >>= 8;
-        ra >>= 8;
-    }
-
-    stage1_h = (uint64_t)helper_SIMON_LIKE_32_64(stage0_h >> 32, key, 0) << 32;
-    stage1_h |= helper_SIMON_LIKE_32_64(stage0_h, key, 1);
-    stage1_l = (uint64_t)helper_SIMON_LIKE_32_64(stage0_l >> 32, key, 2) << 32;
-    stage1_l |= helper_SIMON_LIKE_32_64(stage0_l, key, 3);
-
-    return stage1_h ^ stage1_l;
-}
-
-static void do_hash(CPUPPCState *env, target_ulong ea, target_ulong ra,
-                    target_ulong rb, uint64_t key, bool store)
-{
-    uint64_t calculated_hash = hash_digest(ra, rb, key), loaded_hash;
-
-    if (store) {
-        cpu_stq_data_ra(env, ea, calculated_hash, GETPC());
-    } else {
-        loaded_hash = cpu_ldq_data_ra(env, ea, GETPC());
-        if (loaded_hash != calculated_hash) {
-            raise_exception_err_ra(env, POWERPC_EXCP_PROGRAM,
-                POWERPC_EXCP_TRAP, GETPC());
-        }
-    }
-}
-
-#include "qemu/guest-random.h"
-
-#ifdef TARGET_PPC64
-#define HELPER_HASH(op, key, store, dexcr_aspect)                             \
-void helper_##op(CPUPPCState *env, target_ulong ea, target_ulong ra,          \
-                 target_ulong rb)                                             \
-{                                                                             \
-    if (env->msr & R_MSR_PR_MASK) {                                           \
-        if (!(env->spr[SPR_DEXCR] & R_DEXCR_PRO_##dexcr_aspect##_MASK ||      \
-            env->spr[SPR_HDEXCR] & R_HDEXCR_ENF_##dexcr_aspect##_MASK))       \
-            return;                                                           \
-    } else if (!(env->msr & R_MSR_HV_MASK)) {                                 \
-        if (!(env->spr[SPR_DEXCR] & R_DEXCR_PNH_##dexcr_aspect##_MASK ||      \
-            env->spr[SPR_HDEXCR] & R_HDEXCR_ENF_##dexcr_aspect##_MASK))       \
-            return;                                                           \
-    } else if (!(env->msr & R_MSR_S_MASK)) {                                  \
-        if (!(env->spr[SPR_HDEXCR] & R_HDEXCR_HNU_##dexcr_aspect##_MASK))     \
-            return;                                                           \
-    }                                                                         \
-                                                                              \
-    do_hash(env, ea, ra, rb, key, store);                                     \
-}
-#else
-#define HELPER_HASH(op, key, store, dexcr_aspect)                             \
-void helper_##op(CPUPPCState *env, target_ulong ea, target_ulong ra,          \
-                 target_ulong rb)                                             \
-{                                                                             \
-    do_hash(env, ea, ra, rb, key, store);                                     \
-}
-#endif /* TARGET_PPC64 */
-
-HELPER_HASH(HASHST, env->spr[SPR_HASHKEYR], true, NPHIE)
-HELPER_HASH(HASHCHK, env->spr[SPR_HASHKEYR], false, NPHIE)
-HELPER_HASH(HASHSTP, env->spr[SPR_HASHPKEYR], true, PHIE)
-HELPER_HASH(HASHCHKP, env->spr[SPR_HASHPKEYR], false, PHIE)
-
-#ifndef CONFIG_USER_ONLY
-/* Embedded.Processor Control */
-static int dbell2irq(target_ulong rb)
-{
-    int msg = rb & DBELL_TYPE_MASK;
-    int irq = -1;
-
-    switch (msg) {
-    case DBELL_TYPE_DBELL:
-        irq = PPC_INTERRUPT_DOORBELL;
-        break;
-    case DBELL_TYPE_DBELL_CRIT:
-        irq = PPC_INTERRUPT_CDOORBELL;
-        break;
-    case DBELL_TYPE_G_DBELL:
-    case DBELL_TYPE_G_DBELL_CRIT:
-    case DBELL_TYPE_G_DBELL_MC:
-        /* XXX implement */
-    default:
-        break;
-    }
-
-    return irq;
-}
-
-void helper_msgclr(CPUPPCState *env, target_ulong rb)
-{
-    int irq = dbell2irq(rb);
-
-    if (irq < 0) {
-        return;
-    }
-
-    ppc_set_irq(env_archcpu(env), irq, 0);
-}
-
-void helper_msgsnd(target_ulong rb)
-{
-    int irq = dbell2irq(rb);
-    int pir = rb & DBELL_PIRTAG_MASK;
-    CPUState *cs;
-
-    if (irq < 0) {
-        return;
-    }
-
-    bql_lock();
-    CPU_FOREACH(cs) {
-        PowerPCCPU *cpu = POWERPC_CPU(cs);
-        CPUPPCState *cenv = &cpu->env;
-
-        if ((rb & DBELL_BRDCAST_MASK) || (cenv->spr[SPR_BOOKE_PIR] == pir)) {
-            ppc_set_irq(cpu, irq, 1);
-        }
-    }
-    bql_unlock();
-}
-
-/* Server Processor Control */
-
-static bool dbell_type_server(target_ulong rb)
-{
-    /*
-     * A Directed Hypervisor Doorbell message is sent only if the
-     * message type is 5. All other types are reserved and the
-     * instruction is a no-op
-     */
-    return (rb & DBELL_TYPE_MASK) == DBELL_TYPE_DBELL_SERVER;
-}
-
-static inline bool dbell_bcast_core(target_ulong rb)
-{
-    return (rb & DBELL_BRDCAST_MASK) == DBELL_BRDCAST_CORE;
-}
-
-static inline bool dbell_bcast_subproc(target_ulong rb)
-{
-    return (rb & DBELL_BRDCAST_MASK) == DBELL_BRDCAST_SUBPROC;
-}
-
-/*
- * Send an interrupt to a thread in the same core as env).
- */
-static void msgsnd_core_tir(CPUPPCState *env, uint32_t target_tir, int irq)
-{
-    PowerPCCPU *cpu = env_archcpu(env);
-    CPUState *cs = env_cpu(env);
-
-    if (ppc_cpu_lpar_single_threaded(cs)) {
-        if (target_tir == 0) {
-            ppc_set_irq(cpu, irq, 1);
-        }
-    } else {
-        CPUState *ccs;
-
-        /* Does iothread need to be locked for walking CPU list? */
-        bql_lock();
-        THREAD_SIBLING_FOREACH(cs, ccs) {
-            PowerPCCPU *ccpu = POWERPC_CPU(ccs);
-            if (target_tir == ppc_cpu_tir(ccpu)) {
-                ppc_set_irq(ccpu, irq, 1);
-                break;
-            }
-        }
-        bql_unlock();
-    }
-}
-
-void helper_book3s_msgclr(CPUPPCState *env, target_ulong rb)
-{
-    if (!dbell_type_server(rb)) {
-        return;
-    }
-
-    ppc_set_irq(env_archcpu(env), PPC_INTERRUPT_HDOORBELL, 0);
-}
-
-void helper_book3s_msgsnd(CPUPPCState *env, target_ulong rb)
-{
-    int pir = rb & DBELL_PROCIDTAG_MASK;
-    bool brdcast = false;
-    CPUState *cs, *ccs;
-    PowerPCCPU *cpu;
-
-    if (!dbell_type_server(rb)) {
-        return;
-    }
-
-    /* POWER8 msgsnd is like msgsndp (targets a thread within core) */
-    if (!(env->insns_flags2 & PPC2_ISA300)) {
-        msgsnd_core_tir(env, rb & PPC_BITMASK(57, 63), PPC_INTERRUPT_HDOORBELL);
-        return;
-    }
-
-    /* POWER9 and later msgsnd is a global (targets any thread) */
-    cpu = ppc_get_vcpu_by_pir(pir);
-    if (!cpu) {
-        return;
-    }
-    cs = CPU(cpu);
-
-    if (dbell_bcast_core(rb) || (dbell_bcast_subproc(rb) &&
-                                 (env->flags & POWERPC_FLAG_SMT_1LPAR))) {
-        brdcast = true;
-    }
-
-    if (ppc_cpu_core_single_threaded(cs) || !brdcast) {
-        ppc_set_irq(cpu, PPC_INTERRUPT_HDOORBELL, 1);
-        return;
-    }
-
-    /*
-     * Why is bql needed for walking CPU list? Answer seems to be because ppc
-     * irq handling needs it, but ppc_set_irq takes the lock itself if needed,
-     * so could this be removed?
-     */
-    bql_lock();
-    THREAD_SIBLING_FOREACH(cs, ccs) {
-        ppc_set_irq(POWERPC_CPU(ccs), PPC_INTERRUPT_HDOORBELL, 1);
-    }
-    bql_unlock();
-}
-
-#ifdef TARGET_PPC64
-void helper_book3s_msgclrp(CPUPPCState *env, target_ulong rb)
-{
-    helper_hfscr_facility_check(env, HFSCR_MSGP, "msgclrp", HFSCR_IC_MSGP);
-
-    if (!dbell_type_server(rb)) {
-        return;
-    }
-
-    ppc_set_irq(env_archcpu(env), PPC_INTERRUPT_DOORBELL, 0);
-}
-
-/*
- * sends a message to another thread  on the same
- * multi-threaded processor
- */
-void helper_book3s_msgsndp(CPUPPCState *env, target_ulong rb)
-{
-    helper_hfscr_facility_check(env, HFSCR_MSGP, "msgsndp", HFSCR_IC_MSGP);
-
-    if (!dbell_type_server(rb)) {
-        return;
-    }
-
-    msgsnd_core_tir(env, rb & PPC_BITMASK(57, 63), PPC_INTERRUPT_DOORBELL);
-}
-#endif /* TARGET_PPC64 */
-
-/* Single-step tracing */
-void helper_book3s_trace(CPUPPCState *env, target_ulong prev_ip)
-{
-    uint32_t error_code = 0;
-    if (env->insns_flags2 & PPC2_ISA207S) {
-        /* Load/store reporting, SRR1[35, 36] and SDAR, are not implemented. */
-        env->spr[SPR_POWER_SIAR] = prev_ip;
-        error_code = PPC_BIT(33);
-    }
-    raise_exception_err(env, POWERPC_EXCP_TRACE, error_code);
-}
-
-void ppc_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
-                                 MMUAccessType access_type,
-                                 int mmu_idx, uintptr_t retaddr)
-{
-    CPUPPCState *env = cpu_env(cs);
-    uint32_t insn;
-
-    /* Restore state and reload the insn we executed, for filling in DSISR.  */
-    cpu_restore_state(cs, retaddr);
-    insn = ppc_ldl_code(env, env->nip);
-
-    switch (env->mmu_model) {
-    case POWERPC_MMU_SOFT_4xx:
-        env->spr[SPR_40x_DEAR] = vaddr;
-        break;
-    case POWERPC_MMU_BOOKE:
-    case POWERPC_MMU_BOOKE206:
-        env->spr[SPR_BOOKE_DEAR] = vaddr;
-        break;
-    default:
-        env->spr[SPR_DAR] = vaddr;
-        break;
-    }
-
-    cs->exception_index = POWERPC_EXCP_ALIGN;
-    env->error_code = insn & 0x03FF0000;
-    cpu_loop_exit(cs);
-}
-
-void ppc_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr,
-                                   vaddr vaddr, unsigned size,
-                                   MMUAccessType access_type,
-                                   int mmu_idx, MemTxAttrs attrs,
-                                   MemTxResult response, uintptr_t retaddr)
-{
-    CPUPPCState *env = cpu_env(cs);
-
-    switch (env->excp_model) {
-#if defined(TARGET_PPC64)
-    case POWERPC_EXCP_POWER8:
-    case POWERPC_EXCP_POWER9:
-    case POWERPC_EXCP_POWER10:
-    case POWERPC_EXCP_POWER11:
-        /*
-         * Machine check codes can be found in processor User Manual or
-         * Linux or skiboot source.
-         */
-        if (access_type == MMU_DATA_LOAD) {
-            env->spr[SPR_DAR] = vaddr;
-            env->spr[SPR_DSISR] = PPC_BIT(57);
-            env->error_code = PPC_BIT(42);
-
-        } else if (access_type == MMU_DATA_STORE) {
-            /*
-             * MCE for stores in POWER is asynchronous so hardware does
-             * not set DAR, but QEMU can do better.
-             */
-            env->spr[SPR_DAR] = vaddr;
-            env->error_code = PPC_BIT(36) | PPC_BIT(43) | PPC_BIT(45);
-            env->error_code |= PPC_BIT(42);
-
-        } else { /* Fetch */
-            /*
-             * is_prefix_insn_excp() tests !PPC_BIT(42) to avoid fetching
-             * the instruction, so that must always be clear for fetches.
-             */
-            env->error_code = PPC_BIT(36) | PPC_BIT(44) | PPC_BIT(45);
-        }
-        break;
-#endif
-    default:
-        /*
-         * TODO: Check behaviour for other CPUs, for now do nothing.
-         * Could add a basic MCE even if real hardware ignores.
-         */
-        return;
-    }
-
-    cs->exception_index = POWERPC_EXCP_MCHECK;
-    cpu_loop_exit_restore(cs, retaddr);
-}
-
-void ppc_cpu_debug_excp_handler(CPUState *cs)
-{
-#if defined(TARGET_PPC64)
-    CPUPPCState *env = cpu_env(cs);
-
-    if (env->insns_flags2 & PPC2_ISA207S) {
-        if (cs->watchpoint_hit) {
-            if (cs->watchpoint_hit->flags & BP_CPU) {
-                env->spr[SPR_DAR] = cs->watchpoint_hit->hitaddr;
-                env->spr[SPR_DSISR] = PPC_BIT(41);
-                cs->watchpoint_hit = NULL;
-                raise_exception(env, POWERPC_EXCP_DSI);
-            }
-            cs->watchpoint_hit = NULL;
-        } else if (cpu_breakpoint_test(cs, env->nip, BP_CPU)) {
-            raise_exception_err(env, POWERPC_EXCP_TRACE,
-                                PPC_BIT(33) | PPC_BIT(43));
-        }
-    }
-#endif
-}
-
-bool ppc_cpu_debug_check_breakpoint(CPUState *cs)
-{
-#if defined(TARGET_PPC64)
-    CPUPPCState *env = cpu_env(cs);
-
-    if (env->insns_flags2 & PPC2_ISA207S) {
-        target_ulong priv;
-
-        priv = env->spr[SPR_CIABR] & PPC_BITMASK(62, 63);
-        switch (priv) {
-        case 0x1: /* problem */
-            return env->msr & ((target_ulong)1 << MSR_PR);
-        case 0x2: /* supervisor */
-            return (!(env->msr & ((target_ulong)1 << MSR_PR)) &&
-                    !(env->msr & ((target_ulong)1 << MSR_HV)));
-        case 0x3: /* hypervisor */
-            return (!(env->msr & ((target_ulong)1 << MSR_PR)) &&
-                     (env->msr & ((target_ulong)1 << MSR_HV)));
-        default:
-            g_assert_not_reached();
-        }
-    }
-#endif
-
-    return false;
-}
-
-bool ppc_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp)
-{
-#if defined(TARGET_PPC64)
-    CPUPPCState *env = cpu_env(cs);
-
-    if (env->insns_flags2 & PPC2_ISA207S) {
-        if (wp == env->dawr0_watchpoint) {
-            uint32_t dawrx = env->spr[SPR_DAWRX0];
-            bool wt = extract32(dawrx, PPC_BIT_NR(59), 1);
-            bool wti = extract32(dawrx, PPC_BIT_NR(60), 1);
-            bool hv = extract32(dawrx, PPC_BIT_NR(61), 1);
-            bool sv = extract32(dawrx, PPC_BIT_NR(62), 1);
-            bool pr = extract32(dawrx, PPC_BIT_NR(62), 1);
-
-            if ((env->msr & ((target_ulong)1 << MSR_PR)) && !pr) {
-                return false;
-            } else if ((env->msr & ((target_ulong)1 << MSR_HV)) && !hv) {
-                return false;
-            } else if (!sv) {
-                return false;
-            }
-
-            if (!wti) {
-                if (env->msr & ((target_ulong)1 << MSR_DR)) {
-                    if (!wt) {
-                        return false;
-                    }
-                } else {
-                    if (wt) {
-                        return false;
-                    }
-                }
-            }
-
-            return true;
-        }
-    }
-#endif
-
-    return false;
-}
-
-#endif /* !CONFIG_USER_ONLY */
-#endif /* CONFIG_TCG */
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 5a77e761bd3f8..ca414f2f43d3a 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -28,6 +28,8 @@ DEF_HELPER_2(store_pcr, void, env, tl)
 DEF_HELPER_2(store_ciabr, void, env, tl)
 DEF_HELPER_2(store_dawr0, void, env, tl)
 DEF_HELPER_2(store_dawrx0, void, env, tl)
+DEF_HELPER_2(store_dawr1, void, env, tl)
+DEF_HELPER_2(store_dawrx1, void, env, tl)
 DEF_HELPER_2(store_mmcr0, void, env, tl)
 DEF_HELPER_2(store_mmcr1, void, env, tl)
 DEF_HELPER_2(store_mmcrA, void, env, tl)
@@ -733,6 +735,8 @@ DEF_HELPER_2(store_tfmr, void, env, tl)
 DEF_HELPER_FLAGS_2(store_sprc, TCG_CALL_NO_RWG, void, env, tl)
 DEF_HELPER_FLAGS_1(load_sprd, TCG_CALL_NO_RWG_SE, tl, env)
 DEF_HELPER_FLAGS_2(store_sprd, TCG_CALL_NO_RWG, void, env, tl)
+DEF_HELPER_FLAGS_1(load_pmsr, TCG_CALL_NO_RWG_SE, tl, env)
+DEF_HELPER_FLAGS_2(store_pmcr, TCG_CALL_NO_RWG, void, env, tl)
 #endif
 DEF_HELPER_2(store_sdr1, void, env, tl)
 DEF_HELPER_2(store_pidr, void, env, tl)
diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c
index 3ad4273c16493..f211bc9830444 100644
--- a/target/ppc/helper_regs.c
+++ b/target/ppc/helper_regs.c
@@ -20,7 +20,7 @@
 #include "qemu/osdep.h"
 #include "cpu.h"
 #include "qemu/main-loop.h"
-#include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "system/kvm.h"
 #include "system/tcg.h"
 #include "helper_regs.h"
diff --git a/target/ppc/internal.h b/target/ppc/internal.h
index 20fb2ec593c51..9012d3809cb7f 100644
--- a/target/ppc/internal.h
+++ b/target/ppc/internal.h
@@ -268,6 +268,8 @@ static inline void pte_invalidate(target_ulong *pte0)
 #define PTE_PTEM_MASK 0x7FFFFFBF
 #define PTE_CHECK_MASK (TARGET_PAGE_MASK | 0x7B)
 
+uint32_t ppc_ldl_code(CPUArchState *env, target_ulong addr);
+
 #ifdef CONFIG_USER_ONLY
 void ppc_cpu_record_sigsegv(CPUState *cs, vaddr addr,
                             MMUAccessType access_type,
@@ -287,7 +289,11 @@ void ppc_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr,
 void ppc_cpu_debug_excp_handler(CPUState *cs);
 bool ppc_cpu_debug_check_breakpoint(CPUState *cs);
 bool ppc_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp);
-#endif
+
+G_NORETURN void powerpc_checkstop(CPUPPCState *env, const char *reason);
+void powerpc_excp(PowerPCCPU *cpu, int excp);
+
+#endif /* !CONFIG_USER_ONLY */
 
 FIELD(GER_MSK, XMSK, 0, 4)
 FIELD(GER_MSK, YMSK, 4, 4)
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 966c2c6572342..992356cb7593c 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -49,7 +49,7 @@
 #include "elf.h"
 #include "system/kvm_int.h"
 #include "system/kvm.h"
-#include "hw/core/accel-cpu.h"
+#include "accel/accel-cpu-target.h"
 
 #include CONFIG_DEVICES
 
@@ -92,6 +92,7 @@ static int cap_large_decr;
 static int cap_fwnmi;
 static int cap_rpt_invalidate;
 static int cap_ail_mode_3;
+static int cap_dawr1;
 
 #ifdef CONFIG_PSERIES
 static int cap_papr;
@@ -152,6 +153,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
     cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
     cap_large_decr = kvmppc_get_dec_bits();
     cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI);
+    cap_dawr1 = kvm_vm_check_extension(s, KVM_CAP_PPC_DAWR1);
     /*
      * Note: setting it to false because there is not such capability
      * in KVM at this moment.
@@ -2114,6 +2116,16 @@ int kvmppc_set_fwnmi(PowerPCCPU *cpu)
     return kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_FWNMI, 0);
 }
 
+bool kvmppc_has_cap_dawr1(void)
+{
+    return !!cap_dawr1;
+}
+
+int kvmppc_set_cap_dawr1(int enable)
+{
+    return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_DAWR1, 0, enable);
+}
+
 int kvmppc_smt_threads(void)
 {
     return cap_ppc_smt ? cap_ppc_smt : 1;
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index 1d8cb76a6beed..a8768c1dfd8cc 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -68,6 +68,8 @@ bool kvmppc_has_cap_htm(void);
 bool kvmppc_has_cap_mmu_radix(void);
 bool kvmppc_has_cap_mmu_hash_v3(void);
 bool kvmppc_has_cap_xive(void);
+bool kvmppc_has_cap_dawr1(void);
+int kvmppc_set_cap_dawr1(int enable);
 int kvmppc_get_cap_safe_cache(void);
 int kvmppc_get_cap_safe_bounds_check(void);
 int kvmppc_get_cap_safe_indirect_branch(void);
@@ -377,6 +379,16 @@ static inline bool kvmppc_has_cap_xive(void)
     return false;
 }
 
+static inline bool kvmppc_has_cap_dawr1(void)
+{
+    return false;
+}
+
+static inline int kvmppc_set_cap_dawr1(int enable)
+{
+    abort();
+}
+
 static inline int kvmppc_get_cap_safe_cache(void)
 {
     return 0;
diff --git a/target/ppc/machine.c b/target/ppc/machine.c
index 0bd7ae6c0ca34..98df5b4a3a2a0 100644
--- a/target/ppc/machine.c
+++ b/target/ppc/machine.c
@@ -264,7 +264,8 @@ static int cpu_post_load(void *opaque, int version_id)
         /* Re-set breaks based on regs */
 #if defined(TARGET_PPC64)
         ppc_update_ciabr(env);
-        ppc_update_daw0(env);
+        ppc_update_daw(env, 0);
+        ppc_update_daw(env, 1);
 #endif
         /*
          * TCG needs to re-start the decrementer timer and/or raise the
diff --git a/target/ppc/meson.build b/target/ppc/meson.build
index db3b7a0c33b0a..8eed1fa40ca9c 100644
--- a/target/ppc/meson.build
+++ b/target/ppc/meson.build
@@ -14,6 +14,7 @@ ppc_ss.add(when: 'CONFIG_TCG', if_true: files(
   'int_helper.c',
   'mem_helper.c',
   'misc_helper.c',
+  'tcg-excp_helper.c',
   'timebase_helper.c',
   'translate.c',
   'power8-pmu.c',
diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c
index f0ca80153b2b1..2d9512c116b3f 100644
--- a/target/ppc/misc_helper.c
+++ b/target/ppc/misc_helper.c
@@ -21,6 +21,7 @@
 #include "qemu/log.h"
 #include "cpu.h"
 #include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "exec/helper-proto.h"
 #include "qemu/error-report.h"
 #include "qemu/main-loop.h"
@@ -233,6 +234,16 @@ void helper_store_dawrx0(CPUPPCState *env, target_ulong value)
     ppc_store_dawrx0(env, value);
 }
 
+void helper_store_dawr1(CPUPPCState *env, target_ulong value)
+{
+    ppc_store_dawr1(env, value);
+}
+
+void helper_store_dawrx1(CPUPPCState *env, target_ulong value)
+{
+    ppc_store_dawrx1(env, value);
+}
+
 /*
  * DPDES register is shared. Each bit reflects the state of the
  * doorbell interrupt of a thread of the same core.
@@ -376,6 +387,59 @@ void helper_store_sprd(CPUPPCState *env, target_ulong val)
         break;
     }
 }
+
+target_ulong helper_load_pmsr(CPUPPCState *env)
+{
+    target_ulong lowerps = extract64(env->spr[SPR_PMCR], PPC_BIT_NR(15), 8);
+    target_ulong val = 0;
+
+    val |= PPC_BIT(63); /* verion 0x1 (POWER9/10) */
+    /* Pmin = 0 */
+    /* XXX: POWER9 should be 3 */
+    val |= 4ULL << PPC_BIT_NR(31); /* Pmax */
+    val |= lowerps << PPC_BIT_NR(15); /* Local actual Pstate */
+    val |= lowerps << PPC_BIT_NR(7); /* Global actual Pstate */
+
+    return val;
+}
+
+static void ppc_set_pmcr(PowerPCCPU *cpu, target_ulong val)
+{
+    cpu->env.spr[SPR_PMCR] = val;
+}
+
+void helper_store_pmcr(CPUPPCState *env, target_ulong val)
+{
+    PowerPCCPU *cpu = env_archcpu(env);
+    CPUState *cs = env_cpu(env);
+    CPUState *ccs;
+
+    /* Leave version field unchanged (0x1) */
+    val &= ~PPC_BITMASK(60, 63);
+    val |= PPC_BIT(63);
+
+    val &= ~PPC_BITMASK(0, 7); /* UpperPS ignored */
+    if (val & PPC_BITMASK(16, 59)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "Non-zero PMCR reserved bits "
+                      TARGET_FMT_lx"\n", val);
+        val &= ~PPC_BITMASK(16, 59);
+    }
+
+    /* DPDES behaves as 1-thread in LPAR-per-thread mode */
+    if (ppc_cpu_lpar_single_threaded(cs)) {
+        ppc_set_pmcr(cpu, val);
+        return;
+    }
+
+    /* Does iothread need to be locked for walking CPU list? */
+    bql_lock();
+    THREAD_SIBLING_FOREACH(cs, ccs) {
+        PowerPCCPU *ccpu = POWERPC_CPU(ccs);
+        ppc_set_pmcr(ccpu, val);
+    }
+    bql_unlock();
+}
+
 #endif /* defined(TARGET_PPC64) */
 
 void helper_store_pidr(CPUPPCState *env, target_ulong val)
diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
index 1d3d9e1be7731..461eda4a3dce7 100644
--- a/target/ppc/mmu-radix64.c
+++ b/target/ppc/mmu-radix64.c
@@ -571,6 +571,20 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu,
         prtbe0 = ldq_phys(cs->as, h_raddr);
     }
 
+    /*
+     * Some Linux uses a zero process table entry in PID!=0 for kernel context
+     * without userspace in order to fault on NULL dereference, because using
+     * PIDR=0 for the kernel causes the Q0 page table to be used to translate
+     * Q3 as well. Check for that case here to avoid the invalid configuration
+     * message.
+     */
+    if (unlikely(!prtbe0)) {
+        if (guest_visible) {
+            ppc_radix64_raise_si(cpu, access_type, eaddr, DSISR_R_BADCONFIG);
+        }
+        return 1;
+    }
+
     /* Walk Radix Tree from Process Table Entry to Convert EA to RA */
     *g_page_size = PRTBE_R_GET_RTS(prtbe0);
     base_addr = prtbe0 & PRTBE_R_RPDB;
diff --git a/target/ppc/mmu_helper.c b/target/ppc/mmu_helper.c
index a802bc9c62b0a..ad9ba8294cc61 100644
--- a/target/ppc/mmu_helper.c
+++ b/target/ppc/mmu_helper.c
@@ -24,6 +24,7 @@
 #include "kvm_ppc.h"
 #include "mmu-hash64.h"
 #include "mmu-hash32.h"
+#include "exec/cputlb.h"
 #include "exec/exec-all.h"
 #include "exec/page-protection.h"
 #include "exec/log.h"
diff --git a/target/ppc/spr_common.h b/target/ppc/spr_common.h
index 01aff449bcccb..84c910c440f7d 100644
--- a/target/ppc/spr_common.h
+++ b/target/ppc/spr_common.h
@@ -165,6 +165,8 @@ void spr_write_cfar(DisasContext *ctx, int sprn, int gprn);
 void spr_write_ciabr(DisasContext *ctx, int sprn, int gprn);
 void spr_write_dawr0(DisasContext *ctx, int sprn, int gprn);
 void spr_write_dawrx0(DisasContext *ctx, int sprn, int gprn);
+void spr_write_dawr1(DisasContext *ctx, int sprn, int gprn);
+void spr_write_dawrx1(DisasContext *ctx, int sprn, int gprn);
 void spr_write_ureg(DisasContext *ctx, int sprn, int gprn);
 void spr_read_purr(DisasContext *ctx, int gprn, int sprn);
 void spr_write_purr(DisasContext *ctx, int sprn, int gprn);
@@ -204,6 +206,8 @@ void spr_write_hmer(DisasContext *ctx, int sprn, int gprn);
 void spr_read_tfmr(DisasContext *ctx, int gprn, int sprn);
 void spr_write_tfmr(DisasContext *ctx, int sprn, int gprn);
 void spr_write_lpcr(DisasContext *ctx, int sprn, int gprn);
+void spr_read_pmsr(DisasContext *ctx, int gprn, int sprn);
+void spr_write_pmcr(DisasContext *ctx, int sprn, int gprn);
 void spr_read_dexcr_ureg(DisasContext *ctx, int gprn, int sprn);
 void spr_read_ppr32(DisasContext *ctx, int sprn, int gprn);
 void spr_write_ppr32(DisasContext *ctx, int sprn, int gprn);
diff --git a/target/ppc/tcg-excp_helper.c b/target/ppc/tcg-excp_helper.c
new file mode 100644
index 0000000000000..5a189dc3d7090
--- /dev/null
+++ b/target/ppc/tcg-excp_helper.c
@@ -0,0 +1,851 @@
+/*
+ *  PowerPC exception emulation helpers for QEMU (TCG specific)
+ *
+ *  Copyright (c) 2003-2007 Jocelyn Mayer
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "qemu/log.h"
+#include "exec/cpu_ldst.h"
+#include "exec/exec-all.h"
+#include "exec/helper-proto.h"
+#include "system/runstate.h"
+
+#include "helper_regs.h"
+#include "hw/ppc/ppc.h"
+#include "internal.h"
+#include "cpu.h"
+#include "trace.h"
+
+/*****************************************************************************/
+/* Exceptions processing helpers */
+
+void raise_exception_err_ra(CPUPPCState *env, uint32_t exception,
+                            uint32_t error_code, uintptr_t raddr)
+{
+    CPUState *cs = env_cpu(env);
+
+    cs->exception_index = exception;
+    env->error_code = error_code;
+    cpu_loop_exit_restore(cs, raddr);
+}
+
+void helper_raise_exception_err(CPUPPCState *env, uint32_t exception,
+                                uint32_t error_code)
+{
+    raise_exception_err_ra(env, exception, error_code, 0);
+}
+
+void helper_raise_exception(CPUPPCState *env, uint32_t exception)
+{
+    raise_exception_err_ra(env, exception, 0, 0);
+}
+
+#ifndef CONFIG_USER_ONLY
+
+static G_NORETURN void raise_exception_err(CPUPPCState *env, uint32_t exception,
+                                           uint32_t error_code)
+{
+    raise_exception_err_ra(env, exception, error_code, 0);
+}
+
+static G_NORETURN void raise_exception(CPUPPCState *env, uint32_t exception)
+{
+    raise_exception_err_ra(env, exception, 0, 0);
+}
+
+#endif /* !CONFIG_USER_ONLY */
+
+void helper_TW(CPUPPCState *env, target_ulong arg1, target_ulong arg2,
+               uint32_t flags)
+{
+    if (!likely(!(((int32_t)arg1 < (int32_t)arg2 && (flags & 0x10)) ||
+                  ((int32_t)arg1 > (int32_t)arg2 && (flags & 0x08)) ||
+                  ((int32_t)arg1 == (int32_t)arg2 && (flags & 0x04)) ||
+                  ((uint32_t)arg1 < (uint32_t)arg2 && (flags & 0x02)) ||
+                  ((uint32_t)arg1 > (uint32_t)arg2 && (flags & 0x01))))) {
+        raise_exception_err_ra(env, POWERPC_EXCP_PROGRAM,
+                               POWERPC_EXCP_TRAP, GETPC());
+    }
+}
+
+#ifdef TARGET_PPC64
+void helper_TD(CPUPPCState *env, target_ulong arg1, target_ulong arg2,
+               uint32_t flags)
+{
+    if (!likely(!(((int64_t)arg1 < (int64_t)arg2 && (flags & 0x10)) ||
+                  ((int64_t)arg1 > (int64_t)arg2 && (flags & 0x08)) ||
+                  ((int64_t)arg1 == (int64_t)arg2 && (flags & 0x04)) ||
+                  ((uint64_t)arg1 < (uint64_t)arg2 && (flags & 0x02)) ||
+                  ((uint64_t)arg1 > (uint64_t)arg2 && (flags & 0x01))))) {
+        raise_exception_err_ra(env, POWERPC_EXCP_PROGRAM,
+                               POWERPC_EXCP_TRAP, GETPC());
+    }
+}
+#endif /* TARGET_PPC64 */
+
+static uint32_t helper_SIMON_LIKE_32_64(uint32_t x, uint64_t key, uint32_t lane)
+{
+    const uint16_t c = 0xfffc;
+    const uint64_t z0 = 0xfa2561cdf44ac398ULL;
+    uint16_t z = 0, temp;
+    uint16_t k[32], eff_k[32], xleft[33], xright[33], fxleft[32];
+
+    for (int i = 3; i >= 0; i--) {
+        k[i] = key & 0xffff;
+        key >>= 16;
+    }
+    xleft[0] = x & 0xffff;
+    xright[0] = (x >> 16) & 0xffff;
+
+    for (int i = 0; i < 28; i++) {
+        z = (z0 >> (63 - i)) & 1;
+        temp = ror16(k[i + 3], 3) ^ k[i + 1];
+        k[i + 4] = c ^ z ^ k[i] ^ temp ^ ror16(temp, 1);
+    }
+
+    for (int i = 0; i < 8; i++) {
+        eff_k[4 * i + 0] = k[4 * i + ((0 + lane) % 4)];
+        eff_k[4 * i + 1] = k[4 * i + ((1 + lane) % 4)];
+        eff_k[4 * i + 2] = k[4 * i + ((2 + lane) % 4)];
+        eff_k[4 * i + 3] = k[4 * i + ((3 + lane) % 4)];
+    }
+
+    for (int i = 0; i < 32; i++) {
+        fxleft[i] = (rol16(xleft[i], 1) &
+            rol16(xleft[i], 8)) ^ rol16(xleft[i], 2);
+        xleft[i + 1] = xright[i] ^ fxleft[i] ^ eff_k[i];
+        xright[i + 1] = xleft[i];
+    }
+
+    return (((uint32_t)xright[32]) << 16) | xleft[32];
+}
+
+static uint64_t hash_digest(uint64_t ra, uint64_t rb, uint64_t key)
+{
+    uint64_t stage0_h = 0ULL, stage0_l = 0ULL;
+    uint64_t stage1_h, stage1_l;
+
+    for (int i = 0; i < 4; i++) {
+        stage0_h |= ror64(rb & 0xff, 8 * (2 * i + 1));
+        stage0_h |= ((ra >> 32) & 0xff) << (8 * 2 * i);
+        stage0_l |= ror64((rb >> 32) & 0xff, 8 * (2 * i + 1));
+        stage0_l |= (ra & 0xff) << (8 * 2 * i);
+        rb >>= 8;
+        ra >>= 8;
+    }
+
+    stage1_h = (uint64_t)helper_SIMON_LIKE_32_64(stage0_h >> 32, key, 0) << 32;
+    stage1_h |= helper_SIMON_LIKE_32_64(stage0_h, key, 1);
+    stage1_l = (uint64_t)helper_SIMON_LIKE_32_64(stage0_l >> 32, key, 2) << 32;
+    stage1_l |= helper_SIMON_LIKE_32_64(stage0_l, key, 3);
+
+    return stage1_h ^ stage1_l;
+}
+
+static void do_hash(CPUPPCState *env, target_ulong ea, target_ulong ra,
+                    target_ulong rb, uint64_t key, bool store)
+{
+    uint64_t calculated_hash = hash_digest(ra, rb, key), loaded_hash;
+
+    if (store) {
+        cpu_stq_data_ra(env, ea, calculated_hash, GETPC());
+    } else {
+        loaded_hash = cpu_ldq_data_ra(env, ea, GETPC());
+        if (loaded_hash != calculated_hash) {
+            raise_exception_err_ra(env, POWERPC_EXCP_PROGRAM,
+                POWERPC_EXCP_TRAP, GETPC());
+        }
+    }
+}
+
+#include "qemu/guest-random.h"
+
+#ifdef TARGET_PPC64
+#define HELPER_HASH(op, key, store, dexcr_aspect)                             \
+void helper_##op(CPUPPCState *env, target_ulong ea, target_ulong ra,          \
+                 target_ulong rb)                                             \
+{                                                                             \
+    if (env->msr & R_MSR_PR_MASK) {                                           \
+        if (!(env->spr[SPR_DEXCR] & R_DEXCR_PRO_##dexcr_aspect##_MASK ||      \
+            env->spr[SPR_HDEXCR] & R_HDEXCR_ENF_##dexcr_aspect##_MASK))       \
+            return;                                                           \
+    } else if (!(env->msr & R_MSR_HV_MASK)) {                                 \
+        if (!(env->spr[SPR_DEXCR] & R_DEXCR_PNH_##dexcr_aspect##_MASK ||      \
+            env->spr[SPR_HDEXCR] & R_HDEXCR_ENF_##dexcr_aspect##_MASK))       \
+            return;                                                           \
+    } else if (!(env->msr & R_MSR_S_MASK)) {                                  \
+        if (!(env->spr[SPR_HDEXCR] & R_HDEXCR_HNU_##dexcr_aspect##_MASK))     \
+            return;                                                           \
+    }                                                                         \
+                                                                              \
+    do_hash(env, ea, ra, rb, key, store);                                     \
+}
+#else
+#define HELPER_HASH(op, key, store, dexcr_aspect)                             \
+void helper_##op(CPUPPCState *env, target_ulong ea, target_ulong ra,          \
+                 target_ulong rb)                                             \
+{                                                                             \
+    do_hash(env, ea, ra, rb, key, store);                                     \
+}
+#endif /* TARGET_PPC64 */
+
+HELPER_HASH(HASHST, env->spr[SPR_HASHKEYR], true, NPHIE)
+HELPER_HASH(HASHCHK, env->spr[SPR_HASHKEYR], false, NPHIE)
+HELPER_HASH(HASHSTP, env->spr[SPR_HASHPKEYR], true, PHIE)
+HELPER_HASH(HASHCHKP, env->spr[SPR_HASHPKEYR], false, PHIE)
+
+#ifndef CONFIG_USER_ONLY
+
+void ppc_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
+                                 MMUAccessType access_type,
+                                 int mmu_idx, uintptr_t retaddr)
+{
+    CPUPPCState *env = cpu_env(cs);
+    uint32_t insn;
+
+    /* Restore state and reload the insn we executed, for filling in DSISR.  */
+    cpu_restore_state(cs, retaddr);
+    insn = ppc_ldl_code(env, env->nip);
+
+    switch (env->mmu_model) {
+    case POWERPC_MMU_SOFT_4xx:
+        env->spr[SPR_40x_DEAR] = vaddr;
+        break;
+    case POWERPC_MMU_BOOKE:
+    case POWERPC_MMU_BOOKE206:
+        env->spr[SPR_BOOKE_DEAR] = vaddr;
+        break;
+    default:
+        env->spr[SPR_DAR] = vaddr;
+        break;
+    }
+
+    cs->exception_index = POWERPC_EXCP_ALIGN;
+    env->error_code = insn & 0x03FF0000;
+    cpu_loop_exit(cs);
+}
+
+void ppc_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr,
+                                   vaddr vaddr, unsigned size,
+                                   MMUAccessType access_type,
+                                   int mmu_idx, MemTxAttrs attrs,
+                                   MemTxResult response, uintptr_t retaddr)
+{
+    CPUPPCState *env = cpu_env(cs);
+
+    switch (env->excp_model) {
+#if defined(TARGET_PPC64)
+    case POWERPC_EXCP_POWER8:
+    case POWERPC_EXCP_POWER9:
+    case POWERPC_EXCP_POWER10:
+    case POWERPC_EXCP_POWER11:
+        /*
+         * Machine check codes can be found in processor User Manual or
+         * Linux or skiboot source.
+         */
+        if (access_type == MMU_DATA_LOAD) {
+            env->spr[SPR_DAR] = vaddr;
+            env->spr[SPR_DSISR] = PPC_BIT(57);
+            env->error_code = PPC_BIT(42);
+
+        } else if (access_type == MMU_DATA_STORE) {
+            /*
+             * MCE for stores in POWER is asynchronous so hardware does
+             * not set DAR, but QEMU can do better.
+             */
+            env->spr[SPR_DAR] = vaddr;
+            env->error_code = PPC_BIT(36) | PPC_BIT(43) | PPC_BIT(45);
+            env->error_code |= PPC_BIT(42);
+
+        } else { /* Fetch */
+            /*
+             * is_prefix_insn_excp() tests !PPC_BIT(42) to avoid fetching
+             * the instruction, so that must always be clear for fetches.
+             */
+            env->error_code = PPC_BIT(36) | PPC_BIT(44) | PPC_BIT(45);
+        }
+        break;
+#endif
+    default:
+        /*
+         * TODO: Check behaviour for other CPUs, for now do nothing.
+         * Could add a basic MCE even if real hardware ignores.
+         */
+        return;
+    }
+
+    cs->exception_index = POWERPC_EXCP_MCHECK;
+    cpu_loop_exit_restore(cs, retaddr);
+}
+
+void ppc_cpu_debug_excp_handler(CPUState *cs)
+{
+#if defined(TARGET_PPC64)
+    CPUPPCState *env = cpu_env(cs);
+
+    if (env->insns_flags2 & PPC2_ISA207S) {
+        if (cs->watchpoint_hit) {
+            if (cs->watchpoint_hit->flags & BP_CPU) {
+                env->spr[SPR_DAR] = cs->watchpoint_hit->hitaddr;
+                env->spr[SPR_DSISR] = PPC_BIT(41);
+                cs->watchpoint_hit = NULL;
+                raise_exception(env, POWERPC_EXCP_DSI);
+            }
+            cs->watchpoint_hit = NULL;
+        } else if (cpu_breakpoint_test(cs, env->nip, BP_CPU)) {
+            raise_exception_err(env, POWERPC_EXCP_TRACE,
+                                PPC_BIT(33) | PPC_BIT(43));
+        }
+    }
+#endif
+}
+
+bool ppc_cpu_debug_check_breakpoint(CPUState *cs)
+{
+#if defined(TARGET_PPC64)
+    CPUPPCState *env = cpu_env(cs);
+
+    if (env->insns_flags2 & PPC2_ISA207S) {
+        target_ulong priv;
+
+        priv = env->spr[SPR_CIABR] & PPC_BITMASK(62, 63);
+        switch (priv) {
+        case 0x1: /* problem */
+            return env->msr & ((target_ulong)1 << MSR_PR);
+        case 0x2: /* supervisor */
+            return (!(env->msr & ((target_ulong)1 << MSR_PR)) &&
+                    !(env->msr & ((target_ulong)1 << MSR_HV)));
+        case 0x3: /* hypervisor */
+            return (!(env->msr & ((target_ulong)1 << MSR_PR)) &&
+                     (env->msr & ((target_ulong)1 << MSR_HV)));
+        default:
+            g_assert_not_reached();
+        }
+    }
+#endif
+
+    return false;
+}
+
+bool ppc_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp)
+{
+#if defined(TARGET_PPC64)
+    CPUPPCState *env = cpu_env(cs);
+    bool wt, wti, hv, sv, pr;
+    uint32_t dawrx;
+
+    if ((env->insns_flags2 & PPC2_ISA207S) &&
+        (wp == env->dawr_watchpoint[0])) {
+        dawrx = env->spr[SPR_DAWRX0];
+    } else if ((env->insns_flags2 & PPC2_ISA310) &&
+               (wp == env->dawr_watchpoint[1])) {
+        dawrx = env->spr[SPR_DAWRX1];
+    } else {
+        return false;
+    }
+
+    wt = extract32(dawrx, PPC_BIT_NR(59), 1);
+    wti = extract32(dawrx, PPC_BIT_NR(60), 1);
+    hv = extract32(dawrx, PPC_BIT_NR(61), 1);
+    sv = extract32(dawrx, PPC_BIT_NR(62), 1);
+    pr = extract32(dawrx, PPC_BIT_NR(62), 1);
+
+    if ((env->msr & ((target_ulong)1 << MSR_PR)) && !pr) {
+        return false;
+    } else if ((env->msr & ((target_ulong)1 << MSR_HV)) && !hv) {
+        return false;
+    } else if (!sv) {
+        return false;
+    }
+
+    if (!wti) {
+        if (env->msr & ((target_ulong)1 << MSR_DR)) {
+            return wt;
+        } else {
+            return !wt;
+        }
+    }
+
+    return true;
+#endif
+
+    return false;
+}
+
+/*
+ * This stops the machine and logs CPU state without killing QEMU (like
+ * cpu_abort()) because it is often a guest error as opposed to a QEMU error,
+ * so the machine can still be debugged.
+ */
+G_NORETURN void powerpc_checkstop(CPUPPCState *env, const char *reason)
+{
+    CPUState *cs = env_cpu(env);
+    FILE *f;
+
+    f = qemu_log_trylock();
+    if (f) {
+        fprintf(f, "Entering checkstop state: %s\n", reason);
+        cpu_dump_state(cs, f, CPU_DUMP_FPU | CPU_DUMP_CCOP);
+        qemu_log_unlock(f);
+    }
+
+    /*
+     * This stops the machine and logs CPU state without killing QEMU
+     * (like cpu_abort()) so the machine can still be debugged (because
+     * it is often a guest error).
+     */
+    qemu_system_guest_panicked(NULL);
+    cpu_loop_exit_noexc(cs);
+}
+
+/* Return true iff byteswap is needed to load instruction */
+static inline bool insn_need_byteswap(CPUArchState *env)
+{
+    /* SYSTEM builds TARGET_BIG_ENDIAN. Need to swap when MSR[LE] is set */
+    return !!(env->msr & ((target_ulong)1 << MSR_LE));
+}
+
+uint32_t ppc_ldl_code(CPUArchState *env, target_ulong addr)
+{
+    uint32_t insn = cpu_ldl_code(env, addr);
+
+    if (insn_need_byteswap(env)) {
+        insn = bswap32(insn);
+    }
+
+    return insn;
+}
+
+#if defined(TARGET_PPC64)
+void helper_attn(CPUPPCState *env)
+{
+    /* POWER attn is unprivileged when enabled by HID, otherwise illegal */
+    if ((*env->check_attn)(env)) {
+        powerpc_checkstop(env, "host executed attn");
+    } else {
+        raise_exception_err(env, POWERPC_EXCP_HV_EMU,
+                            POWERPC_EXCP_INVAL | POWERPC_EXCP_INVAL_INVAL);
+    }
+}
+
+void helper_scv(CPUPPCState *env, uint32_t lev)
+{
+    if (env->spr[SPR_FSCR] & (1ull << FSCR_SCV)) {
+        raise_exception_err(env, POWERPC_EXCP_SYSCALL_VECTORED, lev);
+    } else {
+        raise_exception_err(env, POWERPC_EXCP_FU, FSCR_IC_SCV);
+    }
+}
+
+void helper_pminsn(CPUPPCState *env, uint32_t insn)
+{
+    CPUState *cs = env_cpu(env);
+
+    cs->halted = 1;
+
+    /* Condition for waking up at 0x100 */
+    env->resume_as_sreset = (insn != PPC_PM_STOP) ||
+        (env->spr[SPR_PSSCR] & PSSCR_EC);
+
+    /* HDECR is not to wake from PM state, it may have already fired */
+    if (env->resume_as_sreset) {
+        PowerPCCPU *cpu = env_archcpu(env);
+        ppc_set_irq(cpu, PPC_INTERRUPT_HDECR, 0);
+    }
+
+    ppc_maybe_interrupt(env);
+}
+
+#endif /* TARGET_PPC64 */
+void helper_store_msr(CPUPPCState *env, target_ulong val)
+{
+    uint32_t excp = hreg_store_msr(env, val, 0);
+
+    if (excp != 0) {
+        cpu_interrupt_exittb(env_cpu(env));
+        raise_exception(env, excp);
+    }
+}
+
+void helper_ppc_maybe_interrupt(CPUPPCState *env)
+{
+    ppc_maybe_interrupt(env);
+}
+
+static void do_rfi(CPUPPCState *env, target_ulong nip, target_ulong msr)
+{
+    /* MSR:POW cannot be set by any form of rfi */
+    msr &= ~(1ULL << MSR_POW);
+
+    /* MSR:TGPR cannot be set by any form of rfi */
+    if (env->flags & POWERPC_FLAG_TGPR) {
+        msr &= ~(1ULL << MSR_TGPR);
+    }
+
+#ifdef TARGET_PPC64
+    /* Switching to 32-bit ? Crop the nip */
+    if (!msr_is_64bit(env, msr)) {
+        nip = (uint32_t)nip;
+    }
+#else
+    nip = (uint32_t)nip;
+#endif
+    /* XXX: beware: this is false if VLE is supported */
+    env->nip = nip & ~((target_ulong)0x00000003);
+    hreg_store_msr(env, msr, 1);
+    trace_ppc_excp_rfi(env->nip, env->msr);
+    /*
+     * No need to raise an exception here, as rfi is always the last
+     * insn of a TB
+     */
+    cpu_interrupt_exittb(env_cpu(env));
+    /* Reset the reservation */
+    env->reserve_addr = -1;
+
+    /* Context synchronizing: check if TCG TLB needs flush */
+    check_tlb_flush(env, false);
+}
+
+void helper_rfi(CPUPPCState *env)
+{
+    do_rfi(env, env->spr[SPR_SRR0], env->spr[SPR_SRR1] & 0xfffffffful);
+}
+
+#ifdef TARGET_PPC64
+void helper_rfid(CPUPPCState *env)
+{
+    /*
+     * The architecture defines a number of rules for which bits can
+     * change but in practice, we handle this in hreg_store_msr()
+     * which will be called by do_rfi(), so there is no need to filter
+     * here
+     */
+    do_rfi(env, env->spr[SPR_SRR0], env->spr[SPR_SRR1]);
+}
+
+void helper_rfscv(CPUPPCState *env)
+{
+    do_rfi(env, env->lr, env->ctr);
+}
+
+void helper_hrfid(CPUPPCState *env)
+{
+    do_rfi(env, env->spr[SPR_HSRR0], env->spr[SPR_HSRR1]);
+}
+
+void helper_rfebb(CPUPPCState *env, target_ulong s)
+{
+    target_ulong msr = env->msr;
+
+    /*
+     * Handling of BESCR bits 32:33 according to PowerISA v3.1:
+     *
+     * "If BESCR 32:33 != 0b00 the instruction is treated as if
+     *  the instruction form were invalid."
+     */
+    if (env->spr[SPR_BESCR] & BESCR_INVALID) {
+        raise_exception_err(env, POWERPC_EXCP_PROGRAM,
+                            POWERPC_EXCP_INVAL | POWERPC_EXCP_INVAL_INVAL);
+    }
+
+    env->nip = env->spr[SPR_EBBRR];
+
+    /* Switching to 32-bit ? Crop the nip */
+    if (!msr_is_64bit(env, msr)) {
+        env->nip = (uint32_t)env->spr[SPR_EBBRR];
+    }
+
+    if (s) {
+        env->spr[SPR_BESCR] |= BESCR_GE;
+    } else {
+        env->spr[SPR_BESCR] &= ~BESCR_GE;
+    }
+}
+
+/*
+ * Triggers or queues an 'ebb_excp' EBB exception. All checks
+ * but FSCR, HFSCR and msr_pr must be done beforehand.
+ *
+ * PowerISA v3.1 isn't clear about whether an EBB should be
+ * postponed or cancelled if the EBB facility is unavailable.
+ * Our assumption here is that the EBB is cancelled if both
+ * FSCR and HFSCR EBB facilities aren't available.
+ */
+static void do_ebb(CPUPPCState *env, int ebb_excp)
+{
+    PowerPCCPU *cpu = env_archcpu(env);
+
+    /*
+     * FSCR_EBB and FSCR_IC_EBB are the same bits used with
+     * HFSCR.
+     */
+    helper_fscr_facility_check(env, FSCR_EBB, 0, FSCR_IC_EBB);
+    helper_hfscr_facility_check(env, FSCR_EBB, "EBB", FSCR_IC_EBB);
+
+    if (ebb_excp == POWERPC_EXCP_PERFM_EBB) {
+        env->spr[SPR_BESCR] |= BESCR_PMEO;
+    } else if (ebb_excp == POWERPC_EXCP_EXTERNAL_EBB) {
+        env->spr[SPR_BESCR] |= BESCR_EEO;
+    }
+
+    if (FIELD_EX64(env->msr, MSR, PR)) {
+        powerpc_excp(cpu, ebb_excp);
+    } else {
+        ppc_set_irq(cpu, PPC_INTERRUPT_EBB, 1);
+    }
+}
+
+void raise_ebb_perfm_exception(CPUPPCState *env)
+{
+    bool perfm_ebb_enabled = env->spr[SPR_POWER_MMCR0] & MMCR0_EBE &&
+                             env->spr[SPR_BESCR] & BESCR_PME &&
+                             env->spr[SPR_BESCR] & BESCR_GE;
+
+    if (!perfm_ebb_enabled) {
+        return;
+    }
+
+    do_ebb(env, POWERPC_EXCP_PERFM_EBB);
+}
+#endif /* TARGET_PPC64 */
+
+/*****************************************************************************/
+/* Embedded PowerPC specific helpers */
+void helper_40x_rfci(CPUPPCState *env)
+{
+    do_rfi(env, env->spr[SPR_40x_SRR2], env->spr[SPR_40x_SRR3]);
+}
+
+void helper_rfci(CPUPPCState *env)
+{
+    do_rfi(env, env->spr[SPR_BOOKE_CSRR0], env->spr[SPR_BOOKE_CSRR1]);
+}
+
+void helper_rfdi(CPUPPCState *env)
+{
+    /* FIXME: choose CSRR1 or DSRR1 based on cpu type */
+    do_rfi(env, env->spr[SPR_BOOKE_DSRR0], env->spr[SPR_BOOKE_DSRR1]);
+}
+
+void helper_rfmci(CPUPPCState *env)
+{
+    /* FIXME: choose CSRR1 or MCSRR1 based on cpu type */
+    do_rfi(env, env->spr[SPR_BOOKE_MCSRR0], env->spr[SPR_BOOKE_MCSRR1]);
+}
+
+/* Embedded.Processor Control */
+static int dbell2irq(target_ulong rb)
+{
+    int msg = rb & DBELL_TYPE_MASK;
+    int irq = -1;
+
+    switch (msg) {
+    case DBELL_TYPE_DBELL:
+        irq = PPC_INTERRUPT_DOORBELL;
+        break;
+    case DBELL_TYPE_DBELL_CRIT:
+        irq = PPC_INTERRUPT_CDOORBELL;
+        break;
+    case DBELL_TYPE_G_DBELL:
+    case DBELL_TYPE_G_DBELL_CRIT:
+    case DBELL_TYPE_G_DBELL_MC:
+        /* XXX implement */
+    default:
+        break;
+    }
+
+    return irq;
+}
+
+void helper_msgclr(CPUPPCState *env, target_ulong rb)
+{
+    int irq = dbell2irq(rb);
+
+    if (irq < 0) {
+        return;
+    }
+
+    ppc_set_irq(env_archcpu(env), irq, 0);
+}
+
+void helper_msgsnd(target_ulong rb)
+{
+    int irq = dbell2irq(rb);
+    int pir = rb & DBELL_PIRTAG_MASK;
+    CPUState *cs;
+
+    if (irq < 0) {
+        return;
+    }
+
+    bql_lock();
+    CPU_FOREACH(cs) {
+        PowerPCCPU *cpu = POWERPC_CPU(cs);
+        CPUPPCState *cenv = &cpu->env;
+
+        if ((rb & DBELL_BRDCAST_MASK) || (cenv->spr[SPR_BOOKE_PIR] == pir)) {
+            ppc_set_irq(cpu, irq, 1);
+        }
+    }
+    bql_unlock();
+}
+
+/* Server Processor Control */
+
+static bool dbell_type_server(target_ulong rb)
+{
+    /*
+     * A Directed Hypervisor Doorbell message is sent only if the
+     * message type is 5. All other types are reserved and the
+     * instruction is a no-op
+     */
+    return (rb & DBELL_TYPE_MASK) == DBELL_TYPE_DBELL_SERVER;
+}
+
+static inline bool dbell_bcast_core(target_ulong rb)
+{
+    return (rb & DBELL_BRDCAST_MASK) == DBELL_BRDCAST_CORE;
+}
+
+static inline bool dbell_bcast_subproc(target_ulong rb)
+{
+    return (rb & DBELL_BRDCAST_MASK) == DBELL_BRDCAST_SUBPROC;
+}
+
+/*
+ * Send an interrupt to a thread in the same core as env).
+ */
+static void msgsnd_core_tir(CPUPPCState *env, uint32_t target_tir, int irq)
+{
+    PowerPCCPU *cpu = env_archcpu(env);
+    CPUState *cs = env_cpu(env);
+
+    if (ppc_cpu_lpar_single_threaded(cs)) {
+        if (target_tir == 0) {
+            ppc_set_irq(cpu, irq, 1);
+        }
+    } else {
+        CPUState *ccs;
+
+        /* Does iothread need to be locked for walking CPU list? */
+        bql_lock();
+        THREAD_SIBLING_FOREACH(cs, ccs) {
+            PowerPCCPU *ccpu = POWERPC_CPU(ccs);
+            if (target_tir == ppc_cpu_tir(ccpu)) {
+                ppc_set_irq(ccpu, irq, 1);
+                break;
+            }
+        }
+        bql_unlock();
+    }
+}
+
+void helper_book3s_msgclr(CPUPPCState *env, target_ulong rb)
+{
+    if (!dbell_type_server(rb)) {
+        return;
+    }
+
+    ppc_set_irq(env_archcpu(env), PPC_INTERRUPT_HDOORBELL, 0);
+}
+
+void helper_book3s_msgsnd(CPUPPCState *env, target_ulong rb)
+{
+    int pir = rb & DBELL_PROCIDTAG_MASK;
+    bool brdcast = false;
+    CPUState *cs, *ccs;
+    PowerPCCPU *cpu;
+
+    if (!dbell_type_server(rb)) {
+        return;
+    }
+
+    /* POWER8 msgsnd is like msgsndp (targets a thread within core) */
+    if (!(env->insns_flags2 & PPC2_ISA300)) {
+        msgsnd_core_tir(env, rb & PPC_BITMASK(57, 63), PPC_INTERRUPT_HDOORBELL);
+        return;
+    }
+
+    /* POWER9 and later msgsnd is a global (targets any thread) */
+    cpu = ppc_get_vcpu_by_pir(pir);
+    if (!cpu) {
+        return;
+    }
+    cs = CPU(cpu);
+
+    if (dbell_bcast_core(rb) || (dbell_bcast_subproc(rb) &&
+                                 (env->flags & POWERPC_FLAG_SMT_1LPAR))) {
+        brdcast = true;
+    }
+
+    if (ppc_cpu_core_single_threaded(cs) || !brdcast) {
+        ppc_set_irq(cpu, PPC_INTERRUPT_HDOORBELL, 1);
+        return;
+    }
+
+    /*
+     * Why is bql needed for walking CPU list? Answer seems to be because ppc
+     * irq handling needs it, but ppc_set_irq takes the lock itself if needed,
+     * so could this be removed?
+     */
+    bql_lock();
+    THREAD_SIBLING_FOREACH(cs, ccs) {
+        ppc_set_irq(POWERPC_CPU(ccs), PPC_INTERRUPT_HDOORBELL, 1);
+    }
+    bql_unlock();
+}
+
+#ifdef TARGET_PPC64
+void helper_book3s_msgclrp(CPUPPCState *env, target_ulong rb)
+{
+    helper_hfscr_facility_check(env, HFSCR_MSGP, "msgclrp", HFSCR_IC_MSGP);
+
+    if (!dbell_type_server(rb)) {
+        return;
+    }
+
+    ppc_set_irq(env_archcpu(env), PPC_INTERRUPT_DOORBELL, 0);
+}
+
+/*
+ * sends a message to another thread  on the same
+ * multi-threaded processor
+ */
+void helper_book3s_msgsndp(CPUPPCState *env, target_ulong rb)
+{
+    helper_hfscr_facility_check(env, HFSCR_MSGP, "msgsndp", HFSCR_IC_MSGP);
+
+    if (!dbell_type_server(rb)) {
+        return;
+    }
+
+    msgsnd_core_tir(env, rb & PPC_BITMASK(57, 63), PPC_INTERRUPT_DOORBELL);
+}
+#endif /* TARGET_PPC64 */
+
+/* Single-step tracing */
+void helper_book3s_trace(CPUPPCState *env, target_ulong prev_ip)
+{
+    uint32_t error_code = 0;
+    if (env->insns_flags2 & PPC2_ISA207S) {
+        /* Load/store reporting, SRR1[35, 36] and SDAR, are not implemented. */
+        env->spr[SPR_POWER_SIAR] = prev_ip;
+        error_code = PPC_BIT(33);
+    }
+    raise_exception_err(env, POWERPC_EXCP_TRACE, error_code);
+}
+#endif /* !CONFIG_USER_ONLY */
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 80638ab535971..a52cbc869ae03 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -637,6 +637,18 @@ void spr_write_dawrx0(DisasContext *ctx, int sprn, int gprn)
     translator_io_start(&ctx->base);
     gen_helper_store_dawrx0(tcg_env, cpu_gpr[gprn]);
 }
+
+void spr_write_dawr1(DisasContext *ctx, int sprn, int gprn)
+{
+    translator_io_start(&ctx->base);
+    gen_helper_store_dawr1(tcg_env, cpu_gpr[gprn]);
+}
+
+void spr_write_dawrx1(DisasContext *ctx, int sprn, int gprn)
+{
+    translator_io_start(&ctx->base);
+    gen_helper_store_dawrx1(tcg_env, cpu_gpr[gprn]);
+}
 #endif /* defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) */
 
 /* CTR */
@@ -1326,6 +1338,22 @@ void spr_write_lpcr(DisasContext *ctx, int sprn, int gprn)
     translator_io_start(&ctx->base);
     gen_helper_store_lpcr(tcg_env, cpu_gpr[gprn]);
 }
+
+void spr_read_pmsr(DisasContext *ctx, int gprn, int sprn)
+{
+    translator_io_start(&ctx->base);
+    gen_helper_load_pmsr(cpu_gpr[gprn], tcg_env);
+}
+
+void spr_write_pmcr(DisasContext *ctx, int sprn, int gprn)
+{
+    if (!gen_serialize_core_lpar(ctx)) {
+        return;
+    }
+    translator_io_start(&ctx->base);
+    gen_helper_store_pmcr(tcg_env, cpu_gpr[gprn]);
+}
+
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 void spr_read_tar(DisasContext *ctx, int gprn, int sprn)
diff --git a/target/riscv/common-semi-target.h b/target/riscv/common-semi-target.h
index 7c8a59e0cc3cd..ef6929bdfc5a2 100644
--- a/target/riscv/common-semi-target.h
+++ b/target/riscv/common-semi-target.h
@@ -11,6 +11,17 @@
 #ifndef TARGET_RISCV_COMMON_SEMI_TARGET_H
 #define TARGET_RISCV_COMMON_SEMI_TARGET_H
 
+static inline bool common_semi_read_arg_word(CPUArchState *env,
+                                             target_ulong *save_to,
+                                             target_ulong args_addr,
+                                             int arg_num)
+{
+    if (is_64bit_semihosting(env)) {
+        return get_user_u64(*save_to, args_addr + (arg_num) * 8));
+    }
+    return get_user_u32(*save_to, args_addr + (arg_num) * 4));
+}
+
 static inline target_ulong common_semi_arg(CPUState *cs, int argno)
 {
     RISCVCPU *cpu = RISCV_CPU(cs);
diff --git a/target/riscv/cpu-qom.h b/target/riscv/cpu-qom.h
index d56b067bf2489..4cfdb74891e15 100644
--- a/target/riscv/cpu-qom.h
+++ b/target/riscv/cpu-qom.h
@@ -40,6 +40,8 @@
 #define TYPE_RISCV_CPU_RV64E            RISCV_CPU_TYPE_NAME("rv64e")
 #define TYPE_RISCV_CPU_RVA22U64         RISCV_CPU_TYPE_NAME("rva22u64")
 #define TYPE_RISCV_CPU_RVA22S64         RISCV_CPU_TYPE_NAME("rva22s64")
+#define TYPE_RISCV_CPU_RVA23U64         RISCV_CPU_TYPE_NAME("rva23u64")
+#define TYPE_RISCV_CPU_RVA23S64         RISCV_CPU_TYPE_NAME("rva23s64")
 #define TYPE_RISCV_CPU_IBEX             RISCV_CPU_TYPE_NAME("lowrisc-ibex")
 #define TYPE_RISCV_CPU_SHAKTI_C         RISCV_CPU_TYPE_NAME("shakti-c")
 #define TYPE_RISCV_CPU_SIFIVE_E31       RISCV_CPU_TYPE_NAME("sifive-e31")
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 3d4bd157d2cc5..09ded6829a2cc 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -105,7 +105,7 @@ const RISCVIsaExtData isa_edata_arr[] = {
     ISA_EXT_DATA_ENTRY(ziccamoa, PRIV_VERSION_1_11_0, has_priv_1_11),
     ISA_EXT_DATA_ENTRY(ziccif, PRIV_VERSION_1_11_0, has_priv_1_11),
     ISA_EXT_DATA_ENTRY(zicclsm, PRIV_VERSION_1_11_0, has_priv_1_11),
-    ISA_EXT_DATA_ENTRY(ziccrse, PRIV_VERSION_1_11_0, has_priv_1_11),
+    ISA_EXT_DATA_ENTRY(ziccrse, PRIV_VERSION_1_11_0, ext_ziccrse),
     ISA_EXT_DATA_ENTRY(zicfilp, PRIV_VERSION_1_12_0, ext_zicfilp),
     ISA_EXT_DATA_ENTRY(zicfiss, PRIV_VERSION_1_13_0, ext_zicfiss),
     ISA_EXT_DATA_ENTRY(zicond, PRIV_VERSION_1_12_0, ext_zicond),
@@ -213,8 +213,11 @@ const RISCVIsaExtData isa_edata_arr[] = {
     ISA_EXT_DATA_ENTRY(sstc, PRIV_VERSION_1_12_0, ext_sstc),
     ISA_EXT_DATA_ENTRY(sstvala, PRIV_VERSION_1_12_0, has_priv_1_12),
     ISA_EXT_DATA_ENTRY(sstvecd, PRIV_VERSION_1_12_0, has_priv_1_12),
+    ISA_EXT_DATA_ENTRY(ssu64xl, PRIV_VERSION_1_12_0, has_priv_1_12),
     ISA_EXT_DATA_ENTRY(supm, PRIV_VERSION_1_13_0, ext_supm),
     ISA_EXT_DATA_ENTRY(svade, PRIV_VERSION_1_11_0, ext_svade),
+    ISA_EXT_DATA_ENTRY(smctr, PRIV_VERSION_1_12_0, ext_smctr),
+    ISA_EXT_DATA_ENTRY(ssctr, PRIV_VERSION_1_12_0, ext_ssctr),
     ISA_EXT_DATA_ENTRY(svadu, PRIV_VERSION_1_12_0, ext_svadu),
     ISA_EXT_DATA_ENTRY(svinval, PRIV_VERSION_1_12_0, ext_svinval),
     ISA_EXT_DATA_ENTRY(svnapot, PRIV_VERSION_1_12_0, ext_svnapot),
@@ -700,13 +703,6 @@ static void rv128_base_cpu_init(Object *obj)
     RISCVCPU *cpu = RISCV_CPU(obj);
     CPURISCVState *env = &cpu->env;
 
-    if (qemu_tcg_mttcg_enabled()) {
-        /* Missing 128-bit aligned atomics */
-        error_report("128-bit RISC-V currently does not work with Multi "
-                     "Threaded TCG. Please use: -accel tcg,thread=single");
-        exit(EXIT_FAILURE);
-    }
-
     cpu->cfg.mmu = true;
     cpu->cfg.pmp = true;
 
@@ -1010,9 +1006,9 @@ static vaddr riscv_cpu_get_pc(CPUState *cs)
     return env->pc;
 }
 
+#ifndef CONFIG_USER_ONLY
 bool riscv_cpu_has_work(CPUState *cs)
 {
-#ifndef CONFIG_USER_ONLY
     RISCVCPU *cpu = RISCV_CPU(cs);
     CPURISCVState *env = &cpu->env;
     /*
@@ -1022,10 +1018,8 @@ bool riscv_cpu_has_work(CPUState *cs)
     return riscv_cpu_all_pending(env) != 0 ||
         riscv_cpu_sirq_pending(env) != RISCV_EXCP_NONE ||
         riscv_cpu_vsirq_pending(env) != RISCV_EXCP_NONE;
-#else
-    return true;
-#endif
 }
+#endif /* !CONFIG_USER_ONLY */
 
 static int riscv_cpu_mmu_index(CPUState *cs, bool ifetch)
 {
@@ -1156,6 +1150,15 @@ static void riscv_cpu_disas_set_info(CPUState *s, disassemble_info *info)
     CPURISCVState *env = &cpu->env;
     info->target_info = &cpu->cfg;
 
+    /*
+     * A couple of bits in MSTATUS set the endianness:
+     *  - MSTATUS_UBE (User-mode),
+     *  - MSTATUS_SBE (Supervisor-mode),
+     *  - MSTATUS_MBE (Machine-mode)
+     * but we don't implement that yet.
+     */
+    info->endian = BFD_ENDIAN_LITTLE;
+
     switch (env->xl) {
     case MXL_RV32:
         info->print_insn = print_insn_riscv32;
@@ -1598,6 +1601,8 @@ const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = {
     MULTI_EXT_CFG_BOOL("smcdeleg", ext_smcdeleg, false),
     MULTI_EXT_CFG_BOOL("sscsrind", ext_sscsrind, false),
     MULTI_EXT_CFG_BOOL("ssccfg", ext_ssccfg, false),
+    MULTI_EXT_CFG_BOOL("smctr", ext_smctr, false),
+    MULTI_EXT_CFG_BOOL("ssctr", ext_ssctr, false),
     MULTI_EXT_CFG_BOOL("zifencei", ext_zifencei, true),
     MULTI_EXT_CFG_BOOL("zicfilp", ext_zicfilp, false),
     MULTI_EXT_CFG_BOOL("zicfiss", ext_zicfiss, false),
@@ -1744,6 +1749,7 @@ const RISCVCPUMultiExtConfig riscv_cpu_named_features[] = {
     MULTI_EXT_CFG_BOOL("zic64b", ext_zic64b, true),
     MULTI_EXT_CFG_BOOL("ssstateen", ext_ssstateen, true),
     MULTI_EXT_CFG_BOOL("sha", ext_sha, true),
+    MULTI_EXT_CFG_BOOL("ziccrse", ext_ziccrse, true),
 
     { },
 };
@@ -1817,7 +1823,8 @@ static void prop_pmu_num_get(Object *obj, Visitor *v, const char *name,
 }
 
 static const PropertyInfo prop_pmu_num = {
-    .name = "pmu-num",
+    .type = "int8",
+    .description = "pmu-num",
     .get = prop_pmu_num_get,
     .set = prop_pmu_num_set,
 };
@@ -1858,7 +1865,8 @@ static void prop_pmu_mask_get(Object *obj, Visitor *v, const char *name,
 }
 
 static const PropertyInfo prop_pmu_mask = {
-    .name = "pmu-mask",
+    .type = "int8",
+    .description = "pmu-mask",
     .get = prop_pmu_mask_get,
     .set = prop_pmu_mask_set,
 };
@@ -1889,7 +1897,8 @@ static void prop_mmu_get(Object *obj, Visitor *v, const char *name,
 }
 
 static const PropertyInfo prop_mmu = {
-    .name = "mmu",
+    .type = "bool",
+    .description = "mmu",
     .get = prop_mmu_get,
     .set = prop_mmu_set,
 };
@@ -1920,7 +1929,8 @@ static void prop_pmp_get(Object *obj, Visitor *v, const char *name,
 }
 
 static const PropertyInfo prop_pmp = {
-    .name = "pmp",
+    .type = "bool",
+    .description = "pmp",
     .get = prop_pmp_get,
     .set = prop_pmp_set,
 };
@@ -1994,7 +2004,9 @@ static void prop_priv_spec_get(Object *obj, Visitor *v, const char *name,
 }
 
 static const PropertyInfo prop_priv_spec = {
-    .name = "priv_spec",
+    .type = "str",
+    .description = "priv_spec",
+    /* FIXME enum? */
     .get = prop_priv_spec_get,
     .set = prop_priv_spec_set,
 };
@@ -2025,7 +2037,9 @@ static void prop_vext_spec_get(Object *obj, Visitor *v, const char *name,
 }
 
 static const PropertyInfo prop_vext_spec = {
-    .name = "vext_spec",
+    .type = "str",
+    .description = "vext_spec",
+    /* FIXME enum? */
     .get = prop_vext_spec_get,
     .set = prop_vext_spec_set,
 };
@@ -2034,6 +2048,7 @@ static void prop_vlen_set(Object *obj, Visitor *v, const char *name,
                          void *opaque, Error **errp)
 {
     RISCVCPU *cpu = RISCV_CPU(obj);
+    uint16_t cpu_vlen = cpu->cfg.vlenb << 3;
     uint16_t value;
 
     if (!visit_type_uint16(v, name, &value, errp)) {
@@ -2045,10 +2060,10 @@ static void prop_vlen_set(Object *obj, Visitor *v, const char *name,
         return;
     }
 
-    if (value != cpu->cfg.vlenb && riscv_cpu_is_vendor(obj)) {
+    if (value != cpu_vlen && riscv_cpu_is_vendor(obj)) {
         cpu_set_prop_err(cpu, name, errp);
         error_append_hint(errp, "Current '%s' val: %u\n",
-                          name, cpu->cfg.vlenb << 3);
+                          name, cpu_vlen);
         return;
     }
 
@@ -2065,7 +2080,8 @@ static void prop_vlen_get(Object *obj, Visitor *v, const char *name,
 }
 
 static const PropertyInfo prop_vlen = {
-    .name = "vlen",
+    .type = "uint16",
+    .description = "vlen",
     .get = prop_vlen_get,
     .set = prop_vlen_set,
 };
@@ -2105,7 +2121,8 @@ static void prop_elen_get(Object *obj, Visitor *v, const char *name,
 }
 
 static const PropertyInfo prop_elen = {
-    .name = "elen",
+    .type = "uint16",
+    .description = "elen",
     .get = prop_elen_get,
     .set = prop_elen_set,
 };
@@ -2140,7 +2157,8 @@ static void prop_cbom_blksize_get(Object *obj, Visitor *v, const char *name,
 }
 
 static const PropertyInfo prop_cbom_blksize = {
-    .name = "cbom_blocksize",
+    .type = "uint16",
+    .description = "cbom_blocksize",
     .get = prop_cbom_blksize_get,
     .set = prop_cbom_blksize_set,
 };
@@ -2175,7 +2193,8 @@ static void prop_cbop_blksize_get(Object *obj, Visitor *v, const char *name,
 }
 
 static const PropertyInfo prop_cbop_blksize = {
-    .name = "cbop_blocksize",
+    .type = "uint16",
+    .description = "cbop_blocksize",
     .get = prop_cbop_blksize_get,
     .set = prop_cbop_blksize_set,
 };
@@ -2210,7 +2229,8 @@ static void prop_cboz_blksize_get(Object *obj, Visitor *v, const char *name,
 }
 
 static const PropertyInfo prop_cboz_blksize = {
-    .name = "cboz_blocksize",
+    .type = "uint16",
+    .description = "cboz_blocksize",
     .get = prop_cboz_blksize_get,
     .set = prop_cboz_blksize_set,
 };
@@ -2245,7 +2265,8 @@ static void prop_mvendorid_get(Object *obj, Visitor *v, const char *name,
 }
 
 static const PropertyInfo prop_mvendorid = {
-    .name = "mvendorid",
+    .type = "uint32",
+    .description = "mvendorid",
     .get = prop_mvendorid_get,
     .set = prop_mvendorid_set,
 };
@@ -2280,7 +2301,8 @@ static void prop_mimpid_get(Object *obj, Visitor *v, const char *name,
 }
 
 static const PropertyInfo prop_mimpid = {
-    .name = "mimpid",
+    .type = "uint64",
+    .description = "mimpid",
     .get = prop_mimpid_get,
     .set = prop_mimpid_set,
 };
@@ -2336,7 +2358,8 @@ static void prop_marchid_get(Object *obj, Visitor *v, const char *name,
 }
 
 static const PropertyInfo prop_marchid = {
-    .name = "marchid",
+    .type = "uint64",
+    .description = "marchid",
     .get = prop_marchid_get,
     .set = prop_marchid_set,
 };
@@ -2348,9 +2371,10 @@ static const PropertyInfo prop_marchid = {
  * doesn't need to be manually enabled by the profile.
  */
 static RISCVCPUProfile RVA22U64 = {
-    .parent = NULL,
+    .u_parent = NULL,
+    .s_parent = NULL,
     .name = "rva22u64",
-    .misa_ext = RVI | RVM | RVA | RVF | RVD | RVC | RVU,
+    .misa_ext = RVI | RVM | RVA | RVF | RVD | RVC | RVB | RVU,
     .priv_spec = RISCV_PROFILE_ATTR_UNUSED,
     .satp_mode = RISCV_PROFILE_ATTR_UNUSED,
     .ext_offsets = {
@@ -2380,7 +2404,8 @@ static RISCVCPUProfile RVA22U64 = {
  * The remaining features/extensions comes from RVA22U64.
  */
 static RISCVCPUProfile RVA22S64 = {
-    .parent = &RVA22U64,
+    .u_parent = &RVA22U64,
+    .s_parent = NULL,
     .name = "rva22s64",
     .misa_ext = RVS,
     .priv_spec = PRIV_VERSION_1_12_0,
@@ -2394,9 +2419,65 @@ static RISCVCPUProfile RVA22S64 = {
     }
 };
 
+/*
+ * All mandatory extensions from RVA22U64 are present
+ * in RVA23U64 so set RVA22 as a parent. We need to
+ * declare just the newly added mandatory extensions.
+ */
+static RISCVCPUProfile RVA23U64 = {
+    .u_parent = &RVA22U64,
+    .s_parent = NULL,
+    .name = "rva23u64",
+    .misa_ext = RVV,
+    .priv_spec = RISCV_PROFILE_ATTR_UNUSED,
+    .satp_mode = RISCV_PROFILE_ATTR_UNUSED,
+    .ext_offsets = {
+        CPU_CFG_OFFSET(ext_zvfhmin), CPU_CFG_OFFSET(ext_zvbb),
+        CPU_CFG_OFFSET(ext_zvkt), CPU_CFG_OFFSET(ext_zihintntl),
+        CPU_CFG_OFFSET(ext_zicond), CPU_CFG_OFFSET(ext_zimop),
+        CPU_CFG_OFFSET(ext_zcmop), CPU_CFG_OFFSET(ext_zcb),
+        CPU_CFG_OFFSET(ext_zfa), CPU_CFG_OFFSET(ext_zawrs),
+        CPU_CFG_OFFSET(ext_supm),
+
+        RISCV_PROFILE_EXT_LIST_END
+    }
+};
+
+/*
+ * As with RVA23U64, RVA23S64 also defines 'named features'.
+ *
+ * Cache related features that we consider enabled since we don't
+ * implement cache: Ssccptr
+ *
+ * Other named features that we already implement: Sstvecd, Sstvala,
+ * Sscounterenw, Ssu64xl
+ *
+ * The remaining features/extensions comes from RVA23S64.
+ */
+static RISCVCPUProfile RVA23S64 = {
+    .u_parent = &RVA23U64,
+    .s_parent = &RVA22S64,
+    .name = "rva23s64",
+    .misa_ext = RVS,
+    .priv_spec = PRIV_VERSION_1_13_0,
+    .satp_mode = VM_1_10_SV39,
+    .ext_offsets = {
+        /* New in RVA23S64 */
+        CPU_CFG_OFFSET(ext_svnapot), CPU_CFG_OFFSET(ext_sstc),
+        CPU_CFG_OFFSET(ext_sscofpmf), CPU_CFG_OFFSET(ext_ssnpm),
+
+        /* Named features: Sha */
+        CPU_CFG_OFFSET(ext_sha),
+
+        RISCV_PROFILE_EXT_LIST_END
+    }
+};
+
 RISCVCPUProfile *riscv_profiles[] = {
     &RVA22U64,
     &RVA22S64,
+    &RVA23U64,
+    &RVA23S64,
     NULL,
 };
 
@@ -2803,6 +2884,26 @@ static RISCVCPUImpliedExtsRule SSPM_IMPLIED = {
     },
 };
 
+static RISCVCPUImpliedExtsRule SMCTR_IMPLIED = {
+    .ext = CPU_CFG_OFFSET(ext_smctr),
+    .implied_misa_exts = RVS,
+    .implied_multi_exts = {
+        CPU_CFG_OFFSET(ext_sscsrind),
+
+        RISCV_IMPLIED_EXTS_RULE_END
+    },
+};
+
+static RISCVCPUImpliedExtsRule SSCTR_IMPLIED = {
+    .ext = CPU_CFG_OFFSET(ext_ssctr),
+    .implied_misa_exts = RVS,
+    .implied_multi_exts = {
+        CPU_CFG_OFFSET(ext_sscsrind),
+
+        RISCV_IMPLIED_EXTS_RULE_END
+    },
+};
+
 RISCVCPUImpliedExtsRule *riscv_misa_ext_implied_rules[] = {
     &RVA_IMPLIED, &RVD_IMPLIED, &RVF_IMPLIED,
     &RVM_IMPLIED, &RVV_IMPLIED, NULL
@@ -2821,7 +2922,7 @@ RISCVCPUImpliedExtsRule *riscv_multi_ext_implied_rules[] = {
     &ZVFH_IMPLIED, &ZVFHMIN_IMPLIED, &ZVKN_IMPLIED,
     &ZVKNC_IMPLIED, &ZVKNG_IMPLIED, &ZVKNHB_IMPLIED,
     &ZVKS_IMPLIED,  &ZVKSC_IMPLIED, &ZVKSG_IMPLIED, &SSCFG_IMPLIED,
-    &SUPM_IMPLIED, &SSPM_IMPLIED,
+    &SUPM_IMPLIED, &SSPM_IMPLIED, &SMCTR_IMPLIED, &SSCTR_IMPLIED,
     NULL
 };
 
@@ -2844,9 +2945,9 @@ static const Property riscv_cpu_properties[] = {
     {.name = "cbop_blocksize", .info = &prop_cbop_blksize},
     {.name = "cboz_blocksize", .info = &prop_cboz_blksize},
 
-     {.name = "mvendorid", .info = &prop_mvendorid},
-     {.name = "mimpid", .info = &prop_mimpid},
-     {.name = "marchid", .info = &prop_marchid},
+    {.name = "mvendorid", .info = &prop_mvendorid},
+    {.name = "mimpid", .info = &prop_mimpid},
+    {.name = "marchid", .info = &prop_marchid},
 
 #ifndef CONFIG_USER_ONLY
     DEFINE_PROP_UINT64("resetvec", RISCVCPU, env.resetvec, DEFAULT_RSTVEC),
@@ -2883,6 +2984,20 @@ static void rva22s64_profile_cpu_init(Object *obj)
 
     RVA22S64.enabled = true;
 }
+
+static void rva23u64_profile_cpu_init(Object *obj)
+{
+    rv64i_bare_cpu_init(obj);
+
+    RVA23U64.enabled = true;
+}
+
+static void rva23s64_profile_cpu_init(Object *obj)
+{
+    rv64i_bare_cpu_init(obj);
+
+    RVA23S64.enabled = true;
+}
 #endif
 
 static const gchar *riscv_gdb_arch_name(CPUState *cs)
@@ -2912,6 +3027,7 @@ static int64_t riscv_get_arch_id(CPUState *cs)
 #include "hw/core/sysemu-cpu-ops.h"
 
 static const struct SysemuCPUOps riscv_sysemu_ops = {
+    .has_work = riscv_cpu_has_work,
     .get_phys_page_debug = riscv_cpu_get_phys_page_debug,
     .write_elf64_note = riscv_cpu_write_elf64_note,
     .write_elf32_note = riscv_cpu_write_elf32_note,
@@ -2933,7 +3049,6 @@ static void riscv_cpu_common_class_init(ObjectClass *c, void *data)
                                        &mcc->parent_phases);
 
     cc->class_by_name = riscv_cpu_class_by_name;
-    cc->has_work = riscv_cpu_has_work;
     cc->mmu_index = riscv_cpu_mmu_index;
     cc->dump_state = riscv_cpu_dump_state;
     cc->set_pc = riscv_cpu_set_pc;
@@ -2955,7 +3070,7 @@ static void riscv_cpu_class_init(ObjectClass *c, void *data)
 {
     RISCVCPUClass *mcc = RISCV_CPU_CLASS(c);
 
-    mcc->misa_mxl_max = (uint32_t)(uintptr_t)data;
+    mcc->misa_mxl_max = (RISCVMXL)GPOINTER_TO_UINT(data);
     riscv_cpu_validate_misa_mxl(mcc);
 }
 
@@ -3051,22 +3166,13 @@ void riscv_isa_write_fdt(RISCVCPU *cpu, void *fdt, char *nodename)
 }
 #endif
 
-#define DEFINE_CPU(type_name, misa_mxl_max, initfn)         \
-    {                                                       \
-        .name = (type_name),                                \
-        .parent = TYPE_RISCV_CPU,                           \
-        .instance_init = (initfn),                          \
-        .class_init = riscv_cpu_class_init,                 \
-        .class_data = (void *)(misa_mxl_max)                \
-    }
-
 #define DEFINE_DYNAMIC_CPU(type_name, misa_mxl_max, initfn) \
     {                                                       \
         .name = (type_name),                                \
         .parent = TYPE_RISCV_DYNAMIC_CPU,                   \
         .instance_init = (initfn),                          \
         .class_init = riscv_cpu_class_init,                 \
-        .class_data = (void *)(misa_mxl_max)                \
+        .class_data = GUINT_TO_POINTER(misa_mxl_max)        \
     }
 
 #define DEFINE_VENDOR_CPU(type_name, misa_mxl_max, initfn)  \
@@ -3075,7 +3181,7 @@ void riscv_isa_write_fdt(RISCVCPU *cpu, void *fdt, char *nodename)
         .parent = TYPE_RISCV_VENDOR_CPU,                    \
         .instance_init = (initfn),                          \
         .class_init = riscv_cpu_class_init,                 \
-        .class_data = (void *)(misa_mxl_max)                \
+        .class_data = GUINT_TO_POINTER(misa_mxl_max)        \
     }
 
 #define DEFINE_BARE_CPU(type_name, misa_mxl_max, initfn)    \
@@ -3084,7 +3190,7 @@ void riscv_isa_write_fdt(RISCVCPU *cpu, void *fdt, char *nodename)
         .parent = TYPE_RISCV_BARE_CPU,                      \
         .instance_init = (initfn),                          \
         .class_init = riscv_cpu_class_init,                 \
-        .class_data = (void *)(misa_mxl_max)                \
+        .class_data = GUINT_TO_POINTER(misa_mxl_max)        \
     }
 
 #define DEFINE_PROFILE_CPU(type_name, misa_mxl_max, initfn) \
@@ -3093,7 +3199,7 @@ void riscv_isa_write_fdt(RISCVCPU *cpu, void *fdt, char *nodename)
         .parent = TYPE_RISCV_BARE_CPU,                      \
         .instance_init = (initfn),                          \
         .class_init = riscv_cpu_class_init,                 \
-        .class_data = (void *)(misa_mxl_max)                \
+        .class_data = GUINT_TO_POINTER(misa_mxl_max)        \
     }
 
 static const TypeInfo riscv_cpu_type_infos[] = {
@@ -3162,6 +3268,8 @@ static const TypeInfo riscv_cpu_type_infos[] = {
     DEFINE_BARE_CPU(TYPE_RISCV_CPU_RV64E,        MXL_RV64,  rv64e_bare_cpu_init),
     DEFINE_PROFILE_CPU(TYPE_RISCV_CPU_RVA22U64,  MXL_RV64,  rva22u64_profile_cpu_init),
     DEFINE_PROFILE_CPU(TYPE_RISCV_CPU_RVA22S64,  MXL_RV64,  rva22s64_profile_cpu_init),
+    DEFINE_PROFILE_CPU(TYPE_RISCV_CPU_RVA23U64,  MXL_RV64,  rva23u64_profile_cpu_init),
+    DEFINE_PROFILE_CPU(TYPE_RISCV_CPU_RVA23S64,  MXL_RV64,  rva23s64_profile_cpu_init),
 #endif /* TARGET_RISCV64 */
 };
 
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 97713681cbea7..7de19b418369f 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -81,7 +81,8 @@ const char *riscv_get_misa_ext_description(uint32_t bit);
 #define CPU_CFG_OFFSET(_prop) offsetof(struct RISCVCPUConfig, _prop)
 
 typedef struct riscv_cpu_profile {
-    struct riscv_cpu_profile *parent;
+    struct riscv_cpu_profile *u_parent;
+    struct riscv_cpu_profile *s_parent;
     const char *name;
     uint32_t misa_ext;
     bool enabled;
@@ -312,6 +313,15 @@ struct CPUArchState {
     target_ulong mcause;
     target_ulong mtval;  /* since: priv-1.10.0 */
 
+    uint64_t mctrctl;
+    uint32_t sctrdepth;
+    uint32_t sctrstatus;
+    uint64_t vsctrctl;
+
+    uint64_t ctr_src[16 << SCTRDEPTH_MAX];
+    uint64_t ctr_dst[16 << SCTRDEPTH_MAX];
+    uint64_t ctr_data[16 << SCTRDEPTH_MAX];
+
     /* Machine and Supervisor interrupt priorities */
     uint8_t miprio[64];
     uint8_t siprio[64];
@@ -529,7 +539,7 @@ struct RISCVCPUClass {
 
     DeviceRealize parent_realize;
     ResettablePhases parent_phases;
-    uint32_t misa_mxl_max;  /* max mxl for this cpu */
+    RISCVMXL misa_mxl_max;  /* max mxl for this cpu */
 };
 
 static inline int riscv_has_ext(CPURISCVState *env, target_ulong ext)
@@ -607,6 +617,10 @@ RISCVException smstateen_acc_ok(CPURISCVState *env, int index, uint64_t bit);
 
 void riscv_cpu_set_mode(CPURISCVState *env, target_ulong newpriv, bool virt_en);
 
+void riscv_ctr_add_entry(CPURISCVState *env, target_long src, target_long dst,
+    enum CTRType type, target_ulong prev_priv, bool prev_virt);
+void riscv_ctr_clear(CPURISCVState *env);
+
 void riscv_translate_init(void);
 void riscv_translate_code(CPUState *cs, TranslationBlock *tb,
                           int *max_insns, vaddr pc, void *host_pc);
diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index f97c48a3943fb..a30317c617811 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -247,6 +247,17 @@
 #define CSR_SIEH            0x114
 #define CSR_SIPH            0x154
 
+/* Machine-Level Control transfer records CSRs */
+#define CSR_MCTRCTL         0x34e
+
+/* Supervisor-Level Control transfer records CSRs */
+#define CSR_SCTRCTL         0x14e
+#define CSR_SCTRSTATUS      0x14f
+#define CSR_SCTRDEPTH       0x15f
+
+/* VS-Level Control transfer records CSRs */
+#define CSR_VSCTRCTL        0x24e
+
 /* Hpervisor CSRs */
 #define CSR_HSTATUS         0x600
 #define CSR_HEDELEG         0x602
@@ -344,6 +355,7 @@
 #define SMSTATEEN0_CS       (1ULL << 0)
 #define SMSTATEEN0_FCSR     (1ULL << 1)
 #define SMSTATEEN0_JVT      (1ULL << 2)
+#define SMSTATEEN0_CTR      (1ULL << 54)
 #define SMSTATEEN0_P1P13    (1ULL << 56)
 #define SMSTATEEN0_HSCONTXT (1ULL << 57)
 #define SMSTATEEN0_IMSIC    (1ULL << 58)
@@ -825,6 +837,139 @@ typedef enum RISCVException {
 #define HENVCFGH_PBMTE                      MENVCFGH_PBMTE
 #define HENVCFGH_STCE                       MENVCFGH_STCE
 
+/* Offsets for every pair of control bits per each priv level */
+#define XS_OFFSET    0ULL
+#define U_OFFSET     2ULL
+#define S_OFFSET     5ULL
+#define M_OFFSET     8ULL
+
+#define PM_XS_BITS   (EXT_STATUS_MASK << XS_OFFSET)
+#define U_PM_ENABLE  (PM_ENABLE  << U_OFFSET)
+#define U_PM_CURRENT (PM_CURRENT << U_OFFSET)
+#define U_PM_INSN    (PM_INSN    << U_OFFSET)
+#define S_PM_ENABLE  (PM_ENABLE  << S_OFFSET)
+#define S_PM_CURRENT (PM_CURRENT << S_OFFSET)
+#define S_PM_INSN    (PM_INSN    << S_OFFSET)
+#define M_PM_ENABLE  (PM_ENABLE  << M_OFFSET)
+#define M_PM_CURRENT (PM_CURRENT << M_OFFSET)
+#define M_PM_INSN    (PM_INSN    << M_OFFSET)
+
+/* mmte CSR bits */
+#define MMTE_PM_XS_BITS     PM_XS_BITS
+#define MMTE_U_PM_ENABLE    U_PM_ENABLE
+#define MMTE_U_PM_CURRENT   U_PM_CURRENT
+#define MMTE_U_PM_INSN      U_PM_INSN
+#define MMTE_S_PM_ENABLE    S_PM_ENABLE
+#define MMTE_S_PM_CURRENT   S_PM_CURRENT
+#define MMTE_S_PM_INSN      S_PM_INSN
+#define MMTE_M_PM_ENABLE    M_PM_ENABLE
+#define MMTE_M_PM_CURRENT   M_PM_CURRENT
+#define MMTE_M_PM_INSN      M_PM_INSN
+#define MMTE_MASK    (MMTE_U_PM_ENABLE | MMTE_U_PM_CURRENT | MMTE_U_PM_INSN | \
+                      MMTE_S_PM_ENABLE | MMTE_S_PM_CURRENT | MMTE_S_PM_INSN | \
+                      MMTE_M_PM_ENABLE | MMTE_M_PM_CURRENT | MMTE_M_PM_INSN | \
+                      MMTE_PM_XS_BITS)
+
+/* (v)smte CSR bits */
+#define SMTE_PM_XS_BITS     PM_XS_BITS
+#define SMTE_U_PM_ENABLE    U_PM_ENABLE
+#define SMTE_U_PM_CURRENT   U_PM_CURRENT
+#define SMTE_U_PM_INSN      U_PM_INSN
+#define SMTE_S_PM_ENABLE    S_PM_ENABLE
+#define SMTE_S_PM_CURRENT   S_PM_CURRENT
+#define SMTE_S_PM_INSN      S_PM_INSN
+#define SMTE_MASK    (SMTE_U_PM_ENABLE | SMTE_U_PM_CURRENT | SMTE_U_PM_INSN | \
+                      SMTE_S_PM_ENABLE | SMTE_S_PM_CURRENT | SMTE_S_PM_INSN | \
+                      SMTE_PM_XS_BITS)
+
+/* umte CSR bits */
+#define UMTE_U_PM_ENABLE    U_PM_ENABLE
+#define UMTE_U_PM_CURRENT   U_PM_CURRENT
+#define UMTE_U_PM_INSN      U_PM_INSN
+#define UMTE_MASK     (UMTE_U_PM_ENABLE | MMTE_U_PM_CURRENT | UMTE_U_PM_INSN)
+
+/* CTR control register commom fields */
+#define XCTRCTL_U              BIT_ULL(0)
+#define XCTRCTL_S              BIT_ULL(1)
+#define XCTRCTL_RASEMU         BIT_ULL(7)
+#define XCTRCTL_STE            BIT_ULL(8)
+#define XCTRCTL_BPFRZ          BIT_ULL(11)
+#define XCTRCTL_LCOFIFRZ       BIT_ULL(12)
+#define XCTRCTL_EXCINH         BIT_ULL(33)
+#define XCTRCTL_INTRINH        BIT_ULL(34)
+#define XCTRCTL_TRETINH        BIT_ULL(35)
+#define XCTRCTL_NTBREN         BIT_ULL(36)
+#define XCTRCTL_TKBRINH        BIT_ULL(37)
+#define XCTRCTL_INDCALLINH     BIT_ULL(40)
+#define XCTRCTL_DIRCALLINH     BIT_ULL(41)
+#define XCTRCTL_INDJMPINH      BIT_ULL(42)
+#define XCTRCTL_DIRJMPINH      BIT_ULL(43)
+#define XCTRCTL_CORSWAPINH     BIT_ULL(44)
+#define XCTRCTL_RETINH         BIT_ULL(45)
+#define XCTRCTL_INDLJMPINH     BIT_ULL(46)
+#define XCTRCTL_DIRLJMPINH     BIT_ULL(47)
+
+#define XCTRCTL_MASK (XCTRCTL_U | XCTRCTL_S | XCTRCTL_RASEMU |                \
+                      XCTRCTL_STE | XCTRCTL_BPFRZ | XCTRCTL_LCOFIFRZ |        \
+                      XCTRCTL_EXCINH | XCTRCTL_INTRINH | XCTRCTL_TRETINH |    \
+                      XCTRCTL_NTBREN | XCTRCTL_TKBRINH | XCTRCTL_INDCALLINH | \
+                      XCTRCTL_DIRCALLINH | XCTRCTL_INDJMPINH |                \
+                      XCTRCTL_DIRJMPINH | XCTRCTL_CORSWAPINH |                \
+                      XCTRCTL_RETINH | XCTRCTL_INDLJMPINH | XCTRCTL_DIRLJMPINH)
+
+#define XCTRCTL_INH_START         32U
+
+/* CTR mctrctl bits */
+#define MCTRCTL_M                 BIT_ULL(2)
+#define MCTRCTL_MTE               BIT_ULL(9)
+
+#define MCTRCTL_MASK              (XCTRCTL_MASK | MCTRCTL_M | MCTRCTL_MTE)
+#define SCTRCTL_MASK              XCTRCTL_MASK
+#define VSCTRCTL_MASK             XCTRCTL_MASK
+
+/* sctrstatus CSR bits. */
+#define SCTRSTATUS_WRPTR_MASK       0xFF
+#define SCTRSTATUS_FROZEN           BIT(31)
+#define SCTRSTATUS_MASK             (SCTRSTATUS_WRPTR_MASK | SCTRSTATUS_FROZEN)
+
+/* sctrdepth CSR bits. */
+#define SCTRDEPTH_MASK              0x7
+#define SCTRDEPTH_MIN               0U  /* 16 Entries. */
+#define SCTRDEPTH_MAX               4U  /* 256 Entries. */
+
+#define CTR_ENTRIES_FIRST           0x200
+#define CTR_ENTRIES_LAST            0x2ff
+
+#define CTRSOURCE_VALID             BIT(0)
+#define CTRTARGET_MISP              BIT(0)
+
+#define CTRDATA_TYPE_MASK           0xF
+#define CTRDATA_CCV                 BIT(15)
+#define CTRDATA_CCM_MASK            0xFFF0000
+#define CTRDATA_CCE_MASK            0xF0000000
+
+#define CTRDATA_MASK            (CTRDATA_TYPE_MASK | CTRDATA_CCV |  \
+                                 CTRDATA_CCM_MASK | CTRDATA_CCE_MASK)
+
+typedef enum CTRType {
+    CTRDATA_TYPE_NONE                   = 0,
+    CTRDATA_TYPE_EXCEPTION              = 1,
+    CTRDATA_TYPE_INTERRUPT              = 2,
+    CTRDATA_TYPE_EXCEP_INT_RET          = 3,
+    CTRDATA_TYPE_NONTAKEN_BRANCH        = 4,
+    CTRDATA_TYPE_TAKEN_BRANCH           = 5,
+    CTRDATA_TYPE_RESERVED_0             = 6,
+    CTRDATA_TYPE_RESERVED_1             = 7,
+    CTRDATA_TYPE_INDIRECT_CALL          = 8,
+    CTRDATA_TYPE_DIRECT_CALL            = 9,
+    CTRDATA_TYPE_INDIRECT_JUMP          = 10,
+    CTRDATA_TYPE_DIRECT_JUMP            = 11,
+    CTRDATA_TYPE_CO_ROUTINE_SWAP        = 12,
+    CTRDATA_TYPE_RETURN                 = 13,
+    CTRDATA_TYPE_OTHER_INDIRECT_JUMP    = 14,
+    CTRDATA_TYPE_OTHER_DIRECT_JUMP      = 15,
+} CTRType;
+
 /* MISELECT, SISELECT, and VSISELECT bits (AIA) */
 #define ISELECT_IPRIO0                     0x30
 #define ISELECT_IPRIO15                    0x3f
@@ -933,9 +1078,8 @@ typedef enum RISCVException {
                                             MHPMEVENTH_BIT_VSINH | \
                                             MHPMEVENTH_BIT_VUINH)
 
-#define MHPMEVENT_SSCOF_MASK               _ULL(0xFFFF000000000000)
-#define MHPMEVENT_IDX_MASK                 0xFFFFF
-#define MHPMEVENT_SSCOF_RESVD              16
+#define MHPMEVENT_SSCOF_MASK               MAKE_64BIT_MASK(63, 56)
+#define MHPMEVENT_IDX_MASK                 (~MHPMEVENT_SSCOF_MASK)
 
 /* RISC-V-specific interrupt pending bits. */
 #define CPU_INTERRUPT_RNMI                 CPU_INTERRUPT_TGT_EXT_0
diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
index b410b1e603839..8a843482cc64e 100644
--- a/target/riscv/cpu_cfg.h
+++ b/target/riscv/cpu_cfg.h
@@ -133,6 +133,8 @@ struct RISCVCPUConfig {
     bool ext_zvfhmin;
     bool ext_smaia;
     bool ext_ssaia;
+    bool ext_smctr;
+    bool ext_ssctr;
     bool ext_sscofpmf;
     bool ext_smepmp;
     bool ext_smrnmi;
@@ -164,6 +166,9 @@ struct RISCVCPUConfig {
     bool has_priv_1_12;
     bool has_priv_1_11;
 
+    /* Always enabled for TCG if has_priv_1_11 */
+    bool ext_ziccrse;
+
     /* Vendor-specific custom extensions */
     bool ext_xtheadba;
     bool ext_xtheadbb;
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index e1dfc4ecbfc5d..6c4391d96b840 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -23,16 +23,17 @@
 #include "cpu.h"
 #include "internals.h"
 #include "pmu.h"
+#include "exec/cputlb.h"
 #include "exec/exec-all.h"
 #include "exec/page-protection.h"
 #include "instmap.h"
 #include "tcg/tcg-op.h"
+#include "accel/tcg/cpu-ops.h"
 #include "trace.h"
 #include "semihosting/common-semi.h"
 #include "system/cpu-timers.h"
 #include "cpu_bits.h"
 #include "debug.h"
-#include "tcg/oversized-guest.h"
 #include "pmp.h"
 
 int riscv_env_mmu_index(CPURISCVState *env, bool ifetch)
@@ -875,6 +876,254 @@ void riscv_cpu_set_aia_ireg_rmw_fn(CPURISCVState *env, uint32_t priv,
     }
 }
 
+static void riscv_ctr_freeze(CPURISCVState *env, uint64_t freeze_mask,
+                             bool virt)
+{
+    uint64_t ctl = virt ? env->vsctrctl : env->mctrctl;
+
+    assert((freeze_mask & (~(XCTRCTL_BPFRZ | XCTRCTL_LCOFIFRZ))) == 0);
+
+    if (ctl & freeze_mask) {
+        env->sctrstatus |= SCTRSTATUS_FROZEN;
+    }
+}
+
+void riscv_ctr_clear(CPURISCVState *env)
+{
+    memset(env->ctr_src, 0x0, sizeof(env->ctr_src));
+    memset(env->ctr_dst, 0x0, sizeof(env->ctr_dst));
+    memset(env->ctr_data, 0x0, sizeof(env->ctr_data));
+}
+
+static uint64_t riscv_ctr_priv_to_mask(target_ulong priv, bool virt)
+{
+    switch (priv) {
+    case PRV_M:
+        return MCTRCTL_M;
+    case PRV_S:
+        if (virt) {
+            return XCTRCTL_S;
+        }
+        return XCTRCTL_S;
+    case PRV_U:
+        if (virt) {
+            return XCTRCTL_U;
+        }
+        return XCTRCTL_U;
+    }
+
+    g_assert_not_reached();
+}
+
+static uint64_t riscv_ctr_get_control(CPURISCVState *env, target_long priv,
+                                      bool virt)
+{
+    switch (priv) {
+    case PRV_M:
+        return env->mctrctl;
+    case PRV_S:
+    case PRV_U:
+        if (virt) {
+            return env->vsctrctl;
+        }
+        return env->mctrctl;
+    }
+
+    g_assert_not_reached();
+}
+
+/*
+ * This function assumes that src privilege and target privilege are not same
+ * and src privilege is less than target privilege. This includes the virtual
+ * state as well.
+ */
+static bool riscv_ctr_check_xte(CPURISCVState *env, target_long src_prv,
+                                bool src_virt)
+{
+    target_long tgt_prv = env->priv;
+    bool res = true;
+
+    /*
+     * VS and U mode are same in terms of xTE bits required to record an
+     * external trap. See 6.1.2. External Traps, table 8 External Trap Enable
+     * Requirements. This changes VS to U to simplify the logic a bit.
+     */
+    if (src_virt && src_prv == PRV_S) {
+        src_prv = PRV_U;
+    } else if (env->virt_enabled && tgt_prv == PRV_S) {
+        tgt_prv = PRV_U;
+    }
+
+    /* VU mode is an outlier here. */
+    if (src_virt && src_prv == PRV_U) {
+        res &= !!(env->vsctrctl & XCTRCTL_STE);
+    }
+
+    switch (src_prv) {
+    case PRV_U:
+        if (tgt_prv == PRV_U) {
+            break;
+        }
+        res &= !!(env->mctrctl & XCTRCTL_STE);
+        /* fall-through */
+    case PRV_S:
+        if (tgt_prv == PRV_S) {
+            break;
+        }
+        res &= !!(env->mctrctl & MCTRCTL_MTE);
+        /* fall-through */
+    case PRV_M:
+        break;
+    }
+
+    return res;
+}
+
+/*
+ * Special cases for traps and trap returns:
+ *
+ * 1- Traps, and trap returns, between enabled modes are recorded as normal.
+ * 2- Traps from an inhibited mode to an enabled mode, and trap returns from an
+ * enabled mode back to an inhibited mode, are partially recorded.  In such
+ * cases, the PC from the inhibited mode (source PC for traps, and target PC
+ * for trap returns) is 0.
+ *
+ * 3- Trap returns from an inhibited mode to an enabled mode are not recorded.
+ * Traps from an enabled mode to an inhibited mode, known as external traps,
+ * receive special handling.
+ * By default external traps are not recorded, but a handshake mechanism exists
+ * to allow partial recording.  Software running in the target mode of the trap
+ * can opt-in to allowing CTR to record traps into that mode even when the mode
+ * is inhibited.  The MTE, STE, and VSTE bits allow M-mode, S-mode, and VS-mode,
+ * respectively, to opt-in. When an External Trap occurs, and xTE=1, such that
+ * x is the target privilege mode of the trap, will CTR record the trap. In such
+ * cases, the target PC is 0.
+ */
+/*
+ * CTR arrays are implemented as circular buffers and new entry is stored at
+ * sctrstatus.WRPTR, but they are presented to software as moving circular
+ * buffers. Which means, software get's the illusion that whenever a new entry
+ * is added the whole buffer is moved by one place and the new entry is added at
+ * the start keeping new entry at idx 0 and older ones follow.
+ *
+ * Depth = 16.
+ *
+ * buffer [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [A] [B] [C] [D] [E] [F]
+ * WRPTR                                   W
+ * entry   7   6   5   4   3   2   1   0   F   E   D   C   B   A   9   8
+ *
+ * When a new entry is added:
+ * buffer [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [A] [B] [C] [D] [E] [F]
+ * WRPTR                                       W
+ * entry   8   7   6   5   4   3   2   1   0   F   E   D   C   B   A   9
+ *
+ * entry here denotes the logical entry number that software can access
+ * using ctrsource, ctrtarget and ctrdata registers. So xiselect 0x200
+ * will return entry 0 i-e buffer[8] and 0x201 will return entry 1 i-e
+ * buffer[7]. Here is how we convert entry to buffer idx.
+ *
+ *    entry = isel - CTR_ENTRIES_FIRST;
+ *    idx = (sctrstatus.WRPTR - entry - 1) & (depth - 1);
+ */
+void riscv_ctr_add_entry(CPURISCVState *env, target_long src, target_long dst,
+    enum CTRType type, target_ulong src_priv, bool src_virt)
+{
+    bool tgt_virt = env->virt_enabled;
+    uint64_t src_mask = riscv_ctr_priv_to_mask(src_priv, src_virt);
+    uint64_t tgt_mask = riscv_ctr_priv_to_mask(env->priv, tgt_virt);
+    uint64_t src_ctrl = riscv_ctr_get_control(env, src_priv, src_virt);
+    uint64_t tgt_ctrl = riscv_ctr_get_control(env, env->priv, tgt_virt);
+    uint64_t depth, head;
+    bool ext_trap = false;
+
+    /*
+     * Return immediately if both target and src recording is disabled or if
+     * CTR is in frozen state.
+     */
+    if ((!(src_ctrl & src_mask) && !(tgt_ctrl & tgt_mask)) ||
+        env->sctrstatus & SCTRSTATUS_FROZEN) {
+        return;
+    }
+
+    /*
+     * With RAS Emul enabled, only allow Indirect, direct calls, Function
+     * returns and Co-routine swap types.
+     */
+    if (tgt_ctrl & XCTRCTL_RASEMU &&
+        type != CTRDATA_TYPE_INDIRECT_CALL &&
+        type != CTRDATA_TYPE_DIRECT_CALL &&
+        type != CTRDATA_TYPE_RETURN &&
+        type != CTRDATA_TYPE_CO_ROUTINE_SWAP) {
+        return;
+    }
+
+    if (type == CTRDATA_TYPE_EXCEPTION || type == CTRDATA_TYPE_INTERRUPT) {
+        /* Case 2 for traps. */
+        if (!(src_ctrl & src_mask)) {
+            src = 0;
+        } else if (!(tgt_ctrl & tgt_mask)) {
+            /* Check if target priv-mode has allowed external trap recording. */
+            if (!riscv_ctr_check_xte(env, src_priv, src_virt)) {
+                return;
+            }
+
+            ext_trap = true;
+            dst = 0;
+        }
+    } else if (type == CTRDATA_TYPE_EXCEP_INT_RET) {
+        /*
+         * Case 3 for trap returns.  Trap returns from inhibited mode are not
+         * recorded.
+         */
+        if (!(src_ctrl & src_mask)) {
+            return;
+        }
+
+        /* Case 2 for trap returns. */
+        if (!(tgt_ctrl & tgt_mask)) {
+            dst = 0;
+        }
+    }
+
+    /* Ignore filters in case of RASEMU mode or External trap. */
+    if (!(tgt_ctrl & XCTRCTL_RASEMU) && !ext_trap) {
+        /*
+         * Check if the specific type is inhibited. Not taken branch filter is
+         * an enable bit and needs to be checked separatly.
+         */
+        bool check = tgt_ctrl & BIT_ULL(type + XCTRCTL_INH_START);
+        if ((type == CTRDATA_TYPE_NONTAKEN_BRANCH && !check) ||
+            (type != CTRDATA_TYPE_NONTAKEN_BRANCH && check)) {
+            return;
+        }
+    }
+
+    head = get_field(env->sctrstatus, SCTRSTATUS_WRPTR_MASK);
+
+    depth = 16 << get_field(env->sctrdepth, SCTRDEPTH_MASK);
+    if (tgt_ctrl & XCTRCTL_RASEMU && type == CTRDATA_TYPE_RETURN) {
+        head = (head - 1) & (depth - 1);
+
+        env->ctr_src[head] &= ~CTRSOURCE_VALID;
+        env->sctrstatus =
+            set_field(env->sctrstatus, SCTRSTATUS_WRPTR_MASK, head);
+        return;
+    }
+
+    /* In case of Co-routine SWAP we overwrite latest entry. */
+    if (tgt_ctrl & XCTRCTL_RASEMU && type == CTRDATA_TYPE_CO_ROUTINE_SWAP) {
+        head = (head - 1) & (depth - 1);
+    }
+
+    env->ctr_src[head] = src | CTRSOURCE_VALID;
+    env->ctr_dst[head] = dst & ~CTRTARGET_MISP;
+    env->ctr_data[head] = set_field(0, CTRDATA_TYPE_MASK, type);
+
+    head = (head + 1) & (depth - 1);
+
+    env->sctrstatus = set_field(env->sctrstatus, SCTRSTATUS_WRPTR_MASK, head);
+}
+
 void riscv_cpu_set_mode(CPURISCVState *env, target_ulong newpriv, bool virt_en)
 {
     g_assert(newpriv <= PRV_M && newpriv != PRV_RESERVED);
@@ -1167,9 +1416,7 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical,
     hwaddr pte_addr;
     int i;
 
-#if !TCG_OVERSIZED_GUEST
-restart:
-#endif
+ restart:
     for (i = 0; i < levels; i++, ptshift -= ptidxbits) {
         target_ulong idx;
         if (i == 0) {
@@ -1226,14 +1473,27 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical,
             ppn = pte >> PTE_PPN_SHIFT;
         } else {
             if (pte & PTE_RESERVED) {
+                qemu_log_mask(LOG_GUEST_ERROR, "%s: reserved bits set in PTE: "
+                              "addr: 0x%" HWADDR_PRIx " pte: 0x" TARGET_FMT_lx "\n",
+                              __func__, pte_addr, pte);
                 return TRANSLATE_FAIL;
             }
 
             if (!pbmte && (pte & PTE_PBMT)) {
+                /* Reserved without Svpbmt. */
+                qemu_log_mask(LOG_GUEST_ERROR, "%s: PBMT bits set in PTE, "
+                              "and Svpbmt extension is disabled: "
+                              "addr: 0x%" HWADDR_PRIx " pte: 0x" TARGET_FMT_lx "\n",
+                              __func__, pte_addr, pte);
                 return TRANSLATE_FAIL;
             }
 
             if (!riscv_cpu_cfg(env)->ext_svnapot && (pte & PTE_N)) {
+                /* Reserved without Svnapot extension */
+                qemu_log_mask(LOG_GUEST_ERROR, "%s: N bit set in PTE, "
+                              "and Svnapot extension is disabled: "
+                              "addr: 0x%" HWADDR_PRIx " pte: 0x" TARGET_FMT_lx "\n",
+                              __func__, pte_addr, pte);
                 return TRANSLATE_FAIL;
             }
 
@@ -1244,14 +1504,19 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical,
             /* Invalid PTE */
             return TRANSLATE_FAIL;
         }
+
         if (pte & (PTE_R | PTE_W | PTE_X)) {
             goto leaf;
         }
 
-        /* Inner PTE, continue walking */
         if (pte & (PTE_D | PTE_A | PTE_U | PTE_ATTR)) {
+            /* D, A, and U bits are reserved in non-leaf/inner PTEs */
+            qemu_log_mask(LOG_GUEST_ERROR, "%s: D, A, or U bits set in non-leaf PTE: "
+                          "addr: 0x%" HWADDR_PRIx " pte: 0x" TARGET_FMT_lx "\n",
+                          __func__, pte_addr, pte);
             return TRANSLATE_FAIL;
         }
+        /* Inner PTE, continue walking */
         base = ppn << PGSHIFT;
     }
 
@@ -1261,10 +1526,17 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical,
  leaf:
     if (ppn & ((1ULL << ptshift) - 1)) {
         /* Misaligned PPN */
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: PPN bits in PTE is misaligned: "
+                      "addr: 0x%" HWADDR_PRIx " pte: 0x" TARGET_FMT_lx "\n",
+                      __func__, pte_addr, pte);
         return TRANSLATE_FAIL;
     }
     if (!pbmte && (pte & PTE_PBMT)) {
         /* Reserved without Svpbmt. */
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: PBMT bits set in PTE, "
+                      "and Svpbmt extension is disabled: "
+                      "addr: 0x%" HWADDR_PRIx " pte: 0x" TARGET_FMT_lx "\n",
+                      __func__, pte_addr, pte);
         return TRANSLATE_FAIL;
     }
 
@@ -1388,13 +1660,6 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical,
                                      false, MEMTXATTRS_UNSPECIFIED);
         if (memory_region_is_ram(mr)) {
             target_ulong *pte_pa = qemu_map_ram_ptr(mr->ram_block, addr1);
-#if TCG_OVERSIZED_GUEST
-            /*
-             * MTTCG is not enabled on oversized TCG guests so
-             * page table updates do not need to be atomic
-             */
-            *pte_pa = pte = updated_pte;
-#else
             target_ulong old_pte;
             if (riscv_cpu_sxl(env) == MXL_RV32) {
                 old_pte = qatomic_cmpxchg((uint32_t *)pte_pa, pte, updated_pte);
@@ -1405,7 +1670,6 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical,
                 goto restart;
             }
             pte = updated_pte;
-#endif
         } else {
             /*
              * Misconfigured PTE in ROM (AD bits are not preset) or
@@ -1708,6 +1972,23 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
     } else if (probe) {
         return false;
     } else {
+        int wp_access = 0;
+
+        if (access_type == MMU_DATA_LOAD) {
+            wp_access |= BP_MEM_READ;
+        } else if (access_type == MMU_DATA_STORE) {
+            wp_access |= BP_MEM_WRITE;
+        }
+
+        /*
+         * If a watchpoint isn't found for 'addr' this will
+         * be a no-op and we'll resume the mmu_exception path.
+         * Otherwise we'll throw a debug exception and execution
+         * will continue elsewhere.
+         */
+        cpu_check_watchpoint(cs, address, size, MEMTXATTRS_UNSPECIFIED,
+                             wp_access, retaddr);
+
         raise_mmu_exception(env, address, access_type, pmp_violation,
                             first_stage_error, two_stage_lookup,
                             two_stage_indirect_error);
@@ -1986,10 +2267,13 @@ void riscv_cpu_do_interrupt(CPUState *cs)
         !(env->mip & (1ULL << cause));
     bool smode_double_trap = false;
     uint64_t hdeleg = async ? env->hideleg : env->hedeleg;
+    const bool prev_virt = env->virt_enabled;
+    const target_ulong prev_priv = env->priv;
     target_ulong tval = 0;
     target_ulong tinst = 0;
     target_ulong htval = 0;
     target_ulong mtval2 = 0;
+    target_ulong src;
     int sxlen = 0;
     int mxlen = 16 << riscv_cpu_mxl(env);
     bool nnmi_excep = false;
@@ -2095,7 +2379,9 @@ void riscv_cpu_do_interrupt(CPUState *cs)
     mode = env->priv <= PRV_S && cause < 64 &&
         (((deleg >> cause) & 1) || s_injected || vs_injected) ? PRV_S : PRV_M;
 
-    vsmode_exc = env->virt_enabled && (((hdeleg >> cause) & 1) || vs_injected);
+    vsmode_exc = env->virt_enabled && cause < 64 &&
+        (((hdeleg >> cause) & 1) || vs_injected);
+
     /*
      * Check double trap condition only if already in S-mode and targeting
      * S-mode
@@ -2173,6 +2459,8 @@ void riscv_cpu_do_interrupt(CPUState *cs)
         env->pc = (env->stvec >> 2 << 2) +
                   ((async && (env->stvec & 3) == 1) ? cause * 4 : 0);
         riscv_cpu_set_mode(env, PRV_S, virt);
+
+        src = env->sepc;
     } else {
         /*
          * If the hart encounters an exception while executing in M-mode
@@ -2257,6 +2545,19 @@ void riscv_cpu_do_interrupt(CPUState *cs)
                       ((async && (env->mtvec & 3) == 1) ? cause * 4 : 0);
         }
         riscv_cpu_set_mode(env, PRV_M, virt);
+        src = env->mepc;
+    }
+
+    if (riscv_cpu_cfg(env)->ext_smctr || riscv_cpu_cfg(env)->ext_ssctr) {
+        if (async && cause == IRQ_PMU_OVF) {
+            riscv_ctr_freeze(env, XCTRCTL_LCOFIFRZ, virt);
+        } else if (!async && cause == RISCV_EXCP_BREAKPOINT) {
+            riscv_ctr_freeze(env, XCTRCTL_BPFRZ, virt);
+        }
+
+        riscv_ctr_add_entry(env, src, env->pc,
+                        async ? CTRDATA_TYPE_INTERRUPT : CTRDATA_TYPE_EXCEPTION,
+                        prev_priv, prev_virt);
     }
 
     /*
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index afb7544f0780b..49566d3c08288 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -25,6 +25,7 @@
 #include "pmu.h"
 #include "time_helper.h"
 #include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "exec/tb-flush.h"
 #include "system/cpu-timers.h"
 #include "qemu/guest-random.h"
@@ -376,10 +377,6 @@ static RISCVException aia_smode32(CPURISCVState *env, int csrno)
         return ret;
     }
 
-    if (ret != RISCV_EXCP_NONE) {
-        return ret;
-    }
-
     return smode32(env, csrno);
 }
 
@@ -639,6 +636,48 @@ static RISCVException hgatp(CPURISCVState *env, int csrno)
     return hmode(env, csrno);
 }
 
+/*
+ * M-mode:
+ * Without ext_smctr raise illegal inst excep.
+ * Otherwise everything is accessible to m-mode.
+ *
+ * S-mode:
+ * Without ext_ssctr or mstateen.ctr raise illegal inst excep.
+ * Otherwise everything other than mctrctl is accessible.
+ *
+ * VS-mode:
+ * Without ext_ssctr or mstateen.ctr raise illegal inst excep.
+ * Without hstateen.ctr raise virtual illegal inst excep.
+ * Otherwise allow sctrctl (vsctrctl), sctrstatus, 0x200-0x2ff entry range.
+ * Always raise illegal instruction exception for sctrdepth.
+ */
+static RISCVException ctr_mmode(CPURISCVState *env, int csrno)
+{
+    /* Check if smctr-ext is present */
+    if (riscv_cpu_cfg(env)->ext_smctr) {
+        return RISCV_EXCP_NONE;
+    }
+
+    return RISCV_EXCP_ILLEGAL_INST;
+}
+
+static RISCVException ctr_smode(CPURISCVState *env, int csrno)
+{
+    const RISCVCPUConfig *cfg = riscv_cpu_cfg(env);
+
+    if (!cfg->ext_smctr && !cfg->ext_ssctr) {
+        return RISCV_EXCP_ILLEGAL_INST;
+    }
+
+    RISCVException ret = smstateen_acc_ok(env, 0, SMSTATEEN0_CTR);
+    if (ret == RISCV_EXCP_NONE && csrno == CSR_SCTRDEPTH &&
+        env->virt_enabled) {
+        return RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
+    }
+
+    return ret;
+}
+
 static RISCVException aia_hmode(CPURISCVState *env, int csrno)
 {
     int ret;
@@ -2389,6 +2428,13 @@ static bool xiselect_cd_range(target_ulong isel)
     return (ISELECT_CD_FIRST <= isel && isel <= ISELECT_CD_LAST);
 }
 
+static bool xiselect_ctr_range(int csrno, target_ulong isel)
+{
+    /* MIREG-MIREG6 for the range 0x200-0x2ff are not used by CTR. */
+    return CTR_ENTRIES_FIRST <= isel && isel <= CTR_ENTRIES_LAST &&
+           csrno < CSR_MIREG;
+}
+
 static int rmw_iprio(target_ulong xlen,
                      target_ulong iselect, uint8_t *iprio,
                      target_ulong *val, target_ulong new_val,
@@ -2434,6 +2480,124 @@ static int rmw_iprio(target_ulong xlen,
     return 0;
 }
 
+static int rmw_ctrsource(CPURISCVState *env, int isel, target_ulong *val,
+                          target_ulong new_val, target_ulong wr_mask)
+{
+    /*
+     * CTR arrays are treated as circular buffers and TOS always points to next
+     * empty slot, keeping TOS - 1 always pointing to latest entry. Given entry
+     * 0 is always the latest one, traversal is a bit different here. See the
+     * below example.
+     *
+     * Depth = 16.
+     *
+     * idx    [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [A] [B] [C] [D] [E] [F]
+     * TOS                                 H
+     * entry   6   5   4   3   2   1   0   F   E   D   C   B   A   9   8   7
+     */
+    const uint64_t entry = isel - CTR_ENTRIES_FIRST;
+    const uint64_t depth = 16 << get_field(env->sctrdepth, SCTRDEPTH_MASK);
+    uint64_t idx;
+
+    /* Entry greater than depth-1 is read-only zero */
+    if (entry >= depth) {
+        if (val) {
+            *val = 0;
+        }
+        return 0;
+    }
+
+    idx = get_field(env->sctrstatus, SCTRSTATUS_WRPTR_MASK);
+    idx = (idx - entry - 1) & (depth - 1);
+
+    if (val) {
+        *val = env->ctr_src[idx];
+    }
+
+    env->ctr_src[idx] = (env->ctr_src[idx] & ~wr_mask) | (new_val & wr_mask);
+
+    return 0;
+}
+
+static int rmw_ctrtarget(CPURISCVState *env, int isel, target_ulong *val,
+                          target_ulong new_val, target_ulong wr_mask)
+{
+    /*
+     * CTR arrays are treated as circular buffers and TOS always points to next
+     * empty slot, keeping TOS - 1 always pointing to latest entry. Given entry
+     * 0 is always the latest one, traversal is a bit different here. See the
+     * below example.
+     *
+     * Depth = 16.
+     *
+     * idx    [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [A] [B] [C] [D] [E] [F]
+     * head                                H
+     * entry   6   5   4   3   2   1   0   F   E   D   C   B   A   9   8   7
+     */
+    const uint64_t entry = isel - CTR_ENTRIES_FIRST;
+    const uint64_t depth = 16 << get_field(env->sctrdepth, SCTRDEPTH_MASK);
+    uint64_t idx;
+
+    /* Entry greater than depth-1 is read-only zero */
+    if (entry >= depth) {
+        if (val) {
+            *val = 0;
+        }
+        return 0;
+    }
+
+    idx = get_field(env->sctrstatus, SCTRSTATUS_WRPTR_MASK);
+    idx = (idx - entry - 1) & (depth - 1);
+
+    if (val) {
+        *val = env->ctr_dst[idx];
+    }
+
+    env->ctr_dst[idx] = (env->ctr_dst[idx] & ~wr_mask) | (new_val & wr_mask);
+
+    return 0;
+}
+
+static int rmw_ctrdata(CPURISCVState *env, int isel, target_ulong *val,
+                        target_ulong new_val, target_ulong wr_mask)
+{
+    /*
+     * CTR arrays are treated as circular buffers and TOS always points to next
+     * empty slot, keeping TOS - 1 always pointing to latest entry. Given entry
+     * 0 is always the latest one, traversal is a bit different here. See the
+     * below example.
+     *
+     * Depth = 16.
+     *
+     * idx    [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [A] [B] [C] [D] [E] [F]
+     * head                                H
+     * entry   6   5   4   3   2   1   0   F   E   D   C   B   A   9   8   7
+     */
+    const uint64_t entry = isel - CTR_ENTRIES_FIRST;
+    const uint64_t mask = wr_mask & CTRDATA_MASK;
+    const uint64_t depth = 16 << get_field(env->sctrdepth, SCTRDEPTH_MASK);
+    uint64_t idx;
+
+    /* Entry greater than depth-1 is read-only zero */
+    if (entry >= depth) {
+        if (val) {
+            *val = 0;
+        }
+        return 0;
+    }
+
+    idx = get_field(env->sctrstatus, SCTRSTATUS_WRPTR_MASK);
+    idx = (idx - entry - 1) & (depth - 1);
+
+    if (val) {
+        *val = env->ctr_data[idx];
+    }
+
+    env->ctr_data[idx] = (env->ctr_data[idx] & ~mask) | (new_val & mask);
+
+    return 0;
+}
+
 static RISCVException rmw_xireg_aia(CPURISCVState *env, int csrno,
                          target_ulong isel, target_ulong *val,
                          target_ulong new_val, target_ulong wr_mask)
@@ -2586,6 +2750,27 @@ static int rmw_xireg_cd(CPURISCVState *env, int csrno,
     return ret;
 }
 
+static int rmw_xireg_ctr(CPURISCVState *env, int csrno,
+                        target_ulong isel, target_ulong *val,
+                        target_ulong new_val, target_ulong wr_mask)
+{
+    if (!riscv_cpu_cfg(env)->ext_smctr && !riscv_cpu_cfg(env)->ext_ssctr) {
+        return -EINVAL;
+    }
+
+    if (csrno == CSR_SIREG || csrno == CSR_VSIREG) {
+        return rmw_ctrsource(env, isel, val, new_val, wr_mask);
+    } else if (csrno == CSR_SIREG2 || csrno == CSR_VSIREG2) {
+        return rmw_ctrtarget(env, isel, val, new_val, wr_mask);
+    } else if (csrno == CSR_SIREG3 || csrno == CSR_VSIREG3) {
+        return rmw_ctrdata(env, isel, val, new_val, wr_mask);
+    } else if (val) {
+        *val = 0;
+    }
+
+    return 0;
+}
+
 /*
  * rmw_xireg_csrind: Perform indirect access to xireg and xireg2-xireg6
  *
@@ -2597,11 +2782,13 @@ static int rmw_xireg_csrind(CPURISCVState *env, int csrno,
                               target_ulong isel, target_ulong *val,
                               target_ulong new_val, target_ulong wr_mask)
 {
-    int ret = -EINVAL;
     bool virt = csrno == CSR_VSIREG ? true : false;
+    int ret = -EINVAL;
 
     if (xiselect_cd_range(isel)) {
         ret = rmw_xireg_cd(env, csrno, isel, val, new_val, wr_mask);
+    } else if (xiselect_ctr_range(csrno, isel)) {
+        ret = rmw_xireg_ctr(env, csrno, isel, val, new_val, wr_mask);
     } else {
         /*
          * As per the specification, access to unimplented region is undefined
@@ -2621,7 +2808,6 @@ static int rmw_xireg_csrind(CPURISCVState *env, int csrno,
 static int rmw_xiregi(CPURISCVState *env, int csrno, target_ulong *val,
                       target_ulong new_val, target_ulong wr_mask)
 {
-    bool virt = false;
     int ret = -EINVAL;
     target_ulong isel;
 
@@ -2642,23 +2828,17 @@ static int rmw_xiregi(CPURISCVState *env, int csrno, target_ulong *val,
     } else if (CSR_VSIREG <= csrno && csrno <= CSR_VSIREG6 &&
                csrno != CSR_VSIREG4 - 1) {
         isel = env->vsiselect;
-        virt = true;
     } else {
-        goto done;
+        return RISCV_EXCP_ILLEGAL_INST;
     }
 
     return rmw_xireg_csrind(env, csrno, isel, val, new_val, wr_mask);
-
-done:
-    return (env->virt_enabled && virt) ?
-            RISCV_EXCP_VIRT_INSTRUCTION_FAULT : RISCV_EXCP_ILLEGAL_INST;
 }
 
 static RISCVException rmw_xireg(CPURISCVState *env, int csrno,
                                 target_ulong *val, target_ulong new_val,
                                 target_ulong wr_mask)
 {
-    bool virt = false;
     int ret = -EINVAL;
     target_ulong isel;
 
@@ -2680,10 +2860,9 @@ static RISCVException rmw_xireg(CPURISCVState *env, int csrno,
         break;
     case CSR_VSIREG:
         isel = env->vsiselect;
-        virt = true;
         break;
     default:
-         goto done;
+        goto done;
     };
 
     /*
@@ -2699,16 +2878,10 @@ static RISCVException rmw_xireg(CPURISCVState *env, int csrno,
     } else if (riscv_cpu_cfg(env)->ext_smcsrind ||
                riscv_cpu_cfg(env)->ext_sscsrind) {
         return rmw_xireg_csrind(env, csrno, isel, val, new_val, wr_mask);
-    } else {
-        return RISCV_EXCP_ILLEGAL_INST;
     }
 
 done:
-    if (ret) {
-        return (env->virt_enabled && virt) ?
-               RISCV_EXCP_VIRT_INSTRUCTION_FAULT : RISCV_EXCP_ILLEGAL_INST;
-    }
-    return RISCV_EXCP_NONE;
+    return RISCV_EXCP_ILLEGAL_INST;
 }
 
 static RISCVException rmw_xtopei(CPURISCVState *env, int csrno,
@@ -3234,6 +3407,10 @@ static RISCVException write_mstateen0(CPURISCVState *env, int csrno,
         wr_mask |= (SMSTATEEN0_AIA | SMSTATEEN0_IMSIC);
     }
 
+    if (riscv_cpu_cfg(env)->ext_ssctr) {
+        wr_mask |= SMSTATEEN0_CTR;
+    }
+
     return write_mstateen(env, csrno, wr_mask, new_val);
 }
 
@@ -3273,6 +3450,10 @@ static RISCVException write_mstateen0h(CPURISCVState *env, int csrno,
         wr_mask |= SMSTATEEN0_P1P13;
     }
 
+    if (riscv_cpu_cfg(env)->ext_ssctr) {
+        wr_mask |= SMSTATEEN0_CTR;
+    }
+
     return write_mstateenh(env, csrno, wr_mask, new_val);
 }
 
@@ -3327,6 +3508,10 @@ static RISCVException write_hstateen0(CPURISCVState *env, int csrno,
         wr_mask |= (SMSTATEEN0_AIA | SMSTATEEN0_IMSIC);
     }
 
+    if (riscv_cpu_cfg(env)->ext_ssctr) {
+        wr_mask |= SMSTATEEN0_CTR;
+    }
+
     return write_hstateen(env, csrno, wr_mask, new_val);
 }
 
@@ -3366,6 +3551,10 @@ static RISCVException write_hstateen0h(CPURISCVState *env, int csrno,
 {
     uint64_t wr_mask = SMSTATEEN_STATEEN | SMSTATEEN0_HSENVCFG;
 
+    if (riscv_cpu_cfg(env)->ext_ssctr) {
+        wr_mask |= SMSTATEEN0_CTR;
+    }
+
     return write_hstateenh(env, csrno, wr_mask, new_val);
 }
 
@@ -4086,6 +4275,86 @@ static RISCVException write_satp(CPURISCVState *env, int csrno,
     return RISCV_EXCP_NONE;
 }
 
+static RISCVException rmw_sctrdepth(CPURISCVState *env, int csrno,
+                                    target_ulong *ret_val,
+                                    target_ulong new_val, target_ulong wr_mask)
+{
+    uint64_t mask = wr_mask & SCTRDEPTH_MASK;
+
+    if (ret_val) {
+        *ret_val = env->sctrdepth;
+    }
+
+    env->sctrdepth = (env->sctrdepth & ~mask) | (new_val & mask);
+
+    /* Correct depth. */
+    if (mask) {
+        uint64_t depth = get_field(env->sctrdepth, SCTRDEPTH_MASK);
+
+        if (depth > SCTRDEPTH_MAX) {
+            depth = SCTRDEPTH_MAX;
+            env->sctrdepth = set_field(env->sctrdepth, SCTRDEPTH_MASK, depth);
+        }
+
+        /* Update sctrstatus.WRPTR with a legal value */
+        depth = 16 << depth;
+        env->sctrstatus =
+            env->sctrstatus & (~SCTRSTATUS_WRPTR_MASK | (depth - 1));
+    }
+
+    return RISCV_EXCP_NONE;
+}
+
+static RISCVException rmw_sctrstatus(CPURISCVState *env, int csrno,
+                                     target_ulong *ret_val,
+                                     target_ulong new_val, target_ulong wr_mask)
+{
+    uint32_t depth = 16 << get_field(env->sctrdepth, SCTRDEPTH_MASK);
+    uint32_t mask = wr_mask & SCTRSTATUS_MASK;
+
+    if (ret_val) {
+        *ret_val = env->sctrstatus;
+    }
+
+    env->sctrstatus = (env->sctrstatus & ~mask) | (new_val & mask);
+
+    /* Update sctrstatus.WRPTR with a legal value */
+    env->sctrstatus = env->sctrstatus & (~SCTRSTATUS_WRPTR_MASK | (depth - 1));
+
+    return RISCV_EXCP_NONE;
+}
+
+static RISCVException rmw_xctrctl(CPURISCVState *env, int csrno,
+                                    target_ulong *ret_val,
+                                    target_ulong new_val, target_ulong wr_mask)
+{
+    uint64_t csr_mask, mask = wr_mask;
+    uint64_t *ctl_ptr = &env->mctrctl;
+
+    if (csrno == CSR_MCTRCTL) {
+        csr_mask = MCTRCTL_MASK;
+    } else if (csrno == CSR_SCTRCTL && !env->virt_enabled) {
+        csr_mask = SCTRCTL_MASK;
+    } else {
+        /*
+         * This is for csrno == CSR_SCTRCTL and env->virt_enabled == true
+         * or csrno == CSR_VSCTRCTL.
+         */
+        csr_mask = VSCTRCTL_MASK;
+        ctl_ptr = &env->vsctrctl;
+    }
+
+    mask &= csr_mask;
+
+    if (ret_val) {
+        *ret_val = *ctl_ptr & csr_mask;
+    }
+
+    *ctl_ptr = (*ctl_ptr & ~mask) | (new_val & mask);
+
+    return RISCV_EXCP_NONE;
+}
+
 static RISCVException read_vstopi(CPURISCVState *env, int csrno,
                                   target_ulong *val)
 {
@@ -5839,6 +6108,12 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
     [CSR_TINFO]     =  { "tinfo",    debug, read_tinfo,    write_ignore   },
     [CSR_MCONTEXT]  =  { "mcontext", debug, read_mcontext, write_mcontext },
 
+    [CSR_MCTRCTL]    = { "mctrctl",    ctr_mmode,  NULL, NULL, rmw_xctrctl    },
+    [CSR_SCTRCTL]    = { "sctrctl",    ctr_smode,  NULL, NULL, rmw_xctrctl    },
+    [CSR_VSCTRCTL]   = { "vsctrctl",   ctr_smode,  NULL, NULL, rmw_xctrctl    },
+    [CSR_SCTRDEPTH]  = { "sctrdepth",  ctr_smode,  NULL, NULL, rmw_sctrdepth  },
+    [CSR_SCTRSTATUS] = { "sctrstatus", ctr_smode,  NULL, NULL, rmw_sctrstatus },
+
     /* Performance Counters */
     [CSR_HPMCOUNTER3]    = { "hpmcounter3",    ctr,    read_hpmcounter },
     [CSR_HPMCOUNTER4]    = { "hpmcounter4",    ctr,    read_hpmcounter },
diff --git a/target/riscv/debug.c b/target/riscv/debug.c
index f6241a80be801..9db4048523e83 100644
--- a/target/riscv/debug.c
+++ b/target/riscv/debug.c
@@ -478,7 +478,7 @@ static void type2_breakpoint_insert(CPURISCVState *env, target_ulong index)
     bool enabled = type2_breakpoint_enabled(ctrl);
     CPUState *cs = env_cpu(env);
     int flags = BP_CPU | BP_STOP_BEFORE_ACCESS;
-    uint32_t size;
+    uint32_t size, def_size;
 
     if (!enabled) {
         return;
@@ -501,7 +501,9 @@ static void type2_breakpoint_insert(CPURISCVState *env, target_ulong index)
             cpu_watchpoint_insert(cs, addr, size, flags,
                                   &env->cpu_watchpoint[index]);
         } else {
-            cpu_watchpoint_insert(cs, addr, 8, flags,
+            def_size = riscv_cpu_mxl(env) == MXL_RV64 ? 8 : 4;
+
+            cpu_watchpoint_insert(cs, addr, def_size, flags,
                                   &env->cpu_watchpoint[index]);
         }
     }
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 16ea240d26d31..85d73e492d77f 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -132,10 +132,12 @@ DEF_HELPER_6(csrrw_i128, tl, env, int, tl, tl, tl, tl)
 DEF_HELPER_1(sret, tl, env)
 DEF_HELPER_1(mret, tl, env)
 DEF_HELPER_1(mnret, tl, env)
+DEF_HELPER_1(ctr_clear, void, env)
 DEF_HELPER_1(wfi, void, env)
 DEF_HELPER_1(wrs_nto, void, env)
 DEF_HELPER_1(tlb_flush, void, env)
 DEF_HELPER_1(tlb_flush_all, void, env)
+DEF_HELPER_4(ctr_add_entry, void, env, tl, tl, tl)
 /* Native Debug */
 DEF_HELPER_1(itrigger_match, void, env)
 #endif
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index 942c434c6e8c0..6d1a13c82603e 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -114,12 +114,12 @@
 # *** Privileged Instructions ***
 ecall       000000000000     00000 000 00000 1110011
 ebreak      000000000001     00000 000 00000 1110011
+sctrclr     000100000100     00000 000 00000 1110011
 uret        0000000    00010 00000 000 00000 1110011
 sret        0001000    00010 00000 000 00000 1110011
 mret        0011000    00010 00000 000 00000 1110011
 wfi         0001000    00101 00000 000 00000 1110011
 sfence_vma  0001001    ..... ..... 000 00000 1110011 @sfence_vma
-sfence_vm   0001000    00100 ..... 000 00000 1110011 @sfence_vm
 
 # *** NMI ***
 mnret       0111000    00010 00000 000 00000 1110011
diff --git a/target/riscv/insn_trans/trans_privileged.c.inc b/target/riscv/insn_trans/trans_privileged.c.inc
index 73f940d406135..8a62b4cfcd9f9 100644
--- a/target/riscv/insn_trans/trans_privileged.c.inc
+++ b/target/riscv/insn_trans/trans_privileged.c.inc
@@ -75,6 +75,17 @@ static bool trans_ebreak(DisasContext *ctx, arg_ebreak *a)
     return true;
 }
 
+static bool trans_sctrclr(DisasContext *ctx, arg_sctrclr *a)
+{
+#ifndef CONFIG_USER_ONLY
+    if (ctx->cfg_ptr->ext_smctr || ctx->cfg_ptr->ext_ssctr) {
+        gen_helper_ctr_clear(tcg_env);
+        return true;
+    }
+#endif
+    return false;
+}
+
 static bool trans_uret(DisasContext *ctx, arg_uret *a)
 {
     return false;
@@ -86,6 +97,7 @@ static bool trans_sret(DisasContext *ctx, arg_sret *a)
     if (has_ext(ctx, RVS)) {
         decode_save_opc(ctx, 0);
         translator_io_start(&ctx->base);
+        gen_update_pc(ctx, 0);
         gen_helper_sret(cpu_pc, tcg_env);
         exit_tb(ctx); /* no chaining */
         ctx->base.is_jmp = DISAS_NORETURN;
@@ -103,6 +115,7 @@ static bool trans_mret(DisasContext *ctx, arg_mret *a)
 #ifndef CONFIG_USER_ONLY
     decode_save_opc(ctx, 0);
     translator_io_start(&ctx->base);
+    gen_update_pc(ctx, 0);
     gen_helper_mret(cpu_pc, tcg_env);
     exit_tb(ctx); /* no chaining */
     ctx->base.is_jmp = DISAS_NORETURN;
@@ -147,8 +160,3 @@ static bool trans_sfence_vma(DisasContext *ctx, arg_sfence_vma *a)
 #endif
     return false;
 }
-
-static bool trans_sfence_vm(DisasContext *ctx, arg_sfence_vm *a)
-{
-    return false;
-}
diff --git a/target/riscv/insn_trans/trans_rvi.c.inc b/target/riscv/insn_trans/trans_rvi.c.inc
index 96c218a9d7875..b55f56a5eb2c7 100644
--- a/target/riscv/insn_trans/trans_rvi.c.inc
+++ b/target/riscv/insn_trans/trans_rvi.c.inc
@@ -93,6 +93,51 @@ static bool trans_jal(DisasContext *ctx, arg_jal *a)
     return true;
 }
 
+#ifndef CONFIG_USER_ONLY
+/*
+ * Indirect calls
+ * - jalr x1, rs where rs != x5;
+ * - jalr x5, rs where rs != x1;
+ * - c.jalr rs1 where rs1 != x5;
+ *
+ * Indirect jumps
+ * - jalr x0, rs where rs != x1 and rs != x5;
+ * - c.jr rs1 where rs1 != x1 and rs1 != x5.
+ *
+ * Returns
+ * - jalr rd, rs where (rs == x1 or rs == x5) and rd != x1 and rd != x5;
+ * - c.jr rs1 where rs1 == x1 or rs1 == x5.
+ *
+ * Co-routine swap
+ * - jalr x1, x5;
+ * - jalr x5, x1;
+ * - c.jalr x5.
+ *
+ * Other indirect jumps
+ * - jalr rd, rs where rs != x1, rs != x5, rd != x0, rd != x1 and rd != x5.
+ */
+static void gen_ctr_jalr(DisasContext *ctx, arg_jalr *a, TCGv dest)
+{
+    TCGv src = tcg_temp_new();
+    TCGv type;
+
+    if ((a->rd == 1 && a->rs1 != 5) || (a->rd == 5 && a->rs1 != 1)) {
+        type = tcg_constant_tl(CTRDATA_TYPE_INDIRECT_CALL);
+    } else if (a->rd == 0 && a->rs1 != 1 && a->rs1 != 5) {
+        type = tcg_constant_tl(CTRDATA_TYPE_INDIRECT_JUMP);
+    } else if ((a->rs1 == 1 || a->rs1 == 5) && (a->rd != 1 && a->rd != 5)) {
+        type = tcg_constant_tl(CTRDATA_TYPE_RETURN);
+    } else if ((a->rs1 == 1 && a->rd == 5) || (a->rs1 == 5 && a->rd == 1)) {
+        type = tcg_constant_tl(CTRDATA_TYPE_CO_ROUTINE_SWAP);
+    } else {
+        type = tcg_constant_tl(CTRDATA_TYPE_OTHER_INDIRECT_JUMP);
+    }
+
+    gen_pc_plus_diff(src, ctx, 0);
+    gen_helper_ctr_add_entry(tcg_env, src, dest, type);
+}
+#endif
+
 static bool trans_jalr(DisasContext *ctx, arg_jalr *a)
 {
     TCGLabel *misaligned = NULL;
@@ -117,6 +162,12 @@ static bool trans_jalr(DisasContext *ctx, arg_jalr *a)
     gen_pc_plus_diff(succ_pc, ctx, ctx->cur_insn_len);
     gen_set_gpr(ctx, a->rd, succ_pc);
 
+#ifndef CONFIG_USER_ONLY
+    if (ctx->cfg_ptr->ext_smctr || ctx->cfg_ptr->ext_ssctr) {
+        gen_ctr_jalr(ctx, a, target_pc);
+    }
+#endif
+
     tcg_gen_mov_tl(cpu_pc, target_pc);
     if (ctx->fcfi_enabled) {
         /*
@@ -231,6 +282,19 @@ static bool gen_branch(DisasContext *ctx, arg_b *a, TCGCond cond)
     } else {
         tcg_gen_brcond_tl(cond, src1, src2, l);
     }
+
+#ifndef CONFIG_USER_ONLY
+    if (ctx->cfg_ptr->ext_smctr || ctx->cfg_ptr->ext_ssctr) {
+        TCGv type = tcg_constant_tl(CTRDATA_TYPE_NONTAKEN_BRANCH);
+        TCGv dest = tcg_temp_new();
+        TCGv src = tcg_temp_new();
+
+        gen_pc_plus_diff(src, ctx, 0);
+        gen_pc_plus_diff(dest, ctx, ctx->cur_insn_len);
+        gen_helper_ctr_add_entry(tcg_env, src, dest, type);
+    }
+#endif
+
     gen_goto_tb(ctx, 1, ctx->cur_insn_len);
     ctx->pc_save = orig_pc_save;
 
@@ -243,6 +307,17 @@ static bool gen_branch(DisasContext *ctx, arg_b *a, TCGCond cond)
         gen_pc_plus_diff(target_pc, ctx, a->imm);
         gen_exception_inst_addr_mis(ctx, target_pc);
     } else {
+#ifndef CONFIG_USER_ONLY
+        if (ctx->cfg_ptr->ext_smctr || ctx->cfg_ptr->ext_ssctr) {
+            TCGv type = tcg_constant_tl(CTRDATA_TYPE_TAKEN_BRANCH);
+            TCGv dest = tcg_temp_new();
+            TCGv src = tcg_temp_new();
+
+            gen_pc_plus_diff(src, ctx, 0);
+            gen_pc_plus_diff(dest, ctx, a->imm);
+            gen_helper_ctr_add_entry(tcg_env, src, dest, type);
+        }
+#endif
         gen_goto_tb(ctx, 0, a->imm);
     }
     ctx->pc_save = -1;
diff --git a/target/riscv/insn_trans/trans_rvzce.c.inc b/target/riscv/insn_trans/trans_rvzce.c.inc
index cd234ad960724..c77c2b927b0cf 100644
--- a/target/riscv/insn_trans/trans_rvzce.c.inc
+++ b/target/riscv/insn_trans/trans_rvzce.c.inc
@@ -203,6 +203,14 @@ static bool gen_pop(DisasContext *ctx, arg_cmpp *a, bool ret, bool ret_val)
 
     if (ret) {
         TCGv ret_addr = get_gpr(ctx, xRA, EXT_SIGN);
+#ifndef CONFIG_USER_ONLY
+        if (ctx->cfg_ptr->ext_smctr || ctx->cfg_ptr->ext_ssctr) {
+            TCGv type = tcg_constant_tl(CTRDATA_TYPE_RETURN);
+            TCGv src = tcg_temp_new();
+            gen_pc_plus_diff(src, ctx, 0);
+            gen_helper_ctr_add_entry(tcg_env, src, ret_addr, type);
+        }
+#endif
         tcg_gen_mov_tl(cpu_pc, ret_addr);
         tcg_gen_lookup_and_goto_ptr();
         ctx->base.is_jmp = DISAS_NORETURN;
@@ -309,6 +317,19 @@ static bool trans_cm_jalt(DisasContext *ctx, arg_cm_jalt *a)
         gen_set_gpr(ctx, xRA, succ_pc);
     }
 
+#ifndef CONFIG_USER_ONLY
+    if (ctx->cfg_ptr->ext_smctr || ctx->cfg_ptr->ext_ssctr) {
+        if (a->index >= 32) {
+            TCGv type = tcg_constant_tl(CTRDATA_TYPE_DIRECT_CALL);
+            gen_helper_ctr_add_entry(tcg_env, cpu_pc, addr, type);
+        } else {
+            TCGv type = tcg_constant_tl(CTRDATA_TYPE_DIRECT_JUMP);
+            gen_helper_ctr_add_entry(tcg_env, cpu_pc, addr, type);
+        }
+    }
+#endif
+
+
     tcg_gen_mov_tl(cpu_pc, addr);
 
     tcg_gen_lookup_and_goto_ptr();
diff --git a/target/riscv/internals.h b/target/riscv/internals.h
index 67291933f848e..213aff31d85d8 100644
--- a/target/riscv/internals.h
+++ b/target/riscv/internals.h
@@ -142,8 +142,10 @@ static inline float16 check_nanbox_h(CPURISCVState *env, uint64_t f)
     }
 }
 
-/* Our implementation of CPUClass::has_work */
+#ifndef CONFIG_USER_ONLY
+/* Our implementation of SysemuCPUOps::has_work */
 bool riscv_cpu_has_work(CPUState *cs);
+#endif
 
 /* Zjpm addr masking routine */
 static inline target_ulong adjust_addr_body(CPURISCVState *env,
diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c
index 23ce77935940c..4ffeeaa1c9538 100644
--- a/target/riscv/kvm/kvm-cpu.c
+++ b/target/riscv/kvm/kvm-cpu.c
@@ -32,7 +32,7 @@
 #include "system/kvm_int.h"
 #include "cpu.h"
 #include "trace.h"
-#include "hw/core/accel-cpu.h"
+#include "accel/accel-cpu-target.h"
 #include "hw/pci/pci.h"
 #include "exec/memattrs.h"
 #include "exec/address-spaces.h"
@@ -274,6 +274,7 @@ static void kvm_riscv_update_cpu_misa_ext(RISCVCPU *cpu, CPUState *cs)
 static KVMCPUConfig kvm_multi_ext_cfgs[] = {
     KVM_EXT_CFG("zicbom", ext_zicbom, KVM_RISCV_ISA_EXT_ZICBOM),
     KVM_EXT_CFG("zicboz", ext_zicboz, KVM_RISCV_ISA_EXT_ZICBOZ),
+    KVM_EXT_CFG("ziccrse", ext_ziccrse, KVM_RISCV_ISA_EXT_ZICCRSE),
     KVM_EXT_CFG("zicntr", ext_zicntr, KVM_RISCV_ISA_EXT_ZICNTR),
     KVM_EXT_CFG("zicond", ext_zicond, KVM_RISCV_ISA_EXT_ZICOND),
     KVM_EXT_CFG("zicsr", ext_zicsr, KVM_RISCV_ISA_EXT_ZICSR),
@@ -283,6 +284,7 @@ static KVMCPUConfig kvm_multi_ext_cfgs[] = {
     KVM_EXT_CFG("zihpm", ext_zihpm, KVM_RISCV_ISA_EXT_ZIHPM),
     KVM_EXT_CFG("zimop", ext_zimop, KVM_RISCV_ISA_EXT_ZIMOP),
     KVM_EXT_CFG("zcmop", ext_zcmop, KVM_RISCV_ISA_EXT_ZCMOP),
+    KVM_EXT_CFG("zabha", ext_zabha, KVM_RISCV_ISA_EXT_ZABHA),
     KVM_EXT_CFG("zacas", ext_zacas, KVM_RISCV_ISA_EXT_ZACAS),
     KVM_EXT_CFG("zawrs", ext_zawrs, KVM_RISCV_ISA_EXT_ZAWRS),
     KVM_EXT_CFG("zfa", ext_zfa, KVM_RISCV_ISA_EXT_ZFA),
@@ -319,12 +321,18 @@ static KVMCPUConfig kvm_multi_ext_cfgs[] = {
     KVM_EXT_CFG("zvksed", ext_zvksed, KVM_RISCV_ISA_EXT_ZVKSED),
     KVM_EXT_CFG("zvksh", ext_zvksh, KVM_RISCV_ISA_EXT_ZVKSH),
     KVM_EXT_CFG("zvkt", ext_zvkt, KVM_RISCV_ISA_EXT_ZVKT),
+    KVM_EXT_CFG("smnpm", ext_smnpm, KVM_RISCV_ISA_EXT_SMNPM),
     KVM_EXT_CFG("smstateen", ext_smstateen, KVM_RISCV_ISA_EXT_SMSTATEEN),
     KVM_EXT_CFG("ssaia", ext_ssaia, KVM_RISCV_ISA_EXT_SSAIA),
+    KVM_EXT_CFG("sscofpmf", ext_sscofpmf, KVM_RISCV_ISA_EXT_SSCOFPMF),
+    KVM_EXT_CFG("ssnpm", ext_ssnpm, KVM_RISCV_ISA_EXT_SSNPM),
     KVM_EXT_CFG("sstc", ext_sstc, KVM_RISCV_ISA_EXT_SSTC),
+    KVM_EXT_CFG("svade", ext_svade, KVM_RISCV_ISA_EXT_SVADE),
+    KVM_EXT_CFG("svadu", ext_svadu, KVM_RISCV_ISA_EXT_SVADU),
     KVM_EXT_CFG("svinval", ext_svinval, KVM_RISCV_ISA_EXT_SVINVAL),
     KVM_EXT_CFG("svnapot", ext_svnapot, KVM_RISCV_ISA_EXT_SVNAPOT),
     KVM_EXT_CFG("svpbmt", ext_svpbmt, KVM_RISCV_ISA_EXT_SVPBMT),
+    KVM_EXT_CFG("svvptc", ext_svvptc, KVM_RISCV_ISA_EXT_SVVPTC),
 };
 
 static void *kvmconfig_get_cfg_addr(RISCVCPU *cpu, KVMCPUConfig *kvmcfg)
@@ -605,6 +613,21 @@ static int kvm_riscv_put_regs_core(CPUState *cs)
     return ret;
 }
 
+static void kvm_riscv_reset_regs_csr(CPURISCVState *env)
+{
+    env->mstatus = 0;
+    env->mie = 0;
+    env->stvec = 0;
+    env->sscratch = 0;
+    env->sepc = 0;
+    env->scause = 0;
+    env->stval = 0;
+    env->mip = 0;
+    env->satp = 0;
+    env->scounteren = 0;
+    env->senvcfg = 0;
+}
+
 static int kvm_riscv_get_regs_csr(CPUState *cs)
 {
     CPURISCVState *env = &RISCV_CPU(cs)->env;
@@ -618,6 +641,8 @@ static int kvm_riscv_get_regs_csr(CPUState *cs)
     KVM_RISCV_GET_CSR(cs, env, stval, env->stval);
     KVM_RISCV_GET_CSR(cs, env, sip, env->mip);
     KVM_RISCV_GET_CSR(cs, env, satp, env->satp);
+    KVM_RISCV_GET_CSR(cs, env, scounteren, env->scounteren);
+    KVM_RISCV_GET_CSR(cs, env, senvcfg, env->senvcfg);
 
     return 0;
 }
@@ -635,6 +660,8 @@ static int kvm_riscv_put_regs_csr(CPUState *cs)
     KVM_RISCV_SET_CSR(cs, env, stval, env->stval);
     KVM_RISCV_SET_CSR(cs, env, sip, env->mip);
     KVM_RISCV_SET_CSR(cs, env, satp, env->satp);
+    KVM_RISCV_SET_CSR(cs, env, scounteren, env->scounteren);
+    KVM_RISCV_SET_CSR(cs, env, senvcfg, env->senvcfg);
 
     return 0;
 }
@@ -1603,23 +1630,14 @@ void kvm_riscv_reset_vcpu(RISCVCPU *cpu)
     CPURISCVState *env = &cpu->env;
     int i;
 
-    if (!kvm_enabled()) {
-        return;
-    }
     for (i = 0; i < 32; i++) {
         env->gpr[i] = 0;
     }
     env->pc = cpu->env.kernel_addr;
     env->gpr[10] = kvm_arch_vcpu_id(CPU(cpu)); /* a0 */
     env->gpr[11] = cpu->env.fdt_addr;          /* a1 */
-    env->satp = 0;
-    env->mie = 0;
-    env->stvec = 0;
-    env->sscratch = 0;
-    env->sepc = 0;
-    env->scause = 0;
-    env->stval = 0;
-    env->mip = 0;
+
+    kvm_riscv_reset_regs_csr(env);
 }
 
 void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level)
diff --git a/target/riscv/machine.c b/target/riscv/machine.c
index d8445244ab251..889e2b6570132 100644
--- a/target/riscv/machine.c
+++ b/target/riscv/machine.c
@@ -300,6 +300,30 @@ static const VMStateDescription vmstate_envcfg = {
     }
 };
 
+static bool ctr_needed(void *opaque)
+{
+    RISCVCPU *cpu = opaque;
+
+    return cpu->cfg.ext_smctr || cpu->cfg.ext_ssctr;
+}
+
+static const VMStateDescription vmstate_ctr = {
+    .name = "cpu/ctr",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = ctr_needed,
+    .fields = (const VMStateField[]) {
+        VMSTATE_UINT64(env.mctrctl, RISCVCPU),
+        VMSTATE_UINT32(env.sctrdepth, RISCVCPU),
+        VMSTATE_UINT32(env.sctrstatus, RISCVCPU),
+        VMSTATE_UINT64(env.vsctrctl, RISCVCPU),
+        VMSTATE_UINT64_ARRAY(env.ctr_src, RISCVCPU, 16 << SCTRDEPTH_MAX),
+        VMSTATE_UINT64_ARRAY(env.ctr_dst, RISCVCPU, 16 << SCTRDEPTH_MAX),
+        VMSTATE_UINT64_ARRAY(env.ctr_data, RISCVCPU, 16 << SCTRDEPTH_MAX),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static bool pmu_needed(void *opaque)
 {
     RISCVCPU *cpu = opaque;
@@ -450,6 +474,7 @@ const VMStateDescription vmstate_riscv_cpu = {
         &vmstate_jvt,
         &vmstate_elp,
         &vmstate_ssp,
+        &vmstate_ctr,
         NULL
     }
 };
diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c
index ce1256f439c69..0d4220ba93b75 100644
--- a/target/riscv/op_helper.c
+++ b/target/riscv/op_helper.c
@@ -22,6 +22,7 @@
 #include "cpu.h"
 #include "internals.h"
 #include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "exec/cpu_ldst.h"
 #include "exec/helper-proto.h"
 #include "trace.h"
@@ -270,6 +271,8 @@ target_ulong helper_sret(CPURISCVState *env)
 {
     uint64_t mstatus;
     target_ulong prev_priv, prev_virt = env->virt_enabled;
+    const target_ulong src_priv = env->priv;
+    const bool src_virt = env->virt_enabled;
 
     if (!(env->priv >= PRV_S)) {
         riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
@@ -339,6 +342,11 @@ target_ulong helper_sret(CPURISCVState *env)
     }
     env->mstatus = set_field(env->mstatus, MSTATUS_SPELP, 0);
 
+    if (riscv_cpu_cfg(env)->ext_smctr || riscv_cpu_cfg(env)->ext_ssctr) {
+        riscv_ctr_add_entry(env, env->pc, retpc, CTRDATA_TYPE_EXCEP_INT_RET,
+                            src_priv, src_virt);
+    }
+
     return retpc;
 }
 
@@ -416,6 +424,11 @@ target_ulong helper_mret(CPURISCVState *env)
     }
     env->mstatus = set_field(env->mstatus, MSTATUS_MPELP, 0);
 
+    if (riscv_cpu_cfg(env)->ext_smctr || riscv_cpu_cfg(env)->ext_ssctr) {
+        riscv_ctr_add_entry(env, env->pc, retpc, CTRDATA_TYPE_EXCEP_INT_RET,
+                            PRV_M, false);
+    }
+
     return retpc;
 }
 
@@ -466,6 +479,42 @@ target_ulong helper_mnret(CPURISCVState *env)
     return retpc;
 }
 
+void helper_ctr_add_entry(CPURISCVState *env, target_ulong src,
+                          target_ulong dest, target_ulong type)
+{
+    riscv_ctr_add_entry(env, src, dest, (enum CTRType)type,
+                        env->priv, env->virt_enabled);
+}
+
+void helper_ctr_clear(CPURISCVState *env)
+{
+    /*
+     * It's safe to call smstateen_acc_ok() for umode access regardless of the
+     * state of bit 54 (CTR bit in case of m/hstateen) of sstateen. If the bit
+     * is zero, smstateen_acc_ok() will return the correct exception code and
+     * if it's one, smstateen_acc_ok() will return RISCV_EXCP_NONE. In that
+     * scenario the U-mode check below will handle that case.
+     */
+    RISCVException ret = smstateen_acc_ok(env, 0, SMSTATEEN0_CTR);
+    if (ret != RISCV_EXCP_NONE) {
+        riscv_raise_exception(env, ret, GETPC());
+    }
+
+    if (env->priv == PRV_U) {
+        /*
+         * One corner case is when sctrclr is executed from VU-mode and
+         * mstateen.CTR = 0, in which case we are supposed to raise
+         * RISCV_EXCP_ILLEGAL_INST. This case is already handled in
+         * smstateen_acc_ok().
+         */
+        uint32_t excep = env->virt_enabled ? RISCV_EXCP_VIRT_INSTRUCTION_FAULT :
+            RISCV_EXCP_ILLEGAL_INST;
+        riscv_raise_exception(env, excep, GETPC());
+    }
+
+    riscv_ctr_clear(env);
+}
+
 void helper_wfi(CPURISCVState *env)
 {
     CPUState *cs = env_cpu(env);
diff --git a/target/riscv/pmp.c b/target/riscv/pmp.c
index a185c246d6294..b0841d44f4c26 100644
--- a/target/riscv/pmp.c
+++ b/target/riscv/pmp.c
@@ -24,7 +24,7 @@
 #include "qapi/error.h"
 #include "cpu.h"
 #include "trace.h"
-#include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "exec/page-protection.h"
 
 static bool pmp_write_cfg(CPURISCVState *env, uint32_t addr_index,
@@ -524,7 +524,7 @@ void pmpaddr_csr_write(CPURISCVState *env, uint32_t addr_index,
             uint8_t pmp_cfg = env->pmp_state.pmp[addr_index + 1].cfg_reg;
             is_next_cfg_tor = PMP_AMATCH_TOR == pmp_get_a_field(pmp_cfg);
 
-            if (pmp_cfg & PMP_LOCK && is_next_cfg_tor) {
+            if (pmp_is_locked(env, addr_index + 1) && is_next_cfg_tor) {
                 qemu_log_mask(LOG_GUEST_ERROR,
                               "ignoring pmpaddr write - pmpcfg + 1 locked\n");
                 return;
diff --git a/target/riscv/pmu.c b/target/riscv/pmu.c
index cf713663ee56f..0408f96e6af8a 100644
--- a/target/riscv/pmu.c
+++ b/target/riscv/pmu.c
@@ -390,7 +390,7 @@ int riscv_pmu_update_event_map(CPURISCVState *env, uint64_t value,
      * Expected mhpmevent value is zero for reset case. Remove the current
      * mapping.
      */
-    if (!value) {
+    if (!(value & MHPMEVENT_IDX_MASK)) {
         g_hash_table_foreach_remove(cpu->pmu_event_ctr_map,
                                     pmu_remove_event_map,
                                     GUINT_TO_POINTER(ctr_idx));
diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
index 0a137281de15f..5aef9eef36665 100644
--- a/target/riscv/tcg/tcg-cpu.c
+++ b/target/riscv/tcg/tcg-cpu.c
@@ -30,8 +30,8 @@
 #include "qemu/accel.h"
 #include "qemu/error-report.h"
 #include "qemu/log.h"
-#include "hw/core/accel-cpu.h"
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/accel-cpu-target.h"
+#include "accel/tcg/cpu-ops.h"
 #include "tcg/tcg.h"
 #ifndef CONFIG_USER_ONLY
 #include "hw/boards.h"
@@ -360,6 +360,8 @@ static void riscv_cpu_update_named_features(RISCVCPU *cpu)
 
     cpu->cfg.ext_sha = riscv_has_ext(&cpu->env, RVH) &&
                        cpu->cfg.ext_ssstateen;
+
+    cpu->cfg.ext_ziccrse = cpu->cfg.has_priv_1_11;
 }
 
 static void riscv_cpu_validate_g(RISCVCPU *cpu)
@@ -681,6 +683,17 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
         return;
     }
 
+    if ((cpu->cfg.ext_smctr || cpu->cfg.ext_ssctr) &&
+        (!riscv_has_ext(env, RVS) || !cpu->cfg.ext_sscsrind)) {
+        if (cpu_cfg_ext_is_user_set(CPU_CFG_OFFSET(ext_smctr)) ||
+            cpu_cfg_ext_is_user_set(CPU_CFG_OFFSET(ext_ssctr))) {
+            error_setg(errp, "Smctr and Ssctr require S-mode and Sscsrind");
+            return;
+        }
+        cpu->cfg.ext_smctr = false;
+        cpu->cfg.ext_ssctr = false;
+    }
+
     /*
      * Disable isa extensions based on priv spec after we
      * validated and set everything we need.
@@ -713,13 +726,29 @@ static bool riscv_cpu_validate_profile_satp(RISCVCPU *cpu,
 }
 #endif
 
+static void riscv_cpu_check_parent_profile(RISCVCPU *cpu,
+                                           RISCVCPUProfile *profile,
+                                           RISCVCPUProfile *parent)
+{
+    const char *parent_name;
+    bool parent_enabled;
+
+    if (!profile->enabled || !parent) {
+        return;
+    }
+
+    parent_name = parent->name;
+    parent_enabled = object_property_get_bool(OBJECT(cpu), parent_name, NULL);
+    profile->enabled = parent_enabled;
+}
+
 static void riscv_cpu_validate_profile(RISCVCPU *cpu,
                                        RISCVCPUProfile *profile)
 {
     CPURISCVState *env = &cpu->env;
     const char *warn_msg = "Profile %s mandates disabled extension %s";
     bool send_warn = profile->user_set && profile->enabled;
-    bool parent_enabled, profile_impl = true;
+    bool profile_impl = true;
     int i;
 
 #ifndef CONFIG_USER_ONLY
@@ -730,7 +759,7 @@ static void riscv_cpu_validate_profile(RISCVCPU *cpu,
 #endif
 
     if (profile->priv_spec != RISCV_PROFILE_ATTR_UNUSED &&
-        profile->priv_spec != env->priv_ver) {
+        profile->priv_spec > env->priv_ver) {
         profile_impl = false;
 
         if (send_warn) {
@@ -773,12 +802,8 @@ static void riscv_cpu_validate_profile(RISCVCPU *cpu,
 
     profile->enabled = profile_impl;
 
-    if (profile->parent != NULL) {
-        parent_enabled = object_property_get_bool(OBJECT(cpu),
-                                                  profile->parent->name,
-                                                  NULL);
-        profile->enabled = profile->enabled && parent_enabled;
-    }
+    riscv_cpu_check_parent_profile(cpu, profile, profile->u_parent);
+    riscv_cpu_check_parent_profile(cpu, profile, profile->s_parent);
 }
 
 static void riscv_cpu_validate_profiles(RISCVCPU *cpu)
@@ -1014,6 +1039,7 @@ static bool riscv_cpu_is_generic(Object *cpu_obj)
 static bool riscv_tcg_cpu_realize(CPUState *cs, Error **errp)
 {
     RISCVCPU *cpu = RISCV_CPU(cs);
+    RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(cpu);
 
     if (!riscv_cpu_tcg_compatible(cpu)) {
         g_autofree char *name = riscv_cpu_get_name(cpu);
@@ -1022,6 +1048,14 @@ static bool riscv_tcg_cpu_realize(CPUState *cs, Error **errp)
         return false;
     }
 
+    if (mcc->misa_mxl_max >= MXL_RV128 && qemu_tcg_mttcg_enabled()) {
+        /* Missing 128-bit aligned atomics */
+        error_setg(errp,
+                   "128-bit RISC-V currently does not work with Multi "
+                   "Threaded TCG. Please use: -accel tcg,thread=single");
+        return false;
+    }
+
 #ifndef CONFIG_USER_ONLY
     CPURISCVState *env = &cpu->env;
 
@@ -1181,8 +1215,13 @@ static void cpu_set_profile(Object *obj, Visitor *v, const char *name,
     profile->user_set = true;
     profile->enabled = value;
 
-    if (profile->parent != NULL) {
-        object_property_set_bool(obj, profile->parent->name,
+    if (profile->u_parent != NULL) {
+        object_property_set_bool(obj, profile->u_parent->name,
+                                 profile->enabled, NULL);
+    }
+
+    if (profile->s_parent != NULL) {
+        object_property_set_bool(obj, profile->s_parent->name,
                                  profile->enabled, NULL);
     }
 
@@ -1432,22 +1471,20 @@ static void riscv_init_max_cpu_extensions(Object *obj)
     }
 
     /*
-     * ext_smrnmi requires OpenSBI changes that our current
+     * TODO: ext_smrnmi requires OpenSBI changes that our current
      * image does not have. Disable it for now.
      */
     if (cpu->cfg.ext_smrnmi) {
         isa_ext_update_enabled(cpu, CPU_CFG_OFFSET(ext_smrnmi), false);
-        qemu_log("Smrnmi is disabled in the 'max' type CPU\n");
     }
 
     /*
-     * ext_smdbltrp requires the firmware to clear MSTATUS.MDT on startup to
-     * avoid generating a double trap. OpenSBI does not currently support it,
+     * TODO: ext_smdbltrp requires the firmware to clear MSTATUS.MDT on startup
+     * to avoid generating a double trap. OpenSBI does not currently support it,
      * disable it for now.
      */
     if (cpu->cfg.ext_smdbltrp) {
         isa_ext_update_enabled(cpu, CPU_CFG_OFFSET(ext_smdbltrp), false);
-        qemu_log("Smdbltrp is disabled in the 'max' type CPU\n");
     }
 }
 
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 698b74f7a8fab..eaa5d86eaeedd 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -561,6 +561,46 @@ static void gen_set_fpr_d(DisasContext *ctx, int reg_num, TCGv_i64 t)
     }
 }
 
+#ifndef CONFIG_USER_ONLY
+/*
+ * Direct calls
+ * - jal x1;
+ * - jal x5;
+ * - c.jal.
+ * - cm.jalt.
+ *
+ * Direct jumps
+ * - jal x0;
+ * - c.j;
+ * - cm.jt.
+ *
+ * Other direct jumps
+ * - jal rd where rd != x1 and rd != x5 and rd != x0;
+ */
+static void gen_ctr_jal(DisasContext *ctx, int rd, target_ulong imm)
+{
+    TCGv dest = tcg_temp_new();
+    TCGv src = tcg_temp_new();
+    TCGv type;
+
+    /*
+     * If rd is x1 or x5 link registers, treat this as direct call otherwise
+     * its a direct jump.
+     */
+    if (rd == 1 || rd == 5) {
+        type = tcg_constant_tl(CTRDATA_TYPE_DIRECT_CALL);
+    } else if (rd == 0) {
+        type = tcg_constant_tl(CTRDATA_TYPE_DIRECT_JUMP);
+    } else {
+        type = tcg_constant_tl(CTRDATA_TYPE_OTHER_DIRECT_JUMP);
+    }
+
+    gen_pc_plus_diff(dest, ctx, imm);
+    gen_pc_plus_diff(src, ctx, 0);
+    gen_helper_ctr_add_entry(tcg_env, src, dest, type);
+}
+#endif
+
 static void gen_jal(DisasContext *ctx, int rd, target_ulong imm)
 {
     TCGv succ_pc = dest_gpr(ctx, rd);
@@ -575,6 +615,12 @@ static void gen_jal(DisasContext *ctx, int rd, target_ulong imm)
         }
     }
 
+#ifndef CONFIG_USER_ONLY
+    if (ctx->cfg_ptr->ext_smctr || ctx->cfg_ptr->ext_ssctr) {
+        gen_ctr_jal(ctx, rd, imm);
+    }
+#endif
+
     gen_pc_plus_diff(succ_pc, ctx, ctx->cur_insn_len);
     gen_set_gpr(ctx, rd, succ_pc);
 
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 5386e3b97c5da..7773df6a7c72b 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -4659,7 +4659,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
         }                                                 \
         s1 = OP(s1, (TD)s2);                              \
     }                                                     \
-    *((TD *)vd + HD(0)) = s1;                             \
+    if (vl > 0) {                                         \
+        *((TD *)vd + HD(0)) = s1;                         \
+    }                                                     \
     env->vstart = 0;                                      \
     /* set tail elements to 1s */                         \
     vext_set_elems_1s(vd, vta, esz, vlenb);               \
@@ -4745,7 +4747,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1,           \
         }                                                  \
         s1 = OP(s1, (TD)s2, &env->fp_status);              \
     }                                                      \
-    *((TD *)vd + HD(0)) = s1;                              \
+    if (vl > 0) {                                          \
+        *((TD *)vd + HD(0)) = s1;                          \
+    }                                                      \
     env->vstart = 0;                                       \
     /* set tail elements to 1s */                          \
     vext_set_elems_1s(vd, vta, esz, vlenb);                \
diff --git a/target/rx/cpu.c b/target/rx/cpu.c
index 37a6fdd569b37..0ba0d55ab5bdb 100644
--- a/target/rx/cpu.c
+++ b/target/rx/cpu.c
@@ -21,7 +21,7 @@
 #include "qapi/error.h"
 #include "cpu.h"
 #include "migration/vmstate.h"
-#include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "exec/page-protection.h"
 #include "exec/translation-block.h"
 #include "hw/loader.h"
@@ -168,6 +168,7 @@ static void rx_cpu_set_irq(void *opaque, int no, int request)
 
 static void rx_cpu_disas_set_info(CPUState *cpu, disassemble_info *info)
 {
+    info->endian = BFD_ENDIAN_LITTLE;
     info->mach = bfd_mach_rx;
     info->print_insn = print_insn_rx;
 }
@@ -192,15 +193,14 @@ static void rx_cpu_init(Object *obj)
     qdev_init_gpio_in(DEVICE(cpu), rx_cpu_set_irq, 2);
 }
 
-#ifndef CONFIG_USER_ONLY
 #include "hw/core/sysemu-cpu-ops.h"
 
 static const struct SysemuCPUOps rx_sysemu_ops = {
+    .has_work = rx_cpu_has_work,
     .get_phys_page_debug = rx_cpu_get_phys_page_debug,
 };
-#endif
 
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 
 static const TCGCPUOps rx_tcg_ops = {
     .initialize = rx_translate_init,
@@ -209,11 +209,9 @@ static const TCGCPUOps rx_tcg_ops = {
     .restore_state_to_opc = rx_restore_state_to_opc,
     .tlb_fill = rx_cpu_tlb_fill,
 
-#ifndef CONFIG_USER_ONLY
     .cpu_exec_interrupt = rx_cpu_exec_interrupt,
     .cpu_exec_halt = rx_cpu_has_work,
     .do_interrupt = rx_cpu_do_interrupt,
-#endif /* !CONFIG_USER_ONLY */
 };
 
 static void rx_cpu_class_init(ObjectClass *klass, void *data)
@@ -229,15 +227,12 @@ static void rx_cpu_class_init(ObjectClass *klass, void *data)
                                        &rcc->parent_phases);
 
     cc->class_by_name = rx_cpu_class_by_name;
-    cc->has_work = rx_cpu_has_work;
     cc->mmu_index = riscv_cpu_mmu_index;
     cc->dump_state = rx_cpu_dump_state;
     cc->set_pc = rx_cpu_set_pc;
     cc->get_pc = rx_cpu_get_pc;
 
-#ifndef CONFIG_USER_ONLY
     cc->sysemu_ops = &rx_sysemu_ops;
-#endif
     cc->gdb_read_register = rx_cpu_gdb_read_register;
     cc->gdb_write_register = rx_cpu_gdb_write_register;
     cc->disas_set_info = rx_cpu_disas_set_info;
diff --git a/target/rx/cpu.h b/target/rx/cpu.h
index 5ba1874bd7613..349d61c4e402d 100644
--- a/target/rx/cpu.h
+++ b/target/rx/cpu.h
@@ -26,6 +26,10 @@
 #include "exec/cpu-defs.h"
 #include "qemu/cpu-float.h"
 
+#ifdef CONFIG_USER_ONLY
+#error "RX does not support user mode emulation"
+#endif
+
 /* PSW define */
 REG32(PSW, 0)
 FIELD(PSW, C, 0, 1)
@@ -129,11 +133,9 @@ struct RXCPUClass {
 #define CPU_RESOLVING_TYPE TYPE_RX_CPU
 
 const char *rx_crname(uint8_t cr);
-#ifndef CONFIG_USER_ONLY
 void rx_cpu_do_interrupt(CPUState *cpu);
 bool rx_cpu_exec_interrupt(CPUState *cpu, int int_req);
 hwaddr rx_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
-#endif /* !CONFIG_USER_ONLY */
 void rx_cpu_dump_state(CPUState *cpu, FILE *f, int flags);
 int rx_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
 int rx_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
diff --git a/target/rx/helper.c b/target/rx/helper.c
index 80912e8dcb402..e8aabf40ffb41 100644
--- a/target/rx/helper.c
+++ b/target/rx/helper.c
@@ -40,8 +40,6 @@ void rx_cpu_unpack_psw(CPURXState *env, uint32_t psw, int rte)
     env->psw_c = FIELD_EX32(psw, PSW, C);
 }
 
-#ifndef CONFIG_USER_ONLY
-
 #define INT_FLAGS (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIR)
 void rx_cpu_do_interrupt(CPUState *cs)
 {
@@ -90,7 +88,7 @@ void rx_cpu_do_interrupt(CPUState *cs)
         cpu_stl_data(env, env->isp, env->pc);
 
         if (vec < 0x100) {
-            env->pc = cpu_ldl_data(env, 0xffffffc0 + vec * 4);
+            env->pc = cpu_ldl_data(env, 0xffffff80 + vec * 4);
         } else {
             env->pc = cpu_ldl_data(env, env->intb + (vec & 0xff) * 4);
         }
@@ -146,5 +144,3 @@ hwaddr rx_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
 {
     return addr;
 }
-
-#endif /* !CONFIG_USER_ONLY */
diff --git a/target/rx/helper.h b/target/rx/helper.h
index ebb47394744e3..8cc38b0cb7177 100644
--- a/target/rx/helper.h
+++ b/target/rx/helper.h
@@ -4,27 +4,27 @@ DEF_HELPER_1(raise_privilege_violation, noreturn, env)
 DEF_HELPER_1(wait, noreturn, env)
 DEF_HELPER_2(rxint, noreturn, env, i32)
 DEF_HELPER_1(rxbrk, noreturn, env)
-DEF_HELPER_FLAGS_3(fadd, TCG_CALL_NO_WG, f32, env, f32, f32)
-DEF_HELPER_FLAGS_3(fsub, TCG_CALL_NO_WG, f32, env, f32, f32)
-DEF_HELPER_FLAGS_3(fmul, TCG_CALL_NO_WG, f32, env, f32, f32)
-DEF_HELPER_FLAGS_3(fdiv, TCG_CALL_NO_WG, f32, env, f32, f32)
-DEF_HELPER_FLAGS_3(fcmp, TCG_CALL_NO_WG, void, env, f32, f32)
-DEF_HELPER_FLAGS_2(ftoi, TCG_CALL_NO_WG, i32, env, f32)
-DEF_HELPER_FLAGS_2(round, TCG_CALL_NO_WG, i32, env, f32)
-DEF_HELPER_FLAGS_2(itof, TCG_CALL_NO_WG, f32, env, i32)
+DEF_HELPER_3(fadd, f32, env, f32, f32)
+DEF_HELPER_3(fsub, f32, env, f32, f32)
+DEF_HELPER_3(fmul, f32, env, f32, f32)
+DEF_HELPER_3(fdiv, f32, env, f32, f32)
+DEF_HELPER_3(fcmp, void, env, f32, f32)
+DEF_HELPER_2(ftoi, i32, env, f32)
+DEF_HELPER_2(round, i32, env, f32)
+DEF_HELPER_2(itof, f32, env, i32)
 DEF_HELPER_2(set_fpsw, void, env, i32)
-DEF_HELPER_FLAGS_2(racw, TCG_CALL_NO_WG, void, env, i32)
-DEF_HELPER_FLAGS_2(set_psw_rte, TCG_CALL_NO_WG, void, env, i32)
-DEF_HELPER_FLAGS_2(set_psw, TCG_CALL_NO_WG, void, env, i32)
+DEF_HELPER_2(racw, void, env, i32)
+DEF_HELPER_2(set_psw_rte, void, env, i32)
+DEF_HELPER_2(set_psw, void, env, i32)
 DEF_HELPER_1(pack_psw, i32, env)
-DEF_HELPER_FLAGS_3(div, TCG_CALL_NO_WG, i32, env, i32, i32)
-DEF_HELPER_FLAGS_3(divu, TCG_CALL_NO_WG, i32, env, i32, i32)
-DEF_HELPER_FLAGS_1(scmpu, TCG_CALL_NO_WG, void, env)
+DEF_HELPER_3(div, i32, env, i32, i32)
+DEF_HELPER_3(divu, i32, env, i32, i32)
+DEF_HELPER_1(scmpu, void, env)
 DEF_HELPER_1(smovu, void, env)
 DEF_HELPER_1(smovf, void, env)
 DEF_HELPER_1(smovb, void, env)
 DEF_HELPER_2(sstr, void, env, i32)
-DEF_HELPER_FLAGS_2(swhile, TCG_CALL_NO_WG, void, env, i32)
-DEF_HELPER_FLAGS_2(suntil, TCG_CALL_NO_WG, void, env, i32)
-DEF_HELPER_FLAGS_2(rmpa, TCG_CALL_NO_WG, void, env, i32)
+DEF_HELPER_2(swhile, void, env, i32)
+DEF_HELPER_2(suntil, void, env, i32)
+DEF_HELPER_2(rmpa, void, env, i32)
 DEF_HELPER_1(satr, void, env)
diff --git a/target/s390x/cpu-system.c b/target/s390x/cpu-system.c
index e9f8e7cc72f5f..9b380e343c25e 100644
--- a/target/s390x/cpu-system.c
+++ b/target/s390x/cpu-system.c
@@ -39,6 +39,23 @@
 #include "system/tcg.h"
 #include "hw/core/sysemu-cpu-ops.h"
 
+bool s390_cpu_has_work(CPUState *cs)
+{
+    S390CPU *cpu = S390_CPU(cs);
+
+    /* STOPPED cpus can never wake up */
+    if (s390_cpu_get_state(cpu) != S390_CPU_STATE_LOAD &&
+        s390_cpu_get_state(cpu) != S390_CPU_STATE_OPERATING) {
+        return false;
+    }
+
+    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD)) {
+        return false;
+    }
+
+    return s390_cpu_has_int(cpu);
+}
+
 /* S390CPUClass::load_normal() */
 static void s390_cpu_load_normal(CPUState *s)
 {
@@ -158,6 +175,7 @@ void s390_cpu_finalize(Object *obj)
 }
 
 static const struct SysemuCPUOps s390_sysemu_ops = {
+    .has_work = s390_cpu_has_work,
     .get_phys_page_debug = s390_cpu_get_phys_page_debug,
     .get_crash_info = s390_cpu_get_crash_info,
     .write_elf64_note = s390_cpu_write_elf64_note,
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
index 97d41c23de7c4..d73142600bff3 100644
--- a/target/s390x/cpu.c
+++ b/target/s390x/cpu.c
@@ -126,23 +126,6 @@ static vaddr s390_cpu_get_pc(CPUState *cs)
     return cpu->env.psw.addr;
 }
 
-static bool s390_cpu_has_work(CPUState *cs)
-{
-    S390CPU *cpu = S390_CPU(cs);
-
-    /* STOPPED cpus can never wake up */
-    if (s390_cpu_get_state(cpu) != S390_CPU_STATE_LOAD &&
-        s390_cpu_get_state(cpu) != S390_CPU_STATE_OPERATING) {
-        return false;
-    }
-
-    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD)) {
-        return false;
-    }
-
-    return s390_cpu_has_int(cpu);
-}
-
 static int s390x_cpu_mmu_index(CPUState *cs, bool ifetch)
 {
     return s390x_env_mmu_index(cpu_env(cs), ifetch);
@@ -243,6 +226,7 @@ static void s390_cpu_disas_set_info(CPUState *cpu, disassemble_info *info)
 {
     info->mach = bfd_mach_s390_64;
     info->cap_arch = CS_ARCH_SYSZ;
+    info->endian = BFD_ENDIAN_BIG;
     info->cap_insn_unit = 2;
     info->cap_insn_split = 6;
 }
@@ -322,7 +306,7 @@ static const Property s390x_cpu_properties[] = {
 #endif
 
 #ifdef CONFIG_TCG
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 
 void cpu_get_tb_cpu_state(CPUS390XState *env, vaddr *pc,
                           uint64_t *cs_base, uint32_t *pflags)
@@ -394,7 +378,6 @@ static void s390_cpu_class_init(ObjectClass *oc, void *data)
                                        &scc->parent_phases);
 
     cc->class_by_name = s390_cpu_class_by_name,
-    cc->has_work = s390_cpu_has_work;
     cc->mmu_index = s390x_cpu_mmu_index;
     cc->dump_state = s390_cpu_dump_state;
     cc->query_cpu_fast = s390_query_cpu_fast;
diff --git a/target/s390x/gdbstub.c b/target/s390x/gdbstub.c
index 6879430adc2a7..6bca376f2b6fd 100644
--- a/target/s390x/gdbstub.c
+++ b/target/s390x/gdbstub.c
@@ -21,7 +21,7 @@
 #include "qemu/osdep.h"
 #include "cpu.h"
 #include "s390x-internal.h"
-#include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "exec/gdbstub.h"
 #include "gdbstub/helpers.h"
 #include "qemu/bitops.h"
diff --git a/target/s390x/interrupt.c b/target/s390x/interrupt.c
index d68d8955b1a5f..4ae6e2ddeaadf 100644
--- a/target/s390x/interrupt.c
+++ b/target/s390x/interrupt.c
@@ -30,6 +30,7 @@ void trigger_pgm_exception(CPUS390XState *env, uint32_t code)
     /* env->int_pgm_ilen is already set, or will be set during unwinding */
 }
 
+#if !defined(CONFIG_USER_ONLY)
 void s390_program_interrupt(CPUS390XState *env, uint32_t code, uintptr_t ra)
 {
     if (kvm_enabled()) {
@@ -41,7 +42,6 @@ void s390_program_interrupt(CPUS390XState *env, uint32_t code, uintptr_t ra)
     }
 }
 
-#if !defined(CONFIG_USER_ONLY)
 void cpu_inject_clock_comparator(S390CPU *cpu)
 {
     CPUS390XState *env = &cpu->env;
@@ -225,11 +225,9 @@ bool s390_cpu_has_stop_int(S390CPU *cpu)
 
     return env->pending_int & INTERRUPT_STOP;
 }
-#endif
 
 bool s390_cpu_has_int(S390CPU *cpu)
 {
-#ifndef CONFIG_USER_ONLY
     if (!tcg_enabled()) {
         return false;
     }
@@ -238,7 +236,5 @@ bool s390_cpu_has_int(S390CPU *cpu)
            s390_cpu_has_io_int(cpu) ||
            s390_cpu_has_restart_int(cpu) ||
            s390_cpu_has_stop_int(cpu);
-#else
-    return false;
-#endif
 }
+#endif /* !CONFIG_USER_ONLY */
diff --git a/target/s390x/s390x-internal.h b/target/s390x/s390x-internal.h
index a750e7a343a43..a4ba6227ab4ad 100644
--- a/target/s390x/s390x-internal.h
+++ b/target/s390x/s390x-internal.h
@@ -245,6 +245,7 @@ bool s390_cpu_system_realize(DeviceState *dev, Error **errp);
 void s390_cpu_finalize(Object *obj);
 void s390_cpu_system_class_init(CPUClass *cc);
 void s390_cpu_machine_reset_cb(void *opaque);
+bool s390_cpu_has_work(CPUState *cs);
 
 #else
 static inline unsigned int s390_cpu_halt(S390CPU *cpu)
@@ -341,6 +342,7 @@ void cpu_unmap_lowcore(LowCore *lowcore);
 
 /* interrupt.c */
 void trigger_pgm_exception(CPUS390XState *env, uint32_t code);
+#ifndef CONFIG_USER_ONLY
 void cpu_inject_clock_comparator(S390CPU *cpu);
 void cpu_inject_cpu_timer(S390CPU *cpu);
 void cpu_inject_emergency_signal(S390CPU *cpu, uint16_t src_cpu_addr);
@@ -353,9 +355,11 @@ bool s390_cpu_has_restart_int(S390CPU *cpu);
 bool s390_cpu_has_stop_int(S390CPU *cpu);
 void cpu_inject_restart(S390CPU *cpu);
 void cpu_inject_stop(S390CPU *cpu);
+#endif /* CONFIG_USER_ONLY */
 
 
 /* ioinst.c */
+#ifndef CONFIG_USER_ONLY
 void ioinst_handle_xsch(S390CPU *cpu, uint64_t reg1, uintptr_t ra);
 void ioinst_handle_csch(S390CPU *cpu, uint64_t reg1, uintptr_t ra);
 void ioinst_handle_hsch(S390CPU *cpu, uint64_t reg1, uintptr_t ra);
@@ -373,6 +377,7 @@ void ioinst_handle_schm(S390CPU *cpu, uint64_t reg1, uint64_t reg2,
 void ioinst_handle_rsch(S390CPU *cpu, uint64_t reg1, uintptr_t ra);
 void ioinst_handle_rchp(S390CPU *cpu, uint64_t reg1, uintptr_t ra);
 void ioinst_handle_sal(S390CPU *cpu, uint64_t reg1, uintptr_t ra);
+#endif /* CONFIG_USER_ONLY */
 
 
 /* mem_helper.c */
diff --git a/target/s390x/sigp.c b/target/s390x/sigp.c
index cf53b23291856..6a4d9c50813d4 100644
--- a/target/s390x/sigp.c
+++ b/target/s390x/sigp.c
@@ -15,6 +15,7 @@
 #include "system/hw_accel.h"
 #include "system/runstate.h"
 #include "exec/address-spaces.h"
+#include "exec/cputlb.h"
 #include "exec/exec-all.h"
 #include "system/tcg.h"
 #include "trace.h"
diff --git a/target/s390x/tcg/excp_helper.c b/target/s390x/tcg/excp_helper.c
index 4c0b692c9e850..f969850f8716a 100644
--- a/target/s390x/tcg/excp_helper.c
+++ b/target/s390x/tcg/excp_helper.c
@@ -22,6 +22,7 @@
 #include "qemu/log.h"
 #include "cpu.h"
 #include "exec/helper-proto.h"
+#include "exec/cputlb.h"
 #include "exec/exec-all.h"
 #include "s390x-internal.h"
 #include "tcg_s390x.h"
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
index c6ab2901e5ad0..8187b917ba126 100644
--- a/target/s390x/tcg/mem_helper.c
+++ b/target/s390x/tcg/mem_helper.c
@@ -26,9 +26,10 @@
 #include "exec/helper-proto.h"
 #include "exec/cpu-common.h"
 #include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "exec/page-protection.h"
 #include "exec/cpu_ldst.h"
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 #include "qemu/int128.h"
 #include "qemu/atomic128.h"
 
diff --git a/target/s390x/tcg/misc_helper.c b/target/s390x/tcg/misc_helper.c
index 0245451472e7f..31266aeda479e 100644
--- a/target/s390x/tcg/misc_helper.c
+++ b/target/s390x/tcg/misc_helper.c
@@ -27,6 +27,7 @@
 #include "exec/helper-proto.h"
 #include "qemu/timer.h"
 #include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "exec/cpu_ldst.h"
 #include "qapi/error.h"
 #include "tcg_s390x.h"
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
index 4ac693d99bd45..ce84bdf539a62 100644
--- a/target/sh4/cpu.c
+++ b/target/sh4/cpu.c
@@ -82,12 +82,12 @@ static bool superh_io_recompile_replay_branch(CPUState *cs,
     }
     return false;
 }
-#endif
 
 static bool superh_cpu_has_work(CPUState *cs)
 {
     return cs->interrupt_request & CPU_INTERRUPT_HARD;
 }
+#endif /* !CONFIG_USER_ONLY */
 
 static int sh4_cpu_mmu_index(CPUState *cs, bool ifetch)
 {
@@ -128,6 +128,7 @@ static void superh_cpu_reset_hold(Object *obj, ResetType type)
     set_flush_to_zero(1, &env->fp_status);
 #endif
     set_default_nan_mode(1, &env->fp_status);
+    set_snan_bit_is_one(true, &env->fp_status);
     /* sign bit clear, set all frac bits other than msb */
     set_float_default_nan_pattern(0b00111111, &env->fp_status);
     /*
@@ -142,6 +143,8 @@ static void superh_cpu_reset_hold(Object *obj, ResetType type)
 
 static void superh_cpu_disas_set_info(CPUState *cpu, disassemble_info *info)
 {
+    info->endian = TARGET_BIG_ENDIAN ? BFD_ENDIAN_BIG
+                                     : BFD_ENDIAN_LITTLE;
     info->mach = bfd_mach_sh4;
     info->print_insn = print_insn_sh;
 }
@@ -251,11 +254,12 @@ static const VMStateDescription vmstate_sh_cpu = {
 #include "hw/core/sysemu-cpu-ops.h"
 
 static const struct SysemuCPUOps sh4_sysemu_ops = {
+    .has_work = superh_cpu_has_work,
     .get_phys_page_debug = superh_cpu_get_phys_page_debug,
 };
 #endif
 
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 
 static const TCGCPUOps superh_tcg_ops = {
     .initialize = sh4_translate_init,
@@ -287,7 +291,6 @@ static void superh_cpu_class_init(ObjectClass *oc, void *data)
                                        &scc->parent_phases);
 
     cc->class_by_name = superh_cpu_class_by_name;
-    cc->has_work = superh_cpu_has_work;
     cc->mmu_index = sh4_cpu_mmu_index;
     cc->dump_state = superh_cpu_dump_state;
     cc->set_pc = superh_cpu_set_pc;
diff --git a/target/sh4/helper.c b/target/sh4/helper.c
index b8774e046e479..7567e6c8b6691 100644
--- a/target/sh4/helper.c
+++ b/target/sh4/helper.c
@@ -20,6 +20,7 @@
 #include "qemu/osdep.h"
 
 #include "cpu.h"
+#include "exec/cputlb.h"
 #include "exec/exec-all.h"
 #include "exec/page-protection.h"
 #include "exec/log.h"
diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
index fbd38ec334a92..571612011730b 100644
--- a/target/sparc/cpu.c
+++ b/target/sparc/cpu.c
@@ -106,6 +106,7 @@ static bool sparc_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
 static void cpu_sparc_disas_set_info(CPUState *cpu, disassemble_info *info)
 {
     info->print_insn = print_insn_sparc;
+    info->endian = BFD_ENDIAN_BIG;
 #ifdef TARGET_SPARC64
     info->mach = bfd_mach_sparc_v9b;
 #endif
@@ -776,11 +777,13 @@ static void sparc_restore_state_to_opc(CPUState *cs,
     }
 }
 
+#ifndef CONFIG_USER_ONLY
 static bool sparc_cpu_has_work(CPUState *cs)
 {
     return (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
            cpu_interrupts_enabled(cpu_env(cs));
 }
+#endif /* !CONFIG_USER_ONLY */
 
 static int sparc_cpu_mmu_index(CPUState *cs, bool ifetch)
 {
@@ -938,7 +941,8 @@ static void sparc_set_nwindows(Object *obj, Visitor *v, const char *name,
 }
 
 static const PropertyInfo qdev_prop_nwindows = {
-    .name  = "int",
+    .type  = "int",
+    .description = "Number of register windows",
     .get   = sparc_get_nwindows,
     .set   = sparc_set_nwindows,
 };
@@ -986,13 +990,14 @@ static const Property sparc_cpu_properties[] = {
 #include "hw/core/sysemu-cpu-ops.h"
 
 static const struct SysemuCPUOps sparc_sysemu_ops = {
+    .has_work = sparc_cpu_has_work,
     .get_phys_page_debug = sparc_cpu_get_phys_page_debug,
     .legacy_vmsd = &vmstate_sparc_cpu,
 };
 #endif
 
 #ifdef CONFIG_TCG
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 
 static const TCGCPUOps sparc_tcg_ops = {
     .initialize = sparc_tcg_init,
@@ -1027,7 +1032,6 @@ static void sparc_cpu_class_init(ObjectClass *oc, void *data)
 
     cc->class_by_name = sparc_cpu_class_by_name;
     cc->parse_features = sparc_cpu_parse_features;
-    cc->has_work = sparc_cpu_has_work;
     cc->mmu_index = sparc_cpu_mmu_index;
     cc->dump_state = sparc_cpu_dump_state;
 #if !defined(TARGET_SPARC64) && !defined(CONFIG_USER_ONLY)
diff --git a/target/sparc/cpu.h b/target/sparc/cpu.h
index dda811503b568..462bcb6c0e6eb 100644
--- a/target/sparc/cpu.h
+++ b/target/sparc/cpu.h
@@ -574,7 +574,7 @@ struct SPARCCPUClass {
 
     DeviceRealize parent_realize;
     ResettablePhases parent_phases;
-    sparc_def_t *cpu_def;
+    const sparc_def_t *cpu_def;
 };
 
 #ifndef CONFIG_USER_ONLY
diff --git a/target/sparc/gdbstub.c b/target/sparc/gdbstub.c
index ec0036e9ef640..134617fb232bc 100644
--- a/target/sparc/gdbstub.c
+++ b/target/sparc/gdbstub.c
@@ -79,8 +79,13 @@ int sparc_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n)
         }
     }
     if (n < 80) {
-        /* f32-f62 (double width, even numbers only) */
-        return gdb_get_reg64(mem_buf, env->fpr[(n - 32) / 2].ll);
+        /* f32-f62 (16 double width registers, even register numbers only)
+         * n == 64: f32 : env->fpr[16]
+         * n == 65: f34 : env->fpr[17]
+         * etc...
+         * n == 79: f62 : env->fpr[31]
+         */
+        return gdb_get_reg64(mem_buf, env->fpr[(n - 64) + 16].ll);
     }
     switch (n) {
     case 80:
@@ -173,8 +178,13 @@ int sparc_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
         }
         return 4;
     } else if (n < 80) {
-        /* f32-f62 (double width, even numbers only) */
-        env->fpr[(n - 32) / 2].ll = tmp;
+        /* f32-f62 (16 double width registers, even register numbers only)
+         * n == 64: f32 : env->fpr[16]
+         * n == 65: f34 : env->fpr[17]
+         * etc...
+         * n == 79: f62 : env->fpr[31]
+         */
+        env->fpr[(n - 64) + 16].ll = tmp;
     } else {
         switch (n) {
         case 80:
diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode
index 989c20b44add1..9e39d232735e6 100644
--- a/target/sparc/insns.decode
+++ b/target/sparc/insns.decode
@@ -96,7 +96,10 @@ CALL    01 i:s30
     RDTICK          10 rd:5  101000 00100 0 0000000000000
     RDPC            10 rd:5  101000 00101 0 0000000000000
     RDFPRS          10 rd:5  101000 00110 0 0000000000000
-    RDASR17         10 rd:5  101000 10001 0 0000000000000
+    {
+      RDASR17       10 rd:5  101000 10001 0 0000000000000
+      RDPIC         10 rd:5  101000 10001 0 0000000000000
+    }
     RDGSR           10 rd:5  101000 10011 0 0000000000000
     RDSOFTINT       10 rd:5  101000 10110 0 0000000000000
     RDTICK_CMPR     10 rd:5  101000 10111 0 0000000000000
@@ -114,6 +117,8 @@ CALL    01 i:s30
     WRCCR           10 00010 110000 ..... . .............  @n_r_ri
     WRASI           10 00011 110000 ..... . .............  @n_r_ri
     WRFPRS          10 00110 110000 ..... . .............  @n_r_ri
+    WRPCR           10 10000 110000 01000 0 0000000000000
+    WRPIC           10 10001 110000 01000 0 0000000000000
     {
       WRGSR         10 10011 110000 ..... . .............  @n_r_ri
       WRPOWERDOWN   10 10011 110000 ..... . .............  @n_r_ri
@@ -321,12 +326,12 @@ FdMULq      10 ..... 110100 ..... 0 0110 1110 .....        @q_d_d
 FNHADDs     10 ..... 110100 ..... 0 0111 0001 .....        @r_r_r
 FNHADDd     10 ..... 110100 ..... 0 0111 0010 .....        @d_d_d
 FNsMULd     10 ..... 110100 ..... 0 0111 1001 .....        @d_r_r
-FsTOx       10 ..... 110100 00000 0 1000 0001 .....        @r_r2
-FdTOx       10 ..... 110100 00000 0 1000 0010 .....        @r_d2
-FqTOx       10 ..... 110100 00000 0 1000 0011 .....        @r_q2
-FxTOs       10 ..... 110100 00000 0 1000 0100 .....        @r_r2
-FxTOd       10 ..... 110100 00000 0 1000 1000 .....        @d_r2
-FxTOq       10 ..... 110100 00000 0 1000 1100 .....        @q_r2
+FsTOx       10 ..... 110100 00000 0 1000 0001 .....        @d_r2
+FdTOx       10 ..... 110100 00000 0 1000 0010 .....        @d_d2
+FqTOx       10 ..... 110100 00000 0 1000 0011 .....        @d_q2
+FxTOs       10 ..... 110100 00000 0 1000 0100 .....        @r_d2
+FxTOd       10 ..... 110100 00000 0 1000 1000 .....        @d_d2
+FxTOq       10 ..... 110100 00000 0 1000 1100 .....        @q_d2
 FiTOs       10 ..... 110100 00000 0 1100 0100 .....        @r_r2
 FdTOs       10 ..... 110100 00000 0 1100 0110 .....        @r_d2
 FqTOs       10 ..... 110100 00000 0 1100 0111 .....        @r_q2
diff --git a/target/sparc/ldst_helper.c b/target/sparc/ldst_helper.c
index 4c54e45655352..b559afc9a9446 100644
--- a/target/sparc/ldst_helper.c
+++ b/target/sparc/ldst_helper.c
@@ -24,6 +24,7 @@
 #include "tcg/tcg.h"
 #include "exec/helper-proto.h"
 #include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "exec/page-protection.h"
 #include "exec/cpu_ldst.h"
 #ifdef CONFIG_USER_ONLY
diff --git a/target/sparc/mmu_helper.c b/target/sparc/mmu_helper.c
index 9ff06026b8c13..7548d01777c84 100644
--- a/target/sparc/mmu_helper.c
+++ b/target/sparc/mmu_helper.c
@@ -20,7 +20,7 @@
 #include "qemu/osdep.h"
 #include "qemu/log.h"
 #include "cpu.h"
-#include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "exec/page-protection.h"
 #include "qemu/qemu-print.h"
 #include "trace.h"
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
index 7e5c7351cb329..bfe63649db249 100644
--- a/target/sparc/translate.c
+++ b/target/sparc/translate.c
@@ -2882,6 +2882,14 @@ static TCGv do_rd_leon3_config(DisasContext *dc, TCGv dst)
 
 TRANS(RDASR17, ASR17, do_rd_special, true, a->rd, do_rd_leon3_config)
 
+static TCGv do_rdpic(DisasContext *dc, TCGv dst)
+{
+    return tcg_constant_tl(0);
+}
+
+TRANS(RDPIC, HYPV, do_rd_special, supervisor(dc), a->rd, do_rdpic)
+
+
 static TCGv do_rdccr(DisasContext *dc, TCGv dst)
 {
     gen_helper_rdccr(dst, tcg_env);
@@ -3315,6 +3323,17 @@ static void do_wrfprs(DisasContext *dc, TCGv src)
 
 TRANS(WRFPRS, 64, do_wr_special, a, true, do_wrfprs)
 
+static bool do_priv_nop(DisasContext *dc, bool priv)
+{
+    if (!priv) {
+        return raise_priv(dc);
+    }
+    return advance_pc(dc);
+}
+
+TRANS(WRPCR, HYPV, do_priv_nop, supervisor(dc))
+TRANS(WRPIC, HYPV, do_priv_nop, supervisor(dc))
+
 static void do_wrgsr(DisasContext *dc, TCGv src)
 {
     gen_trap_ifnofpu(dc);
diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c
index 95202fadbfdff..16acc4ecb92ec 100644
--- a/target/tricore/cpu.c
+++ b/target/tricore/cpu.c
@@ -165,10 +165,11 @@ static bool tricore_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
 #include "hw/core/sysemu-cpu-ops.h"
 
 static const struct SysemuCPUOps tricore_sysemu_ops = {
+    .has_work = tricore_cpu_has_work,
     .get_phys_page_debug = tricore_cpu_get_phys_page_debug,
 };
 
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 
 static const TCGCPUOps tricore_tcg_ops = {
     .initialize = tricore_tcg_init,
@@ -193,7 +194,6 @@ static void tricore_cpu_class_init(ObjectClass *c, void *data)
     resettable_class_set_parent_phases(rc, NULL, tricore_cpu_reset_hold, NULL,
                                        &mcc->parent_phases);
     cc->class_by_name = tricore_cpu_class_by_name;
-    cc->has_work = tricore_cpu_has_work;
     cc->mmu_index = tricore_cpu_mmu_index;
 
     cc->gdb_read_register = tricore_cpu_gdb_read_register;
diff --git a/target/tricore/cpu.h b/target/tricore/cpu.h
index 8e431d7922236..cf9dbc6df8ee6 100644
--- a/target/tricore/cpu.h
+++ b/target/tricore/cpu.h
@@ -26,6 +26,10 @@
 #include "qemu/cpu-float.h"
 #include "tricore-defs.h"
 
+#ifdef CONFIG_USER_ONLY
+#error "TriCore does not support user mode emulation"
+#endif
+
 typedef struct CPUArchState {
     /* GPR Register */
     uint32_t gpr_a[16];
diff --git a/target/tricore/helper.c b/target/tricore/helper.c
index 9898752eb00eb..a64412e6bd898 100644
--- a/target/tricore/helper.c
+++ b/target/tricore/helper.c
@@ -19,7 +19,7 @@
 #include "qemu/log.h"
 #include "hw/registerfields.h"
 #include "cpu.h"
-#include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "exec/page-protection.h"
 #include "fpu/softfloat-helpers.h"
 #include "qemu/qemu-print.h"
diff --git a/target/xtensa/cpu.c b/target/xtensa/cpu.c
index 4eb699d1f456e..7663b62d01ebd 100644
--- a/target/xtensa/cpu.c
+++ b/target/xtensa/cpu.c
@@ -63,16 +63,14 @@ static void xtensa_restore_state_to_opc(CPUState *cs,
     cpu->env.pc = data[0];
 }
 
+#ifndef CONFIG_USER_ONLY
 static bool xtensa_cpu_has_work(CPUState *cs)
 {
-#ifndef CONFIG_USER_ONLY
-    XtensaCPU *cpu = XTENSA_CPU(cs);
+    CPUXtensaState *env = cpu_env(cs);
 
-    return !cpu->env.runstall && cpu->env.pending_irq_level;
-#else
-    return true;
-#endif
+    return !env->runstall && env->pending_irq_level;
 }
+#endif /* !CONFIG_USER_ONLY */
 
 static int xtensa_cpu_mmu_index(CPUState *cs, bool ifetch)
 {
@@ -159,6 +157,8 @@ static void xtensa_cpu_disas_set_info(CPUState *cs, disassemble_info *info)
 
     info->private_data = cpu->env.config->isa;
     info->print_insn = print_insn_xtensa;
+    info->endian = TARGET_BIG_ENDIAN ? BFD_ENDIAN_BIG
+                                     : BFD_ENDIAN_LITTLE;
 }
 
 static void xtensa_cpu_realizefn(DeviceState *dev, Error **errp)
@@ -224,11 +224,12 @@ static const VMStateDescription vmstate_xtensa_cpu = {
 #include "hw/core/sysemu-cpu-ops.h"
 
 static const struct SysemuCPUOps xtensa_sysemu_ops = {
+    .has_work = xtensa_cpu_has_work,
     .get_phys_page_debug = xtensa_cpu_get_phys_page_debug,
 };
 #endif
 
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
 
 static const TCGCPUOps xtensa_tcg_ops = {
     .initialize = xtensa_translate_init,
@@ -261,7 +262,6 @@ static void xtensa_cpu_class_init(ObjectClass *oc, void *data)
                                        &xcc->parent_phases);
 
     cc->class_by_name = xtensa_cpu_class_by_name;
-    cc->has_work = xtensa_cpu_has_work;
     cc->mmu_index = xtensa_cpu_mmu_index;
     cc->dump_state = xtensa_cpu_dump_state;
     cc->set_pc = xtensa_cpu_set_pc;
diff --git a/target/xtensa/cpu.h b/target/xtensa/cpu.h
index 0e6302c5bd367..8d70bfc0cd422 100644
--- a/target/xtensa/cpu.h
+++ b/target/xtensa/cpu.h
@@ -490,7 +490,7 @@ typedef struct XtensaConfig {
 } XtensaConfig;
 
 typedef struct XtensaConfigList {
-    const XtensaConfig *config;
+    XtensaConfig *config;
     struct XtensaConfigList *next;
 } XtensaConfigList;
 
diff --git a/target/xtensa/helper.c b/target/xtensa/helper.c
index 2978c471c1fc2..4824b97e371e4 100644
--- a/target/xtensa/helper.c
+++ b/target/xtensa/helper.c
@@ -28,7 +28,7 @@
 #include "qemu/osdep.h"
 #include "qemu/log.h"
 #include "cpu.h"
-#include "exec/exec-all.h"
+#include "exec/cputlb.h"
 #include "gdbstub/helpers.h"
 #include "exec/helper-proto.h"
 #include "qemu/error-report.h"
@@ -173,9 +173,8 @@ static void xtensa_core_class_init(ObjectClass *oc, void *data)
 {
     CPUClass *cc = CPU_CLASS(oc);
     XtensaCPUClass *xcc = XTENSA_CPU_CLASS(oc);
-    XtensaConfig *config = data;
+    const XtensaConfig *config = data;
 
-    xtensa_finalize_config(config);
     xcc->config = config;
 
     /*
@@ -195,6 +194,8 @@ void xtensa_register_core(XtensaConfigList *node)
         .class_data = (void *)node->config,
     };
 
+    xtensa_finalize_config(node->config);
+
     node->next = xtensa_cores;
     xtensa_cores = node;
     type.name = g_strdup_printf(XTENSA_CPU_TYPE_NAME("%s"), node->config->name);
diff --git a/target/xtensa/mmu_helper.c b/target/xtensa/mmu_helper.c
index 29b84d5dbf66e..63be741a42aea 100644
--- a/target/xtensa/mmu_helper.c
+++ b/target/xtensa/mmu_helper.c
@@ -32,6 +32,7 @@
 #include "cpu.h"
 #include "exec/helper-proto.h"
 #include "qemu/host-utils.h"
+#include "exec/cputlb.h"
 #include "exec/exec-all.h"
 #include "exec/page-protection.h"
 
diff --git a/tcg/aarch64/tcg-target-con-set.h b/tcg/aarch64/tcg-target-con-set.h
index 44fcc1206e0ae..1281e5efc01c6 100644
--- a/tcg/aarch64/tcg-target-con-set.h
+++ b/tcg/aarch64/tcg-target-con-set.h
@@ -11,27 +11,27 @@
  */
 C_O0_I1(r)
 C_O0_I2(r, rC)
-C_O0_I2(rZ, r)
+C_O0_I2(rz, r)
 C_O0_I2(w, r)
-C_O0_I3(rZ, rZ, r)
+C_O0_I3(rz, rz, r)
 C_O1_I1(r, r)
 C_O1_I1(w, r)
 C_O1_I1(w, w)
 C_O1_I1(w, wr)
-C_O1_I2(r, 0, rZ)
+C_O1_I2(r, 0, rz)
 C_O1_I2(r, r, r)
 C_O1_I2(r, r, rA)
 C_O1_I2(r, r, rAL)
 C_O1_I2(r, r, rC)
 C_O1_I2(r, r, ri)
 C_O1_I2(r, r, rL)
-C_O1_I2(r, rZ, rZ)
+C_O1_I2(r, rz, rz)
 C_O1_I2(w, 0, w)
 C_O1_I2(w, w, w)
 C_O1_I2(w, w, wN)
 C_O1_I2(w, w, wO)
 C_O1_I2(w, w, wZ)
 C_O1_I3(w, w, w, w)
-C_O1_I4(r, r, rC, rZ, rZ)
+C_O1_I4(r, r, rC, rz, rz)
 C_O2_I1(r, r, r)
-C_O2_I4(r, r, rZ, rZ, rA, rMZ)
+C_O2_I4(r, r, rz, rz, rA, rMZ)
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index 66eb4b73b5ad5..4645242d85ea4 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -1775,7 +1775,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
         ldst = new_ldst_label(s);
         ldst->is_ld = is_ld;
         ldst->oi = oi;
-        ldst->addrlo_reg = addr_reg;
+        ldst->addr_reg = addr_reg;
 
         mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
                      ? TCG_TYPE_I64 : TCG_TYPE_I32);
@@ -1837,7 +1837,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
 
             ldst->is_ld = is_ld;
             ldst->oi = oi;
-            ldst->addrlo_reg = addr_reg;
+            ldst->addr_reg = addr_reg;
 
             /* tst addr, #mask */
             tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
@@ -2125,10 +2125,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext,
     TCGArg a2 = args[2];
     int c2 = const_args[2];
 
-    /* Some operands are defined with "rZ" constraint, a register or
-       the zero register.  These need not actually test args[I] == 0.  */
-#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
-
     switch (opc) {
     case INDEX_op_goto_ptr:
         tcg_out_insn(s, 3207, BR, a0);
@@ -2171,18 +2167,18 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext,
 
     case INDEX_op_st8_i32:
     case INDEX_op_st8_i64:
-        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
+        tcg_out_ldst(s, I3312_STRB, a0, a1, a2, 0);
         break;
     case INDEX_op_st16_i32:
     case INDEX_op_st16_i64:
-        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
+        tcg_out_ldst(s, I3312_STRH, a0, a1, a2, 1);
         break;
     case INDEX_op_st_i32:
     case INDEX_op_st32_i64:
-        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
+        tcg_out_ldst(s, I3312_STRW, a0, a1, a2, 2);
         break;
     case INDEX_op_st_i64:
-        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
+        tcg_out_ldst(s, I3312_STRX, a0, a1, a2, 3);
         break;
 
     case INDEX_op_add_i32:
@@ -2395,28 +2391,22 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext,
         /* FALLTHRU */
     case INDEX_op_movcond_i64:
         tcg_out_cmp(s, ext, args[5], a1, a2, c2);
-        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
+        tcg_out_insn(s, 3506, CSEL, ext, a0, args[3], args[4], args[5]);
         break;
 
-    case INDEX_op_qemu_ld_a32_i32:
-    case INDEX_op_qemu_ld_a64_i32:
-    case INDEX_op_qemu_ld_a32_i64:
-    case INDEX_op_qemu_ld_a64_i64:
+    case INDEX_op_qemu_ld_i32:
+    case INDEX_op_qemu_ld_i64:
         tcg_out_qemu_ld(s, a0, a1, a2, ext);
         break;
-    case INDEX_op_qemu_st_a32_i32:
-    case INDEX_op_qemu_st_a64_i32:
-    case INDEX_op_qemu_st_a32_i64:
-    case INDEX_op_qemu_st_a64_i64:
-        tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
+    case INDEX_op_qemu_st_i32:
+    case INDEX_op_qemu_st_i64:
+        tcg_out_qemu_st(s, a0, a1, a2, ext);
         break;
-    case INDEX_op_qemu_ld_a32_i128:
-    case INDEX_op_qemu_ld_a64_i128:
+    case INDEX_op_qemu_ld_i128:
         tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true);
         break;
-    case INDEX_op_qemu_st_a32_i128:
-    case INDEX_op_qemu_st_a64_i128:
-        tcg_out_qemu_ldst_i128(s, REG0(0), REG0(1), a2, args[3], false);
+    case INDEX_op_qemu_st_i128:
+        tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], false);
         break;
 
     case INDEX_op_bswap64_i64:
@@ -2445,7 +2435,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext,
 
     case INDEX_op_deposit_i64:
     case INDEX_op_deposit_i32:
-        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
+        tcg_out_dep(s, ext, a0, a2, args[3], args[4]);
         break;
 
     case INDEX_op_extract_i64:
@@ -2465,25 +2455,25 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext,
 
     case INDEX_op_extract2_i64:
     case INDEX_op_extract2_i32:
-        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
+        tcg_out_extr(s, ext, a0, a2, a1, args[3]);
         break;
 
     case INDEX_op_add2_i32:
-        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
+        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, a2, args[3],
                         (int32_t)args[4], args[5], const_args[4],
                         const_args[5], false);
         break;
     case INDEX_op_add2_i64:
-        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
+        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, a2, args[3], args[4],
                         args[5], const_args[4], const_args[5], false);
         break;
     case INDEX_op_sub2_i32:
-        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
+        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, a2, args[3],
                         (int32_t)args[4], args[5], const_args[4],
                         const_args[5], true);
         break;
     case INDEX_op_sub2_i64:
-        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
+        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, a2, args[3], args[4],
                         args[5], const_args[4], const_args[5], true);
         break;
 
@@ -2519,8 +2509,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext,
     default:
         g_assert_not_reached();
     }
-
-#undef REG0
 }
 
 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
@@ -3016,7 +3004,7 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
     case INDEX_op_st16_i64:
     case INDEX_op_st32_i64:
     case INDEX_op_st_i64:
-        return C_O0_I2(rZ, r);
+        return C_O0_I2(rz, r);
 
     case INDEX_op_add_i32:
     case INDEX_op_add_i64:
@@ -3082,38 +3070,32 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
 
     case INDEX_op_movcond_i32:
     case INDEX_op_movcond_i64:
-        return C_O1_I4(r, r, rC, rZ, rZ);
+        return C_O1_I4(r, r, rC, rz, rz);
 
-    case INDEX_op_qemu_ld_a32_i32:
-    case INDEX_op_qemu_ld_a64_i32:
-    case INDEX_op_qemu_ld_a32_i64:
-    case INDEX_op_qemu_ld_a64_i64:
+    case INDEX_op_qemu_ld_i32:
+    case INDEX_op_qemu_ld_i64:
         return C_O1_I1(r, r);
-    case INDEX_op_qemu_ld_a32_i128:
-    case INDEX_op_qemu_ld_a64_i128:
+    case INDEX_op_qemu_ld_i128:
         return C_O2_I1(r, r, r);
-    case INDEX_op_qemu_st_a32_i32:
-    case INDEX_op_qemu_st_a64_i32:
-    case INDEX_op_qemu_st_a32_i64:
-    case INDEX_op_qemu_st_a64_i64:
-        return C_O0_I2(rZ, r);
-    case INDEX_op_qemu_st_a32_i128:
-    case INDEX_op_qemu_st_a64_i128:
-        return C_O0_I3(rZ, rZ, r);
+    case INDEX_op_qemu_st_i32:
+    case INDEX_op_qemu_st_i64:
+        return C_O0_I2(rz, r);
+    case INDEX_op_qemu_st_i128:
+        return C_O0_I3(rz, rz, r);
 
     case INDEX_op_deposit_i32:
     case INDEX_op_deposit_i64:
-        return C_O1_I2(r, 0, rZ);
+        return C_O1_I2(r, 0, rz);
 
     case INDEX_op_extract2_i32:
     case INDEX_op_extract2_i64:
-        return C_O1_I2(r, rZ, rZ);
+        return C_O1_I2(r, rz, rz);
 
     case INDEX_op_add2_i32:
     case INDEX_op_add2_i64:
     case INDEX_op_sub2_i32:
     case INDEX_op_sub2_i64:
-        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
+        return C_O2_I4(r, r, rz, rz, rA, rMZ);
 
     case INDEX_op_add_vec:
     case INDEX_op_sub_vec:
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index 0dd6e1f069533..3f3df5176d914 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -45,6 +45,8 @@ typedef enum {
     TCG_AREG0  = TCG_REG_X19,
 } TCGReg;
 
+#define TCG_REG_ZERO TCG_REG_XZR
+
 #define TCG_TARGET_NB_REGS 64
 
 #endif /* AARCH64_TCG_TARGET_H */
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 12dad7307f569..cec3d761d45f5 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -676,14 +676,8 @@ static void tcg_out_ldrd_r(TCGContext *s, ARMCond cond, TCGReg rt,
     tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 0);
 }
 
-static void __attribute__((unused))
-tcg_out_ldrd_rwb(TCGContext *s, ARMCond cond, TCGReg rt, TCGReg rn, TCGReg rm)
-{
-    tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 1);
-}
-
-static void __attribute__((unused))
-tcg_out_strd_8(TCGContext *s, ARMCond cond, TCGReg rt, TCGReg rn, int imm8)
+static void tcg_out_strd_8(TCGContext *s, ARMCond cond, TCGReg rt,
+                           TCGReg rn, int imm8)
 {
     tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0);
 }
@@ -1455,8 +1449,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 #define MIN_TLB_MASK_TABLE_OFS  -256
 
 static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
-                                           TCGReg addrlo, TCGReg addrhi,
-                                           MemOpIdx oi, bool is_ld)
+                                           TCGReg addr, MemOpIdx oi, bool is_ld)
 {
     TCGLabelQemuLdst *ldst = NULL;
     MemOp opc = get_memop(oi);
@@ -1465,14 +1458,14 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
     if (tcg_use_softmmu) {
         *h = (HostAddress){
             .cond = COND_AL,
-            .base = addrlo,
+            .base = addr,
             .index = TCG_REG_R1,
             .index_scratch = true,
         };
     } else {
         *h = (HostAddress){
             .cond = COND_AL,
-            .base = addrlo,
+            .base = addr,
             .index = guest_base ? TCG_REG_GUEST_BASE : -1,
             .index_scratch = false,
         };
@@ -1492,8 +1485,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
         ldst = new_ldst_label(s);
         ldst->is_ld = is_ld;
         ldst->oi = oi;
-        ldst->addrlo_reg = addrlo;
-        ldst->addrhi_reg = addrhi;
+        ldst->addr_reg = addr;
 
         /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {r0,r1}.  */
         QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
@@ -1501,30 +1493,19 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
         tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);
 
         /* Extract the tlb index from the address into R0.  */
-        tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
+        tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addr,
                         SHIFT_IMM_LSR(s->page_bits - CPU_TLB_ENTRY_BITS));
 
         /*
          * Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
-         * Load the tlb comparator into R2/R3 and the fast path addend into R1.
+         * Load the tlb comparator into R2 and the fast path addend into R1.
          */
-        QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
         if (cmp_off == 0) {
-            if (s->addr_type == TCG_TYPE_I32) {
-                tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2,
-                                 TCG_REG_R1, TCG_REG_R0);
-            } else {
-                tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2,
-                                 TCG_REG_R1, TCG_REG_R0);
-            }
+            tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
         } else {
             tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
                             TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
-            if (s->addr_type == TCG_TYPE_I32) {
-                tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
-            } else {
-                tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
-            }
+            tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
         }
 
         /* Load the tlb addend.  */
@@ -1543,11 +1524,11 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
          * This leaves the least significant alignment bits unchanged, and of
          * course must be zero.
          */
-        t_addr = addrlo;
+        t_addr = addr;
         if (a_mask < s_mask) {
             t_addr = TCG_REG_R0;
             tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr,
-                            addrlo, s_mask - a_mask);
+                            addr, s_mask - a_mask);
         }
         if (use_armv7_instructions && s->page_bits <= 16) {
             tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(s->page_mask | a_mask));
@@ -1558,7 +1539,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
         } else {
             if (a_mask) {
                 tcg_debug_assert(a_mask <= 0xff);
-                tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
+                tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addr, a_mask);
             }
             tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr,
                             SHIFT_IMM_LSR(s->page_bits));
@@ -1566,21 +1547,16 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
                             0, TCG_REG_R2, TCG_REG_TMP,
                             SHIFT_IMM_LSL(s->page_bits));
         }
-
-        if (s->addr_type != TCG_TYPE_I32) {
-            tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
-        }
     } else if (a_mask) {
         ldst = new_ldst_label(s);
         ldst->is_ld = is_ld;
         ldst->oi = oi;
-        ldst->addrlo_reg = addrlo;
-        ldst->addrhi_reg = addrhi;
+        ldst->addr_reg = addr;
 
         /* We are expecting alignment to max out at 7 */
         tcg_debug_assert(a_mask <= 0xff);
         /* tst addr, #mask */
-        tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
+        tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addr, a_mask);
     }
 
     return ldst;
@@ -1678,14 +1654,13 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo,
 }
 
 static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
-                            TCGReg addrlo, TCGReg addrhi,
-                            MemOpIdx oi, TCGType data_type)
+                            TCGReg addr, MemOpIdx oi, TCGType data_type)
 {
     MemOp opc = get_memop(oi);
     TCGLabelQemuLdst *ldst;
     HostAddress h;
 
-    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
+    ldst = prepare_host_addr(s, &h, addr, oi, true);
     if (ldst) {
         ldst->type = data_type;
         ldst->datalo_reg = datalo;
@@ -1764,14 +1739,13 @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo,
 }
 
 static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
-                            TCGReg addrlo, TCGReg addrhi,
-                            MemOpIdx oi, TCGType data_type)
+                            TCGReg addr, MemOpIdx oi, TCGType data_type)
 {
     MemOp opc = get_memop(oi);
     TCGLabelQemuLdst *ldst;
     HostAddress h;
 
-    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
+    ldst = prepare_host_addr(s, &h, addr, oi, false);
     if (ldst) {
         ldst->type = data_type;
         ldst->datalo_reg = datalo;
@@ -2071,36 +2045,18 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                         ARITH_MOV, args[0], 0, 0);
         break;
 
-    case INDEX_op_qemu_ld_a32_i32:
-        tcg_out_qemu_ld(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
-        break;
-    case INDEX_op_qemu_ld_a64_i32:
-        tcg_out_qemu_ld(s, args[0], -1, args[1], args[2],
-                        args[3], TCG_TYPE_I32);
+    case INDEX_op_qemu_ld_i32:
+        tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I32);
         break;
-    case INDEX_op_qemu_ld_a32_i64:
-        tcg_out_qemu_ld(s, args[0], args[1], args[2], -1,
-                        args[3], TCG_TYPE_I64);
-        break;
-    case INDEX_op_qemu_ld_a64_i64:
-        tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3],
-                        args[4], TCG_TYPE_I64);
+    case INDEX_op_qemu_ld_i64:
+        tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3], TCG_TYPE_I64);
         break;
 
-    case INDEX_op_qemu_st_a32_i32:
-        tcg_out_qemu_st(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
-        break;
-    case INDEX_op_qemu_st_a64_i32:
-        tcg_out_qemu_st(s, args[0], -1, args[1], args[2],
-                        args[3], TCG_TYPE_I32);
-        break;
-    case INDEX_op_qemu_st_a32_i64:
-        tcg_out_qemu_st(s, args[0], args[1], args[2], -1,
-                        args[3], TCG_TYPE_I64);
+    case INDEX_op_qemu_st_i32:
+        tcg_out_qemu_st(s, args[0], -1, args[1], args[2], TCG_TYPE_I32);
         break;
-    case INDEX_op_qemu_st_a64_i64:
-        tcg_out_qemu_st(s, args[0], args[1], args[2], args[3],
-                        args[4], TCG_TYPE_I64);
+    case INDEX_op_qemu_st_i64:
+        tcg_out_qemu_st(s, args[0], args[1], args[2], args[3], TCG_TYPE_I64);
         break;
 
     case INDEX_op_bswap16_i32:
@@ -2243,22 +2199,14 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
     case INDEX_op_setcond2_i32:
         return C_O1_I4(r, r, r, rI, rI);
 
-    case INDEX_op_qemu_ld_a32_i32:
+    case INDEX_op_qemu_ld_i32:
         return C_O1_I1(r, q);
-    case INDEX_op_qemu_ld_a64_i32:
-        return C_O1_I2(r, q, q);
-    case INDEX_op_qemu_ld_a32_i64:
+    case INDEX_op_qemu_ld_i64:
         return C_O2_I1(e, p, q);
-    case INDEX_op_qemu_ld_a64_i64:
-        return C_O2_I2(e, p, q, q);
-    case INDEX_op_qemu_st_a32_i32:
+    case INDEX_op_qemu_st_i32:
         return C_O0_I2(q, q);
-    case INDEX_op_qemu_st_a64_i32:
-        return C_O0_I3(q, q, q);
-    case INDEX_op_qemu_st_a32_i64:
+    case INDEX_op_qemu_st_i64:
         return C_O0_I3(Q, p, q);
-    case INDEX_op_qemu_st_a64_i64:
-        return C_O0_I4(Q, p, q, q);
 
     case INDEX_op_st_vec:
         return C_O0_I2(w, r);
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 2cac151331623..33d303a123573 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -1658,6 +1658,7 @@ static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
         tcg_out_brcond(s, 0, cond, args[1], args[3], const_args[3],
                        label_this, small);
         break;
+
     case TCG_COND_NE:
     case TCG_COND_TSTNE:
         tcg_out_brcond(s, 0, cond, args[0], args[2], const_args[2],
@@ -1665,64 +1666,14 @@ static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
         tcg_out_brcond(s, 0, cond, args[1], args[3], const_args[3],
                        label_this, small);
         break;
-    case TCG_COND_LT:
-        tcg_out_brcond(s, 0, TCG_COND_LT, args[1], args[3], const_args[3],
-                       label_this, small);
-        tcg_out_jxx(s, JCC_JNE, label_next, 1);
-        tcg_out_brcond(s, 0, TCG_COND_LTU, args[0], args[2], const_args[2],
-                       label_this, small);
-        break;
-    case TCG_COND_LE:
-        tcg_out_brcond(s, 0, TCG_COND_LT, args[1], args[3], const_args[3],
-                       label_this, small);
-        tcg_out_jxx(s, JCC_JNE, label_next, 1);
-        tcg_out_brcond(s, 0, TCG_COND_LEU, args[0], args[2], const_args[2],
-                       label_this, small);
-        break;
-    case TCG_COND_GT:
-        tcg_out_brcond(s, 0, TCG_COND_GT, args[1], args[3], const_args[3],
-                       label_this, small);
-        tcg_out_jxx(s, JCC_JNE, label_next, 1);
-        tcg_out_brcond(s, 0, TCG_COND_GTU, args[0], args[2], const_args[2],
-                       label_this, small);
-        break;
-    case TCG_COND_GE:
-        tcg_out_brcond(s, 0, TCG_COND_GT, args[1], args[3], const_args[3],
-                       label_this, small);
-        tcg_out_jxx(s, JCC_JNE, label_next, 1);
-        tcg_out_brcond(s, 0, TCG_COND_GEU, args[0], args[2], const_args[2],
-                       label_this, small);
-        break;
-    case TCG_COND_LTU:
-        tcg_out_brcond(s, 0, TCG_COND_LTU, args[1], args[3], const_args[3],
-                       label_this, small);
-        tcg_out_jxx(s, JCC_JNE, label_next, 1);
-        tcg_out_brcond(s, 0, TCG_COND_LTU, args[0], args[2], const_args[2],
-                       label_this, small);
-        break;
-    case TCG_COND_LEU:
-        tcg_out_brcond(s, 0, TCG_COND_LTU, args[1], args[3], const_args[3],
-                       label_this, small);
-        tcg_out_jxx(s, JCC_JNE, label_next, 1);
-        tcg_out_brcond(s, 0, TCG_COND_LEU, args[0], args[2], const_args[2],
-                       label_this, small);
-        break;
-    case TCG_COND_GTU:
-        tcg_out_brcond(s, 0, TCG_COND_GTU, args[1], args[3], const_args[3],
-                       label_this, small);
-        tcg_out_jxx(s, JCC_JNE, label_next, 1);
-        tcg_out_brcond(s, 0, TCG_COND_GTU, args[0], args[2], const_args[2],
-                       label_this, small);
-        break;
-    case TCG_COND_GEU:
-        tcg_out_brcond(s, 0, TCG_COND_GTU, args[1], args[3], const_args[3],
-                       label_this, small);
+
+    default:
+        tcg_out_brcond(s, 0, tcg_high_cond(cond), args[1],
+                       args[3], const_args[3], label_this, small);
         tcg_out_jxx(s, JCC_JNE, label_next, 1);
-        tcg_out_brcond(s, 0, TCG_COND_GEU, args[0], args[2], const_args[2],
-                       label_this, small);
+        tcg_out_brcond(s, 0, tcg_unsigned_cond(cond), args[0],
+                       args[2], const_args[2], label_this, small);
         break;
-    default:
-        g_assert_not_reached();
     }
     tcg_out_label(s, label_next);
 }
@@ -2169,8 +2120,7 @@ static inline int setup_guest_base_seg(void)
  * is required and fill in @h with the host address for the fast path.
  */
 static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
-                                           TCGReg addrlo, TCGReg addrhi,
-                                           MemOpIdx oi, bool is_ld)
+                                           TCGReg addr, MemOpIdx oi, bool is_ld)
 {
     TCGLabelQemuLdst *ldst = NULL;
     MemOp opc = get_memop(oi);
@@ -2184,7 +2134,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
     } else {
         *h = x86_guest_base;
     }
-    h->base = addrlo;
+    h->base = addr;
     h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128);
     a_mask = (1 << h->aa.align) - 1;
 
@@ -2202,8 +2152,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
         ldst = new_ldst_label(s);
         ldst->is_ld = is_ld;
         ldst->oi = oi;
-        ldst->addrlo_reg = addrlo;
-        ldst->addrhi_reg = addrhi;
+        ldst->addr_reg = addr;
 
         if (TCG_TARGET_REG_BITS == 64) {
             ttype = s->addr_type;
@@ -2217,7 +2166,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
             }
         }
 
-        tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo);
+        tcg_out_mov(s, tlbtype, TCG_REG_L0, addr);
         tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0,
                        s->page_bits - CPU_TLB_ENTRY_BITS);
 
@@ -2233,10 +2182,10 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
          * check that we don't cross pages for the complete access.
          */
         if (a_mask >= s_mask) {
-            tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
+            tcg_out_mov(s, ttype, TCG_REG_L1, addr);
         } else {
             tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
-                                 addrlo, s_mask - a_mask);
+                                 addr, s_mask - a_mask);
         }
         tlb_mask = s->page_mask | a_mask;
         tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0);
@@ -2250,17 +2199,6 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
         ldst->label_ptr[0] = s->code_ptr;
         s->code_ptr += 4;
 
-        if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I64) {
-            /* cmp 4(TCG_REG_L0), addrhi */
-            tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi,
-                                 TCG_REG_L0, cmp_ofs + 4);
-
-            /* jne slow_path */
-            tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
-            ldst->label_ptr[1] = s->code_ptr;
-            s->code_ptr += 4;
-        }
-
         /* TLB Hit.  */
         tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L0, TCG_REG_L0,
                    offsetof(CPUTLBEntry, addend));
@@ -2270,11 +2208,10 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
         ldst = new_ldst_label(s);
         ldst->is_ld = is_ld;
         ldst->oi = oi;
-        ldst->addrlo_reg = addrlo;
-        ldst->addrhi_reg = addrhi;
+        ldst->addr_reg = addr;
 
         /* jne slow_path */
-        jcc = tcg_out_cmp(s, TCG_COND_TSTNE, addrlo, a_mask, true, false);
+        jcc = tcg_out_cmp(s, TCG_COND_TSTNE, addr, a_mask, true, false);
         tcg_out_opc(s, OPC_JCC_long + jcc, 0, 0, 0);
         ldst->label_ptr[0] = s->code_ptr;
         s->code_ptr += 4;
@@ -2446,13 +2383,12 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
 }
 
 static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
-                            TCGReg addrlo, TCGReg addrhi,
-                            MemOpIdx oi, TCGType data_type)
+                            TCGReg addr, MemOpIdx oi, TCGType data_type)
 {
     TCGLabelQemuLdst *ldst;
     HostAddress h;
 
-    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
+    ldst = prepare_host_addr(s, &h, addr, oi, true);
     tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, get_memop(oi));
 
     if (ldst) {
@@ -2574,13 +2510,12 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
 }
 
 static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
-                            TCGReg addrlo, TCGReg addrhi,
-                            MemOpIdx oi, TCGType data_type)
+                            TCGReg addr, MemOpIdx oi, TCGType data_type)
 {
     TCGLabelQemuLdst *ldst;
     HostAddress h;
 
-    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
+    ldst = prepare_host_addr(s, &h, addr, oi, false);
     tcg_out_qemu_st_direct(s, datalo, datahi, h, get_memop(oi));
 
     if (ldst) {
@@ -2879,64 +2814,35 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
         tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, a0);
         break;
 
-    case INDEX_op_qemu_ld_a64_i32:
-        if (TCG_TARGET_REG_BITS == 32) {
-            tcg_out_qemu_ld(s, a0, -1, a1, a2, args[3], TCG_TYPE_I32);
-            break;
-        }
-        /* fall through */
-    case INDEX_op_qemu_ld_a32_i32:
-        tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I32);
-        break;
-    case INDEX_op_qemu_ld_a32_i64:
-        if (TCG_TARGET_REG_BITS == 64) {
-            tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I64);
-        } else {
-            tcg_out_qemu_ld(s, a0, a1, a2, -1, args[3], TCG_TYPE_I64);
-        }
+    case INDEX_op_qemu_ld_i32:
+        tcg_out_qemu_ld(s, a0, -1, a1, a2, TCG_TYPE_I32);
         break;
-    case INDEX_op_qemu_ld_a64_i64:
+    case INDEX_op_qemu_ld_i64:
         if (TCG_TARGET_REG_BITS == 64) {
-            tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I64);
+            tcg_out_qemu_ld(s, a0, -1, a1, a2, TCG_TYPE_I64);
         } else {
-            tcg_out_qemu_ld(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
+            tcg_out_qemu_ld(s, a0, a1, a2, args[3], TCG_TYPE_I64);
         }
         break;
-    case INDEX_op_qemu_ld_a32_i128:
-    case INDEX_op_qemu_ld_a64_i128:
+    case INDEX_op_qemu_ld_i128:
         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
-        tcg_out_qemu_ld(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128);
+        tcg_out_qemu_ld(s, a0, a1, a2, args[3], TCG_TYPE_I128);
         break;
 
-    case INDEX_op_qemu_st_a64_i32:
-    case INDEX_op_qemu_st8_a64_i32:
-        if (TCG_TARGET_REG_BITS == 32) {
-            tcg_out_qemu_st(s, a0, -1, a1, a2, args[3], TCG_TYPE_I32);
-            break;
-        }
-        /* fall through */
-    case INDEX_op_qemu_st_a32_i32:
-    case INDEX_op_qemu_st8_a32_i32:
-        tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I32);
+    case INDEX_op_qemu_st_i32:
+    case INDEX_op_qemu_st8_i32:
+        tcg_out_qemu_st(s, a0, -1, a1, a2, TCG_TYPE_I32);
         break;
-    case INDEX_op_qemu_st_a32_i64:
+    case INDEX_op_qemu_st_i64:
         if (TCG_TARGET_REG_BITS == 64) {
-            tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I64);
+            tcg_out_qemu_st(s, a0, -1, a1, a2, TCG_TYPE_I64);
         } else {
-            tcg_out_qemu_st(s, a0, a1, a2, -1, args[3], TCG_TYPE_I64);
+            tcg_out_qemu_st(s, a0, a1, a2, args[3], TCG_TYPE_I64);
         }
         break;
-    case INDEX_op_qemu_st_a64_i64:
-        if (TCG_TARGET_REG_BITS == 64) {
-            tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I64);
-        } else {
-            tcg_out_qemu_st(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
-        }
-        break;
-    case INDEX_op_qemu_st_a32_i128:
-    case INDEX_op_qemu_st_a64_i128:
+    case INDEX_op_qemu_st_i128:
         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
-        tcg_out_qemu_st(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128);
+        tcg_out_qemu_st(s, a0, a1, a2, args[3], TCG_TYPE_I128);
         break;
 
     OP_32_64(mulu2):
@@ -3824,36 +3730,24 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
     case INDEX_op_clz_i64:
         return have_lzcnt ? C_N1_I2(r, r, rW) : C_N1_I2(r, r, r);
 
-    case INDEX_op_qemu_ld_a32_i32:
+    case INDEX_op_qemu_ld_i32:
         return C_O1_I1(r, L);
-    case INDEX_op_qemu_ld_a64_i32:
-        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O1_I2(r, L, L);
 
-    case INDEX_op_qemu_st_a32_i32:
+    case INDEX_op_qemu_st_i32:
         return C_O0_I2(L, L);
-    case INDEX_op_qemu_st_a64_i32:
-        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I3(L, L, L);
-    case INDEX_op_qemu_st8_a32_i32:
+    case INDEX_op_qemu_st8_i32:
         return C_O0_I2(s, L);
-    case INDEX_op_qemu_st8_a64_i32:
-        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(s, L) : C_O0_I3(s, L, L);
 
-    case INDEX_op_qemu_ld_a32_i64:
+    case INDEX_op_qemu_ld_i64:
         return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O2_I1(r, r, L);
-    case INDEX_op_qemu_ld_a64_i64:
-        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O2_I2(r, r, L, L);
 
-    case INDEX_op_qemu_st_a32_i64:
+    case INDEX_op_qemu_st_i64:
         return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I3(L, L, L);
-    case INDEX_op_qemu_st_a64_i64:
-        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I4(L, L, L, L);
 
-    case INDEX_op_qemu_ld_a32_i128:
-    case INDEX_op_qemu_ld_a64_i128:
+    case INDEX_op_qemu_ld_i128:
         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
         return C_O2_I1(r, r, L);
-    case INDEX_op_qemu_st_a32_i128:
-    case INDEX_op_qemu_st_a64_i128:
+    case INDEX_op_qemu_st_i128:
         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
         return C_O0_I3(L, L, L);
 
diff --git a/tcg/loongarch64/tcg-target-con-set.h b/tcg/loongarch64/tcg-target-con-set.h
index cae6c2aad6468..8afaee9476c1b 100644
--- a/tcg/loongarch64/tcg-target-con-set.h
+++ b/tcg/loongarch64/tcg-target-con-set.h
@@ -15,8 +15,8 @@
  * tcg-target-con-str.h; the constraint combination is inclusive or.
  */
 C_O0_I1(r)
-C_O0_I2(rZ, r)
-C_O0_I2(rZ, rZ)
+C_O0_I2(rz, r)
+C_O0_I2(rz, rz)
 C_O0_I2(w, r)
 C_O0_I3(r, r, r)
 C_O1_I1(r, r)
@@ -28,14 +28,13 @@ C_O1_I2(r, r, rI)
 C_O1_I2(r, r, rJ)
 C_O1_I2(r, r, rU)
 C_O1_I2(r, r, rW)
-C_O1_I2(r, r, rZ)
-C_O1_I2(r, 0, rZ)
-C_O1_I2(r, rZ, ri)
-C_O1_I2(r, rZ, rJ)
-C_O1_I2(r, rZ, rZ)
+C_O1_I2(r, 0, rz)
+C_O1_I2(r, rz, ri)
+C_O1_I2(r, rz, rJ)
+C_O1_I2(r, rz, rz)
 C_O1_I2(w, w, w)
 C_O1_I2(w, w, wM)
 C_O1_I2(w, w, wA)
 C_O1_I3(w, w, w, w)
-C_O1_I4(r, rZ, rJ, rZ, rZ)
+C_O1_I4(r, rz, rJ, rz, rz)
 C_N2_I1(r, r, r)
diff --git a/tcg/loongarch64/tcg-target-con-str.h b/tcg/loongarch64/tcg-target-con-str.h
index 2ba9c135ac19e..99759120b470c 100644
--- a/tcg/loongarch64/tcg-target-con-str.h
+++ b/tcg/loongarch64/tcg-target-con-str.h
@@ -23,7 +23,6 @@ REGS('w', ALL_VECTOR_REGS)
 CONST('I', TCG_CT_CONST_S12)
 CONST('J', TCG_CT_CONST_S32)
 CONST('U', TCG_CT_CONST_U12)
-CONST('Z', TCG_CT_CONST_ZERO)
 CONST('C', TCG_CT_CONST_C12)
 CONST('W', TCG_CT_CONST_WSZ)
 CONST('M', TCG_CT_CONST_VCMP)
diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h
index ac88522eef250..188b00799fbec 100644
--- a/tcg/loongarch64/tcg-target-has.h
+++ b/tcg/loongarch64/tcg-target-has.h
@@ -37,8 +37,6 @@
 #define TCG_TARGET_HAS_clz_i32          1
 #define TCG_TARGET_HAS_ctz_i32          1
 #define TCG_TARGET_HAS_ctpop_i32        0
-#define TCG_TARGET_HAS_brcond2          0
-#define TCG_TARGET_HAS_setcond2         0
 #define TCG_TARGET_HAS_qemu_st8_i32     0
 
 /* 64-bit operations */
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index cebe8dd354e64..cbd7642b58ac5 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -173,14 +173,13 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
 
 #define TCG_GUEST_BASE_REG TCG_REG_S1
 
-#define TCG_CT_CONST_ZERO  0x100
-#define TCG_CT_CONST_S12   0x200
-#define TCG_CT_CONST_S32   0x400
-#define TCG_CT_CONST_U12   0x800
-#define TCG_CT_CONST_C12   0x1000
-#define TCG_CT_CONST_WSZ   0x2000
-#define TCG_CT_CONST_VCMP  0x4000
-#define TCG_CT_CONST_VADD  0x8000
+#define TCG_CT_CONST_S12   0x100
+#define TCG_CT_CONST_S32   0x200
+#define TCG_CT_CONST_U12   0x400
+#define TCG_CT_CONST_C12   0x800
+#define TCG_CT_CONST_WSZ   0x1000
+#define TCG_CT_CONST_VCMP  0x2000
+#define TCG_CT_CONST_VADD  0x4000
 
 #define ALL_GENERAL_REGS   MAKE_64BIT_MASK(0, 32)
 #define ALL_VECTOR_REGS    MAKE_64BIT_MASK(32, 32)
@@ -197,9 +196,6 @@ static bool tcg_target_const_match(int64_t val, int ct,
     if (ct & TCG_CT_CONST) {
         return true;
     }
-    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
-        return true;
-    }
     if ((ct & TCG_CT_CONST_S12) && val == sextreg(val, 0, 12)) {
         return true;
     }
@@ -1010,7 +1006,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
         ldst = new_ldst_label(s);
         ldst->is_ld = is_ld;
         ldst->oi = oi;
-        ldst->addrlo_reg = addr_reg;
+        ldst->addr_reg = addr_reg;
 
         tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
         tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
@@ -1055,7 +1051,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
 
             ldst->is_ld = is_ld;
             ldst->oi = oi;
-            ldst->addrlo_reg = addr_reg;
+            ldst->addr_reg = addr_reg;
 
             /*
              * Without micro-architecture details, we don't know which of
@@ -1675,28 +1671,22 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
         tcg_out_ldst(s, OPC_ST_D, a0, a1, a2);
         break;
 
-    case INDEX_op_qemu_ld_a32_i32:
-    case INDEX_op_qemu_ld_a64_i32:
+    case INDEX_op_qemu_ld_i32:
         tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
         break;
-    case INDEX_op_qemu_ld_a32_i64:
-    case INDEX_op_qemu_ld_a64_i64:
+    case INDEX_op_qemu_ld_i64:
         tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
         break;
-    case INDEX_op_qemu_ld_a32_i128:
-    case INDEX_op_qemu_ld_a64_i128:
+    case INDEX_op_qemu_ld_i128:
         tcg_out_qemu_ldst_i128(s, a0, a1, a2, a3, true);
         break;
-    case INDEX_op_qemu_st_a32_i32:
-    case INDEX_op_qemu_st_a64_i32:
+    case INDEX_op_qemu_st_i32:
         tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
         break;
-    case INDEX_op_qemu_st_a32_i64:
-    case INDEX_op_qemu_st_a64_i64:
+    case INDEX_op_qemu_st_i64:
         tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
         break;
-    case INDEX_op_qemu_st_a32_i128:
-    case INDEX_op_qemu_st_a64_i128:
+    case INDEX_op_qemu_st_i128:
         tcg_out_qemu_ldst_i128(s, a0, a1, a2, a3, false);
         break;
 
@@ -2233,23 +2223,19 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
     case INDEX_op_st32_i64:
     case INDEX_op_st_i32:
     case INDEX_op_st_i64:
-    case INDEX_op_qemu_st_a32_i32:
-    case INDEX_op_qemu_st_a64_i32:
-    case INDEX_op_qemu_st_a32_i64:
-    case INDEX_op_qemu_st_a64_i64:
-        return C_O0_I2(rZ, r);
-
-    case INDEX_op_qemu_ld_a32_i128:
-    case INDEX_op_qemu_ld_a64_i128:
+    case INDEX_op_qemu_st_i32:
+    case INDEX_op_qemu_st_i64:
+        return C_O0_I2(rz, r);
+
+    case INDEX_op_qemu_ld_i128:
         return C_N2_I1(r, r, r);
 
-    case INDEX_op_qemu_st_a32_i128:
-    case INDEX_op_qemu_st_a64_i128:
+    case INDEX_op_qemu_st_i128:
         return C_O0_I3(r, r, r);
 
     case INDEX_op_brcond_i32:
     case INDEX_op_brcond_i64:
-        return C_O0_I2(rZ, rZ);
+        return C_O0_I2(rz, rz);
 
     case INDEX_op_ext8s_i32:
     case INDEX_op_ext8s_i64:
@@ -2290,10 +2276,8 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
     case INDEX_op_ld32u_i64:
     case INDEX_op_ld_i32:
     case INDEX_op_ld_i64:
-    case INDEX_op_qemu_ld_a32_i32:
-    case INDEX_op_qemu_ld_a64_i32:
-    case INDEX_op_qemu_ld_a32_i64:
-    case INDEX_op_qemu_ld_a64_i64:
+    case INDEX_op_qemu_ld_i32:
+    case INDEX_op_qemu_ld_i64:
         return C_O1_I1(r, r);
 
     case INDEX_op_andc_i32:
@@ -2344,14 +2328,14 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
     case INDEX_op_deposit_i32:
     case INDEX_op_deposit_i64:
         /* Must deposit into the same register as input */
-        return C_O1_I2(r, 0, rZ);
+        return C_O1_I2(r, 0, rz);
 
     case INDEX_op_sub_i32:
     case INDEX_op_setcond_i32:
-        return C_O1_I2(r, rZ, ri);
+        return C_O1_I2(r, rz, ri);
     case INDEX_op_sub_i64:
     case INDEX_op_setcond_i64:
-        return C_O1_I2(r, rZ, rJ);
+        return C_O1_I2(r, rz, rJ);
 
     case INDEX_op_mul_i32:
     case INDEX_op_mul_i64:
@@ -2367,11 +2351,11 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
     case INDEX_op_rem_i64:
     case INDEX_op_remu_i32:
     case INDEX_op_remu_i64:
-        return C_O1_I2(r, rZ, rZ);
+        return C_O1_I2(r, rz, rz);
 
     case INDEX_op_movcond_i32:
     case INDEX_op_movcond_i64:
-        return C_O1_I4(r, rZ, rJ, rZ, rZ);
+        return C_O1_I4(r, rz, rJ, rz, rz);
 
     case INDEX_op_ld_vec:
     case INDEX_op_dupm_vec:
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
index 8533284631393..6a206fb97e6ec 100644
--- a/tcg/loongarch64/tcg-target.h
+++ b/tcg/loongarch64/tcg-target.h
@@ -85,4 +85,6 @@ typedef enum {
     TCG_VEC_TMP0 = TCG_REG_V23,
 } TCGReg;
 
+#define TCG_REG_ZERO  TCG_REG_ZERO
+
 #endif /* LOONGARCH_TCG_TARGET_H */
diff --git a/tcg/mips/tcg-target-con-set.h b/tcg/mips/tcg-target-con-set.h
index 864034f468782..06ab04cc4dd99 100644
--- a/tcg/mips/tcg-target-con-set.h
+++ b/tcg/mips/tcg-target-con-set.h
@@ -10,24 +10,24 @@
  * tcg-target-con-str.h; the constraint combination is inclusive or.
  */
 C_O0_I1(r)
-C_O0_I2(rZ, r)
-C_O0_I2(rZ, rZ)
-C_O0_I3(rZ, r, r)
-C_O0_I3(rZ, rZ, r)
-C_O0_I4(rZ, rZ, rZ, rZ)
-C_O0_I4(rZ, rZ, r, r)
+C_O0_I2(rz, r)
+C_O0_I2(rz, rz)
+C_O0_I3(rz, r, r)
+C_O0_I3(rz, rz, r)
+C_O0_I4(rz, rz, rz, rz)
+C_O0_I4(rz, rz, r, r)
 C_O1_I1(r, r)
-C_O1_I2(r, 0, rZ)
+C_O1_I2(r, 0, rz)
 C_O1_I2(r, r, r)
 C_O1_I2(r, r, ri)
 C_O1_I2(r, r, rI)
 C_O1_I2(r, r, rIK)
 C_O1_I2(r, r, rJ)
-C_O1_I2(r, r, rWZ)
-C_O1_I2(r, rZ, rN)
-C_O1_I2(r, rZ, rZ)
-C_O1_I4(r, rZ, rZ, rZ, 0)
-C_O1_I4(r, rZ, rZ, rZ, rZ)
+C_O1_I2(r, r, rzW)
+C_O1_I2(r, rz, rN)
+C_O1_I2(r, rz, rz)
+C_O1_I4(r, rz, rz, rz, 0)
+C_O1_I4(r, rz, rz, rz, rz)
 C_O2_I1(r, r, r)
 C_O2_I2(r, r, r, r)
-C_O2_I4(r, r, rZ, rZ, rN, rN)
+C_O2_I4(r, r, rz, rz, rN, rN)
diff --git a/tcg/mips/tcg-target-con-str.h b/tcg/mips/tcg-target-con-str.h
index 413c280a7a90c..dfe2b156df544 100644
--- a/tcg/mips/tcg-target-con-str.h
+++ b/tcg/mips/tcg-target-con-str.h
@@ -19,4 +19,3 @@ CONST('J', TCG_CT_CONST_S16)
 CONST('K', TCG_CT_CONST_P2M1)
 CONST('N', TCG_CT_CONST_N16)
 CONST('W', TCG_CT_CONST_WSZ)
-CONST('Z', TCG_CT_CONST_ZERO)
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index 99f6ef6c766ef..f8c105ba37c6a 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -184,12 +184,11 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
     g_assert_not_reached();
 }
 
-#define TCG_CT_CONST_ZERO 0x100
-#define TCG_CT_CONST_U16  0x200    /* Unsigned 16-bit: 0 - 0xffff.  */
-#define TCG_CT_CONST_S16  0x400    /* Signed 16-bit: -32768 - 32767 */
-#define TCG_CT_CONST_P2M1 0x800    /* Power of 2 minus 1.  */
-#define TCG_CT_CONST_N16  0x1000   /* "Negatable" 16-bit: -32767 - 32767 */
-#define TCG_CT_CONST_WSZ  0x2000   /* word size */
+#define TCG_CT_CONST_U16  0x100    /* Unsigned 16-bit: 0 - 0xffff.  */
+#define TCG_CT_CONST_S16  0x200    /* Signed 16-bit: -32768 - 32767 */
+#define TCG_CT_CONST_P2M1 0x400    /* Power of 2 minus 1.  */
+#define TCG_CT_CONST_N16  0x800    /* "Negatable" 16-bit: -32767 - 32767 */
+#define TCG_CT_CONST_WSZ  0x1000   /* word size */
 
 #define ALL_GENERAL_REGS  0xffffffffu
 
@@ -204,8 +203,6 @@ static bool tcg_target_const_match(int64_t val, int ct,
 {
     if (ct & TCG_CT_CONST) {
         return 1;
-    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
-        return 1;
     } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) {
         return 1;
     } else if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
@@ -1217,8 +1214,7 @@ bool tcg_target_has_memory_bswap(MemOp memop)
  * is required and fill in @h with the host address for the fast path.
  */
 static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
-                                           TCGReg addrlo, TCGReg addrhi,
-                                           MemOpIdx oi, bool is_ld)
+                                           TCGReg addr, MemOpIdx oi, bool is_ld)
 {
     TCGType addr_type = s->addr_type;
     TCGLabelQemuLdst *ldst = NULL;
@@ -1245,8 +1241,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
         ldst = new_ldst_label(s);
         ldst->is_ld = is_ld;
         ldst->oi = oi;
-        ldst->addrlo_reg = addrlo;
-        ldst->addrhi_reg = addrhi;
+        ldst->addr_reg = addr;
 
         /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
         tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_AREG0, mask_off);
@@ -1254,29 +1249,26 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
 
         /* Extract the TLB index from the address into TMP3.  */
         if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
-            tcg_out_opc_sa(s, OPC_SRL, TCG_TMP3, addrlo,
+            tcg_out_opc_sa(s, OPC_SRL, TCG_TMP3, addr,
                            s->page_bits - CPU_TLB_ENTRY_BITS);
         } else {
-            tcg_out_dsrl(s, TCG_TMP3, addrlo,
-                         s->page_bits - CPU_TLB_ENTRY_BITS);
+            tcg_out_dsrl(s, TCG_TMP3, addr, s->page_bits - CPU_TLB_ENTRY_BITS);
         }
         tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0);
 
         /* Add the tlb_table pointer, creating the CPUTLBEntry address.  */
         tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, TCG_TMP1);
 
-        if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
-            /* Load the (low half) tlb comparator.  */
+        /* Load the tlb comparator.  */
+        if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
             tcg_out_ld(s, TCG_TYPE_I32, TCG_TMP0, TCG_TMP3,
                        cmp_off + HOST_BIG_ENDIAN * 4);
         } else {
-            tcg_out_ld(s, TCG_TYPE_I64, TCG_TMP0, TCG_TMP3, cmp_off);
+            tcg_out_ld(s, TCG_TYPE_REG, TCG_TMP0, TCG_TMP3, cmp_off);
         }
 
-        if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) {
-            /* Load the tlb addend for the fast path.  */
-            tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off);
-        }
+        /* Load the tlb addend for the fast path.  */
+        tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off);
 
         /*
          * Mask the page bits, keeping the alignment bits to compare against.
@@ -1288,48 +1280,35 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
             tcg_out_opc_imm(s, (TCG_TARGET_REG_BITS == 32
                                 || addr_type == TCG_TYPE_I32
                                 ? OPC_ADDIU : OPC_DADDIU),
-                            TCG_TMP2, addrlo, s_mask - a_mask);
+                            TCG_TMP2, addr, s_mask - a_mask);
             tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2);
         } else {
-            tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrlo);
+            tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addr);
         }
 
         /* Zero extend a 32-bit guest address for a 64-bit host. */
         if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
-            tcg_out_ext32u(s, TCG_TMP2, addrlo);
-            addrlo = TCG_TMP2;
+            tcg_out_ext32u(s, TCG_TMP2, addr);
+            addr = TCG_TMP2;
         }
 
         ldst->label_ptr[0] = s->code_ptr;
         tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0);
 
-        /* Load and test the high half tlb comparator.  */
-        if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) {
-            /* delay slot */
-            tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF);
-
-            /* Load the tlb addend for the fast path.  */
-            tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off);
-
-            ldst->label_ptr[1] = s->code_ptr;
-            tcg_out_opc_br(s, OPC_BNE, addrhi, TCG_TMP0);
-        }
-
         /* delay slot */
         base = TCG_TMP3;
-        tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP3, addrlo);
+        tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP3, addr);
     } else {
         if (a_mask && (use_mips32r6_instructions || a_bits != s_bits)) {
             ldst = new_ldst_label(s);
 
             ldst->is_ld = is_ld;
             ldst->oi = oi;
-            ldst->addrlo_reg = addrlo;
-            ldst->addrhi_reg = addrhi;
+            ldst->addr_reg = addr;
 
             /* We are expecting a_bits to max out at 7, much lower than ANDI. */
             tcg_debug_assert(a_bits < 16);
-            tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, addrlo, a_mask);
+            tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, addr, a_mask);
 
             ldst->label_ptr[0] = s->code_ptr;
             if (use_mips32r6_instructions) {
@@ -1340,7 +1319,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
             }
         }
 
-        base = addrlo;
+        base = addr;
         if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
             tcg_out_ext32u(s, TCG_REG_A0, base);
             base = TCG_REG_A0;
@@ -1460,14 +1439,13 @@ static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
 }
 
 static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
-                            TCGReg addrlo, TCGReg addrhi,
-                            MemOpIdx oi, TCGType data_type)
+                            TCGReg addr, MemOpIdx oi, TCGType data_type)
 {
     MemOp opc = get_memop(oi);
     TCGLabelQemuLdst *ldst;
     HostAddress h;
 
-    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
+    ldst = prepare_host_addr(s, &h, addr, oi, true);
 
     if (use_mips32r6_instructions || h.aa.align >= (opc & MO_SIZE)) {
         tcg_out_qemu_ld_direct(s, datalo, datahi, h.base, opc, data_type);
@@ -1547,14 +1525,13 @@ static void tcg_out_qemu_st_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
 }
 
 static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
-                            TCGReg addrlo, TCGReg addrhi,
-                            MemOpIdx oi, TCGType data_type)
+                            TCGReg addr, MemOpIdx oi, TCGType data_type)
 {
     MemOp opc = get_memop(oi);
     TCGLabelQemuLdst *ldst;
     HostAddress h;
 
-    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
+    ldst = prepare_host_addr(s, &h, addr, oi, false);
 
     if (use_mips32r6_instructions || h.aa.align >= (opc & MO_SIZE)) {
         tcg_out_qemu_st_direct(s, datalo, datahi, h.base, opc);
@@ -1686,11 +1663,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
     TCGArg a0, a1, a2;
     int c2;
 
-    /*
-     * Note that many operands use the constraint set "rZ".
-     * We make use of the fact that 0 is the ZERO register,
-     * and hence such cases need not check for const_args.
-     */
     a0 = args[0];
     a1 = args[1];
     a2 = args[2];
@@ -2095,51 +2067,25 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
         tcg_out_setcond2(s, args[5], a0, a1, a2, args[3], args[4]);
         break;
 
-    case INDEX_op_qemu_ld_a64_i32:
-        if (TCG_TARGET_REG_BITS == 32) {
-            tcg_out_qemu_ld(s, a0, 0, a1, a2, args[3], TCG_TYPE_I32);
-            break;
-        }
-        /* fall through */
-    case INDEX_op_qemu_ld_a32_i32:
-        tcg_out_qemu_ld(s, a0, 0, a1, 0, a2, TCG_TYPE_I32);
-        break;
-    case INDEX_op_qemu_ld_a32_i64:
-        if (TCG_TARGET_REG_BITS == 64) {
-            tcg_out_qemu_ld(s, a0, 0, a1, 0, a2, TCG_TYPE_I64);
-        } else {
-            tcg_out_qemu_ld(s, a0, a1, a2, 0, args[3], TCG_TYPE_I64);
-        }
+    case INDEX_op_qemu_ld_i32:
+        tcg_out_qemu_ld(s, a0, 0, a1, a2, TCG_TYPE_I32);
         break;
-    case INDEX_op_qemu_ld_a64_i64:
+    case INDEX_op_qemu_ld_i64:
         if (TCG_TARGET_REG_BITS == 64) {
-            tcg_out_qemu_ld(s, a0, 0, a1, 0, a2, TCG_TYPE_I64);
+            tcg_out_qemu_ld(s, a0, 0, a1, a2, TCG_TYPE_I64);
         } else {
-            tcg_out_qemu_ld(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
+            tcg_out_qemu_ld(s, a0, a1, a2, args[3], TCG_TYPE_I64);
         }
         break;
 
-    case INDEX_op_qemu_st_a64_i32:
-        if (TCG_TARGET_REG_BITS == 32) {
-            tcg_out_qemu_st(s, a0, 0, a1, a2, args[3], TCG_TYPE_I32);
-            break;
-        }
-        /* fall through */
-    case INDEX_op_qemu_st_a32_i32:
-        tcg_out_qemu_st(s, a0, 0, a1, 0, a2, TCG_TYPE_I32);
-        break;
-    case INDEX_op_qemu_st_a32_i64:
-        if (TCG_TARGET_REG_BITS == 64) {
-            tcg_out_qemu_st(s, a0, 0, a1, 0, a2, TCG_TYPE_I64);
-        } else {
-            tcg_out_qemu_st(s, a0, a1, a2, 0, args[3], TCG_TYPE_I64);
-        }
+    case INDEX_op_qemu_st_i32:
+        tcg_out_qemu_st(s, a0, 0, a1, a2, TCG_TYPE_I32);
         break;
-    case INDEX_op_qemu_st_a64_i64:
+    case INDEX_op_qemu_st_i64:
         if (TCG_TARGET_REG_BITS == 64) {
-            tcg_out_qemu_st(s, a0, 0, a1, 0, a2, TCG_TYPE_I64);
+            tcg_out_qemu_st(s, a0, 0, a1, a2, TCG_TYPE_I64);
         } else {
-            tcg_out_qemu_st(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
+            tcg_out_qemu_st(s, a0, a1, a2, args[3], TCG_TYPE_I64);
         }
         break;
 
@@ -2227,14 +2173,14 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
     case INDEX_op_st16_i64:
     case INDEX_op_st32_i64:
     case INDEX_op_st_i64:
-        return C_O0_I2(rZ, r);
+        return C_O0_I2(rz, r);
 
     case INDEX_op_add_i32:
     case INDEX_op_add_i64:
         return C_O1_I2(r, r, rJ);
     case INDEX_op_sub_i32:
     case INDEX_op_sub_i64:
-        return C_O1_I2(r, rZ, rN);
+        return C_O1_I2(r, rz, rN);
     case INDEX_op_mul_i32:
     case INDEX_op_mulsh_i32:
     case INDEX_op_muluh_i32:
@@ -2253,7 +2199,7 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
     case INDEX_op_remu_i64:
     case INDEX_op_nor_i64:
     case INDEX_op_setcond_i64:
-        return C_O1_I2(r, rZ, rZ);
+        return C_O1_I2(r, rz, rz);
     case INDEX_op_muls2_i32:
     case INDEX_op_mulu2_i32:
     case INDEX_op_muls2_i64:
@@ -2280,44 +2226,35 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
         return C_O1_I2(r, r, ri);
     case INDEX_op_clz_i32:
     case INDEX_op_clz_i64:
-        return C_O1_I2(r, r, rWZ);
+        return C_O1_I2(r, r, rzW);
 
     case INDEX_op_deposit_i32:
     case INDEX_op_deposit_i64:
-        return C_O1_I2(r, 0, rZ);
+        return C_O1_I2(r, 0, rz);
     case INDEX_op_brcond_i32:
     case INDEX_op_brcond_i64:
-        return C_O0_I2(rZ, rZ);
+        return C_O0_I2(rz, rz);
     case INDEX_op_movcond_i32:
     case INDEX_op_movcond_i64:
         return (use_mips32r6_instructions
-                ? C_O1_I4(r, rZ, rZ, rZ, rZ)
-                : C_O1_I4(r, rZ, rZ, rZ, 0));
+                ? C_O1_I4(r, rz, rz, rz, rz)
+                : C_O1_I4(r, rz, rz, rz, 0));
     case INDEX_op_add2_i32:
     case INDEX_op_sub2_i32:
-        return C_O2_I4(r, r, rZ, rZ, rN, rN);
+        return C_O2_I4(r, r, rz, rz, rN, rN);
     case INDEX_op_setcond2_i32:
-        return C_O1_I4(r, rZ, rZ, rZ, rZ);
+        return C_O1_I4(r, rz, rz, rz, rz);
     case INDEX_op_brcond2_i32:
-        return C_O0_I4(rZ, rZ, rZ, rZ);
+        return C_O0_I4(rz, rz, rz, rz);
 
-    case INDEX_op_qemu_ld_a32_i32:
+    case INDEX_op_qemu_ld_i32:
         return C_O1_I1(r, r);
-    case INDEX_op_qemu_ld_a64_i32:
-        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O1_I2(r, r, r);
-    case INDEX_op_qemu_st_a32_i32:
-        return C_O0_I2(rZ, r);
-    case INDEX_op_qemu_st_a64_i32:
-        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(rZ, r) : C_O0_I3(rZ, r, r);
-    case INDEX_op_qemu_ld_a32_i64:
+    case INDEX_op_qemu_st_i32:
+        return C_O0_I2(rz, r);
+    case INDEX_op_qemu_ld_i64:
         return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);
-    case INDEX_op_qemu_ld_a64_i64:
-        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I2(r, r, r, r);
-    case INDEX_op_qemu_st_a32_i64:
-        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(rZ, r) : C_O0_I3(rZ, rZ, r);
-    case INDEX_op_qemu_st_a64_i64:
-        return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(rZ, r)
-                : C_O0_I4(rZ, rZ, r, r));
+    case INDEX_op_qemu_st_i64:
+        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(rz, r) : C_O0_I3(rz, rz, r);
 
     default:
         return C_NotImplemented;
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index 3090acc4f5cbb..bd4ca5f852d70 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -70,4 +70,6 @@ typedef enum {
     TCG_AREG0 = TCG_REG_S8,
 } TCGReg;
 
+#define TCG_REG_ZERO  TCG_REG_ZERO
+
 #endif
diff --git a/tcg/optimize.c b/tcg/optimize.c
index bca11cc427b08..f922f86a1d1ce 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -3011,29 +3011,22 @@ void tcg_optimize(TCGContext *s)
         CASE_OP_32_64_VEC(orc):
             done = fold_orc(&ctx, op);
             break;
-        case INDEX_op_qemu_ld_a32_i32:
-        case INDEX_op_qemu_ld_a64_i32:
+        case INDEX_op_qemu_ld_i32:
             done = fold_qemu_ld_1reg(&ctx, op);
             break;
-        case INDEX_op_qemu_ld_a32_i64:
-        case INDEX_op_qemu_ld_a64_i64:
+        case INDEX_op_qemu_ld_i64:
             if (TCG_TARGET_REG_BITS == 64) {
                 done = fold_qemu_ld_1reg(&ctx, op);
                 break;
             }
             QEMU_FALLTHROUGH;
-        case INDEX_op_qemu_ld_a32_i128:
-        case INDEX_op_qemu_ld_a64_i128:
+        case INDEX_op_qemu_ld_i128:
             done = fold_qemu_ld_2reg(&ctx, op);
             break;
-        case INDEX_op_qemu_st8_a32_i32:
-        case INDEX_op_qemu_st8_a64_i32:
-        case INDEX_op_qemu_st_a32_i32:
-        case INDEX_op_qemu_st_a64_i32:
-        case INDEX_op_qemu_st_a32_i64:
-        case INDEX_op_qemu_st_a64_i64:
-        case INDEX_op_qemu_st_a32_i128:
-        case INDEX_op_qemu_st_a64_i128:
+        case INDEX_op_qemu_st8_i32:
+        case INDEX_op_qemu_st_i32:
+        case INDEX_op_qemu_st_i64:
+        case INDEX_op_qemu_st_i128:
             done = fold_qemu_st(&ctx, op);
             break;
         CASE_OP_32_64(rem):
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 6e711cd53f5f0..822925a19b39d 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -2438,8 +2438,7 @@ bool tcg_target_has_memory_bswap(MemOp memop)
  * is required and fill in @h with the host address for the fast path.
  */
 static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
-                                           TCGReg addrlo, TCGReg addrhi,
-                                           MemOpIdx oi, bool is_ld)
+                                           TCGReg addr, MemOpIdx oi, bool is_ld)
 {
     TCGType addr_type = s->addr_type;
     TCGLabelQemuLdst *ldst = NULL;
@@ -2474,8 +2473,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
         ldst = new_ldst_label(s);
         ldst->is_ld = is_ld;
         ldst->oi = oi;
-        ldst->addrlo_reg = addrlo;
-        ldst->addrhi_reg = addrhi;
+        ldst->addr_reg = addr;
 
         /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
         tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off);
@@ -2483,36 +2481,25 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
 
         /* Extract the page index, shifted into place for tlb index.  */
         if (TCG_TARGET_REG_BITS == 32) {
-            tcg_out_shri32(s, TCG_REG_R0, addrlo,
+            tcg_out_shri32(s, TCG_REG_R0, addr,
                            s->page_bits - CPU_TLB_ENTRY_BITS);
         } else {
-            tcg_out_shri64(s, TCG_REG_R0, addrlo,
+            tcg_out_shri64(s, TCG_REG_R0, addr,
                            s->page_bits - CPU_TLB_ENTRY_BITS);
         }
         tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));
 
         /*
-         * Load the (low part) TLB comparator into TMP2.
+         * Load the TLB comparator into TMP2.
          * For 64-bit host, always load the entire 64-bit slot for simplicity.
          * We will ignore the high bits with tcg_out_cmp(..., addr_type).
          */
-        if (TCG_TARGET_REG_BITS == 64) {
-            if (cmp_off == 0) {
-                tcg_out32(s, LDUX | TAB(TCG_REG_TMP2,
-                                        TCG_REG_TMP1, TCG_REG_TMP2));
-            } else {
-                tcg_out32(s, ADD | TAB(TCG_REG_TMP1,
-                                       TCG_REG_TMP1, TCG_REG_TMP2));
-                tcg_out_ld(s, TCG_TYPE_I64, TCG_REG_TMP2,
-                           TCG_REG_TMP1, cmp_off);
-            }
-        } else if (cmp_off == 0 && !HOST_BIG_ENDIAN) {
-            tcg_out32(s, LWZUX | TAB(TCG_REG_TMP2,
-                                     TCG_REG_TMP1, TCG_REG_TMP2));
+        if (cmp_off == 0) {
+            tcg_out32(s, (TCG_TARGET_REG_BITS == 64 ? LDUX : LWZUX)
+                      | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2));
         } else {
             tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
-            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
-                       cmp_off + 4 * HOST_BIG_ENDIAN);
+            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP1, cmp_off);
         }
 
         /*
@@ -2534,10 +2521,10 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
             if (a_bits < s_bits) {
                 a_bits = s_bits;
             }
-            tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
+            tcg_out_rlw(s, RLWINM, TCG_REG_R0, addr, 0,
                         (32 - a_bits) & 31, 31 - s->page_bits);
         } else {
-            TCGReg t = addrlo;
+            TCGReg t = addr;
 
             /*
              * If the access is unaligned, we need to make sure we fail if we
@@ -2566,30 +2553,8 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
             }
         }
 
-        if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) {
-            /* Low part comparison into cr7. */
-            tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
-                        0, 7, TCG_TYPE_I32);
-
-            /* Load the high part TLB comparator into TMP2.  */
-            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
-                       cmp_off + 4 * !HOST_BIG_ENDIAN);
-
-            /* Load addend, deferred for this case. */
-            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
-                       offsetof(CPUTLBEntry, addend));
-
-            /* High part comparison into cr6. */
-            tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_TMP2,
-                        0, 6, TCG_TYPE_I32);
-
-            /* Combine comparisons into cr0. */
-            tcg_out32(s, CRAND | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
-        } else {
-            /* Full comparison into cr0. */
-            tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
-                        0, 0, addr_type);
-        }
+        /* Full comparison into cr0. */
+        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, 0, 0, addr_type);
 
         /* Load a pointer into the current opcode w/conditional branch-link. */
         ldst->label_ptr[0] = s->code_ptr;
@@ -2601,12 +2566,11 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
             ldst = new_ldst_label(s);
             ldst->is_ld = is_ld;
             ldst->oi = oi;
-            ldst->addrlo_reg = addrlo;
-            ldst->addrhi_reg = addrhi;
+            ldst->addr_reg = addr;
 
             /* We are expecting a_bits to max out at 7, much lower than ANDI. */
             tcg_debug_assert(a_bits < 16);
-            tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, (1 << a_bits) - 1));
+            tcg_out32(s, ANDI | SAI(addr, TCG_REG_R0, (1 << a_bits) - 1));
 
             ldst->label_ptr[0] = s->code_ptr;
             tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
@@ -2617,24 +2581,23 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
 
     if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
         /* Zero-extend the guest address for use in the host address. */
-        tcg_out_ext32u(s, TCG_REG_TMP2, addrlo);
+        tcg_out_ext32u(s, TCG_REG_TMP2, addr);
         h->index = TCG_REG_TMP2;
     } else {
-        h->index = addrlo;
+        h->index = addr;
     }
 
     return ldst;
 }
 
 static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
-                            TCGReg addrlo, TCGReg addrhi,
-                            MemOpIdx oi, TCGType data_type)
+                            TCGReg addr, MemOpIdx oi, TCGType data_type)
 {
     MemOp opc = get_memop(oi);
     TCGLabelQemuLdst *ldst;
     HostAddress h;
 
-    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
+    ldst = prepare_host_addr(s, &h, addr, oi, true);
 
     if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
         if (opc & MO_BSWAP) {
@@ -2678,14 +2641,13 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
 }
 
 static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
-                            TCGReg addrlo, TCGReg addrhi,
-                            MemOpIdx oi, TCGType data_type)
+                            TCGReg addr, MemOpIdx oi, TCGType data_type)
 {
     MemOp opc = get_memop(oi);
     TCGLabelQemuLdst *ldst;
     HostAddress h;
 
-    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
+    ldst = prepare_host_addr(s, &h, addr, oi, false);
 
     if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
         if (opc & MO_BSWAP) {
@@ -2729,7 +2691,7 @@ static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
     uint32_t insn;
     TCGReg index;
 
-    ldst = prepare_host_addr(s, &h, addr_reg, -1, oi, is_ld);
+    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
 
     /* Compose the final address, as LQ/STQ have no indexing. */
     index = h.index;
@@ -3308,70 +3270,34 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
         tcg_out32(s, MODUD | TAB(args[0], args[1], args[2]));
         break;
 
-    case INDEX_op_qemu_ld_a64_i32:
-        if (TCG_TARGET_REG_BITS == 32) {
-            tcg_out_qemu_ld(s, args[0], -1, args[1], args[2],
-                            args[3], TCG_TYPE_I32);
-            break;
-        }
-        /* fall through */
-    case INDEX_op_qemu_ld_a32_i32:
-        tcg_out_qemu_ld(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
+    case INDEX_op_qemu_ld_i32:
+        tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I32);
         break;
-    case INDEX_op_qemu_ld_a32_i64:
+    case INDEX_op_qemu_ld_i64:
         if (TCG_TARGET_REG_BITS == 64) {
-            tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
-                            args[2], TCG_TYPE_I64);
+            tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I64);
         } else {
-            tcg_out_qemu_ld(s, args[0], args[1], args[2], -1,
+            tcg_out_qemu_ld(s, args[0], args[1], args[2],
                             args[3], TCG_TYPE_I64);
         }
         break;
-    case INDEX_op_qemu_ld_a64_i64:
-        if (TCG_TARGET_REG_BITS == 64) {
-            tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
-                            args[2], TCG_TYPE_I64);
-        } else {
-            tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3],
-                            args[4], TCG_TYPE_I64);
-        }
-        break;
-    case INDEX_op_qemu_ld_a32_i128:
-    case INDEX_op_qemu_ld_a64_i128:
+    case INDEX_op_qemu_ld_i128:
         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
         tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
         break;
 
-    case INDEX_op_qemu_st_a64_i32:
-        if (TCG_TARGET_REG_BITS == 32) {
-            tcg_out_qemu_st(s, args[0], -1, args[1], args[2],
-                            args[3], TCG_TYPE_I32);
-            break;
-        }
-        /* fall through */
-    case INDEX_op_qemu_st_a32_i32:
-        tcg_out_qemu_st(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
+    case INDEX_op_qemu_st_i32:
+        tcg_out_qemu_st(s, args[0], -1, args[1], args[2], TCG_TYPE_I32);
         break;
-    case INDEX_op_qemu_st_a32_i64:
+    case INDEX_op_qemu_st_i64:
         if (TCG_TARGET_REG_BITS == 64) {
-            tcg_out_qemu_st(s, args[0], -1, args[1], -1,
-                            args[2], TCG_TYPE_I64);
+            tcg_out_qemu_st(s, args[0], -1, args[1], args[2], TCG_TYPE_I64);
         } else {
-            tcg_out_qemu_st(s, args[0], args[1], args[2], -1,
+            tcg_out_qemu_st(s, args[0], args[1], args[2],
                             args[3], TCG_TYPE_I64);
         }
         break;
-    case INDEX_op_qemu_st_a64_i64:
-        if (TCG_TARGET_REG_BITS == 64) {
-            tcg_out_qemu_st(s, args[0], -1, args[1], -1,
-                            args[2], TCG_TYPE_I64);
-        } else {
-            tcg_out_qemu_st(s, args[0], args[1], args[2], args[3],
-                            args[4], TCG_TYPE_I64);
-        }
-        break;
-    case INDEX_op_qemu_st_a32_i128:
-    case INDEX_op_qemu_st_a64_i128:
+    case INDEX_op_qemu_st_i128:
         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
         tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
         break;
@@ -4306,29 +4232,19 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
     case INDEX_op_sub2_i32:
         return C_O2_I4(r, r, rI, rZM, r, r);
 
-    case INDEX_op_qemu_ld_a32_i32:
+    case INDEX_op_qemu_ld_i32:
         return C_O1_I1(r, r);
-    case INDEX_op_qemu_ld_a64_i32:
-        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O1_I2(r, r, r);
-    case INDEX_op_qemu_ld_a32_i64:
+    case INDEX_op_qemu_ld_i64:
         return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);
-    case INDEX_op_qemu_ld_a64_i64:
-        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I2(r, r, r, r);
 
-    case INDEX_op_qemu_st_a32_i32:
+    case INDEX_op_qemu_st_i32:
         return C_O0_I2(r, r);
-    case INDEX_op_qemu_st_a64_i32:
-        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
-    case INDEX_op_qemu_st_a32_i64:
+    case INDEX_op_qemu_st_i64:
         return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
-    case INDEX_op_qemu_st_a64_i64:
-        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I4(r, r, r, r);
 
-    case INDEX_op_qemu_ld_a32_i128:
-    case INDEX_op_qemu_ld_a64_i128:
+    case INDEX_op_qemu_ld_i128:
         return C_N1O1_I1(o, m, r);
-    case INDEX_op_qemu_st_a32_i128:
-    case INDEX_op_qemu_st_a64_i128:
+    case INDEX_op_qemu_st_i128:
         return C_O0_I3(o, m, r);
 
     case INDEX_op_add_vec:
diff --git a/tcg/riscv/tcg-target-con-set.h b/tcg/riscv/tcg-target-con-set.h
index 3c4ef44eb0a83..e92e815491617 100644
--- a/tcg/riscv/tcg-target-con-set.h
+++ b/tcg/riscv/tcg-target-con-set.h
@@ -10,17 +10,17 @@
  * tcg-target-con-str.h; the constraint combination is inclusive or.
  */
 C_O0_I1(r)
-C_O0_I2(rZ, r)
-C_O0_I2(rZ, rZ)
+C_O0_I2(rz, r)
+C_O0_I2(rz, rz)
 C_O1_I1(r, r)
 C_O1_I2(r, r, ri)
 C_O1_I2(r, r, rI)
 C_O1_I2(r, r, rJ)
-C_O1_I2(r, rZ, rN)
-C_O1_I2(r, rZ, rZ)
+C_O1_I2(r, rz, rN)
+C_O1_I2(r, rz, rz)
 C_N1_I2(r, r, rM)
 C_O1_I4(r, r, rI, rM, rM)
-C_O2_I4(r, r, rZ, rZ, rM, rM)
+C_O2_I4(r, r, rz, rz, rM, rM)
 C_O0_I2(v, r)
 C_O1_I1(v, r)
 C_O1_I1(v, v)
diff --git a/tcg/riscv/tcg-target-con-str.h b/tcg/riscv/tcg-target-con-str.h
index 089efe96ca8aa..2f9700638c44f 100644
--- a/tcg/riscv/tcg-target-con-str.h
+++ b/tcg/riscv/tcg-target-con-str.h
@@ -21,4 +21,3 @@ CONST('K', TCG_CT_CONST_S5)
 CONST('L', TCG_CT_CONST_CMP_VI)
 CONST('N', TCG_CT_CONST_N12)
 CONST('M', TCG_CT_CONST_M12)
-CONST('Z', TCG_CT_CONST_ZERO)
diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h
index f35f9b31f5723..98081084f279d 100644
--- a/tcg/riscv/tcg-target-has.h
+++ b/tcg/riscv/tcg-target-has.h
@@ -37,8 +37,6 @@
 #define TCG_TARGET_HAS_clz_i32          (cpuinfo & CPUINFO_ZBB)
 #define TCG_TARGET_HAS_ctz_i32          (cpuinfo & CPUINFO_ZBB)
 #define TCG_TARGET_HAS_ctpop_i32        (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_brcond2          1
-#define TCG_TARGET_HAS_setcond2         1
 #define TCG_TARGET_HAS_qemu_st8_i32     0
 
 #define TCG_TARGET_HAS_negsetcond_i64   1
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index 61dc310c1aa65..f7e1ca5a56f0d 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -112,13 +112,12 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
     return TCG_REG_A0 + slot;
 }
 
-#define TCG_CT_CONST_ZERO    0x100
-#define TCG_CT_CONST_S12     0x200
-#define TCG_CT_CONST_N12     0x400
-#define TCG_CT_CONST_M12     0x800
-#define TCG_CT_CONST_J12    0x1000
-#define TCG_CT_CONST_S5     0x2000
-#define TCG_CT_CONST_CMP_VI 0x4000
+#define TCG_CT_CONST_S12     0x100
+#define TCG_CT_CONST_N12     0x200
+#define TCG_CT_CONST_M12     0x400
+#define TCG_CT_CONST_J12     0x800
+#define TCG_CT_CONST_S5     0x1000
+#define TCG_CT_CONST_CMP_VI 0x2000
 
 #define ALL_GENERAL_REGS   MAKE_64BIT_MASK(0, 32)
 #define ALL_VECTOR_REGS    MAKE_64BIT_MASK(32, 32)
@@ -391,9 +390,6 @@ static bool tcg_target_const_match(int64_t val, int ct,
     if (ct & TCG_CT_CONST) {
         return 1;
     }
-    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
-        return 1;
-    }
     if (type >= TCG_TYPE_V64) {
         /* Val is replicated by VECE; extract the highest element. */
         val >>= (-8 << vece) & 63;
@@ -1727,7 +1723,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
         ldst = new_ldst_label(s);
         ldst->is_ld = is_ld;
         ldst->oi = oi;
-        ldst->addrlo_reg = addr_reg;
+        ldst->addr_reg = addr_reg;
 
         init_setting_vtype(s);
 
@@ -1790,7 +1786,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
             ldst = new_ldst_label(s);
             ldst->is_ld = is_ld;
             ldst->oi = oi;
-            ldst->addrlo_reg = addr_reg;
+            ldst->addr_reg = addr_reg;
 
             init_setting_vtype(s);
 
@@ -2309,20 +2305,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                         args[3], const_args[3], args[4], const_args[4]);
         break;
 
-    case INDEX_op_qemu_ld_a32_i32:
-    case INDEX_op_qemu_ld_a64_i32:
+    case INDEX_op_qemu_ld_i32:
         tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
         break;
-    case INDEX_op_qemu_ld_a32_i64:
-    case INDEX_op_qemu_ld_a64_i64:
+    case INDEX_op_qemu_ld_i64:
         tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
         break;
-    case INDEX_op_qemu_st_a32_i32:
-    case INDEX_op_qemu_st_a64_i32:
+    case INDEX_op_qemu_st_i32:
         tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
         break;
-    case INDEX_op_qemu_st_a32_i64:
-    case INDEX_op_qemu_st_a64_i64:
+    case INDEX_op_qemu_st_i64:
         tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
         break;
 
@@ -2685,7 +2677,7 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
     case INDEX_op_st16_i64:
     case INDEX_op_st32_i64:
     case INDEX_op_st_i64:
-        return C_O0_I2(rZ, r);
+        return C_O0_I2(rz, r);
 
     case INDEX_op_add_i32:
     case INDEX_op_and_i32:
@@ -2711,7 +2703,7 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
 
     case INDEX_op_sub_i32:
     case INDEX_op_sub_i64:
-        return C_O1_I2(r, rZ, rN);
+        return C_O1_I2(r, rz, rN);
 
     case INDEX_op_mul_i32:
     case INDEX_op_mulsh_i32:
@@ -2727,7 +2719,7 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
     case INDEX_op_divu_i64:
     case INDEX_op_rem_i64:
     case INDEX_op_remu_i64:
-        return C_O1_I2(r, rZ, rZ);
+        return C_O1_I2(r, rz, rz);
 
     case INDEX_op_shl_i32:
     case INDEX_op_shr_i32:
@@ -2749,7 +2741,7 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
 
     case INDEX_op_brcond_i32:
     case INDEX_op_brcond_i64:
-        return C_O0_I2(rZ, rZ);
+        return C_O0_I2(rz, rz);
 
     case INDEX_op_movcond_i32:
     case INDEX_op_movcond_i64:
@@ -2759,18 +2751,14 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
     case INDEX_op_add2_i64:
     case INDEX_op_sub2_i32:
     case INDEX_op_sub2_i64:
-        return C_O2_I4(r, r, rZ, rZ, rM, rM);
+        return C_O2_I4(r, r, rz, rz, rM, rM);
 
-    case INDEX_op_qemu_ld_a32_i32:
-    case INDEX_op_qemu_ld_a64_i32:
-    case INDEX_op_qemu_ld_a32_i64:
-    case INDEX_op_qemu_ld_a64_i64:
+    case INDEX_op_qemu_ld_i32:
+    case INDEX_op_qemu_ld_i64:
         return C_O1_I1(r, r);
-    case INDEX_op_qemu_st_a32_i32:
-    case INDEX_op_qemu_st_a64_i32:
-    case INDEX_op_qemu_st_a32_i64:
-    case INDEX_op_qemu_st_a64_i64:
-        return C_O0_I2(rZ, r);
+    case INDEX_op_qemu_st_i32:
+    case INDEX_op_qemu_st_i64:
+        return C_O0_I2(rz, r);
 
     case INDEX_op_st_vec:
         return C_O0_I2(v, r);
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
index db5f3d8b7294a..6dc77d944ba3c 100644
--- a/tcg/riscv/tcg-target.h
+++ b/tcg/riscv/tcg-target.h
@@ -57,4 +57,6 @@ typedef enum {
     TCG_REG_TMP2       = TCG_REG_T4,
 } TCGReg;
 
+#define TCG_REG_ZERO  TCG_REG_ZERO
+
 #endif
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index dc7722dc31c53..b2e1cd60ffae0 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -1920,7 +1920,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
         ldst = new_ldst_label(s);
         ldst->is_ld = is_ld;
         ldst->oi = oi;
-        ldst->addrlo_reg = addr_reg;
+        ldst->addr_reg = addr_reg;
 
         tcg_out_sh64(s, RSY_SRLG, TCG_TMP0, addr_reg, TCG_REG_NONE,
                      s->page_bits - CPU_TLB_ENTRY_BITS);
@@ -1974,7 +1974,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
             ldst = new_ldst_label(s);
             ldst->is_ld = is_ld;
             ldst->oi = oi;
-            ldst->addrlo_reg = addr_reg;
+            ldst->addr_reg = addr_reg;
 
             tcg_debug_assert(a_mask <= 0xffff);
             tcg_out_insn(s, RI, TMLL, addr_reg, a_mask);
@@ -2455,28 +2455,22 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                      args[2], const_args[2], args[3], const_args[3], args[4]);
         break;
 
-    case INDEX_op_qemu_ld_a32_i32:
-    case INDEX_op_qemu_ld_a64_i32:
+    case INDEX_op_qemu_ld_i32:
         tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I32);
         break;
-    case INDEX_op_qemu_ld_a32_i64:
-    case INDEX_op_qemu_ld_a64_i64:
+    case INDEX_op_qemu_ld_i64:
         tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I64);
         break;
-    case INDEX_op_qemu_st_a32_i32:
-    case INDEX_op_qemu_st_a64_i32:
+    case INDEX_op_qemu_st_i32:
         tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I32);
         break;
-    case INDEX_op_qemu_st_a32_i64:
-    case INDEX_op_qemu_st_a64_i64:
+    case INDEX_op_qemu_st_i64:
         tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I64);
         break;
-    case INDEX_op_qemu_ld_a32_i128:
-    case INDEX_op_qemu_ld_a64_i128:
+    case INDEX_op_qemu_ld_i128:
         tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
         break;
-    case INDEX_op_qemu_st_a32_i128:
-    case INDEX_op_qemu_st_a64_i128:
+    case INDEX_op_qemu_st_i128:
         tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
         break;
 
@@ -3366,21 +3360,15 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
     case INDEX_op_ctpop_i64:
         return C_O1_I1(r, r);
 
-    case INDEX_op_qemu_ld_a32_i32:
-    case INDEX_op_qemu_ld_a64_i32:
-    case INDEX_op_qemu_ld_a32_i64:
-    case INDEX_op_qemu_ld_a64_i64:
+    case INDEX_op_qemu_ld_i32:
+    case INDEX_op_qemu_ld_i64:
         return C_O1_I1(r, r);
-    case INDEX_op_qemu_st_a32_i64:
-    case INDEX_op_qemu_st_a64_i64:
-    case INDEX_op_qemu_st_a32_i32:
-    case INDEX_op_qemu_st_a64_i32:
+    case INDEX_op_qemu_st_i64:
+    case INDEX_op_qemu_st_i32:
         return C_O0_I2(r, r);
-    case INDEX_op_qemu_ld_a32_i128:
-    case INDEX_op_qemu_ld_a64_i128:
+    case INDEX_op_qemu_ld_i128:
         return C_O2_I1(o, m, r);
-    case INDEX_op_qemu_st_a32_i128:
-    case INDEX_op_qemu_st_a64_i128:
+    case INDEX_op_qemu_st_i128:
         return C_O0_I3(o, m, r);
 
     case INDEX_op_deposit_i32:
diff --git a/tcg/sparc64/tcg-target-con-set.h b/tcg/sparc64/tcg-target-con-set.h
index 434bf25072d57..61f9fa3d9fc9e 100644
--- a/tcg/sparc64/tcg-target-con-set.h
+++ b/tcg/sparc64/tcg-target-con-set.h
@@ -10,11 +10,11 @@
  * tcg-target-con-str.h; the constraint combination is inclusive or.
  */
 C_O0_I1(r)
-C_O0_I2(rZ, r)
-C_O0_I2(rZ, rJ)
+C_O0_I2(rz, r)
+C_O0_I2(rz, rJ)
 C_O1_I1(r, r)
 C_O1_I2(r, r, r)
-C_O1_I2(r, rZ, rJ)
-C_O1_I4(r, rZ, rJ, rI, 0)
-C_O2_I2(r, r, rZ, rJ)
-C_O2_I4(r, r, rZ, rZ, rJ, rJ)
+C_O1_I2(r, rz, rJ)
+C_O1_I4(r, rz, rJ, rI, 0)
+C_O2_I2(r, r, rz, rJ)
+C_O2_I4(r, r, rz, rz, rJ, rJ)
diff --git a/tcg/sparc64/tcg-target-con-str.h b/tcg/sparc64/tcg-target-con-str.h
index 0577ec4942051..2f033b3ac24af 100644
--- a/tcg/sparc64/tcg-target-con-str.h
+++ b/tcg/sparc64/tcg-target-con-str.h
@@ -16,4 +16,3 @@ REGS('r', ALL_GENERAL_REGS)
  */
 CONST('I', TCG_CT_CONST_S11)
 CONST('J', TCG_CT_CONST_S13)
-CONST('Z', TCG_CT_CONST_ZERO)
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
index 733cb516512bf..7c722f59a8aba 100644
--- a/tcg/sparc64/tcg-target.c.inc
+++ b/tcg/sparc64/tcg-target.c.inc
@@ -76,7 +76,6 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
 
 #define TCG_CT_CONST_S11  0x100
 #define TCG_CT_CONST_S13  0x200
-#define TCG_CT_CONST_ZERO 0x400
 
 #define ALL_GENERAL_REGS  MAKE_64BIT_MASK(0, 32)
 
@@ -340,9 +339,7 @@ static bool tcg_target_const_match(int64_t val, int ct,
         val = (int32_t)val;
     }
 
-    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
-        return 1;
-    } else if ((ct & TCG_CT_CONST_S11) && check_fit_tl(val, 11)) {
+    if ((ct & TCG_CT_CONST_S11) && check_fit_tl(val, 11)) {
         return 1;
     } else if ((ct & TCG_CT_CONST_S13) && check_fit_tl(val, 13)) {
         return 1;
@@ -1127,7 +1124,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
     ldst = new_ldst_label(s);
     ldst->is_ld = is_ld;
     ldst->oi = oi;
-    ldst->addrlo_reg = addr_reg;
+    ldst->addr_reg = addr_reg;
     ldst->label_ptr[0] = s->code_ptr;
 
     /* bne,pn %[xi]cc, label0 */
@@ -1147,7 +1144,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
         ldst = new_ldst_label(s);
         ldst->is_ld = is_ld;
         ldst->oi = oi;
-        ldst->addrlo_reg = addr_reg;
+        ldst->addr_reg = addr_reg;
         ldst->label_ptr[0] = s->code_ptr;
 
         /* bne,pn %icc, label0 */
@@ -1426,20 +1423,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
         tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX);
         break;
 
-    case INDEX_op_qemu_ld_a32_i32:
-    case INDEX_op_qemu_ld_a64_i32:
+    case INDEX_op_qemu_ld_i32:
         tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
         break;
-    case INDEX_op_qemu_ld_a32_i64:
-    case INDEX_op_qemu_ld_a64_i64:
+    case INDEX_op_qemu_ld_i64:
         tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
         break;
-    case INDEX_op_qemu_st_a32_i32:
-    case INDEX_op_qemu_st_a64_i32:
+    case INDEX_op_qemu_st_i32:
         tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
         break;
-    case INDEX_op_qemu_st_a32_i64:
-    case INDEX_op_qemu_st_a64_i64:
+    case INDEX_op_qemu_st_i64:
         tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
         break;
 
@@ -1570,10 +1563,8 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
     case INDEX_op_extu_i32_i64:
     case INDEX_op_extract_i64:
     case INDEX_op_sextract_i64:
-    case INDEX_op_qemu_ld_a32_i32:
-    case INDEX_op_qemu_ld_a64_i32:
-    case INDEX_op_qemu_ld_a32_i64:
-    case INDEX_op_qemu_ld_a64_i64:
+    case INDEX_op_qemu_ld_i32:
+    case INDEX_op_qemu_ld_i64:
         return C_O1_I1(r, r);
 
     case INDEX_op_st8_i32:
@@ -1583,11 +1574,9 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
     case INDEX_op_st_i32:
     case INDEX_op_st32_i64:
     case INDEX_op_st_i64:
-    case INDEX_op_qemu_st_a32_i32:
-    case INDEX_op_qemu_st_a64_i32:
-    case INDEX_op_qemu_st_a32_i64:
-    case INDEX_op_qemu_st_a64_i64:
-        return C_O0_I2(rZ, r);
+    case INDEX_op_qemu_st_i32:
+    case INDEX_op_qemu_st_i64:
+        return C_O0_I2(rz, r);
 
     case INDEX_op_add_i32:
     case INDEX_op_add_i64:
@@ -1619,22 +1608,22 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
     case INDEX_op_setcond_i64:
     case INDEX_op_negsetcond_i32:
     case INDEX_op_negsetcond_i64:
-        return C_O1_I2(r, rZ, rJ);
+        return C_O1_I2(r, rz, rJ);
 
     case INDEX_op_brcond_i32:
     case INDEX_op_brcond_i64:
-        return C_O0_I2(rZ, rJ);
+        return C_O0_I2(rz, rJ);
     case INDEX_op_movcond_i32:
     case INDEX_op_movcond_i64:
-        return C_O1_I4(r, rZ, rJ, rI, 0);
+        return C_O1_I4(r, rz, rJ, rI, 0);
     case INDEX_op_add2_i32:
     case INDEX_op_add2_i64:
     case INDEX_op_sub2_i32:
     case INDEX_op_sub2_i64:
-        return C_O2_I4(r, r, rZ, rZ, rJ, rJ);
+        return C_O2_I4(r, r, rz, rz, rJ, rJ);
     case INDEX_op_mulu2_i32:
     case INDEX_op_muls2_i32:
-        return C_O2_I2(r, r, rZ, rJ);
+        return C_O2_I2(r, r, rz, rJ);
     case INDEX_op_muluh_i64:
         return C_O1_I2(r, r, r);
 
diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
index f7d75d580654b..1b9adccd85f47 100644
--- a/tcg/sparc64/tcg-target.h
+++ b/tcg/sparc64/tcg-target.h
@@ -64,6 +64,7 @@ typedef enum {
     TCG_REG_I7,
 } TCGReg;
 
-#define TCG_AREG0 TCG_REG_I0
+#define TCG_AREG0     TCG_REG_I0
+#define TCG_REG_ZERO  TCG_REG_G0
 
 #endif
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
index 77271e019350b..73838e27015a9 100644
--- a/tcg/tcg-op-ldst.c
+++ b/tcg/tcg-op-ldst.c
@@ -91,25 +91,10 @@ static MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
 static void gen_ldst(TCGOpcode opc, TCGType type, TCGTemp *vl, TCGTemp *vh,
                      TCGTemp *addr, MemOpIdx oi)
 {
-    if (TCG_TARGET_REG_BITS == 64 || tcg_ctx->addr_type == TCG_TYPE_I32) {
-        if (vh) {
-            tcg_gen_op4(opc, type, temp_arg(vl), temp_arg(vh),
-                        temp_arg(addr), oi);
-        } else {
-            tcg_gen_op3(opc, type, temp_arg(vl), temp_arg(addr), oi);
-        }
+    if (vh) {
+        tcg_gen_op4(opc, type, temp_arg(vl), temp_arg(vh), temp_arg(addr), oi);
     } else {
-        /* See TCGV_LOW/HIGH. */
-        TCGTemp *al = addr + HOST_BIG_ENDIAN;
-        TCGTemp *ah = addr + !HOST_BIG_ENDIAN;
-
-        if (vh) {
-            tcg_gen_op5(opc, type, temp_arg(vl), temp_arg(vh),
-                        temp_arg(al), temp_arg(ah), oi);
-        } else {
-            tcg_gen_op4(opc, type, temp_arg(vl),
-                        temp_arg(al), temp_arg(ah), oi);
-        }
+        tcg_gen_op3(opc, type, temp_arg(vl), temp_arg(addr), oi);
     }
 }
 
@@ -232,7 +217,6 @@ static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr,
     MemOp orig_memop;
     MemOpIdx orig_oi, oi;
     TCGv_i64 copy_addr;
-    TCGOpcode opc;
 
     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
     orig_memop = memop = tcg_canonicalize_memop(memop, 0, 0);
@@ -248,12 +232,8 @@ static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr,
     }
 
     copy_addr = plugin_maybe_preserve_addr(addr);
-    if (tcg_ctx->addr_type == TCG_TYPE_I32) {
-        opc = INDEX_op_qemu_ld_a32_i32;
-    } else {
-        opc = INDEX_op_qemu_ld_a64_i32;
-    }
-    gen_ldst(opc, TCG_TYPE_I32, tcgv_i32_temp(val), NULL, addr, oi);
+    gen_ldst(INDEX_op_qemu_ld_i32, TCG_TYPE_I32,
+             tcgv_i32_temp(val), NULL, addr, oi);
     plugin_gen_mem_callbacks_i32(val, copy_addr, addr, orig_oi,
                                  QEMU_PLUGIN_MEM_R);
 
@@ -310,17 +290,9 @@ static void tcg_gen_qemu_st_i32_int(TCGv_i32 val, TCGTemp *addr,
     }
 
     if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
-        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
-            opc = INDEX_op_qemu_st8_a32_i32;
-        } else {
-            opc = INDEX_op_qemu_st8_a64_i32;
-        }
+        opc = INDEX_op_qemu_st8_i32;
     } else {
-        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
-            opc = INDEX_op_qemu_st_a32_i32;
-        } else {
-            opc = INDEX_op_qemu_st_a64_i32;
-        }
+        opc = INDEX_op_qemu_st_i32;
     }
     gen_ldst(opc, TCG_TYPE_I32, tcgv_i32_temp(val), NULL, addr, oi);
     plugin_gen_mem_callbacks_i32(val, NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
@@ -344,7 +316,6 @@ static void tcg_gen_qemu_ld_i64_int(TCGv_i64 val, TCGTemp *addr,
     MemOp orig_memop;
     MemOpIdx orig_oi, oi;
     TCGv_i64 copy_addr;
-    TCGOpcode opc;
 
     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
         tcg_gen_qemu_ld_i32_int(TCGV_LOW(val), addr, idx, memop);
@@ -370,12 +341,7 @@ static void tcg_gen_qemu_ld_i64_int(TCGv_i64 val, TCGTemp *addr,
     }
 
     copy_addr = plugin_maybe_preserve_addr(addr);
-    if (tcg_ctx->addr_type == TCG_TYPE_I32) {
-        opc = INDEX_op_qemu_ld_a32_i64;
-    } else {
-        opc = INDEX_op_qemu_ld_a64_i64;
-    }
-    gen_ldst_i64(opc, val, addr, oi);
+    gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, oi);
     plugin_gen_mem_callbacks_i64(val, copy_addr, addr, orig_oi,
                                  QEMU_PLUGIN_MEM_R);
 
@@ -412,7 +378,6 @@ static void tcg_gen_qemu_st_i64_int(TCGv_i64 val, TCGTemp *addr,
 {
     TCGv_i64 swap = NULL;
     MemOpIdx orig_oi, oi;
-    TCGOpcode opc;
 
     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
         tcg_gen_qemu_st_i32_int(TCGV_LOW(val), addr, idx, memop);
@@ -443,12 +408,7 @@ static void tcg_gen_qemu_st_i64_int(TCGv_i64 val, TCGTemp *addr,
         oi = make_memop_idx(memop, idx);
     }
 
-    if (tcg_ctx->addr_type == TCG_TYPE_I32) {
-        opc = INDEX_op_qemu_st_a32_i64;
-    } else {
-        opc = INDEX_op_qemu_st_a64_i64;
-    }
-    gen_ldst_i64(opc, val, addr, oi);
+    gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, oi);
     plugin_gen_mem_callbacks_i64(val, NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
 
     if (swap) {
@@ -560,7 +520,6 @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
 {
     MemOpIdx orig_oi;
     TCGv_i64 ext_addr = NULL;
-    TCGOpcode opc;
 
     check_max_alignment(memop_alignment_bits(memop));
     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
@@ -588,12 +547,7 @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
             hi = TCGV128_HIGH(val);
         }
 
-        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
-            opc = INDEX_op_qemu_ld_a32_i128;
-        } else {
-            opc = INDEX_op_qemu_ld_a64_i128;
-        }
-        gen_ldst(opc, TCG_TYPE_I128, tcgv_i64_temp(lo),
+        gen_ldst(INDEX_op_qemu_ld_i128, TCG_TYPE_I128, tcgv_i64_temp(lo),
                  tcgv_i64_temp(hi), addr, oi);
 
         if (need_bswap) {
@@ -609,12 +563,6 @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
         canonicalize_memop_i128_as_i64(mop, memop);
         need_bswap = (mop[0] ^ memop) & MO_BSWAP;
 
-        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
-            opc = INDEX_op_qemu_ld_a32_i64;
-        } else {
-            opc = INDEX_op_qemu_ld_a64_i64;
-        }
-
         /*
          * Since there are no global TCGv_i128, there is no visible state
          * changed if the second load faults.  Load directly into the two
@@ -628,7 +576,8 @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
             y = TCGV128_LOW(val);
         }
 
-        gen_ldst_i64(opc, x, addr, make_memop_idx(mop[0], idx));
+        gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr,
+                     make_memop_idx(mop[0], idx));
 
         if (need_bswap) {
             tcg_gen_bswap64_i64(x, x);
@@ -644,7 +593,8 @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
             addr_p8 = tcgv_i64_temp(t);
         }
 
-        gen_ldst_i64(opc, y, addr_p8, make_memop_idx(mop[1], idx));
+        gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8,
+                     make_memop_idx(mop[1], idx));
         tcg_temp_free_internal(addr_p8);
 
         if (need_bswap) {
@@ -678,7 +628,6 @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
 {
     MemOpIdx orig_oi;
     TCGv_i64 ext_addr = NULL;
-    TCGOpcode opc;
 
     check_max_alignment(memop_alignment_bits(memop));
     tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
@@ -709,13 +658,8 @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
             hi = TCGV128_HIGH(val);
         }
 
-        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
-            opc = INDEX_op_qemu_st_a32_i128;
-        } else {
-            opc = INDEX_op_qemu_st_a64_i128;
-        }
-        gen_ldst(opc, TCG_TYPE_I128, tcgv_i64_temp(lo),
-                 tcgv_i64_temp(hi), addr, oi);
+        gen_ldst(INDEX_op_qemu_st_i128, TCG_TYPE_I128,
+                 tcgv_i64_temp(lo), tcgv_i64_temp(hi), addr, oi);
 
         if (need_bswap) {
             tcg_temp_free_i64(lo);
@@ -728,12 +672,6 @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
 
         canonicalize_memop_i128_as_i64(mop, memop);
 
-        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
-            opc = INDEX_op_qemu_st_a32_i64;
-        } else {
-            opc = INDEX_op_qemu_st_a64_i64;
-        }
-
         if ((memop & MO_BSWAP) == MO_LE) {
             x = TCGV128_LOW(val);
             y = TCGV128_HIGH(val);
@@ -748,7 +686,8 @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
             x = b;
         }
 
-        gen_ldst_i64(opc, x, addr, make_memop_idx(mop[0], idx));
+        gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr,
+                     make_memop_idx(mop[0], idx));
 
         if (tcg_ctx->addr_type == TCG_TYPE_I32) {
             TCGv_i32 t = tcg_temp_ebb_new_i32();
@@ -762,10 +701,12 @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
 
         if (b) {
             tcg_gen_bswap64_i64(b, y);
-            gen_ldst_i64(opc, b, addr_p8, make_memop_idx(mop[1], idx));
+            gen_ldst_i64(INDEX_op_qemu_st_i64, b, addr_p8,
+                         make_memop_idx(mop[1], idx));
             tcg_temp_free_i64(b);
         } else {
-            gen_ldst_i64(opc, y, addr_p8, make_memop_idx(mop[1], idx));
+            gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8,
+                         make_memop_idx(mop[1], idx));
         }
         tcg_temp_free_internal(addr_p8);
     } else {
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 43b6712286c30..e8950df2ad3b9 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -100,8 +100,7 @@ struct TCGLabelQemuLdst {
     bool is_ld;             /* qemu_ld: true, qemu_st: false */
     MemOpIdx oi;
     TCGType type;           /* result type of a load */
-    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
-    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
+    TCGReg addr_reg;        /* reg index for guest virtual addr */
     TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
     TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
     const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
@@ -1598,21 +1597,17 @@ void tcg_prologue_init(void)
     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
 #endif
 
-#ifdef TCG_TARGET_NEED_POOL_LABELS
     s->pool_labels = NULL;
-#endif
 
     qemu_thread_jit_write();
     /* Generate the prologue.  */
     tcg_target_qemu_prologue(s);
 
-#ifdef TCG_TARGET_NEED_POOL_LABELS
     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
     {
         int result = tcg_out_pool_finalize(s);
         tcg_debug_assert(result == 0);
     }
-#endif
 
     prologue_size = tcg_current_code_size(s);
     perf_report_prologue(s->code_gen_ptr, prologue_size);
@@ -1694,9 +1689,7 @@ void tcg_func_start(TCGContext *s)
     s->emit_before_op = NULL;
     QSIMPLEQ_INIT(&s->labels);
 
-    tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
-                     s->addr_type == TCG_TYPE_I64);
-
+    tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
     tcg_debug_assert(s->insn_start_words > 0);
 }
 
@@ -2153,24 +2146,17 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
     case INDEX_op_exit_tb:
     case INDEX_op_goto_tb:
     case INDEX_op_goto_ptr:
-    case INDEX_op_qemu_ld_a32_i32:
-    case INDEX_op_qemu_ld_a64_i32:
-    case INDEX_op_qemu_st_a32_i32:
-    case INDEX_op_qemu_st_a64_i32:
-    case INDEX_op_qemu_ld_a32_i64:
-    case INDEX_op_qemu_ld_a64_i64:
-    case INDEX_op_qemu_st_a32_i64:
-    case INDEX_op_qemu_st_a64_i64:
+    case INDEX_op_qemu_ld_i32:
+    case INDEX_op_qemu_st_i32:
+    case INDEX_op_qemu_ld_i64:
+    case INDEX_op_qemu_st_i64:
         return true;
 
-    case INDEX_op_qemu_st8_a32_i32:
-    case INDEX_op_qemu_st8_a64_i32:
+    case INDEX_op_qemu_st8_i32:
         return TCG_TARGET_HAS_qemu_st8_i32;
 
-    case INDEX_op_qemu_ld_a32_i128:
-    case INDEX_op_qemu_ld_a64_i128:
-    case INDEX_op_qemu_st_a32_i128:
-    case INDEX_op_qemu_st_a64_i128:
+    case INDEX_op_qemu_ld_i128:
+    case INDEX_op_qemu_st_i128:
         return TCG_TARGET_HAS_qemu_ldst_i128;
 
     case INDEX_op_mov_i32:
@@ -2868,20 +2854,13 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
                 }
                 i = 1;
                 break;
-            case INDEX_op_qemu_ld_a32_i32:
-            case INDEX_op_qemu_ld_a64_i32:
-            case INDEX_op_qemu_st_a32_i32:
-            case INDEX_op_qemu_st_a64_i32:
-            case INDEX_op_qemu_st8_a32_i32:
-            case INDEX_op_qemu_st8_a64_i32:
-            case INDEX_op_qemu_ld_a32_i64:
-            case INDEX_op_qemu_ld_a64_i64:
-            case INDEX_op_qemu_st_a32_i64:
-            case INDEX_op_qemu_st_a64_i64:
-            case INDEX_op_qemu_ld_a32_i128:
-            case INDEX_op_qemu_ld_a64_i128:
-            case INDEX_op_qemu_st_a32_i128:
-            case INDEX_op_qemu_st_a64_i128:
+            case INDEX_op_qemu_ld_i32:
+            case INDEX_op_qemu_st_i32:
+            case INDEX_op_qemu_st8_i32:
+            case INDEX_op_qemu_ld_i64:
+            case INDEX_op_qemu_st_i64:
+            case INDEX_op_qemu_ld_i128:
+            case INDEX_op_qemu_st_i128:
                 {
                     const char *s_al, *s_op, *s_at;
                     MemOpIdx oi = op->args[k++];
@@ -3244,6 +3223,11 @@ static void process_constraint_sets(void)
                 case 'i':
                     args_ct[i].ct |= TCG_CT_CONST;
                     break;
+#ifdef TCG_REG_ZERO
+                case 'z':
+                    args_ct[i].ct |= TCG_CT_REG_ZERO;
+                    break;
+#endif
 
                 /* Include all of the target-specific constraints. */
 
@@ -5095,13 +5079,23 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
         arg_ct = &args_ct[i];
         ts = arg_temp(arg);
 
-        if (ts->val_type == TEMP_VAL_CONST
-            && tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
-                                      op_cond, TCGOP_VECE(op))) {
-            /* constant is OK for instruction */
-            const_args[i] = 1;
-            new_args[i] = ts->val;
-            continue;
+        if (ts->val_type == TEMP_VAL_CONST) {
+#ifdef TCG_REG_ZERO
+            if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
+                /* Hardware zero register: indicate register via non-const. */
+                const_args[i] = 0;
+                new_args[i] = TCG_REG_ZERO;
+                continue;
+            }
+#endif
+
+            if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
+                                       op_cond, TCGOP_VECE(op))) {
+                /* constant is OK for instruction */
+                const_args[i] = 1;
+                new_args[i] = ts->val;
+                continue;
+            }
         }
 
         reg = ts->reg;
@@ -6081,7 +6075,7 @@ static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
          */
         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                                TCG_TYPE_I32, TCG_TYPE_I32,
-                               ldst->addrlo_reg, -1);
+                               ldst->addr_reg, -1);
         tcg_out_helper_load_slots(s, 1, mov, parm);
 
         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
@@ -6089,7 +6083,7 @@ static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
         next_arg += 2;
     } else {
         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
-                                      ldst->addrlo_reg, ldst->addrhi_reg);
+                                      ldst->addr_reg, -1);
         tcg_out_helper_load_slots(s, nmov, mov, parm);
         next_arg += nmov;
     }
@@ -6246,21 +6240,22 @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
 
     /* Handle addr argument. */
     loc = &info->in[next_arg];
-    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
+    tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
+    if (TCG_TARGET_REG_BITS == 32) {
         /*
-         * 32-bit host with 32-bit guest: zero-extend the guest address
+         * 32-bit host (and thus 32-bit guest): zero-extend the guest address
          * to 64-bits for the helper by storing the low part.  Later,
          * after we have processed the register inputs, we will load a
          * zero for the high part.
          */
         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                                TCG_TYPE_I32, TCG_TYPE_I32,
-                               ldst->addrlo_reg, -1);
+                               ldst->addr_reg, -1);
         next_arg += 2;
         nmov += 1;
     } else {
         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
-                                   ldst->addrlo_reg, ldst->addrhi_reg);
+                                   ldst->addr_reg, -1);
         next_arg += n;
         nmov += n;
     }
@@ -6308,7 +6303,7 @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
         g_assert_not_reached();
     }
 
-    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
+    if (TCG_TARGET_REG_BITS == 32) {
         /* Zero extend the address by loading a zero for the high part. */
         loc = &info->in[1 + !HOST_BIG_ENDIAN];
         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
diff --git a/tcg/tci.c b/tcg/tci.c
index 8c1c53424dbdf..d223258efe0d2 100644
--- a/tcg/tci.c
+++ b/tcg/tci.c
@@ -154,16 +154,6 @@ static void tci_args_rrrbb(uint32_t insn, TCGReg *r0, TCGReg *r1,
     *i4 = extract32(insn, 26, 6);
 }
 
-static void tci_args_rrrrr(uint32_t insn, TCGReg *r0, TCGReg *r1,
-                           TCGReg *r2, TCGReg *r3, TCGReg *r4)
-{
-    *r0 = extract32(insn, 8, 4);
-    *r1 = extract32(insn, 12, 4);
-    *r2 = extract32(insn, 16, 4);
-    *r3 = extract32(insn, 20, 4);
-    *r4 = extract32(insn, 24, 4);
-}
-
 static void tci_args_rrrr(uint32_t insn,
                           TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGReg *r3)
 {
@@ -912,43 +902,21 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
             tb_ptr = ptr;
             break;
 
-        case INDEX_op_qemu_ld_a32_i32:
+        case INDEX_op_qemu_ld_i32:
             tci_args_rrm(insn, &r0, &r1, &oi);
-            taddr = (uint32_t)regs[r1];
-            goto do_ld_i32;
-        case INDEX_op_qemu_ld_a64_i32:
-            if (TCG_TARGET_REG_BITS == 64) {
-                tci_args_rrm(insn, &r0, &r1, &oi);
-                taddr = regs[r1];
-            } else {
-                tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
-                taddr = tci_uint64(regs[r2], regs[r1]);
-                oi = regs[r3];
-            }
-        do_ld_i32:
+            taddr = regs[r1];
             regs[r0] = tci_qemu_ld(env, taddr, oi, tb_ptr);
             break;
 
-        case INDEX_op_qemu_ld_a32_i64:
+        case INDEX_op_qemu_ld_i64:
             if (TCG_TARGET_REG_BITS == 64) {
                 tci_args_rrm(insn, &r0, &r1, &oi);
-                taddr = (uint32_t)regs[r1];
+                taddr = regs[r1];
             } else {
                 tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
-                taddr = (uint32_t)regs[r2];
+                taddr = regs[r2];
                 oi = regs[r3];
             }
-            goto do_ld_i64;
-        case INDEX_op_qemu_ld_a64_i64:
-            if (TCG_TARGET_REG_BITS == 64) {
-                tci_args_rrm(insn, &r0, &r1, &oi);
-                taddr = regs[r1];
-            } else {
-                tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4);
-                taddr = tci_uint64(regs[r3], regs[r2]);
-                oi = regs[r4];
-            }
-        do_ld_i64:
             tmp64 = tci_qemu_ld(env, taddr, oi, tb_ptr);
             if (TCG_TARGET_REG_BITS == 32) {
                 tci_write_reg64(regs, r1, r0, tmp64);
@@ -957,47 +925,23 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
             }
             break;
 
-        case INDEX_op_qemu_st_a32_i32:
+        case INDEX_op_qemu_st_i32:
             tci_args_rrm(insn, &r0, &r1, &oi);
-            taddr = (uint32_t)regs[r1];
-            goto do_st_i32;
-        case INDEX_op_qemu_st_a64_i32:
-            if (TCG_TARGET_REG_BITS == 64) {
-                tci_args_rrm(insn, &r0, &r1, &oi);
-                taddr = regs[r1];
-            } else {
-                tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
-                taddr = tci_uint64(regs[r2], regs[r1]);
-                oi = regs[r3];
-            }
-        do_st_i32:
+            taddr = regs[r1];
             tci_qemu_st(env, taddr, regs[r0], oi, tb_ptr);
             break;
 
-        case INDEX_op_qemu_st_a32_i64:
+        case INDEX_op_qemu_st_i64:
             if (TCG_TARGET_REG_BITS == 64) {
                 tci_args_rrm(insn, &r0, &r1, &oi);
                 tmp64 = regs[r0];
-                taddr = (uint32_t)regs[r1];
+                taddr = regs[r1];
             } else {
                 tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
                 tmp64 = tci_uint64(regs[r1], regs[r0]);
-                taddr = (uint32_t)regs[r2];
+                taddr = regs[r2];
                 oi = regs[r3];
             }
-            goto do_st_i64;
-        case INDEX_op_qemu_st_a64_i64:
-            if (TCG_TARGET_REG_BITS == 64) {
-                tci_args_rrm(insn, &r0, &r1, &oi);
-                tmp64 = regs[r0];
-                taddr = regs[r1];
-            } else {
-                tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4);
-                tmp64 = tci_uint64(regs[r1], regs[r0]);
-                taddr = tci_uint64(regs[r3], regs[r2]);
-                oi = regs[r4];
-            }
-        do_st_i64:
             tci_qemu_st(env, taddr, tmp64, oi, tb_ptr);
             break;
 
@@ -1269,42 +1213,21 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
                            str_r(r3), str_r(r4), str_r(r5));
         break;
 
-    case INDEX_op_qemu_ld_a32_i32:
-    case INDEX_op_qemu_st_a32_i32:
-        len = 1 + 1;
-        goto do_qemu_ldst;
-    case INDEX_op_qemu_ld_a32_i64:
-    case INDEX_op_qemu_st_a32_i64:
-    case INDEX_op_qemu_ld_a64_i32:
-    case INDEX_op_qemu_st_a64_i32:
-        len = 1 + DIV_ROUND_UP(64, TCG_TARGET_REG_BITS);
-        goto do_qemu_ldst;
-    case INDEX_op_qemu_ld_a64_i64:
-    case INDEX_op_qemu_st_a64_i64:
-        len = 2 * DIV_ROUND_UP(64, TCG_TARGET_REG_BITS);
-        goto do_qemu_ldst;
-    do_qemu_ldst:
-        switch (len) {
-        case 2:
-            tci_args_rrm(insn, &r0, &r1, &oi);
-            info->fprintf_func(info->stream, "%-12s  %s, %s, %x",
-                               op_name, str_r(r0), str_r(r1), oi);
-            break;
-        case 3:
+    case INDEX_op_qemu_ld_i64:
+    case INDEX_op_qemu_st_i64:
+        if (TCG_TARGET_REG_BITS == 32) {
             tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
             info->fprintf_func(info->stream, "%-12s  %s, %s, %s, %s",
                                op_name, str_r(r0), str_r(r1),
                                str_r(r2), str_r(r3));
             break;
-        case 4:
-            tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4);
-            info->fprintf_func(info->stream, "%-12s  %s, %s, %s, %s, %s",
-                               op_name, str_r(r0), str_r(r1),
-                               str_r(r2), str_r(r3), str_r(r4));
-            break;
-        default:
-            g_assert_not_reached();
         }
+        /* fall through */
+    case INDEX_op_qemu_ld_i32:
+    case INDEX_op_qemu_st_i32:
+        tci_args_rrm(insn, &r0, &r1, &oi);
+        info->fprintf_func(info->stream, "%-12s  %s, %s, %x",
+                           op_name, str_r(r0), str_r(r1), oi);
         break;
 
     case 0:
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index d6c77325a3ea0..36e018dd1911e 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -169,22 +169,14 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
     case INDEX_op_setcond2_i32:
         return C_O1_I4(r, r, r, r, r);
 
-    case INDEX_op_qemu_ld_a32_i32:
+    case INDEX_op_qemu_ld_i32:
         return C_O1_I1(r, r);
-    case INDEX_op_qemu_ld_a64_i32:
-        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O1_I2(r, r, r);
-    case INDEX_op_qemu_ld_a32_i64:
+    case INDEX_op_qemu_ld_i64:
         return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);
-    case INDEX_op_qemu_ld_a64_i64:
-        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I2(r, r, r, r);
-    case INDEX_op_qemu_st_a32_i32:
+    case INDEX_op_qemu_st_i32:
         return C_O0_I2(r, r);
-    case INDEX_op_qemu_st_a64_i32:
+    case INDEX_op_qemu_st_i64:
         return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
-    case INDEX_op_qemu_st_a32_i64:
-        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
-    case INDEX_op_qemu_st_a64_i64:
-        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I4(r, r, r, r);
 
     default:
         return C_NotImplemented;
@@ -422,20 +414,6 @@ static void tcg_out_op_rrrbb(TCGContext *s, TCGOpcode op, TCGReg r0,
     tcg_out32(s, insn);
 }
 
-static void tcg_out_op_rrrrr(TCGContext *s, TCGOpcode op, TCGReg r0,
-                             TCGReg r1, TCGReg r2, TCGReg r3, TCGReg r4)
-{
-    tcg_insn_unit insn = 0;
-
-    insn = deposit32(insn, 0, 8, op);
-    insn = deposit32(insn, 8, 4, r0);
-    insn = deposit32(insn, 12, 4, r1);
-    insn = deposit32(insn, 16, 4, r2);
-    insn = deposit32(insn, 20, 4, r3);
-    insn = deposit32(insn, 24, 4, r4);
-    tcg_out32(s, insn);
-}
-
 static void tcg_out_op_rrrr(TCGContext *s, TCGOpcode op,
                             TCGReg r0, TCGReg r1, TCGReg r2, TCGReg r3)
 {
@@ -833,29 +811,21 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
         tcg_out_op_rrrr(s, opc, args[0], args[1], args[2], args[3]);
         break;
 
-    case INDEX_op_qemu_ld_a32_i32:
-    case INDEX_op_qemu_st_a32_i32:
-        tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
-        break;
-    case INDEX_op_qemu_ld_a64_i32:
-    case INDEX_op_qemu_st_a64_i32:
-    case INDEX_op_qemu_ld_a32_i64:
-    case INDEX_op_qemu_st_a32_i64:
-        if (TCG_TARGET_REG_BITS == 64) {
-            tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
-        } else {
+    case INDEX_op_qemu_ld_i64:
+    case INDEX_op_qemu_st_i64:
+        if (TCG_TARGET_REG_BITS == 32) {
             tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, args[3]);
             tcg_out_op_rrrr(s, opc, args[0], args[1], args[2], TCG_REG_TMP);
+            break;
         }
-        break;
-    case INDEX_op_qemu_ld_a64_i64:
-    case INDEX_op_qemu_st_a64_i64:
-        if (TCG_TARGET_REG_BITS == 64) {
-            tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
+        /* fall through */
+    case INDEX_op_qemu_ld_i32:
+    case INDEX_op_qemu_st_i32:
+        if (TCG_TARGET_REG_BITS == 64 && s->addr_type == TCG_TYPE_I32) {
+            tcg_out_ext32u(s, TCG_REG_TMP, args[1]);
+            tcg_out_op_rrm(s, opc, args[0], TCG_REG_TMP, args[2]);
         } else {
-            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, args[4]);
-            tcg_out_op_rrrrr(s, opc, args[0], args[1],
-                             args[2], args[3], TCG_REG_TMP);
+            tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
         }
         break;
 
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index a9ca493d20f61..bd03aa1bc4a2a 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -72,6 +72,5 @@ typedef enum {
 } TCGReg;
 
 #define HAVE_TCG_QEMU_TB_EXEC
-#define TCG_TARGET_NEED_POOL_LABELS
 
 #endif /* TCG_TARGET_H */
diff --git a/tests/avocado/replay_kernel.py b/tests/avocado/replay_kernel.py
index b9b54a8793d1b..35515323723be 100644
--- a/tests/avocado/replay_kernel.py
+++ b/tests/avocado/replay_kernel.py
@@ -108,305 +108,3 @@ def test_i386_pc(self):
         console_pattern = 'VFS: Cannot open root device'
 
         self.run_rr(kernel_path, kernel_command_line, console_pattern, shift=5)
-
-    # See https://gitlab.com/qemu-project/qemu/-/issues/2094
-    @skipUnless(os.getenv('QEMU_TEST_FLAKY_TESTS'), 'pc machine is unstable with replay')
-    def test_x86_64_pc(self):
-        """
-        :avocado: tags=arch:x86_64
-        :avocado: tags=machine:pc
-        :avocado: tags=flaky
-        """
-        kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora'
-                      '/linux/releases/29/Everything/x86_64/os/images/pxeboot'
-                      '/vmlinuz')
-        kernel_hash = '23bebd2680757891cf7adedb033532163a792495'
-        kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash)
-
-        kernel_command_line = self.KERNEL_COMMON_COMMAND_LINE + 'console=ttyS0'
-        console_pattern = 'VFS: Cannot open root device'
-
-        self.run_rr(kernel_path, kernel_command_line, console_pattern, shift=5)
-
-    def test_x86_64_q35(self):
-        """
-        :avocado: tags=arch:x86_64
-        :avocado: tags=machine:q35
-        """
-        kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora'
-                      '/linux/releases/29/Everything/x86_64/os/images/pxeboot'
-                      '/vmlinuz')
-        kernel_hash = '23bebd2680757891cf7adedb033532163a792495'
-        kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash)
-
-        kernel_command_line = self.KERNEL_COMMON_COMMAND_LINE + 'console=ttyS0'
-        console_pattern = 'VFS: Cannot open root device'
-
-        self.run_rr(kernel_path, kernel_command_line, console_pattern, shift=5)
-
-    def test_aarch64_virt(self):
-        """
-        :avocado: tags=arch:aarch64
-        :avocado: tags=machine:virt
-        :avocado: tags=cpu:cortex-a53
-        """
-        kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora'
-                      '/linux/releases/29/Everything/aarch64/os/images/pxeboot'
-                      '/vmlinuz')
-        kernel_hash = '8c73e469fc6ea06a58dc83a628fc695b693b8493'
-        kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash)
-
-        kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE +
-                               'console=ttyAMA0')
-        console_pattern = 'VFS: Cannot open root device'
-
-        self.run_rr(kernel_path, kernel_command_line, console_pattern)
-
-    def test_arm_virt(self):
-        """
-        :avocado: tags=arch:arm
-        :avocado: tags=machine:virt
-        """
-        kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora'
-                      '/linux/releases/29/Everything/armhfp/os/images/pxeboot'
-                      '/vmlinuz')
-        kernel_hash = 'e9826d741b4fb04cadba8d4824d1ed3b7fb8b4d4'
-        kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash)
-
-        kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE +
-                               'console=ttyAMA0')
-        console_pattern = 'VFS: Cannot open root device'
-
-        self.run_rr(kernel_path, kernel_command_line, console_pattern, shift=1)
-
-    def test_arm_cubieboard_initrd(self):
-        """
-        :avocado: tags=arch:arm
-        :avocado: tags=machine:cubieboard
-        """
-        deb_url = ('https://apt.armbian.com/pool/main/l/'
-                   'linux-6.6.16/linux-image-current-sunxi_24.2.1_armhf__6.6.16-Seb3e-D6b4a-P2359-Ce96bHfe66-HK01ba-V014b-B067e-R448a.deb')
-        deb_hash = 'f7c3c8c5432f765445dc6e7eab02f3bbe668256b'
-        deb_path = self.fetch_asset(deb_url, asset_hash=deb_hash)
-        kernel_path = self.extract_from_deb(deb_path,
-                                            '/boot/vmlinuz-6.6.16-current-sunxi')
-        dtb_path = '/usr/lib/linux-image-6.6.16-current-sunxi/sun4i-a10-cubieboard.dtb'
-        dtb_path = self.extract_from_deb(deb_path, dtb_path)
-        initrd_url = ('https://github.com/groeck/linux-build-test/raw/'
-                      '2eb0a73b5d5a28df3170c546ddaaa9757e1e0848/rootfs/'
-                      'arm/rootfs-armv5.cpio.gz')
-        initrd_hash = '2b50f1873e113523967806f4da2afe385462ff9b'
-        initrd_path_gz = self.fetch_asset(initrd_url, asset_hash=initrd_hash)
-        initrd_path = os.path.join(self.workdir, 'rootfs.cpio')
-        archive.gzip_uncompress(initrd_path_gz, initrd_path)
-
-        kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE +
-                               'console=ttyS0,115200 '
-                               'usbcore.nousb '
-                               'panic=-1 noreboot')
-        console_pattern = 'Boot successful.'
-        self.run_rr(kernel_path, kernel_command_line, console_pattern, shift=1,
-                    args=('-dtb', dtb_path,
-                          '-initrd', initrd_path,
-                          '-no-reboot'))
-
-    def test_s390x_s390_ccw_virtio(self):
-        """
-        :avocado: tags=arch:s390x
-        :avocado: tags=machine:s390-ccw-virtio
-        """
-        kernel_url = ('https://archives.fedoraproject.org/pub/archive'
-                      '/fedora-secondary/releases/29/Everything/s390x/os/images'
-                      '/kernel.img')
-        kernel_hash = 'e8e8439103ef8053418ef062644ffd46a7919313'
-        kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash)
-
-        kernel_command_line = self.KERNEL_COMMON_COMMAND_LINE + 'console=sclp0'
-        console_pattern = 'Kernel command line: %s' % kernel_command_line
-        self.run_rr(kernel_path, kernel_command_line, console_pattern, shift=9)
-
-    def test_alpha_clipper(self):
-        """
-        :avocado: tags=arch:alpha
-        :avocado: tags=machine:clipper
-        """
-        kernel_url = ('http://archive.debian.org/debian/dists/lenny/main/'
-                      'installer-alpha/20090123lenny10/images/cdrom/vmlinuz')
-        kernel_hash = '3a943149335529e2ed3e74d0d787b85fb5671ba3'
-        kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash)
-
-        uncompressed_kernel = archive.uncompress(kernel_path, self.workdir)
-
-        kernel_command_line = self.KERNEL_COMMON_COMMAND_LINE + 'console=ttyS0'
-        console_pattern = 'Kernel command line: %s' % kernel_command_line
-        self.run_rr(uncompressed_kernel, kernel_command_line, console_pattern, shift=9,
-            args=('-nodefaults', ))
-
-    def test_ppc64_pseries(self):
-        """
-        :avocado: tags=arch:ppc64
-        :avocado: tags=machine:pseries
-        :avocado: tags=accel:tcg
-        """
-        kernel_url = ('https://archives.fedoraproject.org/pub/archive'
-                      '/fedora-secondary/releases/29/Everything/ppc64le/os'
-                      '/ppc/ppc64/vmlinuz')
-        kernel_hash = '3fe04abfc852b66653b8c3c897a59a689270bc77'
-        kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash)
-
-        kernel_command_line = self.KERNEL_COMMON_COMMAND_LINE + 'console=hvc0'
-        console_pattern = 'VFS: Cannot open root device'
-        self.run_rr(kernel_path, kernel_command_line, console_pattern)
-
-    def test_ppc64_powernv(self):
-        """
-        :avocado: tags=arch:ppc64
-        :avocado: tags=machine:powernv
-        :avocado: tags=accel:tcg
-        """
-        kernel_url = ('https://archives.fedoraproject.org/pub/archive'
-                      '/fedora-secondary/releases/29/Everything/ppc64le/os'
-                      '/ppc/ppc64/vmlinuz')
-        kernel_hash = '3fe04abfc852b66653b8c3c897a59a689270bc77'
-        kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash)
-
-        kernel_command_line = self.KERNEL_COMMON_COMMAND_LINE + \
-                              'console=tty0 console=hvc0'
-        console_pattern = 'VFS: Cannot open root device'
-        self.run_rr(kernel_path, kernel_command_line, console_pattern)
-
-    def test_m68k_q800(self):
-        """
-        :avocado: tags=arch:m68k
-        :avocado: tags=machine:q800
-        """
-        deb_url = ('https://snapshot.debian.org/archive/debian-ports'
-                   '/20191021T083923Z/pool-m68k/main'
-                   '/l/linux/kernel-image-5.3.0-1-m68k-di_5.3.7-1_m68k.udeb')
-        deb_hash = '044954bb9be4160a3ce81f8bc1b5e856b75cccd1'
-        deb_path = self.fetch_asset(deb_url, asset_hash=deb_hash)
-        kernel_path = self.extract_from_deb(deb_path,
-                                            '/boot/vmlinux-5.3.0-1-m68k')
-
-        kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE +
-                               'console=ttyS0 vga=off')
-        console_pattern = 'No filesystem could mount root'
-        self.run_rr(kernel_path, kernel_command_line, console_pattern)
-
-    def do_test_advcal_2018(self, file_path, kernel_name, args=None):
-        archive.extract(file_path, self.workdir)
-
-        for entry in os.scandir(self.workdir):
-            if entry.name.startswith('day') and entry.is_dir():
-                kernel_path = os.path.join(entry.path, kernel_name)
-                break
-
-        kernel_command_line = ''
-        console_pattern = 'QEMU advent calendar'
-        self.run_rr(kernel_path, kernel_command_line, console_pattern,
-                    args=args)
-
-    def test_arm_vexpressa9(self):
-        """
-        :avocado: tags=arch:arm
-        :avocado: tags=machine:vexpress-a9
-        """
-        tar_hash = '32b7677ce8b6f1471fb0059865f451169934245b'
-        tar_url = ('https://qemu-advcal.gitlab.io'
-                   '/qac-best-of-multiarch/download/day16.tar.xz')
-        file_path = self.fetch_asset(tar_url, asset_hash=tar_hash)
-        dtb_path = self.workdir + '/day16/vexpress-v2p-ca9.dtb'
-        self.do_test_advcal_2018(file_path, 'winter.zImage',
-                                 args=('-dtb', dtb_path))
-
-    def test_m68k_mcf5208evb(self):
-        """
-        :avocado: tags=arch:m68k
-        :avocado: tags=machine:mcf5208evb
-        """
-        tar_hash = 'ac688fd00561a2b6ce1359f9ff6aa2b98c9a570c'
-        tar_url = ('https://qemu-advcal.gitlab.io'
-                   '/qac-best-of-multiarch/download/day07.tar.xz')
-        file_path = self.fetch_asset(tar_url, asset_hash=tar_hash)
-        self.do_test_advcal_2018(file_path, 'sanity-clause.elf')
-
-    def test_microblaze_s3adsp1800(self):
-        """
-        :avocado: tags=arch:microblaze
-        :avocado: tags=machine:petalogix-s3adsp1800
-        """
-        tar_hash = '08bf3e3bfb6b6c7ce1e54ab65d54e189f2caf13f'
-        tar_url = ('https://qemu-advcal.gitlab.io'
-                   '/qac-best-of-multiarch/download/day17.tar.xz')
-        file_path = self.fetch_asset(tar_url, asset_hash=tar_hash)
-        self.do_test_advcal_2018(file_path, 'ballerina.bin')
-
-    def test_ppc64_e500(self):
-        """
-        :avocado: tags=arch:ppc64
-        :avocado: tags=machine:ppce500
-        :avocado: tags=cpu:e5500
-        """
-        tar_hash = '6951d86d644b302898da2fd701739c9406527fe1'
-        tar_url = ('https://qemu-advcal.gitlab.io'
-                   '/qac-best-of-multiarch/download/day19.tar.xz')
-        file_path = self.fetch_asset(tar_url, asset_hash=tar_hash)
-        self.do_test_advcal_2018(file_path, 'uImage')
-
-    def test_or1k_sim(self):
-        """
-        :avocado: tags=arch:or1k
-        :avocado: tags=machine:or1k-sim
-        """
-        tar_hash = '20334cdaf386108c530ff0badaecc955693027dd'
-        tar_url = ('https://qemu-advcal.gitlab.io'
-                   '/qac-best-of-multiarch/download/day20.tar.xz')
-        file_path = self.fetch_asset(tar_url, asset_hash=tar_hash)
-        self.do_test_advcal_2018(file_path, 'vmlinux')
-
-    def test_ppc_g3beige(self):
-        """
-        :avocado: tags=arch:ppc
-        :avocado: tags=machine:g3beige
-        """
-        tar_hash = 'e0b872a5eb8fdc5bed19bd43ffe863900ebcedfc'
-        tar_url = ('https://qemu-advcal.gitlab.io'
-                   '/qac-best-of-multiarch/download/day15.tar.xz')
-        file_path = self.fetch_asset(tar_url, asset_hash=tar_hash)
-        self.do_test_advcal_2018(file_path, 'invaders.elf',
-                                 args=('-M', 'graphics=off'))
-
-    def test_ppc_mac99(self):
-        """
-        :avocado: tags=arch:ppc
-        :avocado: tags=machine:mac99
-        """
-        tar_hash = 'e0b872a5eb8fdc5bed19bd43ffe863900ebcedfc'
-        tar_url = ('https://qemu-advcal.gitlab.io'
-                   '/qac-best-of-multiarch/download/day15.tar.xz')
-        file_path = self.fetch_asset(tar_url, asset_hash=tar_hash)
-        self.do_test_advcal_2018(file_path, 'invaders.elf',
-                                 args=('-M', 'graphics=off'))
-
-    def test_sparc_ss20(self):
-        """
-        :avocado: tags=arch:sparc
-        :avocado: tags=machine:SS-20
-        """
-        tar_hash = 'b18550d5d61c7615d989a06edace051017726a9f'
-        tar_url = ('https://qemu-advcal.gitlab.io'
-                   '/qac-best-of-multiarch/download/day11.tar.xz')
-        file_path = self.fetch_asset(tar_url, asset_hash=tar_hash)
-        self.do_test_advcal_2018(file_path, 'zImage.elf')
-
-    def test_xtensa_lx60(self):
-        """
-        :avocado: tags=arch:xtensa
-        :avocado: tags=machine:lx60
-        :avocado: tags=cpu:dc233c
-        """
-        tar_hash = '49e88d9933742f0164b60839886c9739cb7a0d34'
-        tar_url = ('https://qemu-advcal.gitlab.io'
-                   '/qac-best-of-multiarch/download/day02.tar.xz')
-        file_path = self.fetch_asset(tar_url, asset_hash=tar_hash)
-        self.do_test_advcal_2018(file_path, 'santas-sleigh-ride.elf')
diff --git a/tests/data/acpi/riscv64/virt/RHCT b/tests/data/acpi/riscv64/virt/RHCT
index 695022d56c4ac..13c8025b86805 100644
Binary files a/tests/data/acpi/riscv64/virt/RHCT and b/tests/data/acpi/riscv64/virt/RHCT differ
diff --git a/tests/docker/dockerfiles/alpine.docker b/tests/docker/dockerfiles/alpine.docker
index f87c40fbfe348..bf3bd5a30dd95 100644
--- a/tests/docker/dockerfiles/alpine.docker
+++ b/tests/docker/dockerfiles/alpine.docker
@@ -1,10 +1,10 @@
 # THIS FILE WAS AUTO-GENERATED
 #
-#  $ lcitool dockerfile --layers all alpine-319 qemu
+#  $ lcitool dockerfile --layers all alpine-321 qemu
 #
 # https://gitlab.com/libvirt/libvirt-ci
 
-FROM docker.io/library/alpine:3.19
+FROM docker.io/library/alpine:3.21
 
 RUN apk update && \
     apk upgrade && \
@@ -111,6 +111,7 @@ RUN apk update && \
         vde2-dev \
         virglrenderer-dev \
         vte3-dev \
+        vulkan-tools \
         which \
         xen-dev \
         xorriso \
diff --git a/tests/docker/dockerfiles/centos9.docker b/tests/docker/dockerfiles/centos9.docker
index a9681c8a96f44..a942835a1d280 100644
--- a/tests/docker/dockerfiles/centos9.docker
+++ b/tests/docker/dockerfiles/centos9.docker
@@ -115,6 +115,7 @@ RUN dnf distro-sync -y && \
         usbredir-devel \
         util-linux \
         vte291-devel \
+        vulkan-tools \
         which \
         xorriso \
         zlib-devel \
diff --git a/tests/docker/dockerfiles/debian-amd64-cross.docker b/tests/docker/dockerfiles/debian-amd64-cross.docker
index 644fd3734d5bc..05355854285e0 100644
--- a/tests/docker/dockerfiles/debian-amd64-cross.docker
+++ b/tests/docker/dockerfiles/debian-amd64-cross.docker
@@ -58,6 +58,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       tar \
                       tesseract-ocr \
                       tesseract-ocr-eng \
+                      vulkan-tools \
                       xorriso \
                       zstd && \
     eatmydata apt-get autoremove -y && \
@@ -135,7 +136,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       libsndio-dev:amd64 \
                       libspice-protocol-dev:amd64 \
                       libspice-server-dev:amd64 \
-                      libssh-gcrypt-dev:amd64 \
+                      libssh-dev:amd64 \
                       libsystemd-dev:amd64 \
                       libtasn1-6-dev:amd64 \
                       libubsan1:amd64 \
diff --git a/tests/docker/dockerfiles/debian-arm64-cross.docker b/tests/docker/dockerfiles/debian-arm64-cross.docker
index 060da53796e09..6b1e4fc827977 100644
--- a/tests/docker/dockerfiles/debian-arm64-cross.docker
+++ b/tests/docker/dockerfiles/debian-arm64-cross.docker
@@ -58,6 +58,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       tar \
                       tesseract-ocr \
                       tesseract-ocr-eng \
+                      vulkan-tools \
                       xorriso \
                       zstd && \
     eatmydata apt-get autoremove -y && \
@@ -134,7 +135,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       libsndio-dev:arm64 \
                       libspice-protocol-dev:arm64 \
                       libspice-server-dev:arm64 \
-                      libssh-gcrypt-dev:arm64 \
+                      libssh-dev:arm64 \
                       libsystemd-dev:arm64 \
                       libtasn1-6-dev:arm64 \
                       libubsan1:arm64 \
diff --git a/tests/docker/dockerfiles/debian-armhf-cross.docker b/tests/docker/dockerfiles/debian-armhf-cross.docker
index a481fc96959e4..cf0fe63af91c1 100644
--- a/tests/docker/dockerfiles/debian-armhf-cross.docker
+++ b/tests/docker/dockerfiles/debian-armhf-cross.docker
@@ -58,6 +58,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       tar \
                       tesseract-ocr \
                       tesseract-ocr-eng \
+                      vulkan-tools \
                       xorriso \
                       zstd && \
     eatmydata apt-get autoremove -y && \
@@ -134,7 +135,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       libsndio-dev:armhf \
                       libspice-protocol-dev:armhf \
                       libspice-server-dev:armhf \
-                      libssh-gcrypt-dev:armhf \
+                      libssh-dev:armhf \
                       libsystemd-dev:armhf \
                       libtasn1-6-dev:armhf \
                       libubsan1:armhf \
diff --git a/tests/docker/dockerfiles/debian-i686-cross.docker b/tests/docker/dockerfiles/debian-i686-cross.docker
index 61bc361e85a48..1c84dfb9456d1 100644
--- a/tests/docker/dockerfiles/debian-i686-cross.docker
+++ b/tests/docker/dockerfiles/debian-i686-cross.docker
@@ -58,6 +58,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       tar \
                       tesseract-ocr \
                       tesseract-ocr-eng \
+                      vulkan-tools \
                       xorriso \
                       zstd && \
     eatmydata apt-get autoremove -y && \
@@ -134,7 +135,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       libsndio-dev:i386 \
                       libspice-protocol-dev:i386 \
                       libspice-server-dev:i386 \
-                      libssh-gcrypt-dev:i386 \
+                      libssh-dev:i386 \
                       libsystemd-dev:i386 \
                       libtasn1-6-dev:i386 \
                       libubsan1:i386 \
diff --git a/tests/docker/dockerfiles/debian-mips64el-cross.docker b/tests/docker/dockerfiles/debian-mips64el-cross.docker
index 9f6c4763c5088..257204eae483c 100644
--- a/tests/docker/dockerfiles/debian-mips64el-cross.docker
+++ b/tests/docker/dockerfiles/debian-mips64el-cross.docker
@@ -58,6 +58,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       tar \
                       tesseract-ocr \
                       tesseract-ocr-eng \
+                      vulkan-tools \
                       xorriso \
                       zstd && \
     eatmydata apt-get autoremove -y && \
@@ -133,7 +134,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       libsndio-dev:mips64el \
                       libspice-protocol-dev:mips64el \
                       libspice-server-dev:mips64el \
-                      libssh-gcrypt-dev:mips64el \
+                      libssh-dev:mips64el \
                       libsystemd-dev:mips64el \
                       libtasn1-6-dev:mips64el \
                       libudev-dev:mips64el \
diff --git a/tests/docker/dockerfiles/debian-mipsel-cross.docker b/tests/docker/dockerfiles/debian-mipsel-cross.docker
index 2e979111e012a..395c84d65bcef 100644
--- a/tests/docker/dockerfiles/debian-mipsel-cross.docker
+++ b/tests/docker/dockerfiles/debian-mipsel-cross.docker
@@ -58,6 +58,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       tar \
                       tesseract-ocr \
                       tesseract-ocr-eng \
+                      vulkan-tools \
                       xorriso \
                       zstd && \
     eatmydata apt-get autoremove -y && \
@@ -133,7 +134,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       libsndio-dev:mipsel \
                       libspice-protocol-dev:mipsel \
                       libspice-server-dev:mipsel \
-                      libssh-gcrypt-dev:mipsel \
+                      libssh-dev:mipsel \
                       libsystemd-dev:mipsel \
                       libtasn1-6-dev:mipsel \
                       libudev-dev:mipsel \
diff --git a/tests/docker/dockerfiles/debian-ppc64el-cross.docker b/tests/docker/dockerfiles/debian-ppc64el-cross.docker
index 8ee450dba09de..1ae227ccded70 100644
--- a/tests/docker/dockerfiles/debian-ppc64el-cross.docker
+++ b/tests/docker/dockerfiles/debian-ppc64el-cross.docker
@@ -58,6 +58,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       tar \
                       tesseract-ocr \
                       tesseract-ocr-eng \
+                      vulkan-tools \
                       xorriso \
                       zstd && \
     eatmydata apt-get autoremove -y && \
@@ -134,7 +135,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       libsndio-dev:ppc64el \
                       libspice-protocol-dev:ppc64el \
                       libspice-server-dev:ppc64el \
-                      libssh-gcrypt-dev:ppc64el \
+                      libssh-dev:ppc64el \
                       libsystemd-dev:ppc64el \
                       libtasn1-6-dev:ppc64el \
                       libubsan1:ppc64el \
diff --git a/tests/docker/dockerfiles/debian-s390x-cross.docker b/tests/docker/dockerfiles/debian-s390x-cross.docker
index f451a07c4c68c..afa81a57ba8d3 100644
--- a/tests/docker/dockerfiles/debian-s390x-cross.docker
+++ b/tests/docker/dockerfiles/debian-s390x-cross.docker
@@ -58,6 +58,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       tar \
                       tesseract-ocr \
                       tesseract-ocr-eng \
+                      vulkan-tools \
                       xorriso \
                       zstd && \
     eatmydata apt-get autoremove -y && \
@@ -133,7 +134,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       libsnappy-dev:s390x \
                       libsndio-dev:s390x \
                       libspice-protocol-dev:s390x \
-                      libssh-gcrypt-dev:s390x \
+                      libssh-dev:s390x \
                       libsystemd-dev:s390x \
                       libtasn1-6-dev:s390x \
                       libubsan1:s390x \
diff --git a/tests/docker/dockerfiles/debian.docker b/tests/docker/dockerfiles/debian.docker
index 505330a9e22ed..5b3bac43ccbe8 100644
--- a/tests/docker/dockerfiles/debian.docker
+++ b/tests/docker/dockerfiles/debian.docker
@@ -87,7 +87,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       libsndio-dev \
                       libspice-protocol-dev \
                       libspice-server-dev \
-                      libssh-gcrypt-dev \
+                      libssh-dev \
                       libsystemd-dev \
                       libtasn1-6-dev \
                       libubsan1 \
@@ -131,6 +131,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       tar \
                       tesseract-ocr \
                       tesseract-ocr-eng \
+                      vulkan-tools \
                       xorriso \
                       zlib1g-dev \
                       zstd && \
diff --git a/tests/docker/dockerfiles/fedora-rust-nightly.docker b/tests/docker/dockerfiles/fedora-rust-nightly.docker
index a8e4fb279a769..fe4a6ed48d68d 100644
--- a/tests/docker/dockerfiles/fedora-rust-nightly.docker
+++ b/tests/docker/dockerfiles/fedora-rust-nightly.docker
@@ -132,6 +132,7 @@ exec "$@"\n' > /usr/bin/nosync && \
                util-linux \
                virglrenderer-devel \
                vte291-devel \
+               vulkan-tools \
                which \
                xen-devel \
                xorriso \
diff --git a/tests/docker/dockerfiles/fedora-win64-cross.docker b/tests/docker/dockerfiles/fedora-win64-cross.docker
index 7dc3eb03f5375..a950344402704 100644
--- a/tests/docker/dockerfiles/fedora-win64-cross.docker
+++ b/tests/docker/dockerfiles/fedora-win64-cross.docker
@@ -61,6 +61,7 @@ exec "$@"\n' > /usr/bin/nosync && \
                tesseract \
                tesseract-langpack-eng \
                util-linux \
+               vulkan-tools \
                which \
                xorriso \
                zstd && \
diff --git a/tests/docker/dockerfiles/fedora.docker b/tests/docker/dockerfiles/fedora.docker
index b64399af662cb..014e3ccf17dde 100644
--- a/tests/docker/dockerfiles/fedora.docker
+++ b/tests/docker/dockerfiles/fedora.docker
@@ -132,6 +132,7 @@ exec "$@"\n' > /usr/bin/nosync && \
                util-linux \
                virglrenderer-devel \
                vte291-devel \
+               vulkan-tools \
                which \
                xen-devel \
                xorriso \
diff --git a/tests/docker/dockerfiles/opensuse-leap.docker b/tests/docker/dockerfiles/opensuse-leap.docker
index 4d5fb3e3a1491..e90225dc23518 100644
--- a/tests/docker/dockerfiles/opensuse-leap.docker
+++ b/tests/docker/dockerfiles/opensuse-leap.docker
@@ -115,6 +115,7 @@ RUN zypper update -y && \
            util-linux \
            virglrenderer-devel \
            vte-devel \
+           vulkan-tools \
            which \
            xen-devel \
            xorriso \
diff --git a/tests/docker/dockerfiles/ubuntu2204.docker b/tests/docker/dockerfiles/ubuntu2204.docker
index e1b70b536dee2..88ce4ef9a9d35 100644
--- a/tests/docker/dockerfiles/ubuntu2204.docker
+++ b/tests/docker/dockerfiles/ubuntu2204.docker
@@ -130,6 +130,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       tar \
                       tesseract-ocr \
                       tesseract-ocr-eng \
+                      vulkan-tools \
                       xorriso \
                       zlib1g-dev \
                       zstd && \
diff --git a/tests/functional/aspeed.py b/tests/functional/aspeed.py
index b52358bb8c617..77dc8930fa41d 100644
--- a/tests/functional/aspeed.py
+++ b/tests/functional/aspeed.py
@@ -7,21 +7,23 @@
 
 class AspeedTest(LinuxKernelTest):
 
-    def do_test_arm_aspeed(self, machine, image):
+    def do_test_arm_aspeed_openbmc(self, machine, image, uboot='2019.04',
+                                   cpu_id='0x0', soc='AST2500 rev A1'):
+        hostname = machine.removesuffix('-bmc')
+
         self.set_machine(machine)
         self.vm.set_console()
-        self.vm.add_args('-drive', 'file=' + image + ',if=mtd,format=raw',
-                         '-net', 'nic', '-snapshot')
+        self.vm.add_args('-drive', f'file={image},if=mtd,format=raw',
+                         '-snapshot')
         self.vm.launch()
 
-        self.wait_for_console_pattern("U-Boot 2016.07")
-        self.wait_for_console_pattern("## Loading kernel from FIT Image at 20080000")
-        self.wait_for_console_pattern("Starting kernel ...")
-        self.wait_for_console_pattern("Booting Linux on physical CPU 0x0")
-        self.wait_for_console_pattern(
-                "aspeed-smc 1e620000.spi: read control register: 203b0641")
-        self.wait_for_console_pattern("ftgmac100 1e660000.ethernet eth0: irq ")
-        self.wait_for_console_pattern("systemd[1]: Set hostname to")
+        self.wait_for_console_pattern(f'U-Boot {uboot}')
+        self.wait_for_console_pattern('## Loading kernel from FIT Image')
+        self.wait_for_console_pattern('Starting kernel ...')
+        self.wait_for_console_pattern(f'Booting Linux on physical CPU {cpu_id}')
+        self.wait_for_console_pattern(f'ASPEED {soc}')
+        self.wait_for_console_pattern('/init as init process')
+        self.wait_for_console_pattern(f'systemd[1]: Hostname set to <{hostname}>.')
 
     def do_test_arm_aspeed_buildroot_start(self, image, cpu_id, pattern='Aspeed EVB'):
         self.require_netdev('user')
diff --git a/tests/functional/meson.build b/tests/functional/meson.build
index b516d21cba167..1b0409179890d 100644
--- a/tests/functional/meson.build
+++ b/tests/functional/meson.build
@@ -15,38 +15,46 @@ test_timeouts = {
   'aarch64_raspi4' : 480,
   'aarch64_rme_virt' : 1200,
   'aarch64_rme_sbsaref' : 1200,
-  'aarch64_sbsaref_alpine' : 720,
+  'aarch64_sbsaref_alpine' : 1200,
   'aarch64_sbsaref_freebsd' : 720,
   'aarch64_tuxrun' : 240,
-  'aarch64_virt' : 720,
+  'aarch64_virt' : 360,
+  'aarch64_virt_gpu' : 480,
   'acpi_bits' : 420,
   'arm_aspeed_palmetto' : 120,
   'arm_aspeed_romulus' : 120,
-  'arm_aspeed_ast2500' : 480,
-  'arm_aspeed_ast2600' : 720,
-  'arm_aspeed_rainier' : 240,
+  'arm_aspeed_witherspoon' : 120,
+  'arm_aspeed_ast2500' : 720,
+  'arm_aspeed_ast2600' : 1200,
+  'arm_aspeed_bletchley' : 120,
+  'arm_aspeed_rainier' : 480,
   'arm_bpim2u' : 500,
   'arm_collie' : 180,
   'arm_cubieboard' : 360,
   'arm_orangepi' : 540,
   'arm_quanta_gsj' : 240,
   'arm_raspi2' : 120,
+  'arm_replay' : 240,
   'arm_tuxrun' : 240,
   'arm_sx1' : 360,
   'intel_iommu': 300,
   'mips_malta' : 120,
   'mipsel_replay' : 480,
+  'mips64el_replay' : 180,
   'netdev_ethtool' : 180,
   'ppc_40p' : 240,
   'ppc64_hv' : 1000,
   'ppc64_powernv' : 480,
   'ppc64_pseries' : 480,
+  'ppc64_replay' : 210,
   'ppc64_tuxrun' : 420,
   'ppc64_mac99' : 120,
   'riscv64_tuxrun' : 120,
   's390x_ccw_virtio' : 420,
   'sh4_tuxrun' : 240,
+  'virtio_balloon': 120,
   'x86_64_kvm_xen' : 180,
+  'x86_64_replay' : 480,
 }
 
 tests_generic_system = [
@@ -69,6 +77,7 @@ tests_aarch64_system_thorough = [
   'aarch64_aspeed',
   'aarch64_raspi3',
   'aarch64_raspi4',
+  'aarch64_replay',
   'aarch64_rme_virt',
   'aarch64_rme_sbsaref',
   'aarch64_sbsaref',
@@ -77,6 +86,7 @@ tests_aarch64_system_thorough = [
   'aarch64_tcg_plugins',
   'aarch64_tuxrun',
   'aarch64_virt',
+  'aarch64_virt_gpu',
   'aarch64_xen',
   'aarch64_xlnx_versal',
   'multiprocess',
@@ -88,6 +98,7 @@ tests_alpha_system_quick = [
 
 tests_alpha_system_thorough = [
   'alpha_clipper',
+  'alpha_replay',
 ]
 
 tests_arm_system_quick = [
@@ -98,8 +109,10 @@ tests_arm_system_thorough = [
   'arm_aspeed_ast1030',
   'arm_aspeed_palmetto',
   'arm_aspeed_romulus',
+  'arm_aspeed_witherspoon',
   'arm_aspeed_ast2500',
   'arm_aspeed_ast2600',
+  'arm_aspeed_bletchley',
   'arm_aspeed_rainier',
   'arm_bpim2u',
   'arm_canona1100',
@@ -111,6 +124,7 @@ tests_arm_system_thorough = [
   'arm_orangepi',
   'arm_quanta_gsj',
   'arm_raspi2',
+  'arm_replay',
   'arm_smdkc210',
   'arm_sx1',
   'arm_vexpress',
@@ -134,6 +148,14 @@ tests_i386_system_quick = [
   'migration',
 ]
 
+test_timeouts += {
+  'hexagon_minivm': 180,
+}
+
+tests_hexagon_system_quick = [
+  'hexagon_minivm',
+]
+
 tests_i386_system_thorough = [
   'i386_tuxrun',
 ]
@@ -145,11 +167,13 @@ tests_loongarch64_system_thorough = [
 tests_m68k_system_thorough = [
   'm68k_mcf5208evb',
   'm68k_nextcube',
+  'm68k_replay',
   'm68k_q800',
   'm68k_tuxrun',
 ]
 
 tests_microblaze_system_thorough = [
+  'microblaze_replay',
   'microblaze_s3adsp1800'
 ]
 
@@ -182,6 +206,7 @@ tests_mips64el_system_thorough = [
 ]
 
 tests_or1k_system_thorough = [
+  'or1k_replay',
   'or1k_sim',
 ]
 
@@ -191,12 +216,12 @@ tests_ppc_system_quick = [
 ]
 
 tests_ppc_system_thorough = [
-  'ppc_405',
   'ppc_40p',
   'ppc_amiga',
   'ppc_bamboo',
   'ppc_mac',
   'ppc_mpc8544ds',
+  'ppc_replay',
   'ppc_sam460ex',
   'ppc_tuxrun',
   'ppc_virtex_ml507',
@@ -211,6 +236,7 @@ tests_ppc64_system_thorough = [
   'ppc64_hv',
   'ppc64_powernv',
   'ppc64_pseries',
+  'ppc64_replay',
   'ppc64_tuxrun',
   'ppc64_mac99',
 ]
@@ -239,6 +265,7 @@ tests_rx_system_thorough = [
 
 tests_s390x_system_thorough = [
   's390x_ccw_virtio',
+  's390x_replay',
   's390x_topology',
   's390x_tuxrun',
 ]
@@ -257,6 +284,7 @@ tests_sparc_system_quick = [
 ]
 
 tests_sparc_system_thorough = [
+  'sparc_replay',
   'sparc_sun4m',
 ]
 
@@ -285,15 +313,18 @@ tests_x86_64_system_thorough = [
   'linux_initrd',
   'multiprocess',
   'netdev_ethtool',
+  'virtio_balloon',
   'virtio_gpu',
   'x86_64_hotplug_blk',
   'x86_64_hotplug_cpu',
   'x86_64_kvm_xen',
+  'x86_64_replay',
   'x86_64_tuxrun',
 ]
 
 tests_xtensa_system_thorough = [
   'xtensa_lx60',
+  'xtensa_replay',
 ]
 
 precache_all = []
@@ -362,7 +393,7 @@ foreach speed : ['quick', 'thorough']
       # 'run_target' logic below & in Makefile.include
       test('func-' + testname,
            python,
-           depends: [test_deps, test_emulator, emulator_modules],
+           depends: [test_deps, test_emulator, emulator_modules, plugin_modules],
            env: test_env,
            args: [testpath],
            protocol: 'tap',
diff --git a/tests/functional/qemu_test/__init__.py b/tests/functional/qemu_test/__init__.py
index 5c972843a6d05..45f7befa374ae 100644
--- a/tests/functional/qemu_test/__init__.py
+++ b/tests/functional/qemu_test/__init__.py
@@ -7,7 +7,7 @@
 
 
 from .asset import Asset
-from .config import BUILD_DIR
+from .config import BUILD_DIR, dso_suffix
 from .cmd import is_readable_executable_file, \
     interrupt_interactive_console_until_pattern, wait_for_console_pattern, \
     exec_command, exec_command_and_wait_for_pattern, get_qemu_img, which
diff --git a/tests/functional/qemu_test/asset.py b/tests/functional/qemu_test/asset.py
index f0730695f09c6..704b84d0ea6ef 100644
--- a/tests/functional/qemu_test/asset.py
+++ b/tests/functional/qemu_test/asset.py
@@ -17,6 +17,14 @@
 from shutil import copyfileobj
 from urllib.error import HTTPError
 
+class AssetError(Exception):
+    def __init__(self, asset, msg, transient=False):
+        self.url = asset.url
+        self.msg = msg
+        self.transient = transient
+
+    def __str__(self):
+        return "%s: %s" % (self.url, self.msg)
 
 # Instances of this class must be declared as class level variables
 # starting with a name "ASSET_". This enables the pre-caching logic
@@ -51,7 +59,7 @@ def _check(self, cache_file):
         elif len(self.hash) == 128:
             hl = hashlib.sha512()
         else:
-            raise Exception("unknown hash type")
+            raise AssetError(self, "unknown hash type")
 
         # Calculate the hash of the file:
         with open(cache_file, 'rb') as file:
@@ -111,7 +119,8 @@ def fetch(self):
             return str(self.cache_file)
 
         if not self.fetchable():
-            raise Exception("Asset cache is invalid and downloads disabled")
+            raise AssetError(self,
+                             "Asset cache is invalid and downloads disabled")
 
         self.log.info("Downloading %s to %s...", self.url, self.cache_file)
         tmp_cache_file = self.cache_file.with_suffix(".download")
@@ -121,6 +130,20 @@ def fetch(self):
                 with tmp_cache_file.open("xb") as dst:
                     with urllib.request.urlopen(self.url) as resp:
                         copyfileobj(resp, dst)
+                        length_hdr = resp.getheader("Content-Length")
+
+                # Verify downloaded file size against length metadata, if
+                # available.
+                if length_hdr is not None:
+                    length = int(length_hdr)
+                    fsize = tmp_cache_file.stat().st_size
+                    if fsize != length:
+                        self.log.error("Unable to download %s: "
+                                       "connection closed before "
+                                       "transfer complete (%d/%d)",
+                                       self.url, fsize, length)
+                        tmp_cache_file.unlink()
+                        continue
                 break
             except FileExistsError:
                 self.log.debug("%s already exists, "
@@ -133,10 +156,23 @@ def fetch(self):
                                tmp_cache_file)
                 tmp_cache_file.unlink()
                 continue
+            except HTTPError as e:
+                tmp_cache_file.unlink()
+                self.log.error("Unable to download %s: HTTP error %d",
+                               self.url, e.code)
+                # Treat 404 as fatal, since it is highly likely to
+                # indicate a broken test rather than a transient
+                # server or networking problem
+                if e.code == 404:
+                    raise AssetError(self, "Unable to download: "
+                                     "HTTP error %d" % e.code)
+                continue
             except Exception as e:
-                self.log.error("Unable to download %s: %s", self.url, e)
                 tmp_cache_file.unlink()
-                raise
+                raise AssetError(self, "Unable to download: " % e)
+
+        if not os.path.exists(tmp_cache_file):
+            raise AssetError(self, "Download retries exceeded", transient=True)
 
         try:
             # Set these just for informational purposes
@@ -150,8 +186,7 @@ def fetch(self):
 
         if not self._check(tmp_cache_file):
             tmp_cache_file.unlink()
-            raise Exception("Hash of %s does not match %s" %
-                            (self.url, self.hash))
+            raise AssetError(self, "Hash does not match %s" % self.hash)
         tmp_cache_file.replace(self.cache_file)
         # Remove write perms to stop tests accidentally modifying them
         os.chmod(self.cache_file, stat.S_IRUSR | stat.S_IRGRP)
@@ -173,15 +208,10 @@ def precache_test(test):
                 log.info("Attempting to cache '%s'" % asset)
                 try:
                     asset.fetch()
-                except HTTPError as e:
-                    # Treat 404 as fatal, since it is highly likely to
-                    # indicate a broken test rather than a transient
-                    # server or networking problem
-                    if e.code == 404:
+                except AssetError as e:
+                    if not e.transient:
                         raise
-
-                    log.debug(f"HTTP error {e.code} from {asset.url} " +
-                              "skipping asset precache")
+                    log.error("%s: skipping asset precache" % e)
 
         log.removeHandler(handler)
 
diff --git a/tests/functional/qemu_test/config.py b/tests/functional/qemu_test/config.py
index edd75b7fd06b9..6d4c9c3ce1db3 100644
--- a/tests/functional/qemu_test/config.py
+++ b/tests/functional/qemu_test/config.py
@@ -13,6 +13,7 @@
 
 import os
 from pathlib import Path
+import platform
 
 
 def _source_dir():
@@ -34,3 +35,14 @@ def _build_dir():
     raise Exception("Cannot identify build dir, set QEMU_BUILD_ROOT")
 
 BUILD_DIR = _build_dir()
+
+def dso_suffix():
+    '''Return the dynamic libraries suffix for the current platform'''
+
+    if platform.system() == "Darwin":
+        return "dylib"
+
+    if platform.system() == "Windows":
+        return "dll"
+
+    return "so"
diff --git a/tests/functional/qemu_test/linuxkernel.py b/tests/functional/qemu_test/linuxkernel.py
index 2c9598102d04a..2aca0ee3cd03b 100644
--- a/tests/functional/qemu_test/linuxkernel.py
+++ b/tests/functional/qemu_test/linuxkernel.py
@@ -3,8 +3,12 @@
 # This work is licensed under the terms of the GNU GPL, version 2 or
 # later.  See the COPYING file in the top-level directory.
 
+import hashlib
+import urllib.request
+
+from .cmd import wait_for_console_pattern, exec_command_and_wait_for_pattern
 from .testcase import QemuSystemTest
-from .cmd import wait_for_console_pattern
+from .utils import get_usernet_hostfwd_port
 
 
 class LinuxKernelTest(QemuSystemTest):
@@ -26,3 +30,23 @@ def launch_kernel(self, kernel, initrd=None, dtb=None, console_index=0,
         self.vm.launch()
         if wait_for:
                 self.wait_for_console_pattern(wait_for)
+
+    def check_http_download(self, filename, hashsum, guestport=8080,
+                            pythoncmd='python3 -m http.server'):
+        exec_command_and_wait_for_pattern(self,
+                        f'{pythoncmd} {guestport} & sleep 1',
+                        f'Serving HTTP on 0.0.0.0 port {guestport}')
+        hl = hashlib.sha256()
+        hostport = get_usernet_hostfwd_port(self.vm)
+        url = f'http://localhost:{hostport}{filename}'
+        self.log.info(f'Downloading {url} ...')
+        with urllib.request.urlopen(url) as response:
+            while True:
+                chunk = response.read(1 << 20)
+                if not chunk:
+                    break
+                hl.update(chunk)
+
+        digest = hl.hexdigest()
+        self.log.info(f'sha256sum of download is {digest}.')
+        self.assertEqual(digest, hashsum)
diff --git a/tests/functional/qemu_test/testcase.py b/tests/functional/qemu_test/testcase.py
index 869f3949fe9f1..50d232a7c6375 100644
--- a/tests/functional/qemu_test/testcase.py
+++ b/tests/functional/qemu_test/testcase.py
@@ -27,13 +27,12 @@
 
 from .archive import archive_extract
 from .asset import Asset
-from .config import BUILD_DIR
+from .config import BUILD_DIR, dso_suffix
 from .uncompress import uncompress
 
 
 class QemuBaseTest(unittest.TestCase):
 
-    qemu_bin = os.getenv('QEMU_TEST_QEMU_BINARY')
     arch = None
 
     workdir = None
@@ -184,6 +183,16 @@ def scratch_file(self, *args):
     def log_file(self, *args):
         return str(Path(self.outputdir, *args))
 
+    '''
+    @params plugin name
+
+    Return the full path to the plugin taking into account any host OS
+    specific suffixes.
+    '''
+    def plugin_file(self, plugin_name):
+        sfx = dso_suffix()
+        return os.path.join('tests', 'tcg', 'plugins', f'{plugin_name}.{sfx}')
+
     def assets_available(self):
         for name, asset in vars(self.__class__).items():
             if name.startswith("ASSET_") and type(asset) == Asset:
@@ -192,7 +201,8 @@ def assets_available(self):
                     return False
         return True
 
-    def setUp(self, bin_prefix):
+    def setUp(self):
+        self.qemu_bin = os.getenv('QEMU_TEST_QEMU_BINARY')
         self.assertIsNotNone(self.qemu_bin, 'QEMU_TEST_QEMU_BINARY must be set')
         self.arch = self.qemu_bin.split('-')[-1]
         self.socketdir = None
@@ -254,7 +264,7 @@ def main():
 class QemuUserTest(QemuBaseTest):
 
     def setUp(self):
-        super().setUp('qemu-')
+        super().setUp()
         self._ldpath = []
 
     def add_ldpath(self, ldpath):
@@ -277,7 +287,7 @@ class QemuSystemTest(QemuBaseTest):
     def setUp(self):
         self._vms = {}
 
-        super().setUp('qemu-system-')
+        super().setUp()
 
         console_log = logging.getLogger('console')
         console_log.setLevel(logging.DEBUG)
diff --git a/tests/functional/qemu_test/tuxruntest.py b/tests/functional/qemu_test/tuxruntest.py
index 41a4945a14f5d..ad74156f9c5e2 100644
--- a/tests/functional/qemu_test/tuxruntest.py
+++ b/tests/functional/qemu_test/tuxruntest.py
@@ -24,17 +24,6 @@ class TuxRunBaselineTest(QemuSystemTest):
     # Tests are ~10-40s, allow for --debug/--enable-gcov overhead
     timeout = 100
 
-    def get_tag(self, tagname, default=None):
-        """
-        Get the metadata tag or return the default.
-        """
-        utag = self._get_unique_tag_val(tagname)
-        print(f"{tagname}/{default} -> {utag}")
-        if utag:
-            return utag
-
-        return default
-
     def setUp(self):
         super().setUp()
 
diff --git a/tests/functional/qemu_test/uncompress.py b/tests/functional/qemu_test/uncompress.py
index 76dcf22385617..ce79da1b68600 100644
--- a/tests/functional/qemu_test/uncompress.py
+++ b/tests/functional/qemu_test/uncompress.py
@@ -13,7 +13,7 @@
 import stat
 import shutil
 from urllib.parse import urlparse
-from subprocess import check_call, CalledProcessError
+from subprocess import run, CalledProcessError, DEVNULL
 
 from .asset import Asset
 
@@ -46,8 +46,8 @@ def zstd_uncompress(zstd_path, output_path):
         return
 
     try:
-        check_call(['zstd', "-f", "-d", zstd_path,
-                    "-o", output_path])
+        run(['zstd', "-f", "-d", zstd_path,
+             "-o", output_path], capture_output=True, check=True)
     except CalledProcessError as e:
         os.remove(output_path)
         raise Exception(
diff --git a/tests/functional/replay_kernel.py b/tests/functional/replay_kernel.py
index 8e8ac7d052f5b..80795eb0520e9 100644
--- a/tests/functional/replay_kernel.py
+++ b/tests/functional/replay_kernel.py
@@ -34,7 +34,7 @@ def run_vm(self, kernel_path, kernel_command_line, console_pattern,
 
         logger = logging.getLogger('replay')
         start_time = time.time()
-        vm = self.get_vm()
+        vm = self.get_vm(name='recording' if record else 'replay')
         vm.set_console()
         if record:
             logger.info('recording the execution...')
diff --git a/tests/functional/test_aarch64_aspeed.py b/tests/functional/test_aarch64_aspeed.py
index 9595498ace797..c25c966278236 100755
--- a/tests/functional/test_aarch64_aspeed.py
+++ b/tests/functional/test_aarch64_aspeed.py
@@ -27,37 +27,37 @@ def do_test_aarch64_aspeed_sdk_start(self, image):
         wait_for_console_pattern(self, '## Loading kernel from FIT Image')
         wait_for_console_pattern(self, 'Starting kernel ...')
 
-    ASSET_SDK_V903_AST2700 = Asset(
-            'https://github.com/AspeedTech-BMC/openbmc/releases/download/v09.03/ast2700-default-obmc.tar.gz',
-            '91225f50d255e2905ba8d8e0c80b71b9d157c3609770c7a740cd786370d85a77')
+    ASSET_SDK_V905_AST2700 = Asset(
+            'https://github.com/AspeedTech-BMC/openbmc/releases/download/v09.05/ast2700-a0-default-obmc.tar.gz',
+            'cfbbd1cce72f2a3b73b9080c41eecdadebb7077fba4f7806d72ac99f3e84b74a')
 
-    def test_aarch64_ast2700_evb_sdk_v09_03(self):
-        self.set_machine('ast2700-evb')
-
-        self.archive_extract(self.ASSET_SDK_V903_AST2700)
+    ASSET_SDK_V905_AST2700A1 = Asset(
+            'https://github.com/AspeedTech-BMC/openbmc/releases/download/v09.05/ast2700-default-obmc.tar.gz',
+            'c1f4496aec06743c812a6e9a1a18d032f34d62f3ddb6956e924fef62aa2046a5')
 
+    def start_ast2700_test(self, name):
         num_cpu = 4
-        uboot_size = os.path.getsize(self.scratch_file('ast2700-default',
+        uboot_size = os.path.getsize(self.scratch_file(name,
                                                        'u-boot-nodtb.bin'))
         uboot_dtb_load_addr = hex(0x400000000 + uboot_size)
 
         load_images_list = [
             {
                 'addr': '0x400000000',
-                'file': self.scratch_file('ast2700-default',
+                'file': self.scratch_file(name,
                                           'u-boot-nodtb.bin')
             },
             {
                 'addr': str(uboot_dtb_load_addr),
-                'file': self.scratch_file('ast2700-default', 'u-boot.dtb')
+                'file': self.scratch_file(name, 'u-boot.dtb')
             },
             {
                 'addr': '0x430000000',
-                'file': self.scratch_file('ast2700-default', 'bl31.bin')
+                'file': self.scratch_file(name, 'bl31.bin')
             },
             {
                 'addr': '0x430080000',
-                'file': self.scratch_file('ast2700-default', 'optee',
+                'file': self.scratch_file(name, 'optee',
                                           'tee-raw.bin')
             }
         ]
@@ -76,23 +76,34 @@ def test_aarch64_ast2700_evb_sdk_v09_03(self):
         self.vm.add_args('-device',
                          'tmp105,bus=aspeed.i2c.bus.1,address=0x4d,id=tmp-test')
         self.do_test_aarch64_aspeed_sdk_start(
-            self.scratch_file('ast2700-default', 'image-bmc'))
+            self.scratch_file(name, 'image-bmc'))
 
-        wait_for_console_pattern(self, 'ast2700-default login:')
+        wait_for_console_pattern(self, f'{name} login:')
 
         exec_command_and_wait_for_pattern(self, 'root', 'Password:')
-        exec_command_and_wait_for_pattern(self,
-            '0penBmc', 'root@ast2700-default:~#')
+        exec_command_and_wait_for_pattern(self, '0penBmc', f'root@{name}:~#')
 
         exec_command_and_wait_for_pattern(self,
             'echo lm75 0x4d > /sys/class/i2c-dev/i2c-1/device/new_device ',
             'i2c i2c-1: new_device: Instantiated device lm75 at 0x4d');
         exec_command_and_wait_for_pattern(self,
-            'cat /sys/class/hwmon/hwmon20/temp1_input', '0')
+            'cat /sys/bus/i2c/devices/1-004d/hwmon/hwmon*/temp1_input', '0')
         self.vm.cmd('qom-set', path='/machine/peripheral/tmp-test',
                     property='temperature', value=18000)
         exec_command_and_wait_for_pattern(self,
-            'cat /sys/class/hwmon/hwmon20/temp1_input', '18000')
+            'cat /sys/bus/i2c/devices/1-004d/hwmon/hwmon*/temp1_input', '18000')
+
+    def test_aarch64_ast2700_evb_sdk_v09_05(self):
+        self.set_machine('ast2700-evb')
+
+        self.archive_extract(self.ASSET_SDK_V905_AST2700)
+        self.start_ast2700_test('ast2700-a0-default')
+
+    def test_aarch64_ast2700a1_evb_sdk_v09_05(self):
+        self.set_machine('ast2700a1-evb')
+
+        self.archive_extract(self.ASSET_SDK_V905_AST2700A1)
+        self.start_ast2700_test('ast2700-default')
 
 
 if __name__ == '__main__':
diff --git a/tests/functional/test_aarch64_replay.py b/tests/functional/test_aarch64_replay.py
new file mode 100755
index 0000000000000..04cde433bcf54
--- /dev/null
+++ b/tests/functional/test_aarch64_replay.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python3
+#
+# Replay test that boots a Linux kernel on an aarch64 machine
+# and checks the console
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+from qemu_test import Asset
+from replay_kernel import ReplayKernelBase
+
+
+class Aarch64Replay(ReplayKernelBase):
+
+    ASSET_KERNEL = Asset(
+        ('https://archives.fedoraproject.org/pub/archive/fedora/linux/'
+         'releases/29/Everything/aarch64/os/images/pxeboot/vmlinuz'),
+        '7e1430b81c26bdd0da025eeb8fbd77b5dc961da4364af26e771bd39f379cbbf7')
+
+    def test_aarch64_virt(self):
+        self.set_machine('virt')
+        self.cpu = 'cortex-a53'
+        kernel_path = self.ASSET_KERNEL.fetch()
+        kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE +
+                               'console=ttyAMA0')
+        console_pattern = 'VFS: Cannot open root device'
+        self.run_rr(kernel_path, kernel_command_line, console_pattern)
+
+
+if __name__ == '__main__':
+    ReplayKernelBase.main()
diff --git a/tests/functional/test_aarch64_rme_sbsaref.py b/tests/functional/test_aarch64_rme_sbsaref.py
index 93bb528338657..ddcc9493a6cf4 100755
--- a/tests/functional/test_aarch64_rme_sbsaref.py
+++ b/tests/functional/test_aarch64_rme_sbsaref.py
@@ -60,7 +60,8 @@ def test_aarch64_rme_sbsaref(self):
 
         self.vm.launch()
         # Wait for host VM boot to complete.
-        wait_for_console_pattern(self, 'Welcome to Buildroot')
+        wait_for_console_pattern(self, 'Welcome to Buildroot',
+                                 failure_message='Synchronous Exception at')
         exec_command_and_wait_for_pattern(self, 'root', '#')
 
         test_realms_guest(self)
diff --git a/tests/functional/test_aarch64_rme_virt.py b/tests/functional/test_aarch64_rme_virt.py
index 42b9229b4cb0d..38e01721a4e12 100755
--- a/tests/functional/test_aarch64_rme_virt.py
+++ b/tests/functional/test_aarch64_rme_virt.py
@@ -89,7 +89,8 @@ def test_aarch64_rme_virt(self):
 
         self.vm.launch()
         # Wait for host VM boot to complete.
-        wait_for_console_pattern(self, 'Welcome to Buildroot')
+        wait_for_console_pattern(self, 'Welcome to Buildroot',
+                                 failure_message='Synchronous Exception at')
         exec_command_and_wait_for_pattern(self, 'root', '#')
 
         test_realms_guest(self)
diff --git a/tests/functional/test_aarch64_tcg_plugins.py b/tests/functional/test_aarch64_tcg_plugins.py
index 7e8beacc833c9..4ea71f5f882e8 100755
--- a/tests/functional/test_aarch64_tcg_plugins.py
+++ b/tests/functional/test_aarch64_tcg_plugins.py
@@ -13,6 +13,7 @@
 
 import tempfile
 import mmap
+import os
 import re
 
 from qemu.machine.machine import VMLaunchFailure
@@ -74,7 +75,7 @@ def test_aarch64_virt_insn(self):
                                                  suffix=".log")
 
         self.run_vm(kernel_path, kernel_command_line,
-                    "tests/tcg/plugins/libinsn.so", plugin_log.name,
+                    self.plugin_file('libinsn'), plugin_log.name,
                     console_pattern)
 
         with plugin_log as lf, \
@@ -100,7 +101,7 @@ def test_aarch64_virt_insn_icount(self):
                                                  suffix=".log")
 
         self.run_vm(kernel_path, kernel_command_line,
-                    "tests/tcg/plugins/libinsn.so", plugin_log.name,
+                    self.plugin_file('libinsn'), plugin_log.name,
                     console_pattern,
                     args=('-icount', 'shift=1'))
 
diff --git a/tests/functional/test_aarch64_virt.py b/tests/functional/test_aarch64_virt.py
index 95f5ce8b4c021..884aad7af61f9 100755
--- a/tests/functional/test_aarch64_virt.py
+++ b/tests/functional/test_aarch64_virt.py
@@ -134,77 +134,6 @@ def test_aarch64_virt_gicv2(self):
         self.common_aarch64_virt("virt,gic-version=2")
 
 
-    ASSET_VIRT_GPU_KERNEL = Asset(
-        'https://fileserver.linaro.org/s/ce5jXBFinPxtEdx/'
-        'download?path=%2F&files='
-        'Image',
-        '89e5099d26166204cc5ca4bb6d1a11b92c217e1f82ec67e3ba363d09157462f6')
-
-    ASSET_VIRT_GPU_ROOTFS = Asset(
-        'https://fileserver.linaro.org/s/ce5jXBFinPxtEdx/'
-        'download?path=%2F&files='
-        'rootfs.ext4.zstd',
-        '792da7573f5dc2913ddb7c638151d4a6b2d028a4cb2afb38add513c1924bdad4')
-
-    @skipIfMissingCommands('zstd')
-    def test_aarch64_virt_with_gpu(self):
-        # This tests boots with a buildroot test image that contains
-        # vkmark and other GPU exercising tools. We run a headless
-        # weston that nevertheless still exercises the virtio-gpu
-        # backend.
-
-        self.set_machine('virt')
-        self.require_accelerator("tcg")
-
-        kernel_path = self.ASSET_VIRT_GPU_KERNEL.fetch()
-        image_path = self.uncompress(self.ASSET_VIRT_GPU_ROOTFS, format="zstd")
-
-        self.vm.set_console()
-        kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE +
-                               'console=ttyAMA0 root=/dev/vda')
-
-        self.vm.add_args("-accel", "tcg")
-        self.vm.add_args("-cpu", "neoverse-v1,pauth-impdef=on")
-        self.vm.add_args("-machine", "virt,gic-version=max",
-                         '-kernel', kernel_path,
-                         '-append', kernel_command_line)
-        self.vm.add_args("-smp", "2", "-m", "2048")
-        self.vm.add_args("-device",
-                         "virtio-gpu-gl-pci,hostmem=4G,blob=on,venus=on")
-        self.vm.add_args("-display", "egl-headless")
-        self.vm.add_args("-display", "dbus,gl=on")
-        self.vm.add_args("-device", "virtio-blk-device,drive=hd0")
-        self.vm.add_args("-blockdev",
-                         "driver=raw,file.driver=file,"
-                         "node-name=hd0,read-only=on,"
-                         f"file.filename={image_path}")
-        self.vm.add_args("-snapshot")
-
-        try:
-            self.vm.launch()
-        except VMLaunchFailure as excp:
-            if "old virglrenderer, blob resources unsupported" in excp.output:
-                self.skipTest("No blob support for virtio-gpu")
-            elif "old virglrenderer, venus unsupported" in excp.output:
-                self.skipTest("No venus support for virtio-gpu")
-            elif "egl: no drm render node available" in excp.output:
-                self.skipTest("Can't access host DRM render node")
-            elif "'type' does not accept value 'egl-headless'" in excp.output:
-                self.skipTest("egl-headless support is not available")
-            else:
-                self.log.info(f"unhandled launch failure: {excp.output}")
-                raise excp
-
-        self.wait_for_console_pattern('buildroot login:')
-        exec_command(self, 'root')
-        exec_command(self, 'export XDG_RUNTIME_DIR=/tmp')
-        exec_command_and_wait_for_pattern(self,
-                                          "weston -B headless "
-                                          "--renderer gl "
-                                          "--shell kiosk "
-                                          "-- vkmark -b:duration=1.0",
-                                          "vkmark Score")
-
 
 if __name__ == '__main__':
     QemuSystemTest.main()
diff --git a/tests/functional/test_aarch64_virt_gpu.py b/tests/functional/test_aarch64_virt_gpu.py
new file mode 100755
index 0000000000000..314d994a7aad6
--- /dev/null
+++ b/tests/functional/test_aarch64_virt_gpu.py
@@ -0,0 +1,138 @@
+#!/usr/bin/env python3
+#
+# Functional tests for the various graphics modes we can support.
+#
+# Copyright (c) 2024, 2025 Linaro Ltd.
+#
+# Author:
+#  Alex Bennée <alex.bennee@linaro.org>
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+from qemu.machine.machine import VMLaunchFailure
+
+from qemu_test import Asset
+from qemu_test import exec_command_and_wait_for_pattern as ec_and_wait
+from qemu_test import skipIfMissingCommands
+
+from qemu_test.linuxkernel import LinuxKernelTest
+
+from re import search
+from subprocess import check_output, CalledProcessError
+
+class Aarch64VirtGPUMachine(LinuxKernelTest):
+
+    ASSET_VIRT_GPU_KERNEL = Asset(
+        'https://fileserver.linaro.org/s/ce5jXBFinPxtEdx/'
+        'download?path=%2F&files='
+        'Image.6.12.16.aarch64',
+        '7888c51c55d37e86bbbdeb5acea9f08c34e6b0f03c1f5b2463285f6a6f6eec8b')
+
+    ASSET_VIRT_GPU_ROOTFS = Asset(
+        'https://fileserver.linaro.org/s/ce5jXBFinPxtEdx/'
+        'download?path=%2F&files='
+        'rootfs.aarch64.ext2.zstd',
+        'd45118c899420b7e673f1539a37a35480134b3e36e3a59e2cb69b1781cbb14ef')
+
+    def _launch_virt_gpu(self, gpu_device):
+
+        self.set_machine('virt')
+        self.require_accelerator("tcg")
+
+        kernel_path = self.ASSET_VIRT_GPU_KERNEL.fetch()
+        image_path = self.uncompress(self.ASSET_VIRT_GPU_ROOTFS, format="zstd")
+
+        self.vm.set_console()
+        kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE +
+                               'console=ttyAMA0 root=/dev/vda')
+
+        self.vm.add_args("-accel", "tcg")
+        self.vm.add_args("-cpu", "cortex-a72")
+        self.vm.add_args("-machine", "virt,gic-version=max",
+                         '-kernel', kernel_path,
+                         '-append', kernel_command_line)
+        self.vm.add_args("-smp", "2", "-m", "2048")
+        self.vm.add_args("-device", gpu_device)
+        self.vm.add_args("-display", "egl-headless")
+        self.vm.add_args("-display", "dbus,gl=on")
+
+        self.vm.add_args("-device", "virtio-blk-device,drive=hd0")
+        self.vm.add_args("-blockdev",
+                         "driver=raw,file.driver=file,"
+                         "node-name=hd0,read-only=on,"
+                         f"file.filename={image_path}")
+        self.vm.add_args("-snapshot")
+
+        try:
+            self.vm.launch()
+        except VMLaunchFailure as excp:
+            if "old virglrenderer, blob resources unsupported" in excp.output:
+                self.skipTest("No blob support for virtio-gpu")
+            elif "old virglrenderer, venus unsupported" in excp.output:
+                self.skipTest("No venus support for virtio-gpu")
+            elif "egl: no drm render node available" in excp.output:
+                self.skipTest("Can't access host DRM render node")
+            elif "'type' does not accept value 'egl-headless'" in excp.output:
+                self.skipTest("egl-headless support is not available")
+            else:
+                self.log.info("unhandled launch failure: %s", excp.output)
+                raise excp
+
+        self.wait_for_console_pattern('buildroot login:')
+        ec_and_wait(self, 'root', '#')
+
+    def _run_virt_weston_test(self, cmd, fail = None):
+
+        # make it easier to detect successful return to shell
+        PS1 = 'RES=[$?] # '
+        OK_CMD = 'RES=[0] # '
+
+        ec_and_wait(self, 'export XDG_RUNTIME_DIR=/tmp', '#')
+        ec_and_wait(self, f"export PS1='{PS1}'", OK_CMD)
+        full_cmd = f"weston -B headless --renderer gl --shell kiosk -- {cmd}"
+        ec_and_wait(self, full_cmd, OK_CMD, fail)
+
+    @skipIfMissingCommands('zstd')
+    def test_aarch64_virt_with_virgl_gpu(self):
+
+        self.require_device('virtio-gpu-gl-pci')
+
+        self._launch_virt_gpu("virtio-gpu-gl-pci")
+
+        # subset of the glmark tests
+        tests = " ".join([f"-b {test}" for test in
+                          ["build", "texture", "shading",
+                           "bump", "desktop", "buffer"]])
+
+        self._run_virt_weston_test("glmark2-wayland --validate " + tests)
+
+    @skipIfMissingCommands('zstd')
+    def test_aarch64_virt_with_virgl_blobs_gpu(self):
+
+        self.require_device('virtio-gpu-gl-pci')
+
+        self._launch_virt_gpu("virtio-gpu-gl-pci,hostmem=4G,blob=on")
+        self._run_virt_weston_test("glmark2-wayland -b:duration=1.0")
+
+    @skipIfMissingCommands('zstd')
+    @skipIfMissingCommands('vulkaninfo')
+    def test_aarch64_virt_with_vulkan_gpu(self):
+
+        self.require_device('virtio-gpu-gl-pci')
+
+        try:
+            vk_info = check_output(["vulkaninfo", "--summary"],
+                                   encoding="utf-8")
+        except CalledProcessError as excp:
+            self.skipTest(f"Miss-configured host Vulkan: {excp.output}")
+
+        if search(r"driverID\s+=\s+DRIVER_ID_NVIDIA_PROPRIETARY", vk_info):
+            self.skipTest("Test skipped on NVIDIA proprietary driver")
+
+        self._launch_virt_gpu("virtio-gpu-gl-pci,hostmem=4G,blob=on,venus=on")
+        self._run_virt_weston_test("vkmark -b:duration=1.0",
+                                   "debug: stuck in fence wait with iter at")
+
+
+if __name__ == '__main__':
+    LinuxKernelTest.main()
diff --git a/tests/functional/test_alpha_replay.py b/tests/functional/test_alpha_replay.py
new file mode 100755
index 0000000000000..24a17ef5904e7
--- /dev/null
+++ b/tests/functional/test_alpha_replay.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python3
+#
+# Replay test that boots a Linux kernel on an Alpha machine
+# and checks the console
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+from qemu_test import Asset
+from replay_kernel import ReplayKernelBase
+
+
+class AlphaReplay(ReplayKernelBase):
+
+    ASSET_KERNEL = Asset(
+        ('http://archive.debian.org/debian/dists/lenny/main/installer-alpha/'
+         '20090123lenny10/images/cdrom/vmlinuz'),
+        '34f53da3fa32212e4f00b03cb944b2ad81c06bc8faaf9b7193b2e544ceeca576')
+
+    def test_clipper(self):
+        self.set_machine('clipper')
+        kernel_path = self.uncompress(self.ASSET_KERNEL, format='gz')
+        kernel_command_line = self.KERNEL_COMMON_COMMAND_LINE + 'console=ttyS0'
+        console_pattern = 'Kernel command line: %s' % kernel_command_line
+        self.run_rr(kernel_path, kernel_command_line, console_pattern, shift=9,
+            args=('-nodefaults', ))
+
+
+if __name__ == '__main__':
+    ReplayKernelBase.main()
diff --git a/tests/functional/test_arm_aspeed_bletchley.py b/tests/functional/test_arm_aspeed_bletchley.py
new file mode 100644
index 0000000000000..0da856c5ed4d2
--- /dev/null
+++ b/tests/functional/test_arm_aspeed_bletchley.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python3
+#
+# Functional test that boots the ASPEED machines
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+from qemu_test import Asset
+from aspeed import AspeedTest
+
+
+class BletchleyMachine(AspeedTest):
+
+    ASSET_BLETCHLEY_FLASH = Asset(
+        'https://github.com/legoater/qemu-aspeed-boot/raw/master/images/bletchley-bmc/openbmc-20250128071329/obmc-phosphor-image-bletchley-20250128071329.static.mtd.xz',
+        'db21d04d47d7bb2a276f59d308614b4dfb70b9c7c81facbbca40a3977a2d8844');
+
+    def test_arm_ast2600_bletchley_openbmc(self):
+        image_path = self.uncompress(self.ASSET_BLETCHLEY_FLASH)
+
+        self.do_test_arm_aspeed_openbmc('bletchley-bmc', image=image_path,
+                                        uboot='2019.04', cpu_id='0xf00',
+                                        soc='AST2600 rev A3');
+
+if __name__ == '__main__':
+    AspeedTest.main()
diff --git a/tests/functional/test_arm_aspeed_palmetto.py b/tests/functional/test_arm_aspeed_palmetto.py
index 6588c02aad795..35d832bc98ec5 100755
--- a/tests/functional/test_arm_aspeed_palmetto.py
+++ b/tests/functional/test_arm_aspeed_palmetto.py
@@ -7,18 +7,19 @@
 from qemu_test import Asset
 from aspeed import AspeedTest
 
+
 class PalmettoMachine(AspeedTest):
 
     ASSET_PALMETTO_FLASH = Asset(
-        ('https://github.com/openbmc/openbmc/releases/download/2.9.0/'
-         'obmc-phosphor-image-palmetto.static.mtd'),
-        '3e13bbbc28e424865dc42f35ad672b10f2e82cdb11846bb28fa625b48beafd0d');
+        'https://github.com/legoater/qemu-aspeed-boot/raw/master/images/palmetto-bmc/openbmc-20250128071432/obmc-phosphor-image-palmetto-20250128071432.static.mtd',
+        'bce7c392eec75c707a91cfc8fad7ca9a69d7e4f10df936930d65c1cb9897ac81');
 
-    def test_arm_ast2400_palmetto_openbmc_v2_9_0(self):
+    def test_arm_ast2400_palmetto_openbmc(self):
         image_path = self.ASSET_PALMETTO_FLASH.fetch()
 
-        self.do_test_arm_aspeed('palmetto-bmc', image_path)
-
+        self.do_test_arm_aspeed_openbmc('palmetto-bmc', image=image_path,
+                                        uboot='2019.04', cpu_id='0x0',
+                                        soc='AST2400 rev A1');
 
 if __name__ == '__main__':
     AspeedTest.main()
diff --git a/tests/functional/test_arm_aspeed_romulus.py b/tests/functional/test_arm_aspeed_romulus.py
index 747b616201ce2..b97ed951b1f3d 100755
--- a/tests/functional/test_arm_aspeed_romulus.py
+++ b/tests/functional/test_arm_aspeed_romulus.py
@@ -7,18 +7,19 @@
 from qemu_test import Asset
 from aspeed import AspeedTest
 
+
 class RomulusMachine(AspeedTest):
 
     ASSET_ROMULUS_FLASH = Asset(
-        ('https://github.com/openbmc/openbmc/releases/download/2.9.0/'
-         'obmc-phosphor-image-romulus.static.mtd'),
-        '820341076803f1955bc31e647a512c79f9add4f5233d0697678bab4604c7bb25')
+        'https://github.com/legoater/qemu-aspeed-boot/raw/master/images/romulus-bmc/openbmc-20250128071340/obmc-phosphor-image-romulus-20250128071340.static.mtd',
+        '6d031376440c82ed9d087d25e9fa76aea75b42f80daa252ec402c0bc3cf6cf5b');
 
-    def test_arm_ast2500_romulus_openbmc_v2_9_0(self):
+    def test_arm_ast2500_romulus_openbmc(self):
         image_path = self.ASSET_ROMULUS_FLASH.fetch()
 
-        self.do_test_arm_aspeed('romulus-bmc', image_path)
-
+        self.do_test_arm_aspeed_openbmc('romulus-bmc', image=image_path,
+                                        uboot='2019.04', cpu_id='0x0',
+                                        soc='AST2500 rev A1');
 
 if __name__ == '__main__':
     AspeedTest.main()
diff --git a/tests/functional/test_arm_aspeed_witherspoon.py b/tests/functional/test_arm_aspeed_witherspoon.py
new file mode 100644
index 0000000000000..ea1ce89b05cce
--- /dev/null
+++ b/tests/functional/test_arm_aspeed_witherspoon.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python3
+#
+# Functional test that boots the ASPEED machines
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+from qemu_test import Asset
+from aspeed import AspeedTest
+
+
+class WitherspoonMachine(AspeedTest):
+
+    ASSET_WITHERSPOON_FLASH = Asset(
+        'https://github.com/legoater/qemu-aspeed-boot/raw/master/images/witherspoon-bmc/openbmc-20240618035022/obmc-phosphor-image-witherspoon-20240618035022.ubi.mtd',
+        '937d9ed449ea6c6cbed983519088a42d0cafe276bcfe4fce07772ca6673f9213');
+
+    def test_arm_ast2500_witherspoon_openbmc(self):
+        image_path = self.ASSET_WITHERSPOON_FLASH.fetch()
+
+        self.do_test_arm_aspeed_openbmc('witherspoon-bmc', image=image_path,
+                                        uboot='2016.07', cpu_id='0x0',
+                                        soc='AST2500 rev A1');
+
+if __name__ == '__main__':
+    AspeedTest.main()
diff --git a/tests/functional/test_arm_replay.py b/tests/functional/test_arm_replay.py
new file mode 100755
index 0000000000000..e002e6a264753
--- /dev/null
+++ b/tests/functional/test_arm_replay.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python3
+#
+# Replay test that boots a Linux kernel on arm machines and checks the console
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+from qemu_test import Asset
+from replay_kernel import ReplayKernelBase
+
+
+class ArmReplay(ReplayKernelBase):
+
+    ASSET_VIRT = Asset(
+        ('https://archives.fedoraproject.org/pub/archive/fedora/linux/'
+         'releases/29/Everything/armhfp/os/images/pxeboot/vmlinuz'),
+        '18dd5f1a9a28bd539f9d047f7c0677211bae528e8712b40ca5a229a4ad8e2591')
+
+    def test_virt(self):
+        self.set_machine('virt')
+        kernel_path = self.ASSET_VIRT.fetch()
+        kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE +
+                               'console=ttyAMA0')
+        console_pattern = 'VFS: Cannot open root device'
+        self.run_rr(kernel_path, kernel_command_line, console_pattern, shift=1)
+
+    ASSET_CUBIE_KERNEL = Asset(
+        ('https://apt.armbian.com/pool/main/l/linux-6.6.16/'
+         'linux-image-current-sunxi_24.2.1_armhf_'
+         '_6.6.16-Seb3e-D6b4a-P2359-Ce96bHfe66-HK01ba-V014b-B067e-R448a.deb'),
+        '3d968c15b121ede871dce49d13ee7644d6f74b6b121b84c9a40f51b0c80d6d22')
+
+    ASSET_CUBIE_INITRD = Asset(
+        ('https://github.com/groeck/linux-build-test/raw/'
+         '2eb0a73b5d5a28df3170c546ddaaa9757e1e0848/rootfs/arm/rootfs-armv5.cpio.gz'),
+        '334b8d256db67a3f2b3ad070aa08b5ade39624e0e7e35b02f4359a577bc8f39b')
+
+    def test_cubieboard(self):
+        self.set_machine('cubieboard')
+        kernel_path = self.archive_extract(self.ASSET_CUBIE_KERNEL,
+            member='boot/vmlinuz-6.6.16-current-sunxi')
+        dtb_path = self.archive_extract(self.ASSET_CUBIE_KERNEL,
+            member='usr/lib/linux-image-6.6.16-current-sunxi/sun4i-a10-cubieboard.dtb')
+        initrd_path = self.uncompress(self.ASSET_CUBIE_INITRD)
+
+        kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE +
+                               'console=ttyS0,115200 '
+                               'usbcore.nousb '
+                               'panic=-1 noreboot')
+        console_pattern = 'Boot successful.'
+        self.run_rr(kernel_path, kernel_command_line, console_pattern, shift=1,
+                    args=('-dtb', dtb_path,
+                          '-initrd', initrd_path,
+                          '-no-reboot'))
+
+    ASSET_DAY16 = Asset(
+        'https://qemu-advcal.gitlab.io/qac-best-of-multiarch/download/day16.tar.xz',
+        '63311adb2d4c4e7a73214a86d29988add87266a909719c56acfadd026b4110a7')
+
+    def test_vexpressa9(self):
+        self.set_machine('vexpress-a9')
+        self.archive_extract(self.ASSET_DAY16)
+        kernel_path = self.scratch_file('day16', 'winter.zImage')
+        dtb_path = self.scratch_file('day16', 'vexpress-v2p-ca9.dtb')
+        self.run_rr(kernel_path, self.REPLAY_KERNEL_COMMAND_LINE,
+                    'QEMU advent calendar', args=('-dtb', dtb_path))
+
+
+if __name__ == '__main__':
+    ReplayKernelBase.main()
diff --git a/tests/functional/test_arm_sx1.py b/tests/functional/test_arm_sx1.py
index b85bfaa178f9d..25800b388c993 100755
--- a/tests/functional/test_arm_sx1.py
+++ b/tests/functional/test_arm_sx1.py
@@ -43,8 +43,9 @@ def test_arm_sx1_initrd(self):
         self.vm.add_args('-append', f'kunit.enable=0 rdinit=/sbin/init {self.CONSOLE_ARGS}')
         self.vm.add_args('-no-reboot')
         self.launch_kernel(zimage_path,
-                           initrd=initrd_path)
-        self.vm.wait(timeout=60)
+                           initrd=initrd_path,
+                           wait_for='Boot successful')
+        self.vm.wait(timeout=120)
 
     def test_arm_sx1_sd(self):
         self.set_machine('sx1')
@@ -54,8 +55,8 @@ def test_arm_sx1_sd(self):
         self.vm.add_args('-no-reboot')
         self.vm.add_args('-snapshot')
         self.vm.add_args('-drive', f'format=raw,if=sd,file={sd_fs_path}')
-        self.launch_kernel(zimage_path)
-        self.vm.wait(timeout=60)
+        self.launch_kernel(zimage_path, wait_for='Boot successful')
+        self.vm.wait(timeout=120)
 
     def test_arm_sx1_flash(self):
         self.set_machine('sx1')
@@ -65,8 +66,8 @@ def test_arm_sx1_flash(self):
         self.vm.add_args('-no-reboot')
         self.vm.add_args('-snapshot')
         self.vm.add_args('-drive', f'format=raw,if=pflash,file={flash_path}')
-        self.launch_kernel(zimage_path)
-        self.vm.wait(timeout=60)
+        self.launch_kernel(zimage_path, wait_for='Boot successful')
+        self.vm.wait(timeout=120)
 
 if __name__ == '__main__':
     LinuxKernelTest.main()
diff --git a/tests/functional/test_hexagon_minivm.py b/tests/functional/test_hexagon_minivm.py
new file mode 100755
index 0000000000000..2ba92bcce383a
--- /dev/null
+++ b/tests/functional/test_hexagon_minivm.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+#
+# Copyright(c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+import os
+from glob import glob
+from qemu_test import QemuSystemTest, Asset
+from qemu_test import wait_for_console_pattern
+
+class MiniVMTest(QemuSystemTest):
+
+    timeout = 180
+    GUEST_ENTRY = 0xc0000000
+
+    REPO = 'https://artifacts.codelinaro.org/artifactory'
+    ASSET_TARBALL = \
+        Asset(f'{REPO}/codelinaro-toolchain-for-hexagon/'
+               '19.1.5/hexagon_minivm_2024_Dec_15.tar.gz',
+        'd7920b5ff14bed5a10b23ada7d4eb927ede08635281f25067e0d5711feee2c2a')
+
+    def test_minivm(self):
+        self.set_machine('virt')
+        self.archive_extract(self.ASSET_TARBALL)
+        rootfs_path = f'{self.workdir}/hexagon-unknown-linux-musl-rootfs'
+        kernel_path = f'{rootfs_path}/boot/minivm'
+
+        assert(os.path.exists(kernel_path))
+        for test_bin_path in glob(f'{rootfs_path}/boot/test_*'):
+            print(f'# Testing "{os.path.basename(test_bin_path)}"')
+
+            vm = self.get_vm()
+            vm.add_args('-kernel', kernel_path,
+                  '-device',
+                  f'loader,addr={hex(self.GUEST_ENTRY)},file={test_bin_path}')
+            vm.launch()
+            vm.wait()
+            self.assertEqual(vm.exitcode(), 0)
+
+if __name__ == '__main__':
+    QemuSystemTest.main()
diff --git a/tests/functional/test_intel_iommu.py b/tests/functional/test_intel_iommu.py
index a9e8f82ab5973..62268d6f2788c 100755
--- a/tests/functional/test_intel_iommu.py
+++ b/tests/functional/test_intel_iommu.py
@@ -10,11 +10,7 @@
 # This work is licensed under the terms of the GNU GPL, version 2 or
 # later.  See the COPYING file in the top-level directory.
 
-import hashlib
-import urllib.request
-
 from qemu_test import LinuxKernelTest, Asset, exec_command_and_wait_for_pattern
-from qemu_test.utils import get_usernet_hostfwd_port
 
 
 class IntelIOMMU(LinuxKernelTest):
@@ -125,23 +121,7 @@ def run_and_check(self):
 
         # Check virtio-net via HTTP:
         exec_command_and_wait_for_pattern(self, 'dhclient eth0', prompt)
-        exec_command_and_wait_for_pattern(self,
-                        f'python3 -m http.server {self.GUEST_PORT} & sleep 1',
-                        f'Serving HTTP on 0.0.0.0 port {self.GUEST_PORT}')
-        hl = hashlib.sha256()
-        hostport = get_usernet_hostfwd_port(self.vm)
-        url = f'http://localhost:{hostport}{filename}'
-        self.log.info(f'Downloading {url} ...')
-        with urllib.request.urlopen(url) as response:
-            while True:
-                chunk = response.read(1 << 20)
-                if not chunk:
-                    break
-                hl.update(chunk)
-
-        digest = hl.hexdigest()
-        self.log.info(f'sha256sum of download is {digest}.')
-        self.assertEqual(digest, hashsum)
+        self.check_http_download(filename, hashsum, self.GUEST_PORT)
 
     def test_intel_iommu(self):
         self.common_vm_setup()
diff --git a/tests/functional/test_m68k_replay.py b/tests/functional/test_m68k_replay.py
new file mode 100755
index 0000000000000..18c1db539c50e
--- /dev/null
+++ b/tests/functional/test_m68k_replay.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+#
+# Replay test that boots a Linux kernel on an m68k machine
+# and checks the console
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+from qemu_test import Asset
+from replay_kernel import ReplayKernelBase
+
+
+class M68kReplay(ReplayKernelBase):
+
+    ASSET_Q800 = Asset(
+        ('https://snapshot.debian.org/'
+         'archive/debian-ports/20191021T083923Z/pool-m68k/main/l/linux/'
+         'kernel-image-5.3.0-1-m68k-di_5.3.7-1_m68k.udeb'),
+        '949e50d74d4b9bc15d26c06d402717b7a4c0e32ff8100014f5930d8024de7b73')
+
+    def test_q800(self):
+        self.set_machine('q800')
+        kernel_path = self.archive_extract(self.ASSET_Q800,
+                                           member='boot/vmlinux-5.3.0-1-m68k')
+        kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE +
+                               'console=ttyS0 vga=off')
+        console_pattern = 'No filesystem could mount root'
+        self.run_rr(kernel_path, kernel_command_line, console_pattern)
+
+    ASSET_MCF5208 = Asset(
+       'https://qemu-advcal.gitlab.io/qac-best-of-multiarch/download/day07.tar.xz',
+       '753c2f3837126b7c6ba92d0b1e0b156e8a2c5131d2d576bb0b9a763fae73c08a')
+
+    def test_mcf5208evb(self):
+        self.set_machine('mcf5208evb')
+        kernel_path = self.archive_extract(self.ASSET_MCF5208,
+                                           member='day07/sanity-clause.elf')
+        self.run_rr(kernel_path, self.KERNEL_COMMON_COMMAND_LINE,
+                    'QEMU advent calendar')
+
+
+if __name__ == '__main__':
+    ReplayKernelBase.main()
diff --git a/tests/functional/test_mem_addr_space.py b/tests/functional/test_mem_addr_space.py
index bb0cf062ca7f3..2d9d31efb597d 100755
--- a/tests/functional/test_mem_addr_space.py
+++ b/tests/functional/test_mem_addr_space.py
@@ -20,6 +20,25 @@ class MemAddrCheck(QemuSystemTest):
     # this reason.
     DELAY_Q35_BOOT_SEQUENCE = 1
 
+    # This helper can go away when the 32-bit host deprecation
+    # turns into full & final removal of support.
+    def ensure_64bit_binary(self):
+        with open(self.qemu_bin, "rb") as fh:
+            ident = fh.read(4)
+
+            # "\x7fELF"
+            if ident != bytes([0x7f, 0x45, 0x4C, 0x46]):
+                # Non-ELF file implies macOS or Windows which
+                # we already assume to be 64-bit only
+                return
+
+            # bits == 1 -> 32-bit; bits == 2 -> 64-bit
+            bits = int.from_bytes(fh.read(1), byteorder='little')
+            if bits != 2:
+                # 32-bit ELF builds won't be able to address sufficient
+                # RAM to run the tests
+                self.skipTest("64-bit build host is required")
+
     # first, lets test some 32-bit processors.
     # for all 32-bit cases, pci64_hole_size is 0.
     def test_phybits_low_pse36(self):
@@ -38,6 +57,7 @@ def test_phybits_low_pse36(self):
         If maxmem is set to 59.5G with all other QEMU parameters identical, QEMU
         should start fine.
         """
+        self.ensure_64bit_binary()
         self.vm.add_args('-S', '-machine', 'q35', '-m',
                          '512,slots=1,maxmem=59.6G',
                          '-cpu', 'pentium,pse36=on', '-display', 'none',
@@ -55,6 +75,7 @@ def test_phybits_low_pae(self):
         access up to a maximum of 64GiB of memory. Rest is the same as the case
         with pse36 above.
         """
+        self.ensure_64bit_binary()
         self.vm.add_args('-S', '-machine', 'q35', '-m',
                          '512,slots=1,maxmem=59.6G',
                          '-cpu', 'pentium,pae=on', '-display', 'none',
@@ -71,6 +92,7 @@ def test_phybits_ok_pentium_pse36(self):
         Setting maxmem to 59.5G and making sure that QEMU can start with the
         same options as the failing case above with pse36 cpu feature.
         """
+        self.ensure_64bit_binary()
         self.vm.add_args('-machine', 'q35', '-m',
                          '512,slots=1,maxmem=59.5G',
                          '-cpu', 'pentium,pse36=on', '-display', 'none',
@@ -88,6 +110,7 @@ def test_phybits_ok_pentium_pae(self):
         Setting maxmem to 59.5G and making sure that QEMU can start fine
         with the same options as the case above.
         """
+        self.ensure_64bit_binary()
         self.vm.add_args('-machine', 'q35', '-m',
                          '512,slots=1,maxmem=59.5G',
                          '-cpu', 'pentium,pae=on', '-display', 'none',
@@ -104,6 +127,7 @@ def test_phybits_ok_pentium2(self):
         Pentium2 has 36 bits of addressing, so its same as pentium
         with pse36 ON.
         """
+        self.ensure_64bit_binary()
         self.vm.add_args('-machine', 'q35', '-m',
                          '512,slots=1,maxmem=59.5G',
                          '-cpu', 'pentium2', '-display', 'none',
@@ -123,6 +147,7 @@ def test_phybits_low_nonpse36(self):
         message because the region for memory hotplug is always placed
         above 4 GiB due to the PCI hole and simplicity.
         """
+        self.ensure_64bit_binary()
         self.vm.add_args('-S', '-machine', 'q35', '-m',
                          '512,slots=1,maxmem=4G',
                          '-cpu', 'pentium', '-display', 'none',
@@ -150,6 +175,7 @@ def test_phybits_low_tcg_q35_70_amd(self):
         which is equal to 987.5 GiB. Setting the value to 988 GiB should
         make QEMU fail with the error message.
         """
+        self.ensure_64bit_binary()
         self.vm.add_args('-S', '-machine', 'pc-q35-7.0', '-m',
                          '512,slots=1,maxmem=988G',
                          '-display', 'none',
@@ -170,6 +196,7 @@ def test_phybits_low_tcg_q35_71_amd(self):
         Make sure QEMU fails when maxmem size is 976 GiB (12 GiB less
         than 988 GiB).
         """
+        self.ensure_64bit_binary()
         self.vm.add_args('-S', '-machine', 'pc-q35-7.1', '-m',
                          '512,slots=1,maxmem=976G',
                          '-display', 'none',
@@ -186,6 +213,7 @@ def test_phybits_ok_tcg_q35_70_amd(self):
         Same as q35-7.0 AMD case except that here we check that QEMU can
         successfully start when maxmem is < 988G.
         """
+        self.ensure_64bit_binary()
         self.vm.add_args('-S', '-machine', 'pc-q35-7.0', '-m',
                          '512,slots=1,maxmem=987.5G',
                          '-display', 'none',
@@ -202,6 +230,7 @@ def test_phybits_ok_tcg_q35_71_amd(self):
         Same as q35-7.1 AMD case except that here we check that QEMU can
         successfully start when maxmem is < 976G.
         """
+        self.ensure_64bit_binary()
         self.vm.add_args('-S', '-machine', 'pc-q35-7.1', '-m',
                          '512,slots=1,maxmem=975.5G',
                          '-display', 'none',
@@ -219,6 +248,7 @@ def test_phybits_ok_tcg_q35_71_intel(self):
         Intel cpu instead. QEMU should start fine in this case as
         "above_4G" memory starts at 4G.
         """
+        self.ensure_64bit_binary()
         self.vm.add_args('-S', '-cpu', 'Skylake-Server',
                          '-machine', 'pc-q35-7.1', '-m',
                          '512,slots=1,maxmem=976G',
@@ -243,6 +273,7 @@ def test_phybits_low_tcg_q35_71_amd_41bits(self):
         memory for the VM (1024 - 32 - 1 + 0.5). With 992 GiB, QEMU should
         fail to start.
         """
+        self.ensure_64bit_binary()
         self.vm.add_args('-S', '-cpu', 'EPYC-v4,phys-bits=41',
                          '-machine', 'pc-q35-7.1', '-m',
                          '512,slots=1,maxmem=992G',
@@ -261,6 +292,7 @@ def test_phybits_ok_tcg_q35_71_amd_41bits(self):
         Same as above but by setting maxram between 976 GiB and 992 Gib,
         QEMU should start fine.
         """
+        self.ensure_64bit_binary()
         self.vm.add_args('-S', '-cpu', 'EPYC-v4,phys-bits=41',
                          '-machine', 'pc-q35-7.1', '-m',
                          '512,slots=1,maxmem=990G',
@@ -281,6 +313,7 @@ def test_phybits_low_tcg_q35_intel_cxl(self):
         So maxmem here should be at most 986 GiB considering all memory boundary
         alignment constraints with 40 bits (1 TiB) of processor physical bits.
         """
+        self.ensure_64bit_binary()
         self.vm.add_args('-S', '-cpu', 'Skylake-Server,phys-bits=40',
                          '-machine', 'q35,cxl=on', '-m',
                          '512,slots=1,maxmem=987G',
@@ -299,6 +332,7 @@ def test_phybits_ok_tcg_q35_intel_cxl(self):
         with the exact same parameters as above, QEMU should start fine even
         with cxl enabled.
         """
+        self.ensure_64bit_binary()
         self.vm.add_args('-S', '-cpu', 'Skylake-Server,phys-bits=40',
                          '-machine', 'q35,cxl=on', '-m',
                          '512,slots=1,maxmem=987G',
diff --git a/tests/functional/test_microblaze_replay.py b/tests/functional/test_microblaze_replay.py
new file mode 100755
index 0000000000000..7484c4186f3f5
--- /dev/null
+++ b/tests/functional/test_microblaze_replay.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+#
+# Replay test that boots a Linux kernel on an microblaze machine
+# and checks the console
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+from qemu_test import Asset
+from replay_kernel import ReplayKernelBase
+
+
+class MicroblazeReplay(ReplayKernelBase):
+
+    ASSET_DAY17 = Asset(
+        ('https://qemu-advcal.gitlab.io/qac-best-of-multiarch/download/'
+         'day17.tar.xz'),
+        '3ba7439dfbea7af4876662c97f8e1f0cdad9231fc166e4861d17042489270057')
+
+    def test_microblaze_s3adsp1800(self):
+        self.set_machine('petalogix-s3adsp1800')
+        kernel_path = self.archive_extract(self.ASSET_DAY17,
+                                           member='day17/ballerina.bin')
+        self.run_rr(kernel_path, self.REPLAY_KERNEL_COMMAND_LINE,
+                    'QEMU advent calendar')
+
+
+if __name__ == '__main__':
+    ReplayKernelBase.main()
diff --git a/tests/functional/test_microblaze_s3adsp1800.py b/tests/functional/test_microblaze_s3adsp1800.py
index 2c4464bd05a89..c93fa14232b3b 100755
--- a/tests/functional/test_microblaze_s3adsp1800.py
+++ b/tests/functional/test_microblaze_s3adsp1800.py
@@ -7,6 +7,7 @@
 # This work is licensed under the terms of the GNU GPL, version 2 or
 # later. See the COPYING file in the top-level directory.
 
+from qemu_test import exec_command_and_wait_for_pattern
 from qemu_test import QemuSystemTest, Asset
 from qemu_test import wait_for_console_pattern
 
@@ -15,14 +16,18 @@ class MicroblazeMachine(QemuSystemTest):
 
     timeout = 90
 
-    ASSET_IMAGE = Asset(
+    ASSET_IMAGE_BE = Asset(
         ('https://qemu-advcal.gitlab.io/qac-best-of-multiarch/download/'
          'day17.tar.xz'),
         '3ba7439dfbea7af4876662c97f8e1f0cdad9231fc166e4861d17042489270057')
 
-    def test_microblaze_s3adsp1800(self):
-        self.set_machine('petalogix-s3adsp1800')
-        self.archive_extract(self.ASSET_IMAGE)
+    ASSET_IMAGE_LE = Asset(
+        ('http://www.qemu-advent-calendar.org/2023/download/day13.tar.gz'),
+        'b9b3d43c5dd79db88ada495cc6e0d1f591153fe41355e925d791fbf44de50c22')
+
+    def do_ballerina_be_test(self, machine):
+        self.set_machine(machine)
+        self.archive_extract(self.ASSET_IMAGE_BE)
         self.vm.set_console()
         self.vm.add_args('-kernel',
                          self.scratch_file('day17', 'ballerina.bin'))
@@ -34,5 +39,30 @@ def test_microblaze_s3adsp1800(self):
         # message, that's why we don't test for a later string here. This
         # needs some investigation by a microblaze wizard one day...
 
+    def do_xmaton_le_test(self, machine):
+        self.require_netdev('user')
+        self.set_machine(machine)
+        self.archive_extract(self.ASSET_IMAGE_LE)
+        self.vm.set_console()
+        self.vm.add_args('-kernel', self.scratch_file('day13', 'xmaton.bin'))
+        tftproot = self.scratch_file('day13')
+        self.vm.add_args('-nic', f'user,tftp={tftproot}')
+        self.vm.launch()
+        wait_for_console_pattern(self, 'QEMU Advent Calendar 2023')
+        wait_for_console_pattern(self, 'buildroot login:')
+        exec_command_and_wait_for_pattern(self, 'root', '#')
+        exec_command_and_wait_for_pattern(self,
+                'tftp -g -r xmaton.png 10.0.2.2 ; md5sum xmaton.png',
+                '821cd3cab8efd16ad6ee5acc3642a8ea')
+
+
+class MicroblazeBigEndianMachine(MicroblazeMachine):
+
+    ASSET_IMAGE_BE = MicroblazeMachine.ASSET_IMAGE_BE
+
+    def test_microblaze_s3adsp1800_legacy_be(self):
+        self.do_ballerina_be_test('petalogix-s3adsp1800')
+
+
 if __name__ == '__main__':
     QemuSystemTest.main()
diff --git a/tests/functional/test_microblazeel_s3adsp1800.py b/tests/functional/test_microblazeel_s3adsp1800.py
index c382afe6bfa5f..ab59941d57ae3 100755
--- a/tests/functional/test_microblazeel_s3adsp1800.py
+++ b/tests/functional/test_microblazeel_s3adsp1800.py
@@ -7,36 +7,16 @@
 # This work is licensed under the terms of the GNU GPL, version 2 or
 # later. See the COPYING file in the top-level directory.
 
-import time
-from qemu_test import exec_command, exec_command_and_wait_for_pattern
-from qemu_test import QemuSystemTest, Asset
-from qemu_test import wait_for_console_pattern
+from test_microblaze_s3adsp1800 import MicroblazeMachine
 
 
-class MicroblazeelMachine(QemuSystemTest):
+class MicroblazeLittleEndianMachine(MicroblazeMachine):
 
-    timeout = 90
+    ASSET_IMAGE_LE = MicroblazeMachine.ASSET_IMAGE_LE
 
-    ASSET_IMAGE = Asset(
-        ('http://www.qemu-advent-calendar.org/2023/download/day13.tar.gz'),
-        'b9b3d43c5dd79db88ada495cc6e0d1f591153fe41355e925d791fbf44de50c22')
+    def test_microblaze_s3adsp1800_legacy_le(self):
+        self.do_xmaton_le_test('petalogix-s3adsp1800')
 
-    def test_microblazeel_s3adsp1800(self):
-        self.require_netdev('user')
-        self.set_machine('petalogix-s3adsp1800')
-        self.archive_extract(self.ASSET_IMAGE)
-        self.vm.set_console()
-        self.vm.add_args('-kernel', self.scratch_file('day13', 'xmaton.bin'))
-        tftproot = self.scratch_file('day13')
-        self.vm.add_args('-nic', f'user,tftp={tftproot}')
-        self.vm.launch()
-        wait_for_console_pattern(self, 'QEMU Advent Calendar 2023')
-        time.sleep(0.1)
-        exec_command(self, 'root')
-        time.sleep(0.1)
-        exec_command_and_wait_for_pattern(self,
-                'tftp -g -r xmaton.png 10.0.2.2 ; md5sum xmaton.png',
-                '821cd3cab8efd16ad6ee5acc3642a8ea')
 
 if __name__ == '__main__':
-    QemuSystemTest.main()
+    MicroblazeMachine.main()
diff --git a/tests/functional/test_mips_malta.py b/tests/functional/test_mips_malta.py
index eaf372255ba30..9697c7d63f832 100755
--- a/tests/functional/test_mips_malta.py
+++ b/tests/functional/test_mips_malta.py
@@ -45,12 +45,15 @@ def test_mips_malta(self):
         'dcfe3a7fe3200da3a00d176b95caaa086495eb158f2bff64afc67d7e1eb2cddc')
 
     def test_mips_malta_cpio(self):
+        self.require_netdev('user')
+        self.set_machine('malta')
+        self.require_device('pcnet')
+
         kernel_path = self.archive_extract(
             self.ASSET_KERNEL_4_5_0,
             member='boot/vmlinux-4.5.0-2-4kc-malta')
         initrd_path = self.uncompress(self.ASSET_INITRD)
 
-        self.set_machine('malta')
         self.vm.set_console()
         kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE
                                + 'console=ttyS0 console=tty '
@@ -58,6 +61,8 @@ def test_mips_malta_cpio(self):
         self.vm.add_args('-kernel', kernel_path,
                          '-initrd', initrd_path,
                          '-append', kernel_command_line,
+                         '-netdev', 'user,id=n1,tftp=' + self.scratch_file('boot'),
+                         '-device', 'pcnet,netdev=n1',
                          '-no-reboot')
         self.vm.launch()
         self.wait_for_console_pattern('Boot successful.')
@@ -66,6 +71,19 @@ def test_mips_malta_cpio(self):
                                                 'BogoMIPS')
         exec_command_and_wait_for_pattern(self, 'uname -a',
                                                 'Debian')
+
+        exec_command_and_wait_for_pattern(self, 'ip link set eth0 up',
+                                          'eth0: link up')
+        exec_command_and_wait_for_pattern(self,
+                                          'ip addr add 10.0.2.15 dev eth0',
+                                          '#')
+        exec_command_and_wait_for_pattern(self, 'route add default eth0', '#')
+        exec_command_and_wait_for_pattern(self,
+                         'tftp -g -r vmlinux-4.5.0-2-4kc-malta 10.0.2.2', '#')
+        exec_command_and_wait_for_pattern(self,
+                                          'md5sum vmlinux-4.5.0-2-4kc-malta',
+                                          'a98218a7efbdefb2dfdf9ecd08c98318')
+
         exec_command_and_wait_for_pattern(self, 'reboot',
                                                 'reboot: Restarting system')
         # Wait for VM to shut down gracefully
diff --git a/tests/functional/test_or1k_replay.py b/tests/functional/test_or1k_replay.py
new file mode 100755
index 0000000000000..2b60a9372c5af
--- /dev/null
+++ b/tests/functional/test_or1k_replay.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python3
+#
+# Replay test that boots a Linux kernel on an OpenRISC-1000 SIM machine
+# and checks the console
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+from qemu_test import Asset
+from replay_kernel import ReplayKernelBase
+
+
+class Or1kReplay(ReplayKernelBase):
+
+    ASSET_DAY20 = Asset(
+        'https://qemu-advcal.gitlab.io/qac-best-of-multiarch/download/day20.tar.xz',
+        'ff9d7dd7c6bdba325bd85ee85c02db61ff653e129558aeffe6aff55bffb6763a')
+
+    def test_sim(self):
+        self.set_machine('or1k-sim')
+        kernel_path = self.archive_extract(self.ASSET_DAY20,
+                                           member='day20/vmlinux')
+        self.run_rr(kernel_path, self.REPLAY_KERNEL_COMMAND_LINE,
+                    'QEMU advent calendar')
+
+
+if __name__ == '__main__':
+    ReplayKernelBase.main()
diff --git a/tests/functional/test_ppc64_e500.py b/tests/functional/test_ppc64_e500.py
index b92fe0b0e75e0..f5fcad9f6b667 100755
--- a/tests/functional/test_ppc64_e500.py
+++ b/tests/functional/test_ppc64_e500.py
@@ -5,20 +5,40 @@
 # SPDX-License-Identifier: GPL-2.0-or-later
 
 from qemu_test import LinuxKernelTest, Asset
+from qemu_test import exec_command_and_wait_for_pattern
 
 
 class E500Test(LinuxKernelTest):
 
-    ASSET_DAY19 = Asset(
-        'https://qemu-advcal.gitlab.io/qac-best-of-multiarch/download/day19.tar.xz',
-        '20b1bb5a8488c664defbb5d283addc91a05335a936c63b3f5ff7eee74b725755')
+    ASSET_BR2_E5500_UIMAGE = Asset(
+        'https://github.com/legoater/qemu-ppc-boot/raw/refs/heads/main/buildroot/qemu_ppc64_e5500-2023.11-8-gdcd9f0f6eb-20240104/uImage',
+        '2478187c455d6cca3984e9dfde9c635d824ea16236b85fd6b4809f744706deda')
 
-    def test_ppc64_e500(self):
+    ASSET_BR2_E5500_ROOTFS = Asset(
+        'https://github.com/legoater/qemu-ppc-boot/raw/refs/heads/main//buildroot/qemu_ppc64_e5500-2023.11-8-gdcd9f0f6eb-20240104/rootfs.ext2',
+        '9035ef97237c84c7522baaff17d25cdfca4bb7a053d5e296e902919473423d76')
+
+    def test_ppc64_e500_buildroot(self):
         self.set_machine('ppce500')
+        self.require_netdev('user')
         self.cpu = 'e5500'
-        self.archive_extract(self.ASSET_DAY19)
-        self.launch_kernel(self.scratch_file('day19', 'uImage'),
-                           wait_for='QEMU advent calendar')
+
+        uimage_path = self.ASSET_BR2_E5500_UIMAGE.fetch()
+        rootfs_path = self.ASSET_BR2_E5500_ROOTFS.fetch()
+
+        self.vm.set_console()
+        self.vm.add_args('-kernel', uimage_path,
+                         '-append', 'root=/dev/vda',
+                         '-drive', f'file={rootfs_path},if=virtio,format=raw',
+                         '-snapshot', '-no-shutdown')
+        self.vm.launch()
+
+        self.wait_for_console_pattern('Linux version')
+        self.wait_for_console_pattern('/init as init process')
+        self.wait_for_console_pattern('lease of 10.0.2.15')
+        self.wait_for_console_pattern('buildroot login:')
+        exec_command_and_wait_for_pattern(self, 'root', '#')
+        exec_command_and_wait_for_pattern(self, 'poweroff', 'Power down')
 
 if __name__ == '__main__':
     LinuxKernelTest.main()
diff --git a/tests/functional/test_ppc64_replay.py b/tests/functional/test_ppc64_replay.py
new file mode 100755
index 0000000000000..48ce1b7f1e197
--- /dev/null
+++ b/tests/functional/test_ppc64_replay.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python3
+#
+# Replay test that boots a Linux kernel on ppc64 machines
+# and checks the console
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+from qemu_test import Asset
+from replay_kernel import ReplayKernelBase
+
+
+class Ppc64Replay(ReplayKernelBase):
+
+    ASSET_DAY19 = Asset(
+        ('https://qemu-advcal.gitlab.io/qac-best-of-multiarch/download/'
+         'day19.tar.xz'),
+        '20b1bb5a8488c664defbb5d283addc91a05335a936c63b3f5ff7eee74b725755')
+
+    def test_ppc64_e500(self):
+        self.set_machine('ppce500')
+        self.cpu = 'e5500'
+        kernel_path = self.archive_extract(self.ASSET_DAY19,
+                                           member='day19/uImage')
+        self.run_rr(kernel_path, self.REPLAY_KERNEL_COMMAND_LINE,
+                    'QEMU advent calendar')
+
+    ASSET_KERNEL = Asset(
+        ('https://archives.fedoraproject.org/pub/archive/fedora-secondary/'
+         'releases/29/Everything/ppc64le/os/ppc/ppc64/vmlinuz'),
+        '383c2f5c23bc0d9d32680c3924d3fd7ee25cc5ef97091ac1aa5e1d853422fc5f')
+
+    def test_ppc64_pseries(self):
+        self.set_machine('pseries')
+        kernel_path = self.ASSET_KERNEL.fetch()
+        kernel_command_line = self.KERNEL_COMMON_COMMAND_LINE + 'console=hvc0'
+        console_pattern = 'VFS: Cannot open root device'
+        self.run_rr(kernel_path, kernel_command_line, console_pattern)
+
+    def test_ppc64_powernv(self):
+        self.set_machine('powernv')
+        kernel_path = self.ASSET_KERNEL.fetch()
+        kernel_command_line = self.KERNEL_COMMON_COMMAND_LINE + \
+                              'console=tty0 console=hvc0'
+        console_pattern = 'VFS: Cannot open root device'
+        self.run_rr(kernel_path, kernel_command_line, console_pattern)
+
+
+if __name__ == '__main__':
+    ReplayKernelBase.main()
diff --git a/tests/functional/test_ppc64_tuxrun.py b/tests/functional/test_ppc64_tuxrun.py
index 05c6162b5e2d2..e8f79c676e5ef 100755
--- a/tests/functional/test_ppc64_tuxrun.py
+++ b/tests/functional/test_ppc64_tuxrun.py
@@ -64,7 +64,7 @@ def ppc64_common_tuxrun(self, kernel_asset, rootfs_asset, prefix):
                          ',"index":1,"id":"pci.1"}')
         self.vm.add_args('-device', '{"driver":"spapr-vscsi","id":"scsi1"'
                          ',"reg":12288}')
-        self.vm.add_args('-m', '2G,slots=32,maxmem=4G',
+        self.vm.add_args('-m', '1G,slots=32,maxmem=2G',
                          '-object', 'memory-backend-ram,id=ram1,size=1G',
                          '-device', 'pc-dimm,id=dimm1,memdev=ram1')
 
diff --git a/tests/functional/test_ppc_405.py b/tests/functional/test_ppc_405.py
deleted file mode 100755
index 9851c03ee9989..0000000000000
--- a/tests/functional/test_ppc_405.py
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/usr/bin/env python3
-#
-# Test that the U-Boot firmware boots on ppc 405 machines and check the console
-#
-# Copyright (c) 2021 Red Hat, Inc.
-#
-# This work is licensed under the terms of the GNU GPL, version 2 or
-# later.  See the COPYING file in the top-level directory.
-
-from qemu_test import QemuSystemTest, Asset
-from qemu_test import wait_for_console_pattern
-from qemu_test import exec_command_and_wait_for_pattern
-
-class Ppc405Machine(QemuSystemTest):
-
-    timeout = 90
-
-    ASSET_UBOOT = Asset(
-        ('https://gitlab.com/huth/u-boot/-/raw/taihu-2021-10-09/'
-         'u-boot-taihu.bin'),
-        'a076bb6cdeaafa406330e51e074b66d8878d9036d67d4caa0137be03ee4c112c')
-
-    def do_test_ppc405(self):
-        file_path = self.ASSET_UBOOT.fetch()
-        self.vm.set_console(console_index=1)
-        self.vm.add_args('-bios', file_path)
-        self.vm.launch()
-        wait_for_console_pattern(self, 'AMCC PPC405EP Evaluation Board')
-        exec_command_and_wait_for_pattern(self, 'reset', 'AMCC PowerPC 405EP')
-
-    def test_ppc_ref405ep(self):
-        self.require_accelerator("tcg")
-        self.set_machine('ref405ep')
-        self.do_test_ppc405()
-
-if __name__ == '__main__':
-    QemuSystemTest.main()
diff --git a/tests/functional/test_ppc_replay.py b/tests/functional/test_ppc_replay.py
new file mode 100755
index 0000000000000..8382070abd1cd
--- /dev/null
+++ b/tests/functional/test_ppc_replay.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python3
+#
+# Replay tests for ppc machines
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+from qemu_test import Asset
+from replay_kernel import ReplayKernelBase
+
+
+class PpcReplay(ReplayKernelBase):
+
+    ASSET_DAY15 = Asset(
+        'https://qemu-advcal.gitlab.io/qac-best-of-multiarch/download/day15.tar.xz',
+        '03e0757c131d2959decf293a3572d3b96c5a53587165bf05ce41b2818a2bccd5')
+
+    def do_day15_test(self):
+        self.require_accelerator("tcg")
+        kernel_path = self.archive_extract(self.ASSET_DAY15,
+                                           member='day15/invaders.elf')
+        self.run_rr(kernel_path, self.REPLAY_KERNEL_COMMAND_LINE,
+                    'QEMU advent calendar', args=('-M', 'graphics=off'))
+
+    def test_g3beige(self):
+        self.set_machine('g3beige')
+        self.do_day15_test()
+
+    def test_mac99(self):
+        self.set_machine('mac99')
+        self.do_day15_test()
+
+
+if __name__ == '__main__':
+    ReplayKernelBase.main()
diff --git a/tests/functional/test_s390x_replay.py b/tests/functional/test_s390x_replay.py
new file mode 100755
index 0000000000000..33b5843adae3f
--- /dev/null
+++ b/tests/functional/test_s390x_replay.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+#
+# Replay test that boots a Linux kernel on an s390x machine
+# and checks the console
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+from qemu_test import Asset
+from replay_kernel import ReplayKernelBase
+
+
+class S390xReplay(ReplayKernelBase):
+
+    ASSET_KERNEL = Asset(
+        ('https://archives.fedoraproject.org/pub/archive/fedora-secondary/'
+         'releases/29/Everything/s390x/os/images/kernel.img'),
+        'dace03b8ae0c9f670ebb9b8d6ce5eb24b62987f346de8f1300a439bb00bb99e7')
+
+    def test_s390_ccw_virtio(self):
+        self.set_machine('s390-ccw-virtio')
+        kernel_path = self.ASSET_KERNEL.fetch()
+        kernel_command_line = self.KERNEL_COMMON_COMMAND_LINE + 'console=sclp0'
+        console_pattern = 'Kernel command line: %s' % kernel_command_line
+        self.run_rr(kernel_path, kernel_command_line, console_pattern, shift=9)
+
+
+if __name__ == '__main__':
+    ReplayKernelBase.main()
diff --git a/tests/functional/test_sparc_replay.py b/tests/functional/test_sparc_replay.py
new file mode 100755
index 0000000000000..865d6486f9989
--- /dev/null
+++ b/tests/functional/test_sparc_replay.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python3
+#
+# Replay test that boots a Linux kernel on a sparc sun4m machine
+# and checks the console
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+from qemu_test import Asset
+from replay_kernel import ReplayKernelBase
+
+
+class SparcReplay(ReplayKernelBase):
+
+    ASSET_DAY11 = Asset(
+        'https://qemu-advcal.gitlab.io/qac-best-of-multiarch/download/day11.tar.xz',
+        'c776533ba756bf4dd3f1fc4c024fb50ef0d853e05c5f5ddf0900a32d1eaa49e0')
+
+    def test_replay(self):
+        self.set_machine('SS-10')
+        kernel_path = self.archive_extract(self.ASSET_DAY11,
+                                           member="day11/zImage.elf")
+        self.run_rr(kernel_path, self.REPLAY_KERNEL_COMMAND_LINE,
+                    'QEMU advent calendar')
+
+
+if __name__ == '__main__':
+    ReplayKernelBase.main()
diff --git a/tests/functional/test_virtio_balloon.py b/tests/functional/test_virtio_balloon.py
new file mode 100755
index 0000000000000..5877b6c408ca8
--- /dev/null
+++ b/tests/functional/test_virtio_balloon.py
@@ -0,0 +1,178 @@
+#!/usr/bin/env python3
+#
+# virtio-balloon tests
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later.  See the COPYING file in the top-level directory.
+
+import time
+
+from qemu_test import QemuSystemTest, Asset
+from qemu_test import wait_for_console_pattern
+from qemu_test import exec_command_and_wait_for_pattern
+
+UNSET_STATS_VALUE = 18446744073709551615
+
+
+class VirtioBalloonx86(QemuSystemTest):
+
+    ASSET_KERNEL = Asset(
+        ('https://archives.fedoraproject.org/pub/archive/fedora/linux/releases'
+         '/31/Server/x86_64/os/images/pxeboot/vmlinuz'),
+        'd4738d03dbbe083ca610d0821d0a8f1488bebbdccef54ce33e3adb35fda00129')
+
+    ASSET_INITRD = Asset(
+        ('https://archives.fedoraproject.org/pub/archive/fedora/linux/releases'
+         '/31/Server/x86_64/os/images/pxeboot/initrd.img'),
+        '277cd6c7adf77c7e63d73bbb2cded8ef9e2d3a2f100000e92ff1f8396513cd8b')
+
+    ASSET_DISKIMAGE = Asset(
+        ('https://archives.fedoraproject.org/pub/archive/fedora/linux/releases'
+         '/31/Cloud/x86_64/images/Fedora-Cloud-Base-31-1.9.x86_64.qcow2'),
+        'e3c1b309d9203604922d6e255c2c5d098a309c2d46215d8fc026954f3c5c27a0')
+
+    DEFAULT_KERNEL_PARAMS = ('root=/dev/vda1 console=ttyS0 net.ifnames=0 '
+                             'rd.rescue quiet')
+
+    def wait_for_console_pattern(self, success_message, vm=None):
+        wait_for_console_pattern(
+            self,
+            success_message,
+            failure_message="Kernel panic - not syncing",
+            vm=vm,
+        )
+
+    def mount_root(self):
+        self.wait_for_console_pattern('Entering emergency mode.')
+        prompt = '# '
+        self.wait_for_console_pattern(prompt)
+
+        # Synchronize on virtio-block driver creating the root device
+        exec_command_and_wait_for_pattern(self,
+                        "while ! (dmesg -c | grep vda:) ; do sleep 1 ; done",
+                        "vda1")
+
+        exec_command_and_wait_for_pattern(self, 'mount /dev/vda1 /sysroot',
+                                          prompt)
+        exec_command_and_wait_for_pattern(self, 'chroot /sysroot',
+                                          prompt)
+        exec_command_and_wait_for_pattern(self, "modprobe virtio-balloon",
+                                          prompt)
+
+    def assert_initial_stats(self):
+        ret = self.vm.qmp('qom-get',
+                          {'path': '/machine/peripheral/balloon',
+                           'property': 'guest-stats'})['return']
+        when = ret.get('last-update')
+        assert when == 0
+        stats = ret.get('stats')
+        for name, val in stats.items():
+            assert val == UNSET_STATS_VALUE
+
+    def assert_running_stats(self, then):
+        # We told the QEMU to refresh stats every 100ms, but
+        # there can be a delay between virtio-ballon driver
+        # being modprobed and seeing the first stats refresh
+        # Retry a few times for robustness under heavy load
+        retries = 10
+        when = 0
+        while when == 0 and retries:
+            ret = self.vm.qmp('qom-get',
+                              {'path': '/machine/peripheral/balloon',
+                               'property': 'guest-stats'})['return']
+            when = ret.get('last-update')
+            if when == 0:
+                retries = retries - 1
+                time.sleep(0.5)
+
+        now = time.time()
+
+        assert when > then and when < now
+        stats = ret.get('stats')
+        # Stat we expect this particular Kernel to have set
+        expectData = [
+            "stat-available-memory",
+            "stat-disk-caches",
+            "stat-free-memory",
+            "stat-htlb-pgalloc",
+            "stat-htlb-pgfail",
+            "stat-major-faults",
+            "stat-minor-faults",
+            "stat-swap-in",
+            "stat-swap-out",
+            "stat-total-memory",
+        ]
+        for name, val in stats.items():
+            if name in expectData:
+                assert val != UNSET_STATS_VALUE
+            else:
+                assert val == UNSET_STATS_VALUE
+
+    def test_virtio_balloon_stats(self):
+        self.set_machine('q35')
+        self.require_accelerator("kvm")
+        kernel_path = self.ASSET_KERNEL.fetch()
+        initrd_path = self.ASSET_INITRD.fetch()
+        diskimage_path = self.ASSET_DISKIMAGE.fetch()
+
+        self.vm.set_console()
+        self.vm.add_args("-S")
+        self.vm.add_args("-cpu", "max")
+        self.vm.add_args("-m", "2G")
+        # Slow down BIOS phase with boot menu, so that after a system
+        # reset, we can reliably catch the clean stats again in BIOS
+        # phase before the guest OS launches
+        self.vm.add_args("-boot", "menu=on")
+        self.vm.add_args("-accel", "kvm")
+        self.vm.add_args("-device", "virtio-balloon,id=balloon")
+        self.vm.add_args('-drive',
+                         f'file={diskimage_path},if=none,id=drv0,snapshot=on')
+        self.vm.add_args('-device', 'virtio-blk-pci,bus=pcie.0,' +
+                         'drive=drv0,id=virtio-disk0,bootindex=1')
+
+        self.vm.add_args(
+            "-kernel",
+            kernel_path,
+            "-initrd",
+            initrd_path,
+            "-append",
+            self.DEFAULT_KERNEL_PARAMS
+        )
+        self.vm.launch()
+
+        # Poll stats at 100ms
+        self.vm.qmp('qom-set',
+                    {'path': '/machine/peripheral/balloon',
+                     'property': 'guest-stats-polling-interval',
+                     'value': 100 })
+
+        # We've not run any guest code yet, neither BIOS or guest,
+        # so stats should be all default values
+        self.assert_initial_stats()
+
+        self.vm.qmp('cont')
+
+        then = time.time()
+        self.mount_root()
+        self.assert_running_stats(then)
+
+        # Race window between these two commands, where we
+        # rely on '-boot menu=on' to (hopefully) ensure we're
+        # still executing the BIOS when QEMU processes the
+        # 'stop', and thus have not loaded the virtio-balloon
+        # driver in the guest
+        self.vm.qmp('system_reset')
+        self.vm.qmp('stop')
+
+        # If the above assumption held, we're in BIOS now and
+        # stats should be all back at their default values
+        self.assert_initial_stats()
+        self.vm.qmp('cont')
+
+        then = time.time()
+        self.mount_root()
+        self.assert_running_stats(then)
+
+
+if __name__ == '__main__':
+    QemuSystemTest.main()
diff --git a/tests/functional/test_x86_64_replay.py b/tests/functional/test_x86_64_replay.py
new file mode 100755
index 0000000000000..180f23a60c5d8
--- /dev/null
+++ b/tests/functional/test_x86_64_replay.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python3
+#
+# Replay test that boots a Linux kernel on x86_64 machines
+# and checks the console
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+from qemu_test import Asset, skipFlakyTest
+from replay_kernel import ReplayKernelBase
+
+
+class X86Replay(ReplayKernelBase):
+
+    ASSET_KERNEL = Asset(
+         ('https://archives.fedoraproject.org/pub/archive/fedora/linux'
+          '/releases/29/Everything/x86_64/os/images/pxeboot/vmlinuz'),
+        '8f237d84712b1b411baf3af2aeaaee10b9aae8e345ec265b87ab3a39639eb143')
+
+    def do_test_x86(self, machine):
+        self.set_machine(machine)
+        kernel_path = self.ASSET_KERNEL.fetch()
+        kernel_command_line = self.KERNEL_COMMON_COMMAND_LINE + 'console=ttyS0'
+        console_pattern = 'VFS: Cannot open root device'
+        self.run_rr(kernel_path, kernel_command_line, console_pattern, shift=5)
+
+    @skipFlakyTest('https://gitlab.com/qemu-project/qemu/-/issues/2094')
+    def test_pc(self):
+        self.do_test_x86('pc')
+
+    def test_q35(self):
+        self.do_test_x86('q35')
+
+
+if __name__ == '__main__':
+    ReplayKernelBase.main()
diff --git a/tests/functional/test_xtensa_replay.py b/tests/functional/test_xtensa_replay.py
new file mode 100755
index 0000000000000..eb00a3b0044b2
--- /dev/null
+++ b/tests/functional/test_xtensa_replay.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+#
+# Replay test that boots a Linux kernel on an xtensa lx650 machine
+# and checks the console
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+from qemu_test import Asset
+from replay_kernel import ReplayKernelBase
+
+
+class XTensaReplay(ReplayKernelBase):
+
+    ASSET_DAY02 = Asset(
+        'https://qemu-advcal.gitlab.io/qac-best-of-multiarch/download/day02.tar.xz',
+        '68ff07f9b3fd3df36d015eb46299ba44748e94bfbb2d5295fddc1a8d4a9fd324')
+
+    def test_replay(self):
+        self.set_machine('lx60')
+        self.cpu = 'dc233c'
+        kernel_path = self.archive_extract(self.ASSET_DAY02,
+                                         member='day02/santas-sleigh-ride.elf')
+        self.run_rr(kernel_path, self.REPLAY_KERNEL_COMMAND_LINE,
+                    'QEMU advent calendar')
+
+
+if __name__ == '__main__':
+    ReplayKernelBase.main()
diff --git a/tests/lcitool/libvirt-ci b/tests/lcitool/libvirt-ci
index b6a65806bc9b2..18c4bfe02c467 160000
--- a/tests/lcitool/libvirt-ci
+++ b/tests/lcitool/libvirt-ci
@@ -1 +1 @@
-Subproject commit b6a65806bc9b2b56985f5e97c936b77c7e7a99fc
+Subproject commit 18c4bfe02c467e5639bf9a687139735ccd7a3fff
diff --git a/tests/lcitool/projects/qemu.yml b/tests/lcitool/projects/qemu.yml
index 80bcac0902761..c07242f272873 100644
--- a/tests/lcitool/projects/qemu.yml
+++ b/tests/lcitool/projects/qemu.yml
@@ -122,6 +122,7 @@ packages:
  - usbredir
  - virglrenderer
  - vte
+ - vulkan-tools
  - which
  - xen
  - xorriso
diff --git a/tests/lcitool/refresh b/tests/lcitool/refresh
index 53f8d2585f822..aa551aca9be26 100755
--- a/tests/lcitool/refresh
+++ b/tests/lcitool/refresh
@@ -163,7 +163,7 @@ try:
     #
     # Standard native builds
     #
-    generate_dockerfile("alpine", "alpine-319")
+    generate_dockerfile("alpine", "alpine-321")
     generate_dockerfile("centos9", "centos-stream-9")
     generate_dockerfile("debian", "debian-12",
                         trailer="".join(debian12_extras))
diff --git a/tests/migration-stress/guestperf/comparison.py b/tests/migration-stress/guestperf/comparison.py
index 42cc0372d119e..dee3ac25e4622 100644
--- a/tests/migration-stress/guestperf/comparison.py
+++ b/tests/migration-stress/guestperf/comparison.py
@@ -127,7 +127,7 @@ def __init__(self, name, scenarios):
     # varying numbers of channels
     Comparison("compr-multifd", scenarios = [
         Scenario("compr-multifd-channels-4",
-                 multifd=True, multifd_channels=2),
+                 multifd=True, multifd_channels=4),
         Scenario("compr-multifd-channels-8",
                  multifd=True, multifd_channels=8),
         Scenario("compr-multifd-channels-32",
@@ -158,4 +158,17 @@ def __init__(self, name, scenarios):
         Scenario("compr-dirty-limit-50MB",
                  dirty_limit=True, vcpu_dirty_limit=50),
     ]),
+
+    # Looking at effect of multifd with
+    # different compression algorithms
+    Comparison("compr-multifd-compression", scenarios = [
+        Scenario("compr-multifd-compression-zlib",
+                 multifd=True, multifd_channels=2, multifd_compression="zlib"),
+        Scenario("compr-multifd-compression-zstd",
+                 multifd=True, multifd_channels=2, multifd_compression="zstd"),
+        Scenario("compr-multifd-compression-qpl",
+                 multifd=True, multifd_channels=2, multifd_compression="qpl"),
+        Scenario("compr-multifd-compression-uadk",
+                 multifd=True, multifd_channels=2, multifd_compression="uadk"),
+    ]),
 ]
diff --git a/tests/migration-stress/guestperf/engine.py b/tests/migration-stress/guestperf/engine.py
index 608d7270f65be..d8462db76534e 100644
--- a/tests/migration-stress/guestperf/engine.py
+++ b/tests/migration-stress/guestperf/engine.py
@@ -24,13 +24,15 @@
 import time
 
 from guestperf.progress import Progress, ProgressStats
-from guestperf.report import Report
+from guestperf.report import Report, ReportResult
 from guestperf.timings import TimingRecord, Timings
 
 sys.path.append(os.path.join(os.path.dirname(__file__),
                              '..', '..', '..', 'python'))
 from qemu.machine import QEMUMachine
 
+# multifd supported compression algorithms
+MULTIFD_CMP_ALGS = ("zlib", "zstd", "qpl", "uadk")
 
 class Engine(object):
 
@@ -106,7 +108,8 @@ def _migrate_progress(self, vm):
             info.get("dirty-limit-ring-full-time", 0),
         )
 
-    def _migrate(self, hardware, scenario, src, dst, connect_uri):
+    def _migrate(self, hardware, scenario, src,
+                 dst, connect_uri, defer_migrate):
         src_qemu_time = []
         src_vcpu_time = []
         src_pid = src.get_pid()
@@ -190,6 +193,12 @@ def _migrate(self, hardware, scenario, src, dst, connect_uri):
                                scenario._compression_xbzrle_cache))
 
         if scenario._multifd:
+            if (scenario._multifd_compression and
+                (scenario._multifd_compression not in MULTIFD_CMP_ALGS)):
+                    raise Exception("unsupported multifd compression "
+                                    "algorithm: %s" %
+                                    scenario._multifd_compression)
+
             resp = src.cmd("migrate-set-capabilities",
                            capabilities = [
                                { "capability": "multifd",
@@ -205,6 +214,12 @@ def _migrate(self, hardware, scenario, src, dst, connect_uri):
             resp = dst.cmd("migrate-set-parameters",
                            multifd_channels=scenario._multifd_channels)
 
+            if scenario._multifd_compression:
+                resp = src.cmd("migrate-set-parameters",
+                    multifd_compression=scenario._multifd_compression)
+                resp = dst.cmd("migrate-set-parameters",
+                    multifd_compression=scenario._multifd_compression)
+
         if scenario._dirty_limit:
             if not hardware._dirty_ring_size:
                 raise Exception("dirty ring size must be configured when "
@@ -220,6 +235,8 @@ def _migrate(self, hardware, scenario, src, dst, connect_uri):
             resp = src.cmd("migrate-set-parameters",
                            vcpu_dirty_limit=scenario._vcpu_dirty_limit)
 
+        if defer_migrate:
+            resp = dst.cmd("migrate-incoming", uri=connect_uri)
         resp = src.cmd("migrate", uri=connect_uri)
 
         post_copy = False
@@ -259,7 +276,11 @@ def _migrate(self, hardware, scenario, src, dst, connect_uri):
                         src_vcpu_time.extend(self._vcpu_timing(src_pid, src_threads))
                         sleep_secs -= 1
 
-                return [progress_history, src_qemu_time, src_vcpu_time]
+                result = ReportResult()
+                if progress._status == "completed" and not paused:
+                    result = ReportResult(True)
+
+                return [progress_history, src_qemu_time, src_vcpu_time, result]
 
             if self._verbose and (loop % 20) == 0:
                 print("Iter %d: remain %5dMB of %5dMB (total %5dMB @ %5dMb/sec)" % (
@@ -373,11 +394,14 @@ def _get_common_args(self, hardware, tunnelled=False):
     def _get_src_args(self, hardware):
         return self._get_common_args(hardware)
 
-    def _get_dst_args(self, hardware, uri):
+    def _get_dst_args(self, hardware, uri, defer_migrate):
         tunnelled = False
         if self._dst_host != "localhost":
             tunnelled = True
         argv = self._get_common_args(hardware, tunnelled)
+
+        if defer_migrate:
+            return argv + ["-incoming", "defer"]
         return argv + ["-incoming", uri]
 
     @staticmethod
@@ -424,6 +448,7 @@ def _get_timings(self, vm):
 
     def run(self, hardware, scenario, result_dir=os.getcwd()):
         abs_result_dir = os.path.join(result_dir, scenario._name)
+        defer_migrate = False
 
         if self._transport == "tcp":
             uri = "tcp:%s:9000" % self._dst_host
@@ -439,6 +464,9 @@ def run(self, hardware, scenario, result_dir=os.getcwd()):
             except:
                 pass
 
+        if scenario._multifd:
+            defer_migrate = True
+
         if self._dst_host != "localhost":
             dstmonaddr = ("localhost", 9001)
         else:
@@ -452,7 +480,7 @@ def run(self, hardware, scenario, result_dir=os.getcwd()):
                           monitor_address=srcmonaddr)
 
         dst = QEMUMachine(self._binary,
-                          args=self._get_dst_args(hardware, uri),
+                          args=self._get_dst_args(hardware, uri, defer_migrate),
                           wrapper=self._get_dst_wrapper(hardware),
                           name="qemu-dst-%d" % os.getpid(),
                           monitor_address=dstmonaddr)
@@ -461,10 +489,12 @@ def run(self, hardware, scenario, result_dir=os.getcwd()):
             src.launch()
             dst.launch()
 
-            ret = self._migrate(hardware, scenario, src, dst, uri)
+            ret = self._migrate(hardware, scenario, src,
+                                dst, uri, defer_migrate)
             progress_history = ret[0]
             qemu_timings = ret[1]
             vcpu_timings = ret[2]
+            result = ret[3]
             if uri[0:5] == "unix:" and os.path.exists(uri[5:]):
                 os.remove(uri[5:])
 
@@ -484,6 +514,7 @@ def run(self, hardware, scenario, result_dir=os.getcwd()):
                           Timings(self._get_timings(src) + self._get_timings(dst)),
                           Timings(qemu_timings),
                           Timings(vcpu_timings),
+                          result,
                           self._binary, self._dst_host, self._kernel,
                           self._initrd, self._transport, self._sleep)
         except Exception as e:
diff --git a/tests/migration-stress/guestperf/report.py b/tests/migration-stress/guestperf/report.py
index 1efd40c868036..e135e01be6ee2 100644
--- a/tests/migration-stress/guestperf/report.py
+++ b/tests/migration-stress/guestperf/report.py
@@ -24,6 +24,22 @@
 from guestperf.progress import Progress
 from guestperf.timings import Timings
 
+class ReportResult(object):
+
+    def __init__(self, success=False):
+        self._success = success
+
+    def serialize(self):
+        return {
+            "success": self._success,
+        }
+
+    @classmethod
+    def deserialize(cls, data):
+        return cls(
+            data["success"])
+
+
 class Report(object):
 
     def __init__(self,
@@ -33,6 +49,7 @@ def __init__(self,
                  guest_timings,
                  qemu_timings,
                  vcpu_timings,
+                 result,
                  binary,
                  dst_host,
                  kernel,
@@ -46,6 +63,7 @@ def __init__(self,
         self._guest_timings = guest_timings
         self._qemu_timings = qemu_timings
         self._vcpu_timings = vcpu_timings
+        self._result = result
         self._binary = binary
         self._dst_host = dst_host
         self._kernel = kernel
@@ -61,6 +79,7 @@ def serialize(self):
             "guest_timings": self._guest_timings.serialize(),
             "qemu_timings": self._qemu_timings.serialize(),
             "vcpu_timings": self._vcpu_timings.serialize(),
+            "result": self._result.serialize(),
             "binary": self._binary,
             "dst_host": self._dst_host,
             "kernel": self._kernel,
@@ -78,6 +97,7 @@ def deserialize(cls, data):
             Timings.deserialize(data["guest_timings"]),
             Timings.deserialize(data["qemu_timings"]),
             Timings.deserialize(data["vcpu_timings"]),
+            ReportResult.deserialize(data["result"]),
             data["binary"],
             data["dst_host"],
             data["kernel"],
diff --git a/tests/migration-stress/guestperf/scenario.py b/tests/migration-stress/guestperf/scenario.py
index 154c4f5d5fad7..4be7fafebf9b0 100644
--- a/tests/migration-stress/guestperf/scenario.py
+++ b/tests/migration-stress/guestperf/scenario.py
@@ -30,7 +30,7 @@ def __init__(self, name,
                  auto_converge=False, auto_converge_step=10,
                  compression_mt=False, compression_mt_threads=1,
                  compression_xbzrle=False, compression_xbzrle_cache=10,
-                 multifd=False, multifd_channels=2,
+                 multifd=False, multifd_channels=2, multifd_compression="",
                  dirty_limit=False, x_vcpu_dirty_limit_period=500,
                  vcpu_dirty_limit=1):
 
@@ -61,6 +61,7 @@ def __init__(self, name,
 
         self._multifd = multifd
         self._multifd_channels = multifd_channels
+        self._multifd_compression = multifd_compression
 
         self._dirty_limit = dirty_limit
         self._x_vcpu_dirty_limit_period = x_vcpu_dirty_limit_period
@@ -85,6 +86,7 @@ def serialize(self):
             "compression_xbzrle_cache": self._compression_xbzrle_cache,
             "multifd": self._multifd,
             "multifd_channels": self._multifd_channels,
+            "multifd_compression": self._multifd_compression,
             "dirty_limit": self._dirty_limit,
             "x_vcpu_dirty_limit_period": self._x_vcpu_dirty_limit_period,
             "vcpu_dirty_limit": self._vcpu_dirty_limit,
@@ -109,4 +111,5 @@ def deserialize(cls, data):
             data["compression_xbzrle"],
             data["compression_xbzrle_cache"],
             data["multifd"],
-            data["multifd_channels"])
+            data["multifd_channels"],
+            data["multifd_compression"])
diff --git a/tests/migration-stress/guestperf/shell.py b/tests/migration-stress/guestperf/shell.py
index 046afeb84eb19..63bbe3226c6da 100644
--- a/tests/migration-stress/guestperf/shell.py
+++ b/tests/migration-stress/guestperf/shell.py
@@ -131,6 +131,8 @@ def __init__(self):
                             action="store_true")
         parser.add_argument("--multifd-channels", dest="multifd_channels",
                             default=2, type=int)
+        parser.add_argument("--multifd-compression", dest="multifd_compression",
+                            default="")
 
         parser.add_argument("--dirty-limit", dest="dirty_limit", default=False,
                             action="store_true")
@@ -167,6 +169,7 @@ def get_scenario(self, args):
 
                         multifd=args.multifd,
                         multifd_channels=args.multifd_channels,
+                        multifd_compression=args.multifd_compression,
 
                         dirty_limit=args.dirty_limit,
                         x_vcpu_dirty_limit_period=\
diff --git a/tests/qapi-schema/doc-good.json b/tests/qapi-schema/doc-good.json
index f64bf38d8547f..0a4f139f8352d 100644
--- a/tests/qapi-schema/doc-good.json
+++ b/tests/qapi-schema/doc-good.json
@@ -11,6 +11,10 @@
 # = Section
 ##
 
+##
+# Just text, no heading.
+##
+
 ##
 # == Subsection
 #
diff --git a/tests/qapi-schema/doc-good.out b/tests/qapi-schema/doc-good.out
index ec277be91e926..5773f1dd6d62a 100644
--- a/tests/qapi-schema/doc-good.out
+++ b/tests/qapi-schema/doc-good.out
@@ -56,6 +56,9 @@ event EVT_BOXED Object
 doc freeform
     body=
 = Section
+doc freeform
+    body=
+Just text, no heading.
 doc freeform
     body=
 == Subsection
@@ -110,7 +113,7 @@ The _one_ {and only}, description on the same line
 Also _one_ {and only}
     feature=enum-member-feat
 a member feature
-    section=None
+    section=Plain
 @two is undocumented
 doc symbol=Base
     body=
@@ -168,15 +171,15 @@ description starts on the same line
 a feature
     feature=cmd-feat2
 another feature
-    section=None
+    section=Plain
 .. note:: @arg3 is undocumented
     section=Returns
 @Object
     section=Errors
 some
-    section=TODO
+    section=Todo
 frobnicate
-    section=None
+    section=Plain
 .. admonition:: Notes
 
  - Lorem ipsum dolor sit amet
@@ -209,7 +212,7 @@ If you're bored enough to read this, go see a video of boxed cats
 a feature
     feature=cmd-feat2
 another feature
-    section=None
+    section=Plain
 .. qmp-example::
 
    -> "this example"
diff --git a/tests/qapi-schema/test-qapi.py b/tests/qapi-schema/test-qapi.py
index 7e3f9f4aa1fee..4be930228cc31 100755
--- a/tests/qapi-schema/test-qapi.py
+++ b/tests/qapi-schema/test-qapi.py
@@ -96,17 +96,8 @@ def _print_variants(variants):
 
     @staticmethod
     def _print_if(ifcond, indent=4):
-        # TODO Drop this hack after replacing OrderedDict by plain
-        # dict (requires Python 3.7)
-        def _massage(subcond):
-            if isinstance(subcond, str):
-                return subcond
-            if isinstance(subcond, list):
-                return [_massage(val) for val in subcond]
-            return {key: _massage(val) for key, val in subcond.items()}
-
         if ifcond.is_present():
-            print('%sif %s' % (' ' * indent, _massage(ifcond.ifcond)))
+            print('%sif %s' % (' ' * indent, ifcond.ifcond))
 
     @classmethod
     def _print_features(cls, features, indent=4):
@@ -131,7 +122,7 @@ def test_frontend(fname):
         for feat, section in doc.features.items():
             print('    feature=%s\n%s' % (feat, section.text))
         for section in doc.sections:
-            print('    section=%s\n%s' % (section.tag, section.text))
+            print('    section=%s\n%s' % (section.kind, section.text))
 
 
 def open_test_result(dir_name, file_name, update):
diff --git a/tests/qemu-iotests/051.pc.out b/tests/qemu-iotests/051.pc.out
index 7e10c5fa1b0c7..f19b532eb9b6a 100644
--- a/tests/qemu-iotests/051.pc.out
+++ b/tests/qemu-iotests/051.pc.out
@@ -181,7 +181,7 @@ QEMU X.Y.Z monitor - type 'help' for more information
 
 Testing: -drive file=TEST_DIR/t.qcow2,if=none,node-name=disk -object iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 -device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on -device virtio-scsi,id=virtio-scsi1 -device scsi-hd,bus=virtio-scsi1.0,drive=disk,share-rw=on
 QEMU X.Y.Z monitor - type 'help' for more information
-(qemu) QEMU_PROG: -device scsi-hd,bus=virtio-scsi1.0,drive=disk,share-rw=on: Cannot change iothread of active block backend
+(qemu) quit
 
 Testing: -drive file=TEST_DIR/t.qcow2,if=none,node-name=disk -object iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 -device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on -device virtio-blk-pci,drive=disk,iothread=thread0,share-rw=on
 QEMU X.Y.Z monitor - type 'help' for more information
diff --git a/tests/qemu-iotests/162 b/tests/qemu-iotests/162
index 94dae60d30416..956c2c5f339ba 100755
--- a/tests/qemu-iotests/162
+++ b/tests/qemu-iotests/162
@@ -65,6 +65,7 @@ done
 
 $QEMU_IMG info "json:{'driver': 'nbd', 'host': 'localhost', 'port': $port}" \
     | grep '^image' | sed -e "s/$port/PORT/"
+_stop_nbd_server
 
 # This is a test for NBD's bdrv_refresh_filename() implementation: It expects
 # either host or path to be set, but it must not assume that they are set to
diff --git a/tests/qemu-iotests/172.out b/tests/qemu-iotests/172.out
index 07eebf358368e..146fc72388352 100644
--- a/tests/qemu-iotests/172.out
+++ b/tests/qemu-iotests/172.out
@@ -68,9 +68,6 @@ floppy0 (NODE_NAME): TEST_DIR/t.qcow2 (qcow2)
 ide1-cd0: [not inserted]
     Attached to:      /machine/unattached/device[N]
     Removable device: not locked, tray closed
-
-sd0: [not inserted]
-    Removable device: not locked, tray closed
 (qemu) quit
 
 
@@ -125,9 +122,6 @@ ide1-cd0: [not inserted]
 floppy0: [not inserted]
     Attached to:      /machine/unattached/device[N]
     Removable device: not locked, tray closed
-
-sd0: [not inserted]
-    Removable device: not locked, tray closed
 (qemu) quit
 
 
@@ -183,9 +177,6 @@ floppy1 (NODE_NAME): TEST_DIR/t.qcow2.2 (qcow2)
 ide1-cd0: [not inserted]
     Attached to:      /machine/unattached/device[N]
     Removable device: not locked, tray closed
-
-sd0: [not inserted]
-    Removable device: not locked, tray closed
 (qemu) quit
 
 
@@ -265,9 +256,6 @@ floppy0 (NODE_NAME): TEST_DIR/t.qcow2 (qcow2)
 ide1-cd0: [not inserted]
     Attached to:      /machine/unattached/device[N]
     Removable device: not locked, tray closed
-
-sd0: [not inserted]
-    Removable device: not locked, tray closed
 (qemu) quit
 
 
@@ -322,9 +310,6 @@ ide1-cd0: [not inserted]
 floppy0: [not inserted]
     Attached to:      /machine/unattached/device[N]
     Removable device: not locked, tray closed
-
-sd0: [not inserted]
-    Removable device: not locked, tray closed
 (qemu) quit
 
 
@@ -380,9 +365,6 @@ floppy1 (NODE_NAME): TEST_DIR/t.qcow2.2 (qcow2)
 ide1-cd0: [not inserted]
     Attached to:      /machine/unattached/device[N]
     Removable device: not locked, tray closed
-
-sd0: [not inserted]
-    Removable device: not locked, tray closed
 (qemu) quit
 
 
@@ -422,9 +404,6 @@ none0 (NODE_NAME): TEST_DIR/t.qcow2 (qcow2)
 ide1-cd0: [not inserted]
     Attached to:      /machine/unattached/device[N]
     Removable device: not locked, tray closed
-
-sd0: [not inserted]
-    Removable device: not locked, tray closed
 (qemu) quit
 
 
@@ -461,9 +440,6 @@ none0 (NODE_NAME): TEST_DIR/t.qcow2 (qcow2)
 ide1-cd0: [not inserted]
     Attached to:      /machine/unattached/device[N]
     Removable device: not locked, tray closed
-
-sd0: [not inserted]
-    Removable device: not locked, tray closed
 (qemu) quit
 
 
@@ -519,9 +495,6 @@ none1 (NODE_NAME): TEST_DIR/t.qcow2.2 (qcow2)
 ide1-cd0: [not inserted]
     Attached to:      /machine/unattached/device[N]
     Removable device: not locked, tray closed
-
-sd0: [not inserted]
-    Removable device: not locked, tray closed
 (qemu) quit
 
 
@@ -586,9 +559,6 @@ none0 (NODE_NAME): TEST_DIR/t.qcow2.2 (qcow2)
 ide1-cd0: [not inserted]
     Attached to:      /machine/unattached/device[N]
     Removable device: not locked, tray closed
-
-sd0: [not inserted]
-    Removable device: not locked, tray closed
 (qemu) quit
 
 
@@ -644,9 +614,6 @@ none0 (NODE_NAME): TEST_DIR/t.qcow2.2 (qcow2)
 ide1-cd0: [not inserted]
     Attached to:      /machine/unattached/device[N]
     Removable device: not locked, tray closed
-
-sd0: [not inserted]
-    Removable device: not locked, tray closed
 (qemu) quit
 
 
@@ -702,9 +669,6 @@ none0 (NODE_NAME): TEST_DIR/t.qcow2.2 (qcow2)
 ide1-cd0: [not inserted]
     Attached to:      /machine/unattached/device[N]
     Removable device: not locked, tray closed
-
-sd0: [not inserted]
-    Removable device: not locked, tray closed
 (qemu) quit
 
 
@@ -760,9 +724,6 @@ none0 (NODE_NAME): TEST_DIR/t.qcow2.2 (qcow2)
 ide1-cd0: [not inserted]
     Attached to:      /machine/unattached/device[N]
     Removable device: not locked, tray closed
-
-sd0: [not inserted]
-    Removable device: not locked, tray closed
 (qemu) quit
 
 
@@ -827,9 +788,6 @@ none0 (NODE_NAME): TEST_DIR/t.qcow2.2 (qcow2)
 ide1-cd0: [not inserted]
     Attached to:      /machine/unattached/device[N]
     Removable device: not locked, tray closed
-
-sd0: [not inserted]
-    Removable device: not locked, tray closed
 (qemu) quit
 
 
@@ -885,9 +843,6 @@ none0 (NODE_NAME): TEST_DIR/t.qcow2.2 (qcow2)
 ide1-cd0: [not inserted]
     Attached to:      /machine/unattached/device[N]
     Removable device: not locked, tray closed
-
-sd0: [not inserted]
-    Removable device: not locked, tray closed
 (qemu) quit
 
 
@@ -930,9 +885,6 @@ none0 (NODE_NAME): TEST_DIR/t.qcow2 (qcow2)
 ide1-cd0: [not inserted]
     Attached to:      /machine/unattached/device[N]
     Removable device: not locked, tray closed
-
-sd0: [not inserted]
-    Removable device: not locked, tray closed
 (qemu) quit
 
 
@@ -1106,9 +1058,6 @@ none0 (NODE_NAME): TEST_DIR/t.qcow2 (qcow2)
 ide1-cd0: [not inserted]
     Attached to:      /machine/unattached/device[N]
     Removable device: not locked, tray closed
-
-sd0: [not inserted]
-    Removable device: not locked, tray closed
 (qemu) quit
 
 
@@ -1145,9 +1094,6 @@ none0 (NODE_NAME): TEST_DIR/t.qcow2 (qcow2)
 ide1-cd0: [not inserted]
     Attached to:      /machine/unattached/device[N]
     Removable device: not locked, tray closed
-
-sd0: [not inserted]
-    Removable device: not locked, tray closed
 (qemu) quit
 
 
@@ -1187,9 +1133,6 @@ none0 (NODE_NAME): TEST_DIR/t.qcow2 (qcow2)
 ide1-cd0: [not inserted]
     Attached to:      /machine/unattached/device[N]
     Removable device: not locked, tray closed
-
-sd0: [not inserted]
-    Removable device: not locked, tray closed
 (qemu) quit
 
 
@@ -1226,9 +1169,6 @@ none0 (NODE_NAME): TEST_DIR/t.qcow2 (qcow2)
 ide1-cd0: [not inserted]
     Attached to:      /machine/unattached/device[N]
     Removable device: not locked, tray closed
-
-sd0: [not inserted]
-    Removable device: not locked, tray closed
 (qemu) quit
 
 
diff --git a/tests/qemu-iotests/302 b/tests/qemu-iotests/302
index a6d79e727b55d..e980ec513f261 100755
--- a/tests/qemu-iotests/302
+++ b/tests/qemu-iotests/302
@@ -115,13 +115,22 @@ with tarfile.open(tar_file, "w") as tar:
 
     disk = tarfile.TarInfo("disk")
     disk.size = actual_size
-    tar.addfile(disk)
 
-    # 6. Shrink the tar to the actual size, aligned to 512 bytes.
+    # Since python 3.13 we cannot use addfile() to create the member header.
+    # Add the tarinfo directly using public but undocumented attributes.
 
-    tar_size = offset + (disk.size + 511) & ~511
-    tar.fileobj.seek(tar_size)
-    tar.fileobj.truncate(tar_size)
+    buf = disk.tobuf(tar.format, tar.encoding, tar.errors)
+    tar.fileobj.write(buf)
+    tar.members.append(disk)
+
+    # Update the offset and position to the location of the next member.
+
+    tar.offset = offset + (disk.size + 511) & ~511
+    tar.fileobj.seek(tar.offset)
+
+    # 6. Shrink the tar to the actual size.
+
+    tar.fileobj.truncate(tar.offset)
 
 with tarfile.open(tar_file) as tar:
     members = [{"name": m.name, "size": m.size, "offset": m.offset_data}
diff --git a/tests/qemu-iotests/tests/qsd-migrate b/tests/qemu-iotests/tests/qsd-migrate
index de17562cb06a1..a4c6592420c4c 100755
--- a/tests/qemu-iotests/tests/qsd-migrate
+++ b/tests/qemu-iotests/tests/qsd-migrate
@@ -22,7 +22,7 @@ import iotests
 
 from iotests import filter_qemu_io, filter_qtest
 
-iotests.script_initialize(supported_fmts=['generic'],
+iotests.script_initialize(supported_fmts=['qcow2', 'qed', 'raw'],
                           supported_protocols=['file'],
                           supported_platforms=['linux'])
 
diff --git a/tests/qtest/libqos/pci.c b/tests/qtest/libqos/pci.c
index b23d72346b6e5..a59197b9922eb 100644
--- a/tests/qtest/libqos/pci.c
+++ b/tests/qtest/libqos/pci.c
@@ -328,8 +328,6 @@ bool qpci_msix_pending(QPCIDevice *dev, uint16_t entry)
 
     g_assert(dev->msix_enabled);
     pba_entry = qpci_io_readl(dev, dev->msix_pba_bar, dev->msix_pba_off + off);
-    qpci_io_writel(dev, dev->msix_pba_bar, dev->msix_pba_off + off,
-                   pba_entry & ~(1 << bit_n));
     return (pba_entry & (1 << bit_n)) != 0;
 }
 
diff --git a/tests/qtest/m48t59-test.c b/tests/qtest/m48t59-test.c
index 605797ab785d2..1e39a0e8f07a8 100644
--- a/tests/qtest/m48t59-test.c
+++ b/tests/qtest/m48t59-test.c
@@ -247,11 +247,6 @@ static void base_setup(void)
         base_year = 1968;
         base_machine = "SS-5";
         use_mmio = true;
-    } else if (g_str_equal(arch, "ppc") || g_str_equal(arch, "ppc64")) {
-        base = 0xF0000000;
-        base_year = 1968;
-        base_machine = "ref405ep";
-        use_mmio = true;
     } else {
         g_assert_not_reached();
     }
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index 68316dbdc1a60..5a8c1f102c2b3 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -103,7 +103,8 @@ qtests_i386 = \
    config_all_devices.has_key('CONFIG_VIRTIO_PCI') and                                      \
    slirp.found() ? ['virtio-net-failover'] : []) +                                          \
   (unpack_edk2_blobs and                                                                    \
-   config_all_devices.has_key('CONFIG_HPET') and                                            \
+   (config_all_devices.has_key('CONFIG_HPET') or                                            \
+    config_all_devices.has_key('CONFIG_X_HPET_RUST')) and                                   \
    config_all_devices.has_key('CONFIG_PARALLEL') ? ['bios-tables-test'] : []) +             \
   qtests_pci +                                                                              \
   qtests_cxl +                                                                              \
@@ -170,7 +171,6 @@ qtests_mips64el = qtests_mips
 qtests_ppc = \
   qtests_filter + \
   (config_all_devices.has_key('CONFIG_ISA_TESTDEV') ? ['endianness-test'] : []) +            \
-  (config_all_devices.has_key('CONFIG_M48T59') ? ['m48t59-test'] : []) +                     \
   (config_all_accel.has_key('CONFIG_TCG') ? ['prom-env-test'] : []) +                              \
   (config_all_accel.has_key('CONFIG_TCG') ? ['boot-serial-test'] : []) +                           \
   ['boot-order-test']
@@ -297,6 +297,7 @@ qos_test_ss.add(
   'pca9552-test.c',
   'pci-test.c',
   'pcnet-test.c',
+  'rs5c372-test.c',
   'sdhci-test.c',
   'spapr-phb-test.c',
   'tmp105-test.c',
@@ -368,7 +369,8 @@ qtests = {
   'ivshmem-test': [rt, '../../contrib/ivshmem-server/ivshmem-server.c'],
   'migration-test': migration_files + migration_tls_files,
   'pxe-test': files('boot-sector.c'),
-  'pnv-xive2-test': files('pnv-xive2-common.c', 'pnv-xive2-flush-sync.c'),
+  'pnv-xive2-test': files('pnv-xive2-common.c', 'pnv-xive2-flush-sync.c',
+                          'pnv-xive2-nvpg_bar.c'),
   'qos-test': [chardev, io, qos_test_ss.apply({}).sources()],
   'tpm-crb-swtpm-test': [io, tpmemu_files],
   'tpm-crb-test': [io, tpmemu_files],
diff --git a/tests/qtest/migration/migration-util.c b/tests/qtest/migration/migration-util.c
index 6261d80e4ac92..642cf50c8d80b 100644
--- a/tests/qtest/migration/migration-util.c
+++ b/tests/qtest/migration/migration-util.c
@@ -236,6 +236,7 @@ char *resolve_machine_version(const char *alias, const char *var1,
 typedef struct {
     char *name;
     void (*func)(void);
+    void (*func_full)(void *);
 } MigrationTest;
 
 static void migration_test_destroy(gpointer data)
@@ -265,6 +266,29 @@ void migration_test_add(const char *path, void (*fn)(void))
                              migration_test_destroy);
 }
 
+static void migration_test_wrapper_full(const void *data)
+{
+    MigrationTest *test = (MigrationTest *)data;
+
+    g_test_message("Running /%s%s", qtest_get_arch(), test->name);
+    test->func_full(test->name);
+}
+
+void migration_test_add_suffix(const char *path, const char *suffix,
+                               void (*fn)(void *))
+{
+    MigrationTest *test = g_new0(MigrationTest, 1);
+
+    g_assert(g_str_has_suffix(path, "/"));
+    g_assert(!g_str_has_prefix(suffix, "/"));
+
+    test->func_full = fn;
+    test->name = g_strconcat(path, suffix, NULL);
+
+    qtest_add_data_func_full(test->name, test, migration_test_wrapper_full,
+                             migration_test_destroy);
+}
+
 #ifdef O_DIRECT
 /*
  * Probe for O_DIRECT support on the filesystem. Since this is used
diff --git a/tests/qtest/migration/migration-util.h b/tests/qtest/migration/migration-util.h
index f5f2e4650e76b..44815e9c42dc9 100644
--- a/tests/qtest/migration/migration-util.h
+++ b/tests/qtest/migration/migration-util.h
@@ -51,6 +51,8 @@ static inline bool probe_o_direct_support(const char *tmpfs)
 bool ufd_version_check(bool *uffd_feature_thread_id);
 bool kvm_dirty_ring_supported(void);
 void migration_test_add(const char *path, void (*fn)(void));
+void migration_test_add_suffix(const char *path, const char *suffix,
+                               void (*fn)(void *));
 char *migrate_get_connect_uri(QTestState *who);
 void migrate_set_ports(QTestState *to, QList *channel_list);
 
diff --git a/tests/qtest/migration/precopy-tests.c b/tests/qtest/migration/precopy-tests.c
index 162fa695318e0..ba273d10b9a74 100644
--- a/tests/qtest/migration/precopy-tests.c
+++ b/tests/qtest/migration/precopy-tests.c
@@ -20,6 +20,7 @@
 #include "migration/migration-util.h"
 #include "ppc-util.h"
 #include "qobject/qlist.h"
+#include "qapi-types-migration.h"
 #include "qemu/module.h"
 #include "qemu/option.h"
 #include "qemu/range.h"
@@ -536,6 +537,161 @@ static void test_multifd_tcp_cancel(void)
     migrate_end(from, to2, true);
 }
 
+static void test_cancel_src_after_failed(QTestState *from, QTestState *to,
+                                         const char *uri, const char *phase)
+{
+    /*
+     * No migrate_incoming_qmp() at the start to force source into
+     * failed state during migrate_qmp().
+     */
+
+    wait_for_serial("src_serial");
+    migrate_ensure_converge(from);
+
+    migrate_qmp(from, to, uri, NULL, "{}");
+
+    migration_event_wait(from, phase);
+    migrate_cancel(from);
+
+    /* cancelling will not move the migration out of 'failed' */
+
+    wait_for_migration_status(from, "failed",
+                              (const char * []) { "completed", NULL });
+
+    /*
+     * Not waiting for the destination because it never started
+     * migration.
+     */
+}
+
+static void test_cancel_src_after_cancelled(QTestState *from, QTestState *to,
+                                            const char *uri, const char *phase)
+{
+    migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }");
+
+    wait_for_serial("src_serial");
+    migrate_ensure_converge(from);
+
+    migrate_qmp(from, to, uri, NULL, "{}");
+
+    /* To move to cancelled/cancelling */
+    migrate_cancel(from);
+    migration_event_wait(from, phase);
+
+    /* The migrate_cancel under test */
+    migrate_cancel(from);
+
+    wait_for_migration_status(from, "cancelled",
+                              (const char * []) { "completed", NULL });
+
+    wait_for_migration_status(to, "failed",
+                              (const char * []) { "completed", NULL });
+}
+
+static void test_cancel_src_after_complete(QTestState *from, QTestState *to,
+                                           const char *uri, const char *phase)
+{
+    migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }");
+
+    wait_for_serial("src_serial");
+    migrate_ensure_converge(from);
+
+    migrate_qmp(from, to, uri, NULL, "{}");
+
+    migration_event_wait(from, phase);
+    migrate_cancel(from);
+
+    /*
+     * qmp_migrate_cancel() exits early if migration is not running
+     * anymore, the status will not change to cancelled.
+     */
+    wait_for_migration_complete(from);
+    wait_for_migration_complete(to);
+}
+
+static void test_cancel_src_after_none(QTestState *from, QTestState *to,
+                                       const char *uri, const char *phase)
+{
+    /*
+     * Test that cancelling without a migration happening does not
+     * affect subsequent migrations
+     */
+    migrate_cancel(to);
+
+    wait_for_serial("src_serial");
+    migrate_cancel(from);
+
+    migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }");
+
+    migrate_ensure_converge(from);
+    migrate_qmp(from, to, uri, NULL, "{}");
+
+    wait_for_migration_complete(from);
+    wait_for_migration_complete(to);
+}
+
+static void test_cancel_src_pre_switchover(QTestState *from, QTestState *to,
+                                           const char *uri, const char *phase)
+{
+    migrate_set_capability(from, "pause-before-switchover", true);
+    migrate_set_capability(to, "pause-before-switchover", true);
+
+    migrate_set_capability(from, "multifd", true);
+    migrate_set_capability(to, "multifd", true);
+
+    migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }");
+
+    wait_for_serial("src_serial");
+    migrate_ensure_converge(from);
+
+    migrate_qmp(from, to, uri, NULL, "{}");
+
+    migration_event_wait(from, phase);
+    migrate_cancel(from);
+    migration_event_wait(from, "cancelling");
+
+    wait_for_migration_status(from, "cancelled",
+                              (const char * []) { "completed", NULL });
+
+    wait_for_migration_status(to, "failed",
+                              (const char * []) { "completed", NULL });
+}
+
+static void test_cancel_src_after_status(void *opaque)
+{
+    const char *test_path = opaque;
+    g_autofree char *phase = g_path_get_basename(test_path);
+    g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
+    QTestState *from, *to;
+    MigrateStart args = {
+        .hide_stderr = true,
+    };
+
+    if (migrate_start(&from, &to, "defer", &args)) {
+        return;
+    }
+
+    if (g_str_equal(phase, "cancelling") ||
+        g_str_equal(phase, "cancelled")) {
+        test_cancel_src_after_cancelled(from, to, uri, phase);
+
+    } else if (g_str_equal(phase, "completed")) {
+        test_cancel_src_after_complete(from, to, uri, phase);
+
+    } else if (g_str_equal(phase, "failed")) {
+        test_cancel_src_after_failed(from, to, uri, phase);
+
+    } else if (g_str_equal(phase, "none")) {
+        test_cancel_src_after_none(from, to, uri, phase);
+
+    } else {
+        /* any state that comes before pre-switchover */
+        test_cancel_src_pre_switchover(from, to, uri, phase);
+    }
+
+    migrate_end(from, to, false);
+}
+
 static void calc_dirty_rate(QTestState *who, uint64_t calc_time)
 {
     qtest_qmp_assert_success(who,
@@ -1018,4 +1174,24 @@ void migration_test_add_precopy(MigrationTestEnv *env)
                                test_vcpu_dirty_limit);
         }
     }
+
+    /* ensure new status don't go unnoticed */
+    assert(MIGRATION_STATUS__MAX == 15);
+
+    for (int i = MIGRATION_STATUS_NONE; i < MIGRATION_STATUS__MAX; i++) {
+        switch (i) {
+        case MIGRATION_STATUS_DEVICE: /* happens too fast */
+        case MIGRATION_STATUS_WAIT_UNPLUG: /* no support in tests */
+        case MIGRATION_STATUS_COLO: /* no support in tests */
+        case MIGRATION_STATUS_POSTCOPY_ACTIVE: /* postcopy can't be cancelled */
+        case MIGRATION_STATUS_POSTCOPY_PAUSED:
+        case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
+        case MIGRATION_STATUS_POSTCOPY_RECOVER:
+            continue;
+        default:
+            migration_test_add_suffix("/migration/cancel/src/after/",
+                                      MigrationStatus_str(i),
+                                      test_cancel_src_after_status);
+        }
+    }
 }
diff --git a/tests/qtest/pnv-spi-seeprom-test.c b/tests/qtest/pnv-spi-seeprom-test.c
index 57f20af76e1fe..600493c425dae 100644
--- a/tests/qtest/pnv-spi-seeprom-test.c
+++ b/tests/qtest/pnv-spi-seeprom-test.c
@@ -92,7 +92,7 @@ static void test_spi_seeprom(const void *data)
     qts = qtest_initf("-machine powernv10 -smp 2,cores=2,"
                       "threads=1 -accel tcg,thread=single -nographic "
                       "-blockdev node-name=pib_spic2,driver=file,"
-                      "filename=%s -device 25csm04,bus=pnv-spi-bus.2,cs=0,"
+                      "filename=%s -device 25csm04,bus=chip0.spi.2,cs=0,"
                       "drive=pib_spic2", tmp_path);
     spi_seeprom_transaction(qts, chip);
     qtest_quit(qts);
diff --git a/tests/qtest/pnv-xive2-common.h b/tests/qtest/pnv-xive2-common.h
index 9ae34771aa41e..2077c05ebc7a8 100644
--- a/tests/qtest/pnv-xive2-common.h
+++ b/tests/qtest/pnv-xive2-common.h
@@ -107,5 +107,6 @@ extern void set_end(QTestState *qts, uint32_t index, uint32_t nvp_index,
 
 
 void test_flush_sync_inject(QTestState *qts);
+void test_nvpg_bar(QTestState *qts);
 
 #endif /* TEST_PNV_XIVE2_COMMON_H */
diff --git a/tests/qtest/pnv-xive2-flush-sync.c b/tests/qtest/pnv-xive2-flush-sync.c
index 3b32446adba21..142826bad0f4d 100644
--- a/tests/qtest/pnv-xive2-flush-sync.c
+++ b/tests/qtest/pnv-xive2-flush-sync.c
@@ -178,14 +178,14 @@ void test_flush_sync_inject(QTestState *qts)
     int test_nr;
     uint8_t byte;
 
-    printf("# ============================================================\n");
-    printf("# Starting cache flush/queue sync injection tests...\n");
+    g_test_message("=========================================================");
+    g_test_message("Starting cache flush/queue sync injection tests...");
 
     for (test_nr = 0; test_nr < sizeof(xive_inject_tests);
          test_nr++) {
         int op_type = xive_inject_tests[test_nr];
 
-        printf("# Running test %d\n", test_nr);
+        g_test_message("Running test %d", test_nr);
 
         /* start with status byte set to 0 */
         clr_sync(qts, src_pir, ic_topo_id, op_type);
diff --git a/tests/qtest/pnv-xive2-nvpg_bar.c b/tests/qtest/pnv-xive2-nvpg_bar.c
new file mode 100644
index 0000000000000..6ac8d36c821e1
--- /dev/null
+++ b/tests/qtest/pnv-xive2-nvpg_bar.c
@@ -0,0 +1,152 @@
+/*
+ * QTest testcase for PowerNV 10 interrupt controller (xive2)
+ *  - Test NVPG BAR MMIO operations
+ *
+ * Copyright (c) 2024, IBM Corporation.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include "qemu/osdep.h"
+#include "libqtest.h"
+
+#include "pnv-xive2-common.h"
+
+#define NVPG_BACKLOG_OP_SHIFT   10
+#define NVPG_BACKLOG_PRIO_SHIFT 4
+
+#define XIVE_PRIORITY_MAX       7
+
+enum NVx {
+    NVP,
+    NVG,
+    NVC
+};
+
+typedef enum {
+    INCR_STORE = 0b100,
+    INCR_LOAD  = 0b000,
+    DECR_STORE = 0b101,
+    DECR_LOAD  = 0b001,
+    READ_x     = 0b010,
+    READ_y     = 0b011,
+} backlog_op;
+
+static uint32_t nvpg_backlog_op(QTestState *qts, backlog_op op,
+                                enum NVx type, uint64_t index,
+                                uint8_t priority, uint8_t delta)
+{
+    uint64_t addr, offset;
+    uint32_t count = 0;
+
+    switch (type) {
+    case NVP:
+        addr = XIVE_NVPG_ADDR + (index << (XIVE_PAGE_SHIFT + 1));
+        break;
+    case NVG:
+        addr = XIVE_NVPG_ADDR + (index << (XIVE_PAGE_SHIFT + 1)) +
+            (1 << XIVE_PAGE_SHIFT);
+        break;
+    case NVC:
+        addr = XIVE_NVC_ADDR + (index << XIVE_PAGE_SHIFT);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    offset = (op & 0b11) << NVPG_BACKLOG_OP_SHIFT;
+    offset |= priority << NVPG_BACKLOG_PRIO_SHIFT;
+    if (op >> 2) {
+        qtest_writeb(qts, addr + offset, delta);
+    } else {
+        count = qtest_readw(qts, addr + offset);
+    }
+    return count;
+}
+
+void test_nvpg_bar(QTestState *qts)
+{
+    uint32_t nvp_target = 0x11;
+    uint32_t group_target = 0x17; /* size 16 */
+    uint32_t vp_irq = 33, group_irq = 47;
+    uint32_t vp_end = 3, group_end = 97;
+    uint32_t vp_irq_data = 0x33333333;
+    uint32_t group_irq_data = 0x66666666;
+    uint8_t vp_priority = 0, group_priority = 5;
+    uint32_t vp_count[XIVE_PRIORITY_MAX + 1] = { 0 };
+    uint32_t group_count[XIVE_PRIORITY_MAX + 1] = { 0 };
+    uint32_t count, delta;
+    uint8_t i;
+
+    g_test_message("=========================================================");
+    g_test_message("Testing NVPG BAR operations");
+
+    set_nvg(qts, group_target, 0);
+    set_nvp(qts, nvp_target, 0x04);
+    set_nvp(qts, group_target, 0x04);
+
+    /*
+     * Setup: trigger a VP-specific interrupt and a group interrupt
+     * so that the backlog counters are initialized to something else
+     * than 0 for at least one priority level
+     */
+    set_eas(qts, vp_irq, vp_end, vp_irq_data);
+    set_end(qts, vp_end, nvp_target, vp_priority, false /* group */);
+
+    set_eas(qts, group_irq, group_end, group_irq_data);
+    set_end(qts, group_end, group_target, group_priority, true /* group */);
+
+    get_esb(qts, vp_irq, XIVE_EOI_PAGE, XIVE_ESB_SET_PQ_00);
+    set_esb(qts, vp_irq, XIVE_TRIGGER_PAGE, 0, 0);
+    vp_count[vp_priority]++;
+
+    get_esb(qts, group_irq, XIVE_EOI_PAGE, XIVE_ESB_SET_PQ_00);
+    set_esb(qts, group_irq, XIVE_TRIGGER_PAGE, 0, 0);
+    group_count[group_priority]++;
+
+    /* check the initial counters */
+    for (i = 0; i <= XIVE_PRIORITY_MAX; i++) {
+        count = nvpg_backlog_op(qts, READ_x, NVP, nvp_target, i, 0);
+        g_assert_cmpuint(count, ==, vp_count[i]);
+
+        count = nvpg_backlog_op(qts, READ_y, NVG, group_target, i, 0);
+        g_assert_cmpuint(count, ==, group_count[i]);
+    }
+
+    /* do a few ops on the VP. Counter can only be 0 and 1 */
+    vp_priority = 2;
+    delta = 7;
+    nvpg_backlog_op(qts, INCR_STORE, NVP, nvp_target, vp_priority, delta);
+    vp_count[vp_priority] = 1;
+    count = nvpg_backlog_op(qts, INCR_LOAD, NVP, nvp_target, vp_priority, 0);
+    g_assert_cmpuint(count, ==, vp_count[vp_priority]);
+    count = nvpg_backlog_op(qts, READ_y, NVP, nvp_target, vp_priority, 0);
+    g_assert_cmpuint(count, ==, vp_count[vp_priority]);
+
+    count = nvpg_backlog_op(qts, DECR_LOAD, NVP, nvp_target, vp_priority, 0);
+    g_assert_cmpuint(count, ==, vp_count[vp_priority]);
+    vp_count[vp_priority] = 0;
+    nvpg_backlog_op(qts, DECR_STORE, NVP, nvp_target, vp_priority, delta);
+    count = nvpg_backlog_op(qts, READ_x, NVP, nvp_target, vp_priority, 0);
+    g_assert_cmpuint(count, ==, vp_count[vp_priority]);
+
+    /* do a few ops on the group */
+    group_priority = 2;
+    delta = 9;
+    /* can't go negative */
+    nvpg_backlog_op(qts, DECR_STORE, NVG, group_target, group_priority, delta);
+    count = nvpg_backlog_op(qts, READ_y, NVG, group_target, group_priority, 0);
+    g_assert_cmpuint(count, ==, 0);
+    nvpg_backlog_op(qts, INCR_STORE, NVG, group_target, group_priority, delta);
+    group_count[group_priority] += delta;
+    count = nvpg_backlog_op(qts, INCR_LOAD, NVG, group_target,
+                            group_priority, delta);
+    g_assert_cmpuint(count, ==, group_count[group_priority]);
+    group_count[group_priority]++;
+
+    count = nvpg_backlog_op(qts, DECR_LOAD, NVG, group_target,
+                            group_priority, delta);
+    g_assert_cmpuint(count, ==,  group_count[group_priority]);
+    group_count[group_priority]--;
+    count = nvpg_backlog_op(qts, READ_x, NVG, group_target, group_priority, 0);
+    g_assert_cmpuint(count, ==, group_count[group_priority]);
+}
diff --git a/tests/qtest/pnv-xive2-test.c b/tests/qtest/pnv-xive2-test.c
index dd19e888614ea..5313d4ef18b72 100644
--- a/tests/qtest/pnv-xive2-test.c
+++ b/tests/qtest/pnv-xive2-test.c
@@ -2,6 +2,9 @@
  * QTest testcase for PowerNV 10 interrupt controller (xive2)
  *  - Test irq to hardware thread
  *  - Test 'Pull Thread Context to Odd Thread Reporting Line'
+ *  - Test irq to hardware group
+ *  - Test irq to hardware group going through backlog
+ *  - Test irq to pool thread
  *
  * Copyright (c) 2024, IBM Corporation.
  *
@@ -218,8 +221,8 @@ static void test_hw_irq(QTestState *qts)
     uint16_t reg16;
     uint8_t pq, nsr, cppr;
 
-    printf("# ============================================================\n");
-    printf("# Testing irq %d to hardware thread %d\n", irq, target_pir);
+    g_test_message("=========================================================");
+    g_test_message("Testing irq %d to hardware thread %d", irq, target_pir);
 
     /* irq config */
     set_eas(qts, irq, end_index, irq_data);
@@ -264,6 +267,79 @@ static void test_hw_irq(QTestState *qts)
     g_assert_cmphex(cppr, ==, 0xFF);
 }
 
+static void test_pool_irq(QTestState *qts)
+{
+    uint32_t irq = 2;
+    uint32_t irq_data = 0x600d0d06;
+    uint32_t end_index = 5;
+    uint32_t target_pir = 1;
+    uint32_t target_nvp = 0x100 + target_pir;
+    uint8_t priority = 5;
+    uint32_t reg32;
+    uint16_t reg16;
+    uint8_t pq, nsr, cppr, ipb;
+
+    g_test_message("=========================================================");
+    g_test_message("Testing irq %d to pool thread %d", irq, target_pir);
+
+    /* irq config */
+    set_eas(qts, irq, end_index, irq_data);
+    set_end(qts, end_index, target_nvp, priority, false /* group */);
+
+    /* enable and trigger irq */
+    get_esb(qts, irq, XIVE_EOI_PAGE, XIVE_ESB_SET_PQ_00);
+    set_esb(qts, irq, XIVE_TRIGGER_PAGE, 0, 0);
+
+    /* check irq is raised on cpu */
+    pq = get_esb(qts, irq, XIVE_EOI_PAGE, XIVE_ESB_GET);
+    g_assert_cmpuint(pq, ==, XIVE_ESB_PENDING);
+
+    /* check TIMA values in the PHYS ring (shared by POOL ring) */
+    reg32 = get_tima32(qts, target_pir, TM_QW3_HV_PHYS + TM_WORD0);
+    nsr = reg32 >> 24;
+    cppr = (reg32 >> 16) & 0xFF;
+    g_assert_cmphex(nsr, ==, 0x40);
+    g_assert_cmphex(cppr, ==, 0xFF);
+
+    /* check TIMA values in the POOL ring */
+    reg32 = get_tima32(qts, target_pir, TM_QW2_HV_POOL + TM_WORD0);
+    nsr = reg32 >> 24;
+    cppr = (reg32 >> 16) & 0xFF;
+    ipb = (reg32 >> 8) & 0xFF;
+    g_assert_cmphex(nsr, ==, 0);
+    g_assert_cmphex(cppr, ==, 0);
+    g_assert_cmphex(ipb, ==, 0x80 >> priority);
+
+    /* ack the irq */
+    reg16 = get_tima16(qts, target_pir, TM_SPC_ACK_HV_REG);
+    nsr = reg16 >> 8;
+    cppr = reg16 & 0xFF;
+    g_assert_cmphex(nsr, ==, 0x40);
+    g_assert_cmphex(cppr, ==, priority);
+
+    /* check irq data is what was configured */
+    reg32 = qtest_readl(qts, xive_get_queue_addr(end_index));
+    g_assert_cmphex((reg32 & 0x7fffffff), ==, (irq_data & 0x7fffffff));
+
+    /* check IPB is cleared in the POOL ring */
+    reg32 = get_tima32(qts, target_pir, TM_QW2_HV_POOL + TM_WORD0);
+    ipb = (reg32 >> 8) & 0xFF;
+    g_assert_cmphex(ipb, ==, 0);
+
+    /* End Of Interrupt */
+    set_esb(qts, irq, XIVE_EOI_PAGE, XIVE_ESB_STORE_EOI, 0);
+    pq = get_esb(qts, irq, XIVE_EOI_PAGE, XIVE_ESB_GET);
+    g_assert_cmpuint(pq, ==, XIVE_ESB_RESET);
+
+    /* reset CPPR */
+    set_tima8(qts, target_pir, TM_QW3_HV_PHYS + TM_CPPR, 0xFF);
+    reg32 = get_tima32(qts, target_pir, TM_QW3_HV_PHYS + TM_WORD0);
+    nsr = reg32 >> 24;
+    cppr = (reg32 >> 16) & 0xFF;
+    g_assert_cmphex(nsr, ==, 0x00);
+    g_assert_cmphex(cppr, ==, 0xFF);
+}
+
 #define XIVE_ODD_CL 0x80
 static void test_pull_thread_ctx_to_odd_thread_cl(QTestState *qts)
 {
@@ -276,8 +352,9 @@ static void test_pull_thread_ctx_to_odd_thread_cl(QTestState *qts)
     uint32_t cl_word;
     uint32_t word2;
 
-    printf("# ============================================================\n");
-    printf("# Testing 'Pull Thread Context to Odd Thread Reporting Line'\n");
+    g_test_message("=========================================================");
+    g_test_message("Testing 'Pull Thread Context to Odd Thread Reporting " \
+                   "Line'");
 
     /* clear odd cache line prior to pull operation */
     memset(cl_pair, 0, sizeof(cl_pair));
@@ -315,6 +392,158 @@ static void test_pull_thread_ctx_to_odd_thread_cl(QTestState *qts)
     word2 = get_tima32(qts, target_pir, TM_QW3_HV_PHYS + TM_WORD2);
     g_assert_cmphex(xive_get_field32(TM_QW3W2_VT, word2), ==, 0);
 }
+
+static void test_hw_group_irq(QTestState *qts)
+{
+    uint32_t irq = 100;
+    uint32_t irq_data = 0xdeadbeef;
+    uint32_t end_index = 23;
+    uint32_t chosen_one;
+    uint32_t target_nvp = 0x81; /* group size = 4 */
+    uint8_t priority = 6;
+    uint32_t reg32;
+    uint16_t reg16;
+    uint8_t pq, nsr, cppr;
+
+    g_test_message("=========================================================");
+    g_test_message("Testing irq %d to hardware group of size 4", irq);
+
+    /* irq config */
+    set_eas(qts, irq, end_index, irq_data);
+    set_end(qts, end_index, target_nvp, priority, true /* group */);
+
+    /* enable and trigger irq */
+    get_esb(qts, irq, XIVE_EOI_PAGE, XIVE_ESB_SET_PQ_00);
+    set_esb(qts, irq, XIVE_TRIGGER_PAGE, 0, 0);
+
+    /* check irq is raised on cpu */
+    pq = get_esb(qts, irq, XIVE_EOI_PAGE, XIVE_ESB_GET);
+    g_assert_cmpuint(pq, ==, XIVE_ESB_PENDING);
+
+    /* find the targeted vCPU */
+    for (chosen_one = 0; chosen_one < SMT; chosen_one++) {
+        reg32 = get_tima32(qts, chosen_one, TM_QW3_HV_PHYS + TM_WORD0);
+        nsr = reg32 >> 24;
+        if (nsr == 0x82) {
+            break;
+        }
+    }
+    g_assert_cmphex(chosen_one, <, SMT);
+    cppr = (reg32 >> 16) & 0xFF;
+    g_assert_cmphex(nsr, ==, 0x82);
+    g_assert_cmphex(cppr, ==, 0xFF);
+
+    /* ack the irq */
+    reg16 = get_tima16(qts, chosen_one, TM_SPC_ACK_HV_REG);
+    nsr = reg16 >> 8;
+    cppr = reg16 & 0xFF;
+    g_assert_cmphex(nsr, ==, 0x82);
+    g_assert_cmphex(cppr, ==, priority);
+
+    /* check irq data is what was configured */
+    reg32 = qtest_readl(qts, xive_get_queue_addr(end_index));
+    g_assert_cmphex((reg32 & 0x7fffffff), ==, (irq_data & 0x7fffffff));
+
+    /* End Of Interrupt */
+    set_esb(qts, irq, XIVE_EOI_PAGE, XIVE_ESB_STORE_EOI, 0);
+    pq = get_esb(qts, irq, XIVE_EOI_PAGE, XIVE_ESB_GET);
+    g_assert_cmpuint(pq, ==, XIVE_ESB_RESET);
+
+    /* reset CPPR */
+    set_tima8(qts, chosen_one, TM_QW3_HV_PHYS + TM_CPPR, 0xFF);
+    reg32 = get_tima32(qts, chosen_one, TM_QW3_HV_PHYS + TM_WORD0);
+    nsr = reg32 >> 24;
+    cppr = (reg32 >> 16) & 0xFF;
+    g_assert_cmphex(nsr, ==, 0x00);
+    g_assert_cmphex(cppr, ==, 0xFF);
+}
+
+static void test_hw_group_irq_backlog(QTestState *qts)
+{
+    uint32_t irq = 31;
+    uint32_t irq_data = 0x01234567;
+    uint32_t end_index = 129;
+    uint32_t target_nvp = 0x81; /* group size = 4 */
+    uint32_t chosen_one = 3;
+    uint8_t blocking_priority, priority = 3;
+    uint32_t reg32;
+    uint16_t reg16;
+    uint8_t pq, nsr, cppr, lsmfb, i;
+
+    g_test_message("=========================================================");
+    g_test_message("Testing irq %d to hardware group of size 4 going " \
+                   "through backlog",
+                   irq);
+
+    /*
+     * set current priority of all threads in the group to something
+     * higher than what we're about to trigger
+     */
+    blocking_priority = priority - 1;
+    for (i = 0; i < SMT; i++) {
+        set_tima8(qts, i, TM_QW3_HV_PHYS + TM_CPPR, blocking_priority);
+    }
+
+    /* irq config */
+    set_eas(qts, irq, end_index, irq_data);
+    set_end(qts, end_index, target_nvp, priority, true /* group */);
+
+    /* enable and trigger irq */
+    get_esb(qts, irq, XIVE_EOI_PAGE, XIVE_ESB_SET_PQ_00);
+    set_esb(qts, irq, XIVE_TRIGGER_PAGE, 0, 0);
+
+    /* check irq is raised on cpu */
+    pq = get_esb(qts, irq, XIVE_EOI_PAGE, XIVE_ESB_GET);
+    g_assert_cmpuint(pq, ==, XIVE_ESB_PENDING);
+
+    /* check no interrupt is pending on the 2 possible targets */
+    for (i = 0; i < SMT; i++) {
+        reg32 = get_tima32(qts, i, TM_QW3_HV_PHYS + TM_WORD0);
+        nsr = reg32 >> 24;
+        cppr = (reg32 >> 16) & 0xFF;
+        lsmfb = reg32 & 0xFF;
+        g_assert_cmphex(nsr, ==, 0x0);
+        g_assert_cmphex(cppr, ==, blocking_priority);
+        g_assert_cmphex(lsmfb, ==, priority);
+    }
+
+    /* lower priority of one thread */
+    set_tima8(qts, chosen_one, TM_QW3_HV_PHYS + TM_CPPR, priority + 1);
+
+    /* check backlogged interrupt is presented */
+    reg32 = get_tima32(qts, chosen_one, TM_QW3_HV_PHYS + TM_WORD0);
+    nsr = reg32 >> 24;
+    cppr = (reg32 >> 16) & 0xFF;
+    g_assert_cmphex(nsr, ==, 0x82);
+    g_assert_cmphex(cppr, ==, priority + 1);
+
+    /* ack the irq */
+    reg16 = get_tima16(qts, chosen_one, TM_SPC_ACK_HV_REG);
+    nsr = reg16 >> 8;
+    cppr = reg16 & 0xFF;
+    g_assert_cmphex(nsr, ==, 0x82);
+    g_assert_cmphex(cppr, ==, priority);
+
+    /* check irq data is what was configured */
+    reg32 = qtest_readl(qts, xive_get_queue_addr(end_index));
+    g_assert_cmphex((reg32 & 0x7fffffff), ==, (irq_data & 0x7fffffff));
+
+    /* End Of Interrupt */
+    set_esb(qts, irq, XIVE_EOI_PAGE, XIVE_ESB_STORE_EOI, 0);
+    pq = get_esb(qts, irq, XIVE_EOI_PAGE, XIVE_ESB_GET);
+    g_assert_cmpuint(pq, ==, XIVE_ESB_RESET);
+
+    /* reset CPPR */
+    set_tima8(qts, chosen_one, TM_QW3_HV_PHYS + TM_CPPR, 0xFF);
+    reg32 = get_tima32(qts, chosen_one, TM_QW3_HV_PHYS + TM_WORD0);
+    nsr = reg32 >> 24;
+    cppr = (reg32 >> 16) & 0xFF;
+    lsmfb = reg32 & 0xFF;
+    g_assert_cmphex(nsr, ==, 0x00);
+    g_assert_cmphex(cppr, ==, 0xFF);
+    g_assert_cmphex(lsmfb, ==, 0xFF);
+}
+
 static void test_xive(void)
 {
     QTestState *qts;
@@ -330,9 +559,21 @@ static void test_xive(void)
     /* omit reset_state here and use settings from test_hw_irq */
     test_pull_thread_ctx_to_odd_thread_cl(qts);
 
+    reset_state(qts);
+    test_pool_irq(qts);
+
+    reset_state(qts);
+    test_hw_group_irq(qts);
+
+    reset_state(qts);
+    test_hw_group_irq_backlog(qts);
+
     reset_state(qts);
     test_flush_sync_inject(qts);
 
+    reset_state(qts);
+    test_nvpg_bar(qts);
+
     qtest_quit(qts);
 }
 
diff --git a/tests/qtest/qom-test.c b/tests/qtest/qom-test.c
index 1e30a5bfe8d28..27d70bc11ce27 100644
--- a/tests/qtest/qom-test.c
+++ b/tests/qtest/qom-test.c
@@ -88,6 +88,17 @@ static void test_machine(gconstpointer data)
 
     qts = qtest_initf("-machine %s", machine);
 
+    if (g_test_slow()) {
+        /* Make sure we can get the machine class properties: */
+        g_autofree char *qom_machine = g_strdup_printf("%s-machine", machine);
+
+        response = qtest_qmp(qts, "{ 'execute': 'qom-list-properties',"
+                                  "  'arguments': { 'typename': %s } }",
+                             qom_machine);
+        g_assert(response);
+        qobject_unref(response);
+    }
+
     test_properties(qts, "/machine", true);
 
     response = qtest_qmp(qts, "{ 'execute': 'quit' }");
diff --git a/tests/qtest/rs5c372-test.c b/tests/qtest/rs5c372-test.c
new file mode 100644
index 0000000000000..0f6a9b68b9f24
--- /dev/null
+++ b/tests/qtest/rs5c372-test.c
@@ -0,0 +1,43 @@
+/*
+ * QTest testcase for the RS5C372 RTC
+ *
+ * Copyright (c) 2025 Bernhard Beschow <shentey@gmail.com>
+ *
+ * Based on ds1338-test.c
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/bcd.h"
+#include "libqos/i2c.h"
+
+#define RS5C372_ADDR 0x32
+
+static void rs5c372_read_date(void *obj, void *data, QGuestAllocator *alloc)
+{
+    QI2CDevice *i2cdev = obj;
+
+    uint8_t resp[0x10];
+    time_t now = time(NULL);
+    struct tm *utc = gmtime(&now);
+
+    i2c_read_block(i2cdev, 0, resp, sizeof(resp));
+
+    /* check retrieved time against local time */
+    g_assert_cmpuint(from_bcd(resp[5]), == , utc->tm_mday);
+    g_assert_cmpuint(from_bcd(resp[6]), == , 1 + utc->tm_mon);
+    g_assert_cmpuint(2000 + from_bcd(resp[7]), == , 1900 + utc->tm_year);
+}
+
+static void rs5c372_register_nodes(void)
+{
+    QOSGraphEdgeOptions opts = { };
+    add_qi2c_address(&opts, &(QI2CAddress) { RS5C372_ADDR });
+
+    qos_node_create_driver("rs5c372", i2c_device_create);
+    qos_node_consumes("rs5c372", "i2c-bus", &opts);
+    qos_add_test("read_date", "rs5c372", rs5c372_read_date, NULL);
+}
+
+libqos_init(rs5c372_register_nodes);
diff --git a/tests/qtest/ufs-test.c b/tests/qtest/ufs-test.c
index 1f860b41c0629..4867ccf08a12c 100644
--- a/tests/qtest/ufs-test.c
+++ b/tests/qtest/ufs-test.c
@@ -8,13 +8,14 @@
 
 #include "qemu/osdep.h"
 #include "qemu/module.h"
-#include "qemu/units.h"
 #include "libqtest.h"
 #include "libqos/qgraph.h"
 #include "libqos/pci.h"
 #include "scsi/constants.h"
 #include "block/ufs.h"
+#include "qemu/bitmap.h"
 
+#define DWORD_BYTE 4
 /* Test images sizes in Bytes */
 #define TEST_IMAGE_SIZE (64 * 1024 * 1024)
 /* Timeout for various operations, in seconds. */
@@ -26,6 +27,12 @@
 #define UTP_COMMAND_DESCRIPTOR_SIZE 4096
 #define UTP_RESPONSE_UPIU_OFFSET 1024
 #define UTP_PRDT_UPIU_OFFSET 2048
+#define UTRD_TEST_SLOT 0
+#define UFS_MAX_CMD_DESC 32
+/* Constants for MCQ */
+#define TEST_QID 0
+#define QUEUE_SIZE 32
+#define UFS_MCQ_MAX_QNUM 32
 
 typedef struct QUfs QUfs;
 
@@ -34,12 +41,22 @@ struct QUfs {
     QPCIDevice dev;
     QPCIBar bar;
 
-    uint64_t utrlba;
-    uint64_t utmrlba;
+    DECLARE_BITMAP(cmd_desc_bitmap, UFS_MAX_CMD_DESC);
     uint64_t cmd_desc_addr;
     uint64_t data_buffer_addr;
 
     bool enabled;
+    bool support_mcq;
+
+    /* for legacy doorbell mode */
+    uint64_t utrlba;
+
+    /* for mcq mode */
+    uint32_t maxq;
+    uint64_t sqlba[UFS_MCQ_MAX_QNUM];
+    uint64_t cqlba[UFS_MCQ_MAX_QNUM];
+    uint64_t sqdao[UFS_MCQ_MAX_QNUM];
+    uint64_t cqdao[UFS_MCQ_MAX_QNUM];
 };
 
 static inline uint32_t ufs_rreg(QUfs *ufs, size_t offset)
@@ -52,6 +69,24 @@ static inline void ufs_wreg(QUfs *ufs, size_t offset, uint32_t value)
     qpci_io_writel(&ufs->dev, ufs->bar, offset, value);
 }
 
+static int alloc_cmd_desc_slot(QUfs *ufs)
+{
+    int slot = find_first_zero_bit(ufs->cmd_desc_bitmap, UFS_MAX_CMD_DESC);
+    if (slot == UFS_MAX_CMD_DESC) {
+        g_assert_not_reached();
+    }
+    set_bit(slot, ufs->cmd_desc_bitmap);
+    return slot;
+}
+
+static void release_cmd_desc_slot(QUfs *ufs, int slot)
+{
+    if (!test_bit(slot, ufs->cmd_desc_bitmap)) {
+        g_assert_not_reached();
+    }
+    clear_bit(slot, ufs->cmd_desc_bitmap);
+}
+
 static void ufs_wait_for_irq(QUfs *ufs)
 {
     uint64_t end_time;
@@ -64,14 +99,11 @@ static void ufs_wait_for_irq(QUfs *ufs)
     } while (is == 0 && g_get_monotonic_time() < end_time);
 }
 
-static UtpTransferReqDesc ufs_build_req_utrd(uint64_t cmd_desc_addr,
-                                             uint8_t slot,
+static UtpTransferReqDesc ufs_build_req_utrd(uint64_t command_desc_base_addr,
                                              uint32_t data_direction,
                                              uint16_t prd_table_length)
 {
     UtpTransferReqDesc req = { 0 };
-    uint64_t command_desc_base_addr =
-        cmd_desc_addr + slot * UTP_COMMAND_DESCRIPTOR_SIZE;
 
     req.header.dword_0 =
         cpu_to_le32(1 << 28 | data_direction | UFS_UTP_REQ_DESC_INT_CMD);
@@ -88,54 +120,109 @@ static UtpTransferReqDesc ufs_build_req_utrd(uint64_t cmd_desc_addr,
     return req;
 }
 
-static void ufs_send_nop_out(QUfs *ufs, uint8_t slot,
-                             UtpTransferReqDesc *utrd_out, UtpUpiuRsp *rsp_out)
+static enum UtpOcsCodes
+__ufs_send_transfer_request_doorbell(QUfs *ufs, uint8_t lun,
+                                     const UtpTransferReqDesc *utrd)
 {
-    /* Build up utp transfer request descriptor */
-    UtpTransferReqDesc utrd = ufs_build_req_utrd(ufs->cmd_desc_addr, slot,
-                                                 UFS_UTP_NO_DATA_TRANSFER, 0);
-    uint64_t utrd_addr = ufs->utrlba + slot * sizeof(UtpTransferReqDesc);
+    uint64_t utrd_addr =
+        ufs->utrlba + UTRD_TEST_SLOT * sizeof(UtpTransferReqDesc);
+    UtpTransferReqDesc utrd_result;
+
+    qtest_memwrite(ufs->dev.bus->qts, utrd_addr, utrd, sizeof(*utrd));
+
+    /* Ring the doorbell */
+    ufs_wreg(ufs, A_UTRLDBR, 1);
+    ufs_wait_for_irq(ufs);
+    g_assert_true(FIELD_EX32(ufs_rreg(ufs, A_IS), IS, UTRCS));
+    ufs_wreg(ufs, A_IS, FIELD_DP32(0, IS, UTRCS, 1));
+
+    /* Handle completed command */
+    qtest_memread(ufs->dev.bus->qts, utrd_addr, &utrd_result,
+                  sizeof(utrd_result));
+    return le32_to_cpu(utrd_result.header.dword_2) & 0xf;
+}
+
+static enum UtpOcsCodes
+__ufs_send_transfer_request_mcq(QUfs *ufs, uint8_t lun,
+                                const UtpTransferReqDesc *utrd)
+{
+    uint32_t sqtp = ufs_rreg(ufs, ufs->sqdao[TEST_QID] + 0x4);
+    uint64_t utrd_addr = ufs->sqlba[TEST_QID] + sqtp;
+    uint32_t cqhp;
+    uint64_t cqentry_addr;
+    UfsCqEntry cqentry;
+
+    qtest_memwrite(ufs->dev.bus->qts, utrd_addr, utrd, sizeof(*utrd));
+
+    /* Insert a new entry into the submission queue */
+    sqtp = ufs_rreg(ufs, ufs->sqdao[TEST_QID] + 0x4);
+    sqtp = (sqtp + sizeof(UfsSqEntry)) % (QUEUE_SIZE * sizeof(UfsSqEntry));
+    ufs_wreg(ufs, ufs->sqdao[TEST_QID] + 0x4, sqtp);
+    ufs_wait_for_irq(ufs);
+    g_assert_true(FIELD_EX32(ufs_rreg(ufs, A_IS), IS, CQES));
+    ufs_wreg(ufs, A_IS, FIELD_DP32(0, IS, CQES, 1));
+
+    /* Handle the completed command from the completion queue */
+    cqhp = ufs_rreg(ufs, ufs->cqdao[TEST_QID]);
+    cqentry_addr = ufs->cqlba[TEST_QID] + cqhp;
+    qtest_memread(ufs->dev.bus->qts, cqentry_addr, &cqentry, sizeof(cqentry));
+    ufs_wreg(ufs, ufs->cqdao[TEST_QID], cqhp);
+
+    return cqentry.status;
+}
+
+static enum UtpOcsCodes
+ufs_send_transfer_request_sync(QUfs *ufs, uint8_t lun,
+                               const UtpTransferReqDesc *utrd)
+{
+    if (ufs->support_mcq) {
+        return __ufs_send_transfer_request_mcq(ufs, lun, utrd);
+    }
+
+    return __ufs_send_transfer_request_doorbell(ufs, lun, utrd);
+}
+
+static enum UtpOcsCodes ufs_send_nop_out(QUfs *ufs, UtpUpiuRsp *rsp_out)
+{
+    int cmd_desc_slot = alloc_cmd_desc_slot(ufs);
     uint64_t req_upiu_addr =
-        ufs->cmd_desc_addr + slot * UTP_COMMAND_DESCRIPTOR_SIZE;
+        ufs->cmd_desc_addr + cmd_desc_slot * UTP_COMMAND_DESCRIPTOR_SIZE;
     uint64_t rsp_upiu_addr = req_upiu_addr + UTP_RESPONSE_UPIU_OFFSET;
-    qtest_memwrite(ufs->dev.bus->qts, utrd_addr, &utrd, sizeof(utrd));
 
     /* Build up request upiu */
     UtpUpiuReq req_upiu = { 0 };
     req_upiu.header.trans_type = UFS_UPIU_TRANSACTION_NOP_OUT;
-    req_upiu.header.task_tag = slot;
+    req_upiu.header.task_tag = cmd_desc_slot;
     qtest_memwrite(ufs->dev.bus->qts, req_upiu_addr, &req_upiu,
                    sizeof(req_upiu));
 
-    /* Ring Doorbell */
-    ufs_wreg(ufs, A_UTRLDBR, 1);
-    ufs_wait_for_irq(ufs);
-    g_assert_true(FIELD_EX32(ufs_rreg(ufs, A_IS), IS, UTRCS));
-    ufs_wreg(ufs, A_IS, FIELD_DP32(0, IS, UTRCS, 1));
+    /* Build up utp transfer request descriptor */
+    UtpTransferReqDesc utrd =
+        ufs_build_req_utrd(req_upiu_addr, UFS_UTP_NO_DATA_TRANSFER, 0);
+
+    /* Send Transfer Request */
+    enum UtpOcsCodes ret = ufs_send_transfer_request_sync(ufs, 0, &utrd);
 
-    qtest_memread(ufs->dev.bus->qts, utrd_addr, utrd_out, sizeof(*utrd_out));
     qtest_memread(ufs->dev.bus->qts, rsp_upiu_addr, rsp_out, sizeof(*rsp_out));
+    release_cmd_desc_slot(ufs, cmd_desc_slot);
+    return ret;
 }
 
-static void ufs_send_query(QUfs *ufs, uint8_t slot, uint8_t query_function,
-                           uint8_t query_opcode, uint8_t idn, uint8_t index,
-                           uint8_t selector, uint32_t attr_value,
-                           UtpTransferReqDesc *utrd_out, UtpUpiuRsp *rsp_out)
+static enum UtpOcsCodes ufs_send_query(QUfs *ufs, uint8_t query_function,
+                                       uint8_t query_opcode, uint8_t idn,
+                                       uint8_t index, uint8_t selector,
+                                       uint32_t attr_value, UtpUpiuRsp *rsp_out)
 {
-    /* Build up utp transfer request descriptor */
-    UtpTransferReqDesc utrd = ufs_build_req_utrd(ufs->cmd_desc_addr, slot,
-                                                 UFS_UTP_NO_DATA_TRANSFER, 0);
-    uint64_t utrd_addr = ufs->utrlba + slot * sizeof(UtpTransferReqDesc);
+    int cmd_desc_slot = alloc_cmd_desc_slot(ufs);
     uint64_t req_upiu_addr =
-        ufs->cmd_desc_addr + slot * UTP_COMMAND_DESCRIPTOR_SIZE;
+        ufs->cmd_desc_addr + cmd_desc_slot * UTP_COMMAND_DESCRIPTOR_SIZE;
     uint64_t rsp_upiu_addr = req_upiu_addr + UTP_RESPONSE_UPIU_OFFSET;
-    qtest_memwrite(ufs->dev.bus->qts, utrd_addr, &utrd, sizeof(utrd));
 
     /* Build up request upiu */
     UtpUpiuReq req_upiu = { 0 };
     req_upiu.header.trans_type = UFS_UPIU_TRANSACTION_QUERY_REQ;
     req_upiu.header.query_func = query_function;
-    req_upiu.header.task_tag = slot;
+    req_upiu.header.task_tag = cmd_desc_slot;
     /*
      * QEMU UFS does not currently support Write descriptor,
      * so the value of data_segment_length is always 0.
@@ -150,22 +237,23 @@ static void ufs_send_query(QUfs *ufs, uint8_t slot, uint8_t query_function,
     qtest_memwrite(ufs->dev.bus->qts, req_upiu_addr, &req_upiu,
                    sizeof(req_upiu));
 
-    /* Ring Doorbell */
-    ufs_wreg(ufs, A_UTRLDBR, 1);
-    ufs_wait_for_irq(ufs);
-    g_assert_true(FIELD_EX32(ufs_rreg(ufs, A_IS), IS, UTRCS));
-    ufs_wreg(ufs, A_IS, FIELD_DP32(0, IS, UTRCS, 1));
+    /* Build up utp transfer request descriptor */
+    UtpTransferReqDesc utrd =
+        ufs_build_req_utrd(req_upiu_addr, UFS_UTP_NO_DATA_TRANSFER, 0);
+
+    /* Send Transfer Request */
+    enum UtpOcsCodes ret = ufs_send_transfer_request_sync(ufs, 0, &utrd);
 
-    qtest_memread(ufs->dev.bus->qts, utrd_addr, utrd_out, sizeof(*utrd_out));
     qtest_memread(ufs->dev.bus->qts, rsp_upiu_addr, rsp_out, sizeof(*rsp_out));
+    release_cmd_desc_slot(ufs, cmd_desc_slot);
+    return ret;
 }
 
-static void ufs_send_scsi_command(QUfs *ufs, uint8_t slot, uint8_t lun,
-                                  const uint8_t *cdb, const uint8_t *data_in,
-                                  size_t data_in_len, uint8_t *data_out,
-                                  size_t data_out_len,
-                                  UtpTransferReqDesc *utrd_out,
-                                  UtpUpiuRsp *rsp_out)
+static enum UtpOcsCodes
+ufs_send_scsi_command(QUfs *ufs, uint8_t lun, const uint8_t *cdb,
+                      const uint8_t *data_in, size_t data_in_len,
+                      uint8_t *data_out, size_t data_out_len,
+                      UtpUpiuRsp *rsp_out)
 
 {
     /* Build up PRDT */
@@ -175,8 +263,9 @@ static void ufs_send_scsi_command(QUfs *ufs, uint8_t slot, uint8_t lun,
     uint8_t flags;
     uint16_t prd_table_length, i;
     uint32_t data_direction, data_len;
+    int cmd_desc_slot = alloc_cmd_desc_slot(ufs);
     uint64_t req_upiu_addr =
-        ufs->cmd_desc_addr + slot * UTP_COMMAND_DESCRIPTOR_SIZE;
+        ufs->cmd_desc_addr + cmd_desc_slot * UTP_COMMAND_DESCRIPTOR_SIZE;
     uint64_t prdt_addr = req_upiu_addr + UTP_PRDT_UPIU_OFFSET;
 
     g_assert_true(data_in_len < MAX_PRD_ENTRY_COUNT * PRD_ENTRY_DATA_SIZE);
@@ -218,36 +307,33 @@ static void ufs_send_scsi_command(QUfs *ufs, uint8_t slot, uint8_t lun,
     qtest_memwrite(ufs->dev.bus->qts, prdt_addr, entries,
                    prd_table_length * sizeof(UfshcdSgEntry));
 
-    /* Build up utp transfer request descriptor */
-    UtpTransferReqDesc utrd = ufs_build_req_utrd(
-        ufs->cmd_desc_addr, slot, data_direction, prd_table_length);
-    uint64_t utrd_addr = ufs->utrlba + slot * sizeof(UtpTransferReqDesc);
     uint64_t rsp_upiu_addr = req_upiu_addr + UTP_RESPONSE_UPIU_OFFSET;
-    qtest_memwrite(ufs->dev.bus->qts, utrd_addr, &utrd, sizeof(utrd));
 
     /* Build up request upiu */
     UtpUpiuReq req_upiu = { 0 };
     req_upiu.header.trans_type = UFS_UPIU_TRANSACTION_COMMAND;
     req_upiu.header.flags = flags;
     req_upiu.header.lun = lun;
-    req_upiu.header.task_tag = slot;
+    req_upiu.header.task_tag = cmd_desc_slot;
     req_upiu.sc.exp_data_transfer_len = cpu_to_be32(data_len);
     memcpy(req_upiu.sc.cdb, cdb, UFS_CDB_SIZE);
     qtest_memwrite(ufs->dev.bus->qts, req_upiu_addr, &req_upiu,
                    sizeof(req_upiu));
 
-    /* Ring Doorbell */
-    ufs_wreg(ufs, A_UTRLDBR, 1);
-    ufs_wait_for_irq(ufs);
-    g_assert_true(FIELD_EX32(ufs_rreg(ufs, A_IS), IS, UTRCS));
-    ufs_wreg(ufs, A_IS, FIELD_DP32(0, IS, UTRCS, 1));
+    /* Build up utp transfer request descriptor */
+    UtpTransferReqDesc utrd =
+        ufs_build_req_utrd(req_upiu_addr, data_direction, prd_table_length);
+
+    /* Send Transfer Request */
+    enum UtpOcsCodes ret = ufs_send_transfer_request_sync(ufs, lun, &utrd);
 
-    qtest_memread(ufs->dev.bus->qts, utrd_addr, utrd_out, sizeof(*utrd_out));
     qtest_memread(ufs->dev.bus->qts, rsp_upiu_addr, rsp_out, sizeof(*rsp_out));
     if (data_out_len) {
         qtest_memread(ufs->dev.bus->qts, ufs->data_buffer_addr, data_out,
                       data_out_len);
     }
+    release_cmd_desc_slot(ufs, cmd_desc_slot);
+    return ret;
 }
 
 /**
@@ -257,10 +343,10 @@ static void ufs_send_scsi_command(QUfs *ufs, uint8_t slot, uint8_t lun,
 static void ufs_init(QUfs *ufs, QGuestAllocator *alloc)
 {
     uint64_t end_time;
-    uint32_t nutrs, nutmrs;
+    uint32_t nutrs;
     uint32_t hcs, is, ucmdarg2, cap;
     uint32_t hce = 0, ie = 0;
-    UtpTransferReqDesc utrd;
+    enum UtpOcsCodes ocs;
     UtpUpiuRsp rsp_upiu;
 
     ufs->bar = qpci_iomap(&ufs->dev, 0, NULL);
@@ -305,9 +391,12 @@ static void ufs_init(QUfs *ufs, QGuestAllocator *alloc)
     hcs = ufs_rreg(ufs, A_HCS);
     g_assert_true(FIELD_EX32(hcs, HCS, DP));
     g_assert_true(FIELD_EX32(hcs, HCS, UTRLRDY));
-    g_assert_true(FIELD_EX32(hcs, HCS, UTMRLRDY));
     g_assert_true(FIELD_EX32(hcs, HCS, UCRDY));
 
+    /* Check MCQ support */
+    cap = ufs_rreg(ufs, A_CAP);
+    ufs->support_mcq = FIELD_EX32(cap, CAP, MCQS);
+
     /* Enable all interrupt functions */
     ie = FIELD_DP32(ie, IE, UTRCE, 1);
     ie = FIELD_DP32(ie, IE, UEE, 1);
@@ -320,45 +409,89 @@ static void ufs_init(QUfs *ufs, QGuestAllocator *alloc)
     ie = FIELD_DP32(ie, IE, HCFEE, 1);
     ie = FIELD_DP32(ie, IE, SBFEE, 1);
     ie = FIELD_DP32(ie, IE, CEFEE, 1);
+    if (ufs->support_mcq) {
+        ie = FIELD_DP32(ie, IE, CQEE, 1);
+    }
     ufs_wreg(ufs, A_IE, ie);
     ufs_wreg(ufs, A_UTRIACR, 0);
 
-    /* Enable transfer request and task management request */
-    cap = ufs_rreg(ufs, A_CAP);
-    nutrs = FIELD_EX32(cap, CAP, NUTRS) + 1;
-    nutmrs = FIELD_EX32(cap, CAP, NUTMRS) + 1;
+    /* Enable transfer request */
     ufs->cmd_desc_addr =
-        guest_alloc(alloc, nutrs * UTP_COMMAND_DESCRIPTOR_SIZE);
+        guest_alloc(alloc, UFS_MAX_CMD_DESC * UTP_COMMAND_DESCRIPTOR_SIZE);
     ufs->data_buffer_addr =
         guest_alloc(alloc, MAX_PRD_ENTRY_COUNT * PRD_ENTRY_DATA_SIZE);
-    ufs->utrlba = guest_alloc(alloc, nutrs * sizeof(UtpTransferReqDesc));
-    ufs->utmrlba = guest_alloc(alloc, nutmrs * sizeof(UtpTaskReqDesc));
 
-    ufs_wreg(ufs, A_UTRLBA, ufs->utrlba & 0xffffffff);
-    ufs_wreg(ufs, A_UTRLBAU, ufs->utrlba >> 32);
-    ufs_wreg(ufs, A_UTMRLBA, ufs->utmrlba & 0xffffffff);
-    ufs_wreg(ufs, A_UTMRLBAU, ufs->utmrlba >> 32);
-    ufs_wreg(ufs, A_UTRLRSR, 1);
-    ufs_wreg(ufs, A_UTMRLRSR, 1);
+    if (ufs->support_mcq) {
+        uint32_t mcqcap, qid, qcfgptr, mcq_reg_offset;
+        uint32_t cqattr = 0, sqattr = 0;
+
+        mcqcap = ufs_rreg(ufs, A_MCQCAP);
+        qcfgptr = FIELD_EX32(mcqcap, MCQCAP, QCFGPTR);
+        ufs->maxq = FIELD_EX32(mcqcap, MCQCAP, MAXQ) + 1;
+        for (qid = 0; qid < ufs->maxq; ++qid) {
+            ufs->sqlba[qid] =
+                guest_alloc(alloc, QUEUE_SIZE * sizeof(UtpTransferReqDesc));
+            ufs->cqlba[qid] =
+                guest_alloc(alloc, QUEUE_SIZE * sizeof(UtpTransferReqDesc));
+            mcq_reg_offset = qcfgptr * 0x200 + qid * 0x40;
+
+            ufs_wreg(ufs, mcq_reg_offset + A_SQLBA,
+                     ufs->sqlba[qid] & 0xffffffff);
+            ufs_wreg(ufs, mcq_reg_offset + A_SQUBA, ufs->sqlba[qid] >> 32);
+            ufs_wreg(ufs, mcq_reg_offset + A_CQLBA,
+                     ufs->cqlba[qid] & 0xffffffff);
+            ufs_wreg(ufs, mcq_reg_offset + A_CQUBA, ufs->cqlba[qid] >> 32);
+
+            /* Enable Completion Queue */
+            cqattr = FIELD_DP32(cqattr, CQATTR, CQEN, 1);
+            cqattr = FIELD_DP32(cqattr, CQATTR, SIZE,
+                                QUEUE_SIZE * sizeof(UtpTransferReqDesc) /
+                                    DWORD_BYTE);
+            ufs_wreg(ufs, mcq_reg_offset + A_CQATTR, cqattr);
+
+            /* Enable Submission Queue */
+            sqattr = FIELD_DP32(sqattr, SQATTR, SQEN, 1);
+            sqattr = FIELD_DP32(sqattr, SQATTR, SIZE,
+                                QUEUE_SIZE * sizeof(UtpTransferReqDesc) /
+                                    DWORD_BYTE);
+            sqattr = FIELD_DP32(sqattr, SQATTR, CQID, qid);
+            ufs_wreg(ufs, mcq_reg_offset + A_SQATTR, sqattr);
+
+            /* Cache head & tail pointer */
+            ufs->sqdao[qid] = ufs_rreg(ufs, mcq_reg_offset + A_SQDAO);
+            ufs->cqdao[qid] = ufs_rreg(ufs, mcq_reg_offset + A_CQDAO);
+        }
+    } else {
+        nutrs = FIELD_EX32(cap, CAP, NUTRS) + 1;
+        ufs->utrlba = guest_alloc(alloc, nutrs * sizeof(UtpTransferReqDesc));
+
+        ufs_wreg(ufs, A_UTRLBA, ufs->utrlba & 0xffffffff);
+        ufs_wreg(ufs, A_UTRLBAU, ufs->utrlba >> 32);
+        ufs_wreg(ufs, A_UTRLRSR, 1);
+    }
 
     /* Send nop out to test transfer request */
-    ufs_send_nop_out(ufs, 0, &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_nop_out(ufs, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
 
     /* Set fDeviceInit flag via query request */
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_SET_FLAG,
-                   UFS_QUERY_FLAG_IDN_FDEVICEINIT, 0, 0, 0, &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_SET_FLAG,
+                         UFS_QUERY_FLAG_IDN_FDEVICEINIT, 0, 0, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
+    g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
 
     /* Wait for device to reset */
     end_time = g_get_monotonic_time() + TIMEOUT_SECONDS * G_TIME_SPAN_SECOND;
     do {
         qtest_clock_step(ufs->dev.bus->qts, 100);
-        ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
-                       UFS_UPIU_QUERY_OPCODE_READ_FLAG,
-                       UFS_QUERY_FLAG_IDN_FDEVICEINIT, 0, 0, 0, &utrd,
-                       &rsp_upiu);
+        ocs =
+            ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                           UFS_UPIU_QUERY_OPCODE_READ_FLAG,
+                           UFS_QUERY_FLAG_IDN_FDEVICEINIT, 0, 0, 0, &rsp_upiu);
+        g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
+        g_assert_cmpuint(rsp_upiu.header.response, ==,
+                         UFS_COMMAND_RESULT_SUCCESS);
     } while (be32_to_cpu(rsp_upiu.qr.value) != 0 &&
              g_get_monotonic_time() < end_time);
     g_assert_cmpuint(be32_to_cpu(rsp_upiu.qr.value), ==, 0);
@@ -369,8 +502,15 @@ static void ufs_init(QUfs *ufs, QGuestAllocator *alloc)
 static void ufs_exit(QUfs *ufs, QGuestAllocator *alloc)
 {
     if (ufs->enabled) {
-        guest_free(alloc, ufs->utrlba);
-        guest_free(alloc, ufs->utmrlba);
+        if (ufs->support_mcq) {
+            for (uint32_t qid = 0; qid < ufs->maxq; ++qid) {
+                guest_free(alloc, ufs->sqlba[qid]);
+                guest_free(alloc, ufs->cqlba[qid]);
+            }
+        } else {
+            guest_free(alloc, ufs->utrlba);
+        }
+
         guest_free(alloc, ufs->cmd_desc_addr);
         guest_free(alloc, ufs->data_buffer_addr);
     }
@@ -433,15 +573,15 @@ static void ufstest_init(void *obj, void *data, QGuestAllocator *alloc)
     const uint8_t request_sense_cdb[UFS_CDB_SIZE] = {
         REQUEST_SENSE,
     };
-    UtpTransferReqDesc utrd;
+    enum UtpOcsCodes ocs;
     UtpUpiuRsp rsp_upiu;
 
     ufs_init(ufs, alloc);
 
     /* Check REPORT_LUNS */
-    ufs_send_scsi_command(ufs, 0, 0, report_luns_cdb, NULL, 0, buf, sizeof(buf),
-                          &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_scsi_command(ufs, 0, report_luns_cdb, NULL, 0, buf,
+                                sizeof(buf), &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.scsi_status, ==, GOOD);
     /* LUN LIST LENGTH should be 8, in big endian */
     g_assert_cmpuint(buf[3], ==, 8);
@@ -449,15 +589,15 @@ static void ufstest_init(void *obj, void *data, QGuestAllocator *alloc)
     g_assert_cmpuint(buf[9], ==, 0);
 
     /* Clear Unit Attention */
-    ufs_send_scsi_command(ufs, 0, 0, request_sense_cdb, NULL, 0, buf,
-                          sizeof(buf), &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_scsi_command(ufs, 0, request_sense_cdb, NULL, 0, buf,
+                                sizeof(buf), &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.scsi_status, ==, CHECK_CONDITION);
 
     /* Check TEST_UNIT_READY */
-    ufs_send_scsi_command(ufs, 0, 0, test_unit_ready_cdb, NULL, 0, NULL, 0,
-                          &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_scsi_command(ufs, 0, test_unit_ready_cdb, NULL, 0, NULL, 0,
+                                &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.scsi_status, ==, GOOD);
 
     ufs_exit(ufs, alloc);
@@ -499,22 +639,22 @@ static void ufstest_read_write(void *obj, void *data, QGuestAllocator *alloc)
         WRITE_10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00
     };
     uint32_t block_size;
-    UtpTransferReqDesc utrd;
+    enum UtpOcsCodes ocs;
     UtpUpiuRsp rsp_upiu;
     const int test_lun = 1;
 
     ufs_init(ufs, alloc);
 
     /* Clear Unit Attention */
-    ufs_send_scsi_command(ufs, 0, test_lun, request_sense_cdb, NULL, 0,
-                          read_buf, sizeof(read_buf), &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_scsi_command(ufs, test_lun, request_sense_cdb, NULL, 0,
+                                read_buf, sizeof(read_buf), &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.scsi_status, ==, CHECK_CONDITION);
 
     /* Read capacity */
-    ufs_send_scsi_command(ufs, 0, test_lun, read_capacity_cdb, NULL, 0,
-                          read_buf, sizeof(read_buf), &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_scsi_command(ufs, test_lun, read_capacity_cdb, NULL, 0,
+                                read_buf, sizeof(read_buf), &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.scsi_status, ==,
                      UFS_COMMAND_RESULT_SUCCESS);
     block_size = ldl_be_p(&read_buf[8]);
@@ -522,16 +662,16 @@ static void ufstest_read_write(void *obj, void *data, QGuestAllocator *alloc)
 
     /* Write data */
     memset(write_buf, 0xab, block_size);
-    ufs_send_scsi_command(ufs, 0, test_lun, write_cdb, write_buf, block_size,
-                          NULL, 0, &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_scsi_command(ufs, test_lun, write_cdb, write_buf, block_size,
+                                NULL, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.scsi_status, ==,
                      UFS_COMMAND_RESULT_SUCCESS);
 
     /* Read data and verify */
-    ufs_send_scsi_command(ufs, 0, test_lun, read_cdb, NULL, 0, read_buf,
-                          block_size, &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_scsi_command(ufs, test_lun, read_cdb, NULL, 0, read_buf,
+                                block_size, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.scsi_status, ==,
                      UFS_COMMAND_RESULT_SUCCESS);
     g_assert_cmpint(memcmp(read_buf, write_buf, block_size), ==, 0);
@@ -544,76 +684,74 @@ static void ufstest_query_flag_request(void *obj, void *data,
 {
     QUfs *ufs = obj;
 
-    UtpTransferReqDesc utrd;
+    enum UtpOcsCodes ocs;
     UtpUpiuRsp rsp_upiu;
     ufs_init(ufs, alloc);
 
     /* Read read-only flag */
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_READ_FLAG,
-                   UFS_QUERY_FLAG_IDN_FDEVICEINIT, 0, 0, 0, &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_READ_FLAG,
+                         UFS_QUERY_FLAG_IDN_FDEVICEINIT, 0, 0, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
     g_assert_cmpuint(rsp_upiu.qr.opcode, ==, UFS_UPIU_QUERY_OPCODE_READ_FLAG);
     g_assert_cmpuint(rsp_upiu.qr.idn, ==, UFS_QUERY_FLAG_IDN_FDEVICEINIT);
     g_assert_cmpuint(rsp_upiu.qr.value, ==, be32_to_cpu(0));
 
     /* Flag Set, Clear, Toggle Test with fDeviceLifeSpanModeEn */
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_READ_FLAG,
-                   UFS_QUERY_FLAG_IDN_LIFE_SPAN_MODE_ENABLE, 0, 0, 0, &utrd,
-                   &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_READ_FLAG,
+                         UFS_QUERY_FLAG_IDN_LIFE_SPAN_MODE_ENABLE, 0, 0, 0,
+                         &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
     g_assert_cmpuint(rsp_upiu.qr.value, ==, be32_to_cpu(0));
 
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_SET_FLAG,
-                   UFS_QUERY_FLAG_IDN_LIFE_SPAN_MODE_ENABLE, 0, 0, 0, &utrd,
-                   &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_SET_FLAG,
+                         UFS_QUERY_FLAG_IDN_LIFE_SPAN_MODE_ENABLE, 0, 0, 0,
+                         &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
     g_assert_cmpuint(rsp_upiu.qr.value, ==, be32_to_cpu(1));
 
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_CLEAR_FLAG,
-                   UFS_QUERY_FLAG_IDN_LIFE_SPAN_MODE_ENABLE, 0, 0, 0, &utrd,
-                   &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_CLEAR_FLAG,
+                         UFS_QUERY_FLAG_IDN_LIFE_SPAN_MODE_ENABLE, 0, 0, 0,
+                         &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
     g_assert_cmpuint(rsp_upiu.qr.value, ==, be32_to_cpu(0));
 
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_TOGGLE_FLAG,
-                   UFS_QUERY_FLAG_IDN_LIFE_SPAN_MODE_ENABLE, 0, 0, 0, &utrd,
-                   &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_TOGGLE_FLAG,
+                         UFS_QUERY_FLAG_IDN_LIFE_SPAN_MODE_ENABLE, 0, 0, 0,
+                         &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
     g_assert_cmpuint(rsp_upiu.qr.value, ==, be32_to_cpu(1));
 
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_TOGGLE_FLAG,
-                   UFS_QUERY_FLAG_IDN_LIFE_SPAN_MODE_ENABLE, 0, 0, 0, &utrd,
-                   &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_TOGGLE_FLAG,
+                         UFS_QUERY_FLAG_IDN_LIFE_SPAN_MODE_ENABLE, 0, 0, 0,
+                         &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
     g_assert_cmpuint(rsp_upiu.qr.value, ==, be32_to_cpu(0));
 
     /* Read Write-only Flag (Intended Failure) */
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_READ_FLAG,
-                   UFS_QUERY_FLAG_IDN_PURGE_ENABLE, 0, 0, 0, &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==,
-                     UFS_OCS_INVALID_CMD_TABLE_ATTR);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_READ_FLAG,
+                         UFS_QUERY_FLAG_IDN_PURGE_ENABLE, 0, 0, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_INVALID_CMD_TABLE_ATTR);
     g_assert_cmpuint(rsp_upiu.header.response, ==,
                      UFS_QUERY_RESULT_NOT_READABLE);
 
     /* Write Read-Only Flag (Intended Failure) */
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_SET_FLAG, UFS_QUERY_FLAG_IDN_BUSY_RTC,
-                   0, 0, 0, &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==,
-                     UFS_OCS_INVALID_CMD_TABLE_ATTR);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_SET_FLAG,
+                         UFS_QUERY_FLAG_IDN_BUSY_RTC, 0, 0, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_INVALID_CMD_TABLE_ATTR);
     g_assert_cmpuint(rsp_upiu.header.response, ==,
                      UFS_QUERY_RESULT_NOT_WRITEABLE);
 
@@ -625,130 +763,146 @@ static void ufstest_query_attr_request(void *obj, void *data,
 {
     QUfs *ufs = obj;
 
-    UtpTransferReqDesc utrd;
+    enum UtpOcsCodes ocs;
     UtpUpiuRsp rsp_upiu;
     ufs_init(ufs, alloc);
 
     /* Read Readable Attributes*/
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_READ_ATTR,
-                   UFS_QUERY_ATTR_IDN_BOOT_LU_EN, 0, 0, 0, &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_READ_ATTR,
+                         UFS_QUERY_ATTR_IDN_BOOT_LU_EN, 0, 0, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
     g_assert_cmpuint(rsp_upiu.qr.opcode, ==, UFS_UPIU_QUERY_OPCODE_READ_ATTR);
     g_assert_cmpuint(rsp_upiu.qr.idn, ==, UFS_QUERY_ATTR_IDN_BOOT_LU_EN);
     g_assert_cmpuint(rsp_upiu.qr.value, ==, cpu_to_be32(0x00));
 
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_READ_ATTR,
-                   UFS_QUERY_ATTR_IDN_BKOPS_STATUS, 0, 0, 0, &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_READ_ATTR,
+                         UFS_QUERY_ATTR_IDN_BKOPS_STATUS, 0, 0, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
     g_assert_cmpuint(rsp_upiu.qr.value, ==, cpu_to_be32(0x00));
 
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_READ_ATTR,
+                         UFS_QUERY_ATTR_IDN_CASE_ROUGH_TEMP, 0, 0, 0,
+                         &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
+    g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
+    g_assert_cmpuint(rsp_upiu.qr.value, ==, cpu_to_be32(0x00));
+
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_READ_ATTR,
+                         UFS_QUERY_ATTR_IDN_HIGH_TEMP_BOUND, 0, 0, 0,
+                         &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
+    g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
+    g_assert_cmpuint(rsp_upiu.qr.value, ==, cpu_to_be32(160));
+
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_READ_ATTR,
+                         UFS_QUERY_ATTR_IDN_LOW_TEMP_BOUND, 0, 0, 0,
+                         &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
+    g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
+    g_assert_cmpuint(rsp_upiu.qr.value, ==, cpu_to_be32(60));
+
     /* Write Writable Attributes & Read Again */
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_WRITE_ATTR,
-                   UFS_QUERY_ATTR_IDN_ACTIVE_ICC_LVL, 0, 0, 0x03, &utrd,
-                   &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_WRITE_ATTR,
+                         UFS_QUERY_ATTR_IDN_ACTIVE_ICC_LVL, 0, 0, 0x03,
+                         &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
     g_assert_cmpuint(rsp_upiu.qr.value, ==, cpu_to_be32(0x03));
 
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_WRITE_ATTR,
-                   UFS_QUERY_ATTR_IDN_EE_CONTROL, 0, 0, 0x07, &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_WRITE_ATTR,
+                         UFS_QUERY_ATTR_IDN_EE_CONTROL, 0, 0, 0x07, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
     g_assert_cmpuint(rsp_upiu.qr.value, ==, cpu_to_be32(0x07));
 
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_READ_ATTR,
-                   UFS_QUERY_ATTR_IDN_ACTIVE_ICC_LVL, 0, 0, 0, &utrd,
-                   &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_READ_ATTR,
+                         UFS_QUERY_ATTR_IDN_ACTIVE_ICC_LVL, 0, 0, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
     g_assert_cmpuint(rsp_upiu.qr.value, ==, cpu_to_be32(0x03));
 
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_READ_ATTR,
-                   UFS_QUERY_ATTR_IDN_EE_CONTROL, 0, 0, 0, &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_READ_ATTR,
+                         UFS_QUERY_ATTR_IDN_EE_CONTROL, 0, 0, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
     g_assert_cmpuint(rsp_upiu.qr.value, ==, cpu_to_be32(0x07));
 
     /* Write Invalid Value (Intended Error) */
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_WRITE_ATTR,
-                   UFS_QUERY_ATTR_IDN_ACTIVE_ICC_LVL, 0, 0, 0x10, &utrd,
-                   &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==,
-                     UFS_OCS_INVALID_CMD_TABLE_ATTR);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_WRITE_ATTR,
+                         UFS_QUERY_ATTR_IDN_ACTIVE_ICC_LVL, 0, 0, 0x10,
+                         &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_INVALID_CMD_TABLE_ATTR);
     g_assert_cmpuint(rsp_upiu.header.response, ==,
                      UFS_QUERY_RESULT_INVALID_VALUE);
 
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_READ_ATTR,
-                   UFS_QUERY_ATTR_IDN_ACTIVE_ICC_LVL, 0, 0, 0, &utrd,
-                   &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_READ_ATTR,
+                         UFS_QUERY_ATTR_IDN_ACTIVE_ICC_LVL, 0, 0, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
     g_assert_cmpuint(rsp_upiu.qr.value, ==, cpu_to_be32(0x03));
 
     /* Read Write-Only Attribute (Intended Error) */
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_READ_ATTR,
-                   UFS_QUERY_ATTR_IDN_SECONDS_PASSED, 0, 0, 0, &utrd,
-                   &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==,
-                     UFS_OCS_INVALID_CMD_TABLE_ATTR);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_READ_ATTR,
+                         UFS_QUERY_ATTR_IDN_SECONDS_PASSED, 0, 0, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_INVALID_CMD_TABLE_ATTR);
     g_assert_cmpuint(rsp_upiu.header.response, ==,
                      UFS_QUERY_RESULT_NOT_READABLE);
 
     /* Write Read-Only Attribute (Intended Error) */
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_WRITE_ATTR,
-                   UFS_QUERY_ATTR_IDN_POWER_MODE, 0, 0, 0x01, &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==,
-                     UFS_OCS_INVALID_CMD_TABLE_ATTR);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_WRITE_ATTR,
+                         UFS_QUERY_ATTR_IDN_POWER_MODE, 0, 0, 0x01, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_INVALID_CMD_TABLE_ATTR);
     g_assert_cmpuint(rsp_upiu.header.response, ==,
                      UFS_QUERY_RESULT_NOT_WRITEABLE);
 
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_READ_ATTR,
-                   UFS_QUERY_ATTR_IDN_POWER_MODE, 0, 0, 0, &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_READ_ATTR,
+                         UFS_QUERY_ATTR_IDN_POWER_MODE, 0, 0, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
     g_assert_cmpuint(rsp_upiu.qr.value, ==, cpu_to_be32(0x00));
 
     /* Reset Written Attributes */
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_WRITE_ATTR,
-                   UFS_QUERY_ATTR_IDN_ACTIVE_ICC_LVL, 0, 0, 0, &utrd,
-                   &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_WRITE_ATTR,
+                         UFS_QUERY_ATTR_IDN_ACTIVE_ICC_LVL, 0, 0, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
     g_assert_cmpuint(rsp_upiu.qr.value, ==, cpu_to_be32(0x00));
 
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_WRITE_ATTR,
-                   UFS_QUERY_ATTR_IDN_EE_CONTROL, 0, 0, 0, &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_WRITE_ATTR,
+                         UFS_QUERY_ATTR_IDN_EE_CONTROL, 0, 0, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
     g_assert_cmpuint(rsp_upiu.qr.value, ==, cpu_to_be32(0x00));
 
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_READ_ATTR,
-                   UFS_QUERY_ATTR_IDN_ACTIVE_ICC_LVL, 0, 0, 0, &utrd,
-                   &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_READ_ATTR,
+                         UFS_QUERY_ATTR_IDN_ACTIVE_ICC_LVL, 0, 0, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
     g_assert_cmpuint(rsp_upiu.qr.value, ==, cpu_to_be32(0x00));
 
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_READ_ATTR,
-                   UFS_QUERY_ATTR_IDN_EE_CONTROL, 0, 0, 0, &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_READ_ATTR,
+                         UFS_QUERY_ATTR_IDN_EE_CONTROL, 0, 0, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
     g_assert_cmpuint(rsp_upiu.qr.value, ==, cpu_to_be32(0x00));
 
@@ -760,17 +914,17 @@ static void ufstest_query_desc_request(void *obj, void *data,
 {
     QUfs *ufs = obj;
 
-    UtpTransferReqDesc utrd;
+    enum UtpOcsCodes ocs;
     UtpUpiuRsp rsp_upiu;
     ufs_init(ufs, alloc);
 
     /* Write Descriptor is not supported yet */
 
     /* Read Device Descriptor */
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_READ_DESC, UFS_QUERY_DESC_IDN_DEVICE,
-                   0, 0, 0, &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_READ_DESC,
+                         UFS_QUERY_DESC_IDN_DEVICE, 0, 0, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
     g_assert_cmpuint(rsp_upiu.qr.opcode, ==, UFS_UPIU_QUERY_OPCODE_READ_DESC);
     g_assert_cmpuint(rsp_upiu.qr.idn, ==, UFS_QUERY_DESC_IDN_DEVICE);
@@ -780,126 +934,123 @@ static void ufstest_query_desc_request(void *obj, void *data,
     /* Read Configuration Descriptor is not supported yet*/
 
     /* Read Unit Descriptor */
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_READ_DESC, UFS_QUERY_DESC_IDN_UNIT, 0,
-                   0, 0, &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_READ_DESC,
+                         UFS_QUERY_DESC_IDN_UNIT, 0, 0, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
     g_assert_cmpuint(rsp_upiu.qr.data[0], ==, sizeof(UnitDescriptor));
     g_assert_cmpuint(rsp_upiu.qr.data[1], ==, UFS_QUERY_DESC_IDN_UNIT);
     g_assert_cmpuint(rsp_upiu.qr.data[2], ==, 0);
 
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_READ_DESC, UFS_QUERY_DESC_IDN_UNIT, 1,
-                   0, 0, &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_READ_DESC,
+                         UFS_QUERY_DESC_IDN_UNIT, 1, 0, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
     g_assert_cmpuint(rsp_upiu.qr.data[0], ==, sizeof(UnitDescriptor));
     g_assert_cmpuint(rsp_upiu.qr.data[1], ==, UFS_QUERY_DESC_IDN_UNIT);
     g_assert_cmpuint(rsp_upiu.qr.data[2], ==, 1);
 
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_READ_DESC, UFS_QUERY_DESC_IDN_UNIT,
-                   UFS_UPIU_RPMB_WLUN, 0, 0, &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs =
+        ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                       UFS_UPIU_QUERY_OPCODE_READ_DESC, UFS_QUERY_DESC_IDN_UNIT,
+                       UFS_UPIU_RPMB_WLUN, 0, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
     g_assert_cmpuint(rsp_upiu.qr.data[0], ==, sizeof(RpmbUnitDescriptor));
     g_assert_cmpuint(rsp_upiu.qr.data[1], ==, UFS_QUERY_DESC_IDN_UNIT);
     g_assert_cmpuint(rsp_upiu.qr.data[2], ==, UFS_UPIU_RPMB_WLUN);
 
     /* Read Interconnect Descriptor */
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_READ_DESC,
-                   UFS_QUERY_DESC_IDN_INTERCONNECT, 0, 0, 0, &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_READ_DESC,
+                         UFS_QUERY_DESC_IDN_INTERCONNECT, 0, 0, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
     g_assert_cmpuint(rsp_upiu.qr.data[0], ==, sizeof(InterconnectDescriptor));
     g_assert_cmpuint(rsp_upiu.qr.data[1], ==, UFS_QUERY_DESC_IDN_INTERCONNECT);
 
     /* Read String Descriptor */
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_READ_DESC, UFS_QUERY_DESC_IDN_STRING,
-                   0, 0, 0, &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_READ_DESC,
+                         UFS_QUERY_DESC_IDN_STRING, 0, 0, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
     g_assert_cmpuint(rsp_upiu.qr.data[0], ==, 0x12);
     g_assert_cmpuint(rsp_upiu.qr.data[1], ==, UFS_QUERY_DESC_IDN_STRING);
 
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_READ_DESC, UFS_QUERY_DESC_IDN_STRING,
-                   1, 0, 0, &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_READ_DESC,
+                         UFS_QUERY_DESC_IDN_STRING, 1, 0, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
     g_assert_cmpuint(rsp_upiu.qr.data[0], ==, 0x22);
     g_assert_cmpuint(rsp_upiu.qr.data[1], ==, UFS_QUERY_DESC_IDN_STRING);
 
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_READ_DESC, UFS_QUERY_DESC_IDN_STRING,
-                   4, 0, 0, &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_READ_DESC,
+                         UFS_QUERY_DESC_IDN_STRING, 4, 0, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
     g_assert_cmpuint(rsp_upiu.qr.data[0], ==, 0x0a);
     g_assert_cmpuint(rsp_upiu.qr.data[1], ==, UFS_QUERY_DESC_IDN_STRING);
 
     /* Read Geometry Descriptor */
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_READ_DESC, UFS_QUERY_DESC_IDN_GEOMETRY,
-                   0, 0, 0, &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_READ_DESC,
+                         UFS_QUERY_DESC_IDN_GEOMETRY, 0, 0, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
     g_assert_cmpuint(rsp_upiu.qr.data[0], ==, sizeof(GeometryDescriptor));
     g_assert_cmpuint(rsp_upiu.qr.data[1], ==, UFS_QUERY_DESC_IDN_GEOMETRY);
 
     /* Read Power Descriptor */
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_READ_DESC, UFS_QUERY_DESC_IDN_POWER, 0,
-                   0, 0, &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_READ_DESC,
+                         UFS_QUERY_DESC_IDN_POWER, 0, 0, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
     g_assert_cmpuint(rsp_upiu.qr.data[0], ==,
                      sizeof(PowerParametersDescriptor));
     g_assert_cmpuint(rsp_upiu.qr.data[1], ==, UFS_QUERY_DESC_IDN_POWER);
 
     /* Read Health Descriptor */
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_READ_DESC, UFS_QUERY_DESC_IDN_HEALTH,
-                   0, 0, 0, &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_READ_DESC,
+                         UFS_QUERY_DESC_IDN_HEALTH, 0, 0, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_SUCCESS);
     g_assert_cmpuint(rsp_upiu.header.response, ==, UFS_COMMAND_RESULT_SUCCESS);
     g_assert_cmpuint(rsp_upiu.qr.data[0], ==, sizeof(DeviceHealthDescriptor));
     g_assert_cmpuint(rsp_upiu.qr.data[1], ==, UFS_QUERY_DESC_IDN_HEALTH);
 
     /* Invalid Index (Intended Failure) */
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_READ_DESC, UFS_QUERY_DESC_IDN_UNIT, 4,
-                   0, 0, &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==,
-                     UFS_OCS_INVALID_CMD_TABLE_ATTR);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_READ_DESC,
+                         UFS_QUERY_DESC_IDN_UNIT, 4, 0, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_INVALID_CMD_TABLE_ATTR);
     g_assert_cmpuint(rsp_upiu.header.response, ==,
                      UFS_QUERY_RESULT_INVALID_INDEX);
 
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_READ_DESC, UFS_QUERY_DESC_IDN_STRING,
-                   5, 0, 0, &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==,
-                     UFS_OCS_INVALID_CMD_TABLE_ATTR);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_READ_DESC,
+                         UFS_QUERY_DESC_IDN_STRING, 5, 0, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_INVALID_CMD_TABLE_ATTR);
     g_assert_cmpuint(rsp_upiu.header.response, ==,
                      UFS_QUERY_RESULT_INVALID_INDEX);
 
     /* Invalid Selector (Intended Failure) */
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_READ_DESC, UFS_QUERY_DESC_IDN_DEVICE,
-                   0, 1, 0, &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==,
-                     UFS_OCS_INVALID_CMD_TABLE_ATTR);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_READ_DESC,
+                         UFS_QUERY_DESC_IDN_DEVICE, 0, 1, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_INVALID_CMD_TABLE_ATTR);
     g_assert_cmpuint(rsp_upiu.header.response, ==,
                      UFS_QUERY_RESULT_INVALID_SELECTOR);
 
-    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
-                   UFS_UPIU_QUERY_OPCODE_READ_DESC, UFS_QUERY_DESC_IDN_STRING,
-                   0, 1, 0, &utrd, &rsp_upiu);
-    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==,
-                     UFS_OCS_INVALID_CMD_TABLE_ATTR);
+    ocs = ufs_send_query(ufs, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                         UFS_UPIU_QUERY_OPCODE_READ_DESC,
+                         UFS_QUERY_DESC_IDN_STRING, 0, 1, 0, &rsp_upiu);
+    g_assert_cmpuint(ocs, ==, UFS_OCS_INVALID_CMD_TABLE_ATTR);
     g_assert_cmpuint(rsp_upiu.header.response, ==,
                      UFS_QUERY_RESULT_INVALID_SELECTOR);
 
@@ -947,12 +1098,16 @@ static void ufs_register_nodes(void)
     QOSGraphEdgeOptions edge_opts = {
         .before_cmd_line = "-blockdev null-co,node-name=drv0,read-zeroes=on",
         .after_cmd_line = "-device ufs-lu,bus=ufs0,drive=drv0,lun=0",
-        .extra_device_opts = "addr=04.0,id=ufs0,nutrs=32,nutmrs=8"
+        .extra_device_opts = "addr=04.0,id=ufs0"
     };
 
-    QOSGraphTestOptions io_test_opts = {
-        .before = ufs_blk_test_setup,
-    };
+    QOSGraphTestOptions io_test_opts = { .before = ufs_blk_test_setup,
+                                         .edge.extra_device_opts =
+                                             "mcq=false,nutrs=32,nutmrs=8" };
+
+    QOSGraphTestOptions mcq_test_opts = { .before = ufs_blk_test_setup,
+                                          .edge.extra_device_opts =
+                                              "mcq=true,mcq-maxq=1" };
 
     add_qpci_address(&edge_opts, &(QPCIAddress){ .devfn = QPCI_DEVFN(4, 0) });
 
@@ -972,13 +1127,14 @@ static void ufs_register_nodes(void)
         return;
     }
     qos_add_test("init", "ufs", ufstest_init, NULL);
-    qos_add_test("read-write", "ufs", ufstest_read_write, &io_test_opts);
-    qos_add_test("flag read-write", "ufs",
-                 ufstest_query_flag_request, &io_test_opts);
-    qos_add_test("attr read-write", "ufs",
-                 ufstest_query_attr_request, &io_test_opts);
-    qos_add_test("desc read-write", "ufs",
-                 ufstest_query_desc_request, &io_test_opts);
+    qos_add_test("legacy-read-write", "ufs", ufstest_read_write, &io_test_opts);
+    qos_add_test("mcq-read-write", "ufs", ufstest_read_write, &mcq_test_opts);
+    qos_add_test("query-flag", "ufs", ufstest_query_flag_request,
+                 &io_test_opts);
+    qos_add_test("query-attribute", "ufs", ufstest_query_attr_request,
+                 &io_test_opts);
+    qos_add_test("query-desciptor", "ufs", ufstest_query_desc_request,
+                 &io_test_opts);
 }
 
 libqos_init(ufs_register_nodes);
diff --git a/tests/tcg/aarch64/Makefile.target b/tests/tcg/aarch64/Makefile.target
index 9efe2f81adf5f..16ddcf4f8831d 100644
--- a/tests/tcg/aarch64/Makefile.target
+++ b/tests/tcg/aarch64/Makefile.target
@@ -83,7 +83,8 @@ test-aes: CFLAGS += -O -march=armv8-a+aes
 test-aes: test-aes-main.c.inc
 
 # Vector SHA1
-sha1-vector: CFLAGS=-O3
+# Work around compiler false-positive warning, as we do for the 'sha1' test
+sha1-vector: CFLAGS=-O3 -Wno-stringop-overread
 sha1-vector: sha1.c
 	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
 run-sha1-vector: sha1-vector run-sha1
diff --git a/tests/tcg/arm/Makefile.target b/tests/tcg/arm/Makefile.target
index 99a953b667118..6189d7a0e2430 100644
--- a/tests/tcg/arm/Makefile.target
+++ b/tests/tcg/arm/Makefile.target
@@ -61,7 +61,8 @@ endif
 ARM_TESTS += commpage
 
 # Vector SHA1
-sha1-vector: CFLAGS=-O3
+# Work around compiler false-positive warning, as we do for the 'sha1' test
+sha1-vector: CFLAGS=-O3 -Wno-stringop-overread
 sha1-vector: sha1.c
 	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
 run-sha1-vector: sha1-vector run-sha1
diff --git a/tests/tcg/hexagon/Makefile.softmmu-target b/tests/tcg/hexagon/Makefile.softmmu-target
new file mode 100644
index 0000000000000..0b12f7485b62f
--- /dev/null
+++ b/tests/tcg/hexagon/Makefile.softmmu-target
@@ -0,0 +1,114 @@
+##
+##  Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+##
+##  SPDX-License-Identifier: GPL-2.0-or-later
+##
+
+# -*- Mode: makefile -*-
+#
+# Hexagon SoftMMU tests - included from tests/tcg/Makefile
+#
+
+HEXAGON_SYSTEM_SRC=$(SRC_PATH)/tests/tcg/hexagon/system
+
+# Set search path for all sources
+VPATH 		+= $(HEXAGON_SYSTEM_SRC)
+
+########### Compiling options
+# We force -O0 to avoid optimizations that would break the
+# libc simplifications we made at min_libc.c
+#
+CFLAGS=-mv73 -U__linux__ -G0 -nodefaultlibs -nostdlib -static -fno-PIC -O0 -g -Werror
+LDFLAGS=-lclang_rt.builtins-hexagon
+
+########### QEMU options
+QEMU_BASE_MACHINE=-M V66G_1024 -semihosting-config usefs=$(SRC_PATH)/tests/tcg/hexagon/system
+QEMU_OPTS+=-display none
+
+QEMU_OPTS+=$(QEMU_BASE_MACHINE) -kernel
+
+crt0.o: crt0/crt0.S crt0/crt0.inc
+crt0_standalone.o: crt0/crt0_standalone.S crt0/crt0.inc
+pte.o: crt0/pte.S
+min_libc.o: crt0/min_libc.c
+tlb.o: crt0/tlb.c
+
+CRT0_OBJS=crt0.o crt0_standalone.o pte.o min_libc.o tlb.o
+
+TESTS_BUILT_WITH_DEFAULT_RULES = \
+	semihost \
+	mmu_overlap \
+	mmu_asids \
+	standalone_hw \
+	ciad-siad \
+	badva \
+	vid_reg \
+	hvx-multi \
+	standalone_vec \
+	fastl2vic \
+	int_range \
+	$()
+
+TESTS += \
+	$(TESTS_BUILT_WITH_DEFAULT_RULES) \
+	tlb-miss-tlblock \
+	$()
+
+$(TESTS_BUILT_WITH_DEFAULT_RULES): $(CRT0_OBJS)
+
+# Build and link the tests
+echo-and-run = echo $(1) && $(1)
+define build_fn
+    @if test "$(3)" = LINK; then extra="$(LDFLAGS)"; else extra=-c; fi && \
+    $(call echo-and-run, $(CC) $(CFLAGS) $(1) -o $(2) $$extra)
+endef
+
+$(CRT0_OBJS):
+	$(call build_fn,$<,$@)
+$(TESTS_BUILT_WITH_DEFAULT_RULES):
+	$(call build_fn,$^,$@,LINK)
+
+%.o: %.S
+	$(call build_fn,$<,$@)
+%.o: %.c
+	$(call build_fn,$<,$@)
+
+mmu.h: ../hex_test.h
+
+semihost.o: semihost.c strutils.h
+semihost: semihost.o
+mmu_overlap.o: mmu_overlap.c mmu.h
+mmu_overlap: mmu_overlap.o
+mmu_asids.o: mmu_asids.c mmu.h
+mmu_asids: mmu_asids.o
+ciad-siad: ciad-siad.o
+standalone_hw: standalone_hw.o monitor_insts.o
+vid_reg: vid_reg.o
+hvx-multi.o: hvx-multi.c ../hvx_misc.h
+hvx-multi: hvx-multi.o
+standalone_vec.o: standalone_vec.c cfgtable.h
+standalone_vec: standalone_vec.o
+badva.o: badva.c ../hex_test.h crt0/hexagon_standalone.h
+badva: badva.o
+fastl2vic.o: fastl2vic.c cfgtable.h
+fastl2vic: fastl2vic.o
+int_range.o: int_range.c cfgtable.h
+int_range: int_range.o
+
+############# Custom build options
+
+standalone_vec.o: CFLAGS+= -mv69 -O2 -mhvx  -fvectorize
+hvx-multi.o: CFLAGS+= -O2 -mhvx
+
+# We don't want to link this one with crt0 files
+tlb-miss-tlblock: tlb-miss-tlblock.o
+	$(CC) $(CFLAGS) $< -o $@ -nostartfiles -Wl,-Ttext,0x9b800000 -Wl,-entry,0x9b800000
+
+############# Custom test rules
+
+run-semihost: semihost
+	mkdir -p _semihost_dir
+	touch _semihost_dir/fileA _semihost_dir/fileB
+	$(call run-test, $<, $(QEMU) --append "arg1 arg2" $(QEMU_OPTS) $< \
+		             > $<.stdout)
+	$(call quiet-command, grep -q "PASS" $<.stdout, "GREP", "PASS")
diff --git a/tests/tcg/hexagon/Makefile.target b/tests/tcg/hexagon/Makefile.target
index e5182c01d8a0c..44dd927b59372 100644
--- a/tests/tcg/hexagon/Makefile.target
+++ b/tests/tcg/hexagon/Makefile.target
@@ -52,6 +52,7 @@ HEX_TESTS += hvx_misc
 HEX_TESTS += hvx_histogram
 HEX_TESTS += invalid-slots
 HEX_TESTS += unaligned_pc
+HEX_TESTS += utimer
 
 run-and-check-exception = $(call run-test,$2,$3 2>$2.stderr; \
 	test $$? -eq 1 && grep -q "exception $(strip $1)" $2.stderr)
@@ -109,6 +110,7 @@ preg_alias: preg_alias.c hex_test.h
 read_write_overlap: read_write_overlap.c hex_test.h
 reg_mut: reg_mut.c hex_test.h
 unaligned_pc: unaligned_pc.c
+utimer: utimer.c hex_test.h
 
 # This test has to be compiled for the -mv67t target
 usr: usr.c hex_test.h
diff --git a/tests/tcg/hexagon/hvx_misc.c b/tests/tcg/hexagon/hvx_misc.c
index 90c3733da0711..319d7c0dd052e 100644
--- a/tests/tcg/hexagon/hvx_misc.c
+++ b/tests/tcg/hexagon/hvx_misc.c
@@ -495,6 +495,28 @@ void test_store_new()
     check_output_w(__LINE__, 1);
 }
 
+void test_qfloat()
+{
+    asm volatile(
+        "r0 = #0xf\n"
+        "v0 = vsplat(r0)\n"
+        "v1 = vsplat(r0)\n"
+        "{\n"
+        "   v2.qf16 = vadd(v0.qf16, v1.qf16)\n"
+        "}\n"
+        "vmem(%0) = v2\n"
+        :
+        : "r"(&output[0])
+        : "r0", "v0", "v1", "v2", "memory"
+    );
+
+    for (int i = 0; i < MAX_VEC_SIZE_BYTES / 4; i++) {
+        expect[0].w[i] = 0x10010;
+    }
+
+    check_output_w(__LINE__, 1);
+}
+
 int main()
 {
     init_buffers();
@@ -538,6 +560,8 @@ int main()
 
     test_store_new();
 
+    test_qfloat();
+
     puts(err ? "FAIL" : "PASS");
     return err ? 1 : 0;
 }
diff --git a/tests/tcg/hexagon/reg_mut.c b/tests/tcg/hexagon/reg_mut.c
index c5a39e55100da..45db9ae5cd157 100644
--- a/tests/tcg/hexagon/reg_mut.c
+++ b/tests/tcg/hexagon/reg_mut.c
@@ -77,10 +77,10 @@ static inline void write_control_registers(void)
     check32(result, 0x00000000);
 
     WRITE_REG_NOCLOBBER(result, "utimerlo", 0xffffffff);
-    check32(result, 0x00000000);
+    check32_ne(result, 0xffffffff);
 
     WRITE_REG_NOCLOBBER(result, "utimerhi", 0xffffffff);
-    check32(result, 0x00000000);
+    check32_ne(result, 0xffffffff);
 
     /*
      * PC is special.  Setting it to these values
@@ -107,7 +107,7 @@ static inline void write_control_register_pairs(void)
     check64(result, 0x0000000000000000);
 
     WRITE_REG_NOCLOBBER(result, "c31:30", 0xffffffffffffffff);
-    check64(result, 0x0000000000000000);
+    check64_ne(result, 0xffffffffffffffff);
 
     WRITE_REG_PAIR_ENCODED(result, "c9:8", (uint64_t) 0x0000000000000000,
                            C9_8_EQ_R1_0);
diff --git a/tests/tcg/hexagon/system/badva.c b/tests/tcg/hexagon/system/badva.c
new file mode 100644
index 0000000000000..1351269d10776
--- /dev/null
+++ b/tests/tcg/hexagon/system/badva.c
@@ -0,0 +1,335 @@
+/*
+ *  Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "crt0/hexagon_standalone.h"
+
+#define DEBUG 0
+
+int err;
+#include "../hex_test.h"
+
+/* volatile because it is written through different MMU mappings */
+typedef volatile int mmu_variable;
+mmu_variable data0 = 0xdeadbeef;
+mmu_variable data1 = 0xabcdef01;
+
+#define ONE_MB (1 << 20)
+#define INVALID_BADVA 0xbadabada
+
+static uint32_t read_badva(void)
+{
+    uint32_t ret;
+    __asm__ __volatile__("%0 = badva\n\t" : "=r"(ret));
+    return ret;
+}
+
+static uint32_t read_badva0(void)
+{
+    uint32_t ret;
+    __asm__ __volatile__("%0 = badva0\n\t" : "=r"(ret));
+    return ret;
+}
+
+static uint32_t read_badva1(void)
+{
+    uint32_t ret;
+    __asm__ __volatile__("%0 = badva1\n\t" : "=r"(ret));
+    return ret;
+}
+
+static uint32_t read_ssr(void)
+{
+    uint32_t ret;
+    __asm__ __volatile__("%0 = ssr\n\t" : "=r"(ret));
+    return ret;
+}
+
+static void write_badva0(uint32_t val)
+{
+    __asm__ __volatile__("badva0=%0;" : : "r"(val));
+    return;
+}
+
+static void write_badva1(uint32_t val)
+{
+    __asm__ __volatile__("badva1=%0;" : : "r"(val));
+    return;
+}
+
+#define SSR_V0_BIT 20
+#define SSR_V1_BIT 21
+#define SSR_BVS_BIT 21
+
+static uint32_t read_ssr_v0(void)
+{
+    return (read_ssr() >> SSR_V0_BIT) & 0x1;
+}
+
+static uint32_t read_ssr_v1(void)
+{
+    return (read_ssr() >> SSR_V1_BIT) & 0x1;
+}
+
+static uint32_t read_ssr_bvs(void)
+{
+    return (read_ssr() >> SSR_BVS_BIT) & 0x1;
+}
+
+static void dual_store(mmu_variable *p, mmu_variable *q, uint32_t pval,
+                       uint32_t qval)
+{
+#if DEBUG
+    printf("dual_store:\t0x%p, 0x%p, 0x%lx, 0x%lx\n", p, q, pval, qval);
+#endif
+
+    __asm__ __volatile__("r6 = #0\n\t"
+                         "badva0 = r6\n\t"
+                         "badva1 = r6\n\t"
+                         "r6 = ssr\n\t"
+                         "r6 = clrbit(r6, #%4) // V0\n\t"
+                         "r6 = clrbit(r6, #%5) // V1\n\t"
+                         "r6 = clrbit(r6, #%6) // BVS\n\t"
+                         "ssr = r6\n\t"
+                         "{\n\t"
+                         "    memw(%0) = %2    // slot 1\n\t"
+                         "    memw(%1) = %3    // slot 0\n\t"
+                         "}\n\t"
+                         : "=m"(*p), "=m"(*q)
+                         : "r"(pval), "r"(qval), "i"(SSR_V0_BIT),
+                           "i"(SSR_V1_BIT), "i"(SSR_BVS_BIT)
+                         : "r6");
+}
+
+static void dual_load(mmu_variable *p, mmu_variable *q, uint32_t *pval,
+                      uint32_t *qval)
+{
+    uint32_t val0, val1;
+
+#if DEBUG
+    printf("dual_load:\t0x%p, 0x%p\n", p, q);
+#endif
+
+    __asm__ __volatile__("r6 = #0\n\t"
+                         "badva0 = r6\n\t"
+                         "badva1 = r6\n\t"
+                         "r6 = ssr\n\t"
+                         "r6 = clrbit(r6, #%4) // V0\n\t"
+                         "r6 = clrbit(r6, #%5) // V1\n\t"
+                         "r6 = clrbit(r6, #%6) // BVS\n\t"
+                         "ssr = r6\n\t"
+                         "{\n\t"
+                         "    %1 = memw(%3)    // slot 1\n\t"
+                         "    %0 = memw(%2)    // slot 0\n\t"
+                         "}\n\t"
+                         : "=r"(val0), "=r"(val1)
+                         : "m"(*p), "m"(*q), "i"(SSR_V0_BIT), "i"(SSR_V1_BIT),
+                           "i"(SSR_BVS_BIT)
+                         : "r6");
+
+#if DEBUG
+    printf("\t\t0x%lx, 0x%lx\n", val0, val1);
+#endif
+
+    *pval = val0;
+    *qval = val1;
+}
+
+static void load_store(mmu_variable *p, mmu_variable *q, uint32_t *pval,
+                       uint32_t qval)
+{
+    uint32_t val;
+
+#if DEBUG
+    printf("load_store:\t0x%p, 0x%p, 0x%lx\n", p, q, qval);
+#endif
+
+    __asm__ __volatile__("r6 = #0\n\t"
+                         "badva0 = r6\n\t"
+                         "badva1 = r6\n\t"
+                         "r6 = ssr\n\t"
+                         "r6 = clrbit(r6, #%4) // V0\n\t"
+                         "r6 = clrbit(r6, #%5) // V1\n\t"
+                         "r6 = clrbit(r6, #%6) // BVS\n\t"
+                         "ssr = r6\n\t"
+                         "{\n\t"
+                         "    %0 = memw(%2)    // slot 1\n\t"
+                         "    memw(%1) = %3    // slot 0\n\t"
+                         "}\n\t"
+                         : "=r"(val), "=m"(*q)
+                         : "m"(*p), "r"(qval), "i"(SSR_V0_BIT), "i"(SSR_V1_BIT),
+                           "i"(SSR_BVS_BIT)
+                         : "r6");
+
+#if DEBUG
+    printf("\t\t0x%lx\n", val);
+#endif
+
+    *pval = val;
+}
+
+enum {
+    TLB_U = (1 << 0),
+    TLB_R = (1 << 1),
+    TLB_W = (1 << 2),
+    TLB_X = (1 << 3),
+};
+
+uint32_t add_trans_pgsize(uint32_t page_size_bits)
+{
+    switch (page_size_bits) {
+    case 12: /* 4KB   */
+        return 1;
+    case 14: /* 16KB  */
+        return 2;
+    case 16: /* 64KB  */
+        return 4;
+    case 18: /* 256KB */
+        return 8;
+    case 20: /* 1MB   */
+        return 16;
+    case 22: /* 4MB   */
+        return 32;
+    case 24: /* 16MB  */
+        return 64;
+    default:
+        return 1;
+    }
+}
+
+int mb_counter = 1;
+
+static mmu_variable *map_data_address(mmu_variable *p, uint32_t data_offset)
+{
+    uint32_t page_size_bits = 12;
+    uint32_t page_size = 1 << page_size_bits;
+    uint32_t page_align = ~(page_size - 1);
+
+    uint32_t data_addr = (uint32_t)p;
+    uint32_t data_page = data_addr & page_align;
+
+    uint32_t new_data_page = data_page + data_offset;
+    uint32_t read_data_addr = data_addr + data_offset;
+    unsigned int data_perm = TLB_X | TLB_W | TLB_U;
+    add_translation((void *)new_data_page, (void *)data_page, 0);
+
+    return (mmu_variable *)read_data_addr;
+}
+
+static void test_dual_store(void)
+{
+    data0 = 0x12345678;
+    data1 = 0x87654321;
+
+    mmu_variable *new_data0 = map_data_address(&data0, mb_counter * ONE_MB);
+    mb_counter++;
+    mmu_variable *new_data1 = map_data_address(&data1, mb_counter * ONE_MB);
+    mb_counter++;
+
+    dual_store(new_data0, new_data1, 0x1, 0x2);
+    if (read_badva() == (uint32_t)new_data0) {
+        check32(read_badva0(), (uint32_t)new_data0);
+        check32(read_badva1(), INVALID_BADVA);
+        check32(read_ssr_v0(), 1);
+        check32(read_ssr_v1(), 0);
+        check32(read_ssr_bvs(), 0);
+    } else if (read_badva() == (uint32_t)new_data1) {
+        check32(read_badva0(), INVALID_BADVA);
+        check32(read_badva1(), (uint32_t)new_data1);
+        check32(read_ssr_v0(), 0);
+        check32(read_ssr_v1(), 1);
+        check32(read_ssr_bvs(), 1);
+    } else {
+        /* Something went wrong! */
+        check32(0, 1);
+    }
+    check32(data0, 0x1);
+    check32(data1, 0x2);
+}
+
+static void test_dual_load(void)
+{
+    uint32_t val0, val1;
+
+    data0 = 0xaabbccdd;
+    data1 = 0xeeff0011;
+
+    mmu_variable *new_data0 = map_data_address(&data0, mb_counter * ONE_MB);
+    mb_counter++;
+    mmu_variable *new_data1 = map_data_address(&data1, mb_counter * ONE_MB);
+    mb_counter++;
+
+    dual_load(new_data0, new_data1, &val0, &val1);
+    if (read_badva() == (uint32_t)new_data0) {
+        check32(read_badva0(), (uint32_t)new_data0);
+        check32(read_badva1(), INVALID_BADVA);
+        check32(read_ssr_v0(), 1);
+        check32(read_ssr_v1(), 0);
+        check32(read_ssr_bvs(), 0);
+    } else if (read_badva() == (uint32_t)new_data1) {
+        check32(read_badva0(), INVALID_BADVA);
+        check32(read_badva1(), (uint32_t)new_data1);
+        check32(read_ssr_v0(), 0);
+        check32(read_ssr_v1(), 1);
+        check32(read_ssr_bvs(), 1);
+    } else {
+        /* Something went wrong! */
+        check32(0, 1);
+    }
+    check32(val0, 0xaabbccdd);
+    check32(val1, 0xeeff0011);
+}
+
+static void test_load_store(void)
+{
+    uint32_t val;
+
+    data0 = 0x11223344;
+    data1 = 0x55667788;
+
+    mmu_variable *new_data0 = map_data_address(&data0, mb_counter * ONE_MB);
+    mb_counter++;
+    mmu_variable *new_data1 = map_data_address(&data1, mb_counter * ONE_MB);
+    mb_counter++;
+
+    load_store(new_data0, new_data1, &val, 0x123);
+    if (read_badva() == (uint32_t)new_data1) {
+        check32(read_badva0(), (uint32_t)new_data1);
+        check32(read_badva1(), INVALID_BADVA);
+        check32(read_ssr_v0(), 1);
+        check32(read_ssr_v1(), 0);
+        check32(read_ssr_bvs(), 0);
+    } else if (read_badva() == (uint32_t)new_data0) {
+        check32(read_badva0(), INVALID_BADVA);
+        check32(read_badva1(), (uint32_t)new_data0);
+        check32(read_ssr_v0(), 0);
+        check32(read_ssr_v1(), 1);
+        check32(read_ssr_bvs(), 1);
+    } else {
+        /* Something went wrong! */
+        check32(0, 1);
+    }
+    check32(val, 0x11223344);
+    check32(data1, 0x123);
+}
+static void test_badva_write(void)
+{
+    uint32_t va = 0x11223344;
+    write_badva0(va);
+    check32(read_badva(), va);
+}
+
+int main()
+{
+    puts("Hexagon badva test");
+
+    test_dual_store();
+    test_dual_load();
+    test_load_store();
+    test_badva_write();
+
+    printf("%s\n", ((err) ? "FAIL" : "PASS"));
+    return err;
+}
diff --git a/tests/tcg/hexagon/system/cfgtable.h b/tests/tcg/hexagon/system/cfgtable.h
new file mode 100644
index 0000000000000..fff84ef56950b
--- /dev/null
+++ b/tests/tcg/hexagon/system/cfgtable.h
@@ -0,0 +1,39 @@
+/*
+ *  Copyright(c) 2023-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef CFGTABLE_H
+#define CFGTABLE_H
+
+#include <stdint.h>
+
+static uint32_t read_cfgtable_field(uint32_t offset)
+{
+    uint32_t val;
+    asm volatile("r0 = cfgbase\n\t"
+                 "r0 = asl(r0, #5)\n\t"
+                 "%0 = memw_phys(%1, r0)\n\t"
+                 : "=r"(val)
+                 : "r"(offset)
+                 : "r0");
+    return val;
+}
+
+#define GET_SUBSYSTEM_BASE() (read_cfgtable_field(0x8) << 16)
+#define GET_FASTL2VIC_BASE() (read_cfgtable_field(0x28) << 16)
+
+static uintptr_t get_vtcm_base(void)
+{
+#if __HEXAGON_ARCH__ == 65
+    return 0xD8200000L;
+#elif __HEXAGON_ARCH__ >= 66
+    int vtcm_offset = 0x038;
+    return read_cfgtable_field(vtcm_offset) << 16;
+#else
+#error "unsupported hexagon revision"
+#endif
+}
+
+#endif /* CFGTABLE_H */
diff --git a/tests/tcg/hexagon/system/ciad-siad.c b/tests/tcg/hexagon/system/ciad-siad.c
new file mode 100644
index 0000000000000..e3fbb7a506dc9
--- /dev/null
+++ b/tests/tcg/hexagon/system/ciad-siad.c
@@ -0,0 +1,50 @@
+/*
+ *  Copyright(c) 2023-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+
+
+static inline void siad(uint32_t val)
+{
+    asm volatile ("siad(%0);"
+                  : : "r"(val));
+    return;
+}
+static inline void ciad(uint32_t val)
+{
+    asm volatile ("ciad(%0);"
+                  : : "r"(val));
+    return;
+}
+
+static inline uint32_t getipendad()
+{
+    uint32_t reg;
+    asm volatile ("%0=s20;"
+                  : "=r"(reg));
+    return reg;
+}
+int
+main(int argc, char *argv[])
+{
+    siad(4);
+    int ipend = getipendad();
+    if (ipend != (0x4 << 16)) {
+        goto fail;
+    }
+    ciad(4);
+    ipend = getipendad();
+    if (ipend) {
+        goto fail;
+    }
+
+    printf("PASS\n");
+    return 0;
+fail:
+    printf("FAIL\n");
+    return 1;
+}
diff --git a/tests/tcg/hexagon/system/crt0/crt0.S b/tests/tcg/hexagon/system/crt0/crt0.S
new file mode 100644
index 0000000000000..8a40e39536ebb
--- /dev/null
+++ b/tests/tcg/hexagon/system/crt0/crt0.S
@@ -0,0 +1,103 @@
+/*
+ *  Copyright(c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "crt0.inc"
+	.equ DEFAULT_HEAP_SIZE, 0x4000000 /* 64MB */
+	.equ DEFAULT_STACK_SIZE, 0x100000 /* 1MB */
+
+	.section .start, "ax", @progbits
+	.subsection 0
+	.org 0
+
+	.global _start
+	.type _start, @function
+        .p2align 5
+_start:
+	jump hexagon_start_init
+	jump hexagon_start_main
+	.size _start, . - _start
+
+/*----------------------------------------------------------------------------*/
+
+	.global hexagon_pre_main
+        .type hexagon_pre_main, @function
+
+hexagon_pre_main:
+	/* Mark first stack frame. */
+	fp = #0
+
+        ReadFrom heapBase, r4
+
+        AddrOf DEFAULT_HEAP_SIZE
+        r5 = r0
+
+        r5 = add (r4, r5)               /* Calculate aligned heap top. */
+        r5 = add (r5, #15)
+        r5 = and (r5, #-16)
+        WriteTo heapLimit, r5
+
+        /* Set up stack. */
+        AddrOf DEFAULT_STACK_SIZE
+        r7 = r0
+
+        r6 = add (r5, r7)               /* Assume stack after heap. */
+        r6 = and (r6, #-16)
+
+        WriteTo stackBase, r6
+
+        ReadFrom stackBase, r6
+
+        r7 = sub (r6, r7)               /* Desired stack size. */
+        r7 = add (r7, #15)
+        r7 = and (r7, #-16)
+        WriteTo stackLimit, r7
+
+        /* Set stack up. */
+        ReadFrom stackBase, r0
+        sp = and (r0, #-16)             /* Align top of stack. */
+
+        /* Zero up BSS. */
+        AddrOf __bss_start, r0
+        AddrOf _end, r2
+        AddrOf memset, r28		/* bzero () is deprecated. */
+        { r1 = #0
+          r2 = sub (r2, r0)
+          callr r28 }
+        .size hexagon_pre_main, . - hexagon_pre_main
+
+/*----------------------------------------------------------------------------*/
+
+        .global hexagon_start_main
+        .type hexagon_start_main, @function
+hexagon_start_main:
+        AddrOf _start_main, r28
+        callr r28
+        /*Stop all threads to terminate execution */
+        r0 = #0x3f
+        stop (r0)
+        .size hexagon_start_main, . - hexagon_start_main
+
+/*----------------------------------------------------------------------------*/
+
+	.data
+        .global heapBase
+        .global heapLimit
+        .global stackBase
+        .global stackLimit
+	.global setHeapAngelCallParams
+
+.HeapParams:
+heapBase:
+        .word end                       /* Provided by the linker script. */
+heapLimit:
+        .word end + (DEFAULT_HEAP_SIZE & -16)
+stackBase:
+        .word 0
+stackLimit:
+        .word end + ((DEFAULT_HEAP_SIZE + 15) & -16)
+
+setHeapAngelCallParams:
+        .word .HeapParams
diff --git a/tests/tcg/hexagon/system/crt0/crt0.inc b/tests/tcg/hexagon/system/crt0/crt0.inc
new file mode 100755
index 0000000000000..a28d68c51cd5a
--- /dev/null
+++ b/tests/tcg/hexagon/system/crt0/crt0.inc
@@ -0,0 +1,25 @@
+/*
+ *  Copyright(c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+	.macro AddrOf Var, To = r0
+	\To\() = ## (\Var)
+	.endm
+
+	.macro ReadFrom Var, To = r0
+	AddrOf \Var, \To
+	\To = memw (\To)
+	.endm
+
+	.macro WriteTo Var, From = r0, Ptr = r1
+	.ifnc "\From", "\Ptr"
+	AddrOf \Var, \Ptr
+	memw (\Ptr) = \From
+	\From = memw (\Ptr)
+	.else
+	.print "Macro arguments \"From\" and \"Ptr\" cannot be the same."
+	.err
+	.endif
+	.endm
diff --git a/tests/tcg/hexagon/system/crt0/crt0_standalone.S b/tests/tcg/hexagon/system/crt0/crt0_standalone.S
new file mode 100644
index 0000000000000..a3ca6ea95da2a
--- /dev/null
+++ b/tests/tcg/hexagon/system/crt0/crt0_standalone.S
@@ -0,0 +1,1206 @@
+/*
+ *  Copyright(c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "crt0.inc"
+	.equ TLB_FIXED_ENTRIES, 6
+
+	.org	0x20			/* This must be at address 0x20 */
+EventVectorBase:
+	.word .EventVectors
+
+/* This can vary based on the revid of the part:
+   64, 128, 192.  Most are 128 */
+_NumTLBEntries:
+	.word 127
+
+TLBMapTable:
+	.word UPTE_START
+
+CoreDump:
+	.word RegDump
+
+	.subsection 0
+
+	/* Make sure that data and code don't end up in the same L2 cache-line. */
+        .p2align 6, 0
+
+ 	.global hexagon_start_init
+ 	.type hexagon_start_init, @function
+hexagon_start_init:
+.Init:
+        /* Clean up house (make sure that R0 is initialized before DCKILL). */
+	dckill
+	isync
+	ickill
+	isync
+
+.InitSSR:
+	/* SFD = 0, IE = 0, UM = 0, EX = 0, ASID = 0 */
+	r0 = #0
+	ssr = r0
+	isync
+
+	/* Setup events */
+.InitVector:
+	ReadFrom EventVectorBase
+	evb = r0
+
+.InitStack:
+	ReadFrom exc_stack_tops
+	sgp0 = r0
+
+.InitFramekey:
+	r0 = #0
+	framekey = r0
+
+	/* Configure cycle counter. */
+.InitPcycle:
+	r1 = #1
+	r0 = syscfg
+	r0 = insert (r1, #1, #6)
+	syscfg = r0
+
+	/* Configure IMT/DMT. */
+.InitDMT:
+	r1 = #1
+	r0 = syscfg
+	r0 = insert (r1, #1, #15)
+	syscfg = r0
+.InitQoS:
+	r1 = #1
+	r0 = syscfg
+	r0 = insert (r1, #1, #13)
+	syscfg = r0
+1:
+.InitXE:
+	r1 = #1
+	r0 = ssr
+	r0 = insert (r1, #1, #31)
+	ssr = r0
+
+        //{ 0x4066, 0x4, 0x7F, 0, 4 }, // v66a_512
+	{
+		r0 = #0x2c // JTLB size
+		r2 = cfgbase
+	}
+	r1 = asl(r2, #5)
+	r0 = memw_phys(r0, r1)
+	{
+		r0 = add(r0, #-1);
+		memw(##_tlbmax) = r0.new
+	}
+
+	{
+		r0 = #0x40 // L2 Tag size
+		r2 = cfgbase
+	}
+	r0 = memw_phys(r0, r1)
+	r1 = #0;
+	p0 = cmp.eq(r0, #0x400)
+	{
+		if (p0) r1 = #5
+		if (p0) jump 1f
+	}
+	p0 = cmp.eq(r0, #0x200)
+	{
+		if (p0) r1 = #4
+		if (p0) jump 1f
+	}
+	p0 = cmp.eq(r0, #0x100)
+	{
+		if (p0) r1 = #3
+		if (p0) jump 1f
+	}
+	p0 = cmp.eq(r0, #0x080)
+	{
+		if (p0) r1 = #2
+		if (p0) jump 1f
+	}
+1:
+	memw(##_l2cfg) = r1
+
+/* L2 config sequence:
+ *    1	- Disable prefetching by clearing HFd/i bits in ssr/ccr
+ */
+        r0 = ccr
+	r3 = #0
+	r0 = insert (r3, #4, #16)	/* Clear HFi, HFd, HFiL2 HFdL2 bits */
+	ccr = r0
+
+	/* Configure L2 cache. */
+    	r0 = syscfg
+	r0 = insert (r3, #3, #16)	/* Set L2 size to 0 via L2CFG. */
+
+
+/* L2 config sequence:
+ *    2	- execute an isync which is aligned to a 32byte boundary.
+ */
+	.p2alignl 5, 0x7f00c000
+	isync
+
+/* L2 config sequence:
+ *    3	- execute an syncht insn to insure there are no outstanding
+ *        memory transactions.
+ */
+        syncht
+
+/* L2 config sequence:
+ *    4	- Set the desired L2 size for < V4 (set to 0 for >= V4).
+ */
+	syscfg = r0
+	isync
+
+/* L2 config sequence:
+ *    5	- Execute the L2KILL insn to initiate the cache.
+ */
+	l2kill
+	syncht
+
+/* L2 config sequence:
+ *    6	- Set the desired L2 size.
+ */
+	r2 = memw(##_l2cfg)
+        r3 = #0x5
+	r3 = min (r2, r3)	        /* min between desired and hwmax */
+	r0 = insert (r3, #4, #16)       /* Set L2 size via L2CFG. */
+	syscfg = r0
+	isync
+
+	/* Configure L1 caches. */
+.InitCache:
+	r1 = #0
+	r1 = #1
+	r2 = syscfg
+	r2 = insert (r1, #1, #1)
+	r2 = insert (r0, #1, #2)
+
+	r1 = #1
+	r2 = insert (r1, #1, #23)
+
+	syscfg = r2
+	isync
+
+	/* BEGIN code to turn on translation */
+.InitTLB:
+	// V65 an later use a table for this stuff, should get a table for all of it!
+	r0 = memw(##_tlbmax)
+
+	/* Clear TLB and store the number of TLBs */
+	{
+		r3:2 = combine(#0,#0)
+		memw(##_NumTLBEntries) = r0
+	}
+
+	loop0(.InitTLBLoop, r0)
+.falign
+.InitTLBLoop:
+	tlbw(r3:2,r0)
+	r0 = add (r0, #-1)
+	{}:endloop0
+	isync
+
+.InitTLBGlobal:				/* Fixed entry for everything. */
+        AddrOf _start, r2
+        r2 = lsr (r2, #12)
+
+	AddrOf 0xc3f00000, r1		/* Global, 1-1 mapping. */
+	AddrOf 0xf7000000, r0		/* Full perms, fully cacheable WB */
+        r1 = or (r1, r2)		/* 1M translation */
+        r0 |= asl (r2,#1)
+	r0 = setbit(r0,#4)
+	r0 = and(r0,#-16)
+	r2 = #0
+	tlbw(r1:0,r3)
+
+	/* TODO Should there be a TLB entry for TCM too? */
+
+	r0 = syscfg
+	r0 = setbit (r0, #0)		/* Turn the MMU on. */
+	syscfg = r0
+	isync
+
+.InitInt:
+	/* Set up rising edge triggered interrupts */
+        r0 = #0
+	imask = r0
+	r1 = #-1
+	cswi (r1)
+
+        /* Enable interrupts globally. */
+        r0 = ssr
+	r0 = setbit (r0, #18)
+        ssr= r0
+
+        r0 = syscfg
+        r0 = setbit (r0, #4)
+        syscfg = r0
+        isync
+
+        /* Set up input params to Angel call   */
+        r0 = #22
+        AddrOf setHeapAngelCallParams, r1
+        trap0 (#0)
+
+.PreMain:
+        AddrOf hexagon_pre_main, r28
+        jumpr r28
+        .size hexagon_start_init, . - hexagon_start_init
+
+.global	qdsp6_start_init
+.set	qdsp6_start_init, \
+	hexagon_start_init
+
+/* (At this point the machine is mostly ready for normal execution */
+
+ /* This code is jumped to when we start a new thread.        */
+ /* It reads some values out of memory and uses them          */
+ /* to begin execution.                                       */
+ /* The code supports going to a function of the type:        */
+ /*     void foo (void *arg);                                  */
+ /* or                                                        */
+ /*     void foo (int arg);                                    */
+ /* All we have to do is get the location of "foo", the       */
+ /* value for "arg", and set up the stack.                    */
+ /* This stuff has been set up for us by thread_create, below.*/
+ /* Under the OS, we have no need for this, it is merely for  */
+ /* trying multithreaded applications on the raw hardware.    */
+
+	.p2align 4
+	.weak thread_stop
+	.type thread_stop, @function
+thread_stop:
+{
+	r0 = htid
+	r1 = #1
+}
+	r1 = lsl (r1, r0)
+	stop (r1)
+
+	.p2align 4
+
+	.type event_handle_reset, @function
+
+event_handle_reset:
+	r1 = htid /* do not alter until final register initialization */
+
+	{
+		r28 = ##(start_pc)
+		r29 = ##(start_sp)
+	}
+
+	r2 = #0 				/* UM = 0 EX = 0 IE = 0 ASID = 0 */
+	ssr = r2
+	isync
+	imask = r2
+
+	r2 = ##(exc_stack_tops)
+	r2 = memw (r2+r1<<#2)
+	sgp0 = r2
+
+	/* Initialize GP to the start of the global data area. */
+	//r2 = ##(_SDA_BASE_)
+	//gp = r2
+
+        r2.h = #4
+	r2.l = #0
+	ssr = r2 /* Turn on interrupts */
+
+	r3 = #1
+	r2 = ssr
+	r2 = insert (r3, #1, #31)
+	ssr = r2
+
+	r2.h = #0x1  /* Enable cache fetching */
+	usr  = r2
+
+	r0 = #1
+	r2 = #1
+	r0 |= asl (r2, #1)
+	r2 = ccr
+	r2 = insert (r0, #2, #16)
+	/* Enable dcfetch and l2fetch. */
+	r2 = setbit (r2, #20)
+	ccr = r2
+
+	isync
+
+	{
+		r2 = ##framekey_tbl
+		r3 = ##stack_size
+	}
+	{
+		r2 = memw(r2+r1<<#2) /* load framekey from memory array */
+		r3 = memw(r3+r1<<#2) /* load stack_size from memory array */
+	}
+	{
+		framekey = r2 /* store into framekey register */
+		r2 = memw (sp+r1<<#2)
+	}
+	r3 = sub(r2, r3) /* framelimt = sp-stack_size) */
+	framelimit = r3 /* store into framelimit register */
+
+	{
+		r28 = memw (r28+r1<<#2)
+		sp = memw (sp+r1<<#2)
+		fp = #0
+	}
+
+        {
+                r0 = ##(start_param)
+	        lr = ##(thread_stop)
+        }
+	fp = #0
+	r1 = htid
+	r0 = memw (r0+r1<<#2)
+
+	jump thread_start
+
+	.size event_handle_reset, . - event_handle_reset
+
+        .global __coredump
+        .type coredump, @function
+        .set __coredump, coredump
+coredump:
+        r0 = ssr
+        r0 = clrbit (r0, #16) /* UM = 0 */
+        r0 = clrbit (r0, #17) /* EX = 0 */
+        ssr = r0
+        isync
+        r0 = #0xCD
+        trap0 (#0)
+        r2 = #-1
+        r0 = #-1
+        stop (r0)
+	.size event_core_dump, . - event_core_dump
+
+        .type event_handle_nmi, @function
+event_handle_nmi:
+        r0 = #1
+        stid = r0
+        jump coredump
+        .size event_handle_nmi, . - event_handle_nmi
+
+        .type event_handle_error, @function
+event_handle_error:
+        r0 = #2
+        stid = r0
+        jump coredump
+        .size event_handle_error, . - event_handle_error
+
+        .type event_handle_rsvd, @function
+event_handle_rsvd:
+        r0.h = #0xdead
+        r0.l = #0xbeef
+        stid = r0
+        jump coredump
+        .size event_handle_rsvd, . - event_handle_rsvd
+
+	.global thread_start
+	.type thread_start, @function
+thread_start:
+		jumpr r28
+	.size thread_start, . - thread_start
+
+ /* TLB HANDLING                                                  */
+ /* There are a few strategies we have tried for TLB handling.    */
+ /* The first is just to map every page 1:1 for virtual:physical  */
+ /* This means we have nothing to look up but no flexibility      */
+ /* The strategy implemented here is to divide memory into        */
+ /* a bunch of 1MB pages.  Each page is by default set to the     */
+ /* corresponding physical 1M page, but the translation (and the  */
+ /* cacheability) can be changed with the add_translation function*/
+ /* below.                                                        */
+ /* We have to keep the table in memory, and it's down in the data*/
+ /* section.                                                      */
+ /* The page at address 0 is always kept in the TLB.              */
+ /* You will run into problems if the data gets pushed out into   */
+ /* another page, because you don't have a translation for the    */
+ /* data you need to do the translation!                          */
+ /* The solution is to put the translation table (and probably    */
+ /* the TLB fill code) in special section (s) that go near address 0 */
+ /* You can set that up in the linker script.                     */
+ /* TLB miss because of eXecution                                 */
+ /* See HEXAGON Architecture System-Level Spec for more information */
+
+
+
+ 	.subsection 0
+
+	.p2align 6
+	.global event_handle_tlbmissx
+ 	.type event_handle_tlbmissx, @function
+
+event_handle_tlbmissx:
+	crswap (sp, sgp0)
+	sp = add (sp, #-64)
+	/* Save off state */
+	{
+		memd (sp + #0) = r1:0
+		memd (sp + #8) = r3:2
+	}
+	{
+		memd (sp + #16) = r5:4
+		memd (sp + #24) = r7:6
+	}
+	{
+		memd (sp + #32) = r9:8
+		r9 = p3:0
+	}
+	r8 = ssr
+	r7 = elr
+	p1 = tstbit (r8, #0)
+	{
+		/* Calculate 4K page index */
+		r7 = lsr (r7, #12)
+		/* Check for next page hit */
+		if (!p1) jump 1f
+		r0 = ##(__tlb_idx)
+	}
+	r7 = add (r7, #1)
+1:
+	{
+		r1 = memw(##_tlb_fixed_entries)	/* First non-fixed entry. */
+		r3 = memw(##_NumTLBEntries)
+	}
+	/* Atomically increment index */
+	/* NEVER overwrite fixed entries */
+1:
+	r6 = memw_locked (r0)
+	{
+		r6 = add (r6, #1)
+		/* This was hard coded to p0 = cmp.ge(r6, #NUM_TLB_ENTRIES)
+		   Now we are using 2 registers so switch to the equivalent
+		   p0 = !cmp.gt(r3, r6) */
+		p0 = !cmp.gt (r3, r6)
+	}
+	/* Will never store a number greater than
+           _NumTLBEntries in &__tlb_idx */
+	r6 = mux (p0, r1, r6)
+	memw_locked (r0, p0) = r6
+	if (!p0) jump 1b              /* Retry, lost reservation. */
+
+	{
+		r7 = lsr (r7, #8) /* 1M page index */
+		r3 = memw (##TLBMapTable)
+	}
+	r3 = addasl (r3, r7, #1)
+	{
+		r3 = memh (r3)
+		r7 = asl (r7, #8) /* VPN */
+	}
+	r5 = extractu (r3, #12, #4)
+	{
+		r4 = extractu (r3, #4, #0)
+		r0 = #0x0010 /* 1M */
+		r1 = #0
+	}
+	{
+		r4 = asl (r4, #24)
+		r1.h = #0xc000
+		r0.h = #0xf000
+	}
+1:
+	{
+		r1 = or (r1, r7)	/* c000_0000 + VPN */
+		r0 |= asl(r5,#9)	/* f000_0000 + PPD */
+	}
+	r0 = or (r0, r4)
+	/* Get Lock */
+	tlblock
+	r5 = tlbp(r1)
+	p0 = tstbit (r5, #31)
+	if (!p0) jump 1f
+
+	tlbw(r1:0,r6)
+	isync
+
+1:
+	tlbunlock
+
+	p3:0 = r9
+	{
+		r9:8 = memd (sp + #32)
+		r7:6 = memd (sp + #24)
+	}
+	{
+		r5:4 = memd (sp + #16)
+		r3:2 = memd (sp + #8)
+	}
+	{
+		r1:0 = memd (sp + #0)
+		sp = add (sp, #64)
+	}
+	crswap (sp, sgp0)
+	rte
+
+	.size  .event_handle_tlbmissx, . - event_handle_tlbmissx
+
+ /* TLB Miss RW                                            */
+ /* Basically the same as TLB MissX, but we get            */
+ /* The address from BADVA instead of EVB... see the       */
+ /* HEXAGON Architecture System-level Spec for more details. */
+
+	.p2align 6
+
+	.global event_handle_tlbmissrw
+ 	.type event_handle_tlbmissrw, @function
+
+event_handle_tlbmissrw:
+	crswap (sp, sgp0)
+	sp = add (sp, #-64)
+	{
+		memd (sp + #0) = r1:0
+		memd (sp + #8) = r3:2
+	}
+	{
+		memd (sp + #16) = r5:4
+		memd (sp + #24) = r7:6
+	}
+	{
+		memd (sp + #32) = r9:8
+		r8 = ssr
+	}
+	r7 = badva
+	r9 = p3:0
+	{
+		r0 = ##__tlb_idx
+		r1 = memw(##_tlb_fixed_entries)
+	}
+	{
+		r7 = lsr (r7, #20)
+		r3 = memw(##_NumTLBEntries) /* 31, 63, 127, or 191 */
+	}
+	/* Atomically increment index */
+	/* NEVER overwrite entry 0 */
+1:
+	r6 = memw_locked (r0)
+	{
+		r6 = add (r6, #1)
+		/* This was hard coded to p0 = cmp.ge(r6, #NUM_TLB_ENTRIES)
+		   Now we are using 2 registers so switch to the equivalent
+		   p0 = !cmp.gt(r3, r6) */
+		p0 = !cmp.gt (r3, r6)
+	}
+	/* Will never store a number greater than
+           _NumTLBEntries in &__tlb_idx */
+	r6 = mux (p0, r1, r6)
+	memw_locked (r0, p0) = r6
+	if (!p0) jump 1b              /* Retry, lost reservation. */
+
+	r3 = memw (##TLBMapTable)
+	r3 = addasl (r3, r7, #1)
+	{
+		r3 = memh (r3)
+		r7 = asl (r7, #8) /* VPN */
+	}
+
+	r4 = extractu (r3, #4, #0)
+.L_OK:
+	{
+	        r5 = extractu (r3, #12, #4)
+		r0 = #0x0010	/* 1M */
+		r1 = #0
+	}
+	{
+		r4 = asl (r4, #24)
+		r1.h = #0xc000
+		r0.h = #0xf000
+	}
+1:
+	{
+		r1 = or (r1, r7) /* R5: VPN | C000_0000 */
+		r0 |= asl(r5,#9) /* R4: PPD | F000_0000 */
+	}
+	r0 = or (r0, r4)
+
+	tlblock
+	r5 = tlbp(r1)
+	p0 = tstbit (r5, #31)
+	if (!p0) jump 1f
+
+	tlbw(r1:0,r6)
+	isync
+	jump 2f
+1:
+        // If we take a miss around a user defined page they need to
+        // manually create another page or not touch the regions above
+        // and below their page within a 1M boundary.
+	r4 = memw(##_tlb_fixed_entries)
+	p0 = cmp.gt(r4, r5) // r4>r5 == r5<r4, (entryfound < num_fixed)
+	if (p0) jump .  // DEAD
+2:
+	tlbunlock
+
+	p3:0 = r9
+	{
+		r9:8 = memd (sp + #32)
+		r7:6 = memd (sp + #24)
+	}
+	{
+		r5:4 = memd (sp + #16)
+		r3:2 = memd (sp + #8)
+	}
+	{
+		r1:0 = memd (sp + #0)
+		sp = add (sp, #64)
+	}
+	crswap (sp, sgp0)
+	rte
+
+	.size event_handle_tlbmissrw, . - event_handle_tlbmissrw
+
+/* This code handles the OS-like requests coming   */
+/* from the application.                           */
+
+	.p2align 4
+
+ 	.type event_handle_trap0, @function
+
+event_handle_trap0:
+	crswap (sp, sgp0)
+	{
+		sp = add (sp, #-40)
+		memd (sp + #-40) = r5:4
+		r5 = p3:0
+		p0 = cmp.eq (r0, #0x40)  /* read (thread) cycles */
+	}
+	{
+		memd (sp + #8) = r3:2
+		p1 = cmp.eq (r0, #0x44)  /* read tcycles */
+		p2 = cmp.eq (r0, #0x52)  /* read pcycles */
+		r4.h = #HI (0x55555555)  /* 1/3 in 0.32 fixed point */
+	}
+
+7:
+	{
+		p3:0 = r5
+		r3:2 = memd (sp + #8)
+		r5:4 = memd (sp)
+		sp = add (sp, #40)
+	}
+	crswap (sp, sgp0)
+        rte
+
+8:
+	{
+		if (!p2) jump 9f
+		r6.l = #38
+	}
+	{
+		p2 = cmp.eq (r1, r6)
+		jump 1b
+	}
+
+9:
+	r1 = memw (##CoreDump)
+
+	jump 1b
+
+        .size event_handle_trap0, . - event_handle_trap0
+
+        .p2align 4
+
+        .type event_handle_trap1, @function
+
+event_handle_trap1:
+        r0 = #9
+        stid = r0
+        jump coredump
+
+        .size event_handle_trap1, . - event_handle_trap1
+
+ /* This is the code jumped to by the interrupt vectors */
+ /* (above).  We save context, jump to the function,    */
+ /* restore context, and return to where we left off.   */
+
+ 	.type event_handle_int, @function
+
+event_handle_int:
+	crswap (sp, sgp0)
+	allocframe (#160)
+	{
+		memd (sp + #0) = r1:0
+		memd (sp + #8) = r3:2
+		r0 = SA0
+	}
+	{
+		memd (sp + #16) = r5:4
+		memd (sp + #24) = r7:6
+		r1 = LC0
+	}
+	{
+		memd (sp + #32) = r9:8
+		memd (sp + #40) = r11:10
+		r2 = SA1
+	}
+	{
+		memd (sp + #48) = r13:12
+		memd (sp + #56) = r15:14
+		r3 = LC1
+	}
+	{
+		memd (sp + #64) = r17:16
+		memd (sp + #72) = r19:18
+		r6 = p3:0
+	}
+	{
+		memd (sp + #80) = r21:20
+		memd (sp + #88) = r23:22
+		r5:4 = C7:6 /* M1 and M0 */
+	}
+	{
+		memd (sp + #96) = r25:24
+		memd (sp + #104) = r27:26
+		r7 = USR
+	}
+	{
+		memd (sp + #112) = r1:0
+		memd (sp + #136) = r7:6
+		r8 = UGP
+	}
+	r0 = ssr
+	{
+		memd (sp + #120) = r3:2
+		r2 = r0
+		r7 = insert (r0, #8, #16)
+	}
+	{
+		r9 = ELR
+		memd (sp + #128) = r5:4
+		r0 = and (r0, #0x1f)
+		r1 = ##(__IntHandlers)
+	}
+	{
+		r1 = addasl (r1, r0, #2)
+	}
+	{
+		memd (sp + #144) = r9:8
+		r1 = memw (r1)
+		r3 = #0
+		lr = r28
+	}
+	{
+		memd (sp + #152) = lr:fp
+		r2 = insert (r3, #3, #16)
+		p0 = cmp.eq (r1, #0)
+	}
+	if (p0) jump 1f // if null, skip a bunch of stuff
+	ssr = r2
+	crswap (sp, sgp0)
+	/* Call interrupt handler */
+	callr r1
+	/* Ok, we're back... */
+	crswap (sp, sgp0)
+	/* R7.H is also intnum.. use for ciad */
+	/* ciad ... do early to jump over */
+	r0 = ssr
+	{
+		r26.h = #0x0000
+		r7:6 = memd (sp + #136)
+		r1 = #6  /* EX, IE, !UM */
+	}
+	{
+		r7 = asrh (r7)
+		r26.l = #0x0001
+		r0 = insert(r1, #3, #16)
+	}
+	r7 = and (r7, #0x1f)
+	r26 = lsl (r26, r7)
+
+	ssr = r0
+	ciad (r26)
+1:
+	{
+		lr:fp = memd (sp + #152)
+		r9:8 = memd (sp + #144)
+	}
+	elr = r9
+	{
+		r7:6 = memd (sp + #136)
+		r5:4 = memd (sp + #128)
+		UGP = r8
+	}
+	{
+		r3:2 = memd (sp + #120)
+		r1:0 = memd (sp + #112)
+		usr = r7
+		r28 = lr
+	}
+	{
+		r27:26 = memd (sp + #104)
+		r25:24 = memd (sp + #96)
+		m0 = r4
+	}
+	{
+		r23:22 = memd (sp + #88)
+		r21:20 = memd (sp + #80)
+		m1 = r5
+	}
+	{
+		r19:18 = memd (sp + #72)
+		r17:16 = memd (sp + #64)
+		p3:0 = r6
+	}
+	{
+		r15:14 = memd (sp + #56)
+		r13:12 = memd (sp + #48)
+		lc1 = r3
+	}
+	{
+		r11:10 = memd (sp + #40)
+		r9:8   = memd (sp + #32)
+		sa1 = r2
+	}
+	{
+		r7:6   = memd (sp + #24)
+		r5:4   = memd (sp + #16)
+		lc0 = r1
+	}
+	{
+		r3:2   = memd (sp + #8)
+		r1:0   = memd (sp + #0)
+		sa0 = r0
+	}
+	deallocframe
+	crswap (sp, sgp0)
+	rte
+
+	.size event_handle_int, . - event_handle_int
+
+ /* Dummy function for when we don't have code registered for an interrupt.*/
+
+	.p2align 4
+
+ 	.type .NoHandler, @function
+
+.NoHandler:
+	jumpr lr
+
+	.size .NoHandler, . - .NoHandler
+
+	.text
+
+/* Next we have the event vectors */
+/* See the HEXAGON Architecture System-Level Specification  */
+/* for more information.*/
+
+	.p2align 12, 0
+
+ 	.type .EventVectors, @function
+
+.EventVectors:
+	jump event_handle_reset
+	jump event_handle_nmi
+	jump event_handle_error
+	jump event_handle_rsvd
+	jump event_handle_tlbmissx
+	jump event_handle_rsvd
+	jump event_handle_tlbmissrw
+	jump event_handle_rsvd
+	jump event_handle_trap0
+	jump event_handle_trap1
+	jump event_handle_rsvd /* 10 */
+	jump event_handle_rsvd /* 11 */
+	jump event_handle_rsvd /* 12 */
+	jump event_handle_rsvd /* 13 */
+	jump event_handle_rsvd /* 14 */
+	jump event_handle_rsvd /* 15 */
+	jump event_handle_int  /* Event number 16, Interrupt 0 */
+	jump event_handle_int
+	jump event_handle_int
+	jump event_handle_int
+	jump event_handle_int
+	jump event_handle_int
+	jump event_handle_int
+	jump event_handle_int
+	jump event_handle_int
+	jump event_handle_int
+	jump event_handle_int
+	jump event_handle_int
+	jump event_handle_int
+	jump event_handle_int
+	jump event_handle_int
+	jump event_handle_int
+	jump event_handle_int
+	jump event_handle_int
+	jump event_handle_int
+	jump event_handle_int
+	jump event_handle_int
+	jump event_handle_int
+	jump event_handle_int
+	jump event_handle_int
+	jump event_handle_int
+	jump event_handle_int
+	jump event_handle_int
+	jump event_handle_int
+	jump event_handle_int
+	jump event_handle_int
+	jump event_handle_int
+	jump event_handle_int  /* Event number 47, Interrupt 31 */
+
+	.size .EventVectors, . - .EventVectors
+
+/**************** DATA SECTION ****************/
+
+	/* Here are definitions for some of the data we use above */
+
+	.section .start, "awx"
+	.subsection 1
+
+	.p2align 4, 0
+
+	.global __IntHandlers
+	.set __IntHandlers, .IntHandlers
+.IntHandlers:
+	.word .NoHandler	/* 0 */
+	.word .NoHandler	/* 1 */
+	.word .NoHandler	/* 2 */
+	.word .NoHandler	/* 3 */
+	.word .NoHandler	/* 4 */
+	.word .NoHandler	/* 5 */
+	.word .NoHandler	/* 6 */
+	.word .NoHandler	/* 7 */
+	.word .NoHandler	/* 8 */
+	.word .NoHandler	/* 9 */
+	.word .NoHandler	/* 10 */
+	.word .NoHandler	/* 11 */
+	.word .NoHandler	/* 12 */
+	.word .NoHandler /* 13 */
+	.word .NoHandler /* 14 */
+	.word .NoHandler /* 15 */
+	.word .NoHandler /* 16 */
+	.word .NoHandler /* 17 */
+	.word .NoHandler /* 18 */
+	.word .NoHandler /* 19 */
+	.word .NoHandler /* 20 */
+	.word .NoHandler /* 21 */
+	.word .NoHandler /* 22 */
+	.word .NoHandler /* 23 */
+	.word .NoHandler /* 24 */
+	.word .NoHandler /* 25 */
+	.word .NoHandler /* 26 */
+	.word .NoHandler /* 27 */
+	.word .NoHandler /* 28 */
+	.word .NoHandler /* 29 */
+	.word .NoHandler /* 30 */
+	.word .NoHandler /* 31 */
+
+	.p2align 5, 0
+RegDump:
+	.space 4 * (32 + 10 + 29)
+
+    /* This space is used by the supervisor code for saving      */
+    /* context for kernel stuff.  It's also used to hold the     */
+    /* normal user code registers while we call the user-defined */
+    /* interrupt service routine  */
+/* Stack tops... enough for a couple context saves... */
+	.p2align 3, 0
+exc_stack_lim0:	.space 384
+exc_stack_top0:	.word 0
+	.p2align 3, 0
+exc_stack_lim1:	.space 384
+exc_stack_top1:	.word 0
+	.p2align 3, 0
+exc_stack_lim2:	.space 384
+exc_stack_top2:	.word 0
+	.p2align 3, 0
+exc_stack_lim3:	.space 384
+exc_stack_top3:	.word 0
+	.p2align 3, 0
+exc_stack_lim4:	.space 384
+exc_stack_top4:	.word 0
+	.p2align 3, 0
+exc_stack_lim5:	.space 384
+exc_stack_top5:	.word 0
+	.p2align 3, 0
+exc_stack_lim6: .space 384
+exc_stack_top6: .word 0
+	.p2align 3, 0
+exc_stack_lim7: .space 384
+exc_stack_top7: .word 0
+	.p2align 3, 0
+exc_stack_lim8:	.space 384
+exc_stack_top8:	.word 0
+	.p2align 3, 0
+exc_stack_lim9:	.space 384
+exc_stack_top9:	.word 0
+	.p2align 3, 0
+exc_stack_lim10: .space 384
+exc_stack_top10: .word 0
+	.p2align 3, 0
+exc_stack_lim11: .space 384
+exc_stack_top11: .word 0
+	.p2align 3, 0
+exc_stack_lim12: .space 384
+exc_stack_top12: .word 0
+	.p2align 3, 0
+exc_stack_lim13: .space 384
+exc_stack_top13: .word 0
+	.p2align 3, 0
+exc_stack_lim14: .space 384
+exc_stack_top14: .word 0
+	.p2align 3, 0
+exc_stack_lim15: .space 384
+exc_stack_top15: .word 0
+exc_stack_tops:
+	.word exc_stack_top0
+	.word exc_stack_top1
+	.word exc_stack_top2
+	.word exc_stack_top3
+	.word exc_stack_top4
+	.word exc_stack_top5
+	.word exc_stack_top6
+	.word exc_stack_top7
+	.word exc_stack_top8
+	.word exc_stack_top9
+	.word exc_stack_top10
+	.word exc_stack_top11
+	.word exc_stack_top12
+	.word exc_stack_top13
+	.word exc_stack_top14
+	.word exc_stack_top15
+
+	.global __start_pc
+	.set __start_pc, start_pc
+start_pc:
+	jump .
+	jump .
+	jump .
+	jump .
+	jump .
+	jump .
+	jump .
+	jump .
+	jump .
+	jump .
+	jump .
+	jump .
+	jump .
+	jump .
+	jump .
+	jump .
+
+
+	.global __start_sp
+	.set __start_sp, start_sp
+start_sp:
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+
+	.global __start_param
+	.set __start_param, start_param
+start_param:
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+
+	.global __stack_size
+	.set __stack_size, stack_size
+stack_size:
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+
+	.global __framekey
+	.set __framekey, framekey_tbl
+framekey_tbl:
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+	.word 0
+
+_l2cfg:
+	.word 0
+_tlbmax:
+	.word 0
+
+syscfg_l2_table:
+        .byte 0x0       /* rev: 0x0xxx: No L2 -> 0k L2 cache */
+        .byte 0x2       /* rev: 0x1xxx: 128K L2 -> 128k L2 cache */
+        .byte 0x3       /* rev: 0x2xxx: 256K L2 -> 256k L2 cache */
+        .byte 0x3       /* rev: 0x3xxx: Not valid at this time */
+        .byte 0x4       /* rev: 0x4xxx: 512K L2 -> 512k L2 cache */
+        .byte 0x4       /* rev: 0x5xxx: Not valid at this time */
+        .byte 0x4       /* rev: 0x6xxx: 768K L2 -> 512k L2 cache */
+        .byte 0x4       /* rev: 0x7xxx: Not valid at this time */
+        .byte 0x5       /* rev: 0x8xxx: 1024K L2 -> 1024 L2 cache */
+        .byte 0x4       /* rev: 0x9xxx: Not valid at this time */
+        .byte 0x5       /* rev: 0xAxxx: 1536K L2 -> 1024 L2 cache */
+        .byte 0x4       /* rev: 0xBxxx: Not valid at this time */
+        .byte 0x4       /* rev: 0xCxxx: Not valid at this time */
+        .byte 0x4       /* rev: 0xDxxx: Not valid at this time */
+        .byte 0x4       /* rev: 0xExxx: Not valid at this time */
+        .byte 0x4       /* rev: 0xFxxx: Not valid at this time */
+
+
+ /* Data used for TLB refill */
+
+	.p2align 6, 0
+
+	.global __tlb_lock
+	.set __tlb_lock, tlb_lock
+tlb_lock:
+	.word 0
+	.global __tlb_idx
+	.set __tlb_idx, tlb_idx
+tlb_idx:
+	.word TLB_FIXED_ENTRIES - 1
+
+	.global _tlb_fixed_entries
+_tlb_fixed_entries:
+	.word TLB_FIXED_ENTRIES
diff --git a/tests/tcg/hexagon/system/crt0/hexagon_standalone.h b/tests/tcg/hexagon/system/crt0/hexagon_standalone.h
new file mode 100644
index 0000000000000..01ca41349f0f1
--- /dev/null
+++ b/tests/tcg/hexagon/system/crt0/hexagon_standalone.h
@@ -0,0 +1,103 @@
+/*
+ *  Copyright(c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#ifndef _TLB_H
+#define _TLB_H
+
+typedef enum {
+    SHIFT_4K = 0,
+    SHIFT_16K,
+    SHIFT_64K,
+    SHIFT_256K,
+    SHIFT_1M,
+    SHIFT_4M,
+    SHIFT_16M,
+    SHIFT_64M,
+    SHIFT_256M,
+    SHIFT_1G,
+} PageShift;
+
+typedef enum {
+    PAGE_4K   = 1 << SHIFT_4K,
+    PAGE_16K  = 1 << SHIFT_16K,
+    PAGE_64K  = 1 << SHIFT_64K,
+    PAGE_256K = 1 << SHIFT_256K,
+    PAGE_1M   = 1 << SHIFT_1M,
+    PAGE_4M   = 1 << SHIFT_4M,
+    PAGE_16M  = 1 << SHIFT_16M,
+    PAGE_64M  = 1 << SHIFT_64M,
+    PAGE_256M = 1 << SHIFT_256M,
+    PAGE_1G   = 1 << SHIFT_1G,
+} PageSize;
+
+
+/*
+ * TLB entry format:
+ *
+ * TLBHI:
+ *    63 | 62 | 61 | 60:59 | 58 -- 52 | 51 -------- 32 |
+ *    V  | G  | EP   PPNex | ASID     | Virtual Page # |
+ *    -------------------------------------------
+ *
+ *    V            - Valid bit.
+ *    G            - Global bit.  If set ASID is ignored and the page
+ *                   is globally  accessible.
+ *    EP           - Extra Physical Bit
+ *    PPNex        - Extended Physical Page. (V73 and beyond)
+ *    ASID         - Address Space Identifier.
+ *    Virtual Page - Virtual Page number.  It has a minimum 4K alignment.
+ *                   This means the input value is right shifted 12 bits
+ *                   and that is what is placed into this field.
+ *
+ * TLBLO:
+ *    31 | 30 | 29 | 28 | 27 -- 24 | 23 --------- 1  | 0 |
+ *    X  | W  | R  | U  | C        | Physical Page # | S |
+ *    ----------------------------------------------------
+ *
+ *    X              - Execute Enabled
+ *    W              - Write Enabled
+ *    R              - Read Enabled
+ *    U              - User mode accessible
+ *    C              - Cacheablilty attributes: L1/L2 Cacheable Writeback/thru
+ *    Physical Page  - Physical Page #
+ *
+ */
+
+typedef union {
+  struct {
+    uint64_t S:1;
+    uint64_t PPN:23;
+    uint64_t CacheAttr:4;
+    uint64_t XWRU:4;
+    uint64_t VirtualPage:20;
+    uint64_t ASID:7;
+#if __HEXAGON_ARCH__ < 73
+    uint64_t A0:1;
+    uint64_t A1:1;
+#else
+    uint64_t PPN_EX:2;
+#endif
+    uint64_t EP:1;
+    uint64_t VG:2;
+  };
+  uint64_t raw;
+} TLBEntry;
+
+
+#define TLB_NOT_FOUND 0x80000000
+
+int add_translation_extended(int index, void *va, uint64_t pa,
+                              unsigned int page_size, unsigned int xwru,
+                              unsigned int cccc, unsigned int asid,
+                              unsigned int aa, unsigned int vg);
+void add_translation_fixed(int index, void *va, void *pa, int cccc,
+                            int permissions);
+void add_translation(void *va, void *pa, int cccc);
+
+#endif /* _TLB_H */
diff --git a/tests/tcg/hexagon/system/crt0/min_libc.c b/tests/tcg/hexagon/system/crt0/min_libc.c
new file mode 100644
index 0000000000000..f44ee49f8f444
--- /dev/null
+++ b/tests/tcg/hexagon/system/crt0/min_libc.c
@@ -0,0 +1,359 @@
+/*
+ *  Copyright(c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+/*
+ * Small cheat: take size_t, NULL, and other type/symbol definitions from the
+ * hexagon toolchain. We cannot link with the libc, though, as the actual
+ * implementation for functions like printf and open are defined for Linux, and
+ * we are running on "bare metal".
+ */
+#include <stdio.h>
+#include <stdint.h>
+#include <assert.h>
+#include <string.h>
+
+FILE *const stdout = (FILE *)1;
+
+void exit(int code)
+{
+    asm volatile(
+        "r2 = %0\n"
+        "stop(r0)\n"
+        :
+        : "r"(code)
+        : "r2");
+    __builtin_unreachable();
+}
+
+/* The assert() macro will use this. */
+void __assert_fail(const char *assertion, const char *file, int line,
+                   const char *function)
+{
+    printf("ASSERT fail '%s' at file '%s' line %d function %s\n",
+           assertion, file, line, function);
+    exit(1);
+}
+
+void *memset(void *b, int c, size_t len)
+{
+    for (size_t i = 0; i < len; i++) {
+        ((unsigned char *)b)[i] = (unsigned char)c;
+    }
+    return b;
+}
+
+int memcmp(const void *p1, const void *p2, size_t n)
+{
+    const char *s1 = p1;
+    const char *s2 = p2;
+    for ( ; n && (*s1 == *s2); s1++, s2++, n--) {
+        /* empty */
+    }
+    return n ? *(unsigned char *)s1 - *(unsigned char *)s2 : 0;
+}
+
+int bcmp(const void *s1, const void *s2, size_t n)
+{
+    return __builtin_bcmp(s1, s2, n);
+}
+
+
+#define HEX_SYS_WRITEC          0x03
+#define HEX_SYS_WRITE0          0x04
+#define HEX_SYS_GET_CMDLINE     0x15
+
+/*
+ * Macro flavors:
+ * - DIRECT_SWI takes up to two args an put them at r1 and r2.
+ * - SWI takes up to four args and puts them in an array, placing the
+ *   array address at r1.
+ */
+
+static int swi_ret, swi_err, swi_args[4];
+#define DO_SWI(CODE, ARG0, ARG1) \
+    do { \
+        asm volatile( \
+                "r0 = %2\n" \
+                "r1 = %3\n" \
+                "r2 = %4\n" \
+                "trap0(#0)\n" \
+                "%0 = r0\n" \
+                "%1 = r1\n" \
+                : "=r"(swi_ret), "=r"(swi_err) \
+                : "r"(CODE), "r"(ARG0), "r"(ARG1) \
+                : "r0", "r1", "r2", "memory" \
+                ); \
+    } while (0)
+
+#define SWI0(CODE) DO_SWI(CODE, swi_args, 0)
+#define SWI1(CODE, ARG0) \
+    do { swi_args[0] = (uint32_t)(ARG0); SWI0(CODE); } while (0)
+#define SWI2(CODE, ARG0, ARG1) \
+    do { swi_args[1] = (uint32_t)(ARG1); SWI1(CODE, ARG0); } while (0)
+#define SWI3(CODE, ARG0, ARG1, ARG2) \
+    do { swi_args[2] = (uint32_t)(ARG2); SWI2(CODE, ARG0, ARG1); } while (0)
+#define SWI4(CODE, ARG0, ARG1, ARG2, ARG3) \
+    do { swi_args[3] = (uint32_t)(ARG3); SWI3(CODE, ARG0, ARG1, ARG2); } while (0)
+
+#define GET_MACRO_5(_1, _2, _3, _4, _5, NAME, ...) NAME
+#define SWI(...) \
+    ({ GET_MACRO_5(__VA_ARGS__, SWI4, SWI3, SWI2, SWI1, SWI0)(__VA_ARGS__); \
+       swi_ret; })
+
+#define DIRECT_SWI0(CODE) DO_SWI(CODE, 0, 0)
+#define DIRECT_SWI1(CODE, ARG1) DO_SWI(CODE, ARG1, 0)
+#define DIRECT_SWI2(CODE, ARG1, ARG2) DO_SWI(CODE, ARG1, ARG2)
+
+#define GET_MACRO_3(_1, _2, _3, NAME, ...) NAME
+#define DIRECT_SWI(...) \
+    ({ GET_MACRO_3(__VA_ARGS__, DIRECT_SWI2, DIRECT_SWI1, DIRECT_SWI0)(__VA_ARGS__); \
+       swi_ret; })
+
+int puts(const char *str)
+{
+    DIRECT_SWI(HEX_SYS_WRITE0, str);
+    DIRECT_SWI(HEX_SYS_WRITE0, "\n");
+    return 0;
+}
+
+int fputs(const char *str, FILE *f)
+{
+    assert(f == stdout); /* Only stdout is supported. */
+    DIRECT_SWI(HEX_SYS_WRITE0, str);
+    return 0;
+}
+
+size_t fwrite(const void *ptr, size_t size, size_t nitems, FILE *f)
+{
+    assert(f == stdout); /* Only stdout is supported. */
+    for (size_t i = 0; i < size * nitems; i++) {
+        DIRECT_SWI(HEX_SYS_WRITEC, &ptr[i]);
+    }
+    return size * nitems;
+}
+
+int putchar(int c)
+{
+    DIRECT_SWI(HEX_SYS_WRITEC, &c);
+    return c;
+}
+
+static char *num_to_s(uint64_t signed_num, uint64_t base)
+{
+    static char buffer[1024];
+    char *bptr = buffer;
+    uint64_t num;
+
+    if (base == 16) {
+        num = signed_num;
+    } else if (base == 10) {
+        if (signed_num < 0) {
+            *bptr++ = '-';
+            signed_num *= -1;
+        }
+        num = signed_num;
+    } else {
+        puts("fatal: num_to_s expects base 16 or 10");
+        exit(1);
+    }
+
+    if (!num) {
+        return "0";
+    }
+
+    uint64_t divider = 1;
+    for (uint64_t n = num; n >= base; n /= base) {
+        divider *= base;
+    }
+
+    while (num) {
+        unsigned int digit = num / divider;
+        if (digit) {
+            num %= divider;
+            divider /= base;
+            if (digit >= 10) {
+                *bptr++ = 'a' + (digit - 10);
+            } else {
+                *bptr++ = '0' + digit;
+            }
+            while (num < divider) {
+                *bptr++ = '0';
+                divider /= base;
+            }
+        } else {
+            divider /= base;
+        }
+    }
+
+    *bptr = '\0';
+    return buffer;
+}
+
+static int advance_prefix(const char **str_ptr, char *prefix)
+{
+    const char *str = *str_ptr;
+    while (*str && *str == *prefix) {
+        str++;
+        prefix++;
+    }
+    str--;
+    if (!*prefix) {
+        *str_ptr = str;
+        return 1;
+    }
+    return 0;
+}
+
+static char *pad0(char *str, int n)
+{
+    static char buffer[1024];
+    int len = strlen(str);
+    assert(n < 1024);
+
+    int i;
+    for (i = 0; i < n - len; i++) {
+        buffer[i] = '0';
+    }
+    strcpy(&buffer[i], str);
+    return buffer;
+}
+
+/*
+ * Very simple implementation. No error checking.
+ * Supported formats are:
+ * %d, %s, %c, %x, %016llx
+ */
+int printf(const char *format, ...)
+{
+    va_list ap;
+    __builtin_va_start(ap, format);
+    for (const char *ptr = format; *ptr; ptr++) {
+        if (*ptr == '%') {
+            ptr++;
+            switch (*ptr) {
+            case 'd':
+            case 'x':
+            case 'p':
+            {
+                int num = __builtin_va_arg(ap, int);
+                fputs(num_to_s(num, *ptr == 'd' ? 10 : 16), stdout);
+                break;
+            }
+            case 's':
+                fputs(__builtin_va_arg(ap, char *), stdout);
+                break;
+            case 'c':
+                putchar(__builtin_va_arg(ap, int));
+                break;
+            case '%':
+                putchar('%');
+                break;
+            case '0':
+                if (advance_prefix(&ptr, "016llx")) {
+                    uint64_t num = __builtin_va_arg(ap, uint64_t);
+                    fputs(pad0(num_to_s(num, 16), 16), stdout);
+                    break;
+                }
+                /* else: fallthrough */
+            default:
+                fputs("fatal: unknown printf modifier '", stdout);
+                putchar(*ptr);
+                puts("'");
+                exit(1);
+            }
+        } else {
+            putchar(*ptr);
+        }
+    }
+    __builtin_va_end(ap);
+    return 1;
+}
+
+size_t strlen(const char *s)
+{
+    size_t len = 0;
+    for ( ; *s; s++) {
+        len++;
+    }
+    return len;
+}
+
+char *strcpy(char *dst, const char *src)
+{
+    int i;
+    for (i = 0; src[i]; i++) {
+        dst[i] = src[i];
+    }
+    dst[i] = '\0';
+    return dst;
+}
+
+int strcmp(const char *s1, const char *s2)
+{
+    for ( ; *s1 && (*s1 == *s2); s1++, s2++) {
+        /* empty */
+    }
+    return *(unsigned char *)s1 - *(unsigned char *)s2;
+}
+
+char *strrchr(const char *s, int c)
+{
+    for (int i = strlen(s) - 1; i >= 0; i--) {
+        if (s[i] == c) {
+            return (char *)&s[i];
+        }
+    }
+    return NULL;
+}
+
+#define MAX_ARGS 15
+/*
+ * Very simplistic implementation, using static buffers, and assuming no
+ * args will contain spaces.
+ */
+static inline char **getcmdline(int *argc)
+{
+    static char *args[MAX_ARGS] = { NULL };
+    char buf[4096];
+    char *c;
+    int id = 0;
+
+    assert(!SWI(HEX_SYS_GET_CMDLINE, buf, sizeof(buf)));
+
+    *argc = 1;
+    for (c = buf; *c; c++) {
+        if (*c == ' ' && *(c + 1)) {
+            (*argc)++;
+        }
+    }
+    assert(*argc <= MAX_ARGS);
+
+    if (*argc == 0) {
+        return args;
+    }
+
+    args[id++] = buf;
+    for (c = buf; *c; c++) {
+        if (*c == ' ') {
+            *c = '\0';
+            if (id < *argc) {
+                args[id++] = c + 1;
+            }
+        }
+    }
+    return args;
+}
+
+int main(int argc, char **argv, char **envp);
+void _start_main(void)
+{
+    int argc;
+    char **argv = getcmdline(&argc);
+    /* For now, we ignore envp */
+    char *envp[] = { NULL };
+    exit(main(argc, argv, envp));
+    exit(1);
+}
diff --git a/tests/tcg/hexagon/system/crt0/pte.S b/tests/tcg/hexagon/system/crt0/pte.S
new file mode 100644
index 0000000000000..406e453891185
--- /dev/null
+++ b/tests/tcg/hexagon/system/crt0/pte.S
@@ -0,0 +1,80 @@
+/*
+ *  Copyright(c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+	.section .start, "awx", @progbits
+	.p2align 3
+	.subsection 1
+/* This is the translation table    */
+/* We make a table of 2^12 entries  */
+/* Each entry is a .hword (16 bits) */
+/* Each entry is initialized to 0 in the 4 LSBs (cached WB, see system spec) */
+/* Each entry is initialized to 1:1 Virtual:Physical in the upper 12 bits.   */
+/* We use the preprocessor to avoid copy-paste errors and to avoid  */
+/* an 8192-line addition  to the file.                              */
+
+	.set __UPTE_START, UPTE_START
+	.weak __UPTE_START, UPTE_START
+UPTE_START:
+#define TLBENTRY(X) .hword ((((X) >> 16) & (0xfff0)) | 0x7);
+
+#define TLB_1M(X)  TLBENTRY ((X) << 20)
+#define TLB_16M(X) \
+	TLB_1M (((X) << 4) + 0) \
+	TLB_1M (((X) << 4) + 1) \
+	TLB_1M (((X) << 4) + 2) \
+	TLB_1M (((X) << 4) + 3) \
+	TLB_1M (((X) << 4) + 4) \
+	TLB_1M (((X) << 4) + 5) \
+	TLB_1M (((X) << 4) + 6) \
+	TLB_1M (((X) << 4) + 7) \
+	TLB_1M (((X) << 4) + 8) \
+	TLB_1M (((X) << 4) + 9) \
+	TLB_1M (((X) << 4) + 10) \
+	TLB_1M (((X) << 4) + 11) \
+	TLB_1M (((X) << 4) + 12) \
+	TLB_1M (((X) << 4) + 13) \
+	TLB_1M (((X) << 4) + 14) \
+	TLB_1M (((X) << 4) + 15)
+
+#define TLB_256M(X) \
+	TLB_16M (((X) << 4) + 0) \
+	TLB_16M (((X) << 4) + 1) \
+	TLB_16M (((X) << 4) + 2) \
+	TLB_16M (((X) << 4) + 3) \
+	TLB_16M (((X) << 4) + 4) \
+	TLB_16M (((X) << 4) + 5) \
+	TLB_16M (((X) << 4) + 6) \
+	TLB_16M (((X) << 4) + 7) \
+	TLB_16M (((X) << 4) + 8) \
+	TLB_16M (((X) << 4) + 9) \
+	TLB_16M (((X) << 4) + 10) \
+	TLB_16M (((X) << 4) + 11) \
+	TLB_16M (((X) << 4) + 12) \
+	TLB_16M (((X) << 4) + 13) \
+	TLB_16M (((X) << 4) + 14) \
+	TLB_16M (((X) << 4) + 15)
+
+#define TLB_4G  \
+	TLB_256M (0) \
+	TLB_256M (1) \
+	TLB_256M (2) \
+	TLB_256M (3) \
+	TLB_256M (4) \
+	TLB_256M (5) \
+	TLB_256M (6) \
+	TLB_256M (7) \
+	TLB_256M (8) \
+	TLB_256M (9) \
+	TLB_256M (10) \
+	TLB_256M (11) \
+	TLB_256M (12) \
+	TLB_256M (13) \
+	TLB_256M (14) \
+	TLB_256M (15)
+
+TLB_4G
+
+	.size UPTE_START, . - UPTE_START
diff --git a/tests/tcg/hexagon/system/crt0/tlb.c b/tests/tcg/hexagon/system/crt0/tlb.c
new file mode 100644
index 0000000000000..00e07761dbe90
--- /dev/null
+++ b/tests/tcg/hexagon/system/crt0/tlb.c
@@ -0,0 +1,198 @@
+/*
+ *  Copyright(c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <stdint.h>
+#include "hexagon_standalone.h"
+
+/*
+ * The following 2 functions use global addressing mode
+ * to avoid GP relative overflows.
+ */
+static inline uint32_t get_tlb_fixed_entries(void)
+{
+  uint32_t *addr;
+  asm volatile ("%0=##_tlb_fixed_entries\n\t"
+                : "=r"(addr));
+  return *addr;
+}
+static inline uint32_t *get_UPTE_START(void)
+{
+  uint32_t addr;
+  asm volatile ("%0=##__UPTE_START\n\t"
+                : "=r"(addr));
+  return (uint32_t *)addr;
+}
+
+static inline uint32_t get_ssr(void)
+{
+  uint32_t reg;
+  asm volatile ("%0=ssr\n\t"
+                : "=r"(reg));
+  return reg;
+}
+
+
+static inline int64_t read_tlb_entry(int index)
+{
+  uint64_t reg;
+  asm volatile ("%[reg]=tlbr(%[index])"
+                : [reg] "=r" (reg)
+                : [index] "r" (index));
+  asm volatile ("isync");
+  return reg;
+}
+
+
+static inline void write_tlb_entry(TLBEntry tlb, int index)
+{
+  uint64_t entry = tlb.raw;
+  asm volatile ("tlblock\n"
+                "tlbw(%[entry], %[index])\n"
+                "isync\n"
+                "tlbunlock\n"
+                :
+                : [entry] "r" (entry), [index] "r" (index));
+}
+
+static inline int32_t  tlb_probe(uint32_t va)
+{
+  uint32_t VirtualPageNumber = va >> 12;
+  uint32_t ASID = (get_ssr() >> 8) & 0x7f;
+  uint32_t probe = ((ASID << 20) | VirtualPageNumber) & 0x7ffffff;
+  uint32_t result = 0;
+  asm volatile ("%[result]=tlbp(%[probe])"
+                : [result] "=r" (result)
+                : [probe] "r" (probe));
+
+  return result;
+}
+
+
+static inline void tlb_invalidate(uint32_t va)
+{
+  int entry = tlb_probe(va);
+  if (entry == TLB_NOT_FOUND) {
+    return;
+  }
+
+  TLBEntry tlb;
+  tlb.raw = read_tlb_entry(entry);
+  tlb.raw = tlb.raw & ~(1ull << 63); /* Clear the V bit. */
+  write_tlb_entry(tlb, entry);
+}
+
+
+static inline TLBEntry basic_entry(uint32_t va, uint64_t pa, PageSize pagesize)
+{
+  TLBEntry T;
+  uint64_t  PPN;
+  T.raw = 0ull;
+  T.VirtualPage = va >> 12;  /* 63-51 */
+#if __HEXAGON_ARCH__ > 72
+  T.PPN_EX = (pa & (3ull << 36)) >> 36;
+#endif
+  T.EP = (pa & (1ull << 35)) >> 35;
+  PPN = pa >> 12ull;
+  PPN = (PPN << 1ull) | pagesize;
+  if (pagesize == 1) {
+    T.S = 1;
+  }
+  T.raw |= PPN;
+  return T;
+}
+/*
+ * function: mkentry
+ * description:
+ *  - Given just a Physical Address (pa) and a Virtual Address (va)
+ *  create a default entry.
+ *  - A user wanting to change the cache attributes or permissions
+ *  can do so prior to writing the entry.
+ */
+static TLBEntry mkentry(uint32_t va, uint64_t pa, PageSize pagesize)
+{
+
+  /* Make an entry and set some reasonable defaults */
+  TLBEntry T = basic_entry(va, pa, pagesize);
+
+  T.CacheAttr = 0x7;
+  T.XWRU = 0x6;
+  T.VG = 0x3;
+  return T;
+}
+
+int add_translation_extended(int index, void *va, uint64_t pa,
+                             unsigned int page_size, unsigned int xwru,
+                             unsigned int cccc, unsigned int asid,
+                             unsigned int aa, unsigned int vg)
+{
+  uint32_t num_entries = get_tlb_fixed_entries();
+
+  if ((index < 1) || (index > (num_entries - 1))) {
+    return -1;
+  }
+
+  tlb_invalidate((uint32_t)va);
+  TLBEntry T;
+  T = basic_entry((uint32_t)va, pa, page_size);
+  T.ASID = ((uint64_t)asid & 0x7f);
+  T.CacheAttr = ((uint64_t)cccc & 0xf);
+  T.XWRU = ((uint64_t)xwru & 0xf);
+  T.VG = ((uint64_t)vg & 0x3);
+#if __HEXAGON_ARCH__ < 73
+  T.raw |= ((uint64_t)aa & 0x3) << 59ull;
+#endif
+  write_tlb_entry(T, index);
+
+  return 0;
+}
+
+
+void add_translation_fixed(int index, void *va, void *pa, int cccc,
+                           int permissions)
+{
+  tlb_invalidate((uint32_t)va);
+  add_translation_extended(index, va, (uint64_t)pa, PAGE_1M, permissions, cccc,
+                           0, 0, 3);
+}
+
+/*
+ * The following deals with the PTE software structure. The actual entry will
+ * not be placed into the TLB until an address fault occurrs.
+ */
+
+typedef union {
+    struct {
+        uint16_t cache:4;
+        uint16_t pa:12;
+    };
+    uint16_t PTE_raw;
+} SMALL_PTE;
+
+static SMALL_PTE *findPTEAddr(uint32_t va)
+{
+    uint32_t *PTE = get_UPTE_START();
+    int index = va >> 20;
+    return (SMALL_PTE *)PTE + index;
+}
+static SMALL_PTE findPTEValue(uint32_t va)
+{
+    SMALL_PTE *A = findPTEAddr(va);
+    return *A;
+}
+
+/* This function adds a translation into the mapping table, see above */
+/* Because we use 1MB pages, we only need to translate 12 bits.       */
+/* We keep those 12 bits plus 4 bits (where we keep the C field,      */
+/* see the System-level architecture spec on TLB entries) in          */
+/* a 16-bit entry in the table.                                       */
+/* We index into the table using the upper 12 bits.                   */
+/* As a note, 2 bytes x 2^12 entries == 8KB table                     */
+void add_translation(void *va, void *pa, int cccc)
+{
+    SMALL_PTE *S = findPTEAddr((uint32_t)va);
+    S->pa = (uint32_t)pa >> 20;
+    S->cache = cccc;
+}
diff --git a/tests/tcg/hexagon/system/fastl2vic.c b/tests/tcg/hexagon/system/fastl2vic.c
new file mode 100644
index 0000000000000..a115ae73f7990
--- /dev/null
+++ b/tests/tcg/hexagon/system/fastl2vic.c
@@ -0,0 +1,73 @@
+/*
+ *  Copyright(c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+/*
+ * Test the fastl2vic interface.
+ *
+ *  hexagon-sim a.out --subsystem_base=0xfab0  --cosim_file q6ss.cfg
+ */
+
+#include "crt0/hexagon_standalone.h"
+
+#include "cfgtable.h"
+
+#define CSR_BASE  0xfab00000
+#define L2VIC_BASE ((CSR_BASE) + 0x10000)
+#define L2VIC_INT_ENABLE(b, n) \
+        ((unsigned int *) ((b) + 0x100 + 4 * (n / 32)))
+#define L2VIC_INT_ENABLE_SET(b, n) \
+        ((unsigned int *) ((b) + 0x200 + 4 * (n / 32)))
+
+int main()
+{
+    int ret = 0;
+    unsigned int irq_bit;
+
+    /* setup the fastl2vic interface and setup an indirect mapping */
+    volatile  uint32_t *A = (uint32_t *)0x888e0000;
+    add_translation_extended(3, (void *)A, GET_FASTL2VIC_BASE(), 16, 7, 4, 0, 0, 3);
+
+    uint32_t l2vic_base = GET_SUBSYSTEM_BASE() + 0x10000;
+
+    /* set and verify an interrupt using the L2VIC_BASE */
+    irq_bit  = (1 << (66  % 32));
+    *L2VIC_INT_ENABLE_SET(l2vic_base, 66)   = irq_bit;
+    if (*L2VIC_INT_ENABLE(l2vic_base, 64) != 0x4) {
+        ret = __LINE__;
+    }
+
+    /* set and verify an interrupt using the FASTL2VIC interface */
+    *A = 68;
+    if (*L2VIC_INT_ENABLE(l2vic_base, 64) != 0x14) {
+        ret = __LINE__;
+    }
+    *A = 67;
+    if (*L2VIC_INT_ENABLE(l2vic_base, 64) != 0x1C) {
+        ret = __LINE__;
+    }
+
+
+    /* Now clear the lines */
+    *A = ((1 << 16) | 68);
+    if (*L2VIC_INT_ENABLE(l2vic_base, 64) != 0xC) {
+        ret = __LINE__;
+    }
+    *A = ((1 << 16) | 66);
+    if (*L2VIC_INT_ENABLE(l2vic_base, 64) != 0x8) {
+        ret = __LINE__;
+    }
+    *A = ((1 << 16) | 67);
+    if (*L2VIC_INT_ENABLE(l2vic_base, 64) != 0x0) {
+        ret = __LINE__;
+    }
+
+    if (ret) {
+        printf("%s: FAIL, last failure near line %d\n", __FILE__, ret);
+    } else {
+        printf("PASS\n");
+    }
+    return ret;
+}
diff --git a/tests/tcg/hexagon/system/hvx-multi.c b/tests/tcg/hexagon/system/hvx-multi.c
new file mode 100644
index 0000000000000..0d2e90c2c79b2
--- /dev/null
+++ b/tests/tcg/hexagon/system/hvx-multi.c
@@ -0,0 +1,119 @@
+/*
+ *  Copyright(c) 2023-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+
+int err;
+
+#include "../hvx_misc.h"
+
+void set_hvx_context(int n)
+{
+    uint32_t ssr_context_bits = n << 27;
+    asm volatile(
+        "r1 = ssr\n"
+        "r1 = and(r1, ##0xc7ffffff)\n"
+        "r1 = or(r1, %0)\n"
+        "ssr = r1\r"
+        "isync\n"
+        :
+        : "r"(ssr_context_bits)
+        : "r1"
+    );
+}
+
+void setv0(int n)
+{
+    asm volatile(
+        "v0 = vsplat(%0)\n"
+        : : "r"(n) : "v0"
+    );
+}
+
+void store_v0(MMVector *v)
+{
+    asm volatile(
+        "vmemu(%0) = v0\n"
+        :
+        : "r"(v)
+        : "memory"
+    );
+}
+
+uint32_t get_num_contexts(void)
+{
+    const int EXT_CONTEXT_OFFSET = 13;
+    unsigned int cfgbase;
+    asm volatile("%0 = cfgbase\n" : "=r"(cfgbase));
+    uint32_t *cfgtable = (uint32_t *)(cfgbase << 16);
+    return *(cfgtable + EXT_CONTEXT_OFFSET);
+}
+
+uint32_t get_rev(void)
+{
+    uint32_t rev;
+    asm volatile("%0 = rev\n" : "=r"(rev));
+    return rev;
+}
+
+/*
+ * This test verifies that each new context is properly selected and is
+ * independent of the thread.
+ */
+int main()
+{
+    int num_contexts = get_num_contexts();
+    printf("rev=v%x, HVX-contexts=%d\n", (int)(get_rev() & 0xff), num_contexts);
+    memset(&output[0], 0, 8 * sizeof(MMVector));
+
+    /* First set v0 on all the contexts. */
+    for (int i = 0; i < num_contexts; i++) {
+        set_hvx_context(i);
+        setv0(i + 1);
+    }
+
+    /*
+     * Now each context should have its own v0 value. Save it to memory. We
+     * check all possible SSR.XA values to make sure the "aliases" are
+     * implemented correctly.
+     */
+    for (int i = 0; i < 8; i++) {
+        set_hvx_context(i);
+        store_v0(&output[i]);
+    }
+
+
+    /*
+     * Set expected values:
+     *
+     *                            num contexts
+     * SSR.XA     2              4              6              8
+     * 000      HVX Context 0  HVX Context 0  HVX Context 0  HVX Context 0
+     * 001      HVX Context 1  HVX Context 1  HVX Context 1  HVX Context 1
+     * 010      HVX Context 0  HVX Context 2  HVX Context 2  HVX Context 2
+     * 011      HVX Context 1  HVX Context 3  HVX Context 3  HVX Context 3
+     * 100      HVX Context 0  HVX Context 0  HVX Context 4  HVX Context 4
+     * 101      HVX Context 1  HVX Context 1  HVX Context 5  HVX Context 5
+     * 110      HVX Context 0  HVX Context 2  HVX Context 2  HVX Context 6
+     * 111      HVX Context 1  HVX Context 3  HVX Context 3  HVX Context 7
+     */
+    for (int i = 0; i < 8; i++) {
+        int expected = (i % num_contexts) + 1;
+        /* Exception for num_contexts=6 */
+        if (num_contexts == 6 && i >= 6) {
+            expected = (i - 6 + 2) + 1;
+        }
+        for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
+            expect[i].w[j] = expected;
+        }
+    }
+
+    check_output_w(__LINE__, 8);
+    puts(err ? "FAIL" : "PASS");
+    return !!err;
+}
diff --git a/tests/tcg/hexagon/system/int_range.c b/tests/tcg/hexagon/system/int_range.c
new file mode 100644
index 0000000000000..688355886362e
--- /dev/null
+++ b/tests/tcg/hexagon/system/int_range.c
@@ -0,0 +1,94 @@
+/*
+ *  Copyright(c) 2023-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+/*
+ * Test the range of the l2vic interface.
+ */
+
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include "cfgtable.h"
+
+#define L2VIC_INT_ENABLE(b, n)                                               \
+    ((volatile unsigned int *)((b) + 0x100 + 4 * (n / 32))) /* device mem */
+
+#define L2VIC_INT_ENABLE_SET(b, n)                                           \
+    ((volatile unsigned int *)((b) + 0x200 + 4 * (n / 32))) /* device mem */
+
+#define L2VIC_INT_ENABLE_CLEAR(b, n)                                         \
+    ((volatile unsigned int *)((b) + 0x180 + 4 * (n / 32))) /* device mem */
+
+#define L2VIC_SOFT_INT_SET(b, n)                                             \
+    ((volatile unsigned int *)((b) + 0x480 + 4 * (n / 32))) /* device mem */
+
+#define L2VIC_INT_TYPE(b, n)                                                 \
+    ((volatile unsigned int *)((b) + 0x280 + 4 * (n / 32))) /* device mem */
+
+volatile int pass; /* must use volatile */
+int g_irq;
+volatile uint32_t g_l2vic_base; /* must use volatile */
+
+
+/*
+ * When complete the irqlog will contain the value of the vid when the
+ * handler was active.
+ */
+#define INTMAX 1024
+#define LEFT_SET 666
+
+int main()
+{
+    unsigned int irq_bit;
+    unsigned int left_set = 0;
+    int ret = 0;
+
+    /* setup the fastl2vic interface and setup an indirect mapping */
+    g_l2vic_base = GET_SUBSYSTEM_BASE() + 0x10000;
+
+    /* Setup interrupts */
+    for (int irq = 1; irq < INTMAX; irq++) {
+        irq_bit = (1 << (irq % 32));
+        *L2VIC_INT_ENABLE(g_l2vic_base, irq) |= irq_bit;
+    }
+
+    /* Read them all back and check */
+    for (int irq = 1; irq < INTMAX; irq++) {
+        if ((*L2VIC_INT_ENABLE(g_l2vic_base, irq) & (1 << (irq % 32))) !=
+            (1 << irq % 32)) {
+            printf("%d: ERROR: irq: %d: 0x%x\n", __LINE__, irq,
+                   *L2VIC_INT_ENABLE(g_l2vic_base, irq));
+            ret = 1;
+        }
+    }
+    /* Clear them all, except int 1 and LEFT_SET (test)  */
+    for (int irq = 1; irq < INTMAX; irq++) {
+        if (!(irq % LEFT_SET)) {
+            continue;
+        }
+        irq_bit = (1 << (irq % 32));
+        *L2VIC_INT_ENABLE_CLEAR(g_l2vic_base, irq) |= irq_bit;
+    }
+
+    /* make sure just LEFT_SET is set */
+    for (int irq = 0; irq < INTMAX; irq++) {
+        if ((*L2VIC_INT_ENABLE(g_l2vic_base, irq) & (1 << (irq % 32))) !=
+            (0 << irq % 32)) {
+            if (irq != LEFT_SET) {
+                printf("%d: ERROR: irq: %d: 0x%x\n", __LINE__, irq,
+                       *L2VIC_INT_ENABLE(g_l2vic_base, irq));
+                ret = 1;
+            } else {
+                left_set = irq;
+            }
+        }
+    }
+    if (left_set == LEFT_SET) {
+        printf("PASS\n");
+    }
+    return ret;
+}
diff --git a/tests/tcg/hexagon/system/mmu.h b/tests/tcg/hexagon/system/mmu.h
new file mode 100644
index 0000000000000..0856c94ab5ddb
--- /dev/null
+++ b/tests/tcg/hexagon/system/mmu.h
@@ -0,0 +1,718 @@
+/*
+ *  Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef MMU_H
+#define MMU_H
+#include <assert.h>
+#include <string.h>
+#include <stdint.h>
+#include "crt0/hexagon_standalone.h"
+
+/*
+ * Helpers for MMU tests
+ */
+
+#define TARGET_PAGE_BITS            12
+#ifndef TLB_NOT_FOUND
+#define TLB_NOT_FOUND               (1 << 31)
+#endif
+
+static inline uint32_t page_start(uint32_t addr, uint32_t page_size_bits)
+{
+    uint32_t page_size = 1 << page_size_bits;
+    uint32_t page_align = ~(page_size - 1);
+    return addr & page_align;
+}
+
+/*
+ * The Hexagon standalone runtime leaves TLB entries 1-5 reserved for
+ * user-defined entries.  We'll set them up to map virtual addresses at
+ * 1MB offsets above the actual physical address
+ *     PA == VA - (entry_num * 1MB)
+ *
+ * We'll define some macros/functions to help with the manipulation
+ */
+
+#define ONE_MB                      (1 << 20)
+#define TWO_MB                      (2 * ONE_MB)
+#define THREE_MB                    (3 * ONE_MB)
+#define FOUR_MB                     (4 * ONE_MB)
+#define FIVE_MB                     (5 * ONE_MB)
+
+#define ONE_MB_ENTRY                1
+#define TWO_MB_ENTRY                2
+#define THREE_MB_ENTRY              3
+#define FOUR_MB_ENTRY               4
+#define FIVE_MB_ENTRY               5
+
+static inline uint32_t tlb_entry_num(uint32_t va)
+{
+    return va >> 20;
+}
+
+#define fZXTN(N, M, VAL) ((VAL) & ((1LL << (N)) - 1))
+#define fEXTRACTU_BITS(INREG, WIDTH, OFFSET) \
+    (fZXTN(WIDTH, 32, (INREG >> OFFSET)))
+
+#define fINSERT_BITS(REG, WIDTH, OFFSET, INVAL) \
+    do { \
+        REG = ((REG) & ~(((1LL << (WIDTH)) - 1) << (OFFSET))) | \
+           (((INVAL) & ((1LL << (WIDTH)) - 1)) << (OFFSET)); \
+    } while (0)
+
+#define GET_FIELD(ENTRY, FIELD) \
+    fEXTRACTU_BITS(ENTRY, reg_field_info[FIELD].width, \
+                   reg_field_info[FIELD].offset)
+#define SET_FIELD(ENTRY, FIELD, VAL) \
+    fINSERT_BITS(ENTRY, reg_field_info[FIELD].width, \
+                 reg_field_info[FIELD].offset, (VAL))
+
+typedef struct {
+    int offset;
+    int width;
+} reg_field_t;
+
+enum reg_fields_enum {
+#define DEF_REG_FIELD(TAG, NAME, START, WIDTH, DESCRIPTION) \
+    TAG,
+#include "reg_fields_def.h"
+    NUM_REG_FIELDS
+#undef DEF_REG_FIELD
+};
+
+static const reg_field_t reg_field_info[] = {
+#define DEF_REG_FIELD(TAG, NAME, START, WIDTH, DESCRIPTION)    \
+      { START, WIDTH },
+
+#include "reg_fields_def.h"
+
+      { 0, 0 }
+#undef DEF_REG_FIELD
+};
+
+/*
+ * PPD (physical page descriptor) is formed by putting the PTE_PA35 field
+ * in the MSB of the PPD
+ */
+#define GET_PPD(ENTRY) \
+    ((GET_FIELD((ENTRY), PTE_PPD) | \
+     (GET_FIELD((ENTRY), PTE_PA35) << reg_field_info[PTE_PPD].width)))
+
+#define NUM_PGSIZE_TYPES (SHIFT_1G + 1)
+
+static const char *pgsize_str(PageSize pgsize)
+{
+    static const char *size_str[NUM_PGSIZE_TYPES] = {
+        "4K",
+        "16K",
+        "64K",
+        "256K",
+        "1M",
+        "4M",
+        "16M",
+        "64M",
+        "256M",
+        "1G"
+    };
+    assert(pgsize);
+    return size_str[__builtin_ctz(pgsize)];
+}
+
+static const uint64_t encmask_2_mask[] = {
+    0x0fffLL,                           /* 4k,   0000 */
+    0x3fffLL,                           /* 16k,  0001 */
+    0xffffLL,                           /* 64k,  0010 */
+    0x3ffffLL,                          /* 256k, 0011 */
+    0xfffffLL,                          /* 1m,   0100 */
+    0x3fffffLL,                         /* 4m,   0101 */
+    0xffffffLL,                         /* 16M,  0110 */
+    0xffffffffLL,                       /* RSVD, 0111 */
+};
+
+static inline int hex_tlb_pgsize(uint64_t entry)
+{
+    assert(entry != 0);
+    int size = __builtin_ctzll(entry);
+    assert(size < NUM_PGSIZE_TYPES);
+    return size;
+}
+
+static inline uint32_t hex_tlb_page_size(uint64_t entry)
+{
+    return 1 << (TARGET_PAGE_BITS + 2 * hex_tlb_pgsize(entry));
+}
+
+static inline uint64_t hex_tlb_phys_page_num(uint64_t entry)
+{
+    uint32_t ppd = GET_PPD(entry);
+    return ppd >> 1;
+}
+
+static inline uint64_t hex_tlb_phys_addr(uint64_t entry)
+{
+    uint64_t pagemask = encmask_2_mask[hex_tlb_pgsize(entry)];
+    uint64_t pagenum = hex_tlb_phys_page_num(entry);
+    uint64_t PA = (pagenum << TARGET_PAGE_BITS) & (~pagemask);
+    return PA;
+}
+
+static inline uint64_t hex_tlb_virt_addr(uint64_t entry)
+{
+    return GET_FIELD(entry, PTE_VPN) << TARGET_PAGE_BITS;
+}
+
+static inline uint64_t create_mmu_entry(uint8_t G, uint8_t A0, uint8_t A1,
+                                        uint8_t ASID, uint32_t VA,
+                                        uint8_t X, int8_t W, uint8_t R,
+                                        uint8_t U, uint8_t C, uint64_t PA,
+                                        PageSize SZ)
+{
+    uint64_t entry = 0;
+    SET_FIELD(entry, PTE_V, 1);
+    SET_FIELD(entry, PTE_G, G);
+    SET_FIELD(entry, PTE_ATR0, A0);
+    SET_FIELD(entry, PTE_ATR1, A1);
+    SET_FIELD(entry, PTE_ASID, ASID);
+    SET_FIELD(entry, PTE_VPN, VA >> TARGET_PAGE_BITS);
+    SET_FIELD(entry, PTE_X, X);
+    SET_FIELD(entry, PTE_W, W);
+    SET_FIELD(entry, PTE_R, R);
+    SET_FIELD(entry, PTE_U, U);
+    SET_FIELD(entry, PTE_C, C);
+    SET_FIELD(entry, PTE_PA35, (PA >> (TARGET_PAGE_BITS + 35)) & 1);
+    SET_FIELD(entry, PTE_PPD, ((PA >> (TARGET_PAGE_BITS - 1))));
+    entry |= SZ;
+    return entry;
+}
+
+static inline uint64_t tlbr(uint32_t i)
+{
+    uint64_t ret;
+    asm volatile ("%0 = tlbr(%1)\n\t" : "=r"(ret) : "r"(i));
+    return ret;
+}
+
+static inline uint32_t ctlbw(uint64_t entry, uint32_t idx)
+{
+    uint32_t ret;
+    asm volatile ("%0 = ctlbw(%1, %2)\n\t" : "=r"(ret) : "r"(entry), "r"(idx));
+    return ret;
+}
+
+static inline uint32_t tlbp(uint32_t asid, uint32_t VA)
+{
+    uint32_t x = ((asid & 0x7f) << 20) | ((VA >> 12) & 0xfffff);
+    uint32_t ret;
+    asm volatile ("%0 = tlbp(%1)\n\t" : "=r"(ret) : "r"(x));
+    return ret;
+}
+
+static inline void tlbw(uint64_t entry, uint32_t idx)
+{
+    asm volatile ("tlbw(%0, %1)\n\t" :: "r"(entry), "r"(idx));
+}
+
+static inline uint32_t tlboc(uint64_t entry)
+{
+    uint32_t ret;
+    asm volatile ("%0 = tlboc(%1)\n\t" : "=r"(ret) : "r"(entry));
+    return ret;
+}
+
+void tlbinvasid(uint32_t entry_hi)
+{
+    asm volatile ("tlbinvasid(%0)\n\t" :: "r"(entry_hi));
+}
+
+static inline void enter_user_mode(void)
+{
+    asm volatile ("r0 = ssr\n\t"
+                  "r0 = clrbit(r0, #17) // EX\n\t"
+                  "r0 = setbit(r0, #16) // UM\n\t"
+                  "r0 = clrbit(r0, #19) // GM\n\t"
+                  "ssr = r0\n\t" : : : "r0");
+}
+
+static inline void enter_kernel_mode(void)
+{
+    asm volatile ("r0 = ssr\n\t"
+                  "r0 = clrbit(r0, #17) // EX\n\t"
+                  "r0 = clrbit(r0, #16) // UM\n\t"
+                  "r0 = clrbit(r0, #19) // GM\n\t"
+                  "ssr = r0\n\t" : : : "r0");
+}
+
+static inline uint32_t *getevb()
+{
+    uint32_t reg;
+    asm volatile ("%0 = evb\n\t" : "=r"(reg));
+    return (uint32_t *)reg;
+}
+
+static inline void setevb(void *new_evb)
+{
+    asm volatile("evb = %0\n\t" : : "r"(new_evb));
+}
+
+static inline uint32_t getbadva()
+{
+    uint32_t badva;
+    asm volatile ("%0 = badva\n\t" : "=r"(badva));
+    return badva;
+}
+
+static void inc_elr(uint32_t inc)
+{
+
+    asm volatile ("r1 = %0\n\t"
+                  "r2 = elr\n\t"
+                  "r1 = add(r2, r1)\n\t"
+                  "elr = r1\n\t"
+                  :  : "r"(inc) : "r1", "r2");
+}
+
+static inline void do_coredump(void)
+{
+    asm volatile("r0 = #2\n\t"
+                 "stid = r0\n\t"
+                 "jump __coredump\n\t" : : : "r0");
+}
+
+static inline uint32_t getssr(void)
+{
+    uint32_t ret;
+    asm volatile ("%0 = ssr\n\t" : "=r"(ret));
+    return ret;
+}
+
+static inline void setssr(uint32_t new_ssr)
+{
+    asm volatile ("ssr = %0\n\t" :: "r"(new_ssr));
+}
+
+static inline void set_asid(uint32_t asid)
+{
+    uint32_t ssr = getssr();
+    SET_FIELD(ssr, SSR_ASID, asid);
+    setssr(ssr);
+}
+
+int err;
+#include "../hex_test.h"
+
+static void *old_evb;
+
+typedef uint64_t exception_vector[2];
+static exception_vector my_exceptions;
+
+static inline void clear_exception_vector(exception_vector excp)
+{
+    excp[0] = 0;
+    excp[1] = 0;
+}
+
+static inline void set_exception_vector_bit(exception_vector excp, uint32_t bit)
+{
+    if (bit < 64) {
+        excp[0] |= 1LL << bit;
+    } else if (bit < 128) {
+        excp[1] |= 1LL << (bit - 64);
+    }
+}
+
+#define check_exception_vector(excp, expect) \
+    do { \
+        check64(excp[0], expect[0]); \
+        check64(excp[1], expect[1]); \
+    } while (0)
+
+static inline void print_exception_vector(exception_vector excp)
+{
+    printf("exceptions (0x%016llx 0x%016llx):", excp[1], excp[0]);
+    for (int i = 0; i < 64; i++) {
+        if (excp[0] & (1LL << i)) {
+            printf(" 0x%x", i);
+        }
+    }
+    for (int i = 0; i < 64; i++) {
+        if (excp[1] & (1LL << i)) {
+            printf(" 0x%x", i + 64);
+        }
+    }
+    printf("\n");
+}
+
+/* volatile because it is written through different MMU mappings */
+typedef volatile int mmu_variable;
+mmu_variable data = 0xdeadbeef;
+
+typedef int (*func_t)(void);
+/* volatile because it will be invoked via different MMU mappings */
+typedef volatile func_t mmu_func_t;
+
+/*
+ * Create a function that returns its (virtual) address
+ * Write it fully in assembly so we don't have to worry about
+ * which optimization level we are compiled with
+ */
+extern int func_return_pc(void);
+asm(
+".global func_return_pc\n"
+".balign 4\n"
+".type func_return_pc, @function\n"
+"func_return_pc:\n"
+"    r0 = pc\n"
+"    jumpr r31\n"
+".size func_return_pc, . - func_return_pc\n"
+);
+
+enum {
+    TLB_U = (1 << 0),
+    TLB_R = (1 << 1),
+    TLB_W = (1 << 2),
+    TLB_X = (1 << 3),
+};
+
+#define HEX_CAUSE_FETCH_NO_XPAGE                  0x011
+#define HEX_CAUSE_FETCH_NO_UPAGE                  0x012
+#define HEX_CAUSE_PRIV_NO_READ                    0x022
+#define HEX_CAUSE_PRIV_NO_WRITE                   0x023
+#define HEX_CAUSE_PRIV_NO_UREAD                   0x024
+#define HEX_CAUSE_PRIV_NO_UWRITE                  0x025
+#define HEX_CAUSE_IMPRECISE_MULTI_TLB_MATCH       0x044
+#define HEX_CAUSE_TLBMISSX_NORMAL                 0x060
+#define HEX_CAUSE_TLBMISSX_NEXTPAGE               0x061
+#define HEX_CAUSE_TLBMISSRW_READ                  0x070
+#define HEX_CAUSE_TLBMISSRW_WRITE                 0x071
+
+/*
+ * The following lets us override the default exception handlers
+ * This can be handy for adding code to check that they are called as well
+ * as special handling needed for the test to succeed.
+ *
+ * MY_EVENT_HANDLE           Use this to define your own event handler
+ * DEFAULT_EVENT_HANDLE      Use this to point to the default handler
+ * my_event_vectors          New event vector table
+ * install_my_event_vectors  Change from the default event handlers
+ */
+
+extern void *my_event_vectors;
+
+#define MY_EVENT_HANDLE(name, helper) \
+void name(void) \
+{ \
+    asm volatile("crswap(sp, sgp0)\n\t" \
+                 "memd(sp++#8) = r1:0\n\t" \
+                 "memd(sp++#8) = r3:2\n\t" \
+                 "memd(sp++#8) = r5:4\n\t" \
+                 "memd(sp++#8) = r7:6\n\t" \
+                 "memd(sp++#8) = r9:8\n\t" \
+                 "memd(sp++#8) = r11:10\n\t" \
+                 "memd(sp++#8) = r13:12\n\t" \
+                 "memd(sp++#8) = r15:14\n\t" \
+                 "memd(sp++#8) = r17:16\n\t" \
+                 "memd(sp++#8) = r19:18\n\t" \
+                 "memd(sp++#8) = r21:20\n\t" \
+                 "memd(sp++#8) = r23:22\n\t" \
+                 "memd(sp++#8) = r25:24\n\t" \
+                 "memd(sp++#8) = r27:26\n\t" \
+                 "memd(sp++#8) = r31:30\n\t" \
+                 "r0 = ssr\n\t" \
+                 "call " #helper "\n\t" \
+                 "sp = add(sp, #-8)\n\t" \
+                 "r31:30 = memd(sp++#-8)\n\t" \
+                 "r27:26 = memd(sp++#-8)\n\t" \
+                 "r25:24 = memd(sp++#-8)\n\t" \
+                 "r23:22 = memd(sp++#-8)\n\t" \
+                 "r21:20 = memd(sp++#-8)\n\t" \
+                 "r19:18 = memd(sp++#-8)\n\t" \
+                 "r17:16 = memd(sp++#-8)\n\t" \
+                 "r15:14 = memd(sp++#-8)\n\t" \
+                 "r13:12 = memd(sp++#-8)\n\t" \
+                 "r11:10 = memd(sp++#-8)\n\t" \
+                 "r9:8 = memd(sp++#-8)\n\t" \
+                 "r7:6 = memd(sp++#-8)\n\t" \
+                 "r5:4 = memd(sp++#-8)\n\t" \
+                 "r3:2 = memd(sp++#-8)\n\t" \
+                 "r1:0 = memd(sp)\n\t" \
+                 "crswap(sp, sgp0);\n\t" \
+                 "rte\n\t"); \
+}
+
+#ifndef NO_DEFAULT_EVENT_HANDLES
+
+#define DEFAULT_EVENT_HANDLE(name, offset) \
+void name(void) \
+{ \
+    asm volatile("r0 = %0\n\t" \
+                 "r0 = add(r0, #" #offset ")\n\t" \
+                 "jumpr r0\n\t" \
+                 : : "r"(old_evb) : "r0"); \
+}
+
+
+/* Use these values as the offset for DEFAULT_EVENT_HANDLE */
+asm (
+".set HANDLE_RESET_OFFSET,               0x00\n\t"
+".set HANDLE_NMI_OFFSET,                 0x04\n\t"
+".set HANDLE_ERROR_OFFSET,               0x08\n\t"
+".set HANDLE_RSVD_OFFSET,                0x0c\n\t"
+".set HANDLE_TLBMISSX_OFFSET,            0x10\n\t"
+".set HANDLE_TLBMISSRW_OFFSET,           0x18\n\t"
+".set HANDLE_TRAP0_OFFSET,               0x20\n\t"
+".set HANDLE_TRAP1_OFFSET,               0x24\n\t"
+".set HANDLE_FPERROR_OFFSET,             0x28\n\t"
+".set HANDLE_INT_OFFSET,                 0x40\n\t"
+);
+
+asm(
+".align 0x1000\n\t"
+"my_event_vectors:\n\t"
+    "jump my_event_handle_reset\n\t"
+    "jump my_event_handle_nmi\n\t"
+    "jump my_event_handle_error\n\t"
+    "jump my_event_handle_rsvd\n\t"
+    "jump my_event_handle_tlbmissx\n\t"
+    "jump my_event_handle_rsvd\n\t"
+    "jump my_event_handle_tlbmissrw\n\t"
+    "jump my_event_handle_rsvd\n\t"
+    "jump my_event_handle_trap0\n\t"
+    "jump my_event_handle_trap1\n\t"
+    "jump my_event_handle_rsvd\n\t"
+    "jump my_event_handle_fperror\n\t"
+    "jump my_event_handle_rsvd\n\t"
+    "jump my_event_handle_rsvd\n\t"
+    "jump my_event_handle_rsvd\n\t"
+    "jump my_event_handle_rsvd\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+    "jump my_event_handle_int\n\t"
+);
+
+#define DEFAULT_EVENT_HANDLES \
+DEFAULT_EVENT_HANDLE(my_event_handle_error,       HANDLE_ERROR_OFFSET) \
+DEFAULT_EVENT_HANDLE(my_event_handle_nmi,         HANDLE_NMI_OFFSET) \
+DEFAULT_EVENT_HANDLE(my_event_handle_tlbmissrw,   HANDLE_TLBMISSRW_OFFSET) \
+DEFAULT_EVENT_HANDLE(my_event_handle_tlbmissx,    HANDLE_TLBMISSX_OFFSET) \
+DEFAULT_EVENT_HANDLE(my_event_handle_reset,       HANDLE_RESET_OFFSET) \
+DEFAULT_EVENT_HANDLE(my_event_handle_rsvd,        HANDLE_RSVD_OFFSET) \
+DEFAULT_EVENT_HANDLE(my_event_handle_trap0,       HANDLE_TRAP0_OFFSET) \
+DEFAULT_EVENT_HANDLE(my_event_handle_trap1,       HANDLE_TRAP1_OFFSET) \
+DEFAULT_EVENT_HANDLE(my_event_handle_int,         HANDLE_INT_OFFSET) \
+DEFAULT_EVENT_HANDLE(my_event_handle_fperror,     HANDLE_FPERROR_OFFSET)
+
+#endif /* NO_DEFAULT_EVENT_HANDLES */
+
+/* When a permission error happens, add the permission to the TLB entry */
+void my_event_handle_error_helper(uint32_t ssr)
+{
+    uint32_t cause = GET_FIELD(ssr, SSR_CAUSE);
+    uint32_t badva = getbadva();
+    uint32_t entry_num = tlb_entry_num(badva);
+    uint64_t entry;
+
+    set_exception_vector_bit(my_exceptions, cause);
+
+    switch (cause) {
+    case HEX_CAUSE_FETCH_NO_XPAGE:
+        entry = tlbr(entry_num);
+        SET_FIELD(entry, PTE_X, 1);
+        tlbw(entry, entry_num);
+        break;
+    case HEX_CAUSE_FETCH_NO_UPAGE:
+        entry = tlbr(entry_num);
+        SET_FIELD(entry, PTE_U, 1);
+        tlbw(entry, entry_num);
+        break;
+    case HEX_CAUSE_PRIV_NO_READ:
+        entry = tlbr(entry_num);
+        SET_FIELD(entry, PTE_R, 1);
+        tlbw(entry, entry_num);
+        break;
+    case HEX_CAUSE_PRIV_NO_WRITE:
+        entry = tlbr(entry_num);
+        SET_FIELD(entry, PTE_W, 1);
+        tlbw(entry, entry_num);
+        break;
+    case HEX_CAUSE_PRIV_NO_UREAD:
+        entry = tlbr(entry_num);
+        SET_FIELD(entry, PTE_U, 1);
+        tlbw(entry, entry_num);
+        break;
+    case HEX_CAUSE_PRIV_NO_UWRITE:
+        entry = tlbr(entry_num);
+        SET_FIELD(entry, PTE_U, 1);
+        tlbw(entry, entry_num);
+        break;
+    default:
+        do_coredump();
+        break;
+    }
+}
+
+void my_event_handle_nmi_helper(uint32_t ssr)
+{
+    uint32_t cause = GET_FIELD(ssr, SSR_CAUSE);
+
+    set_exception_vector_bit(my_exceptions, cause);
+
+    switch (cause) {
+    case HEX_CAUSE_IMPRECISE_MULTI_TLB_MATCH:
+        break;
+    default:
+        do_coredump();
+        break;
+    }
+}
+
+/*
+ * When a TLB miss happens, create a mapping
+ * We'll set different read/write/execute permissions
+ * for different entry numbers.
+ */
+void my_event_handle_tlbmissrw_helper(uint32_t ssr)
+{
+    uint32_t cause = GET_FIELD(ssr, SSR_CAUSE);
+    uint32_t badva = getbadva();
+    uint32_t entry_num = tlb_entry_num(badva);
+    uint32_t VA = page_start(badva, TARGET_PAGE_BITS);
+    uint32_t PA = VA - (entry_num * ONE_MB);
+
+    uint64_t entry =
+        create_mmu_entry(1, 0, 0, 0, VA, 0, 0, 0, 1, 0x3, PA, PAGE_4K);
+    if (entry_num == TWO_MB_ENTRY) {
+        SET_FIELD(entry, PTE_R, 1);
+    }
+    if (entry_num == THREE_MB_ENTRY) {
+        SET_FIELD(entry, PTE_W, 1);
+    }
+
+    set_exception_vector_bit(my_exceptions, cause);
+
+    switch (cause) {
+    case HEX_CAUSE_TLBMISSRW_READ:
+        tlbw(entry, entry_num);
+        break;
+    case HEX_CAUSE_TLBMISSRW_WRITE:
+        tlbw(entry, entry_num);
+        break;
+    default:
+        do_coredump();
+        break;
+    }
+}
+
+void my_event_handle_tlbmissx_helper(uint32_t ssr)
+{
+    uint32_t cause = GET_FIELD(ssr, SSR_CAUSE);
+    uint32_t badva = getbadva();
+    uint32_t entry_num = tlb_entry_num(badva);
+    uint32_t VA = page_start(badva, TARGET_PAGE_BITS);
+    uint32_t PA = VA - (entry_num * ONE_MB);
+
+    uint64_t entry =
+        create_mmu_entry(1, 0, 0, 0, VA, 0, 0, 0, 1, 0x3, PA, PAGE_4K);
+
+    set_exception_vector_bit(my_exceptions, cause);
+
+    switch (cause) {
+    case HEX_CAUSE_TLBMISSX_NORMAL:
+        tlbw(entry, entry_num);
+        break;
+    default:
+        do_coredump();
+        break;
+    }
+}
+
+static inline void install_my_event_vectors(void)
+{
+    old_evb = getevb();
+    setevb(&my_event_vectors);
+}
+
+#define MAKE_GOTO(name) \
+void goto_##name(void) \
+{ \
+    asm volatile("r0 = ##" #name "\n\t" \
+                 "jumpr r0\n\t" \
+                 : : : "r0"); \
+}
+
+#define MAKE_ERR_HANDLER(name, helper_fn) \
+    MY_EVENT_HANDLE(name, helper_fn) \
+    MAKE_GOTO(name)
+
+#define INSTALL_ERR_HANDLER(name) { \
+    /*
+     * Install our own privelege exception handler.
+     * The normal behavior is to coredump
+     * Read and decode the jump displacemnts from evb
+     * ASSUME negative displacement which is the standard.
+     */ \
+    uint32_t *evb_err = getevb() + 2; \
+    uint32_t err_distance = -(0xfe000000 | *evb_err) << 1; \
+    uint32_t err_handler = (uint32_t)evb_err - err_distance; \
+    memcpy((void *)err_handler, goto_##name, 12); \
+} while (0)
+
+static inline void remove_trans(int index)
+{
+    uint64_t entry = tlbr(index);
+    SET_FIELD(entry, PTE_V, 0);
+    tlbw(entry, index);
+}
+
+static inline void clear_overlapping_entry(unsigned int asid, uint32_t va)
+{
+    int32_t index = tlbp(asid, va);
+    if (index != TLB_NOT_FOUND) {
+        remove_trans(index);
+    }
+}
+
+static void add_trans(int index, uint32_t va, uint64_t pa,
+                      PageSize page_size, uint8_t xwru,
+                      unsigned int asid, uint8_t V, uint8_t G)
+{
+    if (V) {
+        clear_overlapping_entry(asid, va);
+    }
+    assert(!add_translation_extended(index, (void *)va, pa, page_size,
+                                     xwru, 0, asid, 0,
+                                     ((V & 1) << 1) | (G & 1)));
+}
+
+#endif
diff --git a/tests/tcg/hexagon/system/mmu_asids.c b/tests/tcg/hexagon/system/mmu_asids.c
new file mode 100644
index 0000000000000..34f25c25a3d7f
--- /dev/null
+++ b/tests/tcg/hexagon/system/mmu_asids.c
@@ -0,0 +1,80 @@
+/*
+ *  Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+
+
+#define DEBUG        0
+
+#include "mmu.h"
+
+DEFAULT_EVENT_HANDLES
+
+void test_asids(void)
+{
+    uint32_t addr = (uint32_t)&data;
+    uint32_t page = page_start(addr, TARGET_PAGE_BITS);
+    uint32_t offset = FIVE_MB;
+    uint32_t new_addr = addr + offset;
+    uint32_t new_page = page + offset;
+    uint64_t entry =
+        create_mmu_entry(0, 0, 0, 1, new_page, 1, 1, 1, 0, 7, page, PAGE_4K);
+    /*
+     * Create a TLB entry for ASID=1
+     * Write it at index 1
+     * Check that it is present
+     * Invalidate the ASID
+     * Check that it is not found
+     */
+    tlbw(entry, 1);
+    check32(tlboc(entry), 1);
+    tlbinvasid(entry >> 32);
+    check32(tlboc(entry), TLB_NOT_FOUND);
+
+    /*
+     * Re-install the entry
+     * Put ourselves in ASID=1
+     * Do a load and a store
+     */
+    data = 0xdeadbeef;
+    tlbw(entry, 1);
+    set_asid(1);
+    check32(*(mmu_variable *)new_addr, 0xdeadbeef);
+    *(mmu_variable *)new_addr = 0xcafebabe;
+    check32(data, 0xcafebabe);
+
+    /*
+     * Make sure a load from ASID 2 gets a different value.
+     * The standalone runtime will create a VA==PA entry on
+     * a TLB miss, so the load will be reading from uninitialized
+     * memory.
+     */
+    set_asid(2);
+    data = 0xdeadbeef;
+    check32_ne(*(mmu_variable *)new_addr, 0xdeadbeef);
+
+    /*
+     * Invalidate the ASID and make sure a loads from ASID 1
+     * gets a different value.
+     */
+    tlbinvasid(entry >> 32);
+    set_asid(1);
+    data = 0xcafebabe;
+    check32_ne(*(mmu_variable *)new_addr, 0xcafebabe);
+}
+
+int main()
+{
+    puts("Hexagon MMU ASID test");
+
+    test_asids();
+
+    printf("%s\n", ((err) ? "FAIL" : "PASS"));
+    return err;
+}
diff --git a/tests/tcg/hexagon/system/mmu_overlap.c b/tests/tcg/hexagon/system/mmu_overlap.c
new file mode 100644
index 0000000000000..73d0565abed43
--- /dev/null
+++ b/tests/tcg/hexagon/system/mmu_overlap.c
@@ -0,0 +1,65 @@
+/*
+ *  Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+
+#define DEBUG        0
+
+#include "mmu.h"
+
+DEFAULT_EVENT_HANDLES
+
+void test_overlap(void)
+{
+    uint32_t addr = (uint32_t)&data;
+    uint32_t page = page_start(addr, 20);
+    uint32_t offset = FIVE_MB;
+    uint32_t new_page = page + offset;
+    uint32_t new_addr = addr + offset;
+    uint8_t data_perm = TLB_X | TLB_W | TLB_R | TLB_U;
+    uint64_t entry;
+
+    add_trans(1, new_page, page, PAGE_1M, data_perm, 0, 1, 1);
+    check32(tlbp(0, new_addr), 1);
+
+    /* Check an entry that overlaps with the one we just created */
+    entry =
+        create_mmu_entry(1, 0, 0, 0, new_page, 1, 1, 1, 0, 7, page, PAGE_4K);
+    check32(tlboc(entry), 1);
+    /* Check that conditional TLB write (ctlbw) does NOT write the new entry */
+    check32(ctlbw(entry, 2), 0x1);
+
+    /* Create an entry that does not overlap with the one we just created */
+    entry = create_mmu_entry(1, 0, 0, 0, new_page + ONE_MB, 1, 1, 1, 0, 7, page,
+                             PAGE_4K);
+    check32(tlboc(entry), TLB_NOT_FOUND);
+    /* Check that conditional TLB write (ctlbw) does write the new entry */
+    check32(ctlbw(entry, 2), TLB_NOT_FOUND);
+
+    /* Create an entry that overalps both of these entries */
+    entry =
+        create_mmu_entry(1, 0, 0, 0, new_page, 1, 1, 1, 0, 7, page, PAGE_4M);
+    check32(tlboc(entry), 0xffffffff);
+
+    /* Clear the TLB entries */
+    remove_trans(1);
+    check32(tlbp(0, new_addr), TLB_NOT_FOUND);
+    remove_trans(2);
+    check32(tlbp(0, (new_addr + ONE_MB)), TLB_NOT_FOUND);
+}
+
+int main()
+{
+    puts("Hexagon MMU overlap test");
+
+    test_overlap();
+
+    printf("%s\n", ((err) ? "FAIL" : "PASS"));
+    return err;
+}
diff --git a/tests/tcg/hexagon/system/monitor_insts.S b/tests/tcg/hexagon/system/monitor_insts.S
new file mode 100644
index 0000000000000..8027068511f1f
--- /dev/null
+++ b/tests/tcg/hexagon/system/monitor_insts.S
@@ -0,0 +1,18 @@
+/*
+ *  Copyright(c) 2020-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+    .text
+    .type test_set_prio, @function
+    .global test_set_prio
+
+test_set_prio:
+    r0 = #3
+    r1 = #1
+    p0 = cmp.eq(r0,r1)
+    setprio(p0, r0)
+    jumpr lr
+
+    .size test_set_prio, . - test_set_prio
diff --git a/tests/tcg/hexagon/system/reg_fields_def.h b/tests/tcg/hexagon/system/reg_fields_def.h
new file mode 100644
index 0000000000000..ff2769a1399d6
--- /dev/null
+++ b/tests/tcg/hexagon/system/reg_fields_def.h
@@ -0,0 +1,87 @@
+/* PTE (aka TLB entry) fields */
+DEF_REG_FIELD(PTE_PPD,
+    "PPD", 0, 24,
+    "Physical page number that the corresponding virtual page maps to.")
+DEF_REG_FIELD(PTE_C,
+    "C", 24, 4,
+    "Cacheability attributes for the page.")
+DEF_REG_FIELD(PTE_U,
+    "U", 28, 1,
+    "User mode permitted.")
+DEF_REG_FIELD(PTE_R,
+    "R", 29, 1,
+    "Read-enable.")
+DEF_REG_FIELD(PTE_W,
+    "W", 30, 1,
+    "Write-enable.")
+DEF_REG_FIELD(PTE_X,
+    "X", 31, 1,
+    "Execute-enable.")
+DEF_REG_FIELD(PTE_VPN,
+    "VPN", 32, 20,
+    "Virtual page number that is matched against the load or store address.")
+DEF_REG_FIELD(PTE_ASID,
+    "ASID", 52, 7,
+    "7-bit address space identifier (tag extender)")
+DEF_REG_FIELD(PTE_ATR0,
+    "ATR0", 59, 1,
+    "General purpose attribute bit kept as an attribute of each cache line.")
+DEF_REG_FIELD(PTE_ATR1,
+    "ATR1", 60, 1,
+    "General purpose attribute bit kept as an attribute of each cache line.")
+DEF_REG_FIELD(PTE_PA35,
+    "PA35", 61, 1,
+    "The Extra Physical bit is the most-significant physical address bit.")
+DEF_REG_FIELD(PTE_G,
+    "G", 62, 1,
+    "Global bit. If set, then the ASID is ignored in the match.")
+DEF_REG_FIELD(PTE_V,
+    "V", 63, 1,
+    "Valid bit. indicates whether this entry should be used for matching.")
+
+/* SSR fields */
+DEF_REG_FIELD(SSR_CAUSE,
+    "cause", 0, 8,
+    "8-bit field that contains the reason for various exception.")
+DEF_REG_FIELD(SSR_ASID,
+    "asid", 8, 7,
+    "7-bit field that contains the Address Space Identifier.")
+DEF_REG_FIELD(SSR_UM,
+    "um", 16, 1,
+    "read-write bit.")
+DEF_REG_FIELD(SSR_EX,
+    "ex", 17, 1,
+    "set when an interrupt or exception is accepted.")
+DEF_REG_FIELD(SSR_IE,
+    "ie", 18, 1,
+    "indicates whether the global interrupt is enabled.")
+DEF_REG_FIELD(SSR_GM,
+    "gm", 19, 1,
+    "Guest mode bit.")
+DEF_REG_FIELD(SSR_V0,
+    "v0", 20, 1,
+    "if BADVA0 register contents are from a valid slot 0 instruction.")
+DEF_REG_FIELD(SSR_V1,
+     "v1", 21, 1,
+    "if BADVA1 register contents are from a valid slot 1 instruction.")
+DEF_REG_FIELD(SSR_BVS,
+    "bvs", 22, 1,
+    "BADVA Selector.")
+DEF_REG_FIELD(SSR_CE,
+    "ce", 23, 1,
+    "grants user or guest read permissions to the PCYCLE register aliases.")
+DEF_REG_FIELD(SSR_PE,
+    "pe", 24, 1,
+    "grants guest read permissions to the PMU register aliases.")
+DEF_REG_FIELD(SSR_BP,
+    "bp", 25, 1,
+    "Internal Bus Priority bit.")
+DEF_REG_FIELD(SSR_XA,
+    "xa", 27, 3,
+    "Extension Active, which control operation of an attached coprocessor.")
+DEF_REG_FIELD(SSR_SS,
+    "ss", 30, 1,
+    "Single Step, which enables single-step exceptions.")
+DEF_REG_FIELD(SSR_XE,
+    "xe", 31, 1,
+    "Coprocessor Enable, which enables use of an attached coprocessor.")
diff --git a/tests/tcg/hexagon/system/semihost.c b/tests/tcg/hexagon/system/semihost.c
new file mode 100644
index 0000000000000..7a0fa0cb73ff2
--- /dev/null
+++ b/tests/tcg/hexagon/system/semihost.c
@@ -0,0 +1,297 @@
+/*
+ *  Copyright(c) 2023-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <unistd.h>
+#include <dirent.h>
+#include "strutils.h"
+
+/* Defines in order of testing */
+
+/* env/CLI-related */
+#define HEX_SYS_GET_CMDLINE     0x15
+#define HEX_SYS_GETCWD          0x104
+
+/* File manipulation */
+#define HEX_SYS_TMPNAM          0x0d
+#define HEX_SYS_OPEN            0x01
+#define HEX_SYS_ACCESS          0x105
+#define HEX_SYS_ISTTY           0x09
+#define HEX_SYS_WRITE           0x05
+#define HEX_SYS_SEEK            0x0a
+#define HEX_SYS_READ            0x06
+#define HEX_SYS_FTELL           0x100
+#define HEX_SYS_FSTAT           0x101
+#define HEX_SYS_FTRUNC          0x186
+#define HEX_SYS_FLEN            0x0c
+#define HEX_SYS_CLOSE           0x02
+#define HEX_SYS_ERRNO           0x13
+#define HEX_SYS_RENAME          0x0f
+#define HEX_SYS_STAT            0x103
+#define HEX_SYS_REMOVE          0x0e
+
+/* Time */
+#define HEX_SYS_CLOCK           0x10
+#define HEX_SYS_TIME            0x11
+
+/* dirent */
+#define HEX_SYS_OPENDIR         0x180
+#define HEX_SYS_CLOSEDIR        0x181
+#define HEX_SYS_READDIR         0x182
+
+/* STDOUT */
+#define HEX_SYS_WRITEC          0x03
+#define HEX_SYS_WRITE0          0x04
+#define HEX_SYS_WRITECREG       0x43
+
+static uint32_t ret, err, args[4];
+
+/*
+ * Macro flavors:
+ * - DIRECT_SWI takes up to two args an put them at r1 and r2.
+ * - SWI takes up to four args and puts them in an array, placing the
+ *   array address at r1.
+ */
+
+#define DO_SWI(CODE, ARG0, ARG1) \
+    do { \
+        asm volatile( \
+                "r0 = %2\n" \
+                "r1 = %3\n" \
+                "r2 = %4\n" \
+                "trap0(#0)\n" \
+                "%0 = r0\n" \
+                "%1 = r1\n" \
+                : "=r"(ret), "=r"(err) \
+                : "r"(CODE), "r"(ARG0), "r"(ARG1) \
+                : "r0", "r1", "r2", "memory" \
+                ); \
+    } while (0)
+
+#define SWI0(CODE) DO_SWI(CODE, args, 0)
+#define SWI1(CODE, ARG0) \
+    do { args[0] = (uint32_t)(ARG0); SWI0(CODE); } while (0)
+#define SWI2(CODE, ARG0, ARG1) \
+    do { args[1] = (uint32_t)(ARG1); SWI1(CODE, ARG0); } while (0)
+#define SWI3(CODE, ARG0, ARG1, ARG2) \
+    do { args[2] = (uint32_t)(ARG2); SWI2(CODE, ARG0, ARG1); } while (0)
+#define SWI4(CODE, ARG0, ARG1, ARG2, ARG3) \
+    do { args[3] = (uint32_t)(ARG3); SWI3(CODE, ARG0, ARG1, ARG2); } while (0)
+
+#define GET_MACRO_5(_1, _2, _3, _4, _5, NAME, ...) NAME
+#define SWI(...) \
+    GET_MACRO_5(__VA_ARGS__, SWI4, SWI3, SWI2, SWI1, SWI0)(__VA_ARGS__)
+
+#define DIRECT_SWI0(CODE) DO_SWI(CODE, 0, 0)
+#define DIRECT_SWI1(CODE, ARG1) DO_SWI(CODE, ARG1, 0)
+#define DIRECT_SWI2(CODE, ARG1, ARG2) DO_SWI(CODE, ARG1, ARG2)
+
+#define GET_MACRO_3(_1, _2, _3, NAME, ...) NAME
+#define DIRECT_SWI(...) \
+    GET_MACRO_3(__VA_ARGS__, DIRECT_SWI2, DIRECT_SWI1, DIRECT_SWI0)(__VA_ARGS__)
+
+#define is_path_sep(C) ((C) == '/' || (C) == '\\')
+
+static int path_ends_with(const char *str, const char *suffix)
+{
+    const char *str_cursor = str + strlen(str) - 1;
+    const char *suffix_cursor = suffix + strlen(suffix) - 1;
+    while (str_cursor >= str && suffix_cursor >= suffix) {
+        /* is_path_sep handles the semihosting-on-Windows case */
+        if (*str_cursor != *suffix_cursor &&
+            !(is_path_sep(*str_cursor) && is_path_sep(*suffix_cursor))) {
+            return 0;
+        }
+        str_cursor--;
+        suffix_cursor--;
+    }
+    return 1;
+}
+
+/*
+ * This must match the caller's definition, it would be in the
+ * caller's angel.h or equivalent header.
+ */
+struct __SYS_STAT {
+    uint64_t dev;
+    uint64_t ino;
+    uint32_t mode;
+    uint32_t nlink;
+    uint64_t rdev;
+    uint32_t size;
+    uint32_t __pad1;
+    uint32_t atime;
+    uint32_t mtime;
+    uint32_t ctime;
+    uint32_t __pad2;
+};
+
+int main(int argc, char **argv)
+{
+    /* GET_CMDLINE */
+    char argv_concat[1024];
+    char *cursor = argv_concat;
+    for (int i = 0; i < argc; i++) {
+        strcpy(cursor, argv[i]);
+        cursor += strlen(argv[i]);
+        *cursor = ' ';
+        cursor++;
+    }
+    *(cursor - 1) = '\0';
+    char buf[4096];
+    SWI(HEX_SYS_GET_CMDLINE, buf, sizeof(buf));
+    assert(!ret && !strcmp(buf, argv_concat));
+
+    /* GETCWD */
+    const char *expected_cwd = "tests/tcg/hexagon-softmmu";
+    SWI(HEX_SYS_GETCWD, buf, sizeof(buf));
+    assert(ret && path_ends_with(buf, expected_cwd));
+
+    /* TMPNAM */
+    char fname[4096];
+    SWI(HEX_SYS_TMPNAM, fname, 0, sizeof(fname));
+    assert(!ret);
+
+    /* OPEN */
+    /* 13 is O_RDWR | O_CREAT | O_EXCL */
+    SWI(HEX_SYS_OPEN, fname, 13, strlen(fname));
+    int fd = (int)ret;
+    assert(fd >= 0);
+
+    /* ACCESS */
+    SWI(HEX_SYS_ACCESS, fname, R_OK);
+    assert(!ret);
+    /* ACCESS with error */
+    SWI(HEX_SYS_ACCESS, "non-existent-semihost-file", R_OK);
+    assert(ret);
+    assert(err == ENOENT);
+
+    /* ISTTY */
+    SWI(HEX_SYS_ISTTY, fd);
+    assert(!ret);
+
+    /* WRITE */
+    char *str = "hello";
+    SWI(HEX_SYS_WRITE, fd, str, strlen(str));
+    assert(!ret);
+
+    /* SEEK */
+    SWI(HEX_SYS_SEEK, fd, 0);
+    assert(!ret);
+
+    /* READ */
+    int n = strlen(str);
+    SWI(HEX_SYS_READ, fd, buf, n);
+    buf[n] = '\0';
+    assert(!ret && !strcmp(str, buf));
+
+    /* FTELL */
+    SWI(HEX_SYS_FTELL, fd);
+    assert(ret == strlen(str));
+
+    /* FSTAT */
+    struct __SYS_STAT st;
+    SWI(HEX_SYS_FSTAT, fd, &st);
+    assert(!ret);
+    assert(st.atime && st.ctime && st.mtime);
+    assert(st.size == strlen(str));
+    assert((st.mode & S_IFMT) == S_IFREG);
+
+    /* FTRUNC */
+    SWI(HEX_SYS_FTRUNC, fd, 1, 0);
+    assert(!ret);
+
+    /* FLEN */
+    SWI(HEX_SYS_FLEN, fd);
+    assert(ret == 1);
+
+    /* CLOSE */
+    SWI(HEX_SYS_CLOSE, fd);
+    assert(!ret);
+
+    /* CLOSE w/ error && ERRNO */
+    SWI(HEX_SYS_CLOSE, fd);
+    assert(ret);
+    assert(err == EBADF);
+    SWI(HEX_SYS_ERRNO);
+    assert(ret == EBADF);
+
+    /* RENAME */
+    char ogfname[4096];
+    int len = strlen(fname);
+    strcpy(ogfname, fname);
+    fname[len - 1] = (fname[len - 1] == 'a' ? 'b' : 'a');
+    SWI(HEX_SYS_RENAME, ogfname, len, fname, len);
+    assert(!ret);
+
+    /* STAT */
+    SWI(HEX_SYS_STAT, fname, &st);
+    assert(!ret);
+    assert(st.atime && st.ctime && st.mtime);
+    assert(st.size == 1);
+    assert((st.mode & S_IFMT) == S_IFREG);
+
+    /* REMOVE */
+    SWI(HEX_SYS_REMOVE, fname, strlen(fname));
+    assert(!ret);
+
+    /* STAT w/ error */
+    SWI(HEX_SYS_STAT, fname, &st);
+    assert(ret);
+    assert(err == ENOENT);
+
+    /* TIME && CLOCK */
+    SWI(HEX_SYS_TIME);
+    assert(ret);
+    SWI(HEX_SYS_CLOCK);
+    assert(ret);
+
+    /* OPENDIR */
+    char *dname = "./_semihost_dir";
+    DIRECT_SWI(HEX_SYS_OPENDIR, dname);
+    assert(ret);
+    int dir_index = ret;
+
+    /* READDIR */
+    char *expected_files[4] = { ".", "..", "fileA", "fileB" };
+    char found_files_buffer[4][256];
+    char *found_files[4];
+    for (int i = 0; 1; i++) {
+        struct __attribute__((__packed__)) { int32_t _; char d_name[256]; } dirent;
+        DIRECT_SWI(HEX_SYS_READDIR, dir_index, &dirent);
+        if (!ret) {
+            break;
+        }
+        assert(i < 4);
+        found_files[i] = found_files_buffer[i];
+        strcpy(found_files[i], dirent.d_name);
+    }
+
+    sort_str_arr(found_files, 4);
+    for (int i = 0; i < 4; i++) {
+        assert(!strcmp(found_files[i], expected_files[i]));
+    }
+
+    /* CLOSEDIR */
+    DIRECT_SWI(HEX_SYS_CLOSEDIR, dir_index);
+    assert(!ret);
+
+    /* WRITEC, WRITECREG, WRITE0 */
+    /* We use DO_SWI directly here to bypass the args array */
+    char *pass = "PASS\n";
+    DIRECT_SWI(HEX_SYS_WRITEC, &pass[0]);
+    DIRECT_SWI(HEX_SYS_WRITECREG, pass[1]);
+    DIRECT_SWI(HEX_SYS_WRITE0, &pass[2]);
+
+    return 0;
+}
diff --git a/tests/tcg/hexagon/system/standalone_hw.c b/tests/tcg/hexagon/system/standalone_hw.c
new file mode 100644
index 0000000000000..c67343204a805
--- /dev/null
+++ b/tests/tcg/hexagon/system/standalone_hw.c
@@ -0,0 +1,43 @@
+#include <stdio.h>
+#include <assert.h>
+
+void test_set_prio();
+
+void inst_test()
+{
+    asm volatile("dczeroa(r0)\n\t"
+                 "dccleanidx(r0)\n\t"
+                 "dcinvidx(r0)\n\t"
+                 "r1 = dctagr(r0)\n\t"
+                 "dctagw(r0, r1)\n\t"
+                 "dcfetch(r0)\n\t"
+                 "dccleaninvidx(r0)\n\t"
+                 "l2gclean\n\t"
+                 "l2gclean(r1:0)\n\t"
+                 "l2gcleaninv\n\t"
+                 "l2gcleaninv(r1:0)\n\t"
+                 "l2gunlock\n\t"
+                 "l2kill\n\t"
+                 "trace(r0)\n\t"
+                 "pause(#1)\n\t"
+                );
+
+    asm volatile("r0 = #0\n\t"
+                 "r1 = iassignr(r0)\n\t"
+                 /* Set interrupt 0 to disabled on all threads */
+                 "r0 = #0\n\t"
+                 "iassignw(r0)\n\t");
+
+    test_set_prio();
+    printf("Executed monitor mode instructions\n");
+}
+
+int main(int argc, const char *argv[])
+{
+    inst_test();
+    printf("Hello, World: (argc: %d)\n", argc);
+    assert(argc >= 1);
+    for (int i = 0; i < argc; i++) {
+        printf("\t> '%s'\n", argv[i]);
+    }
+}
diff --git a/tests/tcg/hexagon/system/standalone_vec.c b/tests/tcg/hexagon/system/standalone_vec.c
new file mode 100644
index 0000000000000..eb1b2ef4830ce
--- /dev/null
+++ b/tests/tcg/hexagon/system/standalone_vec.c
@@ -0,0 +1,1419 @@
+/*
+ *  Copyright(c) 2023-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <hexagon_types.h>
+#include <hexagon_protos.h>
+
+#include "cfgtable.h"
+
+int err;
+
+#ifdef __linux__
+#define VTCM_SIZE_KB (2048)
+#define VTCM_BYTES_PER_KB (1024)
+
+static char vtcm_buffer[VTCM_SIZE_KB * VTCM_BYTES_PER_KB]
+    __attribute__((aligned(0x10000)));
+#endif
+
+/* define the number of rows/cols in a square matrix */
+#define MATRIX_SIZE 64
+
+/* define the size of the scatter buffer */
+#define SCATTER_BUFFER_SIZE (MATRIX_SIZE * MATRIX_SIZE)
+
+#define SCATTER16_BUF_SIZE (2 * SCATTER_BUFFER_SIZE)
+#define SCATTER32_BUF_SIZE (4 * SCATTER_BUFFER_SIZE)
+
+#define GATHER16_BUF_SIZE (2 * MATRIX_SIZE)
+#define GATHER32_BUF_SIZE (4 * MATRIX_SIZE)
+
+uintptr_t VTCM_BASE_ADDRESS;
+uintptr_t VTCM_SCATTER16_ADDRESS;
+uintptr_t VTCM_GATHER16_ADDRESS;
+uintptr_t VTCM_SCATTER32_ADDRESS;
+uintptr_t VTCM_GATHER32_ADDRESS;
+uintptr_t VTCM_SCATTER16_32_ADDRESS;
+uintptr_t VTCM_GATHER16_32_ADDRESS;
+
+/* the vtcm base address */
+unsigned char *vtcm_base;
+
+/* scatter gather 16 bit elements using 16 bit offsets */
+unsigned short *vscatter16;
+unsigned short *vgather16;
+unsigned short vscatter16_ref[SCATTER_BUFFER_SIZE];
+unsigned short vgather16_ref[MATRIX_SIZE];
+
+/* scatter gather 32 bit elements using 32 bit offsets */
+unsigned int *vscatter32;
+unsigned int *vgather32;
+unsigned int vscatter32_ref[SCATTER_BUFFER_SIZE];
+unsigned int vgather32_ref[MATRIX_SIZE];
+
+/* scatter gather 16 bit elements using 32 bit offsets */
+unsigned short *vscatter16_32;
+unsigned short *vgather16_32;
+unsigned short vscatter16_32_ref[SCATTER_BUFFER_SIZE];
+unsigned short vgather16_32_ref[MATRIX_SIZE];
+
+
+/* declare the arrays of offsets */
+unsigned short half_offsets[MATRIX_SIZE];
+unsigned int word_offsets[MATRIX_SIZE];
+
+/* declare the arrays of values */
+unsigned short half_values[MATRIX_SIZE];
+unsigned short half_acc_values[MATRIX_SIZE];
+unsigned short half_q_values[MATRIX_SIZE];
+unsigned int word_values[MATRIX_SIZE];
+unsigned int word_acc_values[MATRIX_SIZE];
+unsigned int word_q_values[MATRIX_SIZE];
+
+/* declare the array of predicates */
+unsigned short half_predicates[MATRIX_SIZE];
+unsigned int word_predicates[MATRIX_SIZE];
+
+/* make this big enough for all the intrinsics */
+unsigned int region_len = 4 * SCATTER_BUFFER_SIZE - 1;
+
+/* optionally add sync instructions */
+#define SYNC_VECTOR 1
+
+/* optionally print cycle counts */
+#define PRINT_CYCLE_COUNTS 0
+
+#if PRINT_CYCLE_COUNTS
+unsigned long long start_cycles;
+#define START_CYCLES start_cycles = hexagon_sim_read_pcycles();
+#define PRINT_CYCLES(x) printf(x, hexagon_sim_read_pcycles() - start_cycles);
+#else
+#define START_CYCLES
+#define PRINT_CYCLES(x)
+#endif
+
+/* define a scratch area for debug and prefill */
+#define SCRATCH_SIZE 0x8800
+
+#define FILL_CHAR '.'
+
+/* fill vtcm scratch with ee */
+void prefill_vtcm_scratch(void)
+{
+    memset((void *)VTCM_BASE_ADDRESS, FILL_CHAR, SCRATCH_SIZE * sizeof(char));
+}
+
+/* print vtcm scratch buffer */
+void print_vtcm_scratch_16(void)
+{
+    unsigned short *vtmp = (unsigned short *)VTCM_BASE_ADDRESS;
+
+    printf("\n\nPrinting the vtcm scratch in half words");
+
+    for (int i = 0; i < SCRATCH_SIZE; i++) {
+        if ((i % MATRIX_SIZE) == 0) {
+            printf("\n");
+        }
+        for (int j = 0; j < 2; j++) {
+            printf("%c", (char)((vtmp[i] >> j * 8) & 0xff));
+        }
+
+        printf(" ");
+    }
+}
+
+/* print vtcm scratch buffer */
+void print_vtcm_scratch_32(void)
+{
+    unsigned int *vtmp = (unsigned int *)VTCM_BASE_ADDRESS;
+
+    printf("\n\nPrinting the vtcm scratch in words");
+
+    for (int i = 0; i < SCRATCH_SIZE; i++) {
+        if ((i % MATRIX_SIZE) == 0) {
+            printf("\n");
+        }
+        for (int j = 0; j < 4; j++) {
+            printf("%c", (char)((vtmp[i] >> j * 8) & 0xff));
+        }
+
+        printf(" ");
+    }
+}
+
+
+/* create byte offsets to be a diagonal of the matrix with 16 bit elements */
+void create_offsets_and_values_16(void)
+{
+    unsigned short half_element = 0;
+    unsigned short half_q_element = 0;
+    char letter = 'A';
+    char q_letter = '@';
+
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        half_offsets[i] = i * (2 * MATRIX_SIZE + 2);
+
+        half_element = 0;
+        half_q_element = 0;
+        for (int j = 0; j < 2; j++) {
+            half_element |= letter << j * 8;
+            half_q_element |= q_letter << j * 8;
+        }
+
+        half_values[i] = half_element;
+        half_acc_values[i] = ((i % 10) << 8) + (i % 10);
+        half_q_values[i] = half_q_element;
+
+        letter++;
+        /* reset to 'A' */
+        if (letter == 'M') {
+            letter = 'A';
+        }
+    }
+}
+
+/* create a predicate mask for the half word scatter */
+void create_preds_16()
+{
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        half_predicates[i] = (i % 3 == 0 || i % 5 == 0) ? ~0 : 0;
+    }
+}
+
+
+/* create byte offsets to be a diagonal of the matrix with 32 bit elements */
+void create_offsets_and_values_32(void)
+{
+    unsigned int word_element = 0;
+    unsigned int word_q_element = 0;
+    char letter = 'A';
+    char q_letter = '&';
+
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        word_offsets[i] = i * (4 * MATRIX_SIZE + 4);
+
+        word_element = 0;
+        word_q_element = 0;
+        for (int j = 0; j < 4; j++) {
+            word_element |= letter << j * 8;
+            word_q_element |= q_letter << j * 8;
+        }
+
+        word_values[i] = word_element;
+        word_acc_values[i] = ((i % 10) << 8) + (i % 10);
+        word_q_values[i] = word_q_element;
+
+        letter++;
+        /* reset to 'A' */
+        if (letter == 'M') {
+            letter = 'A';
+        }
+    }
+}
+
+/* create a predicate mask for the word scatter */
+void create_preds_32()
+{
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        word_predicates[i] = (i % 4 == 0 || i % 7 == 0) ? ~0 : 0;
+    }
+}
+
+
+void dump_buf(char *str, void *addr, int element_size, int byte_len)
+
+{
+    unsigned short *sptr = addr;
+    unsigned int *ptr = addr;
+
+    printf("\n\nBuffer: %s\n", str);
+    for (int i = 0; i < byte_len / element_size; ++ptr, ++sptr, ++i) {
+        if (i != 0 && (i % 16) == 0) {
+            printf("\n");
+        }
+        if (element_size == 2) {
+            printf("%c ", *sptr);
+        } else if (element_size == 4) {
+            printf("%4.4x ", *ptr);
+        }
+    }
+}
+
+/*
+ * create byte offsets to be a diagonal of the matrix with 16 bit elements and
+ * 32 bit offsets
+ */
+void create_offsets_and_values_16_32(void)
+{
+    unsigned int half_element = 0;
+    unsigned short half_q_element = 0;
+    char letter = 'D';
+    char q_letter = '$';
+
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        word_offsets[i] = i * (2 * MATRIX_SIZE + 2);
+
+        half_element = 0;
+        half_q_element = 0;
+        for (int j = 0; j < 2; j++) {
+            half_element |= letter << j * 8;
+            half_q_element |= q_letter << j * 8;
+        }
+
+        half_values[i] = half_element;
+        half_acc_values[i] = ((i % 10) << 8) + (i % 10);
+        half_q_values[i] = half_q_element;
+
+        letter++;
+        /* reset to 'A' */
+        if (letter == 'P') {
+            letter = 'D';
+        }
+    }
+
+    /*
+     * dump_buf("word_offsets", word_offsets, sizeof(*word_offsets),
+     * sizeof(word_offsets)); dump_buf("half_offsets", half_offsets,
+     * sizeof(*half_offsets), sizeof(half_offsets));
+     */
+}
+
+void create_preds_16_32()
+{
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        half_predicates[i] = (i % 2 == 0 || i % 13 == 0) ? ~0 : 0;
+    }
+}
+
+#define SCATTER_RELEASE(ADDR) \
+    asm volatile("vmem(%0 + #0):scatter_release\n" : : "r"(ADDR));
+
+/* scatter the 16 bit elements using intrinsics */
+void vector_scatter_16(void)
+{
+    START_CYCLES;
+
+    /* copy the offsets and values to vectors */
+    HVX_Vector offsets = *(HVX_Vector *)half_offsets;
+    HVX_Vector values = *(HVX_Vector *)half_values;
+
+    /* do the scatter */
+    Q6_vscatter_RMVhV(VTCM_SCATTER16_ADDRESS, region_len, offsets, values);
+
+#if SYNC_VECTOR
+    /* do the sync operation */
+    SCATTER_RELEASE(vscatter16);
+    /*
+     * This dummy load from vscatter16 is to complete the synchronization.
+     * Normally this load would be deferred as long as possible to minimize
+     * stalls.
+     */
+    volatile HVX_Vector vDummy = *(HVX_Vector *)vscatter16;
+#endif
+
+    PRINT_CYCLES("\nVector Scatter 16 cycles = %llu\n");
+}
+
+/* scatter-accumulate the 16 bit elements using intrinsics */
+void vector_scatter_acc_16(void)
+{
+    START_CYCLES;
+
+    /* copy the offsets and values to vectors */
+    HVX_Vector offsets = *(HVX_Vector *)half_offsets;
+    HVX_Vector values = *(HVX_Vector *)half_acc_values;
+
+    /* do the scatter */
+    Q6_vscatteracc_RMVhV(VTCM_SCATTER16_ADDRESS, region_len, offsets, values);
+
+#if SYNC_VECTOR
+    /* do the sync operation */
+    SCATTER_RELEASE(vscatter16);
+    /*
+     * This dummy load from vscatter16 is to complete the synchronization.
+     * Normally this load would be deferred as long as possible to minimize
+     * stalls.
+     */
+    volatile HVX_Vector vDummy = *(HVX_Vector *)vscatter16;
+#endif
+
+    PRINT_CYCLES("\nVector Scatter Acc 16 cycles = %llu\n");
+}
+
+/* scatter the 16 bit elements using intrinsics */
+void vector_scatter_q_16(void)
+{
+    START_CYCLES;
+
+    /* copy the offsets and values to vectors */
+    HVX_Vector offsets = *(HVX_Vector *)half_offsets;
+    HVX_Vector values = *(HVX_Vector *)half_q_values;
+    HVX_Vector pred_reg = *(HVX_Vector *)half_predicates;
+    HVX_VectorPred preds = Q6_Q_vand_VR(pred_reg, ~0);
+
+    /* do the scatter */
+    Q6_vscatter_QRMVhV(preds, VTCM_SCATTER16_ADDRESS, region_len, offsets,
+                       values);
+
+#if SYNC_VECTOR
+    /* do the sync operation */
+    SCATTER_RELEASE(vscatter16);
+    /*
+     * This dummy load from vscatter16 is to complete the synchronization.
+     * Normally this load would be deferred as long as possible to minimize
+     * stalls.
+     */
+    volatile HVX_Vector vDummy = *(HVX_Vector *)vscatter16;
+#endif
+
+    PRINT_CYCLES("\nVector Scatter Q 16 cycles = %llu\n");
+}
+
+/* scatter the 32 bit elements using intrinsics */
+void vector_scatter_32(void)
+{
+    START_CYCLES;
+
+    /* copy the offsets and values to vectors */
+    HVX_Vector offsetslo = *(HVX_Vector *)word_offsets;
+    HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2];
+    HVX_Vector valueslo = *(HVX_Vector *)word_values;
+    HVX_Vector valueshi = *(HVX_Vector *)&word_values[MATRIX_SIZE / 2];
+
+    /* do the scatter */
+    Q6_vscatter_RMVwV(VTCM_SCATTER32_ADDRESS, region_len, offsetslo, valueslo);
+    Q6_vscatter_RMVwV(VTCM_SCATTER32_ADDRESS, region_len, offsetshi, valueshi);
+
+#if SYNC_VECTOR
+    /* do the sync operation */
+    SCATTER_RELEASE(vscatter32);
+    /*
+     * This dummy load from vscatter32 is to complete the synchronization.
+     * Normally this load would be deferred as long as possible to minimize
+     * stalls.
+     */
+    volatile HVX_Vector vDummy = *(HVX_Vector *)vscatter32;
+#endif
+
+    PRINT_CYCLES("\nVector Scatter 32 cycles = %llu\n");
+}
+
+/* scatter-acc the 32 bit elements using intrinsics */
+void vector_scatter_acc_32(void)
+{
+    START_CYCLES;
+
+    /* copy the offsets and values to vectors */
+    HVX_Vector offsetslo = *(HVX_Vector *)word_offsets;
+    HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2];
+    HVX_Vector valueslo = *(HVX_Vector *)word_acc_values;
+    HVX_Vector valueshi = *(HVX_Vector *)&word_acc_values[MATRIX_SIZE / 2];
+
+    /* do the scatter */
+    Q6_vscatteracc_RMVwV(VTCM_SCATTER32_ADDRESS, region_len, offsetslo,
+                         valueslo);
+    Q6_vscatteracc_RMVwV(VTCM_SCATTER32_ADDRESS, region_len, offsetshi,
+                         valueshi);
+
+#if SYNC_VECTOR
+    /* do the sync operation */
+    SCATTER_RELEASE(vscatter32);
+    /*
+     * This dummy load from vscatter32 is to complete the synchronization.
+     * Normally this load would be deferred as long as possible to minimize
+     * stalls.
+     */
+    volatile HVX_Vector vDummy = *(HVX_Vector *)vscatter32;
+#endif
+
+    PRINT_CYCLES("\nVector Scatter Acc 32 cycles = %llu\n");
+}
+
+/* scatter the 32 bit elements using intrinsics */
+void vector_scatter_q_32(void)
+{
+    START_CYCLES;
+
+    /* copy the offsets and values to vectors */
+    HVX_Vector offsetslo = *(HVX_Vector *)word_offsets;
+    HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2];
+    HVX_Vector valueslo = *(HVX_Vector *)word_q_values;
+    HVX_Vector valueshi = *(HVX_Vector *)&word_q_values[MATRIX_SIZE / 2];
+    HVX_Vector pred_reglo = *(HVX_Vector *)word_predicates;
+    HVX_Vector pred_reghi = *(HVX_Vector *)&word_predicates[MATRIX_SIZE / 2];
+    HVX_VectorPred predslo = Q6_Q_vand_VR(pred_reglo, ~0);
+    HVX_VectorPred predshi = Q6_Q_vand_VR(pred_reghi, ~0);
+
+    /* do the scatter */
+    Q6_vscatter_QRMVwV(predslo, VTCM_SCATTER32_ADDRESS, region_len, offsetslo,
+                       valueslo);
+    Q6_vscatter_QRMVwV(predshi, VTCM_SCATTER32_ADDRESS, region_len, offsetshi,
+                       valueshi);
+
+#if SYNC_VECTOR
+    /* do the sync operation */
+    SCATTER_RELEASE(vscatter16);
+    /*
+     * This dummy load from vscatter16 is to complete the synchronization.
+     * Normally this load would be deferred as long as possible to minimize
+     * stalls.
+     */
+    volatile HVX_Vector vDummy = *(HVX_Vector *)vscatter16;
+#endif
+
+    PRINT_CYCLES("\nVector Scatter Q 16 cycles = %llu\n");
+}
+
+void print_vector(char *str, HVX_Vector *v)
+
+{
+    unsigned char *ptr = (unsigned char *)v;
+
+    printf("\n\nVector: %s\n", str);
+    for (int i = 0; i < sizeof(HVX_Vector) * 4; ++ptr, ++i) {
+        if (i != 0 && (i % 16) == 0) {
+            printf("\n");
+        }
+        printf("%c ", *ptr);
+    }
+    printf("\n");
+}
+
+void print_vectorpair(char *str, HVX_VectorPair *v)
+
+{
+    unsigned char *ptr = (unsigned char *)v;
+
+    printf("\n\nVectorPair: %s\n", str);
+    for (int i = 0; i < sizeof(HVX_VectorPair); ++ptr, ++i) {
+        if (i != 0 && (i % 16) == 0) {
+            printf("\n");
+        }
+        printf("%c ", *ptr);
+    }
+    printf("\n");
+}
+
+/* scatter the 16 bit elements with 32 bit offsets using intrinsics */
+void vector_scatter_16_32(void)
+{
+    START_CYCLES;
+
+    /* get the word offsets in a vector pair */
+    HVX_VectorPair offsets = *(HVX_VectorPair *)word_offsets;
+    /* print_vectorpair("word_offsets", (HVX_VectorPair *)&word_offsets); */
+
+    /* these values need to be shuffled for the RMWwV scatter */
+    HVX_Vector values = *(HVX_Vector *)half_values;
+    values = Q6_Vh_vshuff_Vh(values);
+    /* print_vector("values", (HVX_Vector *)&values); */
+
+    /* do the scatter */
+    Q6_vscatter_RMWwV(VTCM_SCATTER16_32_ADDRESS, region_len, offsets, values);
+    /* print_vector("scatter16_32_address", (HVX_Vector */
+    /* *)VTCM_SCATTER16_32_ADDRESS); */
+
+#if SYNC_VECTOR
+    /* do the sync operation */
+    SCATTER_RELEASE(vscatter16_32);
+    /*
+     * This dummy load from vscatter16_32 is to complete the synchronization.
+     * Normally this load would be deferred as long as possible to minimize
+     * stalls.
+     */
+    volatile HVX_Vector vDummy = *(HVX_Vector *)vscatter16_32;
+#endif
+
+    PRINT_CYCLES("\nVector Scatter 16_32 cycles = %llu\n");
+}
+
+/* scatter-acc the 16 bit elements with 32 bit offsets using intrinsics */
+void vector_scatter_acc_16_32(void)
+{
+    START_CYCLES;
+
+    /* get the word offsets in a vector pair */
+    HVX_VectorPair offsets = *(HVX_VectorPair *)word_offsets;
+    /* print_vectorpair("word_offsets", (HVX_VectorPair *)&word_offsets); */
+
+    /* these values need to be shuffled for the RMWwV scatter */
+    HVX_Vector values = *(HVX_Vector *)half_acc_values;
+    values = Q6_Vh_vshuff_Vh(values);
+    /* print_vector("values", (HVX_Vector *)&values); */
+
+    /* do the scatter */
+    Q6_vscatteracc_RMWwV(VTCM_SCATTER16_32_ADDRESS, region_len, offsets,
+                         values);
+    /* print_vector("scatter16_32_address", (HVX_Vector */
+    /* *)VTCM_SCATTER16_32_ADDRESS); */
+
+#if SYNC_VECTOR
+    /* do the sync operation */
+    SCATTER_RELEASE(vscatter16_32);
+    /*
+     * This dummy load from vscatter16_32 is to complete the synchronization.
+     * Normally this load would be deferred as long as possible to minimize
+     * stalls.
+     */
+    volatile HVX_Vector vDummy = *(HVX_Vector *)vscatter16_32;
+#endif
+
+    PRINT_CYCLES("\nVector Scatter Acc 16_32 cycles = %llu\n");
+}
+
+/* scatter-acc the 16 bit elements with 32 bit offsets using intrinsics */
+void vector_scatter_q_16_32(void)
+{
+    START_CYCLES;
+
+    /* get the word offsets in a vector pair */
+    HVX_VectorPair offsets = *(HVX_VectorPair *)word_offsets;
+    /* print_vectorpair("word_offsets", (HVX_VectorPair *)&word_offsets); */
+
+    /* these values need to be shuffled for the RMWwV scatter */
+    HVX_Vector values = *(HVX_Vector *)half_q_values;
+    values = Q6_Vh_vshuff_Vh(values);
+    /* print_vector("values", (HVX_Vector *)&values); */
+
+    HVX_Vector pred_reg = *(HVX_Vector *)half_predicates;
+    pred_reg = Q6_Vh_vshuff_Vh(pred_reg);
+    HVX_VectorPred preds = Q6_Q_vand_VR(pred_reg, ~0);
+
+    /* do the scatter */
+    Q6_vscatter_QRMWwV(preds, VTCM_SCATTER16_32_ADDRESS, region_len, offsets,
+                       values);
+    /* print_vector("scatter16_32_address", (HVX_Vector */
+    /* *)VTCM_SCATTER16_32_ADDRESS); */
+
+#if SYNC_VECTOR
+    /* do the sync operation */
+    SCATTER_RELEASE(vscatter16_32);
+    /*
+     * This dummy load from vscatter16_32 is to complete the synchronization.
+     * Normally this load would be deferred as long as possible to minimize
+     * stalls.
+     */
+    volatile HVX_Vector vDummy = *(HVX_Vector *)vscatter16_32;
+#endif
+
+    PRINT_CYCLES("\nVector Scatter Q 16_32 cycles = %llu\n");
+}
+
+
+/* gather the elements from the scatter16 buffer */
+void vector_gather_16(void)
+{
+    START_CYCLES;
+
+    HVX_Vector *vgather = (HVX_Vector *)VTCM_GATHER16_ADDRESS;
+    HVX_Vector offsets = *(HVX_Vector *)half_offsets;
+
+    /* do the gather to the gather16 buffer */
+    Q6_vgather_ARMVh(vgather, VTCM_SCATTER16_ADDRESS, region_len, offsets);
+
+
+#if SYNC_VECTOR
+    /* This dummy read of vgather will stall until completion */
+    volatile HVX_Vector vDummy = *(HVX_Vector *)vgather;
+#endif
+
+    PRINT_CYCLES("\nVector Gather 16 cycles = %llu\n");
+}
+
+static unsigned short gather_q_16_init(void)
+{
+    char letter = '?';
+    return letter | (letter << 8);
+}
+
+void vector_gather_q_16(void)
+{
+    START_CYCLES;
+
+    HVX_Vector *vgather = (HVX_Vector *)VTCM_GATHER16_ADDRESS;
+    HVX_Vector offsets = *(HVX_Vector *)half_offsets;
+    HVX_Vector pred_reg = *(HVX_Vector *)half_predicates;
+    HVX_VectorPred preds = Q6_Q_vand_VR(pred_reg, ~0);
+
+    *vgather = Q6_Vh_vsplat_R(gather_q_16_init());
+    /* do the gather to the gather16 buffer */
+    Q6_vgather_AQRMVh(vgather, preds, VTCM_SCATTER16_ADDRESS, region_len,
+                      offsets);
+
+
+#if SYNC_VECTOR
+    /* This dummy read of vgather will stall until completion */
+    volatile HVX_Vector vDummy = *(HVX_Vector *)vgather;
+#endif
+
+    PRINT_CYCLES("\nVector Gather Q 16 cycles = %llu\n");
+}
+
+
+/* gather the elements from the scatter32 buffer */
+void vector_gather_32(void)
+{
+    START_CYCLES;
+
+    HVX_Vector *vgatherlo = (HVX_Vector *)VTCM_GATHER32_ADDRESS;
+    HVX_Vector *vgatherhi =
+        (HVX_Vector *)(VTCM_GATHER32_ADDRESS + (MATRIX_SIZE * 2));
+    HVX_Vector offsetslo = *(HVX_Vector *)word_offsets;
+    HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2];
+
+    /* do the gather to vgather */
+    Q6_vgather_ARMVw(vgatherlo, VTCM_SCATTER32_ADDRESS, region_len, offsetslo);
+    Q6_vgather_ARMVw(vgatherhi, VTCM_SCATTER32_ADDRESS, region_len, offsetshi);
+
+#if SYNC_VECTOR
+    /* This dummy read of vgatherhi will stall until completion */
+    volatile HVX_Vector vDummy = *(HVX_Vector *)vgatherhi;
+#endif
+
+    PRINT_CYCLES("\nVector Gather 32 cycles = %llu\n");
+}
+
+static unsigned int gather_q_32_init(void)
+{
+    char letter = '?';
+    return letter | (letter << 8) | (letter << 16) | (letter << 24);
+}
+
+void vector_gather_q_32(void)
+{
+    START_CYCLES;
+
+    HVX_Vector *vgatherlo = (HVX_Vector *)VTCM_GATHER32_ADDRESS;
+    HVX_Vector *vgatherhi =
+        (HVX_Vector *)(VTCM_GATHER32_ADDRESS + (MATRIX_SIZE * 2));
+    HVX_Vector offsetslo = *(HVX_Vector *)word_offsets;
+    HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2];
+    HVX_Vector pred_reglo = *(HVX_Vector *)word_predicates;
+    HVX_VectorPred predslo = Q6_Q_vand_VR(pred_reglo, ~0);
+    HVX_Vector pred_reghi = *(HVX_Vector *)&word_predicates[MATRIX_SIZE / 2];
+    HVX_VectorPred predshi = Q6_Q_vand_VR(pred_reghi, ~0);
+
+    *vgatherlo = Q6_Vh_vsplat_R(gather_q_32_init());
+    *vgatherhi = Q6_Vh_vsplat_R(gather_q_32_init());
+    /* do the gather to vgather */
+    Q6_vgather_AQRMVw(vgatherlo, predslo, VTCM_SCATTER32_ADDRESS, region_len,
+                      offsetslo);
+    Q6_vgather_AQRMVw(vgatherhi, predshi, VTCM_SCATTER32_ADDRESS, region_len,
+                      offsetshi);
+
+#if SYNC_VECTOR
+    /* This dummy read of vgatherhi will stall until completion */
+    volatile HVX_Vector vDummy = *(HVX_Vector *)vgatherhi;
+#endif
+
+    PRINT_CYCLES("\nVector Gather Q 32 cycles = %llu\n");
+}
+
+/* gather the elements from the scatter16_32 buffer */
+void vector_gather_16_32(void)
+{
+    START_CYCLES;
+
+    /* get the vtcm address to gather from */
+    HVX_Vector *vgather = (HVX_Vector *)VTCM_GATHER16_32_ADDRESS;
+
+    /* get the word offsets in a vector pair */
+    HVX_VectorPair offsets = *(HVX_VectorPair *)word_offsets;
+
+    /* do the gather to vgather */
+    Q6_vgather_ARMWw(vgather, VTCM_SCATTER16_32_ADDRESS, region_len, offsets);
+
+    /* the read of gather will stall until completion */
+    volatile HVX_Vector values = *(HVX_Vector *)vgather;
+
+    /* deal the elements to get the order back */
+    values = Q6_Vh_vdeal_Vh(values);
+
+    /* write it back to vtcm address */
+    *(HVX_Vector *)vgather = values;
+
+
+    PRINT_CYCLES("\nVector Gather 16_32 cycles = %llu\n");
+}
+
+void vector_gather_q_16_32(void)
+{
+    START_CYCLES;
+
+    /* get the vtcm address to gather from */
+    HVX_Vector *vgather = (HVX_Vector *)VTCM_GATHER16_32_ADDRESS;
+
+    /* get the word offsets in a vector pair */
+    HVX_VectorPair offsets = *(HVX_VectorPair *)word_offsets;
+    HVX_Vector pred_reg = *(HVX_Vector *)half_predicates;
+    pred_reg = Q6_Vh_vshuff_Vh(pred_reg);
+    HVX_VectorPred preds = Q6_Q_vand_VR(pred_reg, ~0);
+
+    *vgather = Q6_Vh_vsplat_R(gather_q_16_init());
+    /* do the gather to vgather */
+    Q6_vgather_AQRMWw(vgather, preds, VTCM_SCATTER16_32_ADDRESS, region_len,
+                      offsets);
+
+    /* the read of gather will stall until completion */
+    volatile HVX_Vector values = *(HVX_Vector *)vgather;
+
+    /* deal the elements to get the order back */
+    values = Q6_Vh_vdeal_Vh(values);
+
+    /* write it back to vtcm address */
+    *(HVX_Vector *)vgather = values;
+
+
+    PRINT_CYCLES("\nVector Gather Q 16_32 cycles = %llu\n");
+}
+
+
+static void check_buffer(const char *name, void *c, void *r, size_t size)
+{
+    char *check = (char *)c;
+    char *ref = (char *)r;
+    /*  printf("check buffer %s 0x%x, 0x%x, %d\n", name, check, ref, size); */
+    for (int i = 0; i < size; i++) {
+        if (check[i] != ref[i]) {
+            printf("Error %s [%d]: 0x%x (%c) != 0x%x (%c)\n", name, i, check[i],
+                   check[i], ref[i], ref[i]);
+            err++;
+        }
+    }
+}
+
+
+/*
+ * These scalar functions are the C equivalents of the vector functions that
+ * use HVX
+ */
+
+/* scatter the 16 bit elements using C */
+void scalar_scatter_16(unsigned short *vscatter16)
+{
+    START_CYCLES;
+
+    for (int i = 0; i < MATRIX_SIZE; ++i) {
+        vscatter16[half_offsets[i] / 2] = half_values[i];
+    }
+
+    PRINT_CYCLES("\nScalar Scatter 16 cycles = %llu\n");
+}
+
+void check_scatter_16()
+{
+    memset(vscatter16_ref, FILL_CHAR,
+           SCATTER_BUFFER_SIZE * sizeof(unsigned short));
+    scalar_scatter_16(vscatter16_ref);
+    check_buffer("check_scatter_16", vscatter16, vscatter16_ref,
+                 SCATTER_BUFFER_SIZE * sizeof(unsigned short));
+}
+
+/* scatter the 16 bit elements using C */
+void scalar_scatter_acc_16(unsigned short *vscatter16)
+{
+    START_CYCLES;
+
+    for (int i = 0; i < MATRIX_SIZE; ++i) {
+        vscatter16[half_offsets[i] / 2] += half_acc_values[i];
+    }
+
+    PRINT_CYCLES("\nScalar Scatter Acc 16 cycles = %llu\n");
+}
+
+/* scatter the 16 bit elements using C */
+void scalar_scatter_q_16(unsigned short *vscatter16)
+{
+    START_CYCLES;
+
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        if (half_predicates[i]) {
+            vscatter16[half_offsets[i] / 2] = half_q_values[i];
+        }
+    }
+
+    PRINT_CYCLES("\nScalar Scatter Q 16 cycles = %llu\n");
+}
+
+
+void check_scatter_acc_16()
+{
+    memset(vscatter16_ref, FILL_CHAR,
+           SCATTER_BUFFER_SIZE * sizeof(unsigned short));
+    scalar_scatter_16(vscatter16_ref);
+    scalar_scatter_acc_16(vscatter16_ref);
+    check_buffer("check_scatter_acc_16", vscatter16, vscatter16_ref,
+                 SCATTER_BUFFER_SIZE * sizeof(unsigned short));
+}
+
+void check_scatter_q_16()
+{
+    memset(vscatter16_ref, FILL_CHAR,
+           SCATTER_BUFFER_SIZE * sizeof(unsigned short));
+    scalar_scatter_16(vscatter16_ref);
+    scalar_scatter_acc_16(vscatter16_ref);
+    scalar_scatter_q_16(vscatter16_ref);
+    check_buffer("check_scatter_q_16", vscatter16, vscatter16_ref,
+                 SCATTER_BUFFER_SIZE * sizeof(unsigned short));
+}
+
+/* scatter the 32 bit elements using C */
+void scalar_scatter_32(unsigned int *vscatter32)
+{
+    START_CYCLES;
+
+    for (int i = 0; i < MATRIX_SIZE; ++i) {
+        vscatter32[word_offsets[i] / 4] = word_values[i];
+    }
+
+    PRINT_CYCLES("\n\nScalar Scatter 32 cycles = %llu\n");
+}
+
+/* scatter the 32 bit elements using C */
+void scalar_scatter_acc_32(unsigned int *vscatter32)
+{
+    START_CYCLES;
+
+    for (int i = 0; i < MATRIX_SIZE; ++i) {
+        vscatter32[word_offsets[i] / 4] += word_acc_values[i];
+    }
+
+    PRINT_CYCLES("\nScalar Scatter Acc 32 cycles = %llu\n");
+}
+
+/* scatter the 32 bit elements using C */
+void scalar_scatter_q_32(unsigned int *vscatter32)
+{
+    START_CYCLES;
+
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        if (word_predicates[i]) {
+            vscatter32[word_offsets[i] / 4] = word_q_values[i];
+        }
+    }
+
+    PRINT_CYCLES("\nScalar Scatter Q 32 cycles = %llu\n");
+}
+
+void check_scatter_32()
+{
+    memset(vscatter32_ref, FILL_CHAR,
+           SCATTER_BUFFER_SIZE * sizeof(unsigned int));
+    scalar_scatter_32(vscatter32_ref);
+    check_buffer("check_scatter_32", vscatter32, vscatter32_ref,
+                 SCATTER_BUFFER_SIZE * sizeof(unsigned int));
+}
+
+void check_scatter_acc_32()
+{
+    memset(vscatter32_ref, FILL_CHAR,
+           SCATTER_BUFFER_SIZE * sizeof(unsigned int));
+    scalar_scatter_32(vscatter32_ref);
+    scalar_scatter_acc_32(vscatter32_ref);
+    check_buffer("check_scatter_acc_32", vscatter32, vscatter32_ref,
+                 SCATTER_BUFFER_SIZE * sizeof(unsigned int));
+}
+
+void check_scatter_q_32()
+{
+    memset(vscatter32_ref, FILL_CHAR,
+           SCATTER_BUFFER_SIZE * sizeof(unsigned int));
+    scalar_scatter_32(vscatter32_ref);
+    scalar_scatter_acc_32(vscatter32_ref);
+    scalar_scatter_q_32(vscatter32_ref);
+    check_buffer("check_scatter_q_32", vscatter32, vscatter32_ref,
+                 SCATTER_BUFFER_SIZE * sizeof(unsigned int));
+}
+
+/* scatter the 32 bit elements using C */
+void scalar_scatter_16_32(unsigned short *vscatter16_32)
+{
+    START_CYCLES;
+
+    for (int i = 0; i < MATRIX_SIZE; ++i) {
+        vscatter16_32[word_offsets[i] / 2] = half_values[i];
+    }
+
+    PRINT_CYCLES("\n\nScalar Scatter 16_32 cycles = %llu\n");
+}
+
+/* scatter the 32 bit elements using C */
+void scalar_scatteracc_16_32(unsigned short *vscatter16_32)
+{
+    START_CYCLES;
+
+    for (int i = 0; i < MATRIX_SIZE; ++i) {
+        vscatter16_32[word_offsets[i] / 2] += half_acc_values[i];
+    }
+
+    PRINT_CYCLES("\n\nScalar Scatter Acc 16_32 cycles = %llu\n");
+}
+
+void scalar_scatter_q_16_32(unsigned short *vscatter16_32)
+{
+    START_CYCLES;
+
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        if (half_predicates[i]) {
+            vscatter16_32[word_offsets[i] / 2] = half_q_values[i];
+        }
+    }
+
+    PRINT_CYCLES("\nScalar Scatter Q 16_32 cycles = %llu\n");
+}
+
+void check_scatter_16_32()
+{
+    memset(vscatter16_32_ref, FILL_CHAR,
+           SCATTER_BUFFER_SIZE * sizeof(unsigned short));
+    scalar_scatter_16_32(vscatter16_32_ref);
+    check_buffer("check_scatter_16_32", vscatter16_32, vscatter16_32_ref,
+                 SCATTER_BUFFER_SIZE * sizeof(unsigned short));
+}
+
+void check_scatter_acc_16_32()
+{
+    memset(vscatter16_32_ref, FILL_CHAR,
+           SCATTER_BUFFER_SIZE * sizeof(unsigned short));
+    scalar_scatter_16_32(vscatter16_32_ref);
+    scalar_scatteracc_16_32(vscatter16_32_ref);
+    check_buffer("check_scatter_acc_16_32", vscatter16_32, vscatter16_32_ref,
+                 SCATTER_BUFFER_SIZE * sizeof(unsigned short));
+}
+
+void check_scatter_q_16_32()
+{
+    memset(vscatter16_32_ref, FILL_CHAR,
+           SCATTER_BUFFER_SIZE * sizeof(unsigned short));
+    scalar_scatter_16_32(vscatter16_32_ref);
+    scalar_scatteracc_16_32(vscatter16_32_ref);
+    scalar_scatter_q_16_32(vscatter16_32_ref);
+    check_buffer("check_scatter_q_16_32", vscatter16_32, vscatter16_32_ref,
+                 SCATTER_BUFFER_SIZE * sizeof(unsigned short));
+}
+
+/* gather the elements from the scatter buffer using C */
+void scalar_gather_16(unsigned short *vgather16)
+{
+    START_CYCLES;
+
+    for (int i = 0; i < MATRIX_SIZE; ++i) {
+        vgather16[i] = vscatter16[half_offsets[i] / 2];
+    }
+
+    PRINT_CYCLES("\n\nScalar Gather 16 cycles = %llu\n");
+}
+
+void scalar_gather_q_16(unsigned short *vgather16)
+{
+    START_CYCLES;
+
+    for (int i = 0; i < MATRIX_SIZE; ++i) {
+        if (half_predicates[i]) {
+            vgather16[i] = vscatter16[half_offsets[i] / 2];
+        }
+    }
+
+    PRINT_CYCLES("\n\nScalar Gather Q 16 cycles = %llu\n");
+}
+
+void check_gather_16()
+{
+    memset(vgather16_ref, 0, MATRIX_SIZE * sizeof(unsigned short));
+    scalar_gather_16(vgather16_ref);
+    check_buffer("check_gather_16", vgather16, vgather16_ref,
+                 MATRIX_SIZE * sizeof(unsigned short));
+}
+
+void check_gather_q_16()
+{
+    memset(vgather16_ref, gather_q_16_init(),
+           MATRIX_SIZE * sizeof(unsigned short));
+    scalar_gather_q_16(vgather16_ref);
+    check_buffer("check_gather_q_16", vgather16, vgather16_ref,
+                 MATRIX_SIZE * sizeof(unsigned short));
+}
+
+/* gather the elements from the scatter buffer using C */
+void scalar_gather_32(unsigned int *vgather32)
+{
+    START_CYCLES;
+
+    for (int i = 0; i < MATRIX_SIZE; ++i) {
+        vgather32[i] = vscatter32[word_offsets[i] / 4];
+    }
+
+    PRINT_CYCLES("\n\nScalar Gather 32 cycles = %llu\n");
+}
+
+void scalar_gather_q_32(unsigned int *vgather32)
+{
+    START_CYCLES;
+
+    for (int i = 0; i < MATRIX_SIZE; ++i) {
+        if (word_predicates[i]) {
+            vgather32[i] = vscatter32[word_offsets[i] / 4];
+        }
+    }
+
+    PRINT_CYCLES("\n\nScalar Gather Q 32 cycles = %llu\n");
+}
+
+
+void check_gather_32(void)
+{
+    memset(vgather32_ref, 0, MATRIX_SIZE * sizeof(unsigned int));
+    scalar_gather_32(vgather32_ref);
+    check_buffer("check_gather_32", vgather32, vgather32_ref,
+                 MATRIX_SIZE * sizeof(unsigned int));
+}
+
+void check_gather_q_32(void)
+{
+    memset(vgather32_ref, gather_q_32_init(),
+           MATRIX_SIZE * sizeof(unsigned int));
+    scalar_gather_q_32(vgather32_ref);
+    check_buffer("check_gather_q_32", vgather32, vgather32_ref,
+                 MATRIX_SIZE * sizeof(unsigned int));
+}
+
+/* gather the elements from the scatter buffer using C */
+void scalar_gather_16_32(unsigned short *vgather16_32)
+{
+    START_CYCLES;
+
+    for (int i = 0; i < MATRIX_SIZE; ++i) {
+        vgather16_32[i] = vscatter16_32[word_offsets[i] / 2];
+    }
+
+    PRINT_CYCLES("\n\nScalar Gather 16_32 cycles = %llu\n");
+}
+
+void scalar_gather_q_16_32(unsigned short *vgather16_32)
+{
+    START_CYCLES;
+
+    for (int i = 0; i < MATRIX_SIZE; ++i) {
+        if (half_predicates[i]) {
+            vgather16_32[i] = vscatter16_32[word_offsets[i] / 2];
+        }
+    }
+
+    PRINT_CYCLES("\n\nScalar Gather Q 16_32 cycles = %llu\n");
+}
+
+void check_gather_16_32(void)
+{
+    memset(vgather16_32_ref, 0, MATRIX_SIZE * sizeof(unsigned short));
+    scalar_gather_16_32(vgather16_32_ref);
+    check_buffer("check_gather_16_32", vgather16_32, vgather16_32_ref,
+                 MATRIX_SIZE * sizeof(unsigned short));
+}
+
+void check_gather_q_16_32(void)
+{
+    memset(vgather16_32_ref, gather_q_16_init(),
+           MATRIX_SIZE * sizeof(unsigned short));
+    scalar_gather_q_16_32(vgather16_32_ref);
+    check_buffer("check_gather_q_16_32", vgather16_32, vgather16_32_ref,
+                 MATRIX_SIZE * sizeof(unsigned short));
+}
+
+/* These functions print the buffers to the display */
+
+/* print scatter16 buffer */
+void print_scatter16_buffer(void)
+{
+#if PRINT_DATA
+    /*
+     * printf("\n\nPrinting the 16 bit scatter buffer at 0x%08x",
+     * VTCM_SCATTER16_ADDRESS);
+     */
+    printf("\n\nPrinting the 16 bit scatter buffer");
+
+    for (int i = 0; i < SCATTER_BUFFER_SIZE; i++) {
+        if ((i % MATRIX_SIZE) == 0) {
+            printf("\n");
+        }
+
+        for (int j = 0; j < 2; j++) {
+            printf("%c", (char)((vscatter16[i] >> j * 8) & 0xff));
+        }
+
+        printf(" ");
+    }
+    printf("\n");
+#endif
+}
+
+/* print the gather 16 buffer */
+void print_gather_result_16(void)
+{
+#if PRINT_DATA
+    /*
+     * printf("\n\nPrinting the 16 bit gather result at 0x%08x\n",
+     * VTCM_GATHER16_ADDRESS);
+     */
+    printf("\n\nPrinting the 16 bit gather result\n");
+
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        for (int j = 0; j < 2; j++) {
+            printf("%c", (char)((vgather16[i] >> j * 8) & 0xff));
+        }
+
+        printf(" ");
+    }
+    printf("\n");
+#endif
+}
+
+/* print the scatter32 buffer */
+void print_scatter32_buffer(void)
+{
+#if PRINT_DATA
+    /*
+     * printf("\n\nPrinting the 32 bit scatter buffer at 0x%08x",
+     * VTCM_SCATTER32_ADDRESS);
+     */
+    printf("\n\nPrinting the 32 bit scatter buffer");
+
+    for (int i = 0; i < SCATTER_BUFFER_SIZE; i++) {
+        if ((i % MATRIX_SIZE) == 0) {
+            printf("\n");
+        }
+
+        for (int j = 0; j < 4; j++) {
+            printf("%c", (char)((vscatter32[i] >> j * 8) & 0xff));
+        }
+
+        printf(" ");
+    }
+    printf("\n");
+#endif
+}
+
+
+/* print the gather 32 buffer */
+void print_gather_result_32(void)
+{
+#if PRINT_DATA
+    /*
+     * printf("\n\nPrinting the 32 bit gather result at 0x%08x\n",
+     * VTCM_GATHER32_ADDRESS);
+     */
+    printf("\n\nPrinting the 32 bit gather result\n");
+
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        for (int j = 0; j < 4; j++) {
+            printf("%c", (char)((vgather32[i] >> j * 8) & 0xff));
+        }
+
+        printf(" ");
+    }
+    printf("\n");
+#endif
+}
+
+/* print the scatter16_32 buffer */
+void print_scatter16_32_buffer(void)
+{
+#if PRINT_DATA
+    /*
+     * printf("\n\nPrinting the 16_32 bit scatter buffer at 0x%08x",
+     * VTCM_SCATTER16_32_ADDRESS);
+     */
+    printf("\n\nPrinting the 16_32 bit scatter buffer");
+
+    for (int i = 0; i < SCATTER_BUFFER_SIZE; i++) {
+        if ((i % MATRIX_SIZE) == 0) {
+            printf("\n");
+        }
+
+        for (int j = 0; j < 2; j++) {
+            printf("%c", (unsigned char)((vscatter16_32[i] >> j * 8) & 0xff));
+        }
+
+        printf(" ");
+    }
+    printf("\n");
+#endif
+}
+
+/* print the gather 16_32 buffer */
+void print_gather_result_16_32(void)
+{
+#if PRINT_DATA
+    /*
+     * printf("\n\nPrinting the 16_32 bit gather result at 0x%08x\n",
+     * VTCM_GATHER16_32_ADDRESS);
+     */
+    printf("\n\nPrinting the 16_32 bit gather result\n");
+
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        for (int j = 0; j < 2; j++) {
+            printf("%c", (unsigned char)((vgather16_32[i] >> j * 8) & 0xff));
+        }
+
+        printf(" ");
+    }
+    printf("\n");
+#endif
+}
+
+/*
+ * set up the tcm address translation
+ * Note: This method is only for the standalone environment
+ * SDK users should use the "VTCM Manager" to use VTCM
+ */
+void setup_tcm(void)
+{
+    VTCM_BASE_ADDRESS = get_vtcm_base();
+
+    uint64_t pa = VTCM_BASE_ADDRESS;
+    void *va = (void *)VTCM_BASE_ADDRESS;
+
+    VTCM_SCATTER16_ADDRESS = VTCM_BASE_ADDRESS;
+    VTCM_GATHER16_ADDRESS = VTCM_BASE_ADDRESS + SCATTER16_BUF_SIZE;
+    VTCM_SCATTER32_ADDRESS = VTCM_GATHER16_ADDRESS + GATHER16_BUF_SIZE;
+    VTCM_GATHER32_ADDRESS = VTCM_SCATTER32_ADDRESS + SCATTER32_BUF_SIZE;
+    VTCM_SCATTER16_32_ADDRESS = VTCM_GATHER32_ADDRESS + GATHER32_BUF_SIZE;
+    VTCM_GATHER16_32_ADDRESS = VTCM_SCATTER16_32_ADDRESS + SCATTER16_BUF_SIZE;
+
+    /* the vtcm base address */
+    vtcm_base = (unsigned char *)VTCM_BASE_ADDRESS;
+
+    /* scatter gather 16 bit elements using 16 bit offsets */
+    vscatter16 = (unsigned short *)VTCM_SCATTER16_ADDRESS;
+    vgather16 = (unsigned short *)VTCM_GATHER16_ADDRESS;
+
+    /* scatter gather 32 bit elements using 32 bit offsets */
+    vscatter32 = (unsigned int *)VTCM_SCATTER32_ADDRESS;
+    vgather32 = (unsigned int *)VTCM_GATHER32_ADDRESS;
+
+    /* scatter gather 16 bit elements using 32 bit offsets */
+    vscatter16_32 = (unsigned short *)VTCM_SCATTER16_32_ADDRESS;
+    vgather16_32 = (unsigned short *)VTCM_GATHER16_32_ADDRESS;
+}
+
+void inst_test()
+{
+    /* Should NOT throw an error when paranoid-commit-state turned on */
+    uint32_t R;
+    asm volatile("release(%0):at\n\t" : : "r"(R));
+}
+
+
+int main()
+{
+    setup_tcm();
+    prefill_vtcm_scratch();
+
+    /* 16 bit elements with 16 bit offsets */
+    create_offsets_and_values_16();
+    create_preds_16();
+
+#if PRINT_CYCLE_COUNTS
+    scalar_scatter_16(vscatter16);
+#endif
+    vector_scatter_16();
+    print_scatter16_buffer();
+    check_scatter_16();
+
+
+#if PRINT_CYCLE_COUNTS
+    scalar_gather_16(vgather16);
+#endif
+    vector_gather_16();
+    print_gather_result_16();
+    check_gather_16();
+
+    vector_gather_q_16();
+    print_gather_result_16();
+    check_gather_q_16();
+
+    vector_scatter_acc_16();
+    print_scatter16_buffer();
+    check_scatter_acc_16();
+
+    vector_scatter_q_16();
+    print_scatter16_buffer();
+    check_scatter_q_16();
+
+    /* 32 bit elements with 32 bit offsets */
+    create_offsets_and_values_32();
+    create_preds_32();
+
+#if PRINT_CYCLE_COUNTS
+    scalar_scatter_32(vscatter32);
+#endif
+
+    vector_scatter_32();
+
+    print_scatter32_buffer();
+    check_scatter_32();
+
+#if PRINT_CYCLE_COUNTS
+    scalar_gather_32(vgather32);
+#endif
+
+    vector_gather_32();
+
+    print_gather_result_32();
+    check_gather_32();
+
+    vector_gather_q_32();
+    print_gather_result_32();
+    check_gather_q_32();
+
+    vector_scatter_acc_32();
+    print_scatter32_buffer();
+    check_scatter_acc_32();
+
+    vector_scatter_q_32();
+    print_scatter32_buffer();
+    check_scatter_q_32();
+
+    /* 16 bit elements with 32 bit offsets */
+    create_offsets_and_values_16_32();
+    create_preds_16_32();
+
+#if PRINT_CYCLE_COUNTS
+    scalar_scatter_16_32();
+#endif
+    vector_scatter_16_32();
+
+    print_scatter16_32_buffer();
+    check_scatter_16_32();
+
+#if PRINT_CYCLE_COUNTS
+    scalar_gather_16_32(vgather16_32);
+#endif
+
+    vector_gather_16_32();
+
+    print_gather_result_16_32();
+    check_gather_16_32();
+
+    vector_gather_q_16_32();
+    print_gather_result_16_32();
+    check_gather_q_16_32();
+
+    vector_scatter_acc_16_32();
+    print_scatter16_32_buffer();
+    check_scatter_acc_16_32();
+
+    vector_scatter_q_16_32();
+    print_scatter16_32_buffer();
+    check_scatter_q_16_32();
+
+    inst_test();
+    printf("%s\n", ((err) ? "FAIL" : "PASS"));
+    return err;
+}
diff --git a/tests/tcg/hexagon/system/strutils.h b/tests/tcg/hexagon/system/strutils.h
new file mode 100644
index 0000000000000..14f4a290b8173
--- /dev/null
+++ b/tests/tcg/hexagon/system/strutils.h
@@ -0,0 +1,25 @@
+/*
+ *  Copyright(c) 2023-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef STRUTILS_H
+#define STRUTILS_H
+
+#include <string.h>
+
+void sort_str_arr(char **arr, size_t n)
+{
+    for (int i = 0; i < n - 1; i++) {
+        for (int j = 0; j < n - i - 1; j++) {
+            if (strcmp(arr[j], arr[j + 1]) > 0) {
+                char *tmp = arr[j];
+                arr[j] = arr[j + 1];
+                arr[j + 1] = tmp;
+            }
+        }
+    }
+}
+
+#endif
diff --git a/tests/tcg/hexagon/system/tlb-miss-tlblock.S b/tests/tcg/hexagon/system/tlb-miss-tlblock.S
new file mode 100644
index 0000000000000..fe07aca47b37a
--- /dev/null
+++ b/tests/tcg/hexagon/system/tlb-miss-tlblock.S
@@ -0,0 +1,156 @@
+/*
+ *  Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+/*
+ * Test Purpose:
+ * Verify that tlbmissx and tlbmissrw do not set the syscfg.tl bit
+ * The HW spec says:
+ *     "TLBLOCK is acquired automatically whenever a hardware thread raises a
+ *      TLB miss-RW or TLBmiss-X exception."
+ * The casual reader would assume that a miss handler would implicitly have
+ * the lock, that apparently
+ * isn't the case.
+ */
+
+.global start
+start:
+    r0 = ##evb
+    evb = r0
+    r0 = ##0
+    ssr = r0
+    jump #setup
+
+#define tlb_index r11
+#define stack r29
+#define data r18
+tlb_index = ##0x00000007
+
+.org 0x100
+
+evb:
+    jump #reset
+    jump #nmi
+    jump #error
+    jump #0
+    jump #tlbmissx
+    jump #0
+    jump #tlbmissrw
+
+
+setup:
+    {
+        r1 = ##0xc009b800
+        r0 = ##0xf7137010
+    }
+    tlb_index = add(tlb_index, #1)
+    tlbw(r1:0,tlb_index)
+
+/* Enable MMU */
+    r2 = ##0x0085a07f
+    syscfg = r2
+
+/* Test setup */
+    r12 = #0x12
+    r0 = #0x6
+    r7 = ##0x77777777
+    r6 = ##0x66666666
+    data = ##0xf2000000
+    stack = ##0x9ba01000
+    jump ##.L_server_loop
+
+/* event vector handlers */
+reset:
+    r2 = #1
+    stop(r0)
+nmi:
+    r2 = #1
+    stop(r0)
+error:
+    r2 = #1
+    stop(r0)
+
+
+/*
+ * Can only handle a single ex fault.
+ */
+tlbmissx:
+  r0 = syscfg
+  r1 = #0x800
+/*
+ * Fail if we automatically start setting SYSCFG:TL again
+ */
+    r0 = and(r0, r1)
+    {
+        p0 = cmp.eq(r0, r1); if (p0.new) jump:t .Lfailmissx
+    }
+    {
+        r1 = ##0xc009b900
+        r0 = ##0xf7137210
+    }
+    tlb_index = add(tlb_index, #1)
+    tlbw(r1:0,tlb_index)
+    tlbunlock
+    rte
+    stop(r0);
+.Lfailmissx:
+    r2 = #1
+    stop(r2);
+
+/*
+ * Can only handle a stack fault and a data fault
+ */
+tlbmissrw:
+    r0 = syscfg
+    r1 = #0x800
+/*
+ * Fail if we automatically start setting SYSCFG:TL again
+ */
+    r0 = and(r0, r1)
+    {
+        p0 = cmp.eq(r0, r1); if (p0.new) jump:t .Lfailmissrw
+    }
+    r0 = badva
+    p0 = cmp.eq (stack, r0) // missed the stack
+    if (!p0) jump .Ldata
+    {
+        r1 = ##0xc009ba00
+        r0 = ##0xf7137210
+    }
+  jump #.Ldone
+.Ldata:
+    {
+        r1 = ##0xc00f2000
+        r0 = ##0xf71e4010
+    }
+.Ldone:
+    tlb_index = add(tlb_index, #1)
+    tlbw(r1:0,tlb_index)
+    tlbunlock
+    rte
+.Lfailmissrw:
+    r2 = #1
+    stop(r2);
+
+
+
+.org 0x100000
+  nop
+.Lpass:
+   r2 = #0
+   stop(r0);
+   trap0(#0x18)
+.L_server_loop:
+{
+    p0 = cmp.eq(r0,#-0x1)
+    if (!p0.new) jump:t .Lpass
+    memd(stack) = r7:6; // S1 store to stack will also fault
+    memw(data) = r12; // S0 store will fault
+}
+/*
+ * We should not get here:
+ */
+   r2 = #1
+   stop(r0);
diff --git a/tests/tcg/hexagon/system/vid_reg.c b/tests/tcg/hexagon/system/vid_reg.c
new file mode 100644
index 0000000000000..25f266f98b2d7
--- /dev/null
+++ b/tests/tcg/hexagon/system/vid_reg.c
@@ -0,0 +1,36 @@
+/*
+ * Verify vid reads/writes really update the register.
+ */
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+
+static inline uint32_t getvid()
+{
+    uint32_t reg;
+    asm volatile("%0=vid;" : "=r"(reg));
+    return reg;
+}
+static inline void setvid(uint32_t val)
+{
+    asm volatile("vid=%0;" : : "r"(val));
+    return;
+}
+int main()
+{
+    uint32_t testval = 0x3ff03ff;
+    setvid(testval);
+    if (testval != getvid()) {
+        printf("ERROR: vid read returned: 0x%x\n", getvid());
+    }
+    assert(testval == getvid());
+
+    /* L2VIC_NO_PENDING (0xffffffff) should not update the vid */
+    setvid(0xffffffff);
+    if (testval != getvid()) {
+        printf("ERROR: vid read returned: 0x%x\n", getvid());
+    }
+
+    assert(testval == getvid());
+}
diff --git a/tests/tcg/hexagon/utimer.c b/tests/tcg/hexagon/utimer.c
new file mode 100644
index 0000000000000..ae3bca3201920
--- /dev/null
+++ b/tests/tcg/hexagon/utimer.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright(c) 2022-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+
+static int err;
+
+#include "hex_test.h"
+
+static uint64_t get_time()
+{
+    uint64_t time;
+    asm volatile("%0 = utimer\n\t"
+                 : "=r"(time)
+                 :
+                 :
+                 );
+    return time;
+}
+
+static uint64_t get_time_from_regs()
+{
+    uint32_t time_low;
+    uint32_t time_high;
+    asm volatile("%0 = utimerhi\n\t"
+                 "%1 = utimerlo\n\t"
+                 : "=r"(time_high), "=r"(time_low)
+                 :
+                 :
+                 );
+    return ((uint64_t)time_high << 32) | (uint64_t)time_low;
+}
+
+
+int main()
+{
+    err = 0;
+
+    uint64_t t0 = get_time();
+    check64_ne(t0, 0);
+
+    uint64_t t1 = get_time_from_regs();
+    check64_ne(t1, 0);
+
+    puts(err ? "FAIL" : "PASS");
+    return err;
+}
diff --git a/tests/tcg/i386/Makefile.target b/tests/tcg/i386/Makefile.target
index bbe2c44b2a79e..f1df40411b09d 100644
--- a/tests/tcg/i386/Makefile.target
+++ b/tests/tcg/i386/Makefile.target
@@ -22,7 +22,7 @@ run-test-i386-sse-exceptions: QEMU_OPTS += -cpu max
 test-i386-pcmpistri: CFLAGS += -msse4.2
 run-test-i386-pcmpistri: QEMU_OPTS += -cpu max
 
-test-i386-bmi2: CFLAGS=-O2
+test-i386-bmi2: CFLAGS=-O2 -fwrapv
 run-test-i386-bmi2: QEMU_OPTS += -cpu max
 
 test-i386-adcox: CFLAGS=-O2
diff --git a/tests/tcg/i386/test-avx.c b/tests/tcg/i386/test-avx.c
index 230e6d84b84a9..80fe363cfc29b 100644
--- a/tests/tcg/i386/test-avx.c
+++ b/tests/tcg/i386/test-avx.c
@@ -244,7 +244,7 @@ v4di indexd = {0x00000002ffffffcdull, 0xfffffff500000010ull,
                0x0000003afffffff0ull, 0x000000000000000eull};
 
 v4di gather_mem[0x20];
-_Static_assert(sizeof(gather_mem) == 1024);
+_Static_assert(sizeof(gather_mem) == 1024, "gather_mem not expected size");
 
 void init_f16reg(v4di *r)
 {
diff --git a/tests/tcg/i386/test-i386-adcox.c b/tests/tcg/i386/test-i386-adcox.c
index 16169efff8231..a717064adb0cf 100644
--- a/tests/tcg/i386/test-i386-adcox.c
+++ b/tests/tcg/i386/test-i386-adcox.c
@@ -29,7 +29,7 @@ void test_adox_adcx(uint32_t in_c, uint32_t in_o, REG adcx_operand, REG adox_ope
         "adcx %3, %1;"
         "pushf; pop %0"
         : "+r" (flags), "+r" (out_adcx), "+r" (out_adox)
-        : "r" ((REG)-1), "0" (flags), "1" (out_adcx), "2" (out_adox));
+        : "r" ((REG) - 1), "0" (flags), "1" (out_adcx), "2" (out_adox));
 
     assert(out_adcx == in_c + adcx_operand - 1);
     assert(out_adox == in_o + adox_operand - 1);
@@ -53,8 +53,8 @@ void test_adcx_adox(uint32_t in_c, uint32_t in_o, REG adcx_operand, REG adox_ope
         "adcx %3, %1;"
         "adox %3, %2;"
         "pushf; pop %0"
-        : "+r" (flags), "+r" (out_adcx), "+r" (out_adox)
-        : "r" ((REG)-1), "0" (flags), "1" (out_adcx), "2" (out_adox));
+        : "+r"(flags), "+r"(out_adcx), "+r"(out_adox)
+        : "r" ((REG)-1));
 
     assert(out_adcx == in_c + adcx_operand - 1);
     assert(out_adox == in_o + adox_operand - 1);
diff --git a/tests/tcg/multiarch/Makefile.target b/tests/tcg/multiarch/Makefile.target
index 688a6be203cac..45c9cfe18c5bd 100644
--- a/tests/tcg/multiarch/Makefile.target
+++ b/tests/tcg/multiarch/Makefile.target
@@ -45,6 +45,14 @@ vma-pthread: LDFLAGS+=-pthread
 sigreturn-sigmask: CFLAGS+=-pthread
 sigreturn-sigmask: LDFLAGS+=-pthread
 
+# GCC versions 12/13/14/15 at least incorrectly complain about
+# "'SHA1Transform' reading 64 bytes from a region of size 0"; see the gcc bug
+# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106709
+# Since this is just a standard piece of library code we've borrowed for a
+# TCG test case, suppress the warning rather than trying to modify the
+# code to work around the compiler.
+sha1: CFLAGS+=-Wno-stringop-overread -Wno-unknown-warning-option
+
 # The vma-pthread seems very sensitive on gitlab and we currently
 # don't know if its exposing a real bug or the test is flaky.
 ifneq ($(GITLAB_CI),)
diff --git a/tests/tcg/multiarch/test-vma.c b/tests/tcg/multiarch/linux/test-vma.c
similarity index 100%
rename from tests/tcg/multiarch/test-vma.c
rename to tests/tcg/multiarch/linux/test-vma.c
diff --git a/tests/tcg/plugins/meson.build b/tests/tcg/plugins/meson.build
index 87a17d67bd450..c8cb0626a6da5 100644
--- a/tests/tcg/plugins/meson.build
+++ b/tests/tcg/plugins/meson.build
@@ -19,3 +19,5 @@ if t.length() > 0
 else
   run_target('test-plugins', command: find_program('true'))
 endif
+
+plugin_modules += t
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index 98ad89b390c1c..7410e6f352884 100644
--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
@@ -28,7 +28,6 @@
 #include "system/block-backend.h"
 #include "qapi/error.h"
 #include "qemu/main-loop.h"
-#include "qemu/clang-tsa.h"
 #include "iothread.h"
 
 static QemuEvent done_event;
diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c
index 7324ea4a68d34..2b358eaaa820f 100644
--- a/tests/unit/test-block-iothread.c
+++ b/tests/unit/test-block-iothread.c
@@ -29,7 +29,6 @@
 #include "system/block-backend.h"
 #include "qapi/error.h"
 #include "qobject/qdict.h"
-#include "qemu/clang-tsa.h"
 #include "qemu/main-loop.h"
 #include "iothread.h"
 
diff --git a/tests/unit/test-crypto-tlssession.c b/tests/unit/test-crypto-tlssession.c
index 3395f73560fb9..554054e9344c8 100644
--- a/tests/unit/test-crypto-tlssession.c
+++ b/tests/unit/test-crypto-tlssession.c
@@ -158,8 +158,7 @@ static void test_crypto_tls_session_psk(void)
             rv = qcrypto_tls_session_handshake(serverSess,
                                                &error_abort);
             g_assert(rv >= 0);
-            if (qcrypto_tls_session_get_handshake_status(serverSess) ==
-                QCRYPTO_TLS_HANDSHAKE_COMPLETE) {
+            if (rv == QCRYPTO_TLS_HANDSHAKE_COMPLETE) {
                 serverShake = true;
             }
         }
@@ -167,8 +166,7 @@ static void test_crypto_tls_session_psk(void)
             rv = qcrypto_tls_session_handshake(clientSess,
                                                &error_abort);
             g_assert(rv >= 0);
-            if (qcrypto_tls_session_get_handshake_status(clientSess) ==
-                QCRYPTO_TLS_HANDSHAKE_COMPLETE) {
+            if (rv == QCRYPTO_TLS_HANDSHAKE_COMPLETE) {
                 clientShake = true;
             }
         }
@@ -352,8 +350,7 @@ static void test_crypto_tls_session_x509(const void *opaque)
             rv = qcrypto_tls_session_handshake(serverSess,
                                                &error_abort);
             g_assert(rv >= 0);
-            if (qcrypto_tls_session_get_handshake_status(serverSess) ==
-                QCRYPTO_TLS_HANDSHAKE_COMPLETE) {
+            if (rv == QCRYPTO_TLS_HANDSHAKE_COMPLETE) {
                 serverShake = true;
             }
         }
@@ -361,8 +358,7 @@ static void test_crypto_tls_session_x509(const void *opaque)
             rv = qcrypto_tls_session_handshake(clientSess,
                                                &error_abort);
             g_assert(rv >= 0);
-            if (qcrypto_tls_session_get_handshake_status(clientSess) ==
-                QCRYPTO_TLS_HANDSHAKE_COMPLETE) {
+            if (rv == QCRYPTO_TLS_HANDSHAKE_COMPLETE) {
                 clientShake = true;
             }
         }
diff --git a/tests/unit/test-thread-pool.c b/tests/unit/test-thread-pool.c
index 1483e53473db5..33407b595d356 100644
--- a/tests/unit/test-thread-pool.c
+++ b/tests/unit/test-thread-pool.c
@@ -43,10 +43,10 @@ static void done_cb(void *opaque, int ret)
     active--;
 }
 
-static void test_submit(void)
+static void test_submit_no_complete(void)
 {
     WorkerTestData data = { .n = 0 };
-    thread_pool_submit(worker_cb, &data);
+    thread_pool_submit_aio(worker_cb, &data, NULL, NULL);
     while (data.n == 0) {
         aio_poll(ctx, true);
     }
@@ -236,7 +236,7 @@ int main(int argc, char **argv)
     ctx = qemu_get_current_aio_context();
 
     g_test_init(&argc, &argv, NULL);
-    g_test_add_func("/thread-pool/submit", test_submit);
+    g_test_add_func("/thread-pool/submit-no-complete", test_submit_no_complete);
     g_test_add_func("/thread-pool/submit-aio", test_submit_aio);
     g_test_add_func("/thread-pool/submit-co", test_submit_co);
     g_test_add_func("/thread-pool/submit-many", test_submit_many);
diff --git a/tests/vm/basevm.py b/tests/vm/basevm.py
index 6d41ac7574342..9e879e966a360 100644
--- a/tests/vm/basevm.py
+++ b/tests/vm/basevm.py
@@ -83,7 +83,7 @@ class BaseVM(object):
     # command to halt the guest, can be overridden by subclasses
     poweroff = "poweroff"
     # Time to wait for shutdown to finish.
-    shutdown_timeout_default = 30
+    shutdown_timeout_default = 90
     # enable IPv6 networking
     ipv6 = True
     # This is the timeout on the wait for console bytes.
diff --git a/tests/vm/generated/freebsd.json b/tests/vm/generated/freebsd.json
index 81fc38d798a5f..c03e1cd586373 100644
--- a/tests/vm/generated/freebsd.json
+++ b/tests/vm/generated/freebsd.json
@@ -73,6 +73,7 @@
     "usbredir",
     "virglrenderer",
     "vte3",
+    "vulkan-tools",
     "xorriso",
     "zstd"
   ],
diff --git a/trace/control-target.c b/trace/control-target.c
index d58e84f6dd8e9..57ceac2108429 100644
--- a/trace/control-target.c
+++ b/trace/control-target.c
@@ -8,8 +8,6 @@
  */
 
 #include "qemu/osdep.h"
-#include "qemu/lockable.h"
-#include "cpu.h"
 #include "trace/control.h"
 
 
diff --git a/trace/meson.build b/trace/meson.build
index c3412dc0ba5a2..3df4549355505 100644
--- a/trace/meson.build
+++ b/trace/meson.build
@@ -1,6 +1,4 @@
-system_ss.add(files('trace-hmp-cmds.c'))
-
-specific_ss.add(files('control-target.c'))
+system_ss.add(files('control-target.c', 'trace-hmp-cmds.c'))
 
 trace_events_files = []
 foreach item : [ '.' ] + trace_events_subdirs + qapi_trace_events
diff --git a/trace/simple.c b/trace/simple.c
index 18af590cf7b1c..c0aba00cb7f8f 100644
--- a/trace/simple.c
+++ b/trace/simple.c
@@ -366,7 +366,7 @@ void st_set_trace_file(const char *file)
         /* Type cast needed for Windows where getpid() returns an int. */
         trace_file_name = g_strdup_printf(CONFIG_TRACE_FILE "-" FMT_pid, (pid_t)getpid());
     } else {
-        trace_file_name = g_strdup_printf("%s", file);
+        trace_file_name = g_strdup(file);
     }
 
     st_set_trace_file_enabled(saved_enable);
diff --git a/ui/console-vc.c b/ui/console-vc.c
index fe20579832a57..df1341513d535 100644
--- a/ui/console-vc.c
+++ b/ui/console-vc.c
@@ -42,6 +42,8 @@ enum TTYState {
     TTY_STATE_NORM,
     TTY_STATE_ESC,
     TTY_STATE_CSI,
+    TTY_STATE_G0,
+    TTY_STATE_G1,
 };
 
 typedef struct QemuTextConsole {
@@ -88,6 +90,7 @@ struct VCChardev {
     int esc_params[MAX_ESC_PARAMS];
     int nb_esc_params;
     TextAttributes t_attrib; /* currently active text attributes */
+    TextAttributes t_attrib_saved;
     int x_saved, y_saved;
 };
 typedef struct VCChardev VCChardev;
@@ -615,10 +618,9 @@ static void vc_put_one(VCChardev *vc, int ch)
 
 static void vc_respond_str(VCChardev *vc, const char *buf)
 {
-    while (*buf) {
-        vc_put_one(vc, *buf);
-        buf++;
-    }
+    QemuTextConsole *s = vc->console;
+
+    qemu_chr_be_write(s->chr, (const uint8_t *)buf, strlen(buf));
 }
 
 /* set cursor, checking bounds */
@@ -643,6 +645,113 @@ static void vc_set_cursor(VCChardev *vc, int x, int y)
     s->y = y;
 }
 
+/**
+ * vc_csi_P() - (DCH) deletes one or more characters from the cursor
+ * position to the right. As characters are deleted, the remaining
+ * characters between the cursor and right margin move to the
+ * left. Character attributes move with the characters.
+ */
+static void vc_csi_P(struct VCChardev *vc, unsigned int nr)
+{
+    QemuTextConsole *s = vc->console;
+    TextCell *c1, *c2;
+    unsigned int x1, x2, y;
+    unsigned int end, len;
+
+    if (!nr) {
+        nr = 1;
+    }
+    if (nr > s->width - s->x) {
+        nr = s->width - s->x;
+        if (!nr) {
+            return;
+        }
+    }
+
+    x1 = s->x;
+    x2 = s->x + nr;
+    len = s->width - x2;
+    if (len) {
+        y = (s->y_base + s->y) % s->total_height;
+        c1 = &s->cells[y * s->width + x1];
+        c2 = &s->cells[y * s->width + x2];
+        memmove(c1, c2, len * sizeof(*c1));
+        for (end = x1 + len; x1 < end; x1++) {
+            vc_update_xy(vc, x1, s->y);
+        }
+    }
+    /* Clear the rest */
+    for (; x1 < s->width; x1++) {
+        vc_clear_xy(vc, x1, s->y);
+    }
+}
+
+/**
+ * vc_csi_at() - (ICH) inserts `nr` blank characters with the default
+ * character attribute. The cursor remains at the beginning of the
+ * blank characters. Text between the cursor and right margin moves to
+ * the right. Characters scrolled past the right margin are lost.
+ */
+static void vc_csi_at(struct VCChardev *vc, unsigned int nr)
+{
+    QemuTextConsole *s = vc->console;
+    TextCell *c1, *c2;
+    unsigned int x1, x2, y;
+    unsigned int end, len;
+
+    if (!nr) {
+        nr = 1;
+    }
+    if (nr > s->width - s->x) {
+        nr = s->width - s->x;
+        if (!nr) {
+            return;
+        }
+    }
+
+    x1 = s->x + nr;
+    x2 = s->x;
+    len = s->width - x1;
+    if (len) {
+        y = (s->y_base + s->y) % s->total_height;
+        c1 = &s->cells[y * s->width + x1];
+        c2 = &s->cells[y * s->width + x2];
+        memmove(c1, c2, len * sizeof(*c1));
+        for (end = x1 + len; x1 < end; x1++) {
+            vc_update_xy(vc, x1, s->y);
+        }
+    }
+    /* Insert blanks */
+    for (x1 = s->x; x1 < s->x + nr; x1++) {
+        vc_clear_xy(vc, x1, s->y);
+    }
+}
+
+/**
+ * vc_save_cursor() - saves cursor position and character attributes.
+ */
+static void vc_save_cursor(VCChardev *vc)
+{
+    QemuTextConsole *s = vc->console;
+
+    vc->x_saved = s->x;
+    vc->y_saved = s->y;
+    vc->t_attrib_saved = vc->t_attrib;
+}
+
+/**
+ * vc_restore_cursor() - restores cursor position and character
+ * attributes from saved state.
+ */
+static void vc_restore_cursor(VCChardev *vc)
+{
+    QemuTextConsole *s = vc->console;
+
+    s->x = vc->x_saved;
+    s->y = vc->y_saved;
+    vc->t_attrib = vc->t_attrib_saved;
+}
+
 static void vc_putchar(VCChardev *vc, int ch)
 {
     QemuTextConsole *s = vc->console;
@@ -694,6 +803,16 @@ static void vc_putchar(VCChardev *vc, int ch)
                 vc->esc_params[i] = 0;
             vc->nb_esc_params = 0;
             vc->state = TTY_STATE_CSI;
+        } else if (ch == '(') {
+            vc->state = TTY_STATE_G0;
+        } else if (ch == ')') {
+            vc->state = TTY_STATE_G1;
+        } else if (ch == '7') {
+            vc_save_cursor(vc);
+            vc->state = TTY_STATE_NORM;
+        } else if (ch == '8') {
+            vc_restore_cursor(vc);
+            vc->state = TTY_STATE_NORM;
         } else {
             vc->state = TTY_STATE_NORM;
         }
@@ -810,6 +929,9 @@ static void vc_putchar(VCChardev *vc, int ch)
                     break;
                 }
                 break;
+            case 'P':
+                vc_csi_P(vc, vc->esc_params[0]);
+                break;
             case 'm':
                 vc_handle_escape(vc);
                 break;
@@ -822,21 +944,19 @@ static void vc_putchar(VCChardev *vc, int ch)
                 case 6:
                     /* report cursor position */
                     response = g_strdup_printf("\033[%d;%dR",
-                           (s->y_base + s->y) % s->total_height + 1,
-                            s->x + 1);
+                                               s->y + 1, s->x + 1);
                     vc_respond_str(vc, response);
                     break;
                 }
                 break;
             case 's':
-                /* save cursor position */
-                vc->x_saved = s->x;
-                vc->y_saved = s->y;
+                vc_save_cursor(vc);
                 break;
             case 'u':
-                /* restore cursor position */
-                s->x = vc->x_saved;
-                s->y = vc->y_saved;
+                vc_restore_cursor(vc);
+                break;
+            case '@':
+                vc_csi_at(vc, vc->esc_params[0]);
                 break;
             default:
                 trace_console_putchar_unhandled(ch);
@@ -844,6 +964,16 @@ static void vc_putchar(VCChardev *vc, int ch)
             }
             break;
         }
+        break;
+    case TTY_STATE_G0: /* set character sets */
+    case TTY_STATE_G1: /* set character sets */
+        switch (ch) {
+        case 'B':
+            /* Latin-1 map */
+            break;
+        }
+        vc->state = TTY_STATE_NORM;
+        break;
     }
 }
 
diff --git a/ui/console.c b/ui/console.c
index 914ed2cc76bfe..6456e8dd90867 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -1386,9 +1386,7 @@ char *qemu_console_get_label(QemuConsole *con)
                                        object_get_typename(c->device),
                                        c->head);
             } else {
-                return g_strdup_printf("%s", dev->id ?
-                                       dev->id :
-                                       object_get_typename(c->device));
+                return g_strdup(dev->id ? : object_get_typename(c->device));
             }
         }
         return g_strdup("VGA");
diff --git a/ui/gtk.c b/ui/gtk.c
index c02374314893d..59bda83da657a 100644
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -1944,8 +1944,7 @@ static GSList *gd_vc_vte_init(GtkDisplayState *s, VirtualConsole *vc,
     vcd->console = vc;
 
     snprintf(buffer, sizeof(buffer), "vc%d", idx);
-    vc->label = g_strdup_printf("%s", vc->vte.chr->label
-                                ? vc->vte.chr->label : buffer);
+    vc->label = g_strdup(vc->vte.chr->label ? : buffer);
     group = gd_vc_menu_init(s, vc, idx, group, view_menu);
 
     vc->vte.terminal = vte_terminal_new();
diff --git a/ui/meson.build b/ui/meson.build
index 28c7381dd102b..35fb04cadf3a6 100644
--- a/ui/meson.build
+++ b/ui/meson.build
@@ -120,10 +120,6 @@ if gtk.found()
 endif
 
 if sdl.found()
-  if host_os == 'windows'
-    system_ss.add(files('win32-kbd-hook.c'))
-  endif
-
   sdl_ss = ss.source_set()
   sdl_ss.add(sdl, sdl_image, pixman, glib, files(
     'sdl2-2d.c',
diff --git a/ui/sdl2.c b/ui/sdl2.c
index 445eb1dd9f9ae..cda4293a53e6c 100644
--- a/ui/sdl2.c
+++ b/ui/sdl2.c
@@ -32,7 +32,6 @@
 #include "system/runstate.h"
 #include "system/runstate-action.h"
 #include "system/system.h"
-#include "ui/win32-kbd-hook.h"
 #include "qemu/log.h"
 #include "qemu-main.h"
 
@@ -263,7 +262,6 @@ static void sdl_grab_start(struct sdl2_console *scon)
     }
     SDL_SetWindowGrab(scon->real_window, SDL_TRUE);
     gui_grab = 1;
-    win32_kbd_set_grab(true);
     sdl_update_caption(scon);
 }
 
@@ -271,7 +269,6 @@ static void sdl_grab_end(struct sdl2_console *scon)
 {
     SDL_SetWindowGrab(scon->real_window, SDL_FALSE);
     gui_grab = 0;
-    win32_kbd_set_grab(false);
     sdl_show_cursor(scon);
     sdl_update_caption(scon);
 }
@@ -372,19 +369,6 @@ static int get_mod_state(void)
     }
 }
 
-static void *sdl2_win32_get_hwnd(struct sdl2_console *scon)
-{
-#ifdef CONFIG_WIN32
-    SDL_SysWMinfo info;
-
-    SDL_VERSION(&info.version);
-    if (SDL_GetWindowWMInfo(scon->real_window, &info)) {
-        return info.info.win.window;
-    }
-#endif
-    return NULL;
-}
-
 static void handle_keydown(SDL_Event *ev)
 {
     int win;
@@ -609,10 +593,6 @@ static void handle_windowevent(SDL_Event *ev)
         sdl2_redraw(scon);
         break;
     case SDL_WINDOWEVENT_FOCUS_GAINED:
-        win32_kbd_set_grab(gui_grab);
-        if (qemu_console_is_graphic(scon->dcl.con)) {
-            win32_kbd_set_window(sdl2_win32_get_hwnd(scon));
-        }
         /* fall through */
     case SDL_WINDOWEVENT_ENTER:
         if (!gui_grab && (qemu_input_is_absolute(scon->dcl.con) || absolute_enabled)) {
@@ -628,9 +608,6 @@ static void handle_windowevent(SDL_Event *ev)
         scon->ignore_hotkeys = get_mod_state();
         break;
     case SDL_WINDOWEVENT_FOCUS_LOST:
-        if (qemu_console_is_graphic(scon->dcl.con)) {
-            win32_kbd_set_window(NULL);
-        }
         if (gui_grab && !gui_fullscreen) {
             sdl_grab_end(scon);
         }
@@ -870,10 +847,7 @@ static void sdl2_display_init(DisplayState *ds, DisplayOptions *o)
 #ifdef SDL_HINT_VIDEO_X11_NET_WM_BYPASS_COMPOSITOR /* only available since SDL 2.0.8 */
     SDL_SetHint(SDL_HINT_VIDEO_X11_NET_WM_BYPASS_COMPOSITOR, "0");
 #endif
-#ifndef CONFIG_WIN32
-    /* QEMU uses its own low level keyboard hook procedure on Windows */
     SDL_SetHint(SDL_HINT_GRAB_KEYBOARD, "1");
-#endif
 #ifdef SDL_HINT_ALLOW_ALT_TAB_WHILE_GRABBED
     SDL_SetHint(SDL_HINT_ALLOW_ALT_TAB_WHILE_GRABBED, "0");
 #endif
diff --git a/util/aio-posix.c b/util/aio-posix.c
index 06bf9f456cf97..2e0a5dadc4d74 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -28,6 +28,9 @@
 /* Stop userspace polling on a handler if it isn't active for some time */
 #define POLL_IDLE_INTERVAL_NS (7 * NANOSECONDS_PER_SECOND)
 
+static void adjust_polling_time(AioContext *ctx, AioPolledEvent *poll,
+                                int64_t block_ns);
+
 bool aio_poll_disabled(AioContext *ctx)
 {
     return qatomic_read(&ctx->poll_disable_cnt);
@@ -392,7 +395,8 @@ static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
  * scanning all handlers with aio_dispatch_handlers().
  */
 static bool aio_dispatch_ready_handlers(AioContext *ctx,
-                                        AioHandlerList *ready_list)
+                                        AioHandlerList *ready_list,
+                                        int64_t block_ns)
 {
     bool progress = false;
     AioHandler *node;
@@ -400,6 +404,14 @@ static bool aio_dispatch_ready_handlers(AioContext *ctx,
     while ((node = QLIST_FIRST(ready_list))) {
         QLIST_REMOVE(node, node_ready);
         progress = aio_dispatch_handler(ctx, node) || progress;
+
+        /*
+         * Adjust polling time only after aio_dispatch_handler(), which can
+         * add the handler to ctx->poll_aio_handlers.
+         */
+        if (ctx->poll_max_ns && QLIST_IS_INSERTED(node, node_poll)) {
+            adjust_polling_time(ctx, &node->poll, block_ns);
+        }
     }
 
     return progress;
@@ -579,13 +591,19 @@ static bool run_poll_handlers(AioContext *ctx, AioHandlerList *ready_list,
 static bool try_poll_mode(AioContext *ctx, AioHandlerList *ready_list,
                           int64_t *timeout)
 {
+    AioHandler *node;
     int64_t max_ns;
 
     if (QLIST_EMPTY_RCU(&ctx->poll_aio_handlers)) {
         return false;
     }
 
-    max_ns = qemu_soonest_timeout(*timeout, ctx->poll_ns);
+    max_ns = 0;
+    QLIST_FOREACH(node, &ctx->poll_aio_handlers, node_poll) {
+        max_ns = MAX(max_ns, node->poll.ns);
+    }
+    max_ns = qemu_soonest_timeout(*timeout, max_ns);
+
     if (max_ns && !ctx->fdmon_ops->need_wait(ctx)) {
         /*
          * Enable poll mode. It pairs with the poll_set_started() in
@@ -600,6 +618,46 @@ static bool try_poll_mode(AioContext *ctx, AioHandlerList *ready_list,
     return false;
 }
 
+static void adjust_polling_time(AioContext *ctx, AioPolledEvent *poll,
+                                int64_t block_ns)
+{
+    if (block_ns <= poll->ns) {
+        /* This is the sweet spot, no adjustment needed */
+    } else if (block_ns > ctx->poll_max_ns) {
+        /* We'd have to poll for too long, poll less */
+        int64_t old = poll->ns;
+
+        if (ctx->poll_shrink) {
+            poll->ns /= ctx->poll_shrink;
+        } else {
+            poll->ns = 0;
+        }
+
+        trace_poll_shrink(ctx, old, poll->ns);
+    } else if (poll->ns < ctx->poll_max_ns &&
+               block_ns < ctx->poll_max_ns) {
+        /* There is room to grow, poll longer */
+        int64_t old = poll->ns;
+        int64_t grow = ctx->poll_grow;
+
+        if (grow == 0) {
+            grow = 2;
+        }
+
+        if (poll->ns) {
+            poll->ns *= grow;
+        } else {
+            poll->ns = 4000; /* start polling at 4 microseconds */
+        }
+
+        if (poll->ns > ctx->poll_max_ns) {
+            poll->ns = ctx->poll_max_ns;
+        }
+
+        trace_poll_grow(ctx, old, poll->ns);
+    }
+}
+
 bool aio_poll(AioContext *ctx, bool blocking)
 {
     AioHandlerList ready_list = QLIST_HEAD_INITIALIZER(ready_list);
@@ -607,6 +665,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
     bool use_notify_me;
     int64_t timeout;
     int64_t start = 0;
+    int64_t block_ns = 0;
 
     /*
      * There cannot be two concurrent aio_poll calls for the same AioContext (or
@@ -679,49 +738,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
 
     aio_notify_accept(ctx);
 
-    /* Adjust polling time */
+    /* Calculate blocked time for adaptive polling */
     if (ctx->poll_max_ns) {
-        int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
-
-        if (block_ns <= ctx->poll_ns) {
-            /* This is the sweet spot, no adjustment needed */
-        } else if (block_ns > ctx->poll_max_ns) {
-            /* We'd have to poll for too long, poll less */
-            int64_t old = ctx->poll_ns;
-
-            if (ctx->poll_shrink) {
-                ctx->poll_ns /= ctx->poll_shrink;
-            } else {
-                ctx->poll_ns = 0;
-            }
-
-            trace_poll_shrink(ctx, old, ctx->poll_ns);
-        } else if (ctx->poll_ns < ctx->poll_max_ns &&
-                   block_ns < ctx->poll_max_ns) {
-            /* There is room to grow, poll longer */
-            int64_t old = ctx->poll_ns;
-            int64_t grow = ctx->poll_grow;
-
-            if (grow == 0) {
-                grow = 2;
-            }
-
-            if (ctx->poll_ns) {
-                ctx->poll_ns *= grow;
-            } else {
-                ctx->poll_ns = 4000; /* start polling at 4 microseconds */
-            }
-
-            if (ctx->poll_ns > ctx->poll_max_ns) {
-                ctx->poll_ns = ctx->poll_max_ns;
-            }
-
-            trace_poll_grow(ctx, old, ctx->poll_ns);
-        }
+        block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
     }
 
     progress |= aio_bh_poll(ctx);
-    progress |= aio_dispatch_ready_handlers(ctx, &ready_list);
+    progress |= aio_dispatch_ready_handlers(ctx, &ready_list, block_ns);
 
     aio_free_deleted_handlers(ctx);
 
@@ -767,11 +790,18 @@ void aio_context_use_g_source(AioContext *ctx)
 void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
                                  int64_t grow, int64_t shrink, Error **errp)
 {
+    AioHandler *node;
+
+    qemu_lockcnt_inc(&ctx->list_lock);
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+        node->poll.ns = 0;
+    }
+    qemu_lockcnt_dec(&ctx->list_lock);
+
     /* No thread synchronization here, it doesn't matter if an incorrect value
      * is used once.
      */
     ctx->poll_max_ns = max_ns;
-    ctx->poll_ns = 0;
     ctx->poll_grow = grow;
     ctx->poll_shrink = shrink;
 
diff --git a/util/aio-posix.h b/util/aio-posix.h
index 4264c518be012..82a0201ea48b1 100644
--- a/util/aio-posix.h
+++ b/util/aio-posix.h
@@ -38,6 +38,7 @@ struct AioHandler {
 #endif
     int64_t poll_idle_timeout; /* when to stop userspace polling */
     bool poll_ready; /* has polling detected an event? */
+    AioPolledEvent poll;
 };
 
 /* Add a handler to a ready list */
diff --git a/util/async.c b/util/async.c
index 0fe29436090dc..863416dee929f 100644
--- a/util/async.c
+++ b/util/async.c
@@ -369,7 +369,7 @@ aio_ctx_finalize(GSource     *source)
     QEMUBH *bh;
     unsigned flags;
 
-    thread_pool_free(ctx->thread_pool);
+    thread_pool_free_aio(ctx->thread_pool);
 
 #ifdef CONFIG_LINUX_AIO
     if (ctx->linux_aio) {
@@ -435,10 +435,10 @@ GSource *aio_get_g_source(AioContext *ctx)
     return &ctx->source;
 }
 
-ThreadPool *aio_get_thread_pool(AioContext *ctx)
+ThreadPoolAio *aio_get_thread_pool(AioContext *ctx)
 {
     if (!ctx->thread_pool) {
-        ctx->thread_pool = thread_pool_new(ctx);
+        ctx->thread_pool = thread_pool_new_aio(ctx);
     }
     return ctx->thread_pool;
 }
@@ -609,7 +609,6 @@ AioContext *aio_context_new(Error **errp)
     qemu_rec_mutex_init(&ctx->lock);
     timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);
 
-    ctx->poll_ns = 0;
     ctx->poll_max_ns = 0;
     ctx->poll_grow = 0;
     ctx->poll_shrink = 0;
diff --git a/util/cacheflush.c b/util/cacheflush.c
index a08906155a9b6..1d12899a39223 100644
--- a/util/cacheflush.c
+++ b/util/cacheflush.c
@@ -279,9 +279,11 @@ void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
         for (p = rw & -dcache_lsize; p < rw + len; p += dcache_lsize) {
             asm volatile("dc\tcvau, %0" : : "r" (p) : "memory");
         }
-        asm volatile("dsb\tish" : : : "memory");
     }
 
+    /* DSB unconditionally to ensure any outstanding writes are committed. */
+    asm volatile("dsb\tish" : : : "memory");
+
     /*
      * If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point
      * of Unification is not required for instruction to data coherence.
diff --git a/util/hexdump.c b/util/hexdump.c
index ae0d4992dcf32..f29ffceb74668 100644
--- a/util/hexdump.c
+++ b/util/hexdump.c
@@ -15,6 +15,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu/cutils.h"
+#include "qemu/host-utils.h"
 
 static inline char hexdump_nibble(unsigned x)
 {
@@ -97,3 +98,20 @@ void qemu_hexdump(FILE *fp, const char *prefix,
     }
 
 }
+
+void qemu_hexdump_to_buffer(char *restrict buffer, size_t buffer_size,
+                            const uint8_t *restrict data, size_t data_size)
+{
+    size_t i;
+    uint64_t required_buffer_size;
+    bool overflow = umul64_overflow(data_size, 2, &required_buffer_size);
+    overflow |= uadd64_overflow(required_buffer_size, 1, &required_buffer_size);
+    assert(!overflow && buffer_size >= required_buffer_size);
+
+    for (i = 0; i < data_size; i++) {
+        uint8_t val = data[i];
+        *(buffer++) = hexdump_nibble(val >> 4);
+        *(buffer++) = hexdump_nibble(val & 0xf);
+    }
+    *buffer = '\0';
+}
diff --git a/util/iov.c b/util/iov.c
index 7777116123d0f..f8536f0474a6d 100644
--- a/util/iov.c
+++ b/util/iov.c
@@ -37,7 +37,6 @@ size_t iov_from_buf_full(const struct iovec *iov, unsigned int iov_cnt,
             offset -= iov[i].iov_len;
         }
     }
-    assert(offset == 0);
     return done;
 }
 
@@ -56,7 +55,6 @@ size_t iov_to_buf_full(const struct iovec *iov, const unsigned int iov_cnt,
             offset -= iov[i].iov_len;
         }
     }
-    assert(offset == 0);
     return done;
 }
 
@@ -75,7 +73,6 @@ size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt,
             offset -= iov[i].iov_len;
         }
     }
-    assert(offset == 0);
     return done;
 }
 
@@ -277,7 +274,6 @@ unsigned iov_copy(struct iovec *dst_iov, unsigned int dst_iov_cnt,
         bytes -= len;
         offset = 0;
     }
-    assert(offset == 0);
     return j;
 }
 
@@ -348,7 +344,6 @@ size_t qemu_iovec_concat_iov(QEMUIOVector *dst,
             soffset -= src_iov[i].iov_len;
         }
     }
-    assert(soffset == 0); /* offset beyond end of src */
 
     return done;
 }
diff --git a/util/iova-tree.c b/util/iova-tree.c
index 06295e2755029..5b0c95ff1583c 100644
--- a/util/iova-tree.c
+++ b/util/iova-tree.c
@@ -257,3 +257,49 @@ void iova_tree_destroy(IOVATree *tree)
     g_tree_destroy(tree->tree);
     g_free(tree);
 }
+
+static int gpa_tree_compare(gconstpointer a, gconstpointer b, gpointer data)
+{
+    const DMAMap *m1 = a, *m2 = b;
+
+    if (m1->translated_addr > m2->translated_addr + m2->size) {
+        return 1;
+    }
+
+    if (m1->translated_addr + m1->size < m2->translated_addr) {
+        return -1;
+    }
+
+    /* Overlapped */
+    return 0;
+}
+
+IOVATree *gpa_tree_new(void)
+{
+    IOVATree *gpa_tree = g_new0(IOVATree, 1);
+
+    gpa_tree->tree = g_tree_new_full(gpa_tree_compare, NULL, g_free, NULL);
+
+    return gpa_tree;
+}
+
+int gpa_tree_insert(IOVATree *tree, const DMAMap *map)
+{
+    DMAMap *new;
+
+    if (map->translated_addr + map->size < map->translated_addr ||
+        map->perm == IOMMU_NONE) {
+        return IOVA_ERR_INVALID;
+    }
+
+    /* We don't allow inserting ranges that overlap with existing ones */
+    if (iova_tree_find(tree, map)) {
+        return IOVA_ERR_OVERLAP;
+    }
+
+    new = g_new0(DMAMap, 1);
+    memcpy(new, map, sizeof(*new));
+    iova_tree_insert_internal(tree->tree, new);
+
+    return IOVA_OK;
+}
diff --git a/util/module.c b/util/module.c
index 3eb0f06df1655..1aa2079d01324 100644
--- a/util/module.c
+++ b/util/module.c
@@ -234,7 +234,7 @@ int module_load(const char *prefix, const char *name, Error **errp)
 
     search_dir = getenv("QEMU_MODULE_DIR");
     if (search_dir != NULL) {
-        dirs[n_dirs++] = g_strdup_printf("%s", search_dir);
+        dirs[n_dirs++] = g_strdup(search_dir);
     }
     dirs[n_dirs++] = get_relocated_path(CONFIG_QEMU_MODDIR);
 
diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
index 6fff4162ac6b2..b2e26e21205b6 100644
--- a/util/qemu-thread-posix.c
+++ b/util/qemu-thread-posix.c
@@ -17,7 +17,6 @@
 #include "qemu-thread-common.h"
 #include "qemu/tsan.h"
 #include "qemu/bitmap.h"
-#include "qemu/clang-tsa.h"
 
 #ifdef CONFIG_PTHREAD_SET_NAME_NP
 #include <pthread_np.h>
diff --git a/util/qemu-timer.c b/util/qemu-timer.c
index 3243d2c515c10..788466fe22f4f 100644
--- a/util/qemu-timer.c
+++ b/util/qemu-timer.c
@@ -409,10 +409,6 @@ static bool timer_mod_ns_locked(QEMUTimerList *timer_list,
 
 static void timerlist_rearm(QEMUTimerList *timer_list)
 {
-    /* Interrupt execution to force deadline recalculation.  */
-    if (icount_enabled() && timer_list->clock->type == QEMU_CLOCK_VIRTUAL) {
-        icount_start_warp_timer();
-    }
     timerlist_notify(timer_list);
 }
 
diff --git a/util/thread-pool.c b/util/thread-pool.c
index 27eb777e855b6..d2ead6b728576 100644
--- a/util/thread-pool.c
+++ b/util/thread-pool.c
@@ -23,9 +23,9 @@
 #include "block/thread-pool.h"
 #include "qemu/main-loop.h"
 
-static void do_spawn_thread(ThreadPool *pool);
+static void do_spawn_thread(ThreadPoolAio *pool);
 
-typedef struct ThreadPoolElement ThreadPoolElement;
+typedef struct ThreadPoolElementAio ThreadPoolElementAio;
 
 enum ThreadState {
     THREAD_QUEUED,
@@ -33,9 +33,9 @@ enum ThreadState {
     THREAD_DONE,
 };
 
-struct ThreadPoolElement {
+struct ThreadPoolElementAio {
     BlockAIOCB common;
-    ThreadPool *pool;
+    ThreadPoolAio *pool;
     ThreadPoolFunc *func;
     void *arg;
 
@@ -47,13 +47,13 @@ struct ThreadPoolElement {
     int ret;
 
     /* Access to this list is protected by lock.  */
-    QTAILQ_ENTRY(ThreadPoolElement) reqs;
+    QTAILQ_ENTRY(ThreadPoolElementAio) reqs;
 
     /* This list is only written by the thread pool's mother thread.  */
-    QLIST_ENTRY(ThreadPoolElement) all;
+    QLIST_ENTRY(ThreadPoolElementAio) all;
 };
 
-struct ThreadPool {
+struct ThreadPoolAio {
     AioContext *ctx;
     QEMUBH *completion_bh;
     QemuMutex lock;
@@ -62,10 +62,10 @@ struct ThreadPool {
     QEMUBH *new_thread_bh;
 
     /* The following variables are only accessed from one AioContext. */
-    QLIST_HEAD(, ThreadPoolElement) head;
+    QLIST_HEAD(, ThreadPoolElementAio) head;
 
     /* The following variables are protected by lock.  */
-    QTAILQ_HEAD(, ThreadPoolElement) request_list;
+    QTAILQ_HEAD(, ThreadPoolElementAio) request_list;
     int cur_threads;
     int idle_threads;
     int new_threads;     /* backlog of threads we need to create */
@@ -76,14 +76,14 @@ struct ThreadPool {
 
 static void *worker_thread(void *opaque)
 {
-    ThreadPool *pool = opaque;
+    ThreadPoolAio *pool = opaque;
 
     qemu_mutex_lock(&pool->lock);
     pool->pending_threads--;
     do_spawn_thread(pool);
 
     while (pool->cur_threads <= pool->max_threads) {
-        ThreadPoolElement *req;
+        ThreadPoolElementAio *req;
         int ret;
 
         if (QTAILQ_EMPTY(&pool->request_list)) {
@@ -131,7 +131,7 @@ static void *worker_thread(void *opaque)
     return NULL;
 }
 
-static void do_spawn_thread(ThreadPool *pool)
+static void do_spawn_thread(ThreadPoolAio *pool)
 {
     QemuThread t;
 
@@ -148,14 +148,14 @@ static void do_spawn_thread(ThreadPool *pool)
 
 static void spawn_thread_bh_fn(void *opaque)
 {
-    ThreadPool *pool = opaque;
+    ThreadPoolAio *pool = opaque;
 
     qemu_mutex_lock(&pool->lock);
     do_spawn_thread(pool);
     qemu_mutex_unlock(&pool->lock);
 }
 
-static void spawn_thread(ThreadPool *pool)
+static void spawn_thread(ThreadPoolAio *pool)
 {
     pool->cur_threads++;
     pool->new_threads++;
@@ -173,8 +173,8 @@ static void spawn_thread(ThreadPool *pool)
 
 static void thread_pool_completion_bh(void *opaque)
 {
-    ThreadPool *pool = opaque;
-    ThreadPoolElement *elem, *next;
+    ThreadPoolAio *pool = opaque;
+    ThreadPoolElementAio *elem, *next;
 
     defer_call_begin(); /* cb() may use defer_call() to coalesce work */
 
@@ -184,8 +184,8 @@ static void thread_pool_completion_bh(void *opaque)
             continue;
         }
 
-        trace_thread_pool_complete(pool, elem, elem->common.opaque,
-                                   elem->ret);
+        trace_thread_pool_complete_aio(pool, elem, elem->common.opaque,
+                                       elem->ret);
         QLIST_REMOVE(elem, all);
 
         if (elem->common.cb) {
@@ -217,10 +217,10 @@ static void thread_pool_completion_bh(void *opaque)
 
 static void thread_pool_cancel(BlockAIOCB *acb)
 {
-    ThreadPoolElement *elem = (ThreadPoolElement *)acb;
-    ThreadPool *pool = elem->pool;
+    ThreadPoolElementAio *elem = (ThreadPoolElementAio *)acb;
+    ThreadPoolAio *pool = elem->pool;
 
-    trace_thread_pool_cancel(elem, elem->common.opaque);
+    trace_thread_pool_cancel_aio(elem, elem->common.opaque);
 
     QEMU_LOCK_GUARD(&pool->lock);
     if (elem->state == THREAD_QUEUED) {
@@ -234,16 +234,16 @@ static void thread_pool_cancel(BlockAIOCB *acb)
 }
 
 static const AIOCBInfo thread_pool_aiocb_info = {
-    .aiocb_size         = sizeof(ThreadPoolElement),
+    .aiocb_size         = sizeof(ThreadPoolElementAio),
     .cancel_async       = thread_pool_cancel,
 };
 
 BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
                                    BlockCompletionFunc *cb, void *opaque)
 {
-    ThreadPoolElement *req;
+    ThreadPoolElementAio *req;
     AioContext *ctx = qemu_get_current_aio_context();
-    ThreadPool *pool = aio_get_thread_pool(ctx);
+    ThreadPoolAio *pool = aio_get_thread_pool(ctx);
 
     /* Assert that the thread submitting work is the same running the pool */
     assert(pool->ctx == qemu_get_current_aio_context());
@@ -256,7 +256,7 @@ BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
 
     QLIST_INSERT_HEAD(&pool->head, req, all);
 
-    trace_thread_pool_submit(pool, req, arg);
+    trace_thread_pool_submit_aio(pool, req, arg);
 
     qemu_mutex_lock(&pool->lock);
     if (pool->idle_threads == 0 && pool->cur_threads < pool->max_threads) {
@@ -290,12 +290,7 @@ int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg)
     return tpc.ret;
 }
 
-void thread_pool_submit(ThreadPoolFunc *func, void *arg)
-{
-    thread_pool_submit_aio(func, arg, NULL, NULL);
-}
-
-void thread_pool_update_params(ThreadPool *pool, AioContext *ctx)
+void thread_pool_update_params(ThreadPoolAio *pool, AioContext *ctx)
 {
     qemu_mutex_lock(&pool->lock);
 
@@ -322,7 +317,7 @@ void thread_pool_update_params(ThreadPool *pool, AioContext *ctx)
     qemu_mutex_unlock(&pool->lock);
 }
 
-static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx)
+static void thread_pool_init_one(ThreadPoolAio *pool, AioContext *ctx)
 {
     if (!ctx) {
         ctx = qemu_get_aio_context();
@@ -342,14 +337,14 @@ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx)
     thread_pool_update_params(pool, ctx);
 }
 
-ThreadPool *thread_pool_new(AioContext *ctx)
+ThreadPoolAio *thread_pool_new_aio(AioContext *ctx)
 {
-    ThreadPool *pool = g_new(ThreadPool, 1);
+    ThreadPoolAio *pool = g_new(ThreadPoolAio, 1);
     thread_pool_init_one(pool, ctx);
     return pool;
 }
 
-void thread_pool_free(ThreadPool *pool)
+void thread_pool_free_aio(ThreadPoolAio *pool)
 {
     if (!pool) {
         return;
@@ -379,3 +374,122 @@ void thread_pool_free(ThreadPool *pool)
     qemu_mutex_destroy(&pool->lock);
     g_free(pool);
 }
+
+struct ThreadPool {
+    GThreadPool *t;
+    size_t cur_work;
+    QemuMutex cur_work_lock;
+    QemuCond all_finished_cond;
+};
+
+typedef struct {
+    ThreadPoolFunc *func;
+    void *opaque;
+    GDestroyNotify opaque_destroy;
+} ThreadPoolElement;
+
+static void thread_pool_func(gpointer data, gpointer user_data)
+{
+    ThreadPool *pool = user_data;
+    g_autofree ThreadPoolElement *el = data;
+
+    el->func(el->opaque);
+
+    if (el->opaque_destroy) {
+        el->opaque_destroy(el->opaque);
+    }
+
+    QEMU_LOCK_GUARD(&pool->cur_work_lock);
+
+    assert(pool->cur_work > 0);
+    pool->cur_work--;
+
+    if (pool->cur_work == 0) {
+        qemu_cond_signal(&pool->all_finished_cond);
+    }
+}
+
+ThreadPool *thread_pool_new(void)
+{
+    ThreadPool *pool = g_new(ThreadPool, 1);
+
+    pool->cur_work = 0;
+    qemu_mutex_init(&pool->cur_work_lock);
+    qemu_cond_init(&pool->all_finished_cond);
+
+    pool->t = g_thread_pool_new(thread_pool_func, pool, 0, TRUE, NULL);
+    /*
+     * g_thread_pool_new() can only return errors if initial thread(s)
+     * creation fails but we ask for 0 initial threads above.
+     */
+    assert(pool->t);
+
+    return pool;
+}
+
+void thread_pool_free(ThreadPool *pool)
+{
+    /*
+     * With _wait = TRUE this effectively waits for all
+     * previously submitted work to complete first.
+     */
+    g_thread_pool_free(pool->t, FALSE, TRUE);
+
+    qemu_cond_destroy(&pool->all_finished_cond);
+    qemu_mutex_destroy(&pool->cur_work_lock);
+
+    g_free(pool);
+}
+
+void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func,
+                        void *opaque, GDestroyNotify opaque_destroy)
+{
+    ThreadPoolElement *el = g_new(ThreadPoolElement, 1);
+
+    el->func = func;
+    el->opaque = opaque;
+    el->opaque_destroy = opaque_destroy;
+
+    WITH_QEMU_LOCK_GUARD(&pool->cur_work_lock) {
+        pool->cur_work++;
+    }
+
+    /*
+     * Ignore the return value since this function can only return errors
+     * if creation of an additional thread fails but even in this case the
+     * provided work is still getting queued (just for the existing threads).
+     */
+    g_thread_pool_push(pool->t, el, NULL);
+}
+
+void thread_pool_submit_immediate(ThreadPool *pool, ThreadPoolFunc *func,
+                                  void *opaque, GDestroyNotify opaque_destroy)
+{
+    thread_pool_submit(pool, func, opaque, opaque_destroy);
+    thread_pool_adjust_max_threads_to_work(pool);
+}
+
+void thread_pool_wait(ThreadPool *pool)
+{
+    QEMU_LOCK_GUARD(&pool->cur_work_lock);
+
+    while (pool->cur_work > 0) {
+        qemu_cond_wait(&pool->all_finished_cond,
+                       &pool->cur_work_lock);
+    }
+}
+
+bool thread_pool_set_max_threads(ThreadPool *pool,
+                                 int max_threads)
+{
+    assert(max_threads > 0);
+
+    return g_thread_pool_set_max_threads(pool->t, max_threads, NULL);
+}
+
+bool thread_pool_adjust_max_threads_to_work(ThreadPool *pool)
+{
+    QEMU_LOCK_GUARD(&pool->cur_work_lock);
+
+    return thread_pool_set_max_threads(pool, pool->cur_work);
+}
diff --git a/util/trace-events b/util/trace-events
index 49a4962e18869..bd8f25fb59209 100644
--- a/util/trace-events
+++ b/util/trace-events
@@ -14,9 +14,9 @@ aio_co_schedule_bh_cb(void *ctx, void *co) "ctx %p co %p"
 reentrant_aio(void *ctx, const char *name) "ctx %p name %s"
 
 # thread-pool.c
-thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
-thread_pool_complete(void *pool, void *req, void *opaque, int ret) "pool %p req %p opaque %p ret %d"
-thread_pool_cancel(void *req, void *opaque) "req %p opaque %p"
+thread_pool_submit_aio(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
+thread_pool_complete_aio(void *pool, void *req, void *opaque, int ret) "pool %p req %p opaque %p ret %d"
+thread_pool_cancel_aio(void *req, void *opaque) "req %p opaque %p"
 
 # buffer.c
 buffer_resize(const char *buf, size_t olen, size_t len) "%s: old %zd, new %zd"