qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 1/3] memory: reintroduce BQL-free fine-grained PIO/MMIO


From: Igor Mammedov
Subject: [PATCH 1/3] memory: reintroduce BQL-free fine-grained PIO/MMIO
Date: Fri, 20 Jun 2025 17:14:16 +0200

This patch brings back Jan's idea [1] of BQL-free IO access,
with a twist that whitelist read access only.

(as BQL-free write access in [1] used to cause issues [2]
and still does (Windows crash) if write path is not lock protected)

However with limiting it read path only, guest boots without issues.
This will let us make read access ACPI PM/HPET timers cheaper,
and prevent guest VCPUs BQL contention in case of workload
that heavily uses the timers.

2) 196ea13104f (memory: Add global-locking property to memory regions)
   ... de7ea885c539 (kvm: Switch to unlocked MMIO)
3) https://bugzilla.redhat.com/show_bug.cgi?id=1322713
   1beb99f787 (Revert "acpi: mark PMTIMER as unlocked")

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
---
 include/system/memory.h   | 10 +++++++++-
 hw/remote/vfio-user-obj.c |  2 +-
 system/memory.c           |  5 +++++
 system/memory_ldst.c.inc  | 18 +++++++++---------
 system/physmem.c          |  9 +++++----
 5 files changed, 29 insertions(+), 15 deletions(-)

diff --git a/include/system/memory.h b/include/system/memory.h
index fc35a0dcad..1afabf2d94 100644
--- a/include/system/memory.h
+++ b/include/system/memory.h
@@ -775,6 +775,7 @@ struct MemoryRegion {
     bool nonvolatile;
     bool rom_device;
     bool flush_coalesced_mmio;
+    bool lockless_ro_io;
     bool unmergeable;
     uint8_t dirty_log_mask;
     bool is_iommu;
@@ -2253,6 +2254,13 @@ void memory_region_set_flush_coalesced(MemoryRegion *mr);
  */
 void memory_region_clear_flush_coalesced(MemoryRegion *mr);
 
+/**
+ * memory_region_enable_lockless_ro_io: Enable lockless (BQL) read-only 
acceess.
+ *
+ * Enable BQL-free readonly access for devices with fine-grained locking.
+ */
+void memory_region_enable_lockless_ro_io(MemoryRegion *mr);
+
 /**
  * memory_region_add_eventfd: Request an eventfd to be triggered when a word
  *                            is written to a location.
@@ -3002,7 +3010,7 @@ MemTxResult 
address_space_write_cached_slow(MemoryRegionCache *cache,
                                             hwaddr len);
 
 int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr);
-bool prepare_mmio_access(MemoryRegion *mr);
+bool prepare_mmio_access(MemoryRegion *mr, bool read);
 
 static inline bool memory_region_supports_direct_access(MemoryRegion *mr)
 {
diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
index ea6165ebdc..936a9befd4 100644
--- a/hw/remote/vfio-user-obj.c
+++ b/hw/remote/vfio-user-obj.c
@@ -381,7 +381,7 @@ static int vfu_object_mr_rw(MemoryRegion *mr, uint8_t *buf, 
hwaddr offset,
          * The read/write logic used below is similar to the ones in
          * flatview_read/write_continue()
          */
-        release_lock = prepare_mmio_access(mr);
+        release_lock = prepare_mmio_access(mr, !is_write);
 
         access_size = memory_access_size(mr, size, offset);
 
diff --git a/system/memory.c b/system/memory.c
index 63b983efcd..5192195473 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -2558,6 +2558,11 @@ void memory_region_clear_flush_coalesced(MemoryRegion 
*mr)
     }
 }
 
+void memory_region_enable_lockless_ro_io(MemoryRegion *mr)
+{
+    mr->lockless_ro_io = true;
+}
+
 void memory_region_add_eventfd(MemoryRegion *mr,
                                hwaddr addr,
                                unsigned size,
diff --git a/system/memory_ldst.c.inc b/system/memory_ldst.c.inc
index 7f32d3d9ff..919357578c 100644
--- a/system/memory_ldst.c.inc
+++ b/system/memory_ldst.c.inc
@@ -35,7 +35,7 @@ static inline uint32_t glue(address_space_ldl_internal, 
SUFFIX)(ARG1_DECL,
     RCU_READ_LOCK();
     mr = TRANSLATE(addr, &addr1, &l, false, attrs);
     if (l < 4 || !memory_access_is_direct(mr, false, attrs)) {
-        release_lock |= prepare_mmio_access(mr);
+        release_lock |= prepare_mmio_access(mr, true);
 
         /* I/O case */
         r = memory_region_dispatch_read(mr, addr1, &val,
@@ -104,7 +104,7 @@ static inline uint64_t glue(address_space_ldq_internal, 
SUFFIX)(ARG1_DECL,
     RCU_READ_LOCK();
     mr = TRANSLATE(addr, &addr1, &l, false, attrs);
     if (l < 8 || !memory_access_is_direct(mr, false, attrs)) {
-        release_lock |= prepare_mmio_access(mr);
+        release_lock |= prepare_mmio_access(mr, true);
 
         /* I/O case */
         r = memory_region_dispatch_read(mr, addr1, &val,
@@ -171,7 +171,7 @@ uint8_t glue(address_space_ldub, SUFFIX)(ARG1_DECL,
     RCU_READ_LOCK();
     mr = TRANSLATE(addr, &addr1, &l, false, attrs);
     if (!memory_access_is_direct(mr, false, attrs)) {
-        release_lock |= prepare_mmio_access(mr);
+        release_lock |= prepare_mmio_access(mr, true);
 
         /* I/O case */
         r = memory_region_dispatch_read(mr, addr1, &val, MO_8, attrs);
@@ -208,7 +208,7 @@ static inline uint16_t glue(address_space_lduw_internal, 
SUFFIX)(ARG1_DECL,
     RCU_READ_LOCK();
     mr = TRANSLATE(addr, &addr1, &l, false, attrs);
     if (l < 2 || !memory_access_is_direct(mr, false, attrs)) {
-        release_lock |= prepare_mmio_access(mr);
+        release_lock |= prepare_mmio_access(mr, true);
 
         /* I/O case */
         r = memory_region_dispatch_read(mr, addr1, &val,
@@ -278,7 +278,7 @@ void glue(address_space_stl_notdirty, SUFFIX)(ARG1_DECL,
     RCU_READ_LOCK();
     mr = TRANSLATE(addr, &addr1, &l, true, attrs);
     if (l < 4 || !memory_access_is_direct(mr, true, attrs)) {
-        release_lock |= prepare_mmio_access(mr);
+        release_lock |= prepare_mmio_access(mr, false);
 
         r = memory_region_dispatch_write(mr, addr1, val, MO_32, attrs);
     } else {
@@ -315,7 +315,7 @@ static inline void glue(address_space_stl_internal, 
SUFFIX)(ARG1_DECL,
     RCU_READ_LOCK();
     mr = TRANSLATE(addr, &addr1, &l, true, attrs);
     if (l < 4 || !memory_access_is_direct(mr, true, attrs)) {
-        release_lock |= prepare_mmio_access(mr);
+        release_lock |= prepare_mmio_access(mr, false);
         r = memory_region_dispatch_write(mr, addr1, val,
                                          MO_32 | devend_memop(endian), attrs);
     } else {
@@ -378,7 +378,7 @@ void glue(address_space_stb, SUFFIX)(ARG1_DECL,
     RCU_READ_LOCK();
     mr = TRANSLATE(addr, &addr1, &l, true, attrs);
     if (!memory_access_is_direct(mr, true, attrs)) {
-        release_lock |= prepare_mmio_access(mr);
+        release_lock |= prepare_mmio_access(mr, false);
         r = memory_region_dispatch_write(mr, addr1, val, MO_8, attrs);
     } else {
         /* RAM case */
@@ -411,7 +411,7 @@ static inline void glue(address_space_stw_internal, 
SUFFIX)(ARG1_DECL,
     RCU_READ_LOCK();
     mr = TRANSLATE(addr, &addr1, &l, true, attrs);
     if (l < 2 || !memory_access_is_direct(mr, true, attrs)) {
-        release_lock |= prepare_mmio_access(mr);
+        release_lock |= prepare_mmio_access(mr, false);
         r = memory_region_dispatch_write(mr, addr1, val,
                                          MO_16 | devend_memop(endian), attrs);
     } else {
@@ -475,7 +475,7 @@ static void glue(address_space_stq_internal, 
SUFFIX)(ARG1_DECL,
     RCU_READ_LOCK();
     mr = TRANSLATE(addr, &addr1, &l, true, attrs);
     if (l < 8 || !memory_access_is_direct(mr, true, attrs)) {
-        release_lock |= prepare_mmio_access(mr);
+        release_lock |= prepare_mmio_access(mr, false);
         r = memory_region_dispatch_write(mr, addr1, val,
                                          MO_64 | devend_memop(endian), attrs);
     } else {
diff --git a/system/physmem.c b/system/physmem.c
index a8a9ca309e..60e330de99 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -2881,11 +2881,12 @@ int memory_access_size(MemoryRegion *mr, unsigned l, 
hwaddr addr)
     return l;
 }
 
-bool prepare_mmio_access(MemoryRegion *mr)
+bool prepare_mmio_access(MemoryRegion *mr, bool read)
 {
     bool release_lock = false;
 
-    if (!bql_locked()) {
+    if (!bql_locked() &&
+        !(read && mr->lockless_ro_io == true)) {
         bql_lock();
         release_lock = true;
     }
@@ -2935,7 +2936,7 @@ static MemTxResult 
flatview_write_continue_step(MemTxAttrs attrs,
     if (!memory_access_is_direct(mr, true, attrs)) {
         uint64_t val;
         MemTxResult result;
-        bool release_lock = prepare_mmio_access(mr);
+        bool release_lock = prepare_mmio_access(mr, false);
 
         *l = memory_access_size(mr, *l, mr_addr);
         /*
@@ -3032,7 +3033,7 @@ static MemTxResult flatview_read_continue_step(MemTxAttrs 
attrs, uint8_t *buf,
         /* I/O case */
         uint64_t val;
         MemTxResult result;
-        bool release_lock = prepare_mmio_access(mr);
+        bool release_lock = prepare_mmio_access(mr, true);
 
         *l = memory_access_size(mr, *l, mr_addr);
         result = memory_region_dispatch_read(mr, mr_addr, &val, size_memop(*l),
-- 
2.43.5




reply via email to

[Prev in Thread] Current Thread [Next in Thread]