[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH V1 2/3] amd-iommu: Sync IOVA-to-GPA translation during page inval
From: |
Wei Huang |
Subject: |
[PATCH V1 2/3] amd-iommu: Sync IOVA-to-GPA translation during page invalidation |
Date: |
Mon, 28 Sep 2020 15:05:05 -0500 |
Add support to sync the IOVA-to-GPA translation at the time of IOMMU
page invalidation. This function is called when two IOMMU commands,
AMDVI_CMD_INVAL_AMDVI_PAGES and AMDVI_CMD_INVAL_AMDVI_ALL, are
intercepted. Address space notifiers are called accordingly.
Co-developed-by: Wei Huang <wei.huang2@amd.com>
Signed-off-by: Wei Huang <wei.huang2@amd.com>
Signed-off-by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
---
hw/i386/amd_iommu.c | 177 ++++++++++++++++++++++++++++++++++++++++++++
hw/i386/amd_iommu.h | 10 +++
hw/vfio/common.c | 3 +-
3 files changed, 189 insertions(+), 1 deletion(-)
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index c7d24a05484d..7604e2080595 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -76,6 +76,12 @@ typedef struct AMDVIIOTLBEntry {
uint64_t page_mask; /* physical page size */
} AMDVIIOTLBEntry;
+static bool amdvi_get_dte(AMDVIState *s, int devid, uint64_t *entry);
+static void amdvi_sync_domain(AMDVIState *s, uint32_t domid,
+ uint64_t addr, uint16_t flags);
+static void amdvi_walk_level(AMDVIAddressSpace *as, uint64_t pte,
+ uint64_t iova, uint64_t partial);
+
/* configure MMIO registers at startup/reset */
static void amdvi_set_quad(AMDVIState *s, hwaddr addr, uint64_t val,
uint64_t romask, uint64_t w1cmask)
@@ -443,6 +449,78 @@ static void amdvi_address_space_unmap(AMDVIAddressSpace *as, IOMMUNotifier *n)
memory_region_notify_one(n, &entry);
}
+/*
+ * Sync the IOVA-to-GPA translation at the time of IOMMU page invalidation.
+ * This function is called when the IOMMU commands AMDVI_CMD_INVAL_AMDVI_PAGES
+ * and AMDVI_CMD_INVAL_AMDVI_ALL are triggered.
+ *
+ * The range of the address invalidation is determined by addr and flags, using
+ * the following rules:
+ * - All pages:
+ *   In this case, we unmap the whole address space and then re-walk the
+ *   I/O page table to sync the mapping relationship.
+ * - Single page:
+ *   Re-walk the page based on the specified iova, and only sync the
+ *   newly mapped page.
+ */
+static void amdvi_sync_domain(AMDVIState *s, uint32_t domid,
+ uint64_t addr, uint16_t flags)
+{
+ AMDVIAddressSpace *as;
+ bool sync_all_domains = false;
+ uint64_t mask, size = 0x1000;
+
+ if (domid == AMDVI_DOMAIN_ALL) {
+ sync_all_domains = true;
+ }
+
+ /* S=1 means the invalidation size is from addr field; otherwise 4KB */
+ if (flags & AMDVI_CMD_INVAL_IOMMU_PAGES_S_BIT) {
+ uint32_t zbit = cto64(addr | 0xFFF) + 1;
+
+ size = 1ULL << zbit;
+
+ if (size < 0x1000) {
+ addr = 0;
+ size = AMDVI_PGSZ_ENTIRE;
+ } else {
+ mask = ~(size - 1);
+ addr &= mask;
+ }
+ }
+
+ QLIST_FOREACH(as, &s->amdvi_as_with_notifiers, next) {
+ uint64_t dte[4];
+ IOMMUNotifier *n;
+
+ if (!amdvi_get_dte(s, as->devfn, dte)) {
+ continue;
+ }
+
+ if (!sync_all_domains && (domid != (dte[1] & 0xFFFULL))) {
+ continue;
+ }
+
+ /*
+ * In case of syncing more than a page, we invalidate the entire
+ * address range and re-walk the whole page table.
+ */
+ if (size == AMDVI_PGSZ_ENTIRE) {
+ IOMMU_NOTIFIER_FOREACH(n, &as->iommu) {
+ amdvi_address_space_unmap(as, n);
+ }
+ } else if (size > 0x1000) {
+ IOMMU_NOTIFIER_FOREACH(n, &as->iommu) {
+ if (n->start <= addr && addr + size < n->end) {
+ amdvi_address_space_unmap(as, n);
+ }
+ }
+ }
+
+ amdvi_walk_level(as, dte[0], addr, 0);
+ }
+}
+
static gboolean amdvi_iotlb_remove_by_domid(gpointer key, gpointer value,
gpointer user_data)
{
@@ -455,6 +533,8 @@ static gboolean amdvi_iotlb_remove_by_domid(gpointer key, gpointer value,
static void amdvi_inval_pages(AMDVIState *s, uint64_t *cmd)
{
uint16_t domid = cpu_to_le16((uint16_t)extract64(cmd[0], 32, 16));
+ uint64_t addr = cpu_to_le64(extract64(cmd[1], 12, 52)) << 12;
+ uint16_t flags = cpu_to_le16((uint16_t)extract64(cmd[1], 0, 12));
if (extract64(cmd[0], 20, 12) || extract64(cmd[0], 48, 12) ||
extract64(cmd[1], 3, 9)) {
@@ -465,6 +545,8 @@ static void amdvi_inval_pages(AMDVIState *s, uint64_t *cmd)
g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_domid,
&domid);
trace_amdvi_pages_inval(domid);
+
+ amdvi_sync_domain(s, domid, addr, flags);
}
static void amdvi_prefetch_pages(AMDVIState *s, uint64_t *cmd)
@@ -910,6 +992,101 @@ static inline uint64_t amdvi_get_pte_entry(AMDVIState *s, uint64_t pte_addr,
return pte;
}
+static inline uint64_t pte_get_page_size(uint64_t level)
+{
+ return 1UL << ((level * 9) + 3);
+}
+
+static void amdvi_sync_iova(AMDVIAddressSpace *as, uint64_t pte, uint64_t iova)
+{
+ IOMMUTLBEntry entry;
+ uint64_t addr = pte & AMDVI_DEV_PT_ROOT_MASK;
+ uint32_t level = get_pte_translation_mode(pte);
+ uint64_t size = pte_get_page_size(level + 1);
+ uint64_t perm = amdvi_get_perms(pte);
+
+ assert(level == 0 || level == 7);
+
+ entry.target_as = &address_space_memory;
+ entry.iova = iova ;
+ entry.perm = perm;
+ if (level == 0) {
+ entry.addr_mask = size - 1;
+ entry.translated_addr = addr;
+ } else if (level == 7) {
+ entry.addr_mask = (1 << (cto64(addr | 0xFFF) + 1)) - 1;
+ entry.translated_addr = addr & ~entry.addr_mask;
+ }
+
+ memory_region_notify_iommu(&as->iommu, 0, entry);
+}
+
+/*
+ * Walk the I/O page table and notify of mapping changes. Note that iova
+ * determines this function's behavior:
+ * - iova == 0: re-walk the whole page table
+ * - iova != 0: re-walk only the address specified by iova
+ */
+static void amdvi_walk_level(AMDVIAddressSpace *as, uint64_t pte,
+ uint64_t iova, uint64_t partial)
+{
+ uint64_t index = 0;
+ uint8_t level = get_pte_translation_mode(pte);
+ uint64_t cur_addr = pte & AMDVI_DEV_PT_ROOT_MASK;
+ uint64_t end_addr = cur_addr + 4096;
+ uint64_t new_partial = 0;
+
+ if (!(pte & AMDVI_PTE_PRESENT)) {
+ return;
+ }
+
+ if (level == 7) {
+ amdvi_sync_iova(as, pte, iova);
+ return;
+ }
+
+ /* narrow the scope of table walk if iova != 0 */
+ if (iova) {
+ cur_addr += ((iova >> (3 + 9 * level)) & 0x1FF) << 3;
+ end_addr = cur_addr + 8;
+ }
+
+ while (cur_addr < end_addr) {
+ int cur_addr_inc = 8;
+ int index_inc = 1;
+
+ pte = amdvi_get_pte_entry(as->iommu_state, cur_addr, as->devfn);
+ /* validate the entry */
+ if (!(pte & AMDVI_PTE_PRESENT)) {
+ goto next;
+ }
+
+ if (level > 1) {
+ new_partial = (partial << 9) | index;
+ amdvi_walk_level(as, pte, iova, new_partial);
+ } else {
+ /* found a page, sync the mapping first */
+ if (iova) {
+ amdvi_sync_iova(as, pte, iova);
+ } else {
+ amdvi_sync_iova(as, pte, ((partial << 9) | index) << 12);
+ }
+
+ /* skip following entries when a large page is found */
+ if (get_pte_translation_mode(pte) == 7) {
+ int skipped = 1 << (cto64(pte >> 12) + 1);
+
+ cur_addr_inc = 8 * skipped;
+ index_inc = skipped;
+ }
+ }
+
+next:
+ cur_addr += cur_addr_inc;
+ index += index_inc;
+ }
+}
+
static void amdvi_page_walk(AMDVIAddressSpace *as, uint64_t *dte,
IOMMUTLBEntry *ret, unsigned perms,
hwaddr addr)
diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index aeed9fd1cbb0..22f846837a95 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -123,6 +123,8 @@
#define AMDVI_CMD_COMPLETE_PPR_REQUEST 0x07
#define AMDVI_CMD_INVAL_AMDVI_ALL 0x08
+#define AMDVI_CMD_INVAL_IOMMU_PAGES_S_BIT (1ULL << 0)
+
#define AMDVI_DEVTAB_ENTRY_SIZE 32
/* Device table entry bits 0:63 */
@@ -148,6 +150,9 @@
#define AMDVI_EVENT_ILLEGAL_COMMAND_ERROR (0x5U << 12)
#define AMDVI_EVENT_COMMAND_HW_ERROR (0x6U << 12)
+/* PTE bits */
+#define AMDVI_PTE_PRESENT (1ULL << 0)
+
#define AMDVI_EVENT_LEN 16
#define AMDVI_PERM_READ (1 << 0)
#define AMDVI_PERM_WRITE (1 << 1)
@@ -198,6 +203,11 @@
#define AMDVI_MAX_PH_ADDR (40UL << 8)
#define AMDVI_MAX_GVA_ADDR (48UL << 15)
+#define AMDVI_PGSZ_ENTIRE (0X0007FFFFFFFFF000ULL)
+
+/* The domain id is 16-bit, so use 32-bit all 1's to represent all domains */
+#define AMDVI_DOMAIN_ALL (UINT32_MAX)
+
/* Completion Wait data size */
#define AMDVI_COMPLETION_DATA_SIZE 8
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 13471ae29436..243216499ce0 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -346,7 +346,8 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
* the VGA ROM space.
*/
if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
- (errno == EBUSY && vfio_dma_unmap(container, iova, size) == 0 &&
+ ((errno == EEXIST || errno == EBUSY) &&
+ vfio_dma_unmap(container, iova, size) == 0 &&
ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
return 0;
}
--
2.25.2