summaryrefslogtreecommitdiffstats
path: root/system/xen/xsa
diff options
context:
space:
mode:
Diffstat (limited to 'system/xen/xsa')
-rw-r--r--system/xen/xsa/xsa401-4.16-1.patch170
-rw-r--r--system/xen/xsa/xsa401-4.16-2.patch191
-rw-r--r--system/xen/xsa/xsa402-4.16-1.patch43
-rw-r--r--system/xen/xsa/xsa402-4.16-2.patch213
-rw-r--r--system/xen/xsa/xsa402-4.16-3.patch284
-rw-r--r--system/xen/xsa/xsa402-4.16-4.patch83
-rw-r--r--system/xen/xsa/xsa402-4.16-5.patch148
7 files changed, 0 insertions, 1132 deletions
diff --git a/system/xen/xsa/xsa401-4.16-1.patch b/system/xen/xsa/xsa401-4.16-1.patch
deleted file mode 100644
index 5c8c50617a..0000000000
--- a/system/xen/xsa/xsa401-4.16-1.patch
+++ /dev/null
@@ -1,170 +0,0 @@
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Subject: x86/pv: Clean up _get_page_type()
-
-Various fixes for clarity, ahead of making complicated changes.
-
- * Split the overflow check out of the if/else chain for type handling, as
- it's somewhat unrelated.
- * Comment the main if/else chain to explain what is going on. Adjust one
- ASSERT() and state the bit layout for validate-locked and partial states.
- * Correct the comment about TLB flushing, as it's backwards. The problem
- case is when writeable mappings are retained to a page becoming read-only,
- as it allows the guest to bypass Xen's safety checks for updates.
- * Reduce the scope of 'y'. It is an artefact of the cmpxchg loop and not
- valid for use by subsequent logic. Switch to using ACCESS_ONCE() to treat
- all reads as explicitly volatile. The only thing preventing the validated
- wait-loop being infinite is the compiler barrier hidden in cpu_relax().
- * Replace one page_get_owner(page) with the already-calculated 'd' already in
- scope.
-
-No functional change.
-
-This is part of XSA-401 / CVE-2022-26362.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: George Dunlap <george.dunlap@citrix.com>
-
-diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
-index 796faca64103..ddd32f88c798 100644
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -2935,16 +2935,17 @@ static int _put_page_type(struct page_info *page, unsigned int flags,
- static int _get_page_type(struct page_info *page, unsigned long type,
- bool preemptible)
- {
-- unsigned long nx, x, y = page->u.inuse.type_info;
-+ unsigned long nx, x;
- int rc = 0;
-
- ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
- ASSERT(!in_irq());
-
-- for ( ; ; )
-+ for ( unsigned long y = ACCESS_ONCE(page->u.inuse.type_info); ; )
- {
- x = y;
- nx = x + 1;
-+
- if ( unlikely((nx & PGT_count_mask) == 0) )
- {
- gdprintk(XENLOG_WARNING,
-@@ -2952,8 +2953,15 @@ static int _get_page_type(struct page_info *page, unsigned long type,
- mfn_x(page_to_mfn(page)));
- return -EINVAL;
- }
-- else if ( unlikely((x & PGT_count_mask) == 0) )
-+
-+ if ( unlikely((x & PGT_count_mask) == 0) )
- {
-+ /*
-+ * Typeref 0 -> 1.
-+ *
-+ * Type changes are permitted when the typeref is 0. If the type
-+ * actually changes, the page needs re-validating.
-+ */
- struct domain *d = page_get_owner(page);
-
- if ( d && shadow_mode_enabled(d) )
-@@ -2964,8 +2972,8 @@ static int _get_page_type(struct page_info *page, unsigned long type,
- {
- /*
- * On type change we check to flush stale TLB entries. It is
-- * vital that no other CPUs are left with mappings of a frame
-- * which is about to become writeable to the guest.
-+ * vital that no other CPUs are left with writeable mappings
-+ * to a frame which is intending to become pgtable/segdesc.
- */
- cpumask_t *mask = this_cpu(scratch_cpumask);
-
-@@ -2977,7 +2985,7 @@ static int _get_page_type(struct page_info *page, unsigned long type,
-
- if ( unlikely(!cpumask_empty(mask)) &&
- /* Shadow mode: track only writable pages. */
-- (!shadow_mode_enabled(page_get_owner(page)) ||
-+ (!shadow_mode_enabled(d) ||
- ((nx & PGT_type_mask) == PGT_writable_page)) )
- {
- perfc_incr(need_flush_tlb_flush);
-@@ -3008,7 +3016,14 @@ static int _get_page_type(struct page_info *page, unsigned long type,
- }
- else if ( unlikely((x & (PGT_type_mask|PGT_pae_xen_l2)) != type) )
- {
-- /* Don't log failure if it could be a recursive-mapping attempt. */
-+ /*
-+ * else, we're trying to take a new reference, of the wrong type.
-+ *
-+ * This (being able to prohibit use of the wrong type) is what the
-+ * typeref system exists for, but skip printing the failure if it
-+ * looks like a recursive mapping, as subsequent logic might
-+ * ultimately permit the attempt.
-+ */
- if ( ((x & PGT_type_mask) == PGT_l2_page_table) &&
- (type == PGT_l1_page_table) )
- return -EINVAL;
-@@ -3027,18 +3042,46 @@ static int _get_page_type(struct page_info *page, unsigned long type,
- }
- else if ( unlikely(!(x & PGT_validated)) )
- {
-+ /*
-+ * else, the count is non-zero, and we're grabbing the right type;
-+ * but the page hasn't been validated yet.
-+ *
-+ * The page is in one of two states (depending on PGT_partial),
-+ * and should have exactly one reference.
-+ */
-+ ASSERT((x & (PGT_type_mask | PGT_count_mask)) == (type | 1));
-+
- if ( !(x & PGT_partial) )
- {
-- /* Someone else is updating validation of this page. Wait... */
-+ /*
-+ * The page has been left in the "validate locked" state
-+ * (i.e. PGT_[type] | 1) which means that a concurrent caller
-+ * of _get_page_type() is in the middle of validation.
-+ *
-+ * Spin waiting for the concurrent user to complete (partial
-+ * or fully validated), then restart our attempt to acquire a
-+ * type reference.
-+ */
- do {
- if ( preemptible && hypercall_preempt_check() )
- return -EINTR;
- cpu_relax();
-- } while ( (y = page->u.inuse.type_info) == x );
-+ } while ( (y = ACCESS_ONCE(page->u.inuse.type_info)) == x );
- continue;
- }
-- /* Type ref count was left at 1 when PGT_partial got set. */
-- ASSERT((x & PGT_count_mask) == 1);
-+
-+ /*
-+ * The page has been left in the "partial" state
-+ * (i.e., PGT_[type] | PGT_partial | 1).
-+ *
-+ * Rather than bumping the type count, we need to try to grab the
-+ * validation lock; if we succeed, we need to validate the page,
-+ * then drop the general ref associated with the PGT_partial bit.
-+ *
-+ * We grab the validation lock by setting nx to (PGT_[type] | 1)
-+ * (i.e., non-zero type count, neither PGT_validated nor
-+ * PGT_partial set).
-+ */
- nx = x & ~PGT_partial;
- }
-
-@@ -3087,6 +3130,13 @@ static int _get_page_type(struct page_info *page, unsigned long type,
- }
-
- out:
-+ /*
-+ * Did we drop the PGT_partial bit when acquiring the typeref? If so,
-+ * drop the general reference that went along with it.
-+ *
-+ * N.B. validate_page() may have have re-set PGT_partial, not reflected in
-+ * nx, but will have taken an extra ref when doing so.
-+ */
- if ( (x & PGT_partial) && !(nx & PGT_partial) )
- put_page(page);
-
diff --git a/system/xen/xsa/xsa401-4.16-2.patch b/system/xen/xsa/xsa401-4.16-2.patch
deleted file mode 100644
index be58db59a5..0000000000
--- a/system/xen/xsa/xsa401-4.16-2.patch
+++ /dev/null
@@ -1,191 +0,0 @@
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Subject: x86/pv: Fix ABAC cmpxchg() race in _get_page_type()
-
-_get_page_type() suffers from a race condition where it incorrectly assumes
-that because 'x' was read and a subsequent a cmpxchg() succeeds, the type
-cannot have changed in-between. Consider:
-
-CPU A:
- 1. Creates an L2e referencing pg
- `-> _get_page_type(pg, PGT_l1_page_table), sees count 0, type PGT_writable_page
- 2. Issues flush_tlb_mask()
-CPU B:
- 3. Creates a writeable mapping of pg
- `-> _get_page_type(pg, PGT_writable_page), count increases to 1
- 4. Writes into new mapping, creating a TLB entry for pg
- 5. Removes the writeable mapping of pg
- `-> _put_page_type(pg), count goes back down to 0
-CPU A:
- 7. Issues cmpxchg(), setting count 1, type PGT_l1_page_table
-
-CPU B now has a writeable mapping to pg, which Xen believes is a pagetable and
-suitably protected (i.e. read-only). The TLB flush in step 2 must be deferred
-until after the guest is prohibited from creating new writeable mappings,
-which is after step 7.
-
-Defer all safety actions until after the cmpxchg() has successfully taken the
-intended typeref, because that is what prevents concurrent users from using
-the old type.
-
-Also remove the early validation for writeable and shared pages. This removes
-race conditions where one half of a parallel mapping attempt can return
-successfully before:
- * The IOMMU pagetables are in sync with the new page type
- * Writeable mappings to shared pages have been torn down
-
-This is part of XSA-401 / CVE-2022-26362.
-
-Reported-by: Jann Horn <jannh@google.com>
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: George Dunlap <george.dunlap@citrix.com>
-
-diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
-index ddd32f88c798..1693b580b152 100644
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -2962,56 +2962,12 @@ static int _get_page_type(struct page_info *page, unsigned long type,
- * Type changes are permitted when the typeref is 0. If the type
- * actually changes, the page needs re-validating.
- */
-- struct domain *d = page_get_owner(page);
--
-- if ( d && shadow_mode_enabled(d) )
-- shadow_prepare_page_type_change(d, page, type);
-
- ASSERT(!(x & PGT_pae_xen_l2));
- if ( (x & PGT_type_mask) != type )
- {
-- /*
-- * On type change we check to flush stale TLB entries. It is
-- * vital that no other CPUs are left with writeable mappings
-- * to a frame which is intending to become pgtable/segdesc.
-- */
-- cpumask_t *mask = this_cpu(scratch_cpumask);
--
-- BUG_ON(in_irq());
-- cpumask_copy(mask, d->dirty_cpumask);
--
-- /* Don't flush if the timestamp is old enough */
-- tlbflush_filter(mask, page->tlbflush_timestamp);
--
-- if ( unlikely(!cpumask_empty(mask)) &&
-- /* Shadow mode: track only writable pages. */
-- (!shadow_mode_enabled(d) ||
-- ((nx & PGT_type_mask) == PGT_writable_page)) )
-- {
-- perfc_incr(need_flush_tlb_flush);
-- /*
-- * If page was a page table make sure the flush is
-- * performed using an IPI in order to avoid changing the
-- * type of a page table page under the feet of
-- * spurious_page_fault().
-- */
-- flush_mask(mask,
-- (x & PGT_type_mask) &&
-- (x & PGT_type_mask) <= PGT_root_page_table
-- ? FLUSH_TLB | FLUSH_FORCE_IPI
-- : FLUSH_TLB);
-- }
--
-- /* We lose existing type and validity. */
- nx &= ~(PGT_type_mask | PGT_validated);
- nx |= type;
--
-- /*
-- * No special validation needed for writable pages.
-- * Page tables and GDT/LDT need to be scanned for validity.
-- */
-- if ( type == PGT_writable_page || type == PGT_shared_page )
-- nx |= PGT_validated;
- }
- }
- else if ( unlikely((x & (PGT_type_mask|PGT_pae_xen_l2)) != type) )
-@@ -3092,6 +3048,56 @@ static int _get_page_type(struct page_info *page, unsigned long type,
- return -EINTR;
- }
-
-+ /*
-+ * One typeref has been taken and is now globally visible.
-+ *
-+ * The page is either in the "validate locked" state (PGT_[type] | 1) or
-+ * fully validated (PGT_[type] | PGT_validated | >0).
-+ */
-+
-+ if ( unlikely((x & PGT_count_mask) == 0) )
-+ {
-+ struct domain *d = page_get_owner(page);
-+
-+ if ( d && shadow_mode_enabled(d) )
-+ shadow_prepare_page_type_change(d, page, type);
-+
-+ if ( (x & PGT_type_mask) != type )
-+ {
-+ /*
-+ * On type change we check to flush stale TLB entries. It is
-+ * vital that no other CPUs are left with writeable mappings
-+ * to a frame which is intending to become pgtable/segdesc.
-+ */
-+ cpumask_t *mask = this_cpu(scratch_cpumask);
-+
-+ BUG_ON(in_irq());
-+ cpumask_copy(mask, d->dirty_cpumask);
-+
-+ /* Don't flush if the timestamp is old enough */
-+ tlbflush_filter(mask, page->tlbflush_timestamp);
-+
-+ if ( unlikely(!cpumask_empty(mask)) &&
-+ /* Shadow mode: track only writable pages. */
-+ (!shadow_mode_enabled(d) ||
-+ ((nx & PGT_type_mask) == PGT_writable_page)) )
-+ {
-+ perfc_incr(need_flush_tlb_flush);
-+ /*
-+ * If page was a page table make sure the flush is
-+ * performed using an IPI in order to avoid changing the
-+ * type of a page table page under the feet of
-+ * spurious_page_fault().
-+ */
-+ flush_mask(mask,
-+ (x & PGT_type_mask) &&
-+ (x & PGT_type_mask) <= PGT_root_page_table
-+ ? FLUSH_TLB | FLUSH_FORCE_IPI
-+ : FLUSH_TLB);
-+ }
-+ }
-+ }
-+
- if ( unlikely(((x & PGT_type_mask) == PGT_writable_page) !=
- (type == PGT_writable_page)) )
- {
-@@ -3120,13 +3126,25 @@ static int _get_page_type(struct page_info *page, unsigned long type,
-
- if ( unlikely(!(nx & PGT_validated)) )
- {
-- if ( !(x & PGT_partial) )
-+ /*
-+ * No special validation needed for writable or shared pages. Page
-+ * tables and GDT/LDT need to have their contents audited.
-+ *
-+ * per validate_page(), non-atomic updates are fine here.
-+ */
-+ if ( type == PGT_writable_page || type == PGT_shared_page )
-+ page->u.inuse.type_info |= PGT_validated;
-+ else
- {
-- page->nr_validated_ptes = 0;
-- page->partial_flags = 0;
-- page->linear_pt_count = 0;
-+ if ( !(x & PGT_partial) )
-+ {
-+ page->nr_validated_ptes = 0;
-+ page->partial_flags = 0;
-+ page->linear_pt_count = 0;
-+ }
-+
-+ rc = validate_page(page, type, preemptible);
- }
-- rc = validate_page(page, type, preemptible);
- }
-
- out:
diff --git a/system/xen/xsa/xsa402-4.16-1.patch b/system/xen/xsa/xsa402-4.16-1.patch
deleted file mode 100644
index b783383fc8..0000000000
--- a/system/xen/xsa/xsa402-4.16-1.patch
+++ /dev/null
@@ -1,43 +0,0 @@
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Subject: x86/page: Introduce _PAGE_* constants for memory types
-
-... rather than opencoding the PAT/PCD/PWT attributes in __PAGE_HYPERVISOR_*
-constants. These are going to be needed by forthcoming logic.
-
-No functional change.
-
-This is part of XSA-402.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-
-diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h
-index 1d080cffbe84..2e542050f65a 100644
---- a/xen/include/asm-x86/page.h
-+++ b/xen/include/asm-x86/page.h
-@@ -331,6 +331,14 @@ void efi_update_l4_pgtable(unsigned int l4idx, l4_pgentry_t);
-
- #define PAGE_CACHE_ATTRS (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)
-
-+/* Memory types, encoded under Xen's choice of MSR_PAT. */
-+#define _PAGE_WB ( 0)
-+#define _PAGE_WT ( _PAGE_PWT)
-+#define _PAGE_UCM ( _PAGE_PCD )
-+#define _PAGE_UC ( _PAGE_PCD | _PAGE_PWT)
-+#define _PAGE_WC (_PAGE_PAT )
-+#define _PAGE_WP (_PAGE_PAT | _PAGE_PWT)
-+
- /*
- * Debug option: Ensure that granted mappings are not implicitly unmapped.
- * WARNING: This will need to be disabled to run OSes that use the spare PTE
-@@ -349,8 +357,8 @@ void efi_update_l4_pgtable(unsigned int l4idx, l4_pgentry_t);
- #define __PAGE_HYPERVISOR_RX (_PAGE_PRESENT | _PAGE_ACCESSED)
- #define __PAGE_HYPERVISOR (__PAGE_HYPERVISOR_RX | \
- _PAGE_DIRTY | _PAGE_RW)
--#define __PAGE_HYPERVISOR_UCMINUS (__PAGE_HYPERVISOR | _PAGE_PCD)
--#define __PAGE_HYPERVISOR_UC (__PAGE_HYPERVISOR | _PAGE_PCD | _PAGE_PWT)
-+#define __PAGE_HYPERVISOR_UCMINUS (__PAGE_HYPERVISOR | _PAGE_UCM)
-+#define __PAGE_HYPERVISOR_UC (__PAGE_HYPERVISOR | _PAGE_UC)
- #define __PAGE_HYPERVISOR_SHSTK (__PAGE_HYPERVISOR_RO | _PAGE_DIRTY)
-
- #define MAP_SMALL_PAGES _PAGE_AVAIL0 /* don't use superpages mappings */
diff --git a/system/xen/xsa/xsa402-4.16-2.patch b/system/xen/xsa/xsa402-4.16-2.patch
deleted file mode 100644
index ebb2f5e221..0000000000
--- a/system/xen/xsa/xsa402-4.16-2.patch
+++ /dev/null
@@ -1,213 +0,0 @@
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Subject: x86: Don't change the cacheability of the directmap
-
-Changeset 55f97f49b7ce ("x86: Change cache attributes of Xen 1:1 page mappings
-in response to guest mapping requests") attempted to keep the cacheability
-consistent between different mappings of the same page.
-
-The reason wasn't described in the changelog, but it is understood to be in
-regards to a concern over machine check exceptions, owing to errata when using
-mixed cacheabilities. It did this primarily by updating Xen's mapping of the
-page in the direct map when the guest mapped a page with reduced cacheability.
-
-Unfortunately, the logic didn't actually prevent mixed cacheability from
-occurring:
- * A guest could map a page normally, and then map the same page with
- different cacheability; nothing prevented this.
- * The cacheability of the directmap was always latest-takes-precedence in
- terms of guest requests.
- * Grant-mapped frames with lesser cacheability didn't adjust the page's
- cacheattr settings.
- * The map_domain_page() function still unconditionally created WB mappings,
- irrespective of the page's cacheattr settings.
-
-Additionally, update_xen_mappings() had a bug where the alias calculation was
-wrong for mfn's which were .init content, which should have been treated as
-fully guest pages, not Xen pages.
-
-Worse yet, the logic introduced a vulnerability whereby necessary
-pagetable/segdesc adjustments made by Xen in the validation logic could become
-non-coherent between the cache and main memory. The CPU could subsequently
-operate on the stale value in the cache, rather than the safe value in main
-memory.
-
-The directmap contains primarily mappings of RAM. PAT/MTRR conflict
-resolution is asymmetric, and generally for MTRR=WB ranges, PAT of lesser
-cacheability resolves to being coherent. The special case is WC mappings,
-which are non-coherent against MTRR=WB regions (except for fully-coherent
-CPUs).
-
-Xen must not have any WC cacheability in the directmap, to prevent Xen's
-actions from creating non-coherency. (Guest actions creating non-coherency is
-dealt with in subsequent patches.) As all memory types for MTRR=WB ranges
-inter-operate coherently, so leave Xen's directmap mappings as WB.
-
-Only PV guests with access to devices can use reduced-cacheability mappings to
-begin with, and they're trusted not to mount DoSs against the system anyway.
-
-Drop PGC_cacheattr_{base,mask} entirely, and the logic to manipulate them.
-Shift the later PGC_* constants up, to gain 3 extra bits in the main reference
-count. Retain the check in get_page_from_l1e() for special_pages() because a
-guest has no business using reduced cacheability on these.
-
-This reverts changeset 55f97f49b7ce6c3520c555d19caac6cf3f9a5df0
-
-This is CVE-2022-26363, part of XSA-402.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: George Dunlap <george.dunlap@citrix.com>
-
-diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
-index c6429b0f749a..ab32d13a1a0d 100644
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -783,28 +783,6 @@ bool is_iomem_page(mfn_t mfn)
- return (page_get_owner(page) == dom_io);
- }
-
--static int update_xen_mappings(unsigned long mfn, unsigned int cacheattr)
--{
-- int err = 0;
-- bool alias = mfn >= PFN_DOWN(xen_phys_start) &&
-- mfn < PFN_UP(xen_phys_start + xen_virt_end - XEN_VIRT_START);
-- unsigned long xen_va =
-- XEN_VIRT_START + ((mfn - PFN_DOWN(xen_phys_start)) << PAGE_SHIFT);
--
-- if ( boot_cpu_has(X86_FEATURE_XEN_SELFSNOOP) )
-- return 0;
--
-- if ( unlikely(alias) && cacheattr )
-- err = map_pages_to_xen(xen_va, _mfn(mfn), 1, 0);
-- if ( !err )
-- err = map_pages_to_xen((unsigned long)mfn_to_virt(mfn), _mfn(mfn), 1,
-- PAGE_HYPERVISOR | cacheattr_to_pte_flags(cacheattr));
-- if ( unlikely(alias) && !cacheattr && !err )
-- err = map_pages_to_xen(xen_va, _mfn(mfn), 1, PAGE_HYPERVISOR);
--
-- return err;
--}
--
- #ifndef NDEBUG
- struct mmio_emul_range_ctxt {
- const struct domain *d;
-@@ -1009,47 +987,14 @@ get_page_from_l1e(
- goto could_not_pin;
- }
-
-- if ( pte_flags_to_cacheattr(l1f) !=
-- ((page->count_info & PGC_cacheattr_mask) >> PGC_cacheattr_base) )
-+ if ( (l1f & PAGE_CACHE_ATTRS) != _PAGE_WB && is_special_page(page) )
- {
-- unsigned long x, nx, y = page->count_info;
-- unsigned long cacheattr = pte_flags_to_cacheattr(l1f);
-- int err;
--
-- if ( is_special_page(page) )
-- {
-- if ( write )
-- put_page_type(page);
-- put_page(page);
-- gdprintk(XENLOG_WARNING,
-- "Attempt to change cache attributes of Xen heap page\n");
-- return -EACCES;
-- }
--
-- do {
-- x = y;
-- nx = (x & ~PGC_cacheattr_mask) | (cacheattr << PGC_cacheattr_base);
-- } while ( (y = cmpxchg(&page->count_info, x, nx)) != x );
--
-- err = update_xen_mappings(mfn, cacheattr);
-- if ( unlikely(err) )
-- {
-- cacheattr = y & PGC_cacheattr_mask;
-- do {
-- x = y;
-- nx = (x & ~PGC_cacheattr_mask) | cacheattr;
-- } while ( (y = cmpxchg(&page->count_info, x, nx)) != x );
--
-- if ( write )
-- put_page_type(page);
-- put_page(page);
--
-- gdprintk(XENLOG_WARNING, "Error updating mappings for mfn %" PRI_mfn
-- " (pfn %" PRI_pfn ", from L1 entry %" PRIpte ") for d%d\n",
-- mfn, get_gpfn_from_mfn(mfn),
-- l1e_get_intpte(l1e), l1e_owner->domain_id);
-- return err;
-- }
-+ if ( write )
-+ put_page_type(page);
-+ put_page(page);
-+ gdprintk(XENLOG_WARNING,
-+ "Attempt to change cache attributes of Xen heap page\n");
-+ return -EACCES;
- }
-
- return 0;
-@@ -2467,25 +2412,10 @@ static int mod_l4_entry(l4_pgentry_t *pl4e,
- */
- static int cleanup_page_mappings(struct page_info *page)
- {
-- unsigned int cacheattr =
-- (page->count_info & PGC_cacheattr_mask) >> PGC_cacheattr_base;
- int rc = 0;
- unsigned long mfn = mfn_x(page_to_mfn(page));
-
- /*
-- * If we've modified xen mappings as a result of guest cache
-- * attributes, restore them to the "normal" state.
-- */
-- if ( unlikely(cacheattr) )
-- {
-- page->count_info &= ~PGC_cacheattr_mask;
--
-- BUG_ON(is_special_page(page));
--
-- rc = update_xen_mappings(mfn, 0);
-- }
--
-- /*
- * If this may be in a PV domain's IOMMU, remove it.
- *
- * NB that writable xenheap pages have their type set and cleared by
-diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
-index cb9052749963..8a9a43bb0a9d 100644
---- a/xen/include/asm-x86/mm.h
-+++ b/xen/include/asm-x86/mm.h
-@@ -69,25 +69,22 @@
- /* Set when is using a page as a page table */
- #define _PGC_page_table PG_shift(3)
- #define PGC_page_table PG_mask(1, 3)
-- /* 3-bit PAT/PCD/PWT cache-attribute hint. */
--#define PGC_cacheattr_base PG_shift(6)
--#define PGC_cacheattr_mask PG_mask(7, 6)
- /* Page is broken? */
--#define _PGC_broken PG_shift(7)
--#define PGC_broken PG_mask(1, 7)
-+#define _PGC_broken PG_shift(4)
-+#define PGC_broken PG_mask(1, 4)
- /* Mutually-exclusive page states: { inuse, offlining, offlined, free }. */
--#define PGC_state PG_mask(3, 9)
--#define PGC_state_inuse PG_mask(0, 9)
--#define PGC_state_offlining PG_mask(1, 9)
--#define PGC_state_offlined PG_mask(2, 9)
--#define PGC_state_free PG_mask(3, 9)
-+#define PGC_state PG_mask(3, 6)
-+#define PGC_state_inuse PG_mask(0, 6)
-+#define PGC_state_offlining PG_mask(1, 6)
-+#define PGC_state_offlined PG_mask(2, 6)
-+#define PGC_state_free PG_mask(3, 6)
- #define page_state_is(pg, st) (((pg)->count_info&PGC_state) == PGC_state_##st)
- /* Page is not reference counted (see below for caveats) */
--#define _PGC_extra PG_shift(10)
--#define PGC_extra PG_mask(1, 10)
-+#define _PGC_extra PG_shift(7)
-+#define PGC_extra PG_mask(1, 7)
-
- /* Count of references to this frame. */
--#define PGC_count_width PG_shift(10)
-+#define PGC_count_width PG_shift(7)
- #define PGC_count_mask ((1UL<<PGC_count_width)-1)
-
- /*
diff --git a/system/xen/xsa/xsa402-4.16-3.patch b/system/xen/xsa/xsa402-4.16-3.patch
deleted file mode 100644
index b4d2a4c835..0000000000
--- a/system/xen/xsa/xsa402-4.16-3.patch
+++ /dev/null
@@ -1,284 +0,0 @@
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Subject: x86: Split cache_flush() out of cache_writeback()
-
-Subsequent changes will want a fully flushing version.
-
-Use the new helper rather than opencoding it in flush_area_local(). This
-resolves an outstanding issue where the conditional sfence is on the wrong
-side of the clflushopt loop. clflushopt is ordered with respect to older
-stores, not to younger stores.
-
-Rename gnttab_cache_flush()'s helper to avoid colliding in name.
-grant_table.c can see the prototype from cache.h so the build fails
-otherwise.
-
-This is part of XSA-402.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-
-Xen 4.16 and earlier:
- * Also backport half of c/s 3330013e67396 "VT-d / x86: re-arrange cache
- syncing" to split cache_writeback() out of the IOMMU logic, but without the
- associated hooks changes.
-
-diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c
-index 25798df50f54..0c912b8669f8 100644
---- a/xen/arch/x86/flushtlb.c
-+++ b/xen/arch/x86/flushtlb.c
-@@ -234,7 +234,7 @@ unsigned int flush_area_local(const void *va, unsigned int flags)
- if ( flags & FLUSH_CACHE )
- {
- const struct cpuinfo_x86 *c = &current_cpu_data;
-- unsigned long i, sz = 0;
-+ unsigned long sz = 0;
-
- if ( order < (BITS_PER_LONG - PAGE_SHIFT) )
- sz = 1UL << (order + PAGE_SHIFT);
-@@ -244,13 +244,7 @@ unsigned int flush_area_local(const void *va, unsigned int flags)
- c->x86_clflush_size && c->x86_cache_size && sz &&
- ((sz >> 10) < c->x86_cache_size) )
- {
-- alternative("", "sfence", X86_FEATURE_CLFLUSHOPT);
-- for ( i = 0; i < sz; i += c->x86_clflush_size )
-- alternative_input(".byte " __stringify(NOP_DS_PREFIX) ";"
-- " clflush %0",
-- "data16 clflush %0", /* clflushopt */
-- X86_FEATURE_CLFLUSHOPT,
-- "m" (((const char *)va)[i]));
-+ cache_flush(va, sz);
- flags &= ~FLUSH_CACHE;
- }
- else
-@@ -265,6 +259,80 @@ unsigned int flush_area_local(const void *va, unsigned int flags)
- return flags;
- }
-
-+void cache_flush(const void *addr, unsigned int size)
-+{
-+ /*
-+ * This function may be called before current_cpu_data is established.
-+ * Hence a fallback is needed to prevent the loop below becoming infinite.
-+ */
-+ unsigned int clflush_size = current_cpu_data.x86_clflush_size ?: 16;
-+ const void *end = addr + size;
-+
-+ addr -= (unsigned long)addr & (clflush_size - 1);
-+ for ( ; addr < end; addr += clflush_size )
-+ {
-+ /*
-+ * Note regarding the "ds" prefix use: it's faster to do a clflush
-+ * + prefix than a clflush + nop, and hence the prefix is added instead
-+ * of letting the alternative framework fill the gap by appending nops.
-+ */
-+ alternative_io("ds; clflush %[p]",
-+ "data16 clflush %[p]", /* clflushopt */
-+ X86_FEATURE_CLFLUSHOPT,
-+ /* no outputs */,
-+ [p] "m" (*(const char *)(addr)));
-+ }
-+
-+ alternative("", "sfence", X86_FEATURE_CLFLUSHOPT);
-+}
-+
-+void cache_writeback(const void *addr, unsigned int size)
-+{
-+ unsigned int clflush_size;
-+ const void *end = addr + size;
-+
-+ /* Fall back to CLFLUSH{,OPT} when CLWB isn't available. */
-+ if ( !boot_cpu_has(X86_FEATURE_CLWB) )
-+ return cache_flush(addr, size);
-+
-+ /*
-+ * This function may be called before current_cpu_data is established.
-+ * Hence a fallback is needed to prevent the loop below becoming infinite.
-+ */
-+ clflush_size = current_cpu_data.x86_clflush_size ?: 16;
-+ addr -= (unsigned long)addr & (clflush_size - 1);
-+ for ( ; addr < end; addr += clflush_size )
-+ {
-+/*
-+ * The arguments to a macro must not include preprocessor directives. Doing so
-+ * results in undefined behavior, so we have to create some defines here in
-+ * order to avoid it.
-+ */
-+#if defined(HAVE_AS_CLWB)
-+# define CLWB_ENCODING "clwb %[p]"
-+#elif defined(HAVE_AS_XSAVEOPT)
-+# define CLWB_ENCODING "data16 xsaveopt %[p]" /* clwb */
-+#else
-+# define CLWB_ENCODING ".byte 0x66, 0x0f, 0xae, 0x30" /* clwb (%%rax) */
-+#endif
-+
-+#define BASE_INPUT(addr) [p] "m" (*(const char *)(addr))
-+#if defined(HAVE_AS_CLWB) || defined(HAVE_AS_XSAVEOPT)
-+# define INPUT BASE_INPUT
-+#else
-+# define INPUT(addr) "a" (addr), BASE_INPUT(addr)
-+#endif
-+
-+ asm volatile (CLWB_ENCODING :: INPUT(addr));
-+
-+#undef INPUT
-+#undef BASE_INPUT
-+#undef CLWB_ENCODING
-+ }
-+
-+ asm volatile ("sfence" ::: "memory");
-+}
-+
- unsigned int guest_flush_tlb_flags(const struct domain *d)
- {
- bool shadow = paging_mode_shadow(d);
-diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
-index 66f8ce71741c..4c742cd8fe81 100644
---- a/xen/common/grant_table.c
-+++ b/xen/common/grant_table.c
-@@ -3431,7 +3431,7 @@ gnttab_swap_grant_ref(XEN_GUEST_HANDLE_PARAM(gnttab_swap_grant_ref_t) uop,
- return 0;
- }
-
--static int cache_flush(const gnttab_cache_flush_t *cflush, grant_ref_t *cur_ref)
-+static int _cache_flush(const gnttab_cache_flush_t *cflush, grant_ref_t *cur_ref)
- {
- struct domain *d, *owner;
- struct page_info *page;
-@@ -3525,7 +3525,7 @@ gnttab_cache_flush(XEN_GUEST_HANDLE_PARAM(gnttab_cache_flush_t) uop,
- return -EFAULT;
- for ( ; ; )
- {
-- int ret = cache_flush(&op, cur_ref);
-+ int ret = _cache_flush(&op, cur_ref);
-
- if ( ret < 0 )
- return ret;
-diff --git a/xen/drivers/passthrough/vtd/extern.h b/xen/drivers/passthrough/vtd/extern.h
-index 01e010a10d61..401079299725 100644
---- a/xen/drivers/passthrough/vtd/extern.h
-+++ b/xen/drivers/passthrough/vtd/extern.h
-@@ -76,7 +76,6 @@ int __must_check qinval_device_iotlb_sync(struct vtd_iommu *iommu,
- struct pci_dev *pdev,
- u16 did, u16 size, u64 addr);
-
--unsigned int get_cache_line_size(void);
- void flush_all_cache(void);
-
- uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node);
-diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
-index 8975c1de61bc..bc377c9bcfa4 100644
---- a/xen/drivers/passthrough/vtd/iommu.c
-+++ b/xen/drivers/passthrough/vtd/iommu.c
-@@ -31,6 +31,7 @@
- #include <xen/pci.h>
- #include <xen/pci_regs.h>
- #include <xen/keyhandler.h>
-+#include <asm/cache.h>
- #include <asm/msi.h>
- #include <asm/nops.h>
- #include <asm/irq.h>
-@@ -206,54 +207,6 @@ static void check_cleanup_domid_map(const struct domain *d,
- }
- }
-
--static void sync_cache(const void *addr, unsigned int size)
--{
-- static unsigned long clflush_size = 0;
-- const void *end = addr + size;
--
-- if ( clflush_size == 0 )
-- clflush_size = get_cache_line_size();
--
-- addr -= (unsigned long)addr & (clflush_size - 1);
-- for ( ; addr < end; addr += clflush_size )
--/*
-- * The arguments to a macro must not include preprocessor directives. Doing so
-- * results in undefined behavior, so we have to create some defines here in
-- * order to avoid it.
-- */
--#if defined(HAVE_AS_CLWB)
--# define CLWB_ENCODING "clwb %[p]"
--#elif defined(HAVE_AS_XSAVEOPT)
--# define CLWB_ENCODING "data16 xsaveopt %[p]" /* clwb */
--#else
--# define CLWB_ENCODING ".byte 0x66, 0x0f, 0xae, 0x30" /* clwb (%%rax) */
--#endif
--
--#define BASE_INPUT(addr) [p] "m" (*(const char *)(addr))
--#if defined(HAVE_AS_CLWB) || defined(HAVE_AS_XSAVEOPT)
--# define INPUT BASE_INPUT
--#else
--# define INPUT(addr) "a" (addr), BASE_INPUT(addr)
--#endif
-- /*
-- * Note regarding the use of NOP_DS_PREFIX: it's faster to do a clflush
-- * + prefix than a clflush + nop, and hence the prefix is added instead
-- * of letting the alternative framework fill the gap by appending nops.
-- */
-- alternative_io_2(".byte " __stringify(NOP_DS_PREFIX) "; clflush %[p]",
-- "data16 clflush %[p]", /* clflushopt */
-- X86_FEATURE_CLFLUSHOPT,
-- CLWB_ENCODING,
-- X86_FEATURE_CLWB, /* no outputs */,
-- INPUT(addr));
--#undef INPUT
--#undef BASE_INPUT
--#undef CLWB_ENCODING
--
-- alternative_2("", "sfence", X86_FEATURE_CLFLUSHOPT,
-- "sfence", X86_FEATURE_CLWB);
--}
--
- /* Allocate page table, return its machine address */
- uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node)
- {
-@@ -273,7 +226,7 @@ uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node)
- clear_page(vaddr);
-
- if ( (iommu_ops.init ? &iommu_ops : &vtd_ops)->sync_cache )
-- sync_cache(vaddr, PAGE_SIZE);
-+ cache_writeback(vaddr, PAGE_SIZE);
- unmap_domain_page(vaddr);
- cur_pg++;
- }
-@@ -1305,7 +1258,7 @@ int __init iommu_alloc(struct acpi_drhd_unit *drhd)
- iommu->nr_pt_levels = agaw_to_level(agaw);
-
- if ( !ecap_coherent(iommu->ecap) )
-- vtd_ops.sync_cache = sync_cache;
-+ vtd_ops.sync_cache = cache_writeback;
-
- /* allocate domain id bitmap */
- iommu->domid_bitmap = xzalloc_array(unsigned long, BITS_TO_LONGS(nr_dom));
-diff --git a/xen/drivers/passthrough/vtd/x86/vtd.c b/xen/drivers/passthrough/vtd/x86/vtd.c
-index 6681dccd6970..55f0faa521cb 100644
---- a/xen/drivers/passthrough/vtd/x86/vtd.c
-+++ b/xen/drivers/passthrough/vtd/x86/vtd.c
-@@ -47,11 +47,6 @@ void unmap_vtd_domain_page(const void *va)
- unmap_domain_page(va);
- }
-
--unsigned int get_cache_line_size(void)
--{
-- return ((cpuid_ebx(1) >> 8) & 0xff) * 8;
--}
--
- void flush_all_cache()
- {
- wbinvd();
-diff --git a/xen/include/asm-x86/cache.h b/xen/include/asm-x86/cache.h
-index 1f7173d8c72c..e4770efb22b9 100644
---- a/xen/include/asm-x86/cache.h
-+++ b/xen/include/asm-x86/cache.h
-@@ -11,4 +11,11 @@
-
- #define __read_mostly __section(".data.read_mostly")
-
-+#ifndef __ASSEMBLY__
-+
-+void cache_flush(const void *addr, unsigned int size);
-+void cache_writeback(const void *addr, unsigned int size);
-+
-+#endif
-+
- #endif
diff --git a/system/xen/xsa/xsa402-4.16-4.patch b/system/xen/xsa/xsa402-4.16-4.patch
deleted file mode 100644
index 21109225d7..0000000000
--- a/system/xen/xsa/xsa402-4.16-4.patch
+++ /dev/null
@@ -1,83 +0,0 @@
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Subject: x86/amd: Work around CLFLUSH ordering on older parts
-
-On pre-CLFLUSHOPT AMD CPUs, CLFLUSH is weakely ordered with everything,
-including reads and writes to the address, and LFENCE/SFENCE instructions.
-
-This creates a multitude of problematic corner cases, laid out in the manual.
-Arrange to use MFENCE on both sides of the CLFLUSH to force proper ordering.
-
-This is part of XSA-402.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-
-diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c
-index a8e37dbb1f5c..b3b9a0df5fed 100644
---- a/xen/arch/x86/cpu/amd.c
-+++ b/xen/arch/x86/cpu/amd.c
-@@ -812,6 +812,14 @@ static void init_amd(struct cpuinfo_x86 *c)
- if (!cpu_has_lfence_dispatch)
- __set_bit(X86_FEATURE_MFENCE_RDTSC, c->x86_capability);
-
-+ /*
-+ * On pre-CLFLUSHOPT AMD CPUs, CLFLUSH is weakly ordered with
-+ * everything, including reads and writes to address, and
-+ * LFENCE/SFENCE instructions.
-+ */
-+ if (!cpu_has_clflushopt)
-+ setup_force_cpu_cap(X86_BUG_CLFLUSH_MFENCE);
-+
- switch(c->x86)
- {
- case 0xf ... 0x11:
-diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c
-index 0c912b8669f8..dcbb4064012e 100644
---- a/xen/arch/x86/flushtlb.c
-+++ b/xen/arch/x86/flushtlb.c
-@@ -259,6 +259,13 @@ unsigned int flush_area_local(const void *va, unsigned int flags)
- return flags;
- }
-
-+/*
-+ * On pre-CLFLUSHOPT AMD CPUs, CLFLUSH is weakly ordered with everything,
-+ * including reads and writes to address, and LFENCE/SFENCE instructions.
-+ *
-+ * This function only works safely after alternatives have run. Luckily, at
-+ * the time of writing, we don't flush the caches that early.
-+ */
- void cache_flush(const void *addr, unsigned int size)
- {
- /*
-@@ -268,6 +275,8 @@ void cache_flush(const void *addr, unsigned int size)
- unsigned int clflush_size = current_cpu_data.x86_clflush_size ?: 16;
- const void *end = addr + size;
-
-+ alternative("", "mfence", X86_BUG_CLFLUSH_MFENCE);
-+
- addr -= (unsigned long)addr & (clflush_size - 1);
- for ( ; addr < end; addr += clflush_size )
- {
-@@ -283,7 +292,9 @@ void cache_flush(const void *addr, unsigned int size)
- [p] "m" (*(const char *)(addr)));
- }
-
-- alternative("", "sfence", X86_FEATURE_CLFLUSHOPT);
-+ alternative_2("",
-+ "sfence", X86_FEATURE_CLFLUSHOPT,
-+ "mfence", X86_BUG_CLFLUSH_MFENCE);
- }
-
- void cache_writeback(const void *addr, unsigned int size)
-diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h
-index 7413febd7ad8..ff3157d52d13 100644
---- a/xen/include/asm-x86/cpufeatures.h
-+++ b/xen/include/asm-x86/cpufeatures.h
-@@ -47,6 +47,7 @@ XEN_CPUFEATURE(XEN_IBT, X86_SYNTH(27)) /* Xen uses CET Indirect Branch
-
- #define X86_BUG_FPU_PTRS X86_BUG( 0) /* (F)X{SAVE,RSTOR} doesn't save/restore FOP/FIP/FDP. */
- #define X86_BUG_NULL_SEG X86_BUG( 1) /* NULL-ing a selector preserves the base and limit. */
-+#define X86_BUG_CLFLUSH_MFENCE X86_BUG( 2) /* MFENCE needed to serialise CLFLUSH */
-
- /* Total number of capability words, inc synth and bug words. */
- #define NCAPINTS (FSCAPINTS + X86_NR_SYNTH + X86_NR_BUG) /* N 32-bit words worth of info */
diff --git a/system/xen/xsa/xsa402-4.16-5.patch b/system/xen/xsa/xsa402-4.16-5.patch
deleted file mode 100644
index 4806d25c6f..0000000000
--- a/system/xen/xsa/xsa402-4.16-5.patch
+++ /dev/null
@@ -1,148 +0,0 @@
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Subject: x86/pv: Track and flush non-coherent mappings of RAM
-
-There are legitimate uses of WC mappings of RAM, e.g. for DMA buffers with
-devices that make non-coherent writes. The Linux sound subsystem makes
-extensive use of this technique.
-
-For such usecases, the guest's DMA buffer is mapped and consistently used as
-WC, and Xen doesn't interact with the buffer.
-
-However, a mischevious guest can use WC mappings to deliberately create
-non-coherency between the cache and RAM, and use this to trick Xen into
-validating a pagetable which isn't actually safe.
-
-Allocate a new PGT_non_coherent to track the non-coherency of mappings. Set
-it whenever a non-coherent writeable mapping is created. If the page is used
-as anything other than PGT_writable_page, force a cache flush before
-validation. Also force a cache flush before the page is returned to the heap.
-
-This is CVE-2022-26364, part of XSA-402.
-
-Reported-by: Jann Horn <jannh@google.com>
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: George Dunlap <george.dunlap@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-
-diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
-index ab32d13a1a0d..bab9624fabb7 100644
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -997,6 +997,15 @@ get_page_from_l1e(
- return -EACCES;
- }
-
-+ /*
-+ * Track writeable non-coherent mappings to RAM pages, to trigger a cache
-+ * flush later if the target is used as anything but a PGT_writeable page.
-+ * We care about all writeable mappings, including foreign mappings.
-+ */
-+ if ( !boot_cpu_has(X86_FEATURE_XEN_SELFSNOOP) &&
-+ (l1f & (PAGE_CACHE_ATTRS | _PAGE_RW)) == (_PAGE_WC | _PAGE_RW) )
-+ set_bit(_PGT_non_coherent, &page->u.inuse.type_info);
-+
- return 0;
-
- could_not_pin:
-@@ -2454,6 +2463,19 @@ static int cleanup_page_mappings(struct page_info *page)
- }
- }
-
-+ /*
-+ * Flush the cache if there were previously non-coherent writeable
-+ * mappings of this page. This forces the page to be coherent before it
-+ * is freed back to the heap.
-+ */
-+ if ( __test_and_clear_bit(_PGT_non_coherent, &page->u.inuse.type_info) )
-+ {
-+ void *addr = __map_domain_page(page);
-+
-+ cache_flush(addr, PAGE_SIZE);
-+ unmap_domain_page(addr);
-+ }
-+
- return rc;
- }
-
-@@ -3028,6 +3050,22 @@ static int _get_page_type(struct page_info *page, unsigned long type,
- if ( unlikely(!(nx & PGT_validated)) )
- {
- /*
-+ * Flush the cache if there were previously non-coherent mappings of
-+ * this page, and we're trying to use it as anything other than a
-+ * writeable page. This forces the page to be coherent before we
-+ * validate its contents for safety.
-+ */
-+ if ( (nx & PGT_non_coherent) && type != PGT_writable_page )
-+ {
-+ void *addr = __map_domain_page(page);
-+
-+ cache_flush(addr, PAGE_SIZE);
-+ unmap_domain_page(addr);
-+
-+ page->u.inuse.type_info &= ~PGT_non_coherent;
-+ }
-+
-+ /*
- * No special validation needed for writable or shared pages. Page
- * tables and GDT/LDT need to have their contents audited.
- *
-diff --git a/xen/arch/x86/pv/grant_table.c b/xen/arch/x86/pv/grant_table.c
-index 0325618c9883..81c72e61ed55 100644
---- a/xen/arch/x86/pv/grant_table.c
-+++ b/xen/arch/x86/pv/grant_table.c
-@@ -109,7 +109,17 @@ int create_grant_pv_mapping(uint64_t addr, mfn_t frame,
-
- ol1e = *pl1e;
- if ( UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, 0) )
-+ {
-+ /*
-+ * We always create mappings in this path. However, our caller,
-+ * map_grant_ref(), only passes potentially non-zero cache_flags for
-+ * MMIO frames, so this path doesn't create non-coherent mappings of
-+ * RAM frames and there's no need to calculate PGT_non_coherent.
-+ */
-+ ASSERT(!cache_flags || is_iomem_page(frame));
-+
- rc = GNTST_okay;
-+ }
-
- out_unlock:
- page_unlock(page);
-@@ -294,7 +304,18 @@ int replace_grant_pv_mapping(uint64_t addr, mfn_t frame,
- l1e_get_flags(ol1e), addr, grant_pte_flags);
-
- if ( UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, 0) )
-+ {
-+ /*
-+ * Generally, replace_grant_pv_mapping() is used to destroy mappings
-+ * (n1le = l1e_empty()), but it can be a present mapping on the
-+ * GNTABOP_unmap_and_replace path.
-+ *
-+ * In such cases, the PTE is fully transplanted from its old location
-+ * via steal_linear_addr(), so we need not perform PGT_non_coherent
-+ * checking here.
-+ */
- rc = GNTST_okay;
-+ }
-
- out_unlock:
- page_unlock(page);
-diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
-index 8a9a43bb0a9d..7464167ae192 100644
---- a/xen/include/asm-x86/mm.h
-+++ b/xen/include/asm-x86/mm.h
-@@ -53,8 +53,12 @@
- #define _PGT_partial PG_shift(8)
- #define PGT_partial PG_mask(1, 8)
-
-+/* Has this page been mapped writeable with a non-coherent memory type? */
-+#define _PGT_non_coherent PG_shift(9)
-+#define PGT_non_coherent PG_mask(1, 9)
-+
- /* Count of uses of this frame as its current type. */
--#define PGT_count_width PG_shift(8)
-+#define PGT_count_width PG_shift(9)
- #define PGT_count_mask ((1UL<<PGT_count_width)-1)
-
- /* Are the 'type mask' bits identical? */