diff options
Diffstat (limited to 'system/xen/xsa')
-rw-r--r-- | system/xen/xsa/xsa401-4.16-1.patch | 170 | ||||
-rw-r--r-- | system/xen/xsa/xsa401-4.16-2.patch | 191 | ||||
-rw-r--r-- | system/xen/xsa/xsa402-4.16-1.patch | 43 | ||||
-rw-r--r-- | system/xen/xsa/xsa402-4.16-2.patch | 213 | ||||
-rw-r--r-- | system/xen/xsa/xsa402-4.16-3.patch | 284 | ||||
-rw-r--r-- | system/xen/xsa/xsa402-4.16-4.patch | 83 | ||||
-rw-r--r-- | system/xen/xsa/xsa402-4.16-5.patch | 148 |
7 files changed, 0 insertions, 1132 deletions
diff --git a/system/xen/xsa/xsa401-4.16-1.patch b/system/xen/xsa/xsa401-4.16-1.patch deleted file mode 100644 index 5c8c50617a..0000000000 --- a/system/xen/xsa/xsa401-4.16-1.patch +++ /dev/null @@ -1,170 +0,0 @@ -From: Andrew Cooper <andrew.cooper3@citrix.com> -Subject: x86/pv: Clean up _get_page_type() - -Various fixes for clarity, ahead of making complicated changes. - - * Split the overflow check out of the if/else chain for type handling, as - it's somewhat unrelated. - * Comment the main if/else chain to explain what is going on. Adjust one - ASSERT() and state the bit layout for validate-locked and partial states. - * Correct the comment about TLB flushing, as it's backwards. The problem - case is when writeable mappings are retained to a page becoming read-only, - as it allows the guest to bypass Xen's safety checks for updates. - * Reduce the scope of 'y'. It is an artefact of the cmpxchg loop and not - valid for use by subsequent logic. Switch to using ACCESS_ONCE() to treat - all reads as explicitly volatile. The only thing preventing the validated - wait-loop being infinite is the compiler barrier hidden in cpu_relax(). - * Replace one page_get_owner(page) with the already-calculated 'd' already in - scope. - -No functional change. - -This is part of XSA-401 / CVE-2022-26362. 
- -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: George Dunlap <george.dunlap@citrix.com> - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 796faca64103..ddd32f88c798 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -2935,16 +2935,17 @@ static int _put_page_type(struct page_info *page, unsigned int flags, - static int _get_page_type(struct page_info *page, unsigned long type, - bool preemptible) - { -- unsigned long nx, x, y = page->u.inuse.type_info; -+ unsigned long nx, x; - int rc = 0; - - ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2))); - ASSERT(!in_irq()); - -- for ( ; ; ) -+ for ( unsigned long y = ACCESS_ONCE(page->u.inuse.type_info); ; ) - { - x = y; - nx = x + 1; -+ - if ( unlikely((nx & PGT_count_mask) == 0) ) - { - gdprintk(XENLOG_WARNING, -@@ -2952,8 +2953,15 @@ static int _get_page_type(struct page_info *page, unsigned long type, - mfn_x(page_to_mfn(page))); - return -EINVAL; - } -- else if ( unlikely((x & PGT_count_mask) == 0) ) -+ -+ if ( unlikely((x & PGT_count_mask) == 0) ) - { -+ /* -+ * Typeref 0 -> 1. -+ * -+ * Type changes are permitted when the typeref is 0. If the type -+ * actually changes, the page needs re-validating. -+ */ - struct domain *d = page_get_owner(page); - - if ( d && shadow_mode_enabled(d) ) -@@ -2964,8 +2972,8 @@ static int _get_page_type(struct page_info *page, unsigned long type, - { - /* - * On type change we check to flush stale TLB entries. It is -- * vital that no other CPUs are left with mappings of a frame -- * which is about to become writeable to the guest. -+ * vital that no other CPUs are left with writeable mappings -+ * to a frame which is intending to become pgtable/segdesc. 
- */ - cpumask_t *mask = this_cpu(scratch_cpumask); - -@@ -2977,7 +2985,7 @@ static int _get_page_type(struct page_info *page, unsigned long type, - - if ( unlikely(!cpumask_empty(mask)) && - /* Shadow mode: track only writable pages. */ -- (!shadow_mode_enabled(page_get_owner(page)) || -+ (!shadow_mode_enabled(d) || - ((nx & PGT_type_mask) == PGT_writable_page)) ) - { - perfc_incr(need_flush_tlb_flush); -@@ -3008,7 +3016,14 @@ static int _get_page_type(struct page_info *page, unsigned long type, - } - else if ( unlikely((x & (PGT_type_mask|PGT_pae_xen_l2)) != type) ) - { -- /* Don't log failure if it could be a recursive-mapping attempt. */ -+ /* -+ * else, we're trying to take a new reference, of the wrong type. -+ * -+ * This (being able to prohibit use of the wrong type) is what the -+ * typeref system exists for, but skip printing the failure if it -+ * looks like a recursive mapping, as subsequent logic might -+ * ultimately permit the attempt. -+ */ - if ( ((x & PGT_type_mask) == PGT_l2_page_table) && - (type == PGT_l1_page_table) ) - return -EINVAL; -@@ -3027,18 +3042,46 @@ static int _get_page_type(struct page_info *page, unsigned long type, - } - else if ( unlikely(!(x & PGT_validated)) ) - { -+ /* -+ * else, the count is non-zero, and we're grabbing the right type; -+ * but the page hasn't been validated yet. -+ * -+ * The page is in one of two states (depending on PGT_partial), -+ * and should have exactly one reference. -+ */ -+ ASSERT((x & (PGT_type_mask | PGT_count_mask)) == (type | 1)); -+ - if ( !(x & PGT_partial) ) - { -- /* Someone else is updating validation of this page. Wait... */ -+ /* -+ * The page has been left in the "validate locked" state -+ * (i.e. PGT_[type] | 1) which means that a concurrent caller -+ * of _get_page_type() is in the middle of validation. -+ * -+ * Spin waiting for the concurrent user to complete (partial -+ * or fully validated), then restart our attempt to acquire a -+ * type reference. 
-+ */ - do { - if ( preemptible && hypercall_preempt_check() ) - return -EINTR; - cpu_relax(); -- } while ( (y = page->u.inuse.type_info) == x ); -+ } while ( (y = ACCESS_ONCE(page->u.inuse.type_info)) == x ); - continue; - } -- /* Type ref count was left at 1 when PGT_partial got set. */ -- ASSERT((x & PGT_count_mask) == 1); -+ -+ /* -+ * The page has been left in the "partial" state -+ * (i.e., PGT_[type] | PGT_partial | 1). -+ * -+ * Rather than bumping the type count, we need to try to grab the -+ * validation lock; if we succeed, we need to validate the page, -+ * then drop the general ref associated with the PGT_partial bit. -+ * -+ * We grab the validation lock by setting nx to (PGT_[type] | 1) -+ * (i.e., non-zero type count, neither PGT_validated nor -+ * PGT_partial set). -+ */ - nx = x & ~PGT_partial; - } - -@@ -3087,6 +3130,13 @@ static int _get_page_type(struct page_info *page, unsigned long type, - } - - out: -+ /* -+ * Did we drop the PGT_partial bit when acquiring the typeref? If so, -+ * drop the general reference that went along with it. -+ * -+ * N.B. validate_page() may have re-set PGT_partial, not reflected in -+ * nx, but will have taken an extra ref when doing so. -+ */ - if ( (x & PGT_partial) && !(nx & PGT_partial) ) - put_page(page); - diff --git a/system/xen/xsa/xsa401-4.16-2.patch b/system/xen/xsa/xsa401-4.16-2.patch deleted file mode 100644 index be58db59a5..0000000000 --- a/system/xen/xsa/xsa401-4.16-2.patch +++ /dev/null @@ -1,191 +0,0 @@ -From: Andrew Cooper <andrew.cooper3@citrix.com> -Subject: x86/pv: Fix ABAC cmpxchg() race in _get_page_type() - -_get_page_type() suffers from a race condition where it incorrectly assumes -that because 'x' was read and a subsequent cmpxchg() succeeds, the type -cannot have changed in-between. Consider: - -CPU A: - 1. Creates an L2e referencing pg - `-> _get_page_type(pg, PGT_l1_page_table), sees count 0, type PGT_writable_page - 2. Issues flush_tlb_mask() -CPU B: - 3. 
Creates a writeable mapping of pg - `-> _get_page_type(pg, PGT_writable_page), count increases to 1 - 4. Writes into new mapping, creating a TLB entry for pg - 5. Removes the writeable mapping of pg - `-> _put_page_type(pg), count goes back down to 0 -CPU A: - 7. Issues cmpxchg(), setting count 1, type PGT_l1_page_table - -CPU B now has a writeable mapping to pg, which Xen believes is a pagetable and -suitably protected (i.e. read-only). The TLB flush in step 2 must be deferred -until after the guest is prohibited from creating new writeable mappings, -which is after step 7. - -Defer all safety actions until after the cmpxchg() has successfully taken the -intended typeref, because that is what prevents concurrent users from using -the old type. - -Also remove the early validation for writeable and shared pages. This removes -race conditions where one half of a parallel mapping attempt can return -successfully before: - * The IOMMU pagetables are in sync with the new page type - * Writeable mappings to shared pages have been torn down - -This is part of XSA-401 / CVE-2022-26362. - -Reported-by: Jann Horn <jannh@google.com> -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: George Dunlap <george.dunlap@citrix.com> - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index ddd32f88c798..1693b580b152 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -2962,56 +2962,12 @@ static int _get_page_type(struct page_info *page, unsigned long type, - * Type changes are permitted when the typeref is 0. If the type - * actually changes, the page needs re-validating. - */ -- struct domain *d = page_get_owner(page); -- -- if ( d && shadow_mode_enabled(d) ) -- shadow_prepare_page_type_change(d, page, type); - - ASSERT(!(x & PGT_pae_xen_l2)); - if ( (x & PGT_type_mask) != type ) - { -- /* -- * On type change we check to flush stale TLB entries. 
It is -- * vital that no other CPUs are left with writeable mappings -- * to a frame which is intending to become pgtable/segdesc. -- */ -- cpumask_t *mask = this_cpu(scratch_cpumask); -- -- BUG_ON(in_irq()); -- cpumask_copy(mask, d->dirty_cpumask); -- -- /* Don't flush if the timestamp is old enough */ -- tlbflush_filter(mask, page->tlbflush_timestamp); -- -- if ( unlikely(!cpumask_empty(mask)) && -- /* Shadow mode: track only writable pages. */ -- (!shadow_mode_enabled(d) || -- ((nx & PGT_type_mask) == PGT_writable_page)) ) -- { -- perfc_incr(need_flush_tlb_flush); -- /* -- * If page was a page table make sure the flush is -- * performed using an IPI in order to avoid changing the -- * type of a page table page under the feet of -- * spurious_page_fault(). -- */ -- flush_mask(mask, -- (x & PGT_type_mask) && -- (x & PGT_type_mask) <= PGT_root_page_table -- ? FLUSH_TLB | FLUSH_FORCE_IPI -- : FLUSH_TLB); -- } -- -- /* We lose existing type and validity. */ - nx &= ~(PGT_type_mask | PGT_validated); - nx |= type; -- -- /* -- * No special validation needed for writable pages. -- * Page tables and GDT/LDT need to be scanned for validity. -- */ -- if ( type == PGT_writable_page || type == PGT_shared_page ) -- nx |= PGT_validated; - } - } - else if ( unlikely((x & (PGT_type_mask|PGT_pae_xen_l2)) != type) ) -@@ -3092,6 +3048,56 @@ static int _get_page_type(struct page_info *page, unsigned long type, - return -EINTR; - } - -+ /* -+ * One typeref has been taken and is now globally visible. -+ * -+ * The page is either in the "validate locked" state (PGT_[type] | 1) or -+ * fully validated (PGT_[type] | PGT_validated | >0). -+ */ -+ -+ if ( unlikely((x & PGT_count_mask) == 0) ) -+ { -+ struct domain *d = page_get_owner(page); -+ -+ if ( d && shadow_mode_enabled(d) ) -+ shadow_prepare_page_type_change(d, page, type); -+ -+ if ( (x & PGT_type_mask) != type ) -+ { -+ /* -+ * On type change we check to flush stale TLB entries. 
It is -+ * vital that no other CPUs are left with writeable mappings -+ * to a frame which is intending to become pgtable/segdesc. -+ */ -+ cpumask_t *mask = this_cpu(scratch_cpumask); -+ -+ BUG_ON(in_irq()); -+ cpumask_copy(mask, d->dirty_cpumask); -+ -+ /* Don't flush if the timestamp is old enough */ -+ tlbflush_filter(mask, page->tlbflush_timestamp); -+ -+ if ( unlikely(!cpumask_empty(mask)) && -+ /* Shadow mode: track only writable pages. */ -+ (!shadow_mode_enabled(d) || -+ ((nx & PGT_type_mask) == PGT_writable_page)) ) -+ { -+ perfc_incr(need_flush_tlb_flush); -+ /* -+ * If page was a page table make sure the flush is -+ * performed using an IPI in order to avoid changing the -+ * type of a page table page under the feet of -+ * spurious_page_fault(). -+ */ -+ flush_mask(mask, -+ (x & PGT_type_mask) && -+ (x & PGT_type_mask) <= PGT_root_page_table -+ ? FLUSH_TLB | FLUSH_FORCE_IPI -+ : FLUSH_TLB); -+ } -+ } -+ } -+ - if ( unlikely(((x & PGT_type_mask) == PGT_writable_page) != - (type == PGT_writable_page)) ) - { -@@ -3120,13 +3126,25 @@ static int _get_page_type(struct page_info *page, unsigned long type, - - if ( unlikely(!(nx & PGT_validated)) ) - { -- if ( !(x & PGT_partial) ) -+ /* -+ * No special validation needed for writable or shared pages. Page -+ * tables and GDT/LDT need to have their contents audited. -+ * -+ * per validate_page(), non-atomic updates are fine here. 
-+ */ -+ if ( type == PGT_writable_page || type == PGT_shared_page ) -+ page->u.inuse.type_info |= PGT_validated; -+ else - { -- page->nr_validated_ptes = 0; -- page->partial_flags = 0; -- page->linear_pt_count = 0; -+ if ( !(x & PGT_partial) ) -+ { -+ page->nr_validated_ptes = 0; -+ page->partial_flags = 0; -+ page->linear_pt_count = 0; -+ } -+ -+ rc = validate_page(page, type, preemptible); - } -- rc = validate_page(page, type, preemptible); - } - - out: diff --git a/system/xen/xsa/xsa402-4.16-1.patch b/system/xen/xsa/xsa402-4.16-1.patch deleted file mode 100644 index b783383fc8..0000000000 --- a/system/xen/xsa/xsa402-4.16-1.patch +++ /dev/null @@ -1,43 +0,0 @@ -From: Andrew Cooper <andrew.cooper3@citrix.com> -Subject: x86/page: Introduce _PAGE_* constants for memory types - -... rather than opencoding the PAT/PCD/PWT attributes in __PAGE_HYPERVISOR_* -constants. These are going to be needed by forthcoming logic. - -No functional change. - -This is part of XSA-402. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> - -diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h -index 1d080cffbe84..2e542050f65a 100644 ---- a/xen/include/asm-x86/page.h -+++ b/xen/include/asm-x86/page.h -@@ -331,6 +331,14 @@ void efi_update_l4_pgtable(unsigned int l4idx, l4_pgentry_t); - - #define PAGE_CACHE_ATTRS (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT) - -+/* Memory types, encoded under Xen's choice of MSR_PAT. */ -+#define _PAGE_WB ( 0) -+#define _PAGE_WT ( _PAGE_PWT) -+#define _PAGE_UCM ( _PAGE_PCD ) -+#define _PAGE_UC ( _PAGE_PCD | _PAGE_PWT) -+#define _PAGE_WC (_PAGE_PAT ) -+#define _PAGE_WP (_PAGE_PAT | _PAGE_PWT) -+ - /* - * Debug option: Ensure that granted mappings are not implicitly unmapped. 
- * WARNING: This will need to be disabled to run OSes that use the spare PTE -@@ -349,8 +357,8 @@ void efi_update_l4_pgtable(unsigned int l4idx, l4_pgentry_t); - #define __PAGE_HYPERVISOR_RX (_PAGE_PRESENT | _PAGE_ACCESSED) - #define __PAGE_HYPERVISOR (__PAGE_HYPERVISOR_RX | \ - _PAGE_DIRTY | _PAGE_RW) --#define __PAGE_HYPERVISOR_UCMINUS (__PAGE_HYPERVISOR | _PAGE_PCD) --#define __PAGE_HYPERVISOR_UC (__PAGE_HYPERVISOR | _PAGE_PCD | _PAGE_PWT) -+#define __PAGE_HYPERVISOR_UCMINUS (__PAGE_HYPERVISOR | _PAGE_UCM) -+#define __PAGE_HYPERVISOR_UC (__PAGE_HYPERVISOR | _PAGE_UC) - #define __PAGE_HYPERVISOR_SHSTK (__PAGE_HYPERVISOR_RO | _PAGE_DIRTY) - - #define MAP_SMALL_PAGES _PAGE_AVAIL0 /* don't use superpages mappings */ diff --git a/system/xen/xsa/xsa402-4.16-2.patch b/system/xen/xsa/xsa402-4.16-2.patch deleted file mode 100644 index ebb2f5e221..0000000000 --- a/system/xen/xsa/xsa402-4.16-2.patch +++ /dev/null @@ -1,213 +0,0 @@ -From: Andrew Cooper <andrew.cooper3@citrix.com> -Subject: x86: Don't change the cacheability of the directmap - -Changeset 55f97f49b7ce ("x86: Change cache attributes of Xen 1:1 page mappings -in response to guest mapping requests") attempted to keep the cacheability -consistent between different mappings of the same page. - -The reason wasn't described in the changelog, but it is understood to be in -regards to a concern over machine check exceptions, owing to errata when using -mixed cacheabilities. It did this primarily by updating Xen's mapping of the -page in the direct map when the guest mapped a page with reduced cacheability. - -Unfortunately, the logic didn't actually prevent mixed cacheability from -occurring: - * A guest could map a page normally, and then map the same page with - different cacheability; nothing prevented this. - * The cacheability of the directmap was always latest-takes-precedence in - terms of guest requests. - * Grant-mapped frames with lesser cacheability didn't adjust the page's - cacheattr settings. 
- * The map_domain_page() function still unconditionally created WB mappings, - irrespective of the page's cacheattr settings. - -Additionally, update_xen_mappings() had a bug where the alias calculation was -wrong for mfn's which were .init content, which should have been treated as -fully guest pages, not Xen pages. - -Worse yet, the logic introduced a vulnerability whereby necessary -pagetable/segdesc adjustments made by Xen in the validation logic could become -non-coherent between the cache and main memory. The CPU could subsequently -operate on the stale value in the cache, rather than the safe value in main -memory. - -The directmap contains primarily mappings of RAM. PAT/MTRR conflict -resolution is asymmetric, and generally for MTRR=WB ranges, PAT of lesser -cacheability resolves to being coherent. The special case is WC mappings, -which are non-coherent against MTRR=WB regions (except for fully-coherent -CPUs). - -Xen must not have any WC cacheability in the directmap, to prevent Xen's -actions from creating non-coherency. (Guest actions creating non-coherency is -dealt with in subsequent patches.) As all memory types for MTRR=WB ranges -inter-operate coherently, so leave Xen's directmap mappings as WB. - -Only PV guests with access to devices can use reduced-cacheability mappings to -begin with, and they're trusted not to mount DoSs against the system anyway. - -Drop PGC_cacheattr_{base,mask} entirely, and the logic to manipulate them. -Shift the later PGC_* constants up, to gain 3 extra bits in the main reference -count. Retain the check in get_page_from_l1e() for special_pages() because a -guest has no business using reduced cacheability on these. - -This reverts changeset 55f97f49b7ce6c3520c555d19caac6cf3f9a5df0 - -This is CVE-2022-26363, part of XSA-402. 
- -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: George Dunlap <george.dunlap@citrix.com> - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index c6429b0f749a..ab32d13a1a0d 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -783,28 +783,6 @@ bool is_iomem_page(mfn_t mfn) - return (page_get_owner(page) == dom_io); - } - --static int update_xen_mappings(unsigned long mfn, unsigned int cacheattr) --{ -- int err = 0; -- bool alias = mfn >= PFN_DOWN(xen_phys_start) && -- mfn < PFN_UP(xen_phys_start + xen_virt_end - XEN_VIRT_START); -- unsigned long xen_va = -- XEN_VIRT_START + ((mfn - PFN_DOWN(xen_phys_start)) << PAGE_SHIFT); -- -- if ( boot_cpu_has(X86_FEATURE_XEN_SELFSNOOP) ) -- return 0; -- -- if ( unlikely(alias) && cacheattr ) -- err = map_pages_to_xen(xen_va, _mfn(mfn), 1, 0); -- if ( !err ) -- err = map_pages_to_xen((unsigned long)mfn_to_virt(mfn), _mfn(mfn), 1, -- PAGE_HYPERVISOR | cacheattr_to_pte_flags(cacheattr)); -- if ( unlikely(alias) && !cacheattr && !err ) -- err = map_pages_to_xen(xen_va, _mfn(mfn), 1, PAGE_HYPERVISOR); -- -- return err; --} -- - #ifndef NDEBUG - struct mmio_emul_range_ctxt { - const struct domain *d; -@@ -1009,47 +987,14 @@ get_page_from_l1e( - goto could_not_pin; - } - -- if ( pte_flags_to_cacheattr(l1f) != -- ((page->count_info & PGC_cacheattr_mask) >> PGC_cacheattr_base) ) -+ if ( (l1f & PAGE_CACHE_ATTRS) != _PAGE_WB && is_special_page(page) ) - { -- unsigned long x, nx, y = page->count_info; -- unsigned long cacheattr = pte_flags_to_cacheattr(l1f); -- int err; -- -- if ( is_special_page(page) ) -- { -- if ( write ) -- put_page_type(page); -- put_page(page); -- gdprintk(XENLOG_WARNING, -- "Attempt to change cache attributes of Xen heap page\n"); -- return -EACCES; -- } -- -- do { -- x = y; -- nx = (x & ~PGC_cacheattr_mask) | (cacheattr << PGC_cacheattr_base); -- } while ( (y = cmpxchg(&page->count_info, x, nx)) != x ); -- -- err = update_xen_mappings(mfn, cacheattr); -- if ( unlikely(err) ) -- 
{ -- cacheattr = y & PGC_cacheattr_mask; -- do { -- x = y; -- nx = (x & ~PGC_cacheattr_mask) | cacheattr; -- } while ( (y = cmpxchg(&page->count_info, x, nx)) != x ); -- -- if ( write ) -- put_page_type(page); -- put_page(page); -- -- gdprintk(XENLOG_WARNING, "Error updating mappings for mfn %" PRI_mfn -- " (pfn %" PRI_pfn ", from L1 entry %" PRIpte ") for d%d\n", -- mfn, get_gpfn_from_mfn(mfn), -- l1e_get_intpte(l1e), l1e_owner->domain_id); -- return err; -- } -+ if ( write ) -+ put_page_type(page); -+ put_page(page); -+ gdprintk(XENLOG_WARNING, -+ "Attempt to change cache attributes of Xen heap page\n"); -+ return -EACCES; - } - - return 0; -@@ -2467,25 +2412,10 @@ static int mod_l4_entry(l4_pgentry_t *pl4e, - */ - static int cleanup_page_mappings(struct page_info *page) - { -- unsigned int cacheattr = -- (page->count_info & PGC_cacheattr_mask) >> PGC_cacheattr_base; - int rc = 0; - unsigned long mfn = mfn_x(page_to_mfn(page)); - - /* -- * If we've modified xen mappings as a result of guest cache -- * attributes, restore them to the "normal" state. -- */ -- if ( unlikely(cacheattr) ) -- { -- page->count_info &= ~PGC_cacheattr_mask; -- -- BUG_ON(is_special_page(page)); -- -- rc = update_xen_mappings(mfn, 0); -- } -- -- /* - * If this may be in a PV domain's IOMMU, remove it. - * - * NB that writable xenheap pages have their type set and cleared by -diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h -index cb9052749963..8a9a43bb0a9d 100644 ---- a/xen/include/asm-x86/mm.h -+++ b/xen/include/asm-x86/mm.h -@@ -69,25 +69,22 @@ - /* Set when is using a page as a page table */ - #define _PGC_page_table PG_shift(3) - #define PGC_page_table PG_mask(1, 3) -- /* 3-bit PAT/PCD/PWT cache-attribute hint. */ --#define PGC_cacheattr_base PG_shift(6) --#define PGC_cacheattr_mask PG_mask(7, 6) - /* Page is broken? 
*/ --#define _PGC_broken PG_shift(7) --#define PGC_broken PG_mask(1, 7) -+#define _PGC_broken PG_shift(4) -+#define PGC_broken PG_mask(1, 4) - /* Mutually-exclusive page states: { inuse, offlining, offlined, free }. */ --#define PGC_state PG_mask(3, 9) --#define PGC_state_inuse PG_mask(0, 9) --#define PGC_state_offlining PG_mask(1, 9) --#define PGC_state_offlined PG_mask(2, 9) --#define PGC_state_free PG_mask(3, 9) -+#define PGC_state PG_mask(3, 6) -+#define PGC_state_inuse PG_mask(0, 6) -+#define PGC_state_offlining PG_mask(1, 6) -+#define PGC_state_offlined PG_mask(2, 6) -+#define PGC_state_free PG_mask(3, 6) - #define page_state_is(pg, st) (((pg)->count_info&PGC_state) == PGC_state_##st) - /* Page is not reference counted (see below for caveats) */ --#define _PGC_extra PG_shift(10) --#define PGC_extra PG_mask(1, 10) -+#define _PGC_extra PG_shift(7) -+#define PGC_extra PG_mask(1, 7) - - /* Count of references to this frame. */ --#define PGC_count_width PG_shift(10) -+#define PGC_count_width PG_shift(7) - #define PGC_count_mask ((1UL<<PGC_count_width)-1) - - /* diff --git a/system/xen/xsa/xsa402-4.16-3.patch b/system/xen/xsa/xsa402-4.16-3.patch deleted file mode 100644 index b4d2a4c835..0000000000 --- a/system/xen/xsa/xsa402-4.16-3.patch +++ /dev/null @@ -1,284 +0,0 @@ -From: Andrew Cooper <andrew.cooper3@citrix.com> -Subject: x86: Split cache_flush() out of cache_writeback() - -Subsequent changes will want a fully flushing version. - -Use the new helper rather than opencoding it in flush_area_local(). This -resolves an outstanding issue where the conditional sfence is on the wrong -side of the clflushopt loop. clflushopt is ordered with respect to older -stores, not to younger stores. - -Rename gnttab_cache_flush()'s helper to avoid colliding in name. -grant_table.c can see the prototype from cache.h so the build fails -otherwise. - -This is part of XSA-402. 
- -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> - -Xen 4.16 and earlier: - * Also backport half of c/s 3330013e67396 "VT-d / x86: re-arrange cache - syncing" to split cache_writeback() out of the IOMMU logic, but without the - associated hooks changes. - -diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c -index 25798df50f54..0c912b8669f8 100644 ---- a/xen/arch/x86/flushtlb.c -+++ b/xen/arch/x86/flushtlb.c -@@ -234,7 +234,7 @@ unsigned int flush_area_local(const void *va, unsigned int flags) - if ( flags & FLUSH_CACHE ) - { - const struct cpuinfo_x86 *c = &current_cpu_data; -- unsigned long i, sz = 0; -+ unsigned long sz = 0; - - if ( order < (BITS_PER_LONG - PAGE_SHIFT) ) - sz = 1UL << (order + PAGE_SHIFT); -@@ -244,13 +244,7 @@ unsigned int flush_area_local(const void *va, unsigned int flags) - c->x86_clflush_size && c->x86_cache_size && sz && - ((sz >> 10) < c->x86_cache_size) ) - { -- alternative("", "sfence", X86_FEATURE_CLFLUSHOPT); -- for ( i = 0; i < sz; i += c->x86_clflush_size ) -- alternative_input(".byte " __stringify(NOP_DS_PREFIX) ";" -- " clflush %0", -- "data16 clflush %0", /* clflushopt */ -- X86_FEATURE_CLFLUSHOPT, -- "m" (((const char *)va)[i])); -+ cache_flush(va, sz); - flags &= ~FLUSH_CACHE; - } - else -@@ -265,6 +259,80 @@ unsigned int flush_area_local(const void *va, unsigned int flags) - return flags; - } - -+void cache_flush(const void *addr, unsigned int size) -+{ -+ /* -+ * This function may be called before current_cpu_data is established. -+ * Hence a fallback is needed to prevent the loop below becoming infinite. 
-+ */ -+ unsigned int clflush_size = current_cpu_data.x86_clflush_size ?: 16; -+ const void *end = addr + size; -+ -+ addr -= (unsigned long)addr & (clflush_size - 1); -+ for ( ; addr < end; addr += clflush_size ) -+ { -+ /* -+ * Note regarding the "ds" prefix use: it's faster to do a clflush -+ * + prefix than a clflush + nop, and hence the prefix is added instead -+ * of letting the alternative framework fill the gap by appending nops. -+ */ -+ alternative_io("ds; clflush %[p]", -+ "data16 clflush %[p]", /* clflushopt */ -+ X86_FEATURE_CLFLUSHOPT, -+ /* no outputs */, -+ [p] "m" (*(const char *)(addr))); -+ } -+ -+ alternative("", "sfence", X86_FEATURE_CLFLUSHOPT); -+} -+ -+void cache_writeback(const void *addr, unsigned int size) -+{ -+ unsigned int clflush_size; -+ const void *end = addr + size; -+ -+ /* Fall back to CLFLUSH{,OPT} when CLWB isn't available. */ -+ if ( !boot_cpu_has(X86_FEATURE_CLWB) ) -+ return cache_flush(addr, size); -+ -+ /* -+ * This function may be called before current_cpu_data is established. -+ * Hence a fallback is needed to prevent the loop below becoming infinite. -+ */ -+ clflush_size = current_cpu_data.x86_clflush_size ?: 16; -+ addr -= (unsigned long)addr & (clflush_size - 1); -+ for ( ; addr < end; addr += clflush_size ) -+ { -+/* -+ * The arguments to a macro must not include preprocessor directives. Doing so -+ * results in undefined behavior, so we have to create some defines here in -+ * order to avoid it. 
-+ */ -+#if defined(HAVE_AS_CLWB) -+# define CLWB_ENCODING "clwb %[p]" -+#elif defined(HAVE_AS_XSAVEOPT) -+# define CLWB_ENCODING "data16 xsaveopt %[p]" /* clwb */ -+#else -+# define CLWB_ENCODING ".byte 0x66, 0x0f, 0xae, 0x30" /* clwb (%%rax) */ -+#endif -+ -+#define BASE_INPUT(addr) [p] "m" (*(const char *)(addr)) -+#if defined(HAVE_AS_CLWB) || defined(HAVE_AS_XSAVEOPT) -+# define INPUT BASE_INPUT -+#else -+# define INPUT(addr) "a" (addr), BASE_INPUT(addr) -+#endif -+ -+ asm volatile (CLWB_ENCODING :: INPUT(addr)); -+ -+#undef INPUT -+#undef BASE_INPUT -+#undef CLWB_ENCODING -+ } -+ -+ asm volatile ("sfence" ::: "memory"); -+} -+ - unsigned int guest_flush_tlb_flags(const struct domain *d) - { - bool shadow = paging_mode_shadow(d); -diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c -index 66f8ce71741c..4c742cd8fe81 100644 ---- a/xen/common/grant_table.c -+++ b/xen/common/grant_table.c -@@ -3431,7 +3431,7 @@ gnttab_swap_grant_ref(XEN_GUEST_HANDLE_PARAM(gnttab_swap_grant_ref_t) uop, - return 0; - } - --static int cache_flush(const gnttab_cache_flush_t *cflush, grant_ref_t *cur_ref) -+static int _cache_flush(const gnttab_cache_flush_t *cflush, grant_ref_t *cur_ref) - { - struct domain *d, *owner; - struct page_info *page; -@@ -3525,7 +3525,7 @@ gnttab_cache_flush(XEN_GUEST_HANDLE_PARAM(gnttab_cache_flush_t) uop, - return -EFAULT; - for ( ; ; ) - { -- int ret = cache_flush(&op, cur_ref); -+ int ret = _cache_flush(&op, cur_ref); - - if ( ret < 0 ) - return ret; -diff --git a/xen/drivers/passthrough/vtd/extern.h b/xen/drivers/passthrough/vtd/extern.h -index 01e010a10d61..401079299725 100644 ---- a/xen/drivers/passthrough/vtd/extern.h -+++ b/xen/drivers/passthrough/vtd/extern.h -@@ -76,7 +76,6 @@ int __must_check qinval_device_iotlb_sync(struct vtd_iommu *iommu, - struct pci_dev *pdev, - u16 did, u16 size, u64 addr); - --unsigned int get_cache_line_size(void); - void flush_all_cache(void); - - uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t 
node); -diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c -index 8975c1de61bc..bc377c9bcfa4 100644 ---- a/xen/drivers/passthrough/vtd/iommu.c -+++ b/xen/drivers/passthrough/vtd/iommu.c -@@ -31,6 +31,7 @@ - #include <xen/pci.h> - #include <xen/pci_regs.h> - #include <xen/keyhandler.h> -+#include <asm/cache.h> - #include <asm/msi.h> - #include <asm/nops.h> - #include <asm/irq.h> -@@ -206,54 +207,6 @@ static void check_cleanup_domid_map(const struct domain *d, - } - } - --static void sync_cache(const void *addr, unsigned int size) --{ -- static unsigned long clflush_size = 0; -- const void *end = addr + size; -- -- if ( clflush_size == 0 ) -- clflush_size = get_cache_line_size(); -- -- addr -= (unsigned long)addr & (clflush_size - 1); -- for ( ; addr < end; addr += clflush_size ) --/* -- * The arguments to a macro must not include preprocessor directives. Doing so -- * results in undefined behavior, so we have to create some defines here in -- * order to avoid it. -- */ --#if defined(HAVE_AS_CLWB) --# define CLWB_ENCODING "clwb %[p]" --#elif defined(HAVE_AS_XSAVEOPT) --# define CLWB_ENCODING "data16 xsaveopt %[p]" /* clwb */ --#else --# define CLWB_ENCODING ".byte 0x66, 0x0f, 0xae, 0x30" /* clwb (%%rax) */ --#endif -- --#define BASE_INPUT(addr) [p] "m" (*(const char *)(addr)) --#if defined(HAVE_AS_CLWB) || defined(HAVE_AS_XSAVEOPT) --# define INPUT BASE_INPUT --#else --# define INPUT(addr) "a" (addr), BASE_INPUT(addr) --#endif -- /* -- * Note regarding the use of NOP_DS_PREFIX: it's faster to do a clflush -- * + prefix than a clflush + nop, and hence the prefix is added instead -- * of letting the alternative framework fill the gap by appending nops. 
-- */ -- alternative_io_2(".byte " __stringify(NOP_DS_PREFIX) "; clflush %[p]", -- "data16 clflush %[p]", /* clflushopt */ -- X86_FEATURE_CLFLUSHOPT, -- CLWB_ENCODING, -- X86_FEATURE_CLWB, /* no outputs */, -- INPUT(addr)); --#undef INPUT --#undef BASE_INPUT --#undef CLWB_ENCODING -- -- alternative_2("", "sfence", X86_FEATURE_CLFLUSHOPT, -- "sfence", X86_FEATURE_CLWB); --} -- - /* Allocate page table, return its machine address */ - uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node) - { -@@ -273,7 +226,7 @@ uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node) - clear_page(vaddr); - - if ( (iommu_ops.init ? &iommu_ops : &vtd_ops)->sync_cache ) -- sync_cache(vaddr, PAGE_SIZE); -+ cache_writeback(vaddr, PAGE_SIZE); - unmap_domain_page(vaddr); - cur_pg++; - } -@@ -1305,7 +1258,7 @@ int __init iommu_alloc(struct acpi_drhd_unit *drhd) - iommu->nr_pt_levels = agaw_to_level(agaw); - - if ( !ecap_coherent(iommu->ecap) ) -- vtd_ops.sync_cache = sync_cache; -+ vtd_ops.sync_cache = cache_writeback; - - /* allocate domain id bitmap */ - iommu->domid_bitmap = xzalloc_array(unsigned long, BITS_TO_LONGS(nr_dom)); -diff --git a/xen/drivers/passthrough/vtd/x86/vtd.c b/xen/drivers/passthrough/vtd/x86/vtd.c -index 6681dccd6970..55f0faa521cb 100644 ---- a/xen/drivers/passthrough/vtd/x86/vtd.c -+++ b/xen/drivers/passthrough/vtd/x86/vtd.c -@@ -47,11 +47,6 @@ void unmap_vtd_domain_page(const void *va) - unmap_domain_page(va); - } - --unsigned int get_cache_line_size(void) --{ -- return ((cpuid_ebx(1) >> 8) & 0xff) * 8; --} -- - void flush_all_cache() - { - wbinvd(); -diff --git a/xen/include/asm-x86/cache.h b/xen/include/asm-x86/cache.h -index 1f7173d8c72c..e4770efb22b9 100644 ---- a/xen/include/asm-x86/cache.h -+++ b/xen/include/asm-x86/cache.h -@@ -11,4 +11,11 @@ - - #define __read_mostly __section(".data.read_mostly") - -+#ifndef __ASSEMBLY__ -+ -+void cache_flush(const void *addr, unsigned int size); -+void cache_writeback(const void *addr, unsigned int 
size); -+ -+#endif -+ - #endif diff --git a/system/xen/xsa/xsa402-4.16-4.patch b/system/xen/xsa/xsa402-4.16-4.patch deleted file mode 100644 index 21109225d7..0000000000 --- a/system/xen/xsa/xsa402-4.16-4.patch +++ /dev/null @@ -1,83 +0,0 @@ -From: Andrew Cooper <andrew.cooper3@citrix.com> -Subject: x86/amd: Work around CLFLUSH ordering on older parts - -On pre-CLFLUSHOPT AMD CPUs, CLFLUSH is weakely ordered with everything, -including reads and writes to the address, and LFENCE/SFENCE instructions. - -This creates a multitude of problematic corner cases, laid out in the manual. -Arrange to use MFENCE on both sides of the CLFLUSH to force proper ordering. - -This is part of XSA-402. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> - -diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c -index a8e37dbb1f5c..b3b9a0df5fed 100644 ---- a/xen/arch/x86/cpu/amd.c -+++ b/xen/arch/x86/cpu/amd.c -@@ -812,6 +812,14 @@ static void init_amd(struct cpuinfo_x86 *c) - if (!cpu_has_lfence_dispatch) - __set_bit(X86_FEATURE_MFENCE_RDTSC, c->x86_capability); - -+ /* -+ * On pre-CLFLUSHOPT AMD CPUs, CLFLUSH is weakly ordered with -+ * everything, including reads and writes to address, and -+ * LFENCE/SFENCE instructions. -+ */ -+ if (!cpu_has_clflushopt) -+ setup_force_cpu_cap(X86_BUG_CLFLUSH_MFENCE); -+ - switch(c->x86) - { - case 0xf ... 0x11: -diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c -index 0c912b8669f8..dcbb4064012e 100644 ---- a/xen/arch/x86/flushtlb.c -+++ b/xen/arch/x86/flushtlb.c -@@ -259,6 +259,13 @@ unsigned int flush_area_local(const void *va, unsigned int flags) - return flags; - } - -+/* -+ * On pre-CLFLUSHOPT AMD CPUs, CLFLUSH is weakly ordered with everything, -+ * including reads and writes to address, and LFENCE/SFENCE instructions. -+ * -+ * This function only works safely after alternatives have run. Luckily, at -+ * the time of writing, we don't flush the caches that early. 
-+ */ - void cache_flush(const void *addr, unsigned int size) - { - /* -@@ -268,6 +275,8 @@ void cache_flush(const void *addr, unsigned int size) - unsigned int clflush_size = current_cpu_data.x86_clflush_size ?: 16; - const void *end = addr + size; - -+ alternative("", "mfence", X86_BUG_CLFLUSH_MFENCE); -+ - addr -= (unsigned long)addr & (clflush_size - 1); - for ( ; addr < end; addr += clflush_size ) - { -@@ -283,7 +292,9 @@ void cache_flush(const void *addr, unsigned int size) - [p] "m" (*(const char *)(addr))); - } - -- alternative("", "sfence", X86_FEATURE_CLFLUSHOPT); -+ alternative_2("", -+ "sfence", X86_FEATURE_CLFLUSHOPT, -+ "mfence", X86_BUG_CLFLUSH_MFENCE); - } - - void cache_writeback(const void *addr, unsigned int size) -diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h -index 7413febd7ad8..ff3157d52d13 100644 ---- a/xen/include/asm-x86/cpufeatures.h -+++ b/xen/include/asm-x86/cpufeatures.h -@@ -47,6 +47,7 @@ XEN_CPUFEATURE(XEN_IBT, X86_SYNTH(27)) /* Xen uses CET Indirect Branch - - #define X86_BUG_FPU_PTRS X86_BUG( 0) /* (F)X{SAVE,RSTOR} doesn't save/restore FOP/FIP/FDP. */ - #define X86_BUG_NULL_SEG X86_BUG( 1) /* NULL-ing a selector preserves the base and limit. */ -+#define X86_BUG_CLFLUSH_MFENCE X86_BUG( 2) /* MFENCE needed to serialise CLFLUSH */ - - /* Total number of capability words, inc synth and bug words. */ - #define NCAPINTS (FSCAPINTS + X86_NR_SYNTH + X86_NR_BUG) /* N 32-bit words worth of info */ diff --git a/system/xen/xsa/xsa402-4.16-5.patch b/system/xen/xsa/xsa402-4.16-5.patch deleted file mode 100644 index 4806d25c6f..0000000000 --- a/system/xen/xsa/xsa402-4.16-5.patch +++ /dev/null @@ -1,148 +0,0 @@ -From: Andrew Cooper <andrew.cooper3@citrix.com> -Subject: x86/pv: Track and flush non-coherent mappings of RAM - -There are legitimate uses of WC mappings of RAM, e.g. for DMA buffers with -devices that make non-coherent writes. The Linux sound subsystem makes -extensive use of this technique. 
- -For such usecases, the guest's DMA buffer is mapped and consistently used as -WC, and Xen doesn't interact with the buffer. - -However, a mischevious guest can use WC mappings to deliberately create -non-coherency between the cache and RAM, and use this to trick Xen into -validating a pagetable which isn't actually safe. - -Allocate a new PGT_non_coherent to track the non-coherency of mappings. Set -it whenever a non-coherent writeable mapping is created. If the page is used -as anything other than PGT_writable_page, force a cache flush before -validation. Also force a cache flush before the page is returned to the heap. - -This is CVE-2022-26364, part of XSA-402. - -Reported-by: Jann Horn <jannh@google.com> -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: George Dunlap <george.dunlap@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index ab32d13a1a0d..bab9624fabb7 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -997,6 +997,15 @@ get_page_from_l1e( - return -EACCES; - } - -+ /* -+ * Track writeable non-coherent mappings to RAM pages, to trigger a cache -+ * flush later if the target is used as anything but a PGT_writeable page. -+ * We care about all writeable mappings, including foreign mappings. -+ */ -+ if ( !boot_cpu_has(X86_FEATURE_XEN_SELFSNOOP) && -+ (l1f & (PAGE_CACHE_ATTRS | _PAGE_RW)) == (_PAGE_WC | _PAGE_RW) ) -+ set_bit(_PGT_non_coherent, &page->u.inuse.type_info); -+ - return 0; - - could_not_pin: -@@ -2454,6 +2463,19 @@ static int cleanup_page_mappings(struct page_info *page) - } - } - -+ /* -+ * Flush the cache if there were previously non-coherent writeable -+ * mappings of this page. This forces the page to be coherent before it -+ * is freed back to the heap. 
-+ */ -+ if ( __test_and_clear_bit(_PGT_non_coherent, &page->u.inuse.type_info) ) -+ { -+ void *addr = __map_domain_page(page); -+ -+ cache_flush(addr, PAGE_SIZE); -+ unmap_domain_page(addr); -+ } -+ - return rc; - } - -@@ -3028,6 +3050,22 @@ static int _get_page_type(struct page_info *page, unsigned long type, - if ( unlikely(!(nx & PGT_validated)) ) - { - /* -+ * Flush the cache if there were previously non-coherent mappings of -+ * this page, and we're trying to use it as anything other than a -+ * writeable page. This forces the page to be coherent before we -+ * validate its contents for safety. -+ */ -+ if ( (nx & PGT_non_coherent) && type != PGT_writable_page ) -+ { -+ void *addr = __map_domain_page(page); -+ -+ cache_flush(addr, PAGE_SIZE); -+ unmap_domain_page(addr); -+ -+ page->u.inuse.type_info &= ~PGT_non_coherent; -+ } -+ -+ /* - * No special validation needed for writable or shared pages. Page - * tables and GDT/LDT need to have their contents audited. - * -diff --git a/xen/arch/x86/pv/grant_table.c b/xen/arch/x86/pv/grant_table.c -index 0325618c9883..81c72e61ed55 100644 ---- a/xen/arch/x86/pv/grant_table.c -+++ b/xen/arch/x86/pv/grant_table.c -@@ -109,7 +109,17 @@ int create_grant_pv_mapping(uint64_t addr, mfn_t frame, - - ol1e = *pl1e; - if ( UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, 0) ) -+ { -+ /* -+ * We always create mappings in this path. However, our caller, -+ * map_grant_ref(), only passes potentially non-zero cache_flags for -+ * MMIO frames, so this path doesn't create non-coherent mappings of -+ * RAM frames and there's no need to calculate PGT_non_coherent. 
-+ */ -+ ASSERT(!cache_flags || is_iomem_page(frame)); -+ - rc = GNTST_okay; -+ } - - out_unlock: - page_unlock(page); -@@ -294,7 +304,18 @@ int replace_grant_pv_mapping(uint64_t addr, mfn_t frame, - l1e_get_flags(ol1e), addr, grant_pte_flags); - - if ( UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, 0) ) -+ { -+ /* -+ * Generally, replace_grant_pv_mapping() is used to destroy mappings -+ * (n1le = l1e_empty()), but it can be a present mapping on the -+ * GNTABOP_unmap_and_replace path. -+ * -+ * In such cases, the PTE is fully transplanted from its old location -+ * via steal_linear_addr(), so we need not perform PGT_non_coherent -+ * checking here. -+ */ - rc = GNTST_okay; -+ } - - out_unlock: - page_unlock(page); -diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h -index 8a9a43bb0a9d..7464167ae192 100644 ---- a/xen/include/asm-x86/mm.h -+++ b/xen/include/asm-x86/mm.h -@@ -53,8 +53,12 @@ - #define _PGT_partial PG_shift(8) - #define PGT_partial PG_mask(1, 8) - -+/* Has this page been mapped writeable with a non-coherent memory type? */ -+#define _PGT_non_coherent PG_shift(9) -+#define PGT_non_coherent PG_mask(1, 9) -+ - /* Count of uses of this frame as its current type. */ --#define PGT_count_width PG_shift(8) -+#define PGT_count_width PG_shift(9) - #define PGT_count_mask ((1UL<<PGT_count_width)-1) - - /* Are the 'type mask' bits identical? */ |