diff options
Diffstat (limited to 'system/xen/xsa/xsa345-0003-x86-mm-Prevent-some-races-in-hypervisor-mapping-upda.patch')
-rw-r--r-- | system/xen/xsa/xsa345-0003-x86-mm-Prevent-some-races-in-hypervisor-mapping-upda.patch | 249 |
1 files changed, 0 insertions, 249 deletions
diff --git a/system/xen/xsa/xsa345-0003-x86-mm-Prevent-some-races-in-hypervisor-mapping-upda.patch b/system/xen/xsa/xsa345-0003-x86-mm-Prevent-some-races-in-hypervisor-mapping-upda.patch deleted file mode 100644 index db407416b9..0000000000 --- a/system/xen/xsa/xsa345-0003-x86-mm-Prevent-some-races-in-hypervisor-mapping-upda.patch +++ /dev/null @@ -1,249 +0,0 @@ -From 0ff9a8453dc47cd47eee9659d5916afb5094e871 Mon Sep 17 00:00:00 2001 -From: Hongyan Xia <hongyxia@amazon.com> -Date: Sat, 11 Jan 2020 21:57:43 +0000 -Subject: [PATCH 3/3] x86/mm: Prevent some races in hypervisor mapping updates - -map_pages_to_xen will attempt to coalesce mappings into 2MiB and 1GiB -superpages if possible, to maximize TLB efficiency. This means both -replacing superpage entries with smaller entries, and replacing -smaller entries with superpages. - -Unfortunately, while some potential races are handled correctly, -others are not. These include: - -1. When one processor modifies a sub-superpage mapping while another -processor replaces the entire range with a superpage. - -Take the following example: - -Suppose L3[N] points to L2. And suppose we have two processors, A and -B. - -* A walks the pagetables, get a pointer to L2. -* B replaces L3[N] with a 1GiB mapping. -* B Frees L2 -* A writes L2[M] # - -This is race exacerbated by the fact that virt_to_xen_l[21]e doesn't -handle higher-level superpages properly: If you call virt_xen_to_l2e -on a virtual address within an L3 superpage, you'll either hit a BUG() -(most likely), or get a pointer into the middle of a data page; same -with virt_xen_to_l1 on a virtual address within either an L3 or L2 -superpage. - -So take the following example: - -* A reads pl3e and discovers it to point to an L2. -* B replaces L3[N] with a 1GiB mapping -* A calls virt_to_xen_l2e() and hits the BUG_ON() # - -2. When two processors simultaneously try to replace a sub-superpage -mapping with a superpage mapping. - -Take the following example: - -Suppose L3[N] points to L2. And suppose we have two processors, A and B, -both trying to replace L3[N] with a superpage. - -* A walks the pagetables, get a pointer to pl3e, and takes a copy ol3e pointing to L2. -* B walks the pagetables, gets a pointre to pl3e, and takes a copy ol3e pointing to L2. -* A writes the new value into L3[N] -* B writes the new value into L3[N] -* A recursively frees all the L1's under L2, then frees L2 -* B recursively double-frees all the L1's under L2, then double-frees L2 # - -Fix this by grabbing a lock for the entirety of the mapping update -operation. - -Rather than grabbing map_pgdir_lock for the entire operation, however, -repurpose the PGT_locked bit from L3's page->type_info as a lock. -This means that rather than locking the entire address space, we -"only" lock a single 512GiB chunk of hypervisor address space at a -time. - -There was a proposal for a lock-and-reverify approach, where we walk -the pagetables to the point where we decide what to do; then grab the -map_pgdir_lock, re-verify the information we collected without the -lock, and finally make the change (starting over again if anything had -changed). Without being able to guarantee that the L2 table wasn't -freed, however, that means every read would need to be considered -potentially unsafe. Thinking carefully about that is probably -something that wants to be done on public, not under time pressure. - -This is part of XSA-345. - -Reported-by: Hongyan Xia <hongyxia@amazon.com> -Signed-off-by: Hongyan Xia <hongyxia@amazon.com> -Signed-off-by: George Dunlap <george.dunlap@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> ---- - xen/arch/x86/mm.c | 92 +++++++++++++++++++++++++++++++++++++++++++++-- - 1 file changed, 89 insertions(+), 3 deletions(-) - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index af726d3274..d6a0761f43 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -2167,6 +2167,50 @@ void page_unlock(struct page_info *page) - current_locked_page_set(NULL); - } - -+/* -+ * L3 table locks: -+ * -+ * Used for serialization in map_pages_to_xen() and modify_xen_mappings(). -+ * -+ * For Xen PT pages, the page->u.inuse.type_info is unused and it is safe to -+ * reuse the PGT_locked flag. This lock is taken only when we move down to L3 -+ * tables and below, since L4 (and above, for 5-level paging) is still globally -+ * protected by map_pgdir_lock. -+ * -+ * PV MMU update hypercalls call map_pages_to_xen while holding a page's page_lock(). -+ * This has two implications: -+ * - We cannot reuse reuse current_locked_page_* for debugging -+ * - To avoid the chance of deadlock, even for different pages, we -+ * must never grab page_lock() after grabbing l3t_lock(). This -+ * includes any page_lock()-based locks, such as -+ * mem_sharing_page_lock(). -+ * -+ * Also note that we grab the map_pgdir_lock while holding the -+ * l3t_lock(), so to avoid deadlock we must avoid grabbing them in -+ * reverse order. -+ */ -+static void l3t_lock(struct page_info *page) -+{ -+ unsigned long x, nx; -+ -+ do { -+ while ( (x = page->u.inuse.type_info) & PGT_locked ) -+ cpu_relax(); -+ nx = x | PGT_locked; -+ } while ( cmpxchg(&page->u.inuse.type_info, x, nx) != x ); -+} -+ -+static void l3t_unlock(struct page_info *page) -+{ -+ unsigned long x, nx, y = page->u.inuse.type_info; -+ -+ do { -+ x = y; -+ BUG_ON(!(x & PGT_locked)); -+ nx = x & ~PGT_locked; -+ } while ( (y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x ); -+} -+ - #ifdef CONFIG_PV - /* - * PTE flags that a guest may change without re-validating the PTE. -@@ -5177,6 +5221,23 @@ l1_pgentry_t *virt_to_xen_l1e(unsigned long v) - flush_area_local((const void *)v, f) : \ - flush_area_all((const void *)v, f)) - -+#define L3T_INIT(page) (page) = ZERO_BLOCK_PTR -+ -+#define L3T_LOCK(page) \ -+ do { \ -+ if ( locking ) \ -+ l3t_lock(page); \ -+ } while ( false ) -+ -+#define L3T_UNLOCK(page) \ -+ do { \ -+ if ( locking && (page) != ZERO_BLOCK_PTR ) \ -+ { \ -+ l3t_unlock(page); \ -+ (page) = ZERO_BLOCK_PTR; \ -+ } \ -+ } while ( false ) -+ - int map_pages_to_xen( - unsigned long virt, - mfn_t mfn, -@@ -5188,6 +5249,7 @@ int map_pages_to_xen( - l1_pgentry_t *pl1e, ol1e; - unsigned int i; - int rc = -ENOMEM; -+ struct page_info *current_l3page; - - #define flush_flags(oldf) do { \ - unsigned int o_ = (oldf); \ -@@ -5203,13 +5265,20 @@ int map_pages_to_xen( - } \ - } while (0) - -+ L3T_INIT(current_l3page); -+ - while ( nr_mfns != 0 ) - { -- l3_pgentry_t ol3e, *pl3e = virt_to_xen_l3e(virt); -+ l3_pgentry_t *pl3e, ol3e; - -+ L3T_UNLOCK(current_l3page); -+ -+ pl3e = virt_to_xen_l3e(virt); - if ( !pl3e ) - goto out; - -+ current_l3page = virt_to_page(pl3e); -+ L3T_LOCK(current_l3page); - ol3e = *pl3e; - - if ( cpu_has_page1gb && -@@ -5543,6 +5612,7 @@ int map_pages_to_xen( - rc = 0; - - out: -+ L3T_UNLOCK(current_l3page); - return rc; - } - -@@ -5571,6 +5641,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf) - unsigned int i; - unsigned long v = s; - int rc = -ENOMEM; -+ struct page_info *current_l3page; - - /* Set of valid PTE bits which may be altered. */ - #define FLAGS_MASK (_PAGE_NX|_PAGE_RW|_PAGE_PRESENT) -@@ -5579,11 +5650,22 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf) - ASSERT(IS_ALIGNED(s, PAGE_SIZE)); - ASSERT(IS_ALIGNED(e, PAGE_SIZE)); - -+ L3T_INIT(current_l3page); -+ - while ( v < e ) - { -- l3_pgentry_t *pl3e = virt_to_xen_l3e(v); -+ l3_pgentry_t *pl3e; -+ -+ L3T_UNLOCK(current_l3page); - -- if ( !pl3e || !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ) -+ pl3e = virt_to_xen_l3e(v); -+ if ( !pl3e ) -+ goto out; -+ -+ current_l3page = virt_to_page(pl3e); -+ L3T_LOCK(current_l3page); -+ -+ if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ) - { - /* Confirm the caller isn't trying to create new mappings. */ - ASSERT(!(nf & _PAGE_PRESENT)); -@@ -5801,9 +5883,13 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf) - rc = 0; - - out: -+ L3T_UNLOCK(current_l3page); - return rc; - } - -+#undef L3T_LOCK -+#undef L3T_UNLOCK -+ - #undef flush_area - - int destroy_xen_mappings(unsigned long s, unsigned long e) --- -2.25.1 - |