diff options
author | Mario Preksavec | 2019-11-19 13:17:56 +0100 |
---|---|---|
committer | Willy Sudiarto Raharjo | 2019-11-23 10:02:01 +0100 |
commit | 903c02712d4cf39ae8218eb47149258dfa8c7d8a (patch) | |
tree | 61c869ed4904270e41898b9bcf74090822e28418 /system/xen/xsa/xsa299-4.12-0011-x86-mm-Don-t-drop-a-type-ref-unless-you-held-a-ref-t.patch | |
parent | 604be6a3da8dc95e2d89a426877c7f4021eb91df (diff) | |
download | slackbuilds-903c02712d4cf39ae8218eb47149258dfa8c7d8a.tar.gz |
system/xen: Updated for version 4.12.1.
Signed-off-by: Mario Preksavec <mario@slackware.hr>
Diffstat (limited to 'system/xen/xsa/xsa299-4.12-0011-x86-mm-Don-t-drop-a-type-ref-unless-you-held-a-ref-t.patch')
-rw-r--r-- | system/xen/xsa/xsa299-4.12-0011-x86-mm-Don-t-drop-a-type-ref-unless-you-held-a-ref-t.patch | 413 |
1 files changed, 413 insertions, 0 deletions
diff --git a/system/xen/xsa/xsa299-4.12-0011-x86-mm-Don-t-drop-a-type-ref-unless-you-held-a-ref-t.patch b/system/xen/xsa/xsa299-4.12-0011-x86-mm-Don-t-drop-a-type-ref-unless-you-held-a-ref-t.patch new file mode 100644 index 0000000000..ad7e6fee1b --- /dev/null +++ b/system/xen/xsa/xsa299-4.12-0011-x86-mm-Don-t-drop-a-type-ref-unless-you-held-a-ref-t.patch @@ -0,0 +1,413 @@ +From ea3dc624c5e6325a9c2f079e52a85965d4ab6ce8 Mon Sep 17 00:00:00 2001 +From: George Dunlap <george.dunlap@citrix.com> +Date: Thu, 10 Oct 2019 17:57:50 +0100 +Subject: [PATCH 11/11] x86/mm: Don't drop a type ref unless you held a ref to + begin with + +Validation and de-validation of pagetable trees may take arbitrarily +large amounts of time, and so must be preemptible. This is indicated +by setting the PGT_partial bit in the type_info, and setting +nr_validated_entries and partial_flags appropriately. Specifically, +if the entry at [nr_validated_entries] is partially validated, +partial_flags should have the PGT_partial_set bit set, and the entry +should hold a general reference count. During de-validation, +put_page_type() is called on partially validated entries. + +Unfortunately, there are a number of issues with the current algorithm. + +First, doing a "normal" put_page_type() is not safe when no type ref +is held: there is nothing to stop another vcpu from coming along and +picking up validation again: at which point the put_page_type may drop +the only page ref on an in-use page. Some examples are listed in the +appendix. + +The core issue is that put_page_type() is being called both to clean +up PGT_partial, and to drop a type count; and has no way of knowing +which is which; and so if in between, PGT_partial is cleared, +put_page_type() will drop the type ref erroneously. + +What is needed is to distinguish between two states: +- Dropping a type ref which is held +- Cleaning up a page which has been partially de/validated + +Fix this by telling put_page_type() which of the two activities you +intend. + +When cleaning up a partial de/validation, take no action unless you +find a page partially validated. + +If put_page_type() is called without PTF_partial_set, and finds the +page in a PGT_partial state anyway, then there's certainly been a +misaccounting somewhere, and carrying on would almost certainly cause +a security issue, so crash the host instead. + +In put_page_from_lNe, pass partial_flags on to _put_page_type(). + +old_guest_table may be set either with a fully validated page (when +using the "deferred put" pattern), or with a partially validated page +(when a normal "de-validation" is interrupted, or when a validation +fails part-way through due to invalid entries). Add a flag, +old_guest_table_partial, to indicate which of these it is, and use +that to pass the appropriate flag to _put_page_type(). + +While here, delete stray trailing whitespace. + +This is part of XSA-299. + +Reported-by: George Dunlap <george.dunlap@citrix.com> +Signed-off-by: George Dunlap <george.dunlap@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +----- +Appendix: + +Suppose page A, when interpreted as an l3 pagetable, contains all +valid entries; and suppose A[x] points to page B, which when +interpreted as an l2 pagetable, contains all valid entries. + +P1: PIN_L3_TABLE + A -> PGT_l3_table | 1 | valid + B -> PGT_l2_table | 1 | valid + +P1: UNPIN_TABLE + > Arrange to interrupt after B has been de-validated + B: + type_info -> PGT_l2_table | 0 + A: + type_info -> PGT_l3_table | 1 | partial + nr_validated_enties -> (less than x) + +P2: mod_l4_entry to point to A + > Arrange for this to be interrupted while B is being validated + B: + type_info -> PGT_l2_table | 1 | partial + (nr_validated_entires &c set as appropriate) + A: + type_info -> PGT_l3_table | 1 | partial + nr_validated_entries -> x + partial_pte = 1 + +P3: mod_l3_entry some other unrelated l3 to point to B: + B: + type_info -> PGT_l2_table | 1 + +P1: Restart UNPIN_TABLE + +At this point, since A.nr_validate_entries == x and A.partial_pte != +0, free_l3_table() will call put_page_from_l3e() on pl3e[x], dropping +its type count to 0 while it's still being pointed to by some other l3 + +A similar issue arises with old_guest_table. Consider the following +scenario: + +Suppose A is a page which, when interpreted as an l2, has valid entries +until entry x, which is invalid. + +V1: PIN_L2_TABLE(A) + <Validate until we try to validate [x], get -EINVAL> + A -> PGT_l2_table | 1 | PGT_partial + V1 -> old_guest_table = A + <delayed> + +V2: PIN_L2_TABLE(A) + <Pick up where V1 left off, try to re-validate [x], get -EINVAL> + A -> PGT_l2_table | 1 | PGT_partial + V2 -> old_guest_table = A + <restart> + put_old_guest_table() + _put_page_type(A) + A -> PGT_l2_table | 0 + +V1: <restart> + put_old_guest_table() + _put_page_type(A) # UNDERFLOW + +Indeed, it is possible to engineer for old_guest_table for every vcpu +a guest has to point to the same page. +--- + xen/arch/x86/domain.c | 6 +++ + xen/arch/x86/mm.c | 99 +++++++++++++++++++++++++++++++----- + xen/include/asm-x86/domain.h | 4 +- + 3 files changed, 95 insertions(+), 14 deletions(-) + +diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c +index 59df8a6d8d..f1ae5f89f5 100644 +--- a/xen/arch/x86/domain.c ++++ b/xen/arch/x86/domain.c +@@ -1104,9 +1104,15 @@ int arch_set_info_guest( + rc = -ERESTART; + /* Fallthrough */ + case -ERESTART: ++ /* ++ * NB that we're putting the kernel-mode table ++ * here, which we've already successfully ++ * validated above; hence partial = false; ++ */ + v->arch.old_guest_ptpg = NULL; + v->arch.old_guest_table = + pagetable_get_page(v->arch.guest_table); ++ v->arch.old_guest_table_partial = false; + v->arch.guest_table = pagetable_null(); + break; + default: +diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c +index a432e69c74..81774368a0 100644 +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -1359,10 +1359,11 @@ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn, + { + current->arch.old_guest_ptpg = ptpg; + current->arch.old_guest_table = pg; ++ current->arch.old_guest_table_partial = false; + } + else + { +- rc = _put_page_type(pg, PTF_preemptible, ptpg); ++ rc = _put_page_type(pg, flags | PTF_preemptible, ptpg); + if ( likely(!rc) ) + put_page(pg); + } +@@ -1385,6 +1386,7 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, + unsigned long mfn = l3e_get_pfn(l3e); + bool writeable = l3e_get_flags(l3e) & _PAGE_RW; + ++ ASSERT(!(flags & PTF_partial_set)); + ASSERT(!(mfn & ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1))); + do { + put_data_page(mfn_to_page(_mfn(mfn)), writeable); +@@ -1397,12 +1399,14 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, + + if ( flags & PTF_defer ) + { ++ ASSERT(!(flags & PTF_partial_set)); + current->arch.old_guest_ptpg = mfn_to_page(_mfn(pfn)); + current->arch.old_guest_table = pg; ++ current->arch.old_guest_table_partial = false; + return 0; + } + +- rc = _put_page_type(pg, PTF_preemptible, mfn_to_page(_mfn(pfn))); ++ rc = _put_page_type(pg, flags | PTF_preemptible, mfn_to_page(_mfn(pfn))); + if ( likely(!rc) ) + put_page(pg); + +@@ -1421,12 +1425,15 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn, + + if ( flags & PTF_defer ) + { ++ ASSERT(!(flags & PTF_partial_set)); + current->arch.old_guest_ptpg = mfn_to_page(_mfn(pfn)); + current->arch.old_guest_table = pg; ++ current->arch.old_guest_table_partial = false; + return 0; + } + +- rc = _put_page_type(pg, PTF_preemptible, mfn_to_page(_mfn(pfn))); ++ rc = _put_page_type(pg, flags | PTF_preemptible, ++ mfn_to_page(_mfn(pfn))); + if ( likely(!rc) ) + put_page(pg); + } +@@ -1535,6 +1542,14 @@ static int alloc_l2_table(struct page_info *page, unsigned long type) + + pl2e = map_domain_page(_mfn(pfn)); + ++ /* ++ * NB that alloc_l2_table will never set partial_pte on an l2; but ++ * free_l2_table might if a linear_pagetable entry is interrupted ++ * partway through de-validation. In that circumstance, ++ * get_page_from_l2e() will always return -EINVAL; and we must ++ * retain the type ref by doing the normal partial_flags tracking. ++ */ ++ + for ( i = page->nr_validated_ptes; i < L2_PAGETABLE_ENTRIES; + i++, partial_flags = 0 ) + { +@@ -1598,6 +1613,7 @@ static int alloc_l2_table(struct page_info *page, unsigned long type) + page->partial_flags = partial_flags; + current->arch.old_guest_ptpg = NULL; + current->arch.old_guest_table = page; ++ current->arch.old_guest_table_partial = true; + } + } + if ( rc < 0 ) +@@ -1704,12 +1720,16 @@ static int alloc_l3_table(struct page_info *page) + * builds. + */ + if ( current->arch.old_guest_table == l3e_get_page(l3e) ) ++ { ++ ASSERT(current->arch.old_guest_table_partial); + page->partial_flags = PTF_partial_set; ++ } + else + ASSERT_UNREACHABLE(); + } + current->arch.old_guest_ptpg = NULL; + current->arch.old_guest_table = page; ++ current->arch.old_guest_table_partial = true; + } + while ( i-- > 0 ) + pl3e[i] = unadjust_guest_l3e(pl3e[i], d); +@@ -1897,12 +1917,16 @@ static int alloc_l4_table(struct page_info *page) + * builds. + */ + if ( current->arch.old_guest_table == l4e_get_page(l4e) ) ++ { ++ ASSERT(current->arch.old_guest_table_partial); + page->partial_flags = PTF_partial_set; ++ } + else + ASSERT_UNREACHABLE(); + } + current->arch.old_guest_ptpg = NULL; + current->arch.old_guest_table = page; ++ current->arch.old_guest_table_partial = true; + } + } + } +@@ -2831,6 +2855,28 @@ static int _put_page_type(struct page_info *page, unsigned int flags, + x = y; + nx = x - 1; + ++ /* ++ * Is this expected to do a full reference drop, or only ++ * cleanup partial validation / devalidation? ++ * ++ * If the former, the caller must hold a "full" type ref; ++ * which means the page must be validated. If the page is ++ * *not* fully validated, continuing would almost certainly ++ * open up a security hole. An exception to this is during ++ * domain destruction, where PGT_validated can be dropped ++ * without dropping a type ref. ++ * ++ * If the latter, do nothing unless type PGT_partial is set. ++ * If it is set, the type count must be 1. ++ */ ++ if ( !(flags & PTF_partial_set) ) ++ BUG_ON((x & PGT_partial) || ++ !((x & PGT_validated) || page_get_owner(page)->is_dying)); ++ else if ( !(x & PGT_partial) ) ++ return 0; ++ else ++ BUG_ON((x & PGT_count_mask) != 1); ++ + ASSERT((x & PGT_count_mask) != 0); + + switch ( nx & (PGT_locked | PGT_count_mask) ) +@@ -3092,17 +3138,34 @@ int put_old_guest_table(struct vcpu *v) + if ( !v->arch.old_guest_table ) + return 0; + +- switch ( rc = _put_page_type(v->arch.old_guest_table, PTF_preemptible, +- v->arch.old_guest_ptpg) ) ++ rc = _put_page_type(v->arch.old_guest_table, ++ PTF_preemptible | ++ ( v->arch.old_guest_table_partial ? ++ PTF_partial_set : 0 ), ++ v->arch.old_guest_ptpg); ++ ++ if ( rc == -ERESTART || rc == -EINTR ) + { +- case -EINTR: +- case -ERESTART: ++ v->arch.old_guest_table_partial = (rc == -ERESTART); + return -ERESTART; +- case 0: +- put_page(v->arch.old_guest_table); + } + ++ /* ++ * It shouldn't be possible for _put_page_type() to return ++ * anything else at the moment; but if it does happen in ++ * production, leaking the type ref is probably the best thing to ++ * do. Either way, drop the general ref held by old_guest_table. ++ */ ++ ASSERT(rc == 0); ++ ++ put_page(v->arch.old_guest_table); + v->arch.old_guest_table = NULL; ++ v->arch.old_guest_ptpg = NULL; ++ /* ++ * Safest default if someone sets old_guest_table without ++ * explicitly setting old_guest_table_partial. ++ */ ++ v->arch.old_guest_table_partial = true; + + return rc; + } +@@ -3253,11 +3316,11 @@ int new_guest_cr3(mfn_t mfn) + switch ( rc = put_page_and_type_preemptible(page) ) + { + case -EINTR: +- rc = -ERESTART; +- /* fallthrough */ + case -ERESTART: + curr->arch.old_guest_ptpg = NULL; + curr->arch.old_guest_table = page; ++ curr->arch.old_guest_table_partial = (rc == -ERESTART); ++ rc = -ERESTART; + break; + default: + BUG_ON(rc); +@@ -3494,6 +3557,7 @@ long do_mmuext_op( + { + curr->arch.old_guest_ptpg = NULL; + curr->arch.old_guest_table = page; ++ curr->arch.old_guest_table_partial = false; + } + } + } +@@ -3528,6 +3592,11 @@ long do_mmuext_op( + case -ERESTART: + curr->arch.old_guest_ptpg = NULL; + curr->arch.old_guest_table = page; ++ /* ++ * EINTR means we still hold the type ref; ERESTART ++ * means PGT_partial holds the type ref ++ */ ++ curr->arch.old_guest_table_partial = (rc == -ERESTART); + rc = 0; + break; + default: +@@ -3596,11 +3665,15 @@ long do_mmuext_op( + switch ( rc = put_page_and_type_preemptible(page) ) + { + case -EINTR: +- rc = -ERESTART; +- /* fallthrough */ + case -ERESTART: + curr->arch.old_guest_ptpg = NULL; + curr->arch.old_guest_table = page; ++ /* ++ * EINTR means we still hold the type ref; ++ * ERESTART means PGT_partial holds the ref ++ */ ++ curr->arch.old_guest_table_partial = (rc == -ERESTART); ++ rc = -ERESTART; + break; + default: + BUG_ON(rc); +diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h +index 214e44ce1c..2cfce7b36b 100644 +--- a/xen/include/asm-x86/domain.h ++++ b/xen/include/asm-x86/domain.h +@@ -307,7 +307,7 @@ struct arch_domain + + struct paging_domain paging; + struct p2m_domain *p2m; +- /* To enforce lock ordering in the pod code wrt the ++ /* To enforce lock ordering in the pod code wrt the + * page_alloc lock */ + int page_alloc_unlock_level; + +@@ -581,6 +581,8 @@ struct arch_vcpu + struct page_info *old_guest_table; /* partially destructed pagetable */ + struct page_info *old_guest_ptpg; /* containing page table of the */ + /* former, if any */ ++ bool old_guest_table_partial; /* Are we dropping a type ref, or just ++ * finishing up a partial de-validation? */ + /* guest_table holds a ref to the page, and also a type-count unless + * shadow refcounts are in use */ + pagetable_t shadow_table[4]; /* (MFN) shadow(s) of guest */ +-- +2.23.0 + |