author Mario Preksavec 2018-08-25 14:16:23 +0200
committer Willy Sudiarto Raharjo 2018-09-01 02:32:30 +0200
commit 9be84725e758c71832b27d3b3918cd67cc65f182 (patch)
tree 7617b9cb8c97051797f9464a2b0e396a1b303d20 /system/xen/patches
parent 78ff47b691fb8043946cb8bcc3b820b7369d9d7f (diff)
download slackbuilds-9be84725e758c71832b27d3b3918cd67cc65f182.tar.gz
system/xen: Updated for version 4.11.0
Signed-off-by: Mario Preksavec <mario@slackware.hr>
Diffstat (limited to 'system/xen/patches')
-rw-r--r-- system/xen/patches/xen-4.10.2-pre.patch | 1631
1 file changed, 0 insertions(+), 1631 deletions(-)
diff --git a/system/xen/patches/xen-4.10.2-pre.patch b/system/xen/patches/xen-4.10.2-pre.patch
deleted file mode 100644
index 42477696e1..0000000000
--- a/system/xen/patches/xen-4.10.2-pre.patch
+++ /dev/null
@@ -1,1631 +0,0 @@
-diff --git a/tools/libacpi/Makefile b/tools/libacpi/Makefile
-index a47a658a25..c17f3924cc 100644
---- a/tools/libacpi/Makefile
-+++ b/tools/libacpi/Makefile
-@@ -43,7 +43,7 @@ all: $(C_SRC) $(H_SRC)
-
- $(H_SRC): $(ACPI_BUILD_DIR)/%.h: %.asl iasl
- iasl -vs -p $(ACPI_BUILD_DIR)/$*.$(TMP_SUFFIX) -tc $<
-- sed -e 's/AmlCode/$*/g' $(ACPI_BUILD_DIR)/$*.hex >$@
-+ sed -e 's/AmlCode/$*/g' -e 's/_aml_code//g' $(ACPI_BUILD_DIR)/$*.hex >$@
- rm -f $(addprefix $(ACPI_BUILD_DIR)/, $*.aml $*.hex)
-
- $(MK_DSDT): mk_dsdt.c
-@@ -76,7 +76,7 @@ $(ACPI_BUILD_DIR)/dsdt_anycpu_arm.asl: $(MK_DSDT)
-
- $(C_SRC): $(ACPI_BUILD_DIR)/%.c: iasl $(ACPI_BUILD_DIR)/%.asl
- iasl -vs -p $(ACPI_BUILD_DIR)/$*.$(TMP_SUFFIX) -tc $(ACPI_BUILD_DIR)/$*.asl
-- sed -e 's/AmlCode/$*/g' $(ACPI_BUILD_DIR)/$*.hex > $@.$(TMP_SUFFIX)
-+ sed -e 's/AmlCode/$*/g' -e 's/_aml_code//g' $(ACPI_BUILD_DIR)/$*.hex > $@.$(TMP_SUFFIX)
- echo "int $*_len=sizeof($*);" >> $@.$(TMP_SUFFIX)
- mv -f $@.$(TMP_SUFFIX) $@
- rm -f $(addprefix $(ACPI_BUILD_DIR)/, $*.aml $*.hex)
-#diff --git a/xen/Makefile b/xen/Makefile
-#index ecec297b9b..580af86931 100644
-#--- a/xen/Makefile
-#+++ b/xen/Makefile
-#@@ -2,7 +2,7 @@
-# # All other places this is stored (eg. compile.h) should be autogenerated.
-# export XEN_VERSION = 4
-# export XEN_SUBVERSION = 10
-#-export XEN_EXTRAVERSION ?= .1$(XEN_VENDORVERSION)
-#+export XEN_EXTRAVERSION ?= .2-pre$(XEN_VENDORVERSION)
-# export XEN_FULLVERSION = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION)
-# -include xen-version
-#
-diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c
-index 1e4e5680a7..f7085d3c7b 100644
---- a/xen/arch/x86/acpi/power.c
-+++ b/xen/arch/x86/acpi/power.c
-@@ -28,6 +28,7 @@
- #include <asm/tboot.h>
- #include <asm/apic.h>
- #include <asm/io_apic.h>
-+#include <asm/spec_ctrl.h>
- #include <acpi/cpufreq/cpufreq.h>
-
- uint32_t system_reset_counter = 1;
-@@ -163,6 +164,7 @@ static int enter_state(u32 state)
- {
- unsigned long flags;
- int error;
-+ struct cpu_info *ci;
- unsigned long cr4;
-
- if ( (state <= ACPI_STATE_S0) || (state > ACPI_S_STATES_MAX) )
-@@ -203,12 +205,18 @@ static int enter_state(u32 state)
- printk(XENLOG_ERR "Some devices failed to power down.");
- system_state = SYS_STATE_resume;
- device_power_up(error);
-+ console_end_sync();
- error = -EIO;
- goto done;
- }
- else
- error = 0;
-
-+ ci = get_cpu_info();
-+ spec_ctrl_enter_idle(ci);
-+ /* Avoid NMI/#MC using MSR_SPEC_CTRL until we've reloaded microcode. */
-+ ci->bti_ist_info = 0;
-+
- ACPI_FLUSH_CPU_CACHE();
-
- switch ( state )
-@@ -243,17 +251,23 @@ static int enter_state(u32 state)
- if ( (state == ACPI_STATE_S3) && error )
- tboot_s3_error(error);
-
-+ console_end_sync();
-+
-+ microcode_resume_cpu(0);
-+
-+    /* Re-enable default NMI/#MC use of MSR_SPEC_CTRL. */
-+ ci->bti_ist_info = default_bti_ist_info;
-+ spec_ctrl_exit_idle(ci);
-+
- done:
- spin_debug_enable();
- local_irq_restore(flags);
-- console_end_sync();
- acpi_sleep_post(state);
- if ( hvm_cpu_up() )
- BUG();
-+ cpufreq_add_cpu(0);
-
- enable_cpu:
-- cpufreq_add_cpu(0);
-- microcode_resume_cpu(0);
- rcu_barrier();
- mtrr_aps_sync_begin();
- enable_nonboot_cpus();
-diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
-index fdb2bf1779..136adadb63 100644
---- a/xen/arch/x86/cpu/common.c
-+++ b/xen/arch/x86/cpu/common.c
-@@ -747,6 +747,7 @@ void load_system_tables(void)
- [IST_MCE - 1] = stack_top + IST_MCE * PAGE_SIZE,
- [IST_DF - 1] = stack_top + IST_DF * PAGE_SIZE,
- [IST_NMI - 1] = stack_top + IST_NMI * PAGE_SIZE,
-+ [IST_DB - 1] = stack_top + IST_DB * PAGE_SIZE,
-
- [IST_MAX ... ARRAY_SIZE(tss->ist) - 1] =
- 0x8600111111111111ul,
-@@ -774,6 +775,7 @@ void load_system_tables(void)
- set_ist(&idt_tables[cpu][TRAP_double_fault], IST_DF);
- set_ist(&idt_tables[cpu][TRAP_nmi], IST_NMI);
- set_ist(&idt_tables[cpu][TRAP_machine_check], IST_MCE);
-+ set_ist(&idt_tables[cpu][TRAP_debug], IST_DB);
-
- /*
- * Bottom-of-stack must be 16-byte aligned!
-diff --git a/xen/arch/x86/hpet.c b/xen/arch/x86/hpet.c
-index 8229c635e4..f18cbbd55a 100644
---- a/xen/arch/x86/hpet.c
-+++ b/xen/arch/x86/hpet.c
-@@ -509,6 +509,8 @@ static void hpet_attach_channel(unsigned int cpu,
- static void hpet_detach_channel(unsigned int cpu,
- struct hpet_event_channel *ch)
- {
-+ unsigned int next;
-+
- spin_lock_irq(&ch->lock);
-
- ASSERT(ch == per_cpu(cpu_bc_channel, cpu));
-@@ -517,7 +519,7 @@ static void hpet_detach_channel(unsigned int cpu,
-
- if ( cpu != ch->cpu )
- spin_unlock_irq(&ch->lock);
-- else if ( cpumask_empty(ch->cpumask) )
-+ else if ( (next = cpumask_first(ch->cpumask)) >= nr_cpu_ids )
- {
- ch->cpu = -1;
- clear_bit(HPET_EVT_USED_BIT, &ch->flags);
-@@ -525,7 +527,7 @@ static void hpet_detach_channel(unsigned int cpu,
- }
- else
- {
-- ch->cpu = cpumask_first(ch->cpumask);
-+ ch->cpu = next;
- set_channel_irq_affinity(ch);
- local_irq_enable();
- }
-diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
-index b282089e03..131480fdd9 100644
---- a/xen/arch/x86/hvm/emulate.c
-+++ b/xen/arch/x86/hvm/emulate.c
-@@ -2113,22 +2113,20 @@ static int _hvm_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt,
-
- vio->mmio_retry = 0;
-
-- switch ( rc = x86_emulate(&hvmemul_ctxt->ctxt, ops) )
-+ rc = x86_emulate(&hvmemul_ctxt->ctxt, ops);
-+ if ( rc == X86EMUL_OKAY && vio->mmio_retry )
-+ rc = X86EMUL_RETRY;
-+
-+ if ( !hvm_vcpu_io_need_completion(vio) )
- {
-- case X86EMUL_OKAY:
-- if ( vio->mmio_retry )
-- rc = X86EMUL_RETRY;
-- /* fall through */
-- default:
- vio->mmio_cache_count = 0;
- vio->mmio_insn_bytes = 0;
-- break;
--
-- case X86EMUL_RETRY:
-+ }
-+ else
-+ {
- BUILD_BUG_ON(sizeof(vio->mmio_insn) < sizeof(hvmemul_ctxt->insn_buf));
- vio->mmio_insn_bytes = hvmemul_ctxt->insn_buf_bytes;
- memcpy(vio->mmio_insn, hvmemul_ctxt->insn_buf, vio->mmio_insn_bytes);
-- break;
- }
-
- if ( hvmemul_ctxt->ctxt.retire.singlestep )
-diff --git a/xen/arch/x86/hvm/hpet.c b/xen/arch/x86/hvm/hpet.c
-index f7aed7f69e..28377091ca 100644
---- a/xen/arch/x86/hvm/hpet.c
-+++ b/xen/arch/x86/hvm/hpet.c
-@@ -264,13 +264,20 @@ static void hpet_set_timer(HPETState *h, unsigned int tn,
- diff = (timer_is_32bit(h, tn) && (-diff > HPET_TINY_TIME_SPAN))
- ? (uint32_t)diff : 0;
-
-+ destroy_periodic_time(&h->pt[tn]);
- if ( (tn <= 1) && (h->hpet.config & HPET_CFG_LEGACY) )
-+ {
- /* if LegacyReplacementRoute bit is set, HPET specification requires
- timer0 be routed to IRQ0 in NON-APIC or IRQ2 in the I/O APIC,
- timer1 be routed to IRQ8 in NON-APIC or IRQ8 in the I/O APIC. */
- irq = (tn == 0) ? 0 : 8;
-+ h->pt[tn].source = PTSRC_isa;
-+ }
- else
-+ {
- irq = timer_int_route(h, tn);
-+ h->pt[tn].source = PTSRC_ioapic;
-+ }
-
- /*
- * diff is the time from now when the timer should fire, for a periodic
-diff --git a/xen/arch/x86/hvm/ioreq.c b/xen/arch/x86/hvm/ioreq.c
-index d5afe20cc8..25b2445429 100644
---- a/xen/arch/x86/hvm/ioreq.c
-+++ b/xen/arch/x86/hvm/ioreq.c
-@@ -87,14 +87,17 @@ static void hvm_io_assist(struct hvm_ioreq_vcpu *sv, uint64_t data)
-
- static bool hvm_wait_for_io(struct hvm_ioreq_vcpu *sv, ioreq_t *p)
- {
-+ unsigned int prev_state = STATE_IOREQ_NONE;
-+
- while ( sv->pending )
- {
- unsigned int state = p->state;
-
-- rmb();
-- switch ( state )
-+ smp_rmb();
-+
-+ recheck:
-+ if ( unlikely(state == STATE_IOREQ_NONE) )
- {
-- case STATE_IOREQ_NONE:
- /*
- * The only reason we should see this case is when an
- * emulator is dying and it races with an I/O being
-@@ -102,14 +105,30 @@ static bool hvm_wait_for_io(struct hvm_ioreq_vcpu *sv, ioreq_t *p)
- */
- hvm_io_assist(sv, ~0ul);
- break;
-+ }
-+
-+ if ( unlikely(state < prev_state) )
-+ {
-+ gdprintk(XENLOG_ERR, "Weird HVM ioreq state transition %u -> %u\n",
-+ prev_state, state);
-+ sv->pending = false;
-+ domain_crash(sv->vcpu->domain);
-+ return false; /* bail */
-+ }
-+
-+ switch ( prev_state = state )
-+ {
- case STATE_IORESP_READY: /* IORESP_READY -> NONE */
- p->state = STATE_IOREQ_NONE;
- hvm_io_assist(sv, p->data);
- break;
- case STATE_IOREQ_READY: /* IOREQ_{READY,INPROCESS} -> IORESP_READY */
- case STATE_IOREQ_INPROCESS:
-- wait_on_xen_event_channel(sv->ioreq_evtchn, p->state != state);
-- break;
-+ wait_on_xen_event_channel(sv->ioreq_evtchn,
-+ ({ state = p->state;
-+ smp_rmb();
-+ state != prev_state; }));
-+ goto recheck;
- default:
- gdprintk(XENLOG_ERR, "Weird HVM iorequest state %u\n", state);
- sv->pending = false;
-diff --git a/xen/arch/x86/hvm/irq.c b/xen/arch/x86/hvm/irq.c
-index f528e2d081..c85d004402 100644
---- a/xen/arch/x86/hvm/irq.c
-+++ b/xen/arch/x86/hvm/irq.c
-@@ -41,6 +41,26 @@ static void assert_gsi(struct domain *d, unsigned ioapic_gsi)
- vioapic_irq_positive_edge(d, ioapic_gsi);
- }
-
-+int hvm_ioapic_assert(struct domain *d, unsigned int gsi, bool level)
-+{
-+ struct hvm_irq *hvm_irq = hvm_domain_irq(d);
-+ int vector;
-+
-+ if ( gsi >= hvm_irq->nr_gsis )
-+ {
-+ ASSERT_UNREACHABLE();
-+ return -1;
-+ }
-+
-+ spin_lock(&d->arch.hvm_domain.irq_lock);
-+ if ( !level || hvm_irq->gsi_assert_count[gsi]++ == 0 )
-+ assert_gsi(d, gsi);
-+ vector = vioapic_get_vector(d, gsi);
-+ spin_unlock(&d->arch.hvm_domain.irq_lock);
-+
-+ return vector;
-+}
-+
- static void assert_irq(struct domain *d, unsigned ioapic_gsi, unsigned pic_irq)
- {
- assert_gsi(d, ioapic_gsi);
-diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
-index dedec5752d..3b72b4dc2a 100644
---- a/xen/arch/x86/hvm/svm/svm.c
-+++ b/xen/arch/x86/hvm/svm/svm.c
-@@ -1046,6 +1046,7 @@ static void svm_ctxt_switch_from(struct vcpu *v)
- set_ist(&idt_tables[cpu][TRAP_double_fault], IST_DF);
- set_ist(&idt_tables[cpu][TRAP_nmi], IST_NMI);
- set_ist(&idt_tables[cpu][TRAP_machine_check], IST_MCE);
-+ set_ist(&idt_tables[cpu][TRAP_debug], IST_DB);
- }
-
- static void svm_ctxt_switch_to(struct vcpu *v)
-@@ -1067,6 +1068,7 @@ static void svm_ctxt_switch_to(struct vcpu *v)
- set_ist(&idt_tables[cpu][TRAP_double_fault], IST_NONE);
- set_ist(&idt_tables[cpu][TRAP_nmi], IST_NONE);
- set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE);
-+ set_ist(&idt_tables[cpu][TRAP_debug], IST_NONE);
-
- svm_restore_dr(v);
-
-@@ -1836,6 +1838,25 @@ static int svm_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
- struct vcpu *v = current;
- struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
-
-+ switch ( msr )
-+ {
-+ /*
-+ * Sync not needed while the cross-vendor logic is in unilateral effect.
-+ case MSR_IA32_SYSENTER_CS:
-+ case MSR_IA32_SYSENTER_ESP:
-+ case MSR_IA32_SYSENTER_EIP:
-+ */
-+ case MSR_STAR:
-+ case MSR_LSTAR:
-+ case MSR_CSTAR:
-+ case MSR_SYSCALL_MASK:
-+ case MSR_FS_BASE:
-+ case MSR_GS_BASE:
-+ case MSR_SHADOW_GS_BASE:
-+ svm_sync_vmcb(v);
-+ break;
-+ }
-+
- switch ( msr )
- {
- case MSR_IA32_SYSENTER_CS:
-@@ -1848,6 +1869,34 @@ static int svm_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
- *msr_content = v->arch.hvm_svm.guest_sysenter_eip;
- break;
-
-+ case MSR_STAR:
-+ *msr_content = vmcb->star;
-+ break;
-+
-+ case MSR_LSTAR:
-+ *msr_content = vmcb->lstar;
-+ break;
-+
-+ case MSR_CSTAR:
-+ *msr_content = vmcb->cstar;
-+ break;
-+
-+ case MSR_SYSCALL_MASK:
-+ *msr_content = vmcb->sfmask;
-+ break;
-+
-+ case MSR_FS_BASE:
-+ *msr_content = vmcb->fs.base;
-+ break;
-+
-+ case MSR_GS_BASE:
-+ *msr_content = vmcb->gs.base;
-+ break;
-+
-+ case MSR_SHADOW_GS_BASE:
-+ *msr_content = vmcb->kerngsbase;
-+ break;
-+
- case MSR_IA32_MCx_MISC(4): /* Threshold register */
- case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3:
- /*
-@@ -1976,32 +2025,81 @@ static int svm_msr_write_intercept(unsigned int msr, uint64_t msr_content)
- int ret, result = X86EMUL_OKAY;
- struct vcpu *v = current;
- struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
-- int sync = 0;
-+ bool sync = false;
-
- switch ( msr )
- {
- case MSR_IA32_SYSENTER_CS:
- case MSR_IA32_SYSENTER_ESP:
- case MSR_IA32_SYSENTER_EIP:
-- sync = 1;
-- break;
-- default:
-+ case MSR_STAR:
-+ case MSR_LSTAR:
-+ case MSR_CSTAR:
-+ case MSR_SYSCALL_MASK:
-+ case MSR_FS_BASE:
-+ case MSR_GS_BASE:
-+ case MSR_SHADOW_GS_BASE:
-+ sync = true;
- break;
- }
-
- if ( sync )
-- svm_sync_vmcb(v);
-+ svm_sync_vmcb(v);
-
- switch ( msr )
- {
-+ case MSR_IA32_SYSENTER_ESP:
-+ case MSR_IA32_SYSENTER_EIP:
-+ case MSR_LSTAR:
-+ case MSR_CSTAR:
-+ case MSR_FS_BASE:
-+ case MSR_GS_BASE:
-+ case MSR_SHADOW_GS_BASE:
-+ if ( !is_canonical_address(msr_content) )
-+ goto gpf;
-+
-+ switch ( msr )
-+ {
-+ case MSR_IA32_SYSENTER_ESP:
-+ vmcb->sysenter_esp = v->arch.hvm_svm.guest_sysenter_esp = msr_content;
-+ break;
-+
-+ case MSR_IA32_SYSENTER_EIP:
-+ vmcb->sysenter_eip = v->arch.hvm_svm.guest_sysenter_eip = msr_content;
-+ break;
-+
-+ case MSR_LSTAR:
-+ vmcb->lstar = msr_content;
-+ break;
-+
-+ case MSR_CSTAR:
-+ vmcb->cstar = msr_content;
-+ break;
-+
-+ case MSR_FS_BASE:
-+ vmcb->fs.base = msr_content;
-+ break;
-+
-+ case MSR_GS_BASE:
-+ vmcb->gs.base = msr_content;
-+ break;
-+
-+ case MSR_SHADOW_GS_BASE:
-+ vmcb->kerngsbase = msr_content;
-+ break;
-+ }
-+ break;
-+
- case MSR_IA32_SYSENTER_CS:
- vmcb->sysenter_cs = v->arch.hvm_svm.guest_sysenter_cs = msr_content;
- break;
-- case MSR_IA32_SYSENTER_ESP:
-- vmcb->sysenter_esp = v->arch.hvm_svm.guest_sysenter_esp = msr_content;
-+
-+ case MSR_STAR:
-+ vmcb->star = msr_content;
- break;
-- case MSR_IA32_SYSENTER_EIP:
-- vmcb->sysenter_eip = v->arch.hvm_svm.guest_sysenter_eip = msr_content;
-+
-+ case MSR_SYSCALL_MASK:
-+ vmcb->sfmask = msr_content;
- break;
-
- case MSR_IA32_DEBUGCTLMSR:
-diff --git a/xen/arch/x86/hvm/svm/svmdebug.c b/xen/arch/x86/hvm/svm/svmdebug.c
-index 89ef2db932..b5b946aa94 100644
---- a/xen/arch/x86/hvm/svm/svmdebug.c
-+++ b/xen/arch/x86/hvm/svm/svmdebug.c
-@@ -131,9 +131,8 @@ bool svm_vmcb_isvalid(const char *from, const struct vmcb_struct *vmcb,
- PRINTF("DR7: bits [63:32] are not zero (%#"PRIx64")\n",
- vmcb_get_dr7(vmcb));
-
-- if ( efer & ~(EFER_SCE | EFER_LME | EFER_LMA | EFER_NX | EFER_SVME |
-- EFER_LMSLE | EFER_FFXSE) )
-- PRINTF("EFER: undefined bits are not zero (%#"PRIx64")\n", efer);
-+ if ( efer & ~EFER_KNOWN_MASK )
-+ PRINTF("EFER: unknown bits are not zero (%#"PRIx64")\n", efer);
-
- if ( hvm_efer_valid(v, efer, -1) )
- PRINTF("EFER: %s (%"PRIx64")\n", hvm_efer_valid(v, efer, -1), efer);
-diff --git a/xen/arch/x86/hvm/viridian.c b/xen/arch/x86/hvm/viridian.c
-index f0fa59d7d5..b02a70d086 100644
---- a/xen/arch/x86/hvm/viridian.c
-+++ b/xen/arch/x86/hvm/viridian.c
-@@ -245,7 +245,7 @@ void cpuid_viridian_leaves(const struct vcpu *v, uint32_t leaf,
- };
- union {
- HV_PARTITION_PRIVILEGE_MASK mask;
-- uint32_t lo, hi;
-+ struct { uint32_t lo, hi; };
- } u;
-
- if ( !(viridian_feature_mask(d) & HVMPV_no_freq) )
-@@ -966,12 +966,10 @@ int viridian_hypercall(struct cpu_user_regs *regs)
- gprintk(XENLOG_WARNING, "unimplemented hypercall %04x\n",
- input.call_code);
- /* Fallthrough. */
-- case HvGetPartitionId:
- case HvExtCallQueryCapabilities:
- /*
-- * These hypercalls seem to be erroneously issued by Windows
-- * despite neither AccessPartitionId nor EnableExtendedHypercalls
-- * being set in CPUID leaf 2.
-+ * This hypercall seems to be erroneously issued by Windows
-+ * despite EnableExtendedHypercalls not being set in CPUID leaf 2.
- * Given that return a status of 'invalid code' has not so far
- * caused any problems it's not worth logging.
- */
-diff --git a/xen/arch/x86/hvm/vpt.c b/xen/arch/x86/hvm/vpt.c
-index 181f4cb631..04e3c2e15b 100644
---- a/xen/arch/x86/hvm/vpt.c
-+++ b/xen/arch/x86/hvm/vpt.c
-@@ -107,31 +107,49 @@ static int pt_irq_vector(struct periodic_time *pt, enum hvm_intsrc src)
- static int pt_irq_masked(struct periodic_time *pt)
- {
- struct vcpu *v = pt->vcpu;
-- unsigned int gsi, isa_irq;
-- int mask;
-- uint8_t pic_imr;
-+ unsigned int gsi = pt->irq;
-
-- if ( pt->source == PTSRC_lapic )
-+ switch ( pt->source )
-+ {
-+ case PTSRC_lapic:
- {
- struct vlapic *vlapic = vcpu_vlapic(v);
-+
- return (!vlapic_enabled(vlapic) ||
- (vlapic_get_reg(vlapic, APIC_LVTT) & APIC_LVT_MASKED));
- }
-
-- isa_irq = pt->irq;
-- gsi = hvm_isa_irq_to_gsi(isa_irq);
-- pic_imr = v->domain->arch.hvm_domain.vpic[isa_irq >> 3].imr;
-- mask = vioapic_get_mask(v->domain, gsi);
-- if ( mask < 0 )
-+ case PTSRC_isa:
- {
-- dprintk(XENLOG_WARNING, "d%u: invalid GSI (%u) for platform timer\n",
-- v->domain->domain_id, gsi);
-- domain_crash(v->domain);
-- return -1;
-+ uint8_t pic_imr = v->domain->arch.hvm_domain.vpic[pt->irq >> 3].imr;
-+
-+ /* Check if the interrupt is unmasked in the PIC. */
-+ if ( !(pic_imr & (1 << (pt->irq & 7))) && vlapic_accept_pic_intr(v) )
-+ return 0;
-+
-+ gsi = hvm_isa_irq_to_gsi(pt->irq);
-+ }
-+
-+ /* Fallthrough to check if the interrupt is masked on the IO APIC. */
-+ case PTSRC_ioapic:
-+ {
-+ int mask = vioapic_get_mask(v->domain, gsi);
-+
-+ if ( mask < 0 )
-+ {
-+ dprintk(XENLOG_WARNING,
-+ "d%d: invalid GSI (%u) for platform timer\n",
-+ v->domain->domain_id, gsi);
-+ domain_crash(v->domain);
-+ return -1;
-+ }
-+
-+ return mask;
-+ }
- }
-
-- return (((pic_imr & (1 << (isa_irq & 7))) || !vlapic_accept_pic_intr(v)) &&
-- mask);
-+ ASSERT_UNREACHABLE();
-+ return 1;
- }
-
- static void pt_lock(struct periodic_time *pt)
-@@ -252,7 +270,7 @@ int pt_update_irq(struct vcpu *v)
- struct list_head *head = &v->arch.hvm_vcpu.tm_list;
- struct periodic_time *pt, *temp, *earliest_pt;
- uint64_t max_lag;
-- int irq, is_lapic, pt_vector;
-+ int irq, pt_vector = -1;
-
- spin_lock(&v->arch.hvm_vcpu.tm_lock);
-
-@@ -288,29 +306,26 @@ int pt_update_irq(struct vcpu *v)
-
- earliest_pt->irq_issued = 1;
- irq = earliest_pt->irq;
-- is_lapic = (earliest_pt->source == PTSRC_lapic);
-
- spin_unlock(&v->arch.hvm_vcpu.tm_lock);
-
-- /*
-- * If periodic timer interrut is handled by lapic, its vector in
-- * IRR is returned and used to set eoi_exit_bitmap for virtual
-- * interrupt delivery case. Otherwise return -1 to do nothing.
-- */
-- if ( is_lapic )
-+ switch ( earliest_pt->source )
- {
-+ case PTSRC_lapic:
-+ /*
-+ * If periodic timer interrupt is handled by lapic, its vector in
-+ * IRR is returned and used to set eoi_exit_bitmap for virtual
-+ * interrupt delivery case. Otherwise return -1 to do nothing.
-+ */
- vlapic_set_irq(vcpu_vlapic(v), irq, 0);
- pt_vector = irq;
-- }
-- else
-- {
-+ break;
-+
-+ case PTSRC_isa:
- hvm_isa_irq_deassert(v->domain, irq);
- if ( platform_legacy_irq(irq) && vlapic_accept_pic_intr(v) &&
- v->domain->arch.hvm_domain.vpic[irq >> 3].int_output )
-- {
- hvm_isa_irq_assert(v->domain, irq, NULL);
-- pt_vector = -1;
-- }
- else
- {
- pt_vector = hvm_isa_irq_assert(v->domain, irq, vioapic_get_vector);
-@@ -321,6 +336,17 @@ int pt_update_irq(struct vcpu *v)
- if ( pt_vector < 0 || !vlapic_test_irq(vcpu_vlapic(v), pt_vector) )
- pt_vector = -1;
- }
-+ break;
-+
-+ case PTSRC_ioapic:
-+ /*
-+ * NB: At the moment IO-APIC routed interrupts generated by vpt devices
-+ * (HPET) are edge-triggered.
-+ */
-+ pt_vector = hvm_ioapic_assert(v->domain, irq, false);
-+ if ( pt_vector < 0 || !vlapic_test_irq(vcpu_vlapic(v), pt_vector) )
-+ pt_vector = -1;
-+ break;
- }
-
- return pt_vector;
-@@ -418,7 +444,14 @@ void create_periodic_time(
- struct vcpu *v, struct periodic_time *pt, uint64_t delta,
- uint64_t period, uint8_t irq, time_cb *cb, void *data)
- {
-- ASSERT(pt->source != 0);
-+ if ( !pt->source ||
-+ (pt->irq >= NR_ISAIRQS && pt->source == PTSRC_isa) ||
-+ (pt->irq >= hvm_domain_irq(v->domain)->nr_gsis &&
-+ pt->source == PTSRC_ioapic) )
-+ {
-+ ASSERT_UNREACHABLE();
-+ return;
-+ }
-
- destroy_periodic_time(pt);
-
-@@ -498,7 +531,7 @@ static void pt_adjust_vcpu(struct periodic_time *pt, struct vcpu *v)
- {
- int on_list;
-
-- ASSERT(pt->source == PTSRC_isa);
-+ ASSERT(pt->source == PTSRC_isa || pt->source == PTSRC_ioapic);
-
- if ( pt->vcpu == NULL )
- return;
-diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
-index 642ca312bf..c281936af0 100644
---- a/xen/arch/x86/pv/emul-priv-op.c
-+++ b/xen/arch/x86/pv/emul-priv-op.c
-@@ -813,26 +813,6 @@ static int write_cr(unsigned int reg, unsigned long val,
- return X86EMUL_UNHANDLEABLE;
- }
-
--static int read_dr(unsigned int reg, unsigned long *val,
-- struct x86_emulate_ctxt *ctxt)
--{
-- unsigned long res = do_get_debugreg(reg);
--
-- if ( IS_ERR_VALUE(res) )
-- return X86EMUL_UNHANDLEABLE;
--
-- *val = res;
--
-- return X86EMUL_OKAY;
--}
--
--static int write_dr(unsigned int reg, unsigned long val,
-- struct x86_emulate_ctxt *ctxt)
--{
-- return do_set_debugreg(reg, val) == 0
-- ? X86EMUL_OKAY : X86EMUL_UNHANDLEABLE;
--}
--
- static inline uint64_t guest_misc_enable(uint64_t val)
- {
- val &= ~(MSR_IA32_MISC_ENABLE_PERF_AVAIL |
-@@ -906,9 +886,16 @@ static int read_msr(unsigned int reg, uint64_t *val,
- return X86EMUL_OKAY;
-
- case MSR_EFER:
-- *val = read_efer();
-+ /* Hide unknown bits, and unconditionally hide SVME from guests. */
-+ *val = read_efer() & EFER_KNOWN_MASK & ~EFER_SVME;
-+ /*
-+ * Hide the 64-bit features from 32-bit guests. SCE has
-+ * vendor-dependent behaviour.
-+ */
- if ( is_pv_32bit_domain(currd) )
-- *val &= ~(EFER_LME | EFER_LMA | EFER_LMSLE);
-+ *val &= ~(EFER_LME | EFER_LMA | EFER_LMSLE |
-+ (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL
-+ ? EFER_SCE : 0));
- return X86EMUL_OKAY;
-
- case MSR_K7_FID_VID_CTL:
-@@ -1326,8 +1313,8 @@ static const struct x86_emulate_ops priv_op_ops = {
- .read_segment = read_segment,
- .read_cr = read_cr,
- .write_cr = write_cr,
-- .read_dr = read_dr,
-- .write_dr = write_dr,
-+ .read_dr = x86emul_read_dr,
-+ .write_dr = x86emul_write_dr,
- .read_msr = read_msr,
- .write_msr = write_msr,
- .cpuid = pv_emul_cpuid,
-diff --git a/xen/arch/x86/pv/misc-hypercalls.c b/xen/arch/x86/pv/misc-hypercalls.c
-index 5862130697..1619be7874 100644
---- a/xen/arch/x86/pv/misc-hypercalls.c
-+++ b/xen/arch/x86/pv/misc-hypercalls.c
-@@ -30,22 +30,10 @@ long do_set_debugreg(int reg, unsigned long value)
-
- unsigned long do_get_debugreg(int reg)
- {
-- struct vcpu *curr = current;
-+ unsigned long val;
-+ int res = x86emul_read_dr(reg, &val, NULL);
-
-- switch ( reg )
-- {
-- case 0 ... 3:
-- case 6:
-- return curr->arch.debugreg[reg];
-- case 7:
-- return (curr->arch.debugreg[7] |
-- curr->arch.debugreg[5]);
-- case 4 ... 5:
-- return ((curr->arch.pv_vcpu.ctrlreg[4] & X86_CR4_DE) ?
-- curr->arch.debugreg[reg + 2] : 0);
-- }
--
-- return -EINVAL;
-+ return res == X86EMUL_OKAY ? val : -ENODEV;
- }
-
- long do_fpu_taskswitch(int set)
-diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
-index e1d023428c..f81fc2ca65 100644
---- a/xen/arch/x86/smpboot.c
-+++ b/xen/arch/x86/smpboot.c
-@@ -968,6 +968,7 @@ static int cpu_smpboot_alloc(unsigned int cpu)
- set_ist(&idt_tables[cpu][TRAP_double_fault], IST_NONE);
- set_ist(&idt_tables[cpu][TRAP_nmi], IST_NONE);
- set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE);
-+ set_ist(&idt_tables[cpu][TRAP_debug], IST_NONE);
-
- for ( stub_page = 0, i = cpu & ~(STUBS_PER_PAGE - 1);
- i < nr_cpu_ids && i <= (cpu | (STUBS_PER_PAGE - 1)); ++i )
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index 3c7447bfe6..fa67a0ffbd 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -97,12 +97,13 @@ static void __init print_details(enum ind_thunk thunk)
- printk(XENLOG_DEBUG "Speculative mitigation facilities:\n");
-
- /* Hardware features which pertain to speculative mitigations. */
-- printk(XENLOG_DEBUG " Hardware features:%s%s%s%s%s\n",
-+ printk(XENLOG_DEBUG " Hardware features:%s%s%s%s%s%s\n",
- (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBRS/IBPB" : "",
- (_7d0 & cpufeat_mask(X86_FEATURE_STIBP)) ? " STIBP" : "",
- (e8b & cpufeat_mask(X86_FEATURE_IBPB)) ? " IBPB" : "",
- (caps & ARCH_CAPABILITIES_IBRS_ALL) ? " IBRS_ALL" : "",
-- (caps & ARCH_CAPABILITIES_RDCL_NO) ? " RDCL_NO" : "");
-+ (caps & ARCH_CAPABILITIES_RDCL_NO) ? " RDCL_NO" : "",
-+ (caps & ARCH_CAPS_RSBA) ? " RSBA" : "");
-
- /* Compiled-in support which pertains to BTI mitigations. */
- if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) )
-@@ -135,6 +136,20 @@ static bool __init retpoline_safe(void)
- boot_cpu_data.x86 != 6 )
- return false;
-
-+ if ( boot_cpu_has(X86_FEATURE_ARCH_CAPS) )
-+ {
-+ uint64_t caps;
-+
-+ rdmsrl(MSR_ARCH_CAPABILITIES, caps);
-+
-+ /*
-+     * RSBA may be set by a hypervisor to indicate that we may move to a
-+ * processor which isn't retpoline-safe.
-+ */
-+ if ( caps & ARCH_CAPS_RSBA )
-+ return false;
-+ }
-+
- switch ( boot_cpu_data.x86_model )
- {
- case 0x17: /* Penryn */
-@@ -161,18 +176,40 @@ static bool __init retpoline_safe(void)
- * versions.
- */
- case 0x3d: /* Broadwell */
-- return ucode_rev >= 0x28;
-+ return ucode_rev >= 0x2a;
- case 0x47: /* Broadwell H */
-- return ucode_rev >= 0x1b;
-+ return ucode_rev >= 0x1d;
- case 0x4f: /* Broadwell EP/EX */
-- return ucode_rev >= 0xb000025;
-+ return ucode_rev >= 0xb000021;
- case 0x56: /* Broadwell D */
-- return false; /* TBD. */
-+ switch ( boot_cpu_data.x86_mask )
-+ {
-+ case 2: return ucode_rev >= 0x15;
-+ case 3: return ucode_rev >= 0x7000012;
-+ case 4: return ucode_rev >= 0xf000011;
-+ case 5: return ucode_rev >= 0xe000009;
-+ default:
-+ printk("Unrecognised CPU stepping %#x - assuming not reptpoline safe\n",
-+ boot_cpu_data.x86_mask);
-+ return false;
-+ }
-+ break;
-
- /*
-- * Skylake and later processors are not retpoline-safe.
-+ * Skylake, Kabylake and Cannonlake processors are not retpoline-safe.
- */
-+ case 0x4e:
-+ case 0x55:
-+ case 0x5e:
-+ case 0x66:
-+ case 0x67:
-+ case 0x8e:
-+ case 0x9e:
-+ return false;
-+
- default:
-+ printk("Unrecognised CPU model %#x - assuming not reptpoline safe\n",
-+ boot_cpu_data.x86_model);
- return false;
- }
- }
-diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
-index 906124331b..e217b0d6e2 100644
---- a/xen/arch/x86/traps.c
-+++ b/xen/arch/x86/traps.c
-@@ -325,13 +325,13 @@ static void show_guest_stack(struct vcpu *v, const struct cpu_user_regs *regs)
- /*
- * Notes for get_stack_trace_bottom() and get_stack_dump_bottom()
- *
-- * Stack pages 0, 1 and 2:
-+ * Stack pages 0 - 3:
- * These are all 1-page IST stacks. Each of these stacks have an exception
- * frame and saved register state at the top. The interesting bound for a
- * trace is the word adjacent to this, while the bound for a dump is the
- * very top, including the exception frame.
- *
-- * Stack pages 3, 4 and 5:
-+ * Stack pages 4 and 5:
- * None of these are particularly interesting. With MEMORY_GUARD, page 5 is
- * explicitly not present, so attempting to dump or trace it is
- * counterproductive. Without MEMORY_GUARD, it is possible for a call chain
-@@ -352,12 +352,12 @@ unsigned long get_stack_trace_bottom(unsigned long sp)
- {
- switch ( get_stack_page(sp) )
- {
-- case 0 ... 2:
-+ case 0 ... 3:
- return ROUNDUP(sp, PAGE_SIZE) -
- offsetof(struct cpu_user_regs, es) - sizeof(unsigned long);
-
- #ifndef MEMORY_GUARD
-- case 3 ... 5:
-+ case 4 ... 5:
- #endif
- case 6 ... 7:
- return ROUNDUP(sp, STACK_SIZE) -
-@@ -372,11 +372,11 @@ unsigned long get_stack_dump_bottom(unsigned long sp)
- {
- switch ( get_stack_page(sp) )
- {
-- case 0 ... 2:
-+ case 0 ... 3:
- return ROUNDUP(sp, PAGE_SIZE) - sizeof(unsigned long);
-
- #ifndef MEMORY_GUARD
-- case 3 ... 5:
-+ case 4 ... 5:
- #endif
- case 6 ... 7:
- return ROUNDUP(sp, STACK_SIZE) - sizeof(unsigned long);
-@@ -1761,11 +1761,36 @@ static void ler_enable(void)
-
- void do_debug(struct cpu_user_regs *regs)
- {
-+ unsigned long dr6;
- struct vcpu *v = current;
-
-+ /* Stash dr6 as early as possible. */
-+ dr6 = read_debugreg(6);
-+
- if ( debugger_trap_entry(TRAP_debug, regs) )
- return;
-
-+ /*
-+ * At the time of writing (March 2018), on the subject of %dr6:
-+ *
-+ * The Intel manual says:
-+ * Certain debug exceptions may clear bits 0-3. The remaining contents
-+ * of the DR6 register are never cleared by the processor. To avoid
-+ * confusion in identifying debug exceptions, debug handlers should
-+ * clear the register (except bit 16, which they should set) before
-+ * returning to the interrupted task.
-+ *
-+ * The AMD manual says:
-+ * Bits 15:13 of the DR6 register are not cleared by the processor and
-+ * must be cleared by software after the contents have been read.
-+ *
-+ * Some bits are reserved set, some are reserved clear, and some bits
-+ * which were previously reserved set are reused and cleared by hardware.
-+ * For future compatibility, reset to the default value, which will allow
-+ * us to spot any bit being changed by hardware to its non-default value.
-+ */
-+ write_debugreg(6, X86_DR6_DEFAULT);
-+
- if ( !guest_mode(regs) )
- {
- if ( regs->eflags & X86_EFLAGS_TF )
-@@ -1784,21 +1809,50 @@ void do_debug(struct cpu_user_regs *regs)
- regs->eflags &= ~X86_EFLAGS_TF;
- }
- }
-- else
-+
-+ /*
-+ * Check for fault conditions. General Detect, and instruction
-+ * breakpoints are faults rather than traps, at which point attempting
-+ * to ignore and continue will result in a livelock.
-+ */
-+ if ( dr6 & DR_GENERAL_DETECT )
- {
-- /*
-- * We ignore watchpoints when they trigger within Xen. This may
-- * happen when a buffer is passed to us which previously had a
-- * watchpoint set on it. No need to bump EIP; the only faulting
-- * trap is an instruction breakpoint, which can't happen to us.
-- */
-- WARN_ON(!search_exception_table(regs));
-+ printk(XENLOG_ERR "Hit General Detect in Xen context\n");
-+ fatal_trap(regs, 0);
-+ }
-+
-+ if ( dr6 & (DR_TRAP3 | DR_TRAP2 | DR_TRAP1 | DR_TRAP0) )
-+ {
-+ unsigned int bp, dr7 = read_debugreg(7) >> DR_CONTROL_SHIFT;
-+
-+ for ( bp = 0; bp < 4; ++bp )
-+ {
-+ if ( (dr6 & (1u << bp)) && /* Breakpoint triggered? */
-+ ((dr7 & (3u << (bp * DR_CONTROL_SIZE))) == 0) /* Insn? */ )
-+ {
-+ printk(XENLOG_ERR
-+ "Hit instruction breakpoint in Xen context\n");
-+ fatal_trap(regs, 0);
-+ }
-+ }
- }
-+
-+ /*
-+ * Whatever caused this #DB should be a trap. Note it and continue.
-+ * Guests can trigger this in certain corner cases, so ensure the
-+ * message is ratelimited.
-+ */
-+ gprintk(XENLOG_WARNING,
-+ "Hit #DB in Xen context: %04x:%p [%ps], stk %04x:%p, dr6 %lx\n",
-+ regs->cs, _p(regs->rip), _p(regs->rip),
-+ regs->ss, _p(regs->rsp), dr6);
-+
- goto out;
- }
-
- /* Save debug status register where guest OS can peek at it */
-- v->arch.debugreg[6] = read_debugreg(6);
-+ v->arch.debugreg[6] |= (dr6 & ~X86_DR6_DEFAULT);
-+ v->arch.debugreg[6] &= (dr6 | ~X86_DR6_DEFAULT);
-
- ler_enable();
- pv_inject_hw_exception(TRAP_debug, X86_EVENT_NO_EC);
-@@ -1917,6 +1971,7 @@ void __init init_idt_traps(void)
- set_ist(&idt_table[TRAP_double_fault], IST_DF);
- set_ist(&idt_table[TRAP_nmi], IST_NMI);
- set_ist(&idt_table[TRAP_machine_check], IST_MCE);
-+ set_ist(&idt_table[TRAP_debug], IST_DB);
-
- /* CPU0 uses the master IDT. */
- idt_tables[0] = idt_table;
-@@ -1984,6 +2039,12 @@ void activate_debugregs(const struct vcpu *curr)
- }
- }
-
-+/*
-+ * Used by hypercalls and the emulator.
-+ * -ENODEV => #UD
-+ * -EINVAL => #GP Invalid bit
-+ * -EPERM => #GP Valid bit, but not permitted to use
-+ */
- long set_debugreg(struct vcpu *v, unsigned int reg, unsigned long value)
- {
- int i;
-@@ -2015,7 +2076,17 @@ long set_debugreg(struct vcpu *v, unsigned int reg, unsigned long value)
- if ( v == curr )
- write_debugreg(3, value);
- break;
-+
-+ case 4:
-+ if ( v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_DE )
-+ return -ENODEV;
-+
-+ /* Fallthrough */
- case 6:
-+ /* The upper 32 bits are strictly reserved. */
-+ if ( value != (uint32_t)value )
-+ return -EINVAL;
-+
- /*
- * DR6: Bits 4-11,16-31 reserved (set to 1).
- * Bit 12 reserved (set to 0).
-@@ -2025,7 +2096,17 @@ long set_debugreg(struct vcpu *v, unsigned int reg, unsigned long value)
- if ( v == curr )
- write_debugreg(6, value);
- break;
-+
-+ case 5:
-+ if ( v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_DE )
-+ return -ENODEV;
-+
-+ /* Fallthrough */
- case 7:
-+ /* The upper 32 bits are strictly reserved. */
-+ if ( value != (uint32_t)value )
-+ return -EINVAL;
-+
- /*
- * DR7: Bit 10 reserved (set to 1).
- * Bits 11-12,14-15 reserved (set to 0).
-@@ -2038,6 +2119,10 @@ long set_debugreg(struct vcpu *v, unsigned int reg, unsigned long value)
- */
- if ( value & DR_GENERAL_DETECT )
- return -EPERM;
-+
-+ /* Zero the IO shadow before recalculating the real %dr7 */
-+ v->arch.debugreg[5] = 0;
-+
- /* DR7.{G,L}E = 0 => debugging disabled for this domain. */
- if ( value & DR7_ACTIVE_MASK )
- {
-@@ -2070,7 +2155,7 @@ long set_debugreg(struct vcpu *v, unsigned int reg, unsigned long value)
- write_debugreg(7, value);
- break;
- default:
-- return -EINVAL;
-+ return -ENODEV;
- }
-
- v->arch.debugreg[reg] = value;
-diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
-index 75497bc292..a47cb9dc19 100644
---- a/xen/arch/x86/x86_64/compat/entry.S
-+++ b/xen/arch/x86/x86_64/compat/entry.S
-@@ -39,6 +39,12 @@ ENTRY(compat_test_all_events)
- leaq irq_stat+IRQSTAT_softirq_pending(%rip),%rcx
- cmpl $0,(%rcx,%rax,1)
- jne compat_process_softirqs
-+
-+ /* Inject exception if pending. */
-+ lea VCPU_trap_bounce(%rbx), %rdx
-+ testb $TBF_EXCEPTION, TRAPBOUNCE_flags(%rdx)
-+ jnz .Lcompat_process_trapbounce
-+
- testb $1,VCPU_mce_pending(%rbx)
- jnz compat_process_mce
- .Lcompat_test_guest_nmi:
-@@ -68,15 +74,24 @@ compat_process_softirqs:
- call do_softirq
- jmp compat_test_all_events
-
-+ ALIGN
-+/* %rbx: struct vcpu, %rdx: struct trap_bounce */
-+.Lcompat_process_trapbounce:
-+ sti
-+.Lcompat_bounce_exception:
-+ call compat_create_bounce_frame
-+ movb $0, TRAPBOUNCE_flags(%rdx)
-+ jmp compat_test_all_events
-+
- ALIGN
- /* %rbx: struct vcpu */
- compat_process_mce:
- testb $1 << VCPU_TRAP_MCE,VCPU_async_exception_mask(%rbx)
- jnz .Lcompat_test_guest_nmi
- sti
-- movb $0,VCPU_mce_pending(%rbx)
-- call set_guest_machinecheck_trapbounce
-- testl %eax,%eax
-+ movb $0, VCPU_mce_pending(%rbx)
-+ call set_guest_machinecheck_trapbounce
-+ test %al, %al
- jz compat_test_all_events
- movzbl VCPU_async_exception_mask(%rbx),%edx # save mask for the
- movb %dl,VCPU_mce_old_mask(%rbx) # iret hypercall
-@@ -88,11 +103,11 @@ compat_process_mce:
- /* %rbx: struct vcpu */
- compat_process_nmi:
- testb $1 << VCPU_TRAP_NMI,VCPU_async_exception_mask(%rbx)
-- jnz compat_test_guest_events
-+ jnz compat_test_guest_events
- sti
-- movb $0,VCPU_nmi_pending(%rbx)
-+ movb $0, VCPU_nmi_pending(%rbx)
- call set_guest_nmi_trapbounce
-- testl %eax,%eax
-+ test %al, %al
- jz compat_test_all_events
- movzbl VCPU_async_exception_mask(%rbx),%edx # save mask for the
- movb %dl,VCPU_nmi_old_mask(%rbx) # iret hypercall
-@@ -189,15 +204,6 @@ ENTRY(cr4_pv32_restore)
- xor %eax, %eax
- ret
-
--/* %rdx: trap_bounce, %rbx: struct vcpu */
--ENTRY(compat_post_handle_exception)
-- testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%rdx)
-- jz compat_test_all_events
--.Lcompat_bounce_exception:
-- call compat_create_bounce_frame
-- movb $0,TRAPBOUNCE_flags(%rdx)
-- jmp compat_test_all_events
--
- .section .text.entry, "ax", @progbits
-
- /* See lstar_enter for entry register state. */
-diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
-index bdd33e727f..41d3ec21a1 100644
---- a/xen/arch/x86/x86_64/entry.S
-+++ b/xen/arch/x86/x86_64/entry.S
-@@ -42,6 +42,12 @@ test_all_events:
- leaq irq_stat+IRQSTAT_softirq_pending(%rip), %rcx
- cmpl $0, (%rcx, %rax, 1)
- jne process_softirqs
-+
-+ /* Inject exception if pending. */
-+ lea VCPU_trap_bounce(%rbx), %rdx
-+ testb $TBF_EXCEPTION, TRAPBOUNCE_flags(%rdx)
-+ jnz .Lprocess_trapbounce
-+
- cmpb $0, VCPU_mce_pending(%rbx)
- jne process_mce
- .Ltest_guest_nmi:
-@@ -69,6 +75,15 @@ process_softirqs:
- call do_softirq
- jmp test_all_events
-
-+ ALIGN
-+/* %rbx: struct vcpu, %rdx struct trap_bounce */
-+.Lprocess_trapbounce:
-+ sti
-+.Lbounce_exception:
-+ call create_bounce_frame
-+ movb $0, TRAPBOUNCE_flags(%rdx)
-+ jmp test_all_events
-+
- ALIGN
- /* %rbx: struct vcpu */
- process_mce:
-@@ -77,7 +92,7 @@ process_mce:
- sti
- movb $0, VCPU_mce_pending(%rbx)
- call set_guest_machinecheck_trapbounce
-- test %eax, %eax
-+ test %al, %al
- jz test_all_events
- movzbl VCPU_async_exception_mask(%rbx), %edx # save mask for the
- movb %dl, VCPU_mce_old_mask(%rbx) # iret hypercall
-@@ -93,7 +108,7 @@ process_nmi:
- sti
- movb $0, VCPU_nmi_pending(%rbx)
- call set_guest_nmi_trapbounce
-- test %eax, %eax
-+ test %al, %al
- jz test_all_events
- movzbl VCPU_async_exception_mask(%rbx), %edx # save mask for the
- movb %dl, VCPU_nmi_old_mask(%rbx) # iret hypercall
-@@ -667,15 +682,9 @@ handle_exception_saved:
- mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
- testb $3,UREGS_cs(%rsp)
- jz restore_all_xen
-- leaq VCPU_trap_bounce(%rbx),%rdx
- movq VCPU_domain(%rbx),%rax
- testb $1,DOMAIN_is_32bit_pv(%rax)
-- jnz compat_post_handle_exception
-- testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%rdx)
-- jz test_all_events
--.Lbounce_exception:
-- call create_bounce_frame
-- movb $0,TRAPBOUNCE_flags(%rdx)
-+ jnz compat_test_all_events
- jmp test_all_events
-
- /* No special register assumptions. */
-@@ -730,7 +739,7 @@ ENTRY(device_not_available)
- ENTRY(debug)
- pushq $0
- movl $TRAP_debug,4(%rsp)
-- jmp handle_exception
-+ jmp handle_ist_exception
-
- ENTRY(int3)
- pushq $0
-@@ -783,12 +792,14 @@ ENTRY(double_fault)
- /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
-
- mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rbx
-- test %rbx, %rbx
-+ neg %rbx
- jz .Ldblf_cr3_okay
- jns .Ldblf_cr3_load
-+ mov %rbx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
- neg %rbx
- .Ldblf_cr3_load:
- mov %rbx, %cr3
-+ movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
- .Ldblf_cr3_okay:
-
- movq %rsp,%rdi
-diff --git a/xen/arch/x86/x86_emulate.c b/xen/arch/x86/x86_emulate.c
-index c7ba221d11..9125c67c9e 100644
---- a/xen/arch/x86/x86_emulate.c
-+++ b/xen/arch/x86/x86_emulate.c
-@@ -14,6 +14,7 @@
- #include <asm/processor.h> /* current_cpu_info */
- #include <asm/xstate.h>
- #include <asm/amd.h> /* cpu_has_amd_erratum() */
-+#include <asm/debugreg.h>
-
- /* Avoid namespace pollution. */
- #undef cmpxchg
-@@ -41,3 +42,75 @@
- })
-
- #include "x86_emulate/x86_emulate.c"
-+
-+/* Called with NULL ctxt in hypercall context. */
-+int x86emul_read_dr(unsigned int reg, unsigned long *val,
-+ struct x86_emulate_ctxt *ctxt)
-+{
-+ struct vcpu *curr = current;
-+
-+ /* HVM support requires a bit more plumbing before it will work. */
-+ ASSERT(is_pv_vcpu(curr));
-+
-+ switch ( reg )
-+ {
-+ case 0 ... 3:
-+ case 6:
-+ *val = curr->arch.debugreg[reg];
-+ break;
-+
-+ case 7:
-+ *val = (curr->arch.debugreg[7] |
-+ curr->arch.debugreg[5]);
-+ break;
-+
-+ case 4 ... 5:
-+ if ( !(curr->arch.pv_vcpu.ctrlreg[4] & X86_CR4_DE) )
-+ {
-+ *val = curr->arch.debugreg[reg + 2];
-+ break;
-+ }
-+
-+ /* Fallthrough */
-+ default:
-+ if ( ctxt )
-+ x86_emul_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC, ctxt);
-+
-+ return X86EMUL_EXCEPTION;
-+ }
-+
-+ return X86EMUL_OKAY;
-+}
-+
-+int x86emul_write_dr(unsigned int reg, unsigned long val,
-+ struct x86_emulate_ctxt *ctxt)
-+{
-+ struct vcpu *curr = current;
-+
-+ /* HVM support requires a bit more plumbing before it will work. */
-+ ASSERT(is_pv_vcpu(curr));
-+
-+ switch ( set_debugreg(curr, reg, val) )
-+ {
-+ case 0:
-+ return X86EMUL_OKAY;
-+
-+ case -ENODEV:
-+ x86_emul_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC, ctxt);
-+ return X86EMUL_EXCEPTION;
-+
-+ default:
-+ x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
-+ return X86EMUL_EXCEPTION;
-+ }
-+}
-+
-+/*
-+ * Local variables:
-+ * mode: C
-+ * c-file-style: "BSD"
-+ * c-basic-offset: 4
-+ * tab-width: 4
-+ * indent-tabs-mode: nil
-+ * End:
-+ */
-diff --git a/xen/arch/x86/x86_emulate/x86_emulate.h b/xen/arch/x86/x86_emulate/x86_emulate.h
-index 0c8c80ad5a..9c2bb8157c 100644
---- a/xen/arch/x86/x86_emulate/x86_emulate.h
-+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
-@@ -662,6 +662,11 @@ static inline void x86_emulate_free_state(struct x86_emulate_state *state) {}
- void x86_emulate_free_state(struct x86_emulate_state *state);
- #endif
-
-+int x86emul_read_dr(unsigned int reg, unsigned long *val,
-+ struct x86_emulate_ctxt *ctxt);
-+int x86emul_write_dr(unsigned int reg, unsigned long val,
-+ struct x86_emulate_ctxt *ctxt);
-+
- #endif
-
- static inline void x86_emul_hw_exception(
-diff --git a/xen/common/schedule.c b/xen/common/schedule.c
-index b7884263f2..f21c3e5a64 100644
---- a/xen/common/schedule.c
-+++ b/xen/common/schedule.c
-@@ -436,14 +436,9 @@ void sched_destroy_domain(struct domain *d)
- cpupool_rm_domain(d);
- }
-
--void vcpu_sleep_nosync(struct vcpu *v)
-+void vcpu_sleep_nosync_locked(struct vcpu *v)
- {
-- unsigned long flags;
-- spinlock_t *lock;
--
-- TRACE_2D(TRC_SCHED_SLEEP, v->domain->domain_id, v->vcpu_id);
--
-- lock = vcpu_schedule_lock_irqsave(v, &flags);
-+ ASSERT(spin_is_locked(per_cpu(schedule_data,v->processor).schedule_lock));
-
- if ( likely(!vcpu_runnable(v)) )
- {
-@@ -452,6 +447,18 @@ void vcpu_sleep_nosync(struct vcpu *v)
-
- SCHED_OP(vcpu_scheduler(v), sleep, v);
- }
-+}
-+
-+void vcpu_sleep_nosync(struct vcpu *v)
-+{
-+ unsigned long flags;
-+ spinlock_t *lock;
-+
-+ TRACE_2D(TRC_SCHED_SLEEP, v->domain->domain_id, v->vcpu_id);
-+
-+ lock = vcpu_schedule_lock_irqsave(v, &flags);
-+
-+ vcpu_sleep_nosync_locked(v);
-
- vcpu_schedule_unlock_irqrestore(lock, flags, v);
- }
-@@ -567,13 +574,54 @@ static void vcpu_move_nosched(struct vcpu *v, unsigned int new_cpu)
- sched_move_irqs(v);
- }
-
--static void vcpu_migrate(struct vcpu *v)
-+/*
-+ * Initiating migration
-+ *
-+ * In order to migrate, we need the vcpu in question to have stopped
-+ * running and had SCHED_OP(sleep) called (to take it off any
-+ * runqueues, for instance); and if it is currently running, it needs
-+ * to be scheduled out. Finally, we need to hold the scheduling locks
-+ * for both the processor we're migrating from, and the processor
-+ * we're migrating to.
-+ *
-+ * In order to avoid deadlock while satisfying the final requirement,
-+ * we must release any scheduling lock we hold, then try to grab both
-+ * locks we want, then double-check to make sure that what we started
-+ * to do hasn't been changed in the meantime.
-+ *
-+ * These steps are encapsulated in the following two functions; they
-+ * should be called like this:
-+ *
-+ * lock = vcpu_schedule_lock_irq(v);
-+ * vcpu_migrate_start(v);
-+ * vcpu_schedule_unlock_irq(lock, v)
-+ * vcpu_migrate_finish(v);
-+ *
-+ * vcpu_migrate_finish() will do the work now if it can, or simply
-+ * return if it can't (because v is still running); in that case
-+ * vcpu_migrate_finish() will be called by context_saved().
-+ */
-+void vcpu_migrate_start(struct vcpu *v)
-+{
-+ set_bit(_VPF_migrating, &v->pause_flags);
-+ vcpu_sleep_nosync_locked(v);
-+}
-+
-+static void vcpu_migrate_finish(struct vcpu *v)
- {
- unsigned long flags;
- unsigned int old_cpu, new_cpu;
- spinlock_t *old_lock, *new_lock;
- bool_t pick_called = 0;
-
-+ /*
-+ * If the vcpu is currently running, this will be handled by
-+ * context_saved(); and in any case, if the bit is cleared, then
-+ * someone else has already done the work so we don't need to.
-+ */
-+ if ( v->is_running || !test_bit(_VPF_migrating, &v->pause_flags) )
-+ return;
-+
- old_cpu = new_cpu = v->processor;
- for ( ; ; )
- {
-@@ -653,14 +701,11 @@ void vcpu_force_reschedule(struct vcpu *v)
- spinlock_t *lock = vcpu_schedule_lock_irq(v);
-
- if ( v->is_running )
-- set_bit(_VPF_migrating, &v->pause_flags);
-+ vcpu_migrate_start(v);
-+
- vcpu_schedule_unlock_irq(lock, v);
-
-- if ( v->pause_flags & VPF_migrating )
-- {
-- vcpu_sleep_nosync(v);
-- vcpu_migrate(v);
-- }
-+ vcpu_migrate_finish(v);
- }
-
- void restore_vcpu_affinity(struct domain *d)
-@@ -812,10 +857,10 @@ int cpu_disable_scheduler(unsigned int cpu)
- * * the scheduler will always find a suitable solution, or
- * things would have failed before getting in here.
- */
-- set_bit(_VPF_migrating, &v->pause_flags);
-+ vcpu_migrate_start(v);
- vcpu_schedule_unlock_irqrestore(lock, flags, v);
-- vcpu_sleep_nosync(v);
-- vcpu_migrate(v);
-+
-+ vcpu_migrate_finish(v);
-
- /*
- * The only caveat, in this case, is that if a vcpu active in
-@@ -849,18 +894,14 @@ static int vcpu_set_affinity(
- * Always ask the scheduler to re-evaluate placement
- * when changing the affinity.
- */
-- set_bit(_VPF_migrating, &v->pause_flags);
-+ vcpu_migrate_start(v);
- }
-
- vcpu_schedule_unlock_irq(lock, v);
-
- domain_update_node_affinity(v->domain);
-
-- if ( v->pause_flags & VPF_migrating )
-- {
-- vcpu_sleep_nosync(v);
-- vcpu_migrate(v);
-- }
-+ vcpu_migrate_finish(v);
-
- return ret;
- }
-@@ -1088,7 +1129,6 @@ int vcpu_pin_override(struct vcpu *v, int cpu)
- {
- cpumask_copy(v->cpu_hard_affinity, v->cpu_hard_affinity_saved);
- v->affinity_broken = 0;
-- set_bit(_VPF_migrating, &v->pause_flags);
- ret = 0;
- }
- }
-@@ -1101,20 +1141,18 @@ int vcpu_pin_override(struct vcpu *v, int cpu)
- cpumask_copy(v->cpu_hard_affinity_saved, v->cpu_hard_affinity);
- v->affinity_broken = 1;
- cpumask_copy(v->cpu_hard_affinity, cpumask_of(cpu));
-- set_bit(_VPF_migrating, &v->pause_flags);
- ret = 0;
- }
- }
-
-+ if ( ret == 0 )
-+ vcpu_migrate_start(v);
-+
- vcpu_schedule_unlock_irq(lock, v);
-
- domain_update_node_affinity(v->domain);
-
-- if ( v->pause_flags & VPF_migrating )
-- {
-- vcpu_sleep_nosync(v);
-- vcpu_migrate(v);
-- }
-+ vcpu_migrate_finish(v);
-
- return ret;
- }
-@@ -1501,8 +1539,7 @@ void context_saved(struct vcpu *prev)
-
- SCHED_OP(vcpu_scheduler(prev), context_saved, prev);
-
-- if ( unlikely(prev->pause_flags & VPF_migrating) )
-- vcpu_migrate(prev);
-+ vcpu_migrate_finish(prev);
- }
-
- /* The scheduler timer: force a run through the scheduler */
-diff --git a/xen/include/asm-x86/debugreg.h b/xen/include/asm-x86/debugreg.h
-index c57914efc6..b3b10eaf40 100644
---- a/xen/include/asm-x86/debugreg.h
-+++ b/xen/include/asm-x86/debugreg.h
-@@ -24,6 +24,8 @@
- #define DR_STATUS_RESERVED_ZERO (~0xffffeffful) /* Reserved, read as zero */
- #define DR_STATUS_RESERVED_ONE 0xffff0ff0ul /* Reserved, read as one */
-
-+#define X86_DR6_DEFAULT 0xffff0ff0ul /* Default %dr6 value. */
-+
- /* Now define a bunch of things for manipulating the control register.
- The top two bytes of the control register consist of 4 fields of 4
- bits - each field corresponds to one of the four debug registers,
-diff --git a/xen/include/asm-x86/hvm/irq.h b/xen/include/asm-x86/hvm/irq.h
-index f756cb5a0d..1a52ec6045 100644
---- a/xen/include/asm-x86/hvm/irq.h
-+++ b/xen/include/asm-x86/hvm/irq.h
-@@ -207,6 +207,9 @@ int hvm_set_pci_link_route(struct domain *d, u8 link, u8 isa_irq);
-
- int hvm_inject_msi(struct domain *d, uint64_t addr, uint32_t data);
-
-+/* Assert an IO APIC pin. */
-+int hvm_ioapic_assert(struct domain *d, unsigned int gsi, bool level);
-+
- void hvm_maybe_deassert_evtchn_irq(void);
- void hvm_assert_evtchn_irq(struct vcpu *v);
- void hvm_set_callback_via(struct domain *d, uint64_t via);
-diff --git a/xen/include/asm-x86/hvm/vpt.h b/xen/include/asm-x86/hvm/vpt.h
-index 21166edd06..0eb5ff632e 100644
---- a/xen/include/asm-x86/hvm/vpt.h
-+++ b/xen/include/asm-x86/hvm/vpt.h
-@@ -44,6 +44,7 @@ struct periodic_time {
- bool_t warned_timeout_too_short;
- #define PTSRC_isa 1 /* ISA time source */
- #define PTSRC_lapic 2 /* LAPIC time source */
-+#define PTSRC_ioapic 3 /* IOAPIC time source */
- u8 source; /* PTSRC_ */
- u8 irq;
- struct vcpu *vcpu; /* vcpu timer interrupt delivers to */
-diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
-index a8ceecf3e2..68fae91567 100644
---- a/xen/include/asm-x86/msr-index.h
-+++ b/xen/include/asm-x86/msr-index.h
-@@ -31,6 +31,9 @@
- #define EFER_LMSLE (1<<_EFER_LMSLE)
- #define EFER_FFXSE (1<<_EFER_FFXSE)
-
-+#define EFER_KNOWN_MASK (EFER_SCE | EFER_LME | EFER_LMA | EFER_NX | \
-+ EFER_SVME | EFER_LMSLE | EFER_FFXSE)
-+
- /* Speculation Controls. */
- #define MSR_SPEC_CTRL 0x00000048
- #define SPEC_CTRL_IBRS (_AC(1, ULL) << 0)
-@@ -42,6 +45,7 @@
- #define MSR_ARCH_CAPABILITIES 0x0000010a
- #define ARCH_CAPABILITIES_RDCL_NO (_AC(1, ULL) << 0)
- #define ARCH_CAPABILITIES_IBRS_ALL (_AC(1, ULL) << 1)
-+#define ARCH_CAPS_RSBA (_AC(1, ULL) << 2)
-
- /* Intel MSRs. Some also available on other CPUs */
- #define MSR_IA32_PERFCTR0 0x000000c1
-diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
-index 80f8411355..a152f1d413 100644
---- a/xen/include/asm-x86/processor.h
-+++ b/xen/include/asm-x86/processor.h
-@@ -445,7 +445,8 @@ struct __packed __cacheline_aligned tss_struct {
- #define IST_DF 1UL
- #define IST_NMI 2UL
- #define IST_MCE 3UL
--#define IST_MAX 3UL
-+#define IST_DB 4UL
-+#define IST_MAX 4UL
-
- /* Set the interrupt stack table used by a particular interrupt
- * descriptor table entry. */