[PATCH 00/14] idle performance improvements

classic Classic list List threaded Threaded
15 messages Options
Reply | Threaded
Open this post in threaded view
|

[PATCH 00/14] idle performance improvements

Nicholas Piggin-2
These patches improve performance of idle sleep and wake. The
first patches rework the lazy-irq handling of idle code a bit
to make it simpler first.

Any review would be welcome. I've tested this with some
performance and simple correctness tests on POWER8, POWER9,
and with KVM on POWER8, so it's about ready to review now
I hope.

Thanks,
Nick

Nicholas Piggin (14):
  powerpc/64s: idle move soft interrupt mask logic into C code
  powerpc/64s: idle hotplug lazy-irq simplification
  powerpc/64s: idle provide a default idle for POWER9
  powerpc/64s: idle process interrupts from system reset wakeup
  powerpc/64s: msgclr when handling doorbell exceptions
  powerpc/64s: interrupt replay balance the return branch predictor
  powerpc/64s: idle branch to handler with virtual mode offset
  powerpc/64s: idle avoid SRR usage in idle sleep/wake paths
  powerpc/64s: idle hmi wakeup is unlikely
  powerpc/64s: cpuidle set polling before enabling irqs
  powerpc/64s: cpuidle read mostly for common globals
  powerpc/64s: cpuidle no memory barrier after break from idle
  powerpc/64: runlatch CTRL[RUN] set optimisation
  powerpc/64s: idle runlatch switch is done with MSR[EE]=0

 arch/powerpc/include/asm/dbell.h         |  13 +++
 arch/powerpc/include/asm/exception-64s.h |  17 +++-
 arch/powerpc/include/asm/hw_irq.h        |   1 +
 arch/powerpc/include/asm/machdep.h       |   1 +
 arch/powerpc/include/asm/ppc-opcode.h    |   3 +
 arch/powerpc/include/asm/processor.h     |  10 +--
 arch/powerpc/kernel/asm-offsets.c        |   1 +
 arch/powerpc/kernel/exceptions-64s.S     |  62 ++++++++++++--
 arch/powerpc/kernel/idle_book3s.S        | 137 +++++++++----------------------
 arch/powerpc/kernel/irq.c                |   3 +-
 arch/powerpc/kernel/process.c            |  12 +--
 arch/powerpc/kvm/book3s_hv_rmhandlers.S  |   8 +-
 arch/powerpc/platforms/powernv/idle.c    | 104 +++++++++++++++++++----
 arch/powerpc/platforms/powernv/smp.c     |  31 ++++---
 arch/powerpc/platforms/powernv/subcore.c |   3 +-
 drivers/cpuidle/cpuidle-powernv.c        |  37 +++++----
 drivers/cpuidle/cpuidle-pseries.c        |  22 +++--
 17 files changed, 288 insertions(+), 177 deletions(-)

--
2.11.0

Reply | Threaded
Open this post in threaded view
|

[PATCH 01/14] powerpc/64s: idle move soft interrupt mask logic into C code

Nicholas Piggin-2
This simplifies the asm and fixes irq-off tracing over sleep
instructions.

Also move powersave_nap check for POWER8 into C code, and move
PSSCR register value calculation for POWER9 into C.

Signed-off-by: Nicholas Piggin <[hidden email]>
---
 arch/powerpc/include/asm/machdep.h       |  1 +
 arch/powerpc/include/asm/processor.h     | 10 ++--
 arch/powerpc/kernel/idle_book3s.S        | 84 ++++++-------------------------
 arch/powerpc/kernel/irq.c                |  3 +-
 arch/powerpc/platforms/powernv/idle.c    | 85 +++++++++++++++++++++++++++-----
 arch/powerpc/platforms/powernv/smp.c     |  2 -
 arch/powerpc/platforms/powernv/subcore.c |  3 +-
 drivers/cpuidle/cpuidle-powernv.c        | 12 ++---
 8 files changed, 105 insertions(+), 95 deletions(-)

diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index f90b22c722e1..cd2fc1cc1cc7 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -226,6 +226,7 @@ struct machdep_calls {
 extern void e500_idle(void);
 extern void power4_idle(void);
 extern void power7_idle(void);
+extern void power9_idle(void);
 extern void ppc6xx_idle(void);
 extern void book3e_idle(void);
 
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 586c0b72a155..832775771bd3 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -501,11 +501,11 @@ extern unsigned long cpuidle_disable;
 enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
 
 extern int powersave_nap; /* set if nap mode can be used in idle loop */
-extern unsigned long power7_nap(int check_irq);
-extern unsigned long power7_sleep(void);
-extern unsigned long power7_winkle(void);
-extern unsigned long power9_idle_stop(unsigned long stop_psscr_val,
-      unsigned long stop_psscr_mask);
+extern unsigned long power7_idle_insn(unsigned long type); /* PNV_THREAD_NAP/etc*/
+extern void power7_idle_type(unsigned long type);
+extern unsigned long power9_idle_stop(unsigned long psscr_val);
+extern void power9_idle_type(unsigned long stop_psscr_val,
+      unsigned long stop_psscr_mask);
 
 extern void flush_instruction_cache(void);
 extern void hard_reset_now(void);
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 4898d676dcae..c7edb374d1aa 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -106,13 +106,9 @@ core_idle_lock_held:
 /*
  * Pass requested state in r3:
  * r3 - PNV_THREAD_NAP/SLEEP/WINKLE in POWER8
- *   - Requested STOP state in POWER9
+ *   - Requested PSSCR value in POWER9
  *
- * To check IRQ_HAPPENED in r4
- * 0 - don't check
- * 1 - check
- *
- * Address to 'rfid' to in r5
+ * Address of idle handler to 'rfid' to in r4
  */
 pnv_powersave_common:
  /* Use r3 to pass state nap/sleep/winkle */
@@ -128,30 +124,7 @@ pnv_powersave_common:
  std r0,_LINK(r1)
  std r0,_NIP(r1)
 
- /* Hard disable interrupts */
- mfmsr r9
- rldicl r9,r9,48,1
- rotldi r9,r9,16
- mtmsrd r9,1 /* hard-disable interrupts */
-
- /* Check if something happened while soft-disabled */
- lbz r0,PACAIRQHAPPENED(r13)
- andi. r0,r0,~PACA_IRQ_HARD_DIS@l
- beq 1f
- cmpwi cr0,r4,0
- beq 1f
- addi r1,r1,INT_FRAME_SIZE
- ld r0,16(r1)
- li r3,0 /* Return 0 (no nap) */
- mtlr r0
- blr
-
-1: /* We mark irqs hard disabled as this is the state we'll
- * be in when returning and we need to tell arch_local_irq_restore()
- * about it
- */
- li r0,PACA_IRQ_HARD_DIS
- stb r0,PACAIRQHAPPENED(r13)
+ mfmsr   r9
 
  /* We haven't lost state ... yet */
  li r0,0
@@ -160,8 +133,8 @@ pnv_powersave_common:
  /* Continue saving state */
  SAVE_GPR(2, r1)
  SAVE_NVGPRS(r1)
- mfcr r4
- std r4,_CCR(r1)
+ mfcr r5
+ std r5,_CCR(r1)
  std r9,_MSR(r1)
  std r1,PACAR1(r13)
 
@@ -175,7 +148,7 @@ pnv_powersave_common:
  li r6, MSR_RI
  andc r6, r9, r6
  mtmsrd r6, 1 /* clear RI before setting SRR0/1 */
- mtspr SPRN_SRR0, r5
+ mtspr SPRN_SRR0, r4
  mtspr SPRN_SRR1, r7
  rfid
 
@@ -319,35 +292,14 @@ lwarx_loop_stop:
 
  IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP)
 
-_GLOBAL(power7_idle)
+/*
+ * Entered with MSR[EE]=0 and no soft-masked interrupts pending.
+ * r3 contains desired idle state (PNV_THREAD_NAP/SLEEP/WINKLE).
+ */
+_GLOBAL(power7_idle_insn)
  /* Now check if user or arch enabled NAP mode */
- LOAD_REG_ADDRBASE(r3,powersave_nap)
- lwz r4,ADDROFF(powersave_nap)(r3)
- cmpwi 0,r4,0
- beqlr
- li r3, 1
- /* fall through */
-
-_GLOBAL(power7_nap)
- mr r4,r3
- li r3,PNV_THREAD_NAP
- LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode)
- b pnv_powersave_common
- /* No return */
-
-_GLOBAL(power7_sleep)
- li r3,PNV_THREAD_SLEEP
- li r4,1
- LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode)
+ LOAD_REG_ADDR(r4, pnv_enter_arch207_idle_mode)
  b pnv_powersave_common
- /* No return */
-
-_GLOBAL(power7_winkle)
- li r3,PNV_THREAD_WINKLE
- li r4,1
- LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode)
- b pnv_powersave_common
- /* No return */
 
 #define CHECK_HMI_INTERRUPT \
  mfspr r0,SPRN_SRR1; \
@@ -369,16 +321,12 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
 20: nop;
 
 /*
- * r3 - The PSSCR value corresponding to the stop state.
- * r4 - The PSSCR mask corrresonding to the stop state.
+ * Entered with MSR[EE]=0 and no soft-masked interrupts pending.
+ * r3 contains desired PSSCR register value.
  */
 _GLOBAL(power9_idle_stop)
- mfspr   r5,SPRN_PSSCR
- andc    r5,r5,r4
- or      r3,r3,r5
- mtspr SPRN_PSSCR,r3
- LOAD_REG_ADDR(r5,power_enter_stop)
- li r4,1
+ mtspr SPRN_PSSCR,r3
+ LOAD_REG_ADDR(r4,power_enter_stop)
  b pnv_powersave_common
  /* No return */
 
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 5c291df30fe3..cfa29ddcb215 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -322,7 +322,8 @@ bool prep_irq_for_idle(void)
  * First we need to hard disable to ensure no interrupt
  * occurs before we effectively enter the low power state
  */
- hard_irq_disable();
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
 
  /*
  * If anything happened while we were soft-disabled,
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 445f30a2c5ef..b82f3be23de4 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -23,6 +23,7 @@
 #include <asm/cpuidle.h>
 #include <asm/code-patching.h>
 #include <asm/smp.h>
+#include <asm/runlatch.h>
 
 #include "powernv.h"
 #include "subcore.h"
@@ -240,14 +241,6 @@ static u64 pnv_default_stop_mask;
 static bool default_stop_found;
 
 /*
- * Used for ppc_md.power_save which needs a function with no parameters
- */
-static void power9_idle(void)
-{
- power9_idle_stop(pnv_default_stop_val, pnv_default_stop_mask);
-}
-
-/*
  * First deep stop state. Used to figure out when to save/restore
  * hypervisor context.
  */
@@ -261,6 +254,74 @@ static u64 pnv_deepest_stop_psscr_val;
 static u64 pnv_deepest_stop_psscr_mask;
 static bool deepest_stop_found;
 
+static unsigned long __power7_idle_type(unsigned long type)
+{
+ unsigned long srr1;
+
+ WARN_ON(!irqs_disabled());
+
+ if (!prep_irq_for_idle())
+ return 0;
+
+ ppc64_runlatch_off();
+ srr1 = power7_idle_insn(type);
+ ppc64_runlatch_on();
+
+ return srr1;
+}
+
+void power7_idle_type(unsigned long type)
+{
+ __power7_idle_type(type);
+ __hard_irq_enable();
+}
+
+void power7_idle(void)
+{
+ if (!powersave_nap)
+ return;
+
+ power7_idle_type(PNV_THREAD_NAP);
+}
+
+static unsigned long __power9_idle_type(unsigned long stop_psscr_val,
+      unsigned long stop_psscr_mask)
+{
+ unsigned long psscr;
+ unsigned long srr1;
+
+ WARN_ON(!irqs_disabled());
+
+ if (!prep_irq_for_idle())
+ return 0;
+
+ psscr = mfspr(SPRN_PSSCR);
+ psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
+
+ ppc64_runlatch_off();
+ srr1 = power9_idle_stop(psscr);
+ ppc64_runlatch_on();
+
+ trace_hardirqs_off();
+
+ return srr1;
+}
+
+void power9_idle_type(unsigned long stop_psscr_val,
+      unsigned long stop_psscr_mask)
+{
+ __power9_idle_type(stop_psscr_val, stop_psscr_mask);
+ __hard_irq_enable();
+}
+
+/*
+ * Used for ppc_md.power_save which needs a function with no parameters
+ */
+void power9_idle(void)
+{
+ power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
+}
+
 /*
  * pnv_cpu_offline: A function that puts the CPU into the deepest
  * available platform idle state on a CPU-Offline.
@@ -275,13 +336,14 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
  srr1 = power9_idle_stop(pnv_deepest_stop_psscr_val,
  pnv_deepest_stop_psscr_mask);
  } else if (idle_states & OPAL_PM_WINKLE_ENABLED) {
- srr1 = power7_winkle();
+ srr1 = power7_idle_type(PNV_THREAD_WINKLE);
  } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
    (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
- srr1 = power7_sleep();
+ srr1 = power7_idle_type(PNV_THREAD_SLEEP);
  } else if (idle_states & OPAL_PM_NAP_ENABLED) {
- srr1 = power7_nap(1);
+ srr1 = power7_idle_type(PNV_THREAD_NAP);
  } else {
+ ppc64_runlatch_off();
  /* This is the fallback method. We emulate snooze */
  while (!generic_check_cpu_restart(cpu)) {
  HMT_low();
@@ -289,6 +351,7 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
  }
  srr1 = 0;
  HMT_medium();
+ ppc64_runlatch_on();
  }
 
  return srr1;
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 4aff754b6f2c..f8752795decf 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -182,9 +182,7 @@ static void pnv_smp_cpu_kill_self(void)
  */
  kvmppc_set_host_ipi(cpu, 0);
 
- ppc64_runlatch_off();
  srr1 = pnv_cpu_offline(cpu);
- ppc64_runlatch_on();
 
  /*
  * If the SRR1 value indicates that we woke up due to
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
index 0babef11136f..d975d78188a9 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -18,6 +18,7 @@
 #include <linux/stop_machine.h>
 
 #include <asm/cputhreads.h>
+#include <asm/cpuidle.h>
 #include <asm/kvm_ppc.h>
 #include <asm/machdep.h>
 #include <asm/opal.h>
@@ -182,7 +183,7 @@ static void unsplit_core(void)
  cpu = smp_processor_id();
  if (cpu_thread_in_core(cpu) != 0) {
  while (mfspr(SPRN_HID0) & mask)
- power7_nap(0);
+ power7_idle_insn(PNV_THREAD_NAP);
 
  per_cpu(split_state, cpu).step = SYNC_STEP_UNSPLIT;
  return;
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index 12409a519cc5..150b971c303b 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -73,9 +73,8 @@ static int nap_loop(struct cpuidle_device *dev,
  struct cpuidle_driver *drv,
  int index)
 {
- ppc64_runlatch_off();
- power7_idle();
- ppc64_runlatch_on();
+ power7_idle_type(PNV_THREAD_NAP);
+
  return index;
 }
 
@@ -98,7 +97,8 @@ static int fastsleep_loop(struct cpuidle_device *dev,
  new_lpcr &= ~LPCR_PECE1;
 
  mtspr(SPRN_LPCR, new_lpcr);
- power7_sleep();
+
+ power7_idle_type(PNV_THREAD_SLEEP);
 
  mtspr(SPRN_LPCR, old_lpcr);
 
@@ -110,10 +110,8 @@ static int stop_loop(struct cpuidle_device *dev,
      struct cpuidle_driver *drv,
      int index)
 {
- ppc64_runlatch_off();
- power9_idle_stop(stop_psscr_table[index].val,
+ power9_idle_type(stop_psscr_table[index].val,
  stop_psscr_table[index].mask);
- ppc64_runlatch_on();
  return index;
 }
 
--
2.11.0

Reply | Threaded
Open this post in threaded view
|

[PATCH 02/14] powerpc/64s: idle hotplug lazy-irq simplification

Nicholas Piggin-2
In reply to this post by Nicholas Piggin-2
Rather than concern ourselves with any soft-mask logic in the CPU
hotplug handler, just hard disable interrupts. This ensures there
are no lazy-irqs pending, which means we can call directly to idle
instruction in order to sleep.

Signed-off-by: Nicholas Piggin <[hidden email]>
---
 arch/powerpc/platforms/powernv/idle.c | 23 +++++++++++++++--------
 arch/powerpc/platforms/powernv/smp.c  | 29 ++++++++++++++---------------
 2 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index b82f3be23de4..8562916b8cf7 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -325,25 +325,31 @@ void power9_idle(void)
 /*
  * pnv_cpu_offline: A function that puts the CPU into the deepest
  * available platform idle state on a CPU-Offline.
+ * interrupts hard disabled and no lazy irq pending.
  */
 unsigned long pnv_cpu_offline(unsigned int cpu)
 {
  unsigned long srr1;
-
  u32 idle_states = pnv_get_supported_cpuidle_states();
 
+ ppc64_runlatch_off();
+
  if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) {
- srr1 = power9_idle_stop(pnv_deepest_stop_psscr_val,
- pnv_deepest_stop_psscr_mask);
+ unsigned long psscr;
+
+ psscr = mfspr(SPRN_PSSCR);
+ psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
+ pnv_deepest_stop_psscr_val;
+ srr1 = power9_idle_stop(psscr);
+
  } else if (idle_states & OPAL_PM_WINKLE_ENABLED) {
- srr1 = power7_idle_type(PNV_THREAD_WINKLE);
+ srr1 = power7_idle_insn(PNV_THREAD_WINKLE);
  } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
    (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
- srr1 = power7_idle_type(PNV_THREAD_SLEEP);
+ srr1 = power7_idle_insn(PNV_THREAD_SLEEP);
  } else if (idle_states & OPAL_PM_NAP_ENABLED) {
- srr1 = power7_idle_type(PNV_THREAD_NAP);
+ srr1 = power7_idle_insn(PNV_THREAD_NAP);
  } else {
- ppc64_runlatch_off();
  /* This is the fallback method. We emulate snooze */
  while (!generic_check_cpu_restart(cpu)) {
  HMT_low();
@@ -351,9 +357,10 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
  }
  srr1 = 0;
  HMT_medium();
- ppc64_runlatch_on();
  }
 
+ ppc64_runlatch_on();
+
  return srr1;
 }
 
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index f8752795decf..c04c87adad94 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -144,7 +144,14 @@ static void pnv_smp_cpu_kill_self(void)
  unsigned long srr1, wmask;
 
  /* Standard hot unplug procedure */
- local_irq_disable();
+ /*
+ * This hard disables local interrupts, ensuring we have no lazy
+ * irqs pending.
+ */
+ WARN_ON(irqs_disabled());
+ hard_irq_disable();
+ WARN_ON(lazy_irq_pending());
+
  idle_task_exit();
  current->active_mm = NULL; /* for sanity */
  cpu = smp_processor_id();
@@ -162,16 +169,6 @@ static void pnv_smp_cpu_kill_self(void)
  */
  mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1);
 
- /*
- * Hard-disable interrupts, and then clear irq_happened flags
- * that we can safely ignore while off-line, since they
- * are for things for which we do no processing when off-line
- * (or in the case of HMI, all the processing we need to do
- * is done in lower-level real-mode code).
- */
- hard_irq_disable();
- local_paca->irq_happened &= ~(PACA_IRQ_DEC | PACA_IRQ_HMI);
-
  while (!generic_check_cpu_restart(cpu)) {
  /*
  * Clear IPI flag, since we don't handle IPIs while
@@ -184,6 +181,8 @@ static void pnv_smp_cpu_kill_self(void)
 
  srr1 = pnv_cpu_offline(cpu);
 
+ WARN_ON(lazy_irq_pending());
+
  /*
  * If the SRR1 value indicates that we woke up due to
  * an external interrupt, then clear the interrupt.
@@ -196,8 +195,7 @@ static void pnv_smp_cpu_kill_self(void)
  * contains 0.
  */
  if (((srr1 & wmask) == SRR1_WAKEEE) ||
-    ((srr1 & wmask) == SRR1_WAKEHVI) ||
-    (local_paca->irq_happened & PACA_IRQ_EE)) {
+    ((srr1 & wmask) == SRR1_WAKEHVI)) {
  if (cpu_has_feature(CPU_FTR_ARCH_300)) {
  if (xive_enabled())
  xive_flush_interrupt();
@@ -209,14 +207,15 @@ static void pnv_smp_cpu_kill_self(void)
  unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
  asm volatile(PPC_MSGCLR(%0) : : "r" (msg));
  }
- local_paca->irq_happened &= ~(PACA_IRQ_EE | PACA_IRQ_DBELL);
  smp_mb();
 
  if (cpu_core_split_required())
  continue;
 
  if (srr1 && !generic_check_cpu_restart(cpu))
- DBG("CPU%d Unexpected exit while offline !\n", cpu);
+ DBG("CPU%d Unexpected exit while offline srr1=%lx!\n",
+ cpu, srr1);
+
  }
 
  /* Re-enable decrementer interrupts */
--
2.11.0

Reply | Threaded
Open this post in threaded view
|

[PATCH 03/14] powerpc/64s: idle provide a default idle for POWER9

Nicholas Piggin-2
In reply to this post by Nicholas Piggin-2
Before the cpuidle driver is enabled, provide a default idle
function similarly to POWER7/8.

This should not have much effect, because the cpuidle driver
for powernv is mandatory, but if that changes we should have
a fallback.

Signed-off-by: Nicholas Piggin <[hidden email]>
---
 arch/powerpc/platforms/powernv/idle.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 8562916b8cf7..78b4755b7947 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -649,6 +649,8 @@ static int __init pnv_init_idle_states(void)
 
  if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED)
  ppc_md.power_save = power7_idle;
+ else if (supported_cpuidle_states & OPAL_PM_STOP_INST_FAST)
+ ppc_md.power_save = power9_idle;
 
 out:
  return 0;
--
2.11.0

Reply | Threaded
Open this post in threaded view
|

[PATCH 04/14] powerpc/64s: idle process interrupts from system reset wakeup

Nicholas Piggin-2
In reply to this post by Nicholas Piggin-2
When the CPU wakes from low power state, it begins at the system reset
interrupt with the exception that caused the wakeup encoded in SRR1.

Today, powernv idle wakeup ignores the wakeup reason (except a special
case for HMI), and the regular interrupt corresponding to the
exception will fire after the idle wakeup exits.

Change this to replay the interrupt from the idle wakeup before
interrupts are hard-enabled.

Test on POWER8 of context_switch selftests benchmark with polling idle
disabled (e.g., always nap, giving cross-CPU IPIs) gives the following
results:

                                original         wakeup direct
Different threads, same core:   315k/s           264k/s
Different cores:                235k/s           242k/s

There is a slowdown for doorbell IPI (same core) case because system
reset wakeup does not clear the message and the doorbell interrupt
fires again needlessly.

Signed-off-by: Nicholas Piggin <[hidden email]>
---
 arch/powerpc/include/asm/hw_irq.h     |  1 +
 arch/powerpc/kernel/exceptions-64s.S  | 28 ++++++++++++++++++++++++++++
 arch/powerpc/platforms/powernv/idle.c | 12 ++++++++----
 3 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index eba60416536e..0ef9a33c139f 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -32,6 +32,7 @@
 #ifndef __ASSEMBLY__
 
 extern void __replay_interrupt(unsigned int vector);
+extern void __replay_wakeup_interrupt(unsigned long srr1);
 
 extern void timer_interrupt(struct pt_regs *);
 extern void performance_monitor_exception(struct pt_regs *regs);
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 2f700a15bfa3..3d75641c2566 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1646,3 +1646,31 @@ FTR_SECTION_ELSE
  beq doorbell_super_common
 ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
  blr
+
+/*
+ * Similar to __replay_interrupt but called from cpu idle wakeup
+ * with SRR1 wake value in r3.
+ */
+_GLOBAL(__replay_wakeup_interrupt)
+ extrdi r3,r3,42,4 /* Get SRR1 wake reason in low bits */
+ mfmsr r12
+ mflr r11
+ mfcr r9
+ ori r12,r12,MSR_EE
+ cmpwi r3,0x6
+ beq decrementer_common
+ cmpwi r3,0x8
+ beq hardware_interrupt_common
+BEGIN_FTR_SECTION
+ cmpwi r3,0x3
+ beq h_doorbell_common
+ cmpwi r3,0x9
+ beq h_virt_irq_common
+ cmpwi r3,0xa
+ beq hmi_exception_common
+FTR_SECTION_ELSE
+ cmpwi r3,0x5
+ beq doorbell_super_common
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
+ mtmsrd r12,1
+ blr
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 78b4755b7947..7c83a95f929e 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -272,8 +272,10 @@ static unsigned long __power7_idle_type(unsigned long type)
 
 void power7_idle_type(unsigned long type)
 {
- __power7_idle_type(type);
- __hard_irq_enable();
+ unsigned long srr1;
+
+ srr1 = __power7_idle_type(type);
+ __replay_wakeup_interrupt(srr1);
 }
 
 void power7_idle(void)
@@ -310,8 +312,10 @@ static unsigned long __power9_idle_type(unsigned long stop_psscr_val,
 void power9_idle_type(unsigned long stop_psscr_val,
       unsigned long stop_psscr_mask)
 {
- __power9_idle_type(stop_psscr_val, stop_psscr_mask);
- __hard_irq_enable();
+ unsigned long srr1;
+
+ srr1 = __power9_idle_type(stop_psscr_val, stop_psscr_mask);
+ __replay_wakeup_interrupt(srr1);
 }
 
 /*
--
2.11.0

Reply | Threaded
Open this post in threaded view
|

[PATCH 05/14] powerpc/64s: msgclr when handling doorbell exceptions

Nicholas Piggin-2
In reply to this post by Nicholas Piggin-2
msgsnd doorbell exceptions are cleared when the doorbell interrupt is
taken. However if a doorbell exception causes a system reset interrupt
wake from power saving state, the message is not cleared. Processing
the doorbell from the system reset interrupt requires msgclr to avoid
taking the exception again.

Testing this plus the previous wakeup direct patch gives:

                                original         wakeup direct     msgclr
Different threads, same core:   315k/s           264k/s            345k/s
Different cores:                235k/s           242k/s            242k/s

Net speedup is +10% for same core, and +3% for different core.

Signed-off-by: Nicholas Piggin <[hidden email]>
---
 arch/powerpc/include/asm/dbell.h      | 13 +++++++++++++
 arch/powerpc/include/asm/ppc-opcode.h |  3 +++
 arch/powerpc/kernel/asm-offsets.c     |  1 +
 arch/powerpc/kernel/exceptions-64s.S  | 27 +++++++++++++++++++++++----
 4 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h
index f70cbfe0ec04..9f2ae0d25e15 100644
--- a/arch/powerpc/include/asm/dbell.h
+++ b/arch/powerpc/include/asm/dbell.h
@@ -56,6 +56,19 @@ static inline void ppc_msgsync(void)
  : : "i" (CPU_FTR_HVMODE|CPU_FTR_ARCH_300));
 }
 
+static inline void _ppc_msgclr(u32 msg)
+{
+ __asm__ __volatile__ (ASM_FTR_IFSET(PPC_MSGCLR(%1), PPC_MSGCLRP(%1), %0)
+ : : "i" (CPU_FTR_HVMODE), "r" (msg));
+}
+
+static inline void ppc_msgclr(enum ppc_dbell type)
+{
+ u32 msg = PPC_DBELL_TYPE(type);
+
+ _ppc_msgclr(msg);
+}
+
 #else /* CONFIG_PPC_BOOK3S */
 
 #define PPC_DBELL_MSGTYPE PPC_DBELL
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 3b6bbf5a8683..4e2cf719c9b2 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -220,6 +220,7 @@
 #define PPC_INST_MSGCLR 0x7c0001dc
 #define PPC_INST_MSGSYNC 0x7c0006ec
 #define PPC_INST_MSGSNDP 0x7c00011c
+#define PPC_INST_MSGCLRP 0x7c00015c
 #define PPC_INST_MTTMR 0x7c0003dc
 #define PPC_INST_NOP 0x60000000
 #define PPC_INST_PASTE 0x7c20070d
@@ -409,6 +410,8 @@
  ___PPC_RB(b))
 #define PPC_MSGSNDP(b) stringify_in_c(.long PPC_INST_MSGSNDP | \
  ___PPC_RB(b))
+#define PPC_MSGCLRP(b) stringify_in_c(.long PPC_INST_MSGCLRP | \
+ ___PPC_RB(b))
 #define PPC_POPCNTB(a, s) stringify_in_c(.long PPC_INST_POPCNTB | \
  __PPC_RA(a) | __PPC_RS(s))
 #define PPC_POPCNTD(a, s) stringify_in_c(.long PPC_INST_POPCNTD | \
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 709e23425317..bd56c78ba87a 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -745,6 +745,7 @@ int main(void)
 #endif
 
  DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
+ DEFINE(PPC_DBELL_MSGTYPE, PPC_DBELL_MSGTYPE);
 
 #ifdef CONFIG_PPC_8xx
  DEFINE(VIRT_IMMR_BASE, (u64)__fix_to_virt(FIX_IMMR_BASE));
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 3d75641c2566..9cc34547c3b6 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1612,6 +1612,25 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
  b 1b
 
 /*
+ * When doorbell is triggered from system reset wakeup, the message is
+ * not cleared, so it would fire again when EE is enabled.
+ *
+ * When coming from local_irq_enable, there may be the same problem if
+ * we were hard disabled.
+ *
+ * Execute msgclr to clear pending exceptions before handling it.
+ */
+h_doorbell_common_msgclr:
+ LOAD_REG_IMMEDIATE(r3, PPC_DBELL_MSGTYPE << (63-36))
+ PPC_MSGCLR(3)
+ b h_doorbell_common
+
+doorbell_super_common_msgclr:
+ LOAD_REG_IMMEDIATE(r3, PPC_DBELL_MSGTYPE << (63-36))
+ PPC_MSGCLRP(3)
+ b doorbell_super_common
+
+/*
  * Called from arch_local_irq_enable when an interrupt needs
  * to be resent. r3 contains 0x500, 0x900, 0xa00 or 0xe80 to indicate
  * which kind of interrupt. MSR:EE is already off. We generate a
@@ -1636,14 +1655,14 @@ _GLOBAL(__replay_interrupt)
  beq hardware_interrupt_common
 BEGIN_FTR_SECTION
  cmpwi r3,0xe80
- beq h_doorbell_common
+ beq h_doorbell_common_msgclr
  cmpwi r3,0xea0
  beq h_virt_irq_common
  cmpwi r3,0xe60
  beq hmi_exception_common
 FTR_SECTION_ELSE
  cmpwi r3,0xa00
- beq doorbell_super_common
+ beq doorbell_super_common_msgclr
 ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
  blr
 
@@ -1663,14 +1682,14 @@ _GLOBAL(__replay_wakeup_interrupt)
  beq hardware_interrupt_common
 BEGIN_FTR_SECTION
  cmpwi r3,0x3
- beq h_doorbell_common
+ beq h_doorbell_common_msgclr
  cmpwi r3,0x9
  beq h_virt_irq_common
  cmpwi r3,0xa
  beq hmi_exception_common
 FTR_SECTION_ELSE
  cmpwi r3,0x5
- beq doorbell_super_common
+ beq doorbell_super_common_msgclr
 ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
  mtmsrd r12,1
  blr
--
2.11.0

Reply | Threaded
Open this post in threaded view
|

[PATCH 06/14] powerpc/64s: interrupt replay balance the return branch predictor

Nicholas Piggin-2
In reply to this post by Nicholas Piggin-2
The __replay_interrupt code is branched to with bl, but the caller is
returned to directly with rfid from the interrupt.

Instead return to a return stub that returns to the caller with blr,
which should do better with the return predictor.

Signed-off-by: Nicholas Piggin <[hidden email]>
---
 arch/powerpc/kernel/exceptions-64s.S | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 9cc34547c3b6..52ad0789fa89 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1646,7 +1646,7 @@ _GLOBAL(__replay_interrupt)
  * we don't give a damn about, so we don't bother storing them.
  */
  mfmsr r12
- mflr r11
+ LOAD_REG_ADDR(r11, __replay_interrupt_return)
  mfcr r9
  ori r12,r12,MSR_EE
  cmpwi r3,0x900
@@ -1664,6 +1664,7 @@ FTR_SECTION_ELSE
  cmpwi r3,0xa00
  beq doorbell_super_common_msgclr
 ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
+__replay_interrupt_return:
  blr
 
 /*
@@ -1673,7 +1674,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
 _GLOBAL(__replay_wakeup_interrupt)
  extrdi r3,r3,42,4 /* Get SRR1 wake reason in low bits */
  mfmsr r12
- mflr r11
+ LOAD_REG_ADDR(r11, __replay_wakeup_interrupt_return)
  mfcr r9
  ori r12,r12,MSR_EE
  cmpwi r3,0x6
@@ -1692,4 +1693,5 @@ FTR_SECTION_ELSE
  beq doorbell_super_common_msgclr
 ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
  mtmsrd r12,1
+__replay_wakeup_interrupt_return:
  blr
--
2.11.0

Reply | Threaded
Open this post in threaded view
|

[PATCH 07/14] powerpc/64s: idle branch to handler with virtual mode offset

Nicholas Piggin-2
In reply to this post by Nicholas Piggin-2
Have the system reset idle wakeup handlers branched to in real mode
with the 0xc... kernel address applied. This allows simplifications of
avoiding rfid when switching to virtual mode in the wakeup handler.

Signed-off-by: Nicholas Piggin <[hidden email]>
---
 arch/powerpc/include/asm/exception-64s.h | 17 ++++++++++++++---
 arch/powerpc/kernel/exceptions-64s.S     |  6 ++++--
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 183d73b6ed99..0912e328e1d7 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -236,15 +236,26 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 #define kvmppc_interrupt kvmppc_interrupt_pr
 #endif
 
+/*
+ * Branch to label using its 0xC000 address. This gives the same real address
+ * when relocation is off, but allows mtmsr to set MSR[IR|DR]=1.
+ * This could set the 0xc bits for !RELOCATABLE rather than load KBASE for
+ * a slight optimisation.
+ */
+#define BRANCH_TO_C000(reg, label) \
+ __LOAD_HANDLER(reg, label); \
+ mtctr reg; \
+ bctr
+
 #ifdef CONFIG_RELOCATABLE
 #define BRANCH_TO_COMMON(reg, label) \
  __LOAD_HANDLER(reg, label); \
  mtctr reg; \
  bctr
 
-#define BRANCH_LINK_TO_FAR(label) \
- __LOAD_FAR_HANDLER(r12, label); \
- mtctr r12; \
+#define BRANCH_LINK_TO_FAR(reg, label) \
+ __LOAD_FAR_HANDLER(reg, label); \
+ mtctr reg; \
  bctrl
 
 /*
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 52ad0789fa89..153cd967554a 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -99,7 +99,9 @@ EXC_VIRT_NONE(0x4000, 0x100)
 #ifdef CONFIG_PPC_P7_NAP
  /*
  * If running native on arch 2.06 or later, check if we are waking up
- * from nap/sleep/winkle, and branch to idle handler.
+ * from nap/sleep/winkle, and branch to idle handler. The idle wakeup
+ * handler initially runs in real mode, but we branch to the 0xc000...
+ * address so we can turn on relocation with mtmsr.
  */
 #define IDLETEST(n) \
  BEGIN_FTR_SECTION ; \
@@ -107,7 +109,7 @@ EXC_VIRT_NONE(0x4000, 0x100)
  rlwinm. r10,r10,47-31,30,31 ; \
  beq- 1f ; \
  cmpwi cr3,r10,2 ; \
- BRANCH_TO_COMMON(r10, system_reset_idle_common) ; \
+ BRANCH_TO_C000(r10, system_reset_idle_common) ; \
 1: \
  END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
 #else
--
2.11.0

Reply | Threaded
Open this post in threaded view
|

[PATCH 08/14] powerpc/64s: idle avoid SRR usage in idle sleep/wake paths

Nicholas Piggin-2
In reply to this post by Nicholas Piggin-2
Idle code now always runs at the 0xc... effective address whether
in real or virtual mode. This means rfid can be ditched, along
with a lot of SRR manipulations.

In the wakeup path, carry SRR1 around in r12. Use mtmsrd to change
MSR states as required.

This also balances the return prediction for the idle call, by
doing blr rather than rfid to return to the idle caller.

On POWER9, 2-process context switch on different cores, with snooze
disabled, increases performance by 2%.
---
 arch/powerpc/kernel/exceptions-64s.S    |  1 +
 arch/powerpc/kernel/idle_book3s.S       | 57 +++++++++++++++------------------
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |  8 ++++-
 3 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 153cd967554a..eb703c20f4ad 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -130,6 +130,7 @@ EXC_VIRT_NONE(0x4100, 0x100)
 
 #ifdef CONFIG_PPC_P7_NAP
 EXC_COMMON_BEGIN(system_reset_idle_common)
+ mfspr r12,SPRN_SRR1
  b pnv_powersave_wakeup
 #endif
 
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index c7edb374d1aa..2efb88da8ba3 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -108,7 +108,7 @@ core_idle_lock_held:
  * r3 - PNV_THREAD_NAP/SLEEP/WINKLE in POWER8
  *   - Requested PSSCR value in POWER9
  *
- * Address of idle handler to 'rfid' to in r4
+ * Address of idle handler to branch to in realmode in r4
  */
 pnv_powersave_common:
  /* Use r3 to pass state nap/sleep/winkle */
@@ -118,14 +118,14 @@ pnv_powersave_common:
  * need to save PC, some CR bits and the NV GPRs,
  * but for now an interrupt frame will do.
  */
+ mtctr r4
+
  mflr r0
  std r0,16(r1)
  stdu r1,-INT_FRAME_SIZE(r1)
  std r0,_LINK(r1)
  std r0,_NIP(r1)
 
- mfmsr   r9
-
  /* We haven't lost state ... yet */
  li r0,0
  stb r0,PACA_NAPSTATELOST(r13)
@@ -135,7 +135,6 @@ pnv_powersave_common:
  SAVE_NVGPRS(r1)
  mfcr r5
  std r5,_CCR(r1)
- std r9,_MSR(r1)
  std r1,PACAR1(r13)
 
  /*
@@ -145,12 +144,8 @@ pnv_powersave_common:
  * the MMU context to the guest.
  */
  LOAD_REG_IMMEDIATE(r7, MSR_IDLE)
- li r6, MSR_RI
- andc r6, r9, r6
- mtmsrd r6, 1 /* clear RI before setting SRR0/1 */
- mtspr SPRN_SRR0, r4
- mtspr SPRN_SRR1, r7
- rfid
+ mtmsrd r7,0
+ bctr
 
  .globl pnv_enter_arch207_idle_mode
 pnv_enter_arch207_idle_mode:
@@ -302,11 +297,10 @@ _GLOBAL(power7_idle_insn)
  b pnv_powersave_common
 
 #define CHECK_HMI_INTERRUPT \
- mfspr r0,SPRN_SRR1; \
 BEGIN_FTR_SECTION_NESTED(66); \
- rlwinm r0,r0,45-31,0xf;  /* extract wake reason field (P8) */ \
+ rlwinm r0,r12,45-31,0xf;  /* extract wake reason field (P8) */ \
 FTR_SECTION_ELSE_NESTED(66); \
- rlwinm r0,r0,45-31,0xe;  /* P7 wake reason field is 3 bits */ \
+ rlwinm r0,r12,45-31,0xe;  /* P7 wake reason field is 3 bits */ \
 ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
  cmpwi r0,0xa; /* Hypervisor maintenance ? */ \
  bne 20f; \
@@ -384,17 +378,17 @@ pnv_powersave_wakeup_mce:
 
  /*
  * Now put the original SRR1 with SRR1_WAKEMCE_RESVD as the wake
- * reason into SRR1, which allows reuse of the system reset wakeup
+ * reason into r12, which allows reuse of the system reset wakeup
  * code without being mistaken for another type of wakeup.
  */
- oris r3,r3,SRR1_WAKEMCE_RESVD@h
- mtspr SPRN_SRR1,r3
+ oris r12,r3,SRR1_WAKEMCE_RESVD@h
 
  b pnv_powersave_wakeup
 
 /*
  * Called from reset vector for powersave wakeups.
  * cr3 - set to gt if waking up with partial/complete hypervisor state loss
+ * r12 - SRR1
  */
 .global pnv_powersave_wakeup
 pnv_powersave_wakeup:
@@ -404,8 +398,10 @@ BEGIN_FTR_SECTION
 BEGIN_FTR_SECTION_NESTED(70)
  bl power9_dd1_recover_paca
 END_FTR_SECTION_NESTED_IFSET(CPU_FTR_POWER9_DD1, 70)
+ ld r1,PACAR1(r13)
  bl pnv_restore_hyp_resource_arch300
 FTR_SECTION_ELSE
+ ld r1,PACAR1(r13)
  bl pnv_restore_hyp_resource_arch207
 ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
 
@@ -425,7 +421,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
 #endif
 
  /* Return SRR1 from power7_nap() */
- mfspr r3,SPRN_SRR1
+ mr r3,r12
  blt cr3,pnv_wakeup_noloss
  b pnv_wakeup_loss
 
@@ -489,7 +485,6 @@ pnv_restore_hyp_resource_arch207:
  * r4 - PACA_THREAD_IDLE_STATE
  */
 pnv_wakeup_tb_loss:
- ld r1,PACAR1(r13)
  /*
  * Before entering any idle state, the NVGPRs are saved in the stack.
  * If there was a state loss, or PACA_NAPSTATELOST was set, then the
@@ -515,9 +510,9 @@ pnv_wakeup_tb_loss:
  * is required to return back to reset vector after hypervisor state
  * restore is complete.
  */
+ mr r19,r12
  mr r18,r4
  mflr r17
- mfspr r16,SPRN_SRR1
 BEGIN_FTR_SECTION
  CHECK_HMI_INTERRUPT
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
@@ -762,7 +757,7 @@ no_segments:
 
 hypervisor_state_restored:
 
- mtspr SPRN_SRR1,r16
+ mr r12,r19
  mtlr r17
  blr /* return to pnv_powersave_wakeup */
 
@@ -778,20 +773,19 @@ fastsleep_workaround_at_exit:
  */
 .global pnv_wakeup_loss
 pnv_wakeup_loss:
- ld r1,PACAR1(r13)
 BEGIN_FTR_SECTION
  CHECK_HMI_INTERRUPT
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
  REST_NVGPRS(r1)
  REST_GPR(2, r1)
+ ld r4,PACAKMSR(r13)
+ ld r5,_LINK(r1)
  ld r6,_CCR(r1)
- ld r4,_MSR(r1)
- ld r5,_NIP(r1)
  addi r1,r1,INT_FRAME_SIZE
+ mtlr r5
  mtcr r6
- mtspr SPRN_SRR1,r4
- mtspr SPRN_SRR0,r5
- rfid
+ mtmsrd r4
+ blr
 
 /*
  * R3 here contains the value that will be returned to the caller
@@ -804,12 +798,11 @@ pnv_wakeup_noloss:
 BEGIN_FTR_SECTION
  CHECK_HMI_INTERRUPT
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
- ld r1,PACAR1(r13)
- ld r6,_CCR(r1)
- ld r4,_MSR(r1)
+ ld r4,PACAKMSR(r13)
  ld r5,_NIP(r1)
+ ld r6,_CCR(r1)
  addi r1,r1,INT_FRAME_SIZE
+ mtlr r5
  mtcr r6
- mtspr SPRN_SRR1,r4
- mtspr SPRN_SRR0,r5
- rfid
+ mtmsrd r4
+ blr
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index bdb3f76ceb6b..eb5b78b6bacf 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -329,15 +329,21 @@ kvm_novcpu_exit:
  * We come in here when wakened from nap mode.
  * Relocation is off and most register values are lost.
  * r13 points to the PACA.
+ * r3 contains the SRR1 wakeup value, SRR1 is trashed.
  */
  .globl kvm_start_guest
 kvm_start_guest:
-
  /* Set runlatch bit the minute you wake up from nap */
  mfspr r0, SPRN_CTRLF
  ori r0, r0, 1
  mtspr SPRN_CTRLT, r0
 
+ /*
+ * Could avoid this and pass it through in r3. For now,
+ * code expects it to be in SRR1.
+ */
+ mtspr SPRN_SRR1,r3
+
  ld r2,PACATOC(r13)
 
  li r0,KVM_HWTHREAD_IN_KVM
--
2.11.0

Reply | Threaded
Open this post in threaded view
|

[PATCH 09/14] powerpc/64s: idle hmi wakeup is unlikely

Nicholas Piggin-2
In reply to this post by Nicholas Piggin-2
In a busy system, idle wakeups are overwhelmingly caused by IPIs and
device interrupts; hypervisor maintenance interrupt (HMI) wakeups are
rare, so annotate the HMI check branch as unlikely.

Signed-off-by: Nicholas Piggin <[hidden email]>
---
 arch/powerpc/kernel/idle_book3s.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 2efb88da8ba3..4004cdf72f42 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -303,7 +303,7 @@ FTR_SECTION_ELSE_NESTED(66); \
  rlwinm r0,r12,45-31,0xe;  /* P7 wake reason field is 3 bits */ \
 ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
  cmpwi r0,0xa; /* Hypervisor maintenance ? */ \
- bne 20f; \
+ bne+ 20f; \
  /* Invoke opal call to handle hmi */ \
  ld r2,PACATOC(r13); \
  ld r1,PACAR1(r13); \
--
2.11.0

Reply | Threaded
Open this post in threaded view
|

[PATCH 10/14] powerpc/64s: cpuidle set polling before enabling irqs

Nicholas Piggin-2
In reply to this post by Nicholas Piggin-2
local_irq_enable can cause interrupts to be taken, which can consume a
significant amount of processing time. The idle process
should set its polling flag before this, so another process that
wakes it during this time will not have to send an IPI.

Expand the TIF_POLLING_NRFLAG coverage to as large as possible.

Signed-off-by: Nicholas Piggin <[hidden email]>
---
 drivers/cpuidle/cpuidle-powernv.c | 4 +++-
 drivers/cpuidle/cpuidle-pseries.c | 3 ++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index 150b971c303b..0ee4660efb5f 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -51,9 +51,10 @@ static int snooze_loop(struct cpuidle_device *dev,
 {
  u64 snooze_exit_time;
 
- local_irq_enable();
  set_thread_flag(TIF_POLLING_NRFLAG);
 
+ local_irq_enable();
+
  snooze_exit_time = get_tb() + snooze_timeout;
  ppc64_runlatch_off();
  HMT_very_low();
@@ -66,6 +67,7 @@ static int snooze_loop(struct cpuidle_device *dev,
  ppc64_runlatch_on();
  clear_thread_flag(TIF_POLLING_NRFLAG);
  smp_mb();
+
  return index;
 }
 
diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c
index 166ccd711ec9..7b12bb2ea70f 100644
--- a/drivers/cpuidle/cpuidle-pseries.c
+++ b/drivers/cpuidle/cpuidle-pseries.c
@@ -62,9 +62,10 @@ static int snooze_loop(struct cpuidle_device *dev,
  unsigned long in_purr;
  u64 snooze_exit_time;
 
+ set_thread_flag(TIF_POLLING_NRFLAG);
+
  idle_loop_prolog(&in_purr);
  local_irq_enable();
- set_thread_flag(TIF_POLLING_NRFLAG);
  snooze_exit_time = get_tb() + snooze_timeout;
 
  while (!need_resched()) {
--
2.11.0

Reply | Threaded
Open this post in threaded view
|

[PATCH 11/14] powerpc/64s: cpuidle read mostly for common globals

Nicholas Piggin-2
In reply to this post by Nicholas Piggin-2
Mark these commonly-read, rarely-written globals __read_mostly so they
are not placed in cachelines that bounce between CPUs.

Signed-off-by: Nicholas Piggin <[hidden email]>
---
 drivers/cpuidle/cpuidle-powernv.c | 10 +++++-----
 drivers/cpuidle/cpuidle-pseries.c |  8 ++++----
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index 0ee4660efb5f..f0247652d91f 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -32,18 +32,18 @@ static struct cpuidle_driver powernv_idle_driver = {
  .owner            = THIS_MODULE,
 };
 
-static int max_idle_state;
-static struct cpuidle_state *cpuidle_state_table;
+static int max_idle_state __read_mostly;
+static struct cpuidle_state *cpuidle_state_table __read_mostly;
 
 struct stop_psscr_table {
  u64 val;
  u64 mask;
 };
 
-static struct stop_psscr_table stop_psscr_table[CPUIDLE_STATE_MAX];
+static struct stop_psscr_table stop_psscr_table[CPUIDLE_STATE_MAX] __read_mostly;
 
-static u64 snooze_timeout;
-static bool snooze_timeout_en;
+static u64 snooze_timeout __read_mostly;
+static bool snooze_timeout_en __read_mostly;
 
 static int snooze_loop(struct cpuidle_device *dev,
  struct cpuidle_driver *drv,
diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c
index 7b12bb2ea70f..a404f352d284 100644
--- a/drivers/cpuidle/cpuidle-pseries.c
+++ b/drivers/cpuidle/cpuidle-pseries.c
@@ -25,10 +25,10 @@ struct cpuidle_driver pseries_idle_driver = {
  .owner            = THIS_MODULE,
 };
 
-static int max_idle_state;
-static struct cpuidle_state *cpuidle_state_table;
-static u64 snooze_timeout;
-static bool snooze_timeout_en;
+static int max_idle_state __read_mostly;
+static struct cpuidle_state *cpuidle_state_table __read_mostly;
+static u64 snooze_timeout __read_mostly;
+static bool snooze_timeout_en __read_mostly;
 
 static inline void idle_loop_prolog(unsigned long *in_purr)
 {
--
2.11.0

Reply | Threaded
Open this post in threaded view
|

[PATCH 12/14] powerpc/64s: cpuidle no memory barrier after break from idle

Nicholas Piggin-2
In reply to this post by Nicholas Piggin-2
A memory barrier is only required when the polling flag is cleared
before the task has been woken; it is not needed on the common path
where we break out of the loop because there is work to do, so
optimise for that case.

Signed-off-by: Nicholas Piggin <[hidden email]>
---
 drivers/cpuidle/cpuidle-powernv.c | 11 +++++++++--
 drivers/cpuidle/cpuidle-pseries.c | 11 +++++++++--
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index f0247652d91f..c53a8bb40471 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -59,14 +59,21 @@ static int snooze_loop(struct cpuidle_device *dev,
  ppc64_runlatch_off();
  HMT_very_low();
  while (!need_resched()) {
- if (likely(snooze_timeout_en) && get_tb() > snooze_exit_time)
+ if (likely(snooze_timeout_en) && get_tb() > snooze_exit_time) {
+ /*
+ * Task has not woken up but we are exiting the polling
+ * loop anyway. Require a barrier after polling is
+ * cleared to order subsequent test of need_resched().
+ */
+ clear_thread_flag(TIF_POLLING_NRFLAG);
+ smp_mb();
  break;
+ }
  }
 
  HMT_medium();
  ppc64_runlatch_on();
  clear_thread_flag(TIF_POLLING_NRFLAG);
- smp_mb();
 
  return index;
 }
diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c
index a404f352d284..e9b3853d93ea 100644
--- a/drivers/cpuidle/cpuidle-pseries.c
+++ b/drivers/cpuidle/cpuidle-pseries.c
@@ -71,13 +71,20 @@ static int snooze_loop(struct cpuidle_device *dev,
  while (!need_resched()) {
  HMT_low();
  HMT_very_low();
- if (snooze_timeout_en && get_tb() > snooze_exit_time)
+ if (likely(snooze_timeout_en) && get_tb() > snooze_exit_time) {
+ /*
+ * Task has not woken up but we are exiting the polling
+ * loop anyway. Require a barrier after polling is
+ * cleared to order subsequent test of need_resched().
+ */
+ clear_thread_flag(TIF_POLLING_NRFLAG);
+ smp_mb();
  break;
+ }
  }
 
  HMT_medium();
  clear_thread_flag(TIF_POLLING_NRFLAG);
- smp_mb();
 
  idle_loop_epilog(in_purr);
 
--
2.11.0

Reply | Threaded
Open this post in threaded view
|

[PATCH 13/14] powerpc/64: runlatch CTRL[RUN] set optimisation

Nicholas Piggin-2
In reply to this post by Nicholas Piggin-2
The CTRL register is read-only except bit 63 which is the run latch
control. This means it can be updated with a mtspr rather than
mfspr/mtspr.

Signed-off-by: Nicholas Piggin <[hidden email]>
---
 arch/powerpc/kernel/process.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 6273b5d5baec..29865c817b02 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1991,12 +1991,8 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
 void notrace __ppc64_runlatch_on(void)
 {
  struct thread_info *ti = current_thread_info();
- unsigned long ctrl;
-
- ctrl = mfspr(SPRN_CTRLF);
- ctrl |= CTRL_RUNLATCH;
- mtspr(SPRN_CTRLT, ctrl);
 
+ mtspr(SPRN_CTRLT, CTRL_RUNLATCH);
  ti->local_flags |= _TLF_RUNLATCH;
 }
 
@@ -2004,13 +2000,9 @@ void notrace __ppc64_runlatch_on(void)
 void notrace __ppc64_runlatch_off(void)
 {
  struct thread_info *ti = current_thread_info();
- unsigned long ctrl;
 
  ti->local_flags &= ~_TLF_RUNLATCH;
-
- ctrl = mfspr(SPRN_CTRLF);
- ctrl &= ~CTRL_RUNLATCH;
- mtspr(SPRN_CTRLT, ctrl);
+ mtspr(SPRN_CTRLT, 0);
 }
 #endif /* CONFIG_PPC64 */
 
--
2.11.0

Reply | Threaded
Open this post in threaded view
|

[PATCH 14/14] powerpc/64s: idle runlatch switch is done with MSR[EE]=0

Nicholas Piggin-2
In reply to this post by Nicholas Piggin-2
2*mfmsr and 2*mtmsr can be avoided in the idle sleep/wake code
because we know MSR[EE] is clear.

Signed-off-by: Nicholas Piggin <[hidden email]>
---
 arch/powerpc/platforms/powernv/idle.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 7c83a95f929e..8db49f9c2bd8 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -263,9 +263,9 @@ static unsigned long __power7_idle_type(unsigned long type)
  if (!prep_irq_for_idle())
  return 0;
 
- ppc64_runlatch_off();
+ __ppc64_runlatch_off();
  srr1 = power7_idle_insn(type);
- ppc64_runlatch_on();
+ __ppc64_runlatch_on();
 
  return srr1;
 }
@@ -300,9 +300,9 @@ static unsigned long __power9_idle_type(unsigned long stop_psscr_val,
  psscr = mfspr(SPRN_PSSCR);
  psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
 
- ppc64_runlatch_off();
+ __ppc64_runlatch_off();
  srr1 = power9_idle_stop(psscr);
- ppc64_runlatch_on();
+ __ppc64_runlatch_on();
 
  trace_hardirqs_off();
 
@@ -336,7 +336,7 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
  unsigned long srr1;
  u32 idle_states = pnv_get_supported_cpuidle_states();
 
- ppc64_runlatch_off();
+ __ppc64_runlatch_off();
 
  if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) {
  unsigned long psscr;
@@ -363,7 +363,7 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
  HMT_medium();
  }
 
- ppc64_runlatch_on();
+ __ppc64_runlatch_on();
 
  return srr1;
 }
--
2.11.0