[PATCH 0/9] Reduce PACA save areas

Nicholas Piggin

I want to save another register in the PACA, so I had a look at
what we might cut from it first. 3-4 fields are reasonably easy
to remove.
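
Net effect over the series: each of the 64s PACA exception save areas
(exgen, exslb, exnmi, exmc) shrinks from 13 u64 slots to 10 with
CONFIG_RELOCATABLE, or 9 without it, i.e. 96-128 bytes per CPU.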

Nicholas Piggin (9):
  powerpc/64s: slb_allocate_realmode() preserve r3
  powerpc/64s: SLB miss handler avoid saving faulting address into
    EX_DAR
  powerpc/64s: SLB miss already has CTR saved for relocatable kernel
  powerpc/64s: SLB miss handler avoid r3 save/restore
  powerpc/64s: paca add EX_SIZE definition for exception save areas
  powerpc/64s: paca EX_SRR0 is unused, remove it
  powerpc/64s: paca EX_LR can be merged with EX_DAR
  powerpc/64s: paca EX_R3 can be merged with EX_DAR
  powerpc/64s: paca EX_CTR is not used with !RELOCATABLE, remove it

 arch/powerpc/include/asm/exception-64s.h | 53 ++++++++++++++++++++++---------
 arch/powerpc/include/asm/paca.h          | 12 ++++---
 arch/powerpc/kernel/exceptions-64s.S     | 54 +++++++++++++++++---------------
 arch/powerpc/mm/slb_low.S                | 24 ++++++++------
 4 files changed, 88 insertions(+), 55 deletions(-)

--
2.11.0

[PATCH 1/9] powerpc/64s: slb_allocate_realmode() preserve r3

Nicholas Piggin

One fewer register clobbered by this function means the SLB miss
handler has one fewer register to save.

Signed-off-by: Nicholas Piggin <[hidden email]>
---
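For reference, the function's register contract after this change, as
the updated comment in slb_low.S states:

    r3  = faulting address (preserved)
    r13 = PACA
    r9, r10, r11 clobbered
    no other registers examined or changed
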
 arch/powerpc/mm/slb_low.S | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 1519617aab36..9869b44a04dc 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -70,6 +70,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_68_BIT_VA)
  * Create an SLB entry for the given EA (user or kernel).
  * r3 = faulting address, r13 = PACA
  * r9, r10, r11 are clobbered by this function
+ * r3 is preserved.
  * No other registers are examined or changed.
  */
 _GLOBAL(slb_allocate_realmode)
@@ -235,6 +236,9 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
  * dont have any LRU information to help us choose a slot.
  */
 
+ mr r9,r3
+
+ /* slb_finish_load_1T continues here. r9=EA with non-ESID bits clear */
 7: ld r10,PACASTABRR(r13)
  addi r10,r10,1
  /* This gets soft patched on boot. */
@@ -249,10 +253,10 @@ slb_compare_rr_to_size:
  std r10,PACASTABRR(r13)
 
 3:
- rldimi r3,r10,0,36 /* r3= EA[0:35] | entry */
- oris r10,r3,SLB_ESID_V@h /* r3 |= SLB_ESID_V */
+ rldimi r9,r10,0,36 /* r9  = EA[0:35] | entry */
+ oris r10,r9,SLB_ESID_V@h /* r10 = r9 | SLB_ESID_V */
 
- /* r3 = ESID data, r11 = VSID data */
+ /* r9 = ESID data, r11 = VSID data */
 
  /*
  * No need for an isync before or after this slbmte. The exception
@@ -265,21 +269,21 @@ slb_compare_rr_to_size:
  bgelr cr7
 
  /* Update the slb cache */
- lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */
- cmpldi r3,SLB_CACHE_ENTRIES
+ lhz r9,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */
+ cmpldi r9,SLB_CACHE_ENTRIES
  bge 1f
 
  /* still room in the slb cache */
- sldi r11,r3,2 /* r11 = offset * sizeof(u32) */
+ sldi r11,r9,2 /* r11 = offset * sizeof(u32) */
  srdi    r10,r10,28 /* get the 36 bits of the ESID */
  add r11,r11,r13 /* r11 = (u32 *)paca + offset */
  stw r10,PACASLBCACHE(r11) /* paca->slb_cache[offset] = esid */
- addi r3,r3,1 /* offset++ */
+ addi r9,r9,1 /* offset++ */
  b 2f
 1: /* offset >= SLB_CACHE_ENTRIES */
- li r3,SLB_CACHE_ENTRIES+1
+ li r9,SLB_CACHE_ENTRIES+1
 2:
- sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */
+ sth r9,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */
  crclr 4*cr0+eq /* set result to "success" */
  blr
 
@@ -301,7 +305,7 @@ slb_compare_rr_to_size:
  rldimi r11,r10,SLB_VSID_SSIZE_SHIFT,0 /* insert segment size */
 
  /* r3 = EA, r11 = VSID data */
- clrrdi r3,r3,SID_SHIFT_1T /* clear out non-ESID bits */
+ clrrdi r9,r3,SID_SHIFT_1T /* clear out non-ESID bits */
  b 7b
 
 
--
2.11.0

[PATCH 2/9] powerpc/64s: SLB miss handler avoid saving faulting address into EX_DAR

Nicholas Piggin

The EX_DAR save area is only used in exceptional cases. With r3 no
longer clobbered by slb_allocate_realmode, saving the faulting address
to EX_DAR can be deferred to those cases.

Signed-off-by: Nicholas Piggin <[hidden email]>
---
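With this change, EX_DAR in the exslb area is written only on the two
error paths (2: unrecoverable exception, 8: bad address), where the
faulting address in r3 is stashed to EX_DAR and the original r3 is
reloaded from EX_R3 before the full stack frame is built.
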
 arch/powerpc/kernel/exceptions-64s.S | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index ae418b85c17c..a4a71bce35d6 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -628,7 +628,6 @@ EXC_COMMON_BEGIN(slb_miss_realmode)
 
  stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
  std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
- std r3,PACA_EXSLB+EX_DAR(r13)
 
  crset 4*cr0+eq
 #ifdef CONFIG_PPC_STD_MMU_64
@@ -638,11 +637,10 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 #endif
 
  ld r10,PACA_EXSLB+EX_LR(r13)
- ld r3,PACA_EXSLB+EX_R3(r13)
  lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
  mtlr r10
 
- beq 8f /* if bad address, make full stack frame */
+ beq- 8f /* if bad address, make full stack frame */
 
  andi. r10,r12,MSR_RI /* check for unrecoverable exception */
  beq- 2f
@@ -657,6 +655,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 .machine pop
 
  RESTORE_PPR_PACA(PACA_EXSLB, r9)
+ ld r3,PACA_EXSLB+EX_R3(r13)
  ld r9,PACA_EXSLB+EX_R9(r13)
  ld r10,PACA_EXSLB+EX_R10(r13)
  ld r11,PACA_EXSLB+EX_R11(r13)
@@ -665,7 +664,9 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
  rfid
  b . /* prevent speculative execution */
 
-2: mfspr r11,SPRN_SRR0
+2: std     r3,PACA_EXSLB+EX_DAR(r13)
+ ld r3,PACA_EXSLB+EX_R3(r13)
+ mfspr r11,SPRN_SRR0
  LOAD_HANDLER(r10,unrecov_slb)
  mtspr SPRN_SRR0,r10
  ld r10,PACAKMSR(r13)
@@ -673,7 +674,9 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
  rfid
  b .
 
-8: mfspr r11,SPRN_SRR0
+8: std     r3,PACA_EXSLB+EX_DAR(r13)
+ ld r3,PACA_EXSLB+EX_R3(r13)
+ mfspr r11,SPRN_SRR0
  LOAD_HANDLER(r10,bad_addr_slb)
  mtspr SPRN_SRR0,r10
  ld r10,PACAKMSR(r13)
--
2.11.0

[PATCH 3/9] powerpc/64s: SLB miss already has CTR saved for relocatable kernel

Nicholas Piggin

The EXCEPTION_PROLOG_1 used by SLB miss already saves CTR when the
kernel is built with CONFIG_RELOCATABLE. So it does not have to be
saved and reloaded when branching to slb_miss_realmode. It can be
restored from the PACA as usual.

Signed-off-by: Nicholas Piggin <[hidden email]>
---
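RESTORE_CTR already does the right thing for both configurations; its
definition in exception-64s.h is roughly:

    #ifdef CONFIG_RELOCATABLE
    #define RESTORE_CTR(reg, area)  ld reg,area+EX_CTR(r13); mtctr reg
    #else
    #define RESTORE_CTR(reg, area)
    #endif

so on !RELOCATABLE builds, where CTR was never clobbered, it expands
to nothing.
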
 arch/powerpc/kernel/exceptions-64s.S | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index a4a71bce35d6..486e205cc762 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -519,7 +519,7 @@ EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
  * because the distance from here to there depends on where
  * the kernel ends up being put.
  */
- mfctr r11
+ /* CTR is saved if RELOCATABLE */
  LOAD_HANDLER(r10, slb_miss_realmode)
  mtctr r10
  bctr
@@ -542,7 +542,7 @@ EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
  * because the distance from here to there depends on where
  * the kernel ends up being put.
  */
- mfctr r11
+ /* CTR is saved if RELOCATABLE */
  LOAD_HANDLER(r10, slb_miss_realmode)
  mtctr r10
  bctr
@@ -582,7 +582,7 @@ EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80)
 #ifndef CONFIG_RELOCATABLE
  b slb_miss_realmode
 #else
- mfctr r11
+ /* CTR is saved if RELOCATABLE */
  LOAD_HANDLER(r10, slb_miss_realmode)
  mtctr r10
  bctr
@@ -600,7 +600,7 @@ EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)
 #ifndef CONFIG_RELOCATABLE
  b slb_miss_realmode
 #else
- mfctr r11
+ /* CTR is saved if RELOCATABLE */
  LOAD_HANDLER(r10, slb_miss_realmode)
  mtctr r10
  bctr
@@ -622,10 +622,6 @@ EXC_COMMON_BEGIN(slb_miss_realmode)
  * procedure.
  */
  mflr r10
-#ifdef CONFIG_RELOCATABLE
- mtctr r11
-#endif
-
  stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
  std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
 
@@ -654,6 +650,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
  mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
 .machine pop
 
+ RESTORE_CTR(r9, PACA_EXSLB)
  RESTORE_PPR_PACA(PACA_EXSLB, r9)
  ld r3,PACA_EXSLB+EX_R3(r13)
  ld r9,PACA_EXSLB+EX_R9(r13)
--
2.11.0

[PATCH 4/9] powerpc/64s: SLB miss handler avoid r3 save/restore

Nicholas Piggin

The SLB miss handler uses r3 for the faulting address, but r12 can
mostly be freed up to hold the saved r3; the only cost is that SRR1
must be reloaded on the error paths.

It would be more conventional to use r12 for SRR1 (and use r11 to
save r3), but slb_allocate_realmode clobbers r11 and not r12.

Signed-off-by: Nicholas Piggin <[hidden email]>
---
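Register state on entry to slb_miss_realmode after this patch, per the
updated comment:

    r3  = faulting address
    r9  = saved CR
    r11 = saved SRR1 (SRR0 still ready for return)
    r12 = saved r3
    r9-r13 saved in paca->exslb; cr6.eq set for a D-SLB miss
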
 arch/powerpc/kernel/exceptions-64s.S | 34 ++++++++++++++++++----------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 486e205cc762..6ba4c4c6ae69 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -507,9 +507,9 @@ EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
  SET_SCRATCH0(r13)
  EXCEPTION_PROLOG_0(PACA_EXSLB)
  EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380)
- std r3,PACA_EXSLB+EX_R3(r13)
+ mr r12,r3 /* save r3 */
  mfspr r3,SPRN_DAR
- mfspr r12,SPRN_SRR1
+ mfspr r11,SPRN_SRR1
  crset 4*cr6+eq
 #ifndef CONFIG_RELOCATABLE
  b slb_miss_realmode
@@ -530,9 +530,9 @@ EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
  SET_SCRATCH0(r13)
  EXCEPTION_PROLOG_0(PACA_EXSLB)
  EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380)
- std r3,PACA_EXSLB+EX_R3(r13)
+ mr r12,r3 /* save r3 */
  mfspr r3,SPRN_DAR
- mfspr r12,SPRN_SRR1
+ mfspr r11,SPRN_SRR1
  crset 4*cr6+eq
 #ifndef CONFIG_RELOCATABLE
  b slb_miss_realmode
@@ -575,9 +575,9 @@ EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80)
  SET_SCRATCH0(r13)
  EXCEPTION_PROLOG_0(PACA_EXSLB)
  EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480)
- std r3,PACA_EXSLB+EX_R3(r13)
+ mr r12,r3 /* save r3 */
  mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
- mfspr r12,SPRN_SRR1
+ mfspr r11,SPRN_SRR1
  crclr 4*cr6+eq
 #ifndef CONFIG_RELOCATABLE
  b slb_miss_realmode
@@ -593,9 +593,9 @@ EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)
  SET_SCRATCH0(r13)
  EXCEPTION_PROLOG_0(PACA_EXSLB)
  EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480)
- std r3,PACA_EXSLB+EX_R3(r13)
+ mr r12,r3 /* save r3 */
  mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
- mfspr r12,SPRN_SRR1
+ mfspr r11,SPRN_SRR1
  crclr 4*cr6+eq
 #ifndef CONFIG_RELOCATABLE
  b slb_miss_realmode
@@ -613,10 +613,10 @@ TRAMP_KVM(PACA_EXSLB, 0x480)
 EXC_COMMON_BEGIN(slb_miss_realmode)
  /*
  * r13 points to the PACA, r9 contains the saved CR,
- * r12 contain the saved SRR1, SRR0 is still ready for return
+ * r12 contains the saved r3,
+ * r11 contain the saved SRR1, SRR0 is still ready for return
  * r3 has the faulting address
  * r9 - r13 are saved in paca->exslb.
- * r3 is saved in paca->slb_r3
  * cr6.eq is set for a D-SLB miss, clear for a I-SLB miss
  * We assume we aren't going to take any exceptions during this
  * procedure.
@@ -625,6 +625,9 @@ EXC_COMMON_BEGIN(slb_miss_realmode)
  stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
  std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
 
+ andi. r11,r11,MSR_RI /* check for unrecoverable exception */
+ beq- 2f
+
  crset 4*cr0+eq
 #ifdef CONFIG_PPC_STD_MMU_64
 BEGIN_MMU_FTR_SECTION
@@ -638,9 +641,6 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 
  beq- 8f /* if bad address, make full stack frame */
 
- andi. r10,r12,MSR_RI /* check for unrecoverable exception */
- beq- 2f
-
  /* All done -- return from exception. */
 
 .machine push
@@ -652,7 +652,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 
  RESTORE_CTR(r9, PACA_EXSLB)
  RESTORE_PPR_PACA(PACA_EXSLB, r9)
- ld r3,PACA_EXSLB+EX_R3(r13)
+ mr r3,r12
  ld r9,PACA_EXSLB+EX_R9(r13)
  ld r10,PACA_EXSLB+EX_R10(r13)
  ld r11,PACA_EXSLB+EX_R11(r13)
@@ -662,8 +662,9 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
  b . /* prevent speculative execution */
 
 2: std     r3,PACA_EXSLB+EX_DAR(r13)
- ld r3,PACA_EXSLB+EX_R3(r13)
+ mr r3,r12
  mfspr r11,SPRN_SRR0
+ mfspr r12,SPRN_SRR1
  LOAD_HANDLER(r10,unrecov_slb)
  mtspr SPRN_SRR0,r10
  ld r10,PACAKMSR(r13)
@@ -672,8 +673,9 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
  b .
 
 8: std     r3,PACA_EXSLB+EX_DAR(r13)
- ld r3,PACA_EXSLB+EX_R3(r13)
+ mr r3,r12
  mfspr r11,SPRN_SRR0
+ mfspr r12,SPRN_SRR1
  LOAD_HANDLER(r10,bad_addr_slb)
  mtspr SPRN_SRR0,r10
  ld r10,PACAKMSR(r13)
--
2.11.0

[PATCH 5/9] powerpc/64s: paca add EX_SIZE definition for exception save areas

Nicholas Piggin

Signed-off-by: Nicholas Piggin <[hidden email]>
---
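As a standalone illustration (not part of the patch), the relationship
between the last field offset and EX_SIZE can be sanity-checked in
plain C:

    #include <assert.h>
    #include <stdint.h>

    #define EX_CTR  96   /* byte offset of the last save slot */
    #define EX_SIZE 13   /* size in u64 units */

    int main(void)
    {
            /* EX_CTR occupies bytes 96..103, so 13 u64 slots are needed */
            assert(EX_CTR + sizeof(uint64_t) == EX_SIZE * sizeof(uint64_t));
            return 0;
    }
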
 arch/powerpc/include/asm/exception-64s.h | 30 +++++++++++++++++++-----------
 arch/powerpc/include/asm/paca.h          | 12 ++++++++----
 2 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 183d73b6ed99..3edcad24684f 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -24,18 +24,8 @@
  *  as published by the Free Software Foundation; either version
  *  2 of the License, or (at your option) any later version.
  */
-/*
- * The following macros define the code that appears as
- * the prologue to each of the exception handlers.  They
- * are split into two parts to allow a single kernel binary
- * to be used for pSeries and iSeries.
- *
- * We make as much of the exception code common between native
- * exception handlers (including pSeries LPAR) and iSeries LPAR
- * implementations as possible.
- */
-#include <asm/head-64.h>
 
+/* PACA save area offsets (exgen, exmc, etc) */
 #define EX_R9 0
 #define EX_R10 8
 #define EX_R11 16
@@ -51,6 +41,22 @@
 #define EX_PPR 88 /* SMT thread status register (priority) */
 #define EX_CTR 96
 
+#define EX_SIZE 13 /* size in u64 units */
+
+#ifdef __ASSEMBLY__
+
+/*
+ * The following macros define the code that appears as
+ * the prologue to each of the exception handlers.  They
+ * are split into two parts to allow a single kernel binary
+ * to be used for pSeries and iSeries.
+ *
+ * We make as much of the exception code common between native
+ * exception handlers (including pSeries LPAR) and iSeries LPAR
+ * implementations as possible.
+ */
+#include <asm/head-64.h>
+
 #ifdef CONFIG_RELOCATABLE
 #define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \
  mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \
@@ -608,4 +614,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
 #define FINISH_NAP
 #endif
 
+#endif /*__ASSEMBLY__ */
+
 #endif /* _ASM_POWERPC_EXCEPTION_H */
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 1c09f8fe2ee8..8ea0907975a4 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -21,7 +21,11 @@
 #include <asm/lppaca.h>
 #include <asm/mmu.h>
 #include <asm/page.h>
+#ifdef CONFIG_PPC_BOOK3E
 #include <asm/exception-64e.h>
+#else
+#include <asm/exception-64s.h>
+#endif
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 #include <asm/kvm_book3s_asm.h>
 #endif
@@ -98,8 +102,8 @@ struct paca_struct {
  * Now, starting in cacheline 2, the exception save areas
  */
  /* used for most interrupts/exceptions */
- u64 exgen[13] __attribute__((aligned(0x80)));
- u64 exslb[13]; /* used for SLB/segment table misses
+ u64 exgen[EX_SIZE] __attribute__((aligned(0x80)));
+ u64 exslb[EX_SIZE]; /* used for SLB/segment table misses
  * on the linear mapping */
  /* SLB related definitions */
  u16 vmalloc_sllp;
@@ -181,8 +185,8 @@ struct paca_struct {
 
 #ifdef CONFIG_PPC_STD_MMU_64
  /* Non-maskable exceptions that are not performance critical */
- u64 exnmi[13]; /* used for system reset (nmi) */
- u64 exmc[13]; /* used for machine checks */
+ u64 exnmi[EX_SIZE]; /* used for system reset (nmi) */
+ u64 exmc[EX_SIZE]; /* used for machine checks */
 #endif
 #ifdef CONFIG_PPC_BOOK3S_64
  /* Exclusive stacks for system reset and machine check exception. */
--
2.11.0

[PATCH 6/9] powerpc/64s: paca EX_SRR0 is unused, remove it

Nicholas Piggin

Signed-off-by: Nicholas Piggin <[hidden email]>
---
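Removing EX_SRR0's slot shifts everything after it down by 8 bytes;
the 4-byte DSISR/CCR pair continues to share one u64 slot:

    EX_R9 0, EX_R10 8, EX_R11 16, EX_R12 24, EX_R13 32, EX_DAR 40,
    EX_DSISR 48 (u32) / EX_CCR 52 (u32), EX_R3 56, EX_LR 64,
    EX_CFAR 72, EX_PPR 80, EX_CTR 88

EX_CTR + 8 == 96 == 12 * 8, hence EX_SIZE drops from 13 to 12.
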
 arch/powerpc/include/asm/exception-64s.h | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 3edcad24684f..e1103dc9d8e8 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -31,17 +31,16 @@
 #define EX_R11 16
 #define EX_R12 24
 #define EX_R13 32
-#define EX_SRR0 40
-#define EX_DAR 48
-#define EX_DSISR 56
-#define EX_CCR 60
-#define EX_R3 64
-#define EX_LR 72
-#define EX_CFAR 80
-#define EX_PPR 88 /* SMT thread status register (priority) */
-#define EX_CTR 96
-
-#define EX_SIZE 13 /* size in u64 units */
+#define EX_DAR 40
+#define EX_DSISR 48
+#define EX_CCR 52
+#define EX_R3 56
+#define EX_LR 64
+#define EX_CFAR 72
+#define EX_PPR 80 /* SMT thread status register (priority) */
+#define EX_CTR 88
+
+#define EX_SIZE 12 /* size in u64 units */
 
 #ifdef __ASSEMBLY__
 
--
2.11.0

[PATCH 7/9] powerpc/64s: paca EX_LR can be merged with EX_DAR

Nicholas Piggin

EX_LR is used only for a small section of the SLB miss handler.
Merge it with EX_DAR.

Signed-off-by: Nicholas Piggin <[hidden email]>
---
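The overlap is the assembly-level analogue of a C union over the same
eight bytes; a minimal sketch (names hypothetical):

    #include <stdint.h>

    /* Two values that are never live at the same time share one slot. */
    union ex_slot {
            uint64_t dar;   /* faulting address, used on the error paths */
            uint64_t lr;    /* LR, live only around slb_allocate_realmode */
    };

    int main(void)
    {
            _Static_assert(sizeof(union ex_slot) == 8, "one slot, two uses");
            return 0;
    }
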
 arch/powerpc/include/asm/exception-64s.h | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index e1103dc9d8e8..ba03db14e1e8 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -35,12 +35,19 @@
 #define EX_DSISR 48
 #define EX_CCR 52
 #define EX_R3 56
-#define EX_LR 64
-#define EX_CFAR 72
-#define EX_PPR 80 /* SMT thread status register (priority) */
-#define EX_CTR 88
+#define EX_CFAR 64
+#define EX_PPR 72
+#define EX_CTR 80
 
-#define EX_SIZE 12 /* size in u64 units */
+#define EX_SIZE 11 /* size in u64 units */
+
+/*
+ * EX_LR is only used in EXSLB and where it does not overlap with EX_DAR
+ * EX_CCR similarly with DSISR, but being 4 byte registers there is a hole
+ * in the save area so it's not necessary to overlap them. Could be used
+ * for future savings though if another 4 byte register was to be saved.
+ */
+#define EX_LR EX_DAR
 
 #ifdef __ASSEMBLY__
 
--
2.11.0

[PATCH 8/9] powerpc/64s: paca EX_R3 can be merged with EX_DAR

Nicholas Piggin

EX_R3 is used only for a small section of the bad stack handler.
Merge it with EX_DAR.

Signed-off-by: Nicholas Piggin <[hidden email]>
---
 arch/powerpc/include/asm/exception-64s.h | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index ba03db14e1e8..aaee57f0e5ad 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -34,12 +34,11 @@
 #define EX_DAR 40
 #define EX_DSISR 48
 #define EX_CCR 52
-#define EX_R3 56
-#define EX_CFAR 64
-#define EX_PPR 72
-#define EX_CTR 80
+#define EX_CFAR 56
+#define EX_PPR 64
+#define EX_CTR 72
 
-#define EX_SIZE 11 /* size in u64 units */
+#define EX_SIZE 10 /* size in u64 units */
 
 /*
  * EX_LR is only used in EXSLB and where it does not overlap with EX_DAR
@@ -49,6 +48,13 @@
  */
 #define EX_LR EX_DAR
 
+/*
+ * EX_R3 is only used by the bad_stack handler. bad_stack reloads and
+ * saves DAR from SPRN_DAR, and EX_DAR is not used. So EX_R3 can overlap
+ * with EX_DAR.
+ */
+#define EX_R3 EX_DAR
+
 #ifdef __ASSEMBLY__
 
 /*
--
2.11.0

[PATCH 9/9] powerpc/64s: paca EX_CTR is not used with !RELOCATABLE, remove it

Nicholas Piggin

Signed-off-by: Nicholas Piggin <[hidden email]>
---
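Resulting sizes: with CONFIG_RELOCATABLE the last slot is EX_CTR at
byte 72, so EX_SIZE is 10 (72 + 8 == 80 == 10 * 8); without it the
last slot is EX_PPR at 64 and EX_SIZE is 9 (64 + 8 == 72 == 9 * 8).
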
 arch/powerpc/include/asm/exception-64s.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index aaee57f0e5ad..3b768cb668e7 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -36,9 +36,12 @@
 #define EX_CCR 52
 #define EX_CFAR 56
 #define EX_PPR 64
+#if defined(CONFIG_RELOCATABLE)
 #define EX_CTR 72
-
 #define EX_SIZE 10 /* size in u64 units */
+#else
+#define EX_SIZE 9 /* size in u64 units */
+#endif
 
 /*
  * EX_LR is only used in EXSLB and where it does not overlap with EX_DAR
--
2.11.0

Re: [PATCH 4/9] powerpc/64s: SLB miss handler avoid r3 save/restore

Michael Ellerman

Nicholas Piggin <[hidden email]> writes:

> diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
> index 486e205cc762..6ba4c4c6ae69 100644
> --- a/arch/powerpc/kernel/exceptions-64s.S
> +++ b/arch/powerpc/kernel/exceptions-64s.S
> @@ -625,6 +625,9 @@ EXC_COMMON_BEGIN(slb_miss_realmode)
>   stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
>   std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
>  
> + andi. r11,r11,MSR_RI /* check for unrecoverable exception */
> + beq- 2f
> +
>   crset 4*cr0+eq
>  #ifdef CONFIG_PPC_STD_MMU_64
>  BEGIN_MMU_FTR_SECTION
> @@ -638,9 +641,6 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
>  
>   beq- 8f /* if bad address, make full stack frame */
>  
> - andi. r10,r12,MSR_RI /* check for unrecoverable exception */
> - beq- 2f
> -

Moving that check before slb_allocate_realmode() makes me a bit nervous.

It's already a bug if we're taking an SLB miss with RI off, but I'm
worried that by not doing the SLB allocate we might turn what would be a
regular oops into an infinite loop of SLB misses. But my brain is too
sleep-deprived today to decide either way.

cheers
Re: [PATCH 3/9] powerpc/64s: SLB miss already has CTR saved for relocatable kernel

Michael Ellerman

Nicholas Piggin <[hidden email]> writes:

> The EXCEPTION_PROLOG_1 used by SLB miss already saves CTR when the
> kernel is built with CONFIG_RELOCATABLE. So it does not have to be
> saved and reloaded when branching to slb_miss_realmode. It can be
> restored from the PACA as usual.
>
> Signed-off-by: Nicholas Piggin <[hidden email]>
> ---
>  arch/powerpc/kernel/exceptions-64s.S | 13 +++++--------
>  1 file changed, 5 insertions(+), 8 deletions(-)
>
> diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
> index a4a71bce35d6..486e205cc762 100644
> --- a/arch/powerpc/kernel/exceptions-64s.S
> +++ b/arch/powerpc/kernel/exceptions-64s.S
> @@ -519,7 +519,7 @@ EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
>   * because the distance from here to there depends on where
>   * the kernel ends up being put.
>   */
> - mfctr r11
> + /* CTR is saved if RELOCATABLE */
>   LOAD_HANDLER(r10, slb_miss_realmode)
>   mtctr r10
>   bctr

AFAICS these can all use BRANCH_TO_COMMON().

So I'll drop all the comment additions from this and do a follow-up to
switch to BRANCH_TO_COMMON().

cheers
Re: [PATCH 4/9] powerpc/64s: SLB miss handler avoid r3 save/restore

Nicholas Piggin

On Mon, 19 Jun 2017 14:48:37 +1000
Michael Ellerman <[hidden email]> wrote:

> Nicholas Piggin <[hidden email]> writes:
>
> > diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
> > index 486e205cc762..6ba4c4c6ae69 100644
> > --- a/arch/powerpc/kernel/exceptions-64s.S
> > +++ b/arch/powerpc/kernel/exceptions-64s.S
> > @@ -625,6 +625,9 @@ EXC_COMMON_BEGIN(slb_miss_realmode)
> >   stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
> >   std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
> >  
> > + andi. r11,r11,MSR_RI /* check for unrecoverable exception */
> > + beq- 2f
> > +
> >   crset 4*cr0+eq
> >  #ifdef CONFIG_PPC_STD_MMU_64
> >  BEGIN_MMU_FTR_SECTION
> > @@ -638,9 +641,6 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
> >  
> >   beq- 8f /* if bad address, make full stack frame */
> >  
> > - andi. r10,r12,MSR_RI /* check for unrecoverable exception */
> > - beq- 2f
> > -  
>
> Moving that check before slb_allocate_realmode() makes me a bit nervous.
>
> It's already a bug if we're taking an SLB miss with RI off, but I'm
> worried that by not doing the SLB allocate we might turn what would be a
> regular oops into an infinite loop of SLB misses. But my brain is too
> sleep deprived today to decide either way.

After some offline back and forth over this, I think it's agreed we
should try to install the SLB entry before exiting. If nothing else,
because that's what the existing code does.

So this incremental patch should restore that behaviour.

Some observations/comments:

- The additional mtcrf instruction may be getting close to the point
  where a single mtcr / mtcrf of multiple fields is faster. However it's
  not obviously past that point yet on either POWER8 or POWER9, so I've
  kept the single-field mtcrf for now.

- unrecov_slb possibly should use the emergency stack. There's a
  limit to how robust we can try to be, but in testing it wasn't
  too hard to put the code (+/- this patch) into an infinite SLB
  loop here by having something "interesting" in r1 when we take
  an RI=0 SLB fault.
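
- To spell out the cr5 trick: after andi. r11,r11,MSR_RI, r11 is either
  0 or MSR_RI, so cmpdi cr5,r11,MSR_RI sets cr5.eq exactly when RI was
  set. slb_allocate_realmode leaves cr5 untouched (it only uses cr0 and
  cr7), so cr5 survives the call, and the extra mtcrf 0x04,r9 restores
  the caller's cr5 on the way out.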

Thanks,
Nick

---
 arch/powerpc/kernel/exceptions-64s.S | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 6ba4c4c6ae69..575eed979f41 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -625,8 +625,14 @@ EXC_COMMON_BEGIN(slb_miss_realmode)
  stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
  std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
 
+ /*
+ * Test MSR_RI before calling slb_allocate_realmode, because the
+ * MSR in r11 gets clobbered. However we still want to allocate
+ * SLB in case MSR_RI=0, to minimise the risk of getting stuck in
+ * recursive SLB faults. So use cr5 for this, which is preserved.
+ */
  andi. r11,r11,MSR_RI /* check for unrecoverable exception */
- beq- 2f
+ cmpdi cr5,r11,MSR_RI
 
  crset 4*cr0+eq
 #ifdef CONFIG_PPC_STD_MMU_64
@@ -641,11 +647,14 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 
  beq- 8f /* if bad address, make full stack frame */
 
+ bne- cr5,2f /* if unrecoverable exception, oops */
+
  /* All done -- return from exception. */
 
 .machine push
 .machine "power4"
  mtcrf 0x80,r9
+ mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */
  mtcrf 0x02,r9 /* I/D indication is in cr6 */
  mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
 .machine pop
--
2.11.0

Re: [PATCH 3/9] powerpc/64s: SLB miss already has CTR saved for relocatable kernel

Nicholas Piggin

On Mon, 19 Jun 2017 21:45:06 +1000
Michael Ellerman <[hidden email]> wrote:

> Nicholas Piggin <[hidden email]> writes:
>
> > The EXCEPTION_PROLOG_1 used by SLB miss already saves CTR when the
> > kernel is built with CONFIG_RELOCATABLE. So it does not have to be
> > saved and reloaded when branching to slb_miss_realmode. It can be
> > restored from the PACA as usual.
> >
> > Signed-off-by: Nicholas Piggin <[hidden email]>
> > ---
> >  arch/powerpc/kernel/exceptions-64s.S | 13 +++++--------
> >  1 file changed, 5 insertions(+), 8 deletions(-)
> >
> > diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
> > index a4a71bce35d6..486e205cc762 100644
> > --- a/arch/powerpc/kernel/exceptions-64s.S
> > +++ b/arch/powerpc/kernel/exceptions-64s.S
> > @@ -519,7 +519,7 @@ EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
> >   * because the distance from here to there depends on where
> >   * the kernel ends up being put.
> >   */
> > - mfctr r11
> > + /* CTR is saved if RELOCATABLE */
> >   LOAD_HANDLER(r10, slb_miss_realmode)
> >   mtctr r10
> >   bctr  
>
> AFAICS these can all use BRANCH_TO_COMMON().
>
> So I'll drop all the comment additions from this and do a follow-up to
> switch to BRANCH_TO_COMMON().

That will tidy up those ifdefs nicely. Good catch.

Thanks,
Nick
Re: [1/9] powerpc/64s: slb_allocate_realmode() preserve r3

Michael Ellerman

On Sun, 2017-05-21 at 13:15:42 UTC, Nicholas Piggin wrote:
> One fewer registers clobbered by this function means the SLB miss
> handler can save one fewer.
>
> Signed-off-by: Nicholas Piggin <[hidden email]>

Series applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/d59afffdf04c66c09085160706297e

cheers