powerpc: Use LOAD_REG_IMMEDIATE only for constants on 64-bit

Using LOAD_REG_IMMEDIATE to get the address of kernel symbols
generates 5 instructions where LOAD_REG_ADDR can do it in one,
and will generate R_PPC64_ADDR16_* relocations in the output when
we get to making the kernel as a position-independent executable,
which we'd rather not have to handle.  This changes various bits
of assembly code to use LOAD_REG_ADDR when we need to get the
address of a symbol, or to use suitable position-independent code
for cases where we can't access the TOC for various reasons, or
if we're not running at the address we were linked at.

It also cleans up a few minor things; there's no reason to save and
restore SRR0/1 around RTAS calls, __mmu_off can get the return
address from LR more conveniently than the caller can supply it in
R4 (and we already assume elsewhere that EA == RA if the MMU is on
in early boot), and enable_64b_mode was using 5 instructions where
2 would do.

Signed-off-by: Paul Mackerras <paulus@samba.org>
This commit is contained in:
Paul Mackerras 2008-08-30 11:41:12 +10:00
parent 1f6a93e4c3
commit e31aa453bb
6 changed files with 111 additions and 127 deletions

View file

@ -268,7 +268,7 @@ n:
* Loads the value of the constant expression 'expr' into register 'rn'
* using immediate instructions only. Use this when it's important not
* to reference other data (i.e. on ppc64 when the TOC pointer is not
* valid).
* valid) and when 'expr' is a constant or absolute address.
*
* LOAD_REG_ADDR(rn, name)
* Loads the address of label 'name' into register 'rn'. Use this when

View file

@ -110,7 +110,7 @@ load_hids:
isync
/* Save away cpu state */
LOAD_REG_IMMEDIATE(r5,cpu_state_storage)
LOAD_REG_ADDR(r5,cpu_state_storage)
/* Save HID0,1,4 and 5 */
mfspr r3,SPRN_HID0
@ -134,7 +134,7 @@ _GLOBAL(__restore_cpu_ppc970)
rldicl. r0,r0,4,63
beqlr
LOAD_REG_IMMEDIATE(r5,cpu_state_storage)
LOAD_REG_ADDR(r5,cpu_state_storage)
/* Before accessing memory, we make sure rm_ci is clear */
li r0,0
mfspr r3,SPRN_HID4

View file

@ -690,10 +690,6 @@ _GLOBAL(enter_rtas)
std r7,_DAR(r1)
mfdsisr r8
std r8,_DSISR(r1)
mfsrr0 r9
std r9,_SRR0(r1)
mfsrr1 r10
std r10,_SRR1(r1)
/* Temporary workaround to clear CR until RTAS can be modified to
* ignore all bits.
@ -754,6 +750,10 @@ _STATIC(rtas_return_loc)
mfspr r4,SPRN_SPRG3 /* Get PACA */
clrldi r4,r4,2 /* convert to realmode address */
bcl 20,31,$+4
0: mflr r3
ld r3,(1f-0b)(r3) /* get &.rtas_restore_regs */
mfmsr r6
li r0,MSR_RI
andc r6,r6,r0
@ -761,7 +761,6 @@ _STATIC(rtas_return_loc)
mtmsrd r6
ld r1,PACAR1(r4) /* Restore our SP */
LOAD_REG_IMMEDIATE(r3,.rtas_restore_regs)
ld r4,PACASAVEDMSR(r4) /* Restore our MSR */
mtspr SPRN_SRR0,r3
@ -769,6 +768,9 @@ _STATIC(rtas_return_loc)
rfid
b . /* prevent speculative execution */
.align 3
1: .llong .rtas_restore_regs
_STATIC(rtas_restore_regs)
/* relocation is on at this point */
REST_GPR(2, r1) /* Restore the TOC */
@ -788,10 +790,6 @@ _STATIC(rtas_restore_regs)
mtdar r7
ld r8,_DSISR(r1)
mtdsisr r8
ld r9,_SRR0(r1)
mtsrr0 r9
ld r10,_SRR1(r1)
mtsrr1 r10
addi r1,r1,RTAS_FRAME_SIZE /* Unstack our frame */
ld r0,16(r1) /* get return address */

View file

@ -128,11 +128,11 @@ __secondary_hold:
/* Tell the master cpu we're here */
/* Relocation is off & we are located at an address less */
/* than 0x100, so only need to grab low order offset. */
std r24,__secondary_hold_acknowledge@l(0)
std r24,__secondary_hold_acknowledge-_stext(0)
sync
/* All secondary cpus wait here until told to start. */
100: ld r4,__secondary_hold_spinloop@l(0)
100: ld r4,__secondary_hold_spinloop-_stext(0)
cmpdi 0,r4,0
beq 100b
@ -1223,11 +1223,14 @@ _GLOBAL(generic_secondary_smp_init)
/* turn on 64-bit mode */
bl .enable_64b_mode
/* get the TOC pointer (real address) */
bl .relative_toc
/* Set up a paca value for this processor. Since we have the
* physical cpu id in r24, we need to search the pacas to find
* which logical id maps to our physical one.
*/
LOAD_REG_IMMEDIATE(r13, paca) /* Get base vaddr of paca array */
LOAD_REG_ADDR(r13, paca) /* Get base vaddr of paca array */
li r5,0 /* logical cpu id */
1: lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */
cmpw r6,r24 /* Compare to our id */
@ -1256,7 +1259,7 @@ _GLOBAL(generic_secondary_smp_init)
sync /* order paca.run and cur_cpu_spec */
/* See if we need to call a cpu state restore handler */
LOAD_REG_IMMEDIATE(r23, cur_cpu_spec)
LOAD_REG_ADDR(r23, cur_cpu_spec)
ld r23,0(r23)
ld r23,CPU_SPEC_RESTORE(r23)
cmpdi 0,r23,0
@ -1272,10 +1275,15 @@ _GLOBAL(generic_secondary_smp_init)
b __secondary_start
#endif
/*
* Turn the MMU off.
* Assumes we're mapped EA == RA if the MMU is on.
*/
_STATIC(__mmu_off)
mfmsr r3
andi. r0,r3,MSR_IR|MSR_DR
beqlr
mflr r4
andc r3,r3,r0
mtspr SPRN_SRR0,r4
mtspr SPRN_SRR1,r3
@ -1296,6 +1304,18 @@ _STATIC(__mmu_off)
*
*/
_GLOBAL(__start_initialization_multiplatform)
/* Make sure we are running in 64 bits mode */
bl .enable_64b_mode
/* Get TOC pointer (current runtime address) */
bl .relative_toc
/* find out where we are now */
bcl 20,31,$+4
0: mflr r26 /* r26 = runtime addr here */
addis r26,r26,(_stext - 0b)@ha
addi r26,r26,(_stext - 0b)@l /* current runtime base addr */
/*
* Are we booted from a PROM Of-type client-interface ?
*/
@ -1307,9 +1327,6 @@ _GLOBAL(__start_initialization_multiplatform)
mr r31,r3
mr r30,r4
/* Make sure we are running in 64 bits mode */
bl .enable_64b_mode
/* Setup some critical 970 SPRs before switching MMU off */
mfspr r0,SPRN_PVR
srwi r0,r0,16
@ -1324,9 +1341,7 @@ _GLOBAL(__start_initialization_multiplatform)
1: bl .__cpu_preinit_ppc970
2:
/* Switch off MMU if not already */
LOAD_REG_IMMEDIATE(r4, .__after_prom_start - KERNELBASE)
add r4,r4,r30
/* Switch off MMU if not already off */
bl .__mmu_off
b .__after_prom_start
@ -1341,23 +1356,10 @@ _INIT_STATIC(__boot_from_prom)
/*
* Align the stack to 16-byte boundary
* Depending on the size and layout of the ELF sections in the initial
* boot binary, the stack pointer will be unalignet on PowerMac
* boot binary, the stack pointer may be unaligned on PowerMac
*/
rldicr r1,r1,0,59
/* Make sure we are running in 64 bits mode */
bl .enable_64b_mode
/* put a relocation offset into r3 */
bl .reloc_offset
LOAD_REG_IMMEDIATE(r2,__toc_start)
addi r2,r2,0x4000
addi r2,r2,0x4000
/* Relocate the TOC from a virt addr to a real addr */
add r2,r2,r3
/* Restore parameters */
mr r3,r31
mr r4,r30
@ -1373,53 +1375,37 @@ _INIT_STATIC(__boot_from_prom)
_STATIC(__after_prom_start)
/*
* We need to run with __start at physical address PHYSICAL_START.
* We need to run with _stext at physical address PHYSICAL_START.
* This will leave some code in the first 256B of
* real memory, which are reserved for software use.
* The remainder of the first page is loaded with the fixed
* interrupt vectors. The next two pages are filled with
* unknown exception placeholders.
*
* Note: This process overwrites the OF exception vectors.
* r26 == relocation offset
* r27 == KERNELBASE
*/
bl .reloc_offset
mr r26,r3
LOAD_REG_IMMEDIATE(r27, KERNELBASE)
LOAD_REG_IMMEDIATE(r3, PHYSICAL_START) /* target addr */
// XXX FIXME: Use phys returned by OF (r30)
add r4,r27,r26 /* source addr */
/* current address of _start */
/* i.e. where we are running */
/* the source addr */
cmpdi r4,0 /* In some cases the loader may */
bne 1f
b .start_here_multiplatform /* have already put us at zero */
/* so we can skip the copy. */
1: LOAD_REG_IMMEDIATE(r5,copy_to_here) /* # bytes of memory to copy */
sub r5,r5,r27
cmpd r3,r26 /* In some cases the loader may */
beq 9f /* have already put us at zero */
mr r4,r26 /* source address */
lis r5,(copy_to_here - _stext)@ha
addi r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */
li r6,0x100 /* Start offset, the first 0x100 */
/* bytes were copied earlier. */
bl .copy_and_flush /* copy the first n bytes */
/* this includes the code being */
/* executed here. */
LOAD_REG_IMMEDIATE(r0, 4f) /* Jump to the copy of this code */
mtctr r0 /* that we just made/relocated */
addis r8,r3,(4f - _stext)@ha /* Jump to the copy of this code */
addi r8,r8,(4f - _stext)@l /* that we just made */
mtctr r8
bctr
4: LOAD_REG_IMMEDIATE(r5,klimit)
add r5,r5,r26
ld r5,0(r5) /* get the value of klimit */
sub r5,r5,r27
4: /* Now copy the rest of the kernel up to _end */
addis r5,r26,(p_end - _stext)@ha
ld r5,(p_end - _stext)@l(r5) /* get _end */
bl .copy_and_flush /* copy the rest */
b .start_here_multiplatform
9: b .start_here_multiplatform
p_end: .llong _end - _stext
/*
* Copy routine used to copy the kernel to start at physical address 0
@ -1484,6 +1470,9 @@ _GLOBAL(pmac_secondary_start)
/* turn on 64-bit mode */
bl .enable_64b_mode
/* get TOC pointer (real address) */
bl .relative_toc
/* Copy some CPU settings from CPU 0 */
bl .__restore_cpu_ppc970
@ -1493,10 +1482,10 @@ _GLOBAL(pmac_secondary_start)
mtmsrd r3 /* RI on */
/* Set up a paca value for this processor. */
LOAD_REG_IMMEDIATE(r4, paca) /* Get base vaddr of paca array */
mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */
LOAD_REG_ADDR(r4,paca) /* Get base vaddr of paca array */
mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */
add r13,r13,r4 /* for this processor. */
mtspr SPRN_SPRG3,r13 /* Save vaddr of paca in SPRG3 */
mtspr SPRN_SPRG3,r13 /* Save vaddr of paca in SPRG3 */
/* Create a temp kernel stack for use before relocation is on. */
ld r1,PACAEMERGSP(r13)
@ -1524,9 +1513,6 @@ __secondary_start:
/* Set thread priority to MEDIUM */
HMT_MEDIUM
/* Load TOC */
ld r2,PACATOC(r13)
/* Do early setup for that CPU (stab, slb, hash table pointer) */
bl .early_setup_secondary
@ -1563,9 +1549,11 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES)
/*
* Running with relocation on at this point. All we want to do is
* zero the stack back-chain pointer before going into C code.
* zero the stack back-chain pointer and get the TOC virtual address
* before going into C code.
*/
_GLOBAL(start_secondary_prolog)
ld r2,PACATOC(r13)
li r3,0
std r3,0(r1) /* Zero the stack frame pointer */
bl .start_secondary
@ -1577,34 +1565,46 @@ _GLOBAL(start_secondary_prolog)
*/
_GLOBAL(enable_64b_mode)
mfmsr r11 /* grab the current MSR */
li r12,1
rldicr r12,r12,MSR_SF_LG,(63-MSR_SF_LG)
or r11,r11,r12
li r12,1
rldicr r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG)
li r12,(MSR_SF | MSR_ISF)@highest
sldi r12,r12,48
or r11,r11,r12
mtmsrd r11
isync
blr
/*
* This puts the TOC pointer into r2, offset by 0x8000 (as expected
* by the toolchain). It computes the correct value for wherever we
* are running at the moment, using position-independent code.
*/
_GLOBAL(relative_toc)
mflr r0
bcl 20,31,$+4
0: mflr r9
ld r2,(p_toc - 0b)(r9)
add r2,r2,r9
mtlr r0
blr
p_toc: .llong __toc_start + 0x8000 - 0b
/*
* This is where the main kernel code starts.
*/
_INIT_STATIC(start_here_multiplatform)
/* get a new offset, now that the kernel has moved. */
bl .reloc_offset
mr r26,r3
/* set up the TOC (real address) */
bl .relative_toc
/* Clear out the BSS. It may have been done in prom_init,
* already but that's irrelevant since prom_init will soon
* be detached from the kernel completely. Besides, we need
* to clear it now for kexec-style entry.
*/
LOAD_REG_IMMEDIATE(r11,__bss_stop)
LOAD_REG_IMMEDIATE(r8,__bss_start)
LOAD_REG_ADDR(r11,__bss_stop)
LOAD_REG_ADDR(r8,__bss_start)
sub r11,r11,r8 /* bss size */
addi r11,r11,7 /* round up to an even double word */
rldicl. r11,r11,61,3 /* shift right by 3 */
srdi. r11,r11,3 /* shift right by 3 */
beq 4f
addi r8,r8,-8
li r0,0
@ -1617,35 +1617,28 @@ _INIT_STATIC(start_here_multiplatform)
ori r6,r6,MSR_RI
mtmsrd r6 /* RI on */
/* The following gets the stack and TOC set up with the regs */
/* The following gets the stack set up with the regs */
/* pointing to the real addr of the kernel stack. This is */
/* all done to support the C function call below which sets */
/* up the htab. This is done because we have relocated the */
/* kernel but are still running in real mode. */
LOAD_REG_IMMEDIATE(r3,init_thread_union)
add r3,r3,r26
LOAD_REG_ADDR(r3,init_thread_union)
/* set up a stack pointer (physical address) */
/* set up a stack pointer */
addi r1,r3,THREAD_SIZE
li r0,0
stdu r0,-STACK_FRAME_OVERHEAD(r1)
/* set up the TOC (physical address) */
LOAD_REG_IMMEDIATE(r2,__toc_start)
addi r2,r2,0x4000
addi r2,r2,0x4000
add r2,r2,r26
/* Do very early kernel initializations, including initial hash table,
* stab and slb setup before we turn on relocation. */
/* Restore parameters passed from prom_init/kexec */
mr r3,r31
bl .early_setup
bl .early_setup /* also sets r13 and SPRG3 */
LOAD_REG_IMMEDIATE(r3, .start_here_common)
LOAD_REG_IMMEDIATE(r4, MSR_KERNEL)
LOAD_REG_ADDR(r3, .start_here_common)
ld r4,PACAKMSR(r13)
mtspr SPRN_SRR0,r3
mtspr SPRN_SRR1,r4
rfid
@ -1654,21 +1647,11 @@ _INIT_STATIC(start_here_multiplatform)
/* This is where all platforms converge execution */
_INIT_GLOBAL(start_here_common)
/* relocation is on at this point */
/* The following code sets up the SP and TOC now that we are */
/* running with translation enabled. */
LOAD_REG_IMMEDIATE(r3,init_thread_union)
/* set up the stack */
addi r1,r3,THREAD_SIZE
li r0,0
stdu r0,-STACK_FRAME_OVERHEAD(r1)
/* Load the TOC */
ld r2,PACATOC(r13)
std r1,PACAKSAVE(r13)
/* Load the TOC (virtual address) */
ld r2,PACATOC(r13)
bl .setup_system
/* Load up the kernel context */

View file

@ -31,11 +31,14 @@ _GLOBAL(reloc_offset)
mflr r0
bl 1f
1: mflr r3
LOAD_REG_IMMEDIATE(r4,1b)
PPC_LL r4,(2f-1b)(r3)
subf r3,r4,r3
mtlr r0
blr
.align 3
2: PPC_LONG 1b
/*
* add_reloc_offset(x) returns x + reloc_offset().
*/
@ -43,12 +46,15 @@ _GLOBAL(add_reloc_offset)
mflr r0
bl 1f
1: mflr r5
LOAD_REG_IMMEDIATE(r4,1b)
PPC_LL r4,(2f-1b)(r5)
subf r5,r4,r5
add r3,r3,r5
mtlr r0
blr
.align 3
2: PPC_LONG 1b
_GLOBAL(kernel_execve)
li r0,__NR_execve
sc

View file

@ -38,12 +38,13 @@
.globl system_reset_iSeries
system_reset_iSeries:
bl .relative_toc
mfspr r13,SPRN_SPRG3 /* Get alpaca address */
LOAD_REG_IMMEDIATE(r23, alpaca)
LOAD_REG_ADDR(r23, alpaca)
li r0,ALPACA_SIZE
sub r23,r13,r23
divdu r23,r23,r0 /* r23 has cpu number */
LOAD_REG_IMMEDIATE(r13, paca)
LOAD_REG_ADDR(r13, paca)
mulli r0,r23,PACA_SIZE
add r13,r13,r0
mtspr SPRN_SPRG3,r13 /* Save it away for the future */
@ -60,14 +61,14 @@ system_reset_iSeries:
mtspr SPRN_CTRLT,r4
/* Spin on __secondary_hold_spinloop until it is updated by the boot cpu. */
/* In the UP case we'll yeild() later, and we will not access the paca anyway */
/* In the UP case we'll yield() later, and we will not access the paca anyway */
#ifdef CONFIG_SMP
1:
HMT_LOW
LOAD_REG_IMMEDIATE(r23, __secondary_hold_spinloop)
LOAD_REG_ADDR(r23, __secondary_hold_spinloop)
ld r23,0(r23)
sync
LOAD_REG_IMMEDIATE(r3,current_set)
LOAD_REG_ADDR(r3,current_set)
sldi r28,r24,3 /* get current_set[cpu#] */
ldx r3,r3,r28
addi r1,r3,THREAD_SIZE
@ -90,7 +91,7 @@ system_reset_iSeries:
lbz r23,PACAPROCSTART(r13) /* Test if this processor
* should start */
sync
LOAD_REG_IMMEDIATE(r3,current_set)
LOAD_REG_ADDR(r3,current_set)
sldi r28,r24,3 /* get current_set[cpu#] */
ldx r3,r3,r28
addi r1,r3,THREAD_SIZE
@ -255,8 +256,8 @@ hardware_interrupt_iSeries_masked:
_INIT_STATIC(__start_initialization_iSeries)
/* Clear out the BSS */
LOAD_REG_IMMEDIATE(r11,__bss_stop)
LOAD_REG_IMMEDIATE(r8,__bss_start)
LOAD_REG_ADDR(r11,__bss_stop)
LOAD_REG_ADDR(r8,__bss_start)
sub r11,r11,r8 /* bss size */
addi r11,r11,7 /* round up to an even double word */
rldicl. r11,r11,61,3 /* shift right by 3 */
@ -267,15 +268,11 @@ _INIT_STATIC(__start_initialization_iSeries)
3: stdu r0,8(r8)
bdnz 3b
4:
LOAD_REG_IMMEDIATE(r1,init_thread_union)
LOAD_REG_ADDR(r1,init_thread_union)
addi r1,r1,THREAD_SIZE
li r0,0
stdu r0,-STACK_FRAME_OVERHEAD(r1)
LOAD_REG_IMMEDIATE(r2,__toc_start)
addi r2,r2,0x4000
addi r2,r2,0x4000
bl .iSeries_early_setup
bl .early_setup