remarkable-linux/arch/arm64/lib/copy_page.S
Ard Biesheuvel 288be97cc7 arm64/lib: copy_page: use consistent prefetch stride
The optional prefetch instructions in the copy_page() routine are
inconsistent: at the start of the function, two cachelines are
prefetched beyond the one being loaded in the first iteration, but
in the loop, the prefetch is one more line ahead. This appears to
be unintentional, so let's fix it.

While at it, fix the comment style and white space.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
2017-07-25 10:04:42 +01:00

90 lines
2.1 KiB
ArmAsm

/*
* Copyright (C) 2012 ARM Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/linkage.h>
#include <linux/const.h>
#include <asm/assembler.h>
#include <asm/page.h>
#include <asm/cpufeature.h>
#include <asm/alternative.h>
/*
* Copy a page from src to dest (both are page aligned)
*
* Parameters:
* x0 - dest
* x1 - src
*/
ENTRY(copy_page)
alternative_if ARM64_HAS_NO_HW_PREFETCH
// Prefetch three cache lines ahead.
prfm pldl1strm, [x1, #128]
prfm pldl1strm, [x1, #256]
prfm pldl1strm, [x1, #384]
alternative_else_nop_endif
ldp x2, x3, [x1]
ldp x4, x5, [x1, #16]
ldp x6, x7, [x1, #32]
ldp x8, x9, [x1, #48]
ldp x10, x11, [x1, #64]
ldp x12, x13, [x1, #80]
ldp x14, x15, [x1, #96]
ldp x16, x17, [x1, #112]
mov x18, #(PAGE_SIZE - 128)
add x1, x1, #128
1:
subs x18, x18, #128
alternative_if ARM64_HAS_NO_HW_PREFETCH
prfm pldl1strm, [x1, #384]
alternative_else_nop_endif
stnp x2, x3, [x0]
ldp x2, x3, [x1]
stnp x4, x5, [x0, #16]
ldp x4, x5, [x1, #16]
stnp x6, x7, [x0, #32]
ldp x6, x7, [x1, #32]
stnp x8, x9, [x0, #48]
ldp x8, x9, [x1, #48]
stnp x10, x11, [x0, #64]
ldp x10, x11, [x1, #64]
stnp x12, x13, [x0, #80]
ldp x12, x13, [x1, #80]
stnp x14, x15, [x0, #96]
ldp x14, x15, [x1, #96]
stnp x16, x17, [x0, #112]
ldp x16, x17, [x1, #112]
add x0, x0, #128
add x1, x1, #128
b.gt 1b
stnp x2, x3, [x0]
stnp x4, x5, [x0, #16]
stnp x6, x7, [x0, #32]
stnp x8, x9, [x0, #48]
stnp x10, x11, [x0, #64]
stnp x12, x13, [x0, #80]
stnp x14, x15, [x0, #96]
stnp x16, x17, [x0, #112]
ret
ENDPROC(copy_page)