1
0
Fork 0

Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

hifive-unleashed-5.1
David S. Miller 2018-01-09 10:37:00 -05:00
commit a0ce093180
350 changed files with 3949 additions and 1320 deletions

View File

@ -107,6 +107,7 @@ Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@ascom.ch>
Maciej W. Rozycki <macro@mips.com> <macro@imgtec.com>
Marcin Nowakowski <marcin.nowakowski@mips.com> <marcin.nowakowski@imgtec.com>
Mark Brown <broonie@sirena.org.uk>
Mark Yao <markyao0591@gmail.com> <mark.yao@rock-chips.com>
Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com>
Martin Kepplinger <martink@posteo.de> <martin.kepplinger@ginzinger.com>
Matthieu CASTET <castet.matthieu@free.fr>

View File

@ -109,6 +109,7 @@ parameter is applicable::
IPV6 IPv6 support is enabled.
ISAPNP ISA PnP code is enabled.
ISDN Appropriate ISDN support is enabled.
ISOL CPU Isolation is enabled.
JOY Appropriate joystick support is enabled.
KGDB Kernel debugger support is enabled.
KVM Kernel Virtual Machine support is enabled.

View File

@ -328,11 +328,15 @@
not play well with APC CPU idle - disable it if you have
APC and your system crashes randomly.
apic= [APIC,X86-32] Advanced Programmable Interrupt Controller
apic= [APIC,X86] Advanced Programmable Interrupt Controller
Change the output verbosity whilst booting
Format: { quiet (default) | verbose | debug }
Change the amount of debugging information output
when initialising the APIC and IO-APIC components.
For X86-32, this can also be used to specify an APIC
driver name.
Format: apic=driver_name
Examples: apic=bigsmp
apic_extnmi= [APIC,X86] External NMI delivery setting
Format: { bsp (default) | all | none }
@ -1737,7 +1741,7 @@
isapnp= [ISAPNP]
Format: <RDP>,<reset>,<pci_scan>,<verbosity>
isolcpus= [KNL,SMP] Isolate a given set of CPUs from disturbance.
isolcpus= [KNL,SMP,ISOL] Isolate a given set of CPUs from disturbance.
[Deprecated - use cpusets instead]
Format: [flag-list,]<cpu-list>
@ -2662,7 +2666,7 @@
Valid arguments: on, off
Default: on
nohz_full= [KNL,BOOT]
nohz_full= [KNL,BOOT,SMP,ISOL]
The argument is a cpu list, as described above.
In kernels built with CONFIG_NO_HZ_FULL=y, set
the specified list of CPUs whose tick will be stopped
@ -2708,6 +2712,8 @@
steal time is computed, but won't influence scheduler
behaviour
nopti [X86-64] Disable kernel page table isolation
nolapic [X86-32,APIC] Do not enable or use the local APIC.
nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
@ -3282,6 +3288,12 @@
pt. [PARIDE]
See Documentation/blockdev/paride.txt.
pti= [X86_64]
Control user/kernel address space isolation:
on - enable
off - disable
auto - default setting
pty.legacy_count=
[KNL] Number of legacy pty's. Overwrites compiled-in
default number.

View File

@ -230,7 +230,7 @@ If supported by your machine this will be exposed by the WMI bus with
a sysfs attribute called "force_power".
For example the intel-wmi-thunderbolt driver exposes this attribute in:
/sys/devices/platform/PNP0C14:00/wmi_bus/wmi_bus-PNP0C14:00/86CCFD48-205E-4A77-9C48-2021CBEDE341/force_power
/sys/bus/wmi/devices/86CCFD48-205E-4A77-9C48-2021CBEDE341/force_power
To force the power to on, write 1 to this attribute file.
To disable force power, write 0 to this attribute file.

View File

@ -341,10 +341,7 @@ GuC
GuC-specific firmware loader
----------------------------
.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_loader.c
:doc: GuC-specific firmware loader
.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_loader.c
.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_fw.c
:internal:
GuC-based command submission

View File

@ -12,7 +12,9 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
... unused hole ...
ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
... unused hole ...
fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
vaddr_end for KASLR
fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
fffffe8000000000 - fffffeffffffffff (=39 bits) LDT remap for PTI
ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
... unused hole ...
ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
@ -29,20 +31,22 @@ Virtual memory map with 5 level page tables:
hole caused by [56:63] sign extension
ff00000000000000 - ff0fffffffffffff (=52 bits) guard hole, reserved for hypervisor
ff10000000000000 - ff8fffffffffffff (=55 bits) direct mapping of all phys. memory
ff90000000000000 - ff91ffffffffffff (=49 bits) hole
ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space
ff90000000000000 - ff9fffffffffffff (=52 bits) LDT remap for PTI
ffa0000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space (12800 TB)
ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
... unused hole ...
ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
... unused hole ...
fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
vaddr_end for KASLR
fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
... unused hole ...
ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
... unused hole ...
ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
... unused hole ...
ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0
ffffffffa0000000 - [fixmap start] (~1526 MB) module mapping space
ffffffffa0000000 - fffffffffeffffff (1520 MB) module mapping space
[fixmap start] - ffffffffff5fffff kernel-internal fixmap range
ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
@ -66,9 +70,10 @@ memory window (this size is arbitrary, it can be raised later if needed).
The mappings are not part of any other kernel PGD and are only available
during EFI runtime calls.
The module mapping space size changes based on the CONFIG requirements for the
following fixmap section.
Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
physical memory, vmalloc/ioremap space and virtual memory map are randomized.
Their order is preserved but their base will be offset early at boot time.
Be very careful vs. KASLR when changing anything here. The KASLR address
range must not overlap with anything except the KASAN shadow area, which is
correct as KASAN disables KASLR.

View File

@ -2622,24 +2622,22 @@ F: fs/bfs/
F: include/uapi/linux/bfs_fs.h
BLACKFIN ARCHITECTURE
M: Steven Miao <realmz6@gmail.com>
L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
T: git git://git.code.sf.net/p/adi-linux/code
W: http://blackfin.uclinux.org
S: Supported
S: Orphan
F: arch/blackfin/
BLACKFIN EMAC DRIVER
L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
W: http://blackfin.uclinux.org
S: Supported
S: Orphan
F: drivers/net/ethernet/adi/
BLACKFIN MEDIA DRIVER
M: Scott Jiang <scott.jiang.linux@gmail.com>
L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
W: http://blackfin.uclinux.org/
S: Supported
S: Orphan
F: drivers/media/platform/blackfin/
F: drivers/media/i2c/adv7183*
F: drivers/media/i2c/vs6624*
@ -2647,25 +2645,25 @@ F: drivers/media/i2c/vs6624*
BLACKFIN RTC DRIVER
L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
W: http://blackfin.uclinux.org
S: Supported
S: Orphan
F: drivers/rtc/rtc-bfin.c
BLACKFIN SDH DRIVER
L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
W: http://blackfin.uclinux.org
S: Supported
S: Orphan
F: drivers/mmc/host/bfin_sdh.c
BLACKFIN SERIAL DRIVER
L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
W: http://blackfin.uclinux.org
S: Supported
S: Orphan
F: drivers/tty/serial/bfin_uart.c
BLACKFIN WATCHDOG DRIVER
L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
W: http://blackfin.uclinux.org
S: Supported
S: Orphan
F: drivers/watchdog/bfin_wdt.c
BLINKM RGB LED DRIVER
@ -5154,15 +5152,15 @@ F: sound/usb/misc/ua101.c
EFI TEST DRIVER
L: linux-efi@vger.kernel.org
M: Ivan Hu <ivan.hu@canonical.com>
M: Matt Fleming <matt@codeblueprint.co.uk>
M: Ard Biesheuvel <ard.biesheuvel@linaro.org>
S: Maintained
F: drivers/firmware/efi/test/
EFI VARIABLE FILESYSTEM
M: Matthew Garrett <matthew.garrett@nebula.com>
M: Jeremy Kerr <jk@ozlabs.org>
M: Matt Fleming <matt@codeblueprint.co.uk>
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mfleming/efi.git
M: Ard Biesheuvel <ard.biesheuvel@linaro.org>
T: git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git
L: linux-efi@vger.kernel.org
S: Maintained
F: fs/efivarfs/
@ -5323,7 +5321,6 @@ S: Supported
F: security/integrity/evm/
EXTENSIBLE FIRMWARE INTERFACE (EFI)
M: Matt Fleming <matt@codeblueprint.co.uk>
M: Ard Biesheuvel <ard.biesheuvel@linaro.org>
L: linux-efi@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git
@ -10152,7 +10149,7 @@ F: drivers/irqchip/irq-ompic.c
F: drivers/irqchip/irq-or1k-*
OPENVSWITCH
M: Pravin Shelar <pshelar@nicira.com>
M: Pravin B Shelar <pshelar@ovn.org>
L: netdev@vger.kernel.org
L: dev@openvswitch.org
W: http://openvswitch.org
@ -13508,6 +13505,7 @@ M: Mika Westerberg <mika.westerberg@linux.intel.com>
M: Yehezkel Bernat <yehezkel.bernat@intel.com>
T: git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git
S: Maintained
F: Documentation/admin-guide/thunderbolt.rst
F: drivers/thunderbolt/
F: include/linux/thunderbolt.h

View File

@ -2,7 +2,7 @@
VERSION = 4
PATCHLEVEL = 15
SUBLEVEL = 0
EXTRAVERSION = -rc5
EXTRAVERSION = -rc7
NAME = Fearless Coyote
# *DOCUMENTATION*
@ -789,6 +789,9 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign)
# disable invalid "can't wrap" optimizations for signed / pointers
KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow)
# Make sure -fstack-check isn't enabled (like gentoo apparently did)
KBUILD_CFLAGS += $(call cc-option,-fno-stack-check,)
# conserve stack if available
KBUILD_CFLAGS += $(call cc-option,-fconserve-stack)

View File

@ -35,6 +35,14 @@
reg = <0x80 0x10>, <0x100 0x10>;
#clock-cells = <0>;
clocks = <&input_clk>;
/*
* Set initial core pll output frequency to 90MHz.
* It will be applied at the core pll driver probing
* on early boot.
*/
assigned-clocks = <&core_clk>;
assigned-clock-rates = <90000000>;
};
core_intc: archs-intc@cpu {

View File

@ -35,6 +35,14 @@
reg = <0x80 0x10>, <0x100 0x10>;
#clock-cells = <0>;
clocks = <&input_clk>;
/*
* Set initial core pll output frequency to 100MHz.
* It will be applied at the core pll driver probing
* on early boot.
*/
assigned-clocks = <&core_clk>;
assigned-clock-rates = <100000000>;
};
core_intc: archs-intc@cpu {

View File

@ -114,6 +114,14 @@
reg = <0x00 0x10>, <0x14B8 0x4>;
#clock-cells = <0>;
clocks = <&input_clk>;
/*
* Set initial core pll output frequency to 1GHz.
* It will be applied at the core pll driver probing
* on early boot.
*/
assigned-clocks = <&core_clk>;
assigned-clock-rates = <1000000000>;
};
serial: serial@5000 {

View File

@ -49,10 +49,11 @@ CONFIG_SERIAL_8250_DW=y
CONFIG_SERIAL_OF_PLATFORM=y
# CONFIG_HW_RANDOM is not set
# CONFIG_HWMON is not set
CONFIG_DRM=y
# CONFIG_DRM_FBDEV_EMULATION is not set
CONFIG_DRM_UDL=y
CONFIG_FB=y
CONFIG_FB_UDL=y
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_USB=y
CONFIG_USB_EHCI_HCD=y
CONFIG_USB_EHCI_HCD_PLATFORM=y
CONFIG_USB_OHCI_HCD=y

View File

@ -668,6 +668,7 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count)
return 0;
__asm__ __volatile__(
" mov lp_count, %5 \n"
" lp 3f \n"
"1: ldb.ab %3, [%2, 1] \n"
" breq.d %3, 0, 3f \n"
@ -684,8 +685,8 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count)
" .word 1b, 4b \n"
" .previous \n"
: "+r"(res), "+r"(dst), "+r"(src), "=r"(val)
: "g"(-EFAULT), "l"(count)
: "memory");
: "g"(-EFAULT), "r"(count)
: "lp_count", "lp_start", "lp_end", "memory");
return res;
}

View File

@ -199,7 +199,7 @@ static void read_arc_build_cfg_regs(void)
unsigned int exec_ctrl;
READ_BCR(AUX_EXEC_CTRL, exec_ctrl);
cpu->extn.dual_enb = exec_ctrl & 1;
cpu->extn.dual_enb = !(exec_ctrl & 1);
/* dual issue always present for this core */
cpu->extn.dual = 1;

View File

@ -163,7 +163,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,
*/
static int __print_sym(unsigned int address, void *unused)
{
__print_symbol(" %s\n", address);
printk(" %pS\n", (void *)address);
return 0;
}

View File

@ -83,6 +83,7 @@ DO_ERROR_INFO(SIGILL, "Illegal Insn (or Seq)", insterror_is_error, ILL_ILLOPC)
DO_ERROR_INFO(SIGBUS, "Invalid Mem Access", __weak do_memory_error, BUS_ADRERR)
DO_ERROR_INFO(SIGTRAP, "Breakpoint Set", trap_is_brkpt, TRAP_BRKPT)
DO_ERROR_INFO(SIGBUS, "Misaligned Access", do_misaligned_error, BUS_ADRALN)
DO_ERROR_INFO(SIGSEGV, "gcc generated __builtin_trap", do_trap5_error, 0)
/*
* Entry Point for Misaligned Data access Exception, for emulating in software
@ -115,6 +116,8 @@ void do_machine_check_fault(unsigned long address, struct pt_regs *regs)
* Thus TRAP_S <n> can be used for specific purpose
* -1 used for software breakpointing (gdb)
* -2 used by kprobes
* -5 __builtin_trap() generated by gcc (2018.03 onwards) for toggle such as
* -fno-isolate-erroneous-paths-dereference
*/
void do_non_swi_trap(unsigned long address, struct pt_regs *regs)
{
@ -134,6 +137,9 @@ void do_non_swi_trap(unsigned long address, struct pt_regs *regs)
kgdb_trap(regs);
break;
case 5:
do_trap5_error(address, regs);
break;
default:
break;
}
@ -155,3 +161,11 @@ void do_insterror_or_kprobe(unsigned long address, struct pt_regs *regs)
insterror_is_error(address, regs);
}
/*
* abort() call generated by older gcc for __builtin_trap()
*/
void abort(void)
{
__asm__ __volatile__("trap_s 5\n");
}

View File

@ -163,6 +163,9 @@ static void show_ecr_verbose(struct pt_regs *regs)
else
pr_cont("Bus Error, check PRM\n");
#endif
} else if (vec == ECR_V_TRAP) {
if (regs->ecr_param == 5)
pr_cont("gcc generated __builtin_trap\n");
} else {
pr_cont("Check Programmer's Manual\n");
}

View File

@ -317,25 +317,23 @@ static void __init axs103_early_init(void)
* Instead of duplicating defconfig/DT for SMP/QUAD, add a small hack
* of fudging the freq in DT
*/
#define AXS103_QUAD_CORE_CPU_FREQ_HZ 50000000
unsigned int num_cores = (read_aux_reg(ARC_REG_MCIP_BCR) >> 16) & 0x3F;
if (num_cores > 2) {
u32 freq = 50, orig;
/*
* TODO: use cpu node "cpu-freq" param instead of platform-specific
* "/cpu_card/core_clk" as it works only if we use fixed-clock for cpu.
*/
u32 freq;
int off = fdt_path_offset(initial_boot_params, "/cpu_card/core_clk");
const struct fdt_property *prop;
prop = fdt_get_property(initial_boot_params, off,
"clock-frequency", NULL);
orig = be32_to_cpu(*(u32*)(prop->data)) / 1000000;
"assigned-clock-rates", NULL);
freq = be32_to_cpu(*(u32 *)(prop->data));
/* Patching .dtb in-place with new core clock value */
if (freq != orig ) {
freq = cpu_to_be32(freq * 1000000);
if (freq != AXS103_QUAD_CORE_CPU_FREQ_HZ) {
freq = cpu_to_be32(AXS103_QUAD_CORE_CPU_FREQ_HZ);
fdt_setprop_inplace(initial_boot_params, off,
"clock-frequency", &freq, sizeof(freq));
"assigned-clock-rates", &freq, sizeof(freq));
}
}
#endif

View File

@ -38,42 +38,6 @@ static void __init hsdk_init_per_cpu(unsigned int cpu)
#define CREG_PAE (CREG_BASE + 0x180)
#define CREG_PAE_UPDATE (CREG_BASE + 0x194)
#define CREG_CORE_IF_CLK_DIV (CREG_BASE + 0x4B8)
#define CREG_CORE_IF_CLK_DIV_2 0x1
#define CGU_BASE ARC_PERIPHERAL_BASE
#define CGU_PLL_STATUS (ARC_PERIPHERAL_BASE + 0x4)
#define CGU_PLL_CTRL (ARC_PERIPHERAL_BASE + 0x0)
#define CGU_PLL_STATUS_LOCK BIT(0)
#define CGU_PLL_STATUS_ERR BIT(1)
#define CGU_PLL_CTRL_1GHZ 0x3A10
#define HSDK_PLL_LOCK_TIMEOUT 500
#define HSDK_PLL_LOCKED() \
!!(ioread32((void __iomem *) CGU_PLL_STATUS) & CGU_PLL_STATUS_LOCK)
#define HSDK_PLL_ERR() \
!!(ioread32((void __iomem *) CGU_PLL_STATUS) & CGU_PLL_STATUS_ERR)
static void __init hsdk_set_cpu_freq_1ghz(void)
{
u32 timeout = HSDK_PLL_LOCK_TIMEOUT;
/*
* As we set cpu clock which exceeds 500MHz, the divider for the interface
* clock must be programmed to div-by-2.
*/
iowrite32(CREG_CORE_IF_CLK_DIV_2, (void __iomem *) CREG_CORE_IF_CLK_DIV);
/* Set cpu clock to 1GHz */
iowrite32(CGU_PLL_CTRL_1GHZ, (void __iomem *) CGU_PLL_CTRL);
while (!HSDK_PLL_LOCKED() && timeout--)
cpu_relax();
if (!HSDK_PLL_LOCKED() || HSDK_PLL_ERR())
pr_err("Failed to setup CPU frequency to 1GHz!");
}
#define SDIO_BASE (ARC_PERIPHERAL_BASE + 0xA000)
#define SDIO_UHS_REG_EXT (SDIO_BASE + 0x108)
#define SDIO_UHS_REG_EXT_DIV_2 (2 << 30)
@ -98,12 +62,6 @@ static void __init hsdk_init_early(void)
* minimum possible div-by-2.
*/
iowrite32(SDIO_UHS_REG_EXT_DIV_2, (void __iomem *) SDIO_UHS_REG_EXT);
/*
* Setup CPU frequency to 1GHz.
* TODO: remove it after smart hsdk pll driver will be introduced.
*/
hsdk_set_cpu_freq_1ghz();
}
static const char *hsdk_compat[] __initconst = {

View File

@ -219,7 +219,7 @@
compatible = "aspeed,ast2400-vuart";
reg = <0x1e787000 0x40>;
reg-shift = <2>;
interrupts = <10>;
interrupts = <8>;
clocks = <&clk_uart>;
no-loopback-test;
status = "disabled";

View File

@ -221,6 +221,7 @@
jc42@18 {
compatible = "nxp,se97b", "jedec,jc-42.4-temp";
reg = <0x18>;
smbus-timeout-disable;
};
dpot: mcp4651-104@28 {

View File

@ -178,7 +178,7 @@
*/
battery {
pinctrl-names = "default";
pintctrl-0 = <&battery_pins>;
pinctrl-0 = <&battery_pins>;
compatible = "lego,ev3-battery";
io-channels = <&adc 4>, <&adc 3>;
io-channel-names = "voltage", "current";
@ -392,7 +392,7 @@
batt_volt_en {
gpio-hog;
gpios = <6 GPIO_ACTIVE_HIGH>;
output-low;
output-high;
};
};

View File

@ -664,6 +664,10 @@
status = "okay";
};
&mixer {
status = "okay";
};
/* eMMC flash */
&mmc_0 {
status = "okay";

View File

@ -215,7 +215,7 @@
reg = <0x2a>;
VDDA-supply = <&reg_3p3v>;
VDDIO-supply = <&reg_3p3v>;
clocks = <&sys_mclk 1>;
clocks = <&sys_mclk>;
};
};
};

View File

@ -187,7 +187,7 @@
reg = <0x0a>;
VDDA-supply = <&reg_3p3v>;
VDDIO-supply = <&reg_3p3v>;
clocks = <&sys_mclk 1>;
clocks = <&sys_mclk>;
};
};

View File

@ -83,6 +83,10 @@
};
};
&cpu0 {
cpu0-supply = <&vdd_arm>;
};
&i2c1 {
status = "okay";
clock-frequency = <400000>;

View File

@ -956,7 +956,7 @@
iep_mmu: iommu@ff900800 {
compatible = "rockchip,iommu";
reg = <0x0 0xff900800 0x0 0x40>;
interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH 0>;
interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "iep_mmu";
#iommu-cells = <0>;
status = "disabled";

View File

@ -502,8 +502,8 @@
reg = <0x01c16000 0x1000>;
interrupts = <58>;
clocks = <&ccu CLK_AHB_HDMI0>, <&ccu CLK_HDMI>,
<&ccu 9>,
<&ccu 18>;
<&ccu CLK_PLL_VIDEO0_2X>,
<&ccu CLK_PLL_VIDEO1_2X>;
clock-names = "ahb", "mod", "pll-0", "pll-1";
dmas = <&dma SUN4I_DMA_NORMAL 16>,
<&dma SUN4I_DMA_NORMAL 16>,

View File

@ -82,8 +82,8 @@
reg = <0x01c16000 0x1000>;
interrupts = <58>;
clocks = <&ccu CLK_AHB_HDMI>, <&ccu CLK_HDMI>,
<&ccu 9>,
<&ccu 16>;
<&ccu CLK_PLL_VIDEO0_2X>,
<&ccu CLK_PLL_VIDEO1_2X>;
clock-names = "ahb", "mod", "pll-0", "pll-1";
dmas = <&dma SUN4I_DMA_NORMAL 16>,
<&dma SUN4I_DMA_NORMAL 16>,

View File

@ -429,8 +429,8 @@
interrupts = <GIC_SPI 88 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&ccu CLK_AHB1_HDMI>, <&ccu CLK_HDMI>,
<&ccu CLK_HDMI_DDC>,
<&ccu 7>,
<&ccu 13>;
<&ccu CLK_PLL_VIDEO0_2X>,
<&ccu CLK_PLL_VIDEO1_2X>;
clock-names = "ahb", "mod", "ddc", "pll-0", "pll-1";
resets = <&ccu RST_AHB1_HDMI>;
reset-names = "ahb";

View File

@ -581,8 +581,8 @@
reg = <0x01c16000 0x1000>;
interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&ccu CLK_AHB_HDMI0>, <&ccu CLK_HDMI>,
<&ccu 9>,
<&ccu 18>;
<&ccu CLK_PLL_VIDEO0_2X>,
<&ccu CLK_PLL_VIDEO1_2X>;
clock-names = "ahb", "mod", "pll-0", "pll-1";
dmas = <&dma SUN4I_DMA_NORMAL 16>,
<&dma SUN4I_DMA_NORMAL 16>,

View File

@ -146,6 +146,7 @@
status = "okay";
axp81x: pmic@3a3 {
compatible = "x-powers,axp813";
reg = <0x3a3>;
interrupt-parent = <&r_intc>;
interrupts = <0 IRQ_TYPE_LEVEL_LOW>;

View File

@ -156,7 +156,6 @@
reg = <0x6e000 0x400>;
ranges = <0 0x6e000 0x400>;
interrupt-parent = <&gic>;
interrupt-controller;
#address-cells = <1>;
#size-cells = <1>;

View File

@ -793,7 +793,6 @@ void abort(void)
/* if that doesn't kill us, halt */
panic("Oops failed to kill thread");
}
EXPORT_SYMBOL(abort);
void __init trap_init(void)
{

View File

@ -868,10 +868,10 @@ static const struct dma_slave_map dm365_edma_map[] = {
{ "spi_davinci.0", "rx", EDMA_FILTER_PARAM(0, 17) },
{ "spi_davinci.3", "tx", EDMA_FILTER_PARAM(0, 18) },
{ "spi_davinci.3", "rx", EDMA_FILTER_PARAM(0, 19) },
{ "dm6441-mmc.0", "rx", EDMA_FILTER_PARAM(0, 26) },
{ "dm6441-mmc.0", "tx", EDMA_FILTER_PARAM(0, 27) },
{ "dm6441-mmc.1", "rx", EDMA_FILTER_PARAM(0, 30) },
{ "dm6441-mmc.1", "tx", EDMA_FILTER_PARAM(0, 31) },
{ "da830-mmc.0", "rx", EDMA_FILTER_PARAM(0, 26) },
{ "da830-mmc.0", "tx", EDMA_FILTER_PARAM(0, 27) },
{ "da830-mmc.1", "rx", EDMA_FILTER_PARAM(0, 30) },
{ "da830-mmc.1", "tx", EDMA_FILTER_PARAM(0, 31) },
};
static struct edma_soc_info dm365_edma_pdata = {
@ -925,12 +925,14 @@ static struct resource edma_resources[] = {
/* not using TC*_ERR */
};
static struct platform_device dm365_edma_device = {
.name = "edma",
.id = 0,
.dev.platform_data = &dm365_edma_pdata,
.num_resources = ARRAY_SIZE(edma_resources),
.resource = edma_resources,
static const struct platform_device_info dm365_edma_device __initconst = {
.name = "edma",
.id = 0,
.dma_mask = DMA_BIT_MASK(32),
.res = edma_resources,
.num_res = ARRAY_SIZE(edma_resources),
.data = &dm365_edma_pdata,
.size_data = sizeof(dm365_edma_pdata),
};
static struct resource dm365_asp_resources[] = {
@ -1428,13 +1430,18 @@ int __init dm365_init_video(struct vpfe_config *vpfe_cfg,
static int __init dm365_init_devices(void)
{
struct platform_device *edma_pdev;
int ret = 0;
if (!cpu_is_davinci_dm365())
return 0;
davinci_cfg_reg(DM365_INT_EDMA_CC);
platform_device_register(&dm365_edma_device);
edma_pdev = platform_device_register_full(&dm365_edma_device);
if (IS_ERR(edma_pdev)) {
pr_warn("%s: Failed to register eDMA\n", __func__);
return PTR_ERR(edma_pdev);
}
platform_device_register(&dm365_mdio_device);
platform_device_register(&dm365_emac_device);

View File

@ -75,6 +75,7 @@
pinctrl-0 = <&rgmii_pins>;
phy-mode = "rgmii";
phy-handle = <&ext_rgmii_phy>;
phy-supply = <&reg_dc1sw>;
status = "okay";
};

View File

@ -77,6 +77,7 @@
pinctrl-0 = <&rmii_pins>;
phy-mode = "rmii";
phy-handle = <&ext_rmii_phy1>;
phy-supply = <&reg_dc1sw>;
status = "okay";
};

View File

@ -82,6 +82,7 @@
pinctrl-0 = <&rgmii_pins>;
phy-mode = "rgmii";
phy-handle = <&ext_rgmii_phy>;
phy-supply = <&reg_dc1sw>;
status = "okay";
};
@ -95,7 +96,7 @@
&mmc2 {
pinctrl-names = "default";
pinctrl-0 = <&mmc2_pins>;
vmmc-supply = <&reg_vcc3v3>;
vmmc-supply = <&reg_dcdc1>;
vqmmc-supply = <&reg_vcc1v8>;
bus-width = <8>;
non-removable;

View File

@ -45,19 +45,10 @@
#include "sun50i-a64.dtsi"
/ {
reg_vcc3v3: vcc3v3 {
compatible = "regulator-fixed";
regulator-name = "vcc3v3";
regulator-min-microvolt = <3300000>;
regulator-max-microvolt = <3300000>;
};
};
&mmc0 {
pinctrl-names = "default";
pinctrl-0 = <&mmc0_pins>;
vmmc-supply = <&reg_vcc3v3>;
vmmc-supply = <&reg_dcdc1>;
non-removable;
disable-wp;
bus-width = <4>;

View File

@ -71,7 +71,7 @@
pinctrl-0 = <&mmc0_pins_a>, <&mmc0_cd_pin>;
vmmc-supply = <&reg_vcc3v3>;
bus-width = <4>;
cd-gpios = <&pio 5 6 GPIO_ACTIVE_HIGH>;
cd-gpios = <&pio 5 6 GPIO_ACTIVE_LOW>;
status = "okay";
};

View File

@ -255,7 +255,6 @@
&avb {
pinctrl-0 = <&avb_pins>;
pinctrl-names = "default";
renesas,no-ether-link;
phy-handle = <&phy0>;
status = "okay";

View File

@ -145,7 +145,6 @@
&avb {
pinctrl-0 = <&avb_pins>;
pinctrl-names = "default";
renesas,no-ether-link;
phy-handle = <&phy0>;
status = "okay";

View File

@ -132,6 +132,8 @@
assigned-clocks = <&cru SCLK_MAC2IO>, <&cru SCLK_MAC2IO_EXT>;
assigned-clock-parents = <&gmac_clkin>, <&gmac_clkin>;
clock_in_out = "input";
/* shows instability at 1GBit right now */
max-speed = <100>;
phy-supply = <&vcc_io>;
phy-mode = "rgmii";
pinctrl-names = "default";

View File

@ -514,7 +514,7 @@
tsadc: tsadc@ff250000 {
compatible = "rockchip,rk3328-tsadc";
reg = <0x0 0xff250000 0x0 0x100>;
interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH 0>;
interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>;
assigned-clocks = <&cru SCLK_TSADC>;
assigned-clock-rates = <50000>;
clocks = <&cru SCLK_TSADC>, <&cru PCLK_TSADC>;

View File

@ -155,17 +155,6 @@
regulator-min-microvolt = <5000000>;
regulator-max-microvolt = <5000000>;
};
vdd_log: vdd-log {
compatible = "pwm-regulator";
pwms = <&pwm2 0 25000 0>;
regulator-name = "vdd_log";
regulator-min-microvolt = <800000>;
regulator-max-microvolt = <1400000>;
regulator-always-on;
regulator-boot-on;
status = "okay";
};
};
&cpu_b0 {

View File

@ -198,8 +198,8 @@
gpio-controller;
#gpio-cells = <2>;
gpio-ranges = <&pinctrl 0 0 0>,
<&pinctrl 96 0 0>,
<&pinctrl 160 0 0>;
<&pinctrl 104 0 0>,
<&pinctrl 168 0 0>;
gpio-ranges-group-names = "gpio_range0",
"gpio_range1",
"gpio_range2";

View File

@ -122,7 +122,6 @@ void abort(void)
/* if that doesn't kill us, halt */
panic("Oops failed to kill thread");
}
EXPORT_SYMBOL(abort);
void __init trap_init(void)
{

View File

@ -12,6 +12,7 @@
for the semaphore. */
#define __PA_LDCW_ALIGNMENT 16
#define __PA_LDCW_ALIGN_ORDER 4
#define __ldcw_align(a) ({ \
unsigned long __ret = (unsigned long) &(a)->lock[0]; \
__ret = (__ret + __PA_LDCW_ALIGNMENT - 1) \
@ -29,6 +30,7 @@
ldcd). */
#define __PA_LDCW_ALIGNMENT 4
#define __PA_LDCW_ALIGN_ORDER 2
#define __ldcw_align(a) (&(a)->slock)
#define __LDCW "ldcw,co"

View File

@ -870,7 +870,7 @@ static void print_parisc_device(struct parisc_device *dev)
static int count;
print_pa_hwpath(dev, hw_path);
printk(KERN_INFO "%d. %s at 0x%p [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }",
printk(KERN_INFO "%d. %s at 0x%px [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }",
++count, dev->name, (void*) dev->hpa.start, hw_path, dev->id.hw_type,
dev->id.hversion_rev, dev->id.hversion, dev->id.sversion);

View File

@ -35,6 +35,7 @@
#include <asm/pgtable.h>
#include <asm/signal.h>
#include <asm/unistd.h>
#include <asm/ldcw.h>
#include <asm/thread_info.h>
#include <linux/linkage.h>
@ -46,6 +47,14 @@
#endif
.import pa_tlb_lock,data
.macro load_pa_tlb_lock reg
#if __PA_LDCW_ALIGNMENT > 4
load32 PA(pa_tlb_lock) + __PA_LDCW_ALIGNMENT-1, \reg
depi 0,31,__PA_LDCW_ALIGN_ORDER, \reg
#else
load32 PA(pa_tlb_lock), \reg
#endif
.endm
/* space_to_prot macro creates a prot id from a space id */
@ -457,7 +466,7 @@
.macro tlb_lock spc,ptp,pte,tmp,tmp1,fault
#ifdef CONFIG_SMP
cmpib,COND(=),n 0,\spc,2f
load32 PA(pa_tlb_lock),\tmp
load_pa_tlb_lock \tmp
1: LDCW 0(\tmp),\tmp1
cmpib,COND(=) 0,\tmp1,1b
nop
@ -480,7 +489,7 @@
/* Release pa_tlb_lock lock. */
.macro tlb_unlock1 spc,tmp
#ifdef CONFIG_SMP
load32 PA(pa_tlb_lock),\tmp
load_pa_tlb_lock \tmp
tlb_unlock0 \spc,\tmp
#endif
.endm

View File

@ -36,6 +36,7 @@
#include <asm/assembly.h>
#include <asm/pgtable.h>
#include <asm/cache.h>
#include <asm/ldcw.h>
#include <linux/linkage.h>
.text
@ -333,8 +334,12 @@ ENDPROC_CFI(flush_data_cache_local)
.macro tlb_lock la,flags,tmp
#ifdef CONFIG_SMP
ldil L%pa_tlb_lock,%r1
ldo R%pa_tlb_lock(%r1),\la
#if __PA_LDCW_ALIGNMENT > 4
load32 pa_tlb_lock + __PA_LDCW_ALIGNMENT-1, \la
depi 0,31,__PA_LDCW_ALIGN_ORDER, \la
#else
load32 pa_tlb_lock, \la
#endif
rsm PSW_SM_I,\flags
1: LDCW 0(\la),\tmp
cmpib,<>,n 0,\tmp,3f

View File

@ -39,6 +39,7 @@
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/personality.h>
#include <linux/ptrace.h>
@ -183,6 +184,44 @@ int dump_task_fpu (struct task_struct *tsk, elf_fpregset_t *r)
return 1;
}
/*
* Idle thread support
*
* Detect when running on QEMU with SeaBIOS PDC Firmware and let
* QEMU idle the host too.
*/
int running_on_qemu __read_mostly;
void __cpuidle arch_cpu_idle_dead(void)
{
/* nop on real hardware, qemu will offline CPU. */
asm volatile("or %%r31,%%r31,%%r31\n":::);
}
void __cpuidle arch_cpu_idle(void)
{
local_irq_enable();
/* nop on real hardware, qemu will idle sleep. */
asm volatile("or %%r10,%%r10,%%r10\n":::);
}
static int __init parisc_idle_init(void)
{
const char *marker;
/* check QEMU/SeaBIOS marker in PAGE0 */
marker = (char *) &PAGE0->pad0;
running_on_qemu = (memcmp(marker, "SeaBIOS", 8) == 0);
if (!running_on_qemu)
cpu_idle_poll_ctrl(1);
return 0;
}
arch_initcall(parisc_idle_init);
/*
* Copy architecture-specific thread state
*/

View File

@ -631,11 +631,11 @@ void __init mem_init(void)
mem_init_print_info(NULL);
#ifdef CONFIG_DEBUG_KERNEL /* double-sanity-check paranoia */
printk("virtual kernel memory layout:\n"
" vmalloc : 0x%p - 0x%p (%4ld MB)\n"
" memory : 0x%p - 0x%p (%4ld MB)\n"
" .init : 0x%p - 0x%p (%4ld kB)\n"
" .data : 0x%p - 0x%p (%4ld kB)\n"
" .text : 0x%p - 0x%p (%4ld kB)\n",
" vmalloc : 0x%px - 0x%px (%4ld MB)\n"
" memory : 0x%px - 0x%px (%4ld MB)\n"
" .init : 0x%px - 0x%px (%4ld kB)\n"
" .data : 0x%px - 0x%px (%4ld kB)\n"
" .text : 0x%px - 0x%px (%4ld kB)\n",
(void*)VMALLOC_START, (void*)VMALLOC_END,
(VMALLOC_END - VMALLOC_START) >> 20,

View File

@ -145,6 +145,11 @@ static noinline int bad_area(struct pt_regs *regs, unsigned long address)
return __bad_area(regs, address, SEGV_MAPERR);
}
static noinline int bad_access(struct pt_regs *regs, unsigned long address)
{
return __bad_area(regs, address, SEGV_ACCERR);
}
static int do_sigbus(struct pt_regs *regs, unsigned long address,
unsigned int fault)
{
@ -490,7 +495,7 @@ retry:
good_area:
if (unlikely(access_error(is_write, is_exec, vma)))
return bad_area(regs, address);
return bad_access(regs, address);
/*
* If for any reason at all we couldn't handle the fault,

View File

@ -354,6 +354,7 @@ static int fsl_of_msi_remove(struct platform_device *ofdev)
}
static struct lock_class_key fsl_msi_irq_class;
static struct lock_class_key fsl_msi_irq_request_class;
static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev,
int offset, int irq_index)
@ -373,7 +374,8 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev,
dev_err(&dev->dev, "No memory for MSI cascade data\n");
return -ENOMEM;
}
irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class);
irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class,
&fsl_msi_irq_request_class);
cascade_data->index = offset;
cascade_data->msi_data = msi;
cascade_data->virq = virt_msir;

View File

@ -792,11 +792,12 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm)
if (kvm->arch.use_cmma) {
/*
* Get the last slot. They should be sorted by base_gfn, so the
* last slot is also the one at the end of the address space.
* We have verified above that at least one slot is present.
* Get the first slot. They are reverse sorted by base_gfn, so
* the first slot is also the one at the end of the address
* space. We have verified above that at least one slot is
* present.
*/
ms = slots->memslots + slots->used_slots - 1;
ms = slots->memslots;
/* round up so we only use full longs */
ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
/* allocate enough bytes to store all the bits */

View File

@ -1006,7 +1006,7 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
cbrlo[entries] = gfn << PAGE_SHIFT;
}
if (orc) {
if (orc && gfn < ms->bitmap_size) {
/* increment only if we are really flipping the bit to 1 */
if (!test_and_set_bit(gfn, ms->pgste_bitmap))
atomic64_inc(&ms->dirty_pages);

View File

@ -89,11 +89,11 @@ EXPORT_SYMBOL(enable_sacf_uaccess);
void disable_sacf_uaccess(mm_segment_t old_fs)
{
current->thread.mm_segment = old_fs;
if (old_fs == USER_DS && test_facility(27)) {
__ctl_load(S390_lowcore.user_asce, 1, 1);
clear_cpu_flag(CIF_ASCE_PRIMARY);
}
current->thread.mm_segment = old_fs;
}
EXPORT_SYMBOL(disable_sacf_uaccess);

View File

@ -181,6 +181,9 @@ out_unlock:
static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
size_t size, int flags)
{
unsigned long irqflags;
int ret;
/*
* With zdev->tlb_refresh == 0, rpcit is not required to establish new
* translations when previously invalid translation-table entries are
@ -196,8 +199,22 @@ static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
return 0;
}
return zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
PAGE_ALIGN(size));
ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
PAGE_ALIGN(size));
if (ret == -ENOMEM && !s390_iommu_strict) {
/* enable the hypervisor to free some resources */
if (zpci_refresh_global(zdev))
goto out;
spin_lock_irqsave(&zdev->iommu_bitmap_lock, irqflags);
bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
zdev->lazy_bitmap, zdev->iommu_pages);
bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irqflags);
ret = 0;
}
out:
return ret;
}
static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,

View File

@ -89,6 +89,9 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
if (cc)
zpci_err_insn(cc, status, addr, range);
if (cc == 1 && (status == 4 || status == 16))
return -ENOMEM;
return (cc) ? -EIO : 0;
}

View File

@ -44,8 +44,8 @@ EXPORT_SYMBOL(__arch_hweight32)
.previous
ENTRY(__arch_hweight64)
sethi %hi(__sw_hweight16), %g1
jmpl %g1 + %lo(__sw_hweight16), %g0
sethi %hi(__sw_hweight64), %g1
jmpl %g1 + %lo(__sw_hweight64), %g0
nop
ENDPROC(__arch_hweight64)
EXPORT_SYMBOL(__arch_hweight64)

View File

@ -298,7 +298,6 @@ void abort(void)
/* if that doesn't kill us, halt */
panic("Oops failed to kill thread");
}
EXPORT_SYMBOL(abort);
void __init trap_init(void)
{

View File

@ -23,6 +23,9 @@
*/
#undef CONFIG_AMD_MEM_ENCRYPT
/* No PAGE_TABLE_ISOLATION support needed either: */
#undef CONFIG_PAGE_TABLE_ISOLATION
#include "misc.h"
/* These actually do the work of building the kernel identity maps. */

View File

@ -80,39 +80,43 @@ genfdimage288() {
mcopy $FBZIMAGE w:linux
}
genisoimage() {
geniso() {
tmp_dir=`dirname $FIMAGE`/isoimage
rm -rf $tmp_dir
mkdir $tmp_dir
for i in lib lib64 share end ; do
for i in lib lib64 share ; do
for j in syslinux ISOLINUX ; do
if [ -f /usr/$i/$j/isolinux.bin ] ; then
isolinux=/usr/$i/$j/isolinux.bin
cp $isolinux $tmp_dir
fi
done
for j in syslinux syslinux/modules/bios ; do
if [ -f /usr/$i/$j/ldlinux.c32 ]; then
ldlinux=/usr/$i/$j/ldlinux.c32
cp $ldlinux $tmp_dir
fi
done
if [ -n "$isolinux" -a -n "$ldlinux" ] ; then
break
fi
if [ $i = end -a -z "$isolinux" ] ; then
echo 'Need an isolinux.bin file, please install syslinux/isolinux.'
exit 1
fi
done
if [ -z "$isolinux" ] ; then
echo 'Need an isolinux.bin file, please install syslinux/isolinux.'
exit 1
fi
if [ -z "$ldlinux" ] ; then
echo 'Need an ldlinux.c32 file, please install syslinux/isolinux.'
exit 1
fi
cp $isolinux $tmp_dir
cp $ldlinux $tmp_dir
cp $FBZIMAGE $tmp_dir/linux
echo "$KCMDLINE" > $tmp_dir/isolinux.cfg
if [ -f "$FDINITRD" ] ; then
cp "$FDINITRD" $tmp_dir/initrd.img
fi
mkisofs -J -r -input-charset=utf-8 -quiet -o $FIMAGE -b isolinux.bin \
-c boot.cat -no-emul-boot -boot-load-size 4 -boot-info-table \
$tmp_dir
genisoimage -J -r -input-charset=utf-8 -quiet -o $FIMAGE \
-b isolinux.bin -c boot.cat -no-emul-boot -boot-load-size 4 \
-boot-info-table $tmp_dir
isohybrid $FIMAGE 2>/dev/null || true
rm -rf $tmp_dir
}
@ -121,6 +125,6 @@ case $1 in
bzdisk) genbzdisk;;
fdimage144) genfdimage144;;
fdimage288) genfdimage288;;
isoimage) genisoimage;;
isoimage) geniso;;
*) echo 'Unknown image format'; exit 1;
esac

View File

@ -1,6 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/jump_label.h>
#include <asm/unwind_hints.h>
#include <asm/cpufeatures.h>
#include <asm/page_types.h>
#include <asm/percpu.h>
#include <asm/asm-offsets.h>
#include <asm/processor-flags.h>
/*
@ -187,6 +192,146 @@ For 32-bit we have the following conventions - kernel is built with
#endif
.endm
#ifdef CONFIG_PAGE_TABLE_ISOLATION
/*
* PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two
* halves:
*/
#define PTI_SWITCH_PGTABLES_MASK (1<<PAGE_SHIFT)
#define PTI_SWITCH_MASK (PTI_SWITCH_PGTABLES_MASK|(1<<X86_CR3_PTI_SWITCH_BIT))
.macro SET_NOFLUSH_BIT reg:req
bts $X86_CR3_PCID_NOFLUSH_BIT, \reg
.endm
.macro ADJUST_KERNEL_CR3 reg:req
ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
/* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
andq $(~PTI_SWITCH_MASK), \reg
.endm
.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
mov %cr3, \scratch_reg
ADJUST_KERNEL_CR3 \scratch_reg
mov \scratch_reg, %cr3
.Lend_\@:
.endm
#define THIS_CPU_user_pcid_flush_mask \
PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask
.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
mov %cr3, \scratch_reg
ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
/*
* Test if the ASID needs a flush.
*/
movq \scratch_reg, \scratch_reg2
andq $(0x7FF), \scratch_reg /* mask ASID */
bt \scratch_reg, THIS_CPU_user_pcid_flush_mask
jnc .Lnoflush_\@
/* Flush needed, clear the bit */
btr \scratch_reg, THIS_CPU_user_pcid_flush_mask
movq \scratch_reg2, \scratch_reg
jmp .Lwrcr3_\@
.Lnoflush_\@:
movq \scratch_reg2, \scratch_reg
SET_NOFLUSH_BIT \scratch_reg
.Lwrcr3_\@:
/* Flip the PGD and ASID to the user version */
orq $(PTI_SWITCH_MASK), \scratch_reg
mov \scratch_reg, %cr3
.Lend_\@:
.endm
.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
pushq %rax
SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax
popq %rax
.endm
.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI
movq %cr3, \scratch_reg
movq \scratch_reg, \save_reg
/*
* Is the "switch mask" all zero? That means that both of
* these are zero:
*
* 1. The user/kernel PCID bit, and
* 2. The user/kernel "bit" that points CR3 to the
* bottom half of the 8k PGD
*
* That indicates a kernel CR3 value, not a user CR3.
*/
testq $(PTI_SWITCH_MASK), \scratch_reg
jz .Ldone_\@
ADJUST_KERNEL_CR3 \scratch_reg
movq \scratch_reg, %cr3
.Ldone_\@:
.endm
.macro RESTORE_CR3 scratch_reg:req save_reg:req
ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
/*
* KERNEL pages can always resume with NOFLUSH as we do
* explicit flushes.
*/
bt $X86_CR3_PTI_SWITCH_BIT, \save_reg
jnc .Lnoflush_\@
/*
* Check if there's a pending flush for the user ASID we're
* about to set.
*/
movq \save_reg, \scratch_reg
andq $(0x7FF), \scratch_reg
bt \scratch_reg, THIS_CPU_user_pcid_flush_mask
jnc .Lnoflush_\@
btr \scratch_reg, THIS_CPU_user_pcid_flush_mask
jmp .Lwrcr3_\@
.Lnoflush_\@:
SET_NOFLUSH_BIT \save_reg
.Lwrcr3_\@:
/*
* The CR3 write could be avoided when not changing its value,
* but would require a CR3 read *and* a scratch register.
*/
movq \save_reg, %cr3
.Lend_\@:
.endm
#else /* CONFIG_PAGE_TABLE_ISOLATION=n: */
.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
.endm
.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
.endm
.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
.endm
.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
.endm
.macro RESTORE_CR3 scratch_reg:req save_reg:req
.endm
#endif
#endif /* CONFIG_X86_64 */
/*

View File

@ -23,7 +23,6 @@
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include "calling.h"
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
@ -40,6 +39,8 @@
#include <asm/frame.h>
#include <linux/err.h>
#include "calling.h"
.code64
.section .entry.text, "ax"
@ -168,6 +169,9 @@ ENTRY(entry_SYSCALL_64_trampoline)
/* Stash the user RSP. */
movq %rsp, RSP_SCRATCH
/* Note: using %rsp as a scratch reg. */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
/* Load the top of the task stack into RSP */
movq CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
@ -207,6 +211,10 @@ ENTRY(entry_SYSCALL_64)
*/
swapgs
/*
* This path is not taken when PAGE_TABLE_ISOLATION is disabled so it
* is not required to switch CR3.
*/
movq %rsp, PER_CPU_VAR(rsp_scratch)
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
@ -403,6 +411,7 @@ syscall_return_via_sysret:
* We are on the trampoline stack. All regs except RDI are live.
* We can do future final exit work right here.
*/
SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
popq %rdi
popq %rsp
@ -740,6 +749,8 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
* We can do future final exit work right here.
*/
SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
/* Restore RDI. */
popq %rdi
SWAPGS
@ -822,7 +833,9 @@ native_irq_return_ldt:
*/
pushq %rdi /* Stash user RDI */
SWAPGS
SWAPGS /* to kernel GS */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */
movq PER_CPU_VAR(espfix_waddr), %rdi
movq %rax, (0*8)(%rdi) /* user RAX */
movq (1*8)(%rsp), %rax /* user RIP */
@ -838,7 +851,6 @@ native_irq_return_ldt:
/* Now RAX == RSP. */
andl $0xffff0000, %eax /* RAX = (RSP & 0xffff0000) */
popq %rdi /* Restore user RDI */
/*
* espfix_stack[31:16] == 0. The page tables are set up such that
@ -849,7 +861,11 @@ native_irq_return_ldt:
* still points to an RO alias of the ESPFIX stack.
*/
orq PER_CPU_VAR(espfix_stack), %rax
SWAPGS
SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
SWAPGS /* to user GS */
popq %rdi /* Restore user RDI */
movq %rax, %rsp
UNWIND_HINT_IRET_REGS offset=8
@ -949,6 +965,8 @@ ENTRY(switch_to_thread_stack)
UNWIND_HINT_FUNC
pushq %rdi
/* Need to switch before accessing the thread stack. */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
movq %rsp, %rdi
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
@ -1250,7 +1268,11 @@ ENTRY(paranoid_entry)
js 1f /* negative -> in kernel */
SWAPGS
xorl %ebx, %ebx
1: ret
1:
SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
ret
END(paranoid_entry)
/*
@ -1272,6 +1294,7 @@ ENTRY(paranoid_exit)
testl %ebx, %ebx /* swapgs needed? */
jnz .Lparanoid_exit_no_swapgs
TRACE_IRQS_IRETQ
RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
SWAPGS_UNSAFE_STACK
jmp .Lparanoid_exit_restore
.Lparanoid_exit_no_swapgs:
@ -1299,6 +1322,8 @@ ENTRY(error_entry)
* from user mode due to an IRET fault.
*/
SWAPGS
/* We have user CR3. Change to kernel CR3. */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
.Lerror_entry_from_usermode_after_swapgs:
/* Put us onto the real thread stack. */
@ -1345,6 +1370,7 @@ ENTRY(error_entry)
* .Lgs_change's error handler with kernel gsbase.
*/
SWAPGS
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
jmp .Lerror_entry_done
.Lbstep_iret:
@ -1354,10 +1380,11 @@ ENTRY(error_entry)
.Lerror_bad_iret:
/*
* We came from an IRET to user mode, so we have user gsbase.
* Switch to kernel gsbase:
* We came from an IRET to user mode, so we have user
* gsbase and CR3. Switch to kernel gsbase and CR3:
*/
SWAPGS
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
/*
* Pretend that the exception came from user mode: set up pt_regs
@ -1389,6 +1416,10 @@ END(error_exit)
/*
* Runs on exception stack. Xen PV does not go through this path at all,
* so we can use real assembly here.
*
* Registers:
* %r14: Used to save/restore the CR3 of the interrupted context
* when PAGE_TABLE_ISOLATION is in use. Do not clobber.
*/
ENTRY(nmi)
UNWIND_HINT_IRET_REGS
@ -1452,6 +1483,7 @@ ENTRY(nmi)
swapgs
cld
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
movq %rsp, %rdx
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
UNWIND_HINT_IRET_REGS base=%rdx offset=8
@ -1704,6 +1736,8 @@ end_repeat_nmi:
movq $-1, %rsi
call do_nmi
RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
testl %ebx, %ebx /* swapgs needed? */
jnz nmi_restore
nmi_swapgs:

View File

@ -49,6 +49,10 @@
ENTRY(entry_SYSENTER_compat)
/* Interrupts are off on entry. */
SWAPGS
/* We are about to clobber %rsp anyway, clobbering here is OK */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
/*
@ -186,8 +190,13 @@ ENTRY(entry_SYSCALL_compat)
/* Interrupts are off on entry. */
swapgs
/* Stash user ESP and switch to the kernel stack. */
/* Stash user ESP */
movl %esp, %r8d
/* Use %rsp as scratch reg. User ESP is stashed in r8 */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
/* Switch to the kernel stack */
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
/* Construct struct pt_regs on stack */
@ -256,10 +265,22 @@ sysret32_from_system_call:
* when the system call started, which is already known to user
* code. We zero R8-R10 to avoid info leaks.
*/
movq RSP-ORIG_RAX(%rsp), %rsp
/*
* The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored
* on the process stack which is not mapped to userspace and
* not readable after we SWITCH_TO_USER_CR3. Delay the CR3
* switch until after after the last reference to the process
* stack.
*
* %r8/%r9 are zeroed before the sysret, thus safe to clobber.
*/
SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9
xorq %r8, %r8
xorq %r9, %r9
xorq %r10, %r10
movq RSP-ORIG_RAX(%rsp), %rsp
swapgs
sysretl
END(entry_SYSCALL_compat)

View File

@ -344,14 +344,14 @@ int in_gate_area_no_mm(unsigned long addr)
* vsyscalls but leave the page not present. If so, we skip calling
* this.
*/
static void __init set_vsyscall_pgtable_user_bits(void)
void __init set_vsyscall_pgtable_user_bits(pgd_t *root)
{
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pgd = pgd_offset_k(VSYSCALL_ADDR);
pgd = pgd_offset_pgd(root, VSYSCALL_ADDR);
set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
p4d = p4d_offset(pgd, VSYSCALL_ADDR);
#if CONFIG_PGTABLE_LEVELS >= 5
@ -373,7 +373,7 @@ void __init map_vsyscall(void)
vsyscall_mode == NATIVE
? PAGE_KERNEL_VSYSCALL
: PAGE_KERNEL_VVAR);
set_vsyscall_pgtable_user_bits();
set_vsyscall_pgtable_user_bits(swapper_pg_dir);
}
BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=

View File

@ -3847,6 +3847,8 @@ static struct attribute *intel_pmu_attrs[] = {
__init int intel_pmu_init(void)
{
struct attribute **extra_attr = NULL;
struct attribute **to_free = NULL;
union cpuid10_edx edx;
union cpuid10_eax eax;
union cpuid10_ebx ebx;
@ -3854,7 +3856,6 @@ __init int intel_pmu_init(void)
unsigned int unused;
struct extra_reg *er;
int version, i;
struct attribute **extra_attr = NULL;
char *name;
if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
@ -4294,6 +4295,7 @@ __init int intel_pmu_init(void)
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
extra_attr = merge_attr(extra_attr, skl_format_attr);
to_free = extra_attr;
x86_pmu.cpu_events = get_hsw_events_attrs();
intel_pmu_pebs_data_source_skl(
boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
@ -4401,6 +4403,7 @@ __init int intel_pmu_init(void)
pr_cont("full-width counters, ");
}
kfree(to_free);
return 0;
}

View File

@ -3,16 +3,19 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <asm/cpu_entry_area.h>
#include <asm/perf_event.h>
#include <asm/tlbflush.h>
#include <asm/insn.h>
#include "../perf_event.h"
/* Waste a full page so it can be mapped into the cpu_entry_area */
DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
/* The size of a BTS record in bytes: */
#define BTS_RECORD_SIZE 24
#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4)
#define PEBS_FIXUP_SIZE PAGE_SIZE
/*
@ -279,17 +282,67 @@ void fini_debug_store_on_cpu(int cpu)
static DEFINE_PER_CPU(void *, insn_buffer);
static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
{
unsigned long start = (unsigned long)cea;
phys_addr_t pa;
size_t msz = 0;
pa = virt_to_phys(addr);
preempt_disable();
for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
cea_set_pte(cea, pa, prot);
/*
* This is a cross-CPU update of the cpu_entry_area, we must shoot down
* all TLB entries for it.
*/
flush_tlb_kernel_range(start, start + size);
preempt_enable();
}
static void ds_clear_cea(void *cea, size_t size)
{
unsigned long start = (unsigned long)cea;
size_t msz = 0;
preempt_disable();
for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
cea_set_pte(cea, 0, PAGE_NONE);
flush_tlb_kernel_range(start, start + size);
preempt_enable();
}
static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
{
unsigned int order = get_order(size);
int node = cpu_to_node(cpu);
struct page *page;
page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
return page ? page_address(page) : NULL;
}
static void dsfree_pages(const void *buffer, size_t size)
{
if (buffer)
free_pages((unsigned long)buffer, get_order(size));
}
static int alloc_pebs_buffer(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
int node = cpu_to_node(cpu);
int max;
void *buffer, *ibuffer;
struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
struct debug_store *ds = hwev->ds;
size_t bsiz = x86_pmu.pebs_buffer_size;
int max, node = cpu_to_node(cpu);
void *buffer, *ibuffer, *cea;
if (!x86_pmu.pebs)
return 0;
buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
if (unlikely(!buffer))
return -ENOMEM;
@ -300,25 +353,27 @@ static int alloc_pebs_buffer(int cpu)
if (x86_pmu.intel_cap.pebs_format < 2) {
ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
if (!ibuffer) {
kfree(buffer);
dsfree_pages(buffer, bsiz);
return -ENOMEM;
}
per_cpu(insn_buffer, cpu) = ibuffer;
}
max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size;
ds->pebs_buffer_base = (u64)(unsigned long)buffer;
hwev->ds_pebs_vaddr = buffer;
/* Update the cpu entry area mapping */
cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
ds->pebs_buffer_base = (unsigned long) cea;
ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
ds->pebs_index = ds->pebs_buffer_base;
ds->pebs_absolute_maximum = ds->pebs_buffer_base +
max * x86_pmu.pebs_record_size;
max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
return 0;
}
static void release_pebs_buffer(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
struct debug_store *ds = hwev->ds;
void *cea;
if (!ds || !x86_pmu.pebs)
return;
@ -326,73 +381,70 @@ static void release_pebs_buffer(int cpu)
kfree(per_cpu(insn_buffer, cpu));
per_cpu(insn_buffer, cpu) = NULL;
kfree((void *)(unsigned long)ds->pebs_buffer_base);
/* Clear the fixmap */
cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
ds->pebs_buffer_base = 0;
dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
hwev->ds_pebs_vaddr = NULL;
}
static int alloc_bts_buffer(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
int node = cpu_to_node(cpu);
int max, thresh;
void *buffer;
struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
struct debug_store *ds = hwev->ds;
void *buffer, *cea;
int max;
if (!x86_pmu.bts)
return 0;
buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
if (unlikely(!buffer)) {
WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
return -ENOMEM;
}
max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
thresh = max / 16;
ds->bts_buffer_base = (u64)(unsigned long)buffer;
hwev->ds_bts_vaddr = buffer;
/* Update the fixmap */
cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
ds->bts_buffer_base = (unsigned long) cea;
ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
ds->bts_index = ds->bts_buffer_base;
ds->bts_absolute_maximum = ds->bts_buffer_base +
max * BTS_RECORD_SIZE;
ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
thresh * BTS_RECORD_SIZE;
max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE);
ds->bts_absolute_maximum = ds->bts_buffer_base + max;
ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16);
return 0;
}
static void release_bts_buffer(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
struct debug_store *ds = hwev->ds;
void *cea;
if (!ds || !x86_pmu.bts)
return;
kfree((void *)(unsigned long)ds->bts_buffer_base);
/* Clear the fixmap */
cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
ds_clear_cea(cea, BTS_BUFFER_SIZE);
ds->bts_buffer_base = 0;
dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
hwev->ds_bts_vaddr = NULL;
}
static int alloc_ds_buffer(int cpu)
{
int node = cpu_to_node(cpu);
struct debug_store *ds;
ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
if (unlikely(!ds))
return -ENOMEM;
struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
memset(ds, 0, sizeof(*ds));
per_cpu(cpu_hw_events, cpu).ds = ds;
return 0;
}
static void release_ds_buffer(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
if (!ds)
return;
per_cpu(cpu_hw_events, cpu).ds = NULL;
kfree(ds);
}
void release_ds_buffers(void)

View File

@ -14,6 +14,8 @@
#include <linux/perf_event.h>
#include <asm/intel_ds.h>
/* To enable MSR tracing please use the generic trace points. */
/*
@ -77,8 +79,6 @@ struct amd_nb {
struct event_constraint event_constraints[X86_PMC_IDX_MAX];
};
/* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS 8
#define PEBS_COUNTER_MASK ((1ULL << MAX_PEBS_EVENTS) - 1)
/*
@ -95,23 +95,6 @@ struct amd_nb {
PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \
PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)
/*
* A debug store configuration.
*
* We only support architectures that use 64bit fields.
*/
struct debug_store {
u64 bts_buffer_base;
u64 bts_index;
u64 bts_absolute_maximum;
u64 bts_interrupt_threshold;
u64 pebs_buffer_base;
u64 pebs_index;
u64 pebs_absolute_maximum;
u64 pebs_interrupt_threshold;
u64 pebs_event_reset[MAX_PEBS_EVENTS];
};
#define PEBS_REGS \
(PERF_REG_X86_AX | \
PERF_REG_X86_BX | \
@ -216,6 +199,8 @@ struct cpu_hw_events {
* Intel DebugStore bits
*/
struct debug_store *ds;
void *ds_pebs_vaddr;
void *ds_bts_vaddr;
u64 pebs_enabled;
int n_pebs;
int n_large_pebs;

View File

@ -140,7 +140,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
".popsection\n" \
".pushsection .altinstr_replacement, \"ax\"\n" \
ALTINSTR_REPLACEMENT(newinstr, feature, 1) \
".popsection"
".popsection\n"
#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
OLDINSTR_2(oldinstr, 1, 2) \
@ -151,7 +151,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
".pushsection .altinstr_replacement, \"ax\"\n" \
ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \
ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
".popsection"
".popsection\n"
/*
* Alternative instructions for different CPU types or capabilities.

View File

@ -136,6 +136,7 @@
#endif
#ifndef __ASSEMBLY__
#ifndef __BPF__
/*
* This output constraint should be used for any inline asm which has a "call"
* instruction. Otherwise the asm may be inserted before the frame pointer
@ -145,5 +146,6 @@
register unsigned long current_stack_pointer asm(_ASM_SP);
#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
#endif
#endif
#endif /* _ASM_X86_ASM_H */

View File

@ -5,6 +5,7 @@
#include <linux/percpu-defs.h>
#include <asm/processor.h>
#include <asm/intel_ds.h>
/*
* cpu_entry_area is a percpu region that contains things needed by the CPU
@ -40,6 +41,18 @@ struct cpu_entry_area {
*/
char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
#endif
#ifdef CONFIG_CPU_SUP_INTEL
/*
* Per CPU debug store for Intel performance monitoring. Wastes a
* full page at the moment.
*/
struct debug_store cpu_debug_store;
/*
* The actual PEBS/BTS buffers must be mapped to user space
* Reserve enough fixmap PTEs.
*/
struct debug_store_buffers cpu_debug_buffers;
#endif
};
#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area))

View File

@ -197,11 +197,12 @@
#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */
@ -340,5 +341,6 @@
#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
#endif /* _ASM_X86_CPUFEATURES_H */

View File

@ -21,6 +21,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
desc->type = (info->read_exec_only ^ 1) << 1;
desc->type |= info->contents << 2;
/* Set the ACCESS bit so it can be mapped RO */
desc->type |= 1;
desc->s = 1;
desc->dpl = 0x3;

View File

@ -50,6 +50,12 @@
# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31))
#endif
#ifdef CONFIG_PAGE_TABLE_ISOLATION
# define DISABLE_PTI 0
#else
# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
#endif
/*
* Make sure to add features to the correct mask
*/
@ -60,7 +66,7 @@
#define DISABLED_MASK4 (DISABLE_PCID)
#define DISABLED_MASK5 0
#define DISABLED_MASK6 0
#define DISABLED_MASK7 0
#define DISABLED_MASK7 (DISABLE_PTI)
#define DISABLED_MASK8 0
#define DISABLED_MASK9 (DISABLE_MPX)
#define DISABLED_MASK10 0

View File

@ -0,0 +1,36 @@
#ifndef _ASM_INTEL_DS_H
#define _ASM_INTEL_DS_H
#include <linux/percpu-defs.h>
#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4)
/* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS 8
/*
* A debug store configuration.
*
* We only support architectures that use 64bit fields.
*/
struct debug_store {
u64 bts_buffer_base;
u64 bts_index;
u64 bts_absolute_maximum;
u64 bts_interrupt_threshold;
u64 pebs_buffer_base;
u64 pebs_index;
u64 pebs_absolute_maximum;
u64 pebs_interrupt_threshold;
u64 pebs_event_reset[MAX_PEBS_EVENTS];
} __aligned(PAGE_SIZE);
DECLARE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
struct debug_store_buffers {
char bts_buffer[BTS_BUFFER_SIZE];
char pebs_buffer[PEBS_BUFFER_SIZE];
};
#endif

View File

@ -44,7 +44,7 @@ extern int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs);
extern int mp_irqdomain_activate(struct irq_domain *domain,
struct irq_data *irq_data, bool early);
struct irq_data *irq_data, bool reserve);
extern void mp_irqdomain_deactivate(struct irq_domain *domain,
struct irq_data *irq_data);
extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain);

View File

@ -50,10 +50,33 @@ struct ldt_struct {
* call gates. On native, we could merge the ldt_struct and LDT
* allocations, but it's not worth trying to optimize.
*/
struct desc_struct *entries;
unsigned int nr_entries;
struct desc_struct *entries;
unsigned int nr_entries;
/*
* If PTI is in use, then the entries array is not mapped while we're
* in user mode. The whole array will be aliased at the addressed
* given by ldt_slot_va(slot). We use two slots so that we can allocate
* and map, and enable a new LDT without invalidating the mapping
* of an older, still-in-use LDT.
*
* slot will be -1 if this LDT doesn't have an alias mapping.
*/
int slot;
};
/* This is a multiple of PAGE_SIZE. */
#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
static inline void *ldt_slot_va(int slot)
{
#ifdef CONFIG_X86_64
return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
#else
BUG();
#endif
}
/*
* Used for LDT copy/destruction.
*/
@ -64,6 +87,7 @@ static inline void init_new_context_ldt(struct mm_struct *mm)
}
int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm);
void destroy_context_ldt(struct mm_struct *mm);
void ldt_arch_exit_mmap(struct mm_struct *mm);
#else /* CONFIG_MODIFY_LDT_SYSCALL */
static inline void init_new_context_ldt(struct mm_struct *mm) { }
static inline int ldt_dup_context(struct mm_struct *oldmm,
@ -71,7 +95,8 @@ static inline int ldt_dup_context(struct mm_struct *oldmm,
{
return 0;
}
static inline void destroy_context_ldt(struct mm_struct *mm) {}
static inline void destroy_context_ldt(struct mm_struct *mm) { }
static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
#endif
static inline void load_mm_ldt(struct mm_struct *mm)
@ -96,10 +121,31 @@ static inline void load_mm_ldt(struct mm_struct *mm)
* that we can see.
*/
if (unlikely(ldt))
set_ldt(ldt->entries, ldt->nr_entries);
else
if (unlikely(ldt)) {
if (static_cpu_has(X86_FEATURE_PTI)) {
if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
/*
* Whoops -- either the new LDT isn't mapped
* (if slot == -1) or is mapped into a bogus
* slot (if slot > 1).
*/
clear_LDT();
return;
}
/*
* If page table isolation is enabled, ldt->entries
* will not be mapped in the userspace pagetables.
* Tell the CPU to access the LDT through the alias
* at ldt_slot_va(ldt->slot).
*/
set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
} else {
set_ldt(ldt->entries, ldt->nr_entries);
}
} else {
clear_LDT();
}
#else
clear_LDT();
#endif
@ -194,6 +240,7 @@ static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
static inline void arch_exit_mmap(struct mm_struct *mm)
{
paravirt_arch_exit_mmap(mm);
ldt_arch_exit_mmap(mm);
}
#ifdef CONFIG_X86_64

View File

@ -30,6 +30,17 @@ static inline void paravirt_release_p4d(unsigned long pfn) {}
*/
extern gfp_t __userpte_alloc_gfp;
#ifdef CONFIG_PAGE_TABLE_ISOLATION
/*
* Instead of one PGD, we acquire two PGDs. Being order-1, it is
* both 8k in size and 8k-aligned. That lets us just flip bit 12
* in a pointer to swap between the two 4k halves.
*/
#define PGD_ALLOCATION_ORDER 1
#else
#define PGD_ALLOCATION_ORDER 0
#endif
/*
* Allocate and free page tables.
*/

View File

@ -28,6 +28,7 @@ extern pgd_t early_top_pgt[PTRS_PER_PGD];
int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user);
void ptdump_walk_pgd_level_checkwx(void);
#ifdef CONFIG_DEBUG_WX
@ -841,7 +842,12 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
static inline int p4d_bad(p4d_t p4d)
{
return (p4d_flags(p4d) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0;
unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER;
if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
ignore_flags |= _PAGE_NX;
return (p4d_flags(p4d) & ~ignore_flags) != 0;
}
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
@ -875,7 +881,12 @@ static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
static inline int pgd_bad(pgd_t pgd)
{
return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE;
unsigned long ignore_flags = _PAGE_USER;
if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
ignore_flags |= _PAGE_NX;
return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
}
static inline int pgd_none(pgd_t pgd)
@ -904,7 +915,11 @@ static inline int pgd_none(pgd_t pgd)
* pgd_offset() returns a (pgd_t *)
* pgd_index() is used get the offset into the pgd page's array of pgd_t's;
*/
#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address)))
/*
* a shortcut to get a pgd_t in a given mm
*/
#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address))
/*
* a shortcut which implies the use of the kernel's pgd, instead
* of a process's
@ -1106,7 +1121,14 @@ static inline int pud_write(pud_t pud)
*/
static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
{
memcpy(dst, src, count * sizeof(pgd_t));
memcpy(dst, src, count * sizeof(pgd_t));
#ifdef CONFIG_PAGE_TABLE_ISOLATION
if (!static_cpu_has(X86_FEATURE_PTI))
return;
/* Clone the user space pgd as well */
memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src),
count * sizeof(pgd_t));
#endif
}
#define PTE_SHIFT ilog2(PTRS_PER_PTE)

View File

@ -131,9 +131,97 @@ static inline pud_t native_pudp_get_and_clear(pud_t *xp)
#endif
}
#ifdef CONFIG_PAGE_TABLE_ISOLATION
/*
* All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages
* (8k-aligned and 8k in size). The kernel one is at the beginning 4k and
* the user one is in the last 4k. To switch between them, you
* just need to flip the 12th bit in their addresses.
*/
#define PTI_PGTABLE_SWITCH_BIT PAGE_SHIFT
/*
* This generates better code than the inline assembly in
* __set_bit().
*/
static inline void *ptr_set_bit(void *ptr, int bit)
{
unsigned long __ptr = (unsigned long)ptr;
__ptr |= BIT(bit);
return (void *)__ptr;
}
static inline void *ptr_clear_bit(void *ptr, int bit)
{
unsigned long __ptr = (unsigned long)ptr;
__ptr &= ~BIT(bit);
return (void *)__ptr;
}
static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp)
{
return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
}
static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp)
{
return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
}
static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp)
{
return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
}
static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp)
{
return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
}
#endif /* CONFIG_PAGE_TABLE_ISOLATION */
/*
* Page table pages are page-aligned. The lower half of the top
* level is used for userspace and the top half for the kernel.
*
* Returns true for parts of the PGD that map userspace and
* false for the parts that map the kernel.
*/
static inline bool pgdp_maps_userspace(void *__ptr)
{
unsigned long ptr = (unsigned long)__ptr;
return (ptr & ~PAGE_MASK) < (PAGE_SIZE / 2);
}
#ifdef CONFIG_PAGE_TABLE_ISOLATION
pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd);
/*
* Take a PGD location (pgdp) and a pgd value that needs to be set there.
* Populates the user and returns the resulting PGD that must be set in
* the kernel copy of the page tables.
*/
static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
{
if (!static_cpu_has(X86_FEATURE_PTI))
return pgd;
return __pti_set_user_pgd(pgdp, pgd);
}
#else
static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
{
return pgd;
}
#endif
static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
{
#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL)
p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd);
#else
*p4dp = p4d;
#endif
}
static inline void native_p4d_clear(p4d_t *p4d)
@ -147,7 +235,11 @@ static inline void native_p4d_clear(p4d_t *p4d)
static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
{
#ifdef CONFIG_PAGE_TABLE_ISOLATION
*pgdp = pti_set_user_pgd(pgdp, pgd);
#else
*pgdp = pgd;
#endif
}
static inline void native_pgd_clear(pgd_t *pgd)

View File

@ -75,17 +75,27 @@ typedef struct { pteval_t pte; } pte_t;
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
/*
* See Documentation/x86/x86_64/mm.txt for a description of the memory map.
*
* Be very careful vs. KASLR when changing anything here. The KASLR address
* range must not overlap with anything except the KASAN shadow area, which
* is correct as KASAN disables KASLR.
*/
#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
#ifdef CONFIG_X86_5LEVEL
# define VMALLOC_SIZE_TB _AC(16384, UL)
# define __VMALLOC_BASE _AC(0xff92000000000000, UL)
# define VMALLOC_SIZE_TB _AC(12800, UL)
# define __VMALLOC_BASE _AC(0xffa0000000000000, UL)
# define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
# define LDT_PGD_ENTRY _AC(-112, UL)
# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
#else
# define VMALLOC_SIZE_TB _AC(32, UL)
# define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
# define LDT_PGD_ENTRY _AC(-3, UL)
# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
#endif
#ifdef CONFIG_RANDOMIZE_MEMORY
@ -100,13 +110,13 @@ typedef struct { pteval_t pte; } pte_t;
#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
/* The module sections ends with the start of the fixmap */
#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1)
#define MODULES_END _AC(0xffffffffff000000, UL)
#define MODULES_LEN (MODULES_END - MODULES_VADDR)
#define ESPFIX_PGD_ENTRY _AC(-2, UL)
#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
#define CPU_ENTRY_AREA_PGD _AC(-3, UL)
#define CPU_ENTRY_AREA_PGD _AC(-4, UL)
#define CPU_ENTRY_AREA_BASE (CPU_ENTRY_AREA_PGD << P4D_SHIFT)
#define EFI_VA_START ( -4 * (_AC(1, UL) << 30))

View File

@ -38,6 +38,11 @@
#define CR3_ADDR_MASK __sme_clr(0x7FFFFFFFFFFFF000ull)
#define CR3_PCID_MASK 0xFFFull
#define CR3_NOFLUSH BIT_ULL(63)
#ifdef CONFIG_PAGE_TABLE_ISOLATION
# define X86_CR3_PTI_SWITCH_BIT 11
#endif
#else
/*
* CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save

View File

@ -852,13 +852,22 @@ static inline void spin_lock_prefetch(const void *x)
#else
/*
* User space process size. 47bits minus one guard page. The guard
* page is necessary on Intel CPUs: if a SYSCALL instruction is at
* the highest possible canonical userspace address, then that
* syscall will enter the kernel with a non-canonical return
* address, and SYSRET will explode dangerously. We avoid this
* particular problem by preventing anything from being mapped
* at the maximum canonical address.
* User space process size. This is the first address outside the user range.
* There are a few constraints that determine this:
*
* On Intel CPUs, if a SYSCALL instruction is at the highest canonical
* address, then that syscall will enter the kernel with a
* non-canonical return address, and SYSRET will explode dangerously.
* We avoid this particular problem by preventing anything executable
* from being mapped at the maximum canonical address.
*
* On AMD CPUs in the Ryzen family, there's a nasty bug in which the
* CPUs malfunction if they execute code from the highest canonical page.
* They'll speculate right off the end of the canonical space, and
* bad things happen. This is worked around in the same way as the
* Intel problem.
*
* With page table isolation enabled, we map the LDT in ... [stay tuned]
*/
#define TASK_SIZE_MAX ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)

View File

@ -0,0 +1,14 @@
// SPDX-License-Identifier: GPL-2.0
#ifndef _ASM_X86_PTI_H
#define _ASM_X86_PTI_H
#ifndef __ASSEMBLY__
#ifdef CONFIG_PAGE_TABLE_ISOLATION
extern void pti_init(void);
extern void pti_check_boottime_disable(void);
#else
static inline void pti_check_boottime_disable(void) { }
#endif
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_PTI_H */

View File

@ -16,8 +16,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
struct tss_struct *tss);
/* This runs runs on the previous thread's stack. */
static inline void prepare_switch_to(struct task_struct *prev,
struct task_struct *next)
static inline void prepare_switch_to(struct task_struct *next)
{
#ifdef CONFIG_VMAP_STACK
/*
@ -70,7 +69,7 @@ struct fork_frame {
#define switch_to(prev, next, last) \
do { \
prepare_switch_to(prev, next); \
prepare_switch_to(next); \
\
((last) = __switch_to_asm((prev), (next))); \
} while (0)

View File

@ -10,38 +10,90 @@
#include <asm/special_insns.h>
#include <asm/smp.h>
#include <asm/invpcid.h>
#include <asm/pti.h>
#include <asm/processor-flags.h>
static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
{
/*
* Bump the generation count. This also serves as a full barrier
* that synchronizes with switch_mm(): callers are required to order
* their read of mm_cpumask after their writes to the paging
* structures.
*/
return atomic64_inc_return(&mm->context.tlb_gen);
}
/*
* The x86 feature is called PCID (Process Context IDentifier). It is similar
* to what is traditionally called ASID on the RISC processors.
*
* We don't use the traditional ASID implementation, where each process/mm gets
* its own ASID and flush/restart when we run out of ASID space.
*
* Instead we have a small per-cpu array of ASIDs and cache the last few mm's
* that came by on this CPU, allowing cheaper switch_mm between processes on
* this CPU.
*
* We end up with different spaces for different things. To avoid confusion we
* use different names for each of them:
*
* ASID - [0, TLB_NR_DYN_ASIDS-1]
* the canonical identifier for an mm
*
* kPCID - [1, TLB_NR_DYN_ASIDS]
* the value we write into the PCID part of CR3; corresponds to the
* ASID+1, because PCID 0 is special.
*
* uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS]
* for KPTI each mm has two address spaces and thus needs two
* PCID values, but we can still do with a single ASID denomination
* for each mm. Corresponds to kPCID + 2048.
*
*/
/* There are 12 bits of space for ASIDS in CR3 */
#define CR3_HW_ASID_BITS 12
/*
* When enabled, PAGE_TABLE_ISOLATION consumes a single bit for
* user/kernel switches
*/
#define PTI_CONSUMED_ASID_BITS 0
#ifdef CONFIG_PAGE_TABLE_ISOLATION
# define PTI_CONSUMED_PCID_BITS 1
#else
# define PTI_CONSUMED_PCID_BITS 0
#endif
#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS)
#define CR3_AVAIL_ASID_BITS (CR3_HW_ASID_BITS - PTI_CONSUMED_ASID_BITS)
/*
* ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid. -1 below to account
* for them being zero-based. Another -1 is because ASID 0 is reserved for
* for them being zero-based. Another -1 is because PCID 0 is reserved for
* use by non-PCID-aware users.
*/
#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_ASID_BITS) - 2)
#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2)
/*
* 6 because 6 should be plenty and struct tlb_state will fit in two cache
* lines.
*/
#define TLB_NR_DYN_ASIDS 6
/*
* Given @asid, compute kPCID
*/
static inline u16 kern_pcid(u16 asid)
{
VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
#ifdef CONFIG_PAGE_TABLE_ISOLATION
/*
* Make sure that the dynamic ASID space does not confict with the
* bit we are using to switch between user and kernel ASIDs.
*/
BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_SWITCH_BIT));
/*
* The ASID being passed in here should have respected the
* MAX_ASID_AVAILABLE and thus never have the switch bit set.
*/
VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_SWITCH_BIT));
#endif
/*
* The dynamically-assigned ASIDs that get passed in are small
* (<TLB_NR_DYN_ASIDS). They never have the high switch bit set,
* so do not bother to clear it.
*
* If PCID is on, ASID-aware code paths put the ASID+1 into the
* PCID bits. This serves two purposes. It prevents a nasty
* situation in which PCID-unaware code saves CR3, loads some other
@ -53,6 +105,18 @@ static inline u16 kern_pcid(u16 asid)
return asid + 1;
}
/*
* Given @asid, compute uPCID
*/
static inline u16 user_pcid(u16 asid)
{
u16 ret = kern_pcid(asid);
#ifdef CONFIG_PAGE_TABLE_ISOLATION
ret |= 1 << X86_CR3_PTI_SWITCH_BIT;
#endif
return ret;
}
struct pgd_t;
static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
{
@ -95,12 +159,6 @@ static inline bool tlb_defer_switch_to_init_mm(void)
return !static_cpu_has(X86_FEATURE_PCID);
}
/*
* 6 because 6 should be plenty and struct tlb_state will fit in
* two cache lines.
*/
#define TLB_NR_DYN_ASIDS 6
struct tlb_context {
u64 ctx_id;
u64 tlb_gen;
@ -134,6 +192,24 @@ struct tlb_state {
*/
bool is_lazy;
/*
* If set we changed the page tables in such a way that we
* needed an invalidation of all contexts (aka. PCIDs / ASIDs).
* This tells us to go invalidate all the non-loaded ctxs[]
* on the next context switch.
*
* The current ctx was kept up-to-date as it ran and does not
* need to be invalidated.
*/
bool invalidate_other;
/*
* Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate
* the corresponding user PCID needs a flush next time we
* switch to it; see SWITCH_TO_USER_CR3.
*/
unsigned short user_pcid_flush_mask;
/*
* Access to this CR4 shadow and to H/W CR4 is protected by
* disabling interrupts when modifying either one.
@ -214,6 +290,14 @@ static inline unsigned long cr4_read_shadow(void)
return this_cpu_read(cpu_tlbstate.cr4);
}
/*
* Mark all other ASIDs as invalid, preserves the current.
*/
static inline void invalidate_other_asid(void)
{
this_cpu_write(cpu_tlbstate.invalidate_other, true);
}
/*
* Save some of cr4 feature set we're using (e.g. Pentium 4MB
* enable and PPro Global page enable), so that any CPU's that boot
@ -233,19 +317,48 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
extern void initialize_tlbstate_and_flush(void);
/*
* Given an ASID, flush the corresponding user ASID. We can delay this
* until the next time we switch to it.
*
* See SWITCH_TO_USER_CR3.
*/
static inline void invalidate_user_asid(u16 asid)
{
/* There is no user ASID if address space separation is off */
if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
return;
/*
* We only have a single ASID if PCID is off and the CR3
* write will have flushed it.
*/
if (!cpu_feature_enabled(X86_FEATURE_PCID))
return;
if (!static_cpu_has(X86_FEATURE_PTI))
return;
__set_bit(kern_pcid(asid),
(unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
}
/*
* flush the entire current user mapping
*/
static inline void __native_flush_tlb(void)
{
/*
* If current->mm == NULL then we borrow a mm which may change during a
* task switch and therefore we must not be preempted while we write CR3
* back:
* Preemption or interrupts must be disabled to protect the access
* to the per CPU variable and to prevent being preempted between
* read_cr3() and write_cr3().
*/
preempt_disable();
WARN_ON_ONCE(preemptible());
invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid));
/* If current->mm == NULL then the read_cr3() "borrows" an mm */
native_write_cr3(__native_read_cr3());
preempt_enable();
}
/*
@ -259,6 +372,8 @@ static inline void __native_flush_tlb_global(void)
/*
* Using INVPCID is considerably faster than a pair of writes
* to CR4 sandwiched inside an IRQ flag save/restore.
*
* Note, this works with CR4.PCIDE=0 or 1.
*/
invpcid_flush_all();
return;
@ -285,7 +400,21 @@ static inline void __native_flush_tlb_global(void)
*/
static inline void __native_flush_tlb_single(unsigned long addr)
{
u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
if (!static_cpu_has(X86_FEATURE_PTI))
return;
/*
* Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1.
* Just use invalidate_user_asid() in case we are called early.
*/
if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE))
invalidate_user_asid(loaded_mm_asid);
else
invpcid_flush_one(user_pcid(loaded_mm_asid), addr);
}
/*
@ -301,14 +430,6 @@ static inline void __flush_tlb_all(void)
*/
__flush_tlb();
}
/*
* Note: if we somehow had PCID but not PGE, then this wouldn't work --
* we'd end up flushing kernel translations for the current ASID but
* we might fail to flush kernel translations for other cached ASIDs.
*
* To avoid this issue, we force PCID off if PGE is off.
*/
}
/*
@ -318,6 +439,16 @@ static inline void __flush_tlb_one(unsigned long addr)
{
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
__flush_tlb_single(addr);
if (!static_cpu_has(X86_FEATURE_PTI))
return;
/*
* __flush_tlb_single() will have cleared the TLB entry for this ASID,
* but since kernel space is replicated across all, we must also
* invalidate all others.
*/
invalidate_other_asid();
}
#define TLB_FLUSH_ALL -1UL
@ -378,6 +509,17 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
void native_flush_tlb_others(const struct cpumask *cpumask,
const struct flush_tlb_info *info);
static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
{
/*
* Bump the generation count. This also serves as a full barrier
* that synchronizes with switch_mm(): callers are required to order
* their read of mm_cpumask after their writes to the paging
* structures.
*/
return atomic64_inc_return(&mm->context.tlb_gen);
}
static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
struct mm_struct *mm)
{

View File

@ -283,34 +283,34 @@ TRACE_EVENT(vector_alloc_managed,
DECLARE_EVENT_CLASS(vector_activate,
TP_PROTO(unsigned int irq, bool is_managed, bool can_reserve,
bool early),
bool reserve),
TP_ARGS(irq, is_managed, can_reserve, early),
TP_ARGS(irq, is_managed, can_reserve, reserve),
TP_STRUCT__entry(
__field( unsigned int, irq )
__field( bool, is_managed )
__field( bool, can_reserve )
__field( bool, early )
__field( bool, reserve )
),
TP_fast_assign(
__entry->irq = irq;
__entry->is_managed = is_managed;
__entry->can_reserve = can_reserve;
__entry->early = early;
__entry->reserve = reserve;
),
TP_printk("irq=%u is_managed=%d can_reserve=%d early=%d",
TP_printk("irq=%u is_managed=%d can_reserve=%d reserve=%d",
__entry->irq, __entry->is_managed, __entry->can_reserve,
__entry->early)
__entry->reserve)
);
#define DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(name) \
DEFINE_EVENT_FN(vector_activate, name, \
TP_PROTO(unsigned int irq, bool is_managed, \
bool can_reserve, bool early), \
TP_ARGS(irq, is_managed, can_reserve, early), NULL, NULL); \
bool can_reserve, bool reserve), \
TP_ARGS(irq, is_managed, can_reserve, reserve), NULL, NULL); \
DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_activate);
DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_deactivate);

View File

@ -56,18 +56,27 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
/*
* WARNING: The entire pt_regs may not be safe to dereference. In some cases,
* only the iret frame registers are accessible. Use with caution!
* If 'partial' returns true, only the iret frame registers are valid.
*/
static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state,
bool *partial)
{
if (unwind_done(state))
return NULL;
if (partial) {
#ifdef CONFIG_UNWINDER_ORC
*partial = !state->full_regs;
#else
*partial = false;
#endif
}
return state->regs;
}
#else
static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state,
bool *partial)
{
return NULL;
}

View File

@ -7,6 +7,7 @@
#ifdef CONFIG_X86_VSYSCALL_EMULATION
extern void map_vsyscall(void);
extern void set_vsyscall_pgtable_user_bits(pgd_t *root);
/*
* Called on instruction fetch fault in vsyscall page.

View File

@ -78,7 +78,12 @@
#define X86_CR3_PWT _BITUL(X86_CR3_PWT_BIT)
#define X86_CR3_PCD_BIT 4 /* Page Cache Disable */
#define X86_CR3_PCD _BITUL(X86_CR3_PCD_BIT)
#define X86_CR3_PCID_MASK _AC(0x00000fff,UL) /* PCID Mask */
#define X86_CR3_PCID_BITS 12
#define X86_CR3_PCID_MASK (_AC((1UL << X86_CR3_PCID_BITS) - 1, UL))
#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */
#define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT)
/*
* Intel CPU features in CR4

View File

@ -2626,11 +2626,13 @@ static int __init apic_set_verbosity(char *arg)
apic_verbosity = APIC_DEBUG;
else if (strcmp("verbose", arg) == 0)
apic_verbosity = APIC_VERBOSE;
#ifdef CONFIG_X86_64
else {
pr_warning("APIC Verbosity level %s not recognised"
" use apic=verbose or apic=debug\n", arg);
return -EINVAL;
}
#endif
return 0;
}

View File

@ -151,7 +151,7 @@ static struct apic apic_flat __ro_after_init = {
.apic_id_valid = default_apic_id_valid,
.apic_id_registered = flat_apic_id_registered,
.irq_delivery_mode = dest_LowestPrio,
.irq_delivery_mode = dest_Fixed,
.irq_dest_mode = 1, /* logical */
.disable_esr = 0,

View File

@ -110,7 +110,7 @@ struct apic apic_noop __ro_after_init = {
.apic_id_valid = default_apic_id_valid,
.apic_id_registered = noop_apic_id_registered,
.irq_delivery_mode = dest_LowestPrio,
.irq_delivery_mode = dest_Fixed,
/* logical delivery broadcast to all CPUs: */
.irq_dest_mode = 1,

View File

@ -2988,7 +2988,7 @@ void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
}
int mp_irqdomain_activate(struct irq_domain *domain,
struct irq_data *irq_data, bool early)
struct irq_data *irq_data, bool reserve)
{
unsigned long flags;

View File

@ -39,17 +39,13 @@ static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
((apic->irq_dest_mode == 0) ?
MSI_ADDR_DEST_MODE_PHYSICAL :
MSI_ADDR_DEST_MODE_LOGICAL) |
((apic->irq_delivery_mode != dest_LowestPrio) ?
MSI_ADDR_REDIRECTION_CPU :
MSI_ADDR_REDIRECTION_LOWPRI) |
MSI_ADDR_REDIRECTION_CPU |
MSI_ADDR_DEST_ID(cfg->dest_apicid);
msg->data =
MSI_DATA_TRIGGER_EDGE |
MSI_DATA_LEVEL_ASSERT |
((apic->irq_delivery_mode != dest_LowestPrio) ?
MSI_DATA_DELIVERY_FIXED :
MSI_DATA_DELIVERY_LOWPRI) |
MSI_DATA_DELIVERY_FIXED |
MSI_DATA_VECTOR(cfg->vector);
}

View File

@ -105,7 +105,7 @@ static struct apic apic_default __ro_after_init = {
.apic_id_valid = default_apic_id_valid,
.apic_id_registered = default_apic_id_registered,
.irq_delivery_mode = dest_LowestPrio,
.irq_delivery_mode = dest_Fixed,
/* logical delivery broadcast to all CPUs: */
.irq_dest_mode = 1,

View File

@ -184,6 +184,7 @@ static void reserve_irq_vector_locked(struct irq_data *irqd)
irq_matrix_reserve(vector_matrix);
apicd->can_reserve = true;
apicd->has_reserved = true;
irqd_set_can_reserve(irqd);
trace_vector_reserve(irqd->irq, 0);
vector_assign_managed_shutdown(irqd);
}
@ -368,8 +369,18 @@ static int activate_reserved(struct irq_data *irqd)
int ret;
ret = assign_irq_vector_any_locked(irqd);
if (!ret)
if (!ret) {
apicd->has_reserved = false;
/*
* Core might have disabled reservation mode after
* allocating the irq descriptor. Ideally this should
* happen before allocation time, but that would require
* completely convoluted ways of transporting that
* information.
*/
if (!irqd_can_reserve(irqd))
apicd->can_reserve = false;
}
return ret;
}
@ -398,21 +409,21 @@ static int activate_managed(struct irq_data *irqd)
}
static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd,
bool early)
bool reserve)
{
struct apic_chip_data *apicd = apic_chip_data(irqd);
unsigned long flags;
int ret = 0;
trace_vector_activate(irqd->irq, apicd->is_managed,
apicd->can_reserve, early);
apicd->can_reserve, reserve);
/* Nothing to do for fixed assigned vectors */
if (!apicd->can_reserve && !apicd->is_managed)
return 0;
raw_spin_lock_irqsave(&vector_lock, flags);
if (early || irqd_is_managed_and_shutdown(irqd))
if (reserve || irqd_is_managed_and_shutdown(irqd))
vector_assign_managed_shutdown(irqd);
else if (apicd->is_managed)
ret = activate_managed(irqd);
@ -478,6 +489,7 @@ static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd,
} else {
/* Release the vector */
apicd->can_reserve = true;
irqd_set_can_reserve(irqd);
clear_irq_vector(irqd);
realloc = true;
}

Some files were not shown because too many files have changed in this diff Show More