1
0
Fork 0
Commit Graph

62 Commits (3377e227af441aff710726437adc20efc359fd9c)

Author SHA1 Message Date
Zhaoxiu Zeng fff7fb0b2d lib/GCD.c: use binary GCD algorithm instead of Euclidean
The binary GCD algorithm is based on the following facts:
	1. If a and b are all evens, then gcd(a,b) = 2 * gcd(a/2, b/2)
	2. If a is even and b is odd, then gcd(a,b) = gcd(a/2, b)
	3. If a and b are all odds, then gcd(a,b) = gcd((a-b)/2, b) = gcd((a+b)/2, b)

Even on x86 machines with reasonable division hardware, the binary
algorithm runs about 25% faster (80% the execution time) than the
division-based Euclidian algorithm.

On platforms like Alpha and ARMv6 where division is a function call to
emulation code, it's even more significant.

There are two variants of the code here, depending on whether a fast
__ffs (find least significant set bit) instruction is available.  This
allows the unpredictable branches in the bit-at-a-time shifting loop to
be eliminated.

If fast __ffs is not available, the "even/odd" GCD variant is used.

I use the following code to benchmark:

	#include <stdio.h>
	#include <stdlib.h>
	#include <stdint.h>
	#include <string.h>
	#include <time.h>
	#include <unistd.h>

	#define swap(a, b) \
		do { \
			a ^= b; \
			b ^= a; \
			a ^= b; \
		} while (0)

	unsigned long gcd0(unsigned long a, unsigned long b)
	{
		unsigned long r;

		if (a < b) {
			swap(a, b);
		}

		if (b == 0)
			return a;

		while ((r = a % b) != 0) {
			a = b;
			b = r;
		}

		return b;
	}

	unsigned long gcd1(unsigned long a, unsigned long b)
	{
		unsigned long r = a | b;

		if (!a || !b)
			return r;

		b >>= __builtin_ctzl(b);

		for (;;) {
			a >>= __builtin_ctzl(a);
			if (a == b)
				return a << __builtin_ctzl(r);

			if (a < b)
				swap(a, b);
			a -= b;
		}
	}

	unsigned long gcd2(unsigned long a, unsigned long b)
	{
		unsigned long r = a | b;

		if (!a || !b)
			return r;

		r &= -r;

		while (!(b & r))
			b >>= 1;

		for (;;) {
			while (!(a & r))
				a >>= 1;
			if (a == b)
				return a;

			if (a < b)
				swap(a, b);
			a -= b;
			a >>= 1;
			if (a & r)
				a += b;
			a >>= 1;
		}
	}

	unsigned long gcd3(unsigned long a, unsigned long b)
	{
		unsigned long r = a | b;

		if (!a || !b)
			return r;

		b >>= __builtin_ctzl(b);
		if (b == 1)
			return r & -r;

		for (;;) {
			a >>= __builtin_ctzl(a);
			if (a == 1)
				return r & -r;
			if (a == b)
				return a << __builtin_ctzl(r);

			if (a < b)
				swap(a, b);
			a -= b;
		}
	}

	unsigned long gcd4(unsigned long a, unsigned long b)
	{
		unsigned long r = a | b;

		if (!a || !b)
			return r;

		r &= -r;

		while (!(b & r))
			b >>= 1;
		if (b == r)
			return r;

		for (;;) {
			while (!(a & r))
				a >>= 1;
			if (a == r)
				return r;
			if (a == b)
				return a;

			if (a < b)
				swap(a, b);
			a -= b;
			a >>= 1;
			if (a & r)
				a += b;
			a >>= 1;
		}
	}

	static unsigned long (*gcd_func[])(unsigned long a, unsigned long b) = {
		gcd0, gcd1, gcd2, gcd3, gcd4,
	};

	#define TEST_ENTRIES (sizeof(gcd_func) / sizeof(gcd_func[0]))

	#if defined(__x86_64__)

	#define rdtscll(val) do { \
		unsigned long __a,__d; \
		__asm__ __volatile__("rdtsc" : "=a" (__a), "=d" (__d)); \
		(val) = ((unsigned long long)__a) | (((unsigned long long)__d)<<32); \
	} while(0)

	static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long),
								unsigned long a, unsigned long b, unsigned long *res)
	{
		unsigned long long start, end;
		unsigned long long ret;
		unsigned long gcd_res;

		rdtscll(start);
		gcd_res = gcd(a, b);
		rdtscll(end);

		if (end >= start)
			ret = end - start;
		else
			ret = ~0ULL - start + 1 + end;

		*res = gcd_res;
		return ret;
	}

	#else

	static inline struct timespec read_time(void)
	{
		struct timespec time;
		clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time);
		return time;
	}

	static inline unsigned long long diff_time(struct timespec start, struct timespec end)
	{
		struct timespec temp;

		if ((end.tv_nsec - start.tv_nsec) < 0) {
			temp.tv_sec = end.tv_sec - start.tv_sec - 1;
			temp.tv_nsec = 1000000000ULL + end.tv_nsec - start.tv_nsec;
		} else {
			temp.tv_sec = end.tv_sec - start.tv_sec;
			temp.tv_nsec = end.tv_nsec - start.tv_nsec;
		}

		return temp.tv_sec * 1000000000ULL + temp.tv_nsec;
	}

	static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long),
								unsigned long a, unsigned long b, unsigned long *res)
	{
		struct timespec start, end;
		unsigned long gcd_res;

		start = read_time();
		gcd_res = gcd(a, b);
		end = read_time();

		*res = gcd_res;
		return diff_time(start, end);
	}

	#endif

	static inline unsigned long get_rand()
	{
		if (sizeof(long) == 8)
			return (unsigned long)rand() << 32 | rand();
		else
			return rand();
	}

	int main(int argc, char **argv)
	{
		unsigned int seed = time(0);
		int loops = 100;
		int repeats = 1000;
		unsigned long (*res)[TEST_ENTRIES];
		unsigned long long elapsed[TEST_ENTRIES];
		int i, j, k;

		for (;;) {
			int opt = getopt(argc, argv, "n:r:s:");
			/* End condition always first */
			if (opt == -1)
				break;

			switch (opt) {
			case 'n':
				loops = atoi(optarg);
				break;
			case 'r':
				repeats = atoi(optarg);
				break;
			case 's':
				seed = strtoul(optarg, NULL, 10);
				break;
			default:
				/* You won't actually get here. */
				break;
			}
		}

		res = malloc(sizeof(unsigned long) * TEST_ENTRIES * loops);
		memset(elapsed, 0, sizeof(elapsed));

		srand(seed);
		for (j = 0; j < loops; j++) {
			unsigned long a = get_rand();
			/* Do we have args? */
			unsigned long b = argc > optind ? strtoul(argv[optind], NULL, 10) : get_rand();
			unsigned long long min_elapsed[TEST_ENTRIES];
			for (k = 0; k < repeats; k++) {
				for (i = 0; i < TEST_ENTRIES; i++) {
					unsigned long long tmp = benchmark_gcd_func(gcd_func[i], a, b, &res[j][i]);
					if (k == 0 || min_elapsed[i] > tmp)
						min_elapsed[i] = tmp;
				}
			}
			for (i = 0; i < TEST_ENTRIES; i++)
				elapsed[i] += min_elapsed[i];
		}

		for (i = 0; i < TEST_ENTRIES; i++)
			printf("gcd%d: elapsed %llu\n", i, elapsed[i]);

		k = 0;
		srand(seed);
		for (j = 0; j < loops; j++) {
			unsigned long a = get_rand();
			unsigned long b = argc > optind ? strtoul(argv[optind], NULL, 10) : get_rand();
			for (i = 1; i < TEST_ENTRIES; i++) {
				if (res[j][i] != res[j][0])
					break;
			}
			if (i < TEST_ENTRIES) {
				if (k == 0) {
					k = 1;
					fprintf(stderr, "Error:\n");
				}
				fprintf(stderr, "gcd(%lu, %lu): ", a, b);
				for (i = 0; i < TEST_ENTRIES; i++)
					fprintf(stderr, "%ld%s", res[j][i], i < TEST_ENTRIES - 1 ? ", " : "\n");
			}
		}

		if (k == 0)
			fprintf(stderr, "PASS\n");

		free(res);

		return 0;
	}

Compiled with "-O2", on "VirtualBox 4.4.0-22-generic #38-Ubuntu x86_64" got:

  zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
  gcd0: elapsed 10174
  gcd1: elapsed 2120
  gcd2: elapsed 2902
  gcd3: elapsed 2039
  gcd4: elapsed 2812
  PASS
  zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
  gcd0: elapsed 9309
  gcd1: elapsed 2280
  gcd2: elapsed 2822
  gcd3: elapsed 2217
  gcd4: elapsed 2710
  PASS
  zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
  gcd0: elapsed 9589
  gcd1: elapsed 2098
  gcd2: elapsed 2815
  gcd3: elapsed 2030
  gcd4: elapsed 2718
  PASS
  zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
  gcd0: elapsed 9914
  gcd1: elapsed 2309
  gcd2: elapsed 2779
  gcd3: elapsed 2228
  gcd4: elapsed 2709
  PASS

[akpm@linux-foundation.org: avoid #defining a CONFIG_ variable]
Signed-off-by: Zhaoxiu Zeng <zhaoxiu.zeng@gmail.com>
Signed-off-by: George Spelvin <linux@horizon.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-05-20 17:58:30 -07:00
James Hogan 6ad816e77e MIPS: Add probing & defs for VZ & guest features
Add a few new cpu-features.h definitions for VZ sub-features, namely the
existence of the CP0_GuestCtl0Ext, CP0_GuestCtl1, and CP0_GuestCtl2
registers, and support for GuestID to dialias TLB entries belonging to
different guests.

Also add certain features present in the guest, with the naming scheme
cpu_guest_has_*. These are added separately to the main options bitfield
since they generally parallel similar features in the root context. A
few of these (FPU, MSA, watchpoints, perf counters, CP0_[X]ContextConfig
registers, MAAR registers, and probably others in future) can be
dynamically configured in the guest context, for which the
cpu_guest_has_dyn_* macros are added.

[ralf@linux-mips.org: Resolve merge conflict.]

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/13231/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-05-13 15:30:25 +02:00
James Hogan 30228c40f0 MIPS: Add perf counter feature
Add CPU feature for standard MIPS r2 performance counters, as determined
by the Config1.PC bit. Both perf_events and oprofile probe this bit, so
lets combine the probing and change both to use cpu_has_perf.

This will also be used for VZ support in KVM to know whether performance
counters exist which can be exposed to guests.

[ralf@linux-mips.org: resolve conflict.]

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Robert Richter <rric@kernel.org>
Cc: linux-mips@linux-mips.org
Cc: oprofile-list@lists.sf.net
Patchwork: https://patchwork.linux-mips.org/patch/13226/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-05-13 15:30:25 +02:00
James Hogan f18bdfa191 MIPS: Add defs & probing of [X]ContextConfig
The CP0_[X]ContextConfig registers are present if CP0_Config3.CTXTC or
CP0_Config3.SM are set, and provide more control over which bits of
CP0_[X]Context are set to the faulting virtual address on a TLB
exception.

KVM/VZ will need to be able to save and restore these registers in the
guest context, so add the relevant definitions and probing of the
ContextConfig feature in the root context first.

[ralf@linux-mips.org: resolve merge conflict.]

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/13225/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-05-13 15:30:25 +02:00
James Hogan e06a1548f3 MIPS: Add defs & probing of BadInstr[P] registers
The optional CP0_BadInstr and CP0_BadInstrP registers are written with
the encoding of the instruction that caused a synchronous exception to
occur, and the prior branch instruction if in a delay slot.

These will be useful for instruction emulation in KVM, and especially
for VZ support where reading guest virtual memory is a bit more awkward.

Add CPU option numbers and cpu_has_* definitions to indicate the
presence of each registers, and add code to probe for them using bits in
the CP0_Config3 register.

[ralf@linux-mips.org: resolve merge conflict.]

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/13224/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-05-13 15:30:25 +02:00
James Hogan 37fb60f8e3 MIPS: Add defs & probing of extended CP0_EBase
The CP0_EBase register may optionally have a write gate (WG) bit to
allow the upper bits to be written, i.e. bits 31:30 on MIPS32 since r3
(to allow for an exception base outside of KSeg0/KSeg1 when segmentation
control is in use) and bits 63:30 on MIPS64 (which also implies the
extension of CP0_EBase to 64 bits long).

The presence of this feature will need to be known about for VZ support
in order to correctly save and restore all the bits of the guest
CP0_EBase register, so add CPU feature definition and probing for this
feature.

Probing the WG bit on MIPS64 can be a bit fiddly, since 64-bit COP0
register access instructions were UNDEFINED for 32-bit registers prior
to MIPS r6, and it'd be nice to be able to probe without clobbering the
existing state, so there are 3 potential paths:

- If we do a 32-bit read of CP0_EBase and the WG bit is already set, the
  register must be 64-bit.

- On MIPS r6 we can do a 64-bit read-modify-write to set CP0_EBase.WG,
  since the upper bits will read 0 and be ignored on write if the
  register is 32-bit.

- On pre-r6 cores, we do a 32-bit read-modify-write of CP0_EBase. This
  avoids the potentially UNDEFINED behaviour, but will clobber the upper
  32-bits of CP0_EBase if it isn't a simple sign extension (which also
  requires us to ensure BEV=1 or modifying the exception base would be
  UNDEFINED too). It is hopefully unlikely a bootloader would set up
  CP0_EBase to a 64-bit segment and leave WG=0.

[ralf@linux-mips.org: Resolved merge conflict.]

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Tested-by: Matt Redfearn <matt.redfearn@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/13223/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-05-13 15:30:25 +02:00
James Hogan 12822570a2 MIPS: Separate XPA CPU feature into LPA and MVH
XPA (eXtended Physical Addressing) should be detected as a combination
of two architectural features:
- Large Physical Address (as per Config3.LPA). With XPA this will be set
  on MIPS32r5 cores, but it may also be set for MIPS64r2 cores too.
- MTHC0/MFHC0 instructions (as per Config5.MVH). With XPA this will be
  set, but it may also be set in VZ guest context even when Config3.LPA
  in the guest context has been cleared by the hypervisor.

As such, XPA is only usable if both bits are set. Update CPU features to
separate these two features, with cpu_has_xpa requiring both to be set.

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Paul Burton <paul.burton@imgtec.com>
Cc: Maciej W. Rozycki <macro@imgtec.com>
Cc: Joshua Kinard <kumba@gentoo.org>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/13112/
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-05-13 14:02:24 +02:00
Huacai Chen 380cd582c0 MIPS: Loongson-3: Fast TLB refill handler
Loongson-3A R2 has pwbase/pwfield/pwsize/pwctl registers in CP0 (this
is very similar to HTW) and lwdir/lwpte/lddir/ldpte instructions which
can be used for fast TLB refill.

[ralf@linux-mips.org: Resolve conflict.]

Signed-off-by: Huacai Chen <chenhc@lemote.com>
Cc: Aurelien Jarno <aurelien@aurel32.net>
Cc: Steven J . Hill <sjhill@realitydiluted.com>
Cc: Fuxin Zhang <zhangfx@lemote.com>
Cc: Zhangjin Wu <wuzhangjin@gmail.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/12754/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-05-13 14:02:15 +02:00
Zubair Lutfullah Kakakhel b5a6455cde MIPS: Detect DSP v3 support
DSPv3 is supported on all MIPSr6 systems which indicate support for DSPv2.

This doesn't require any changes to the kernel's handling of DSP
resources. The patch is to detect support and indicate it in /proc/cpuinfo

DSP v3 introduces a new instruction BPOSGE32C

Signed-off-by: Zubair Lutfullah Kakakhel <Zubair.Kakakhel@imgtec.com>
Reviewed-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/12918/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-05-13 14:01:57 +02:00
Paul Burton f270d881fa MIPS: Detect MIPSr6 Virtual Processor support
MIPSr6 introduces support for "Virtual Processors", which are
conceptually similar to VPEs from the now-deprecated MT ASE. Detect
whether the system supports VPs using the VP bit in Config5, adding
cpu_has_vp for use by later patches.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: Maciej W. Rozycki <macro@imgtec.com>
Cc: Joshua Kinard <kumba@gentoo.org>
Cc: Steven J. Hill <sjhill@realitydiluted.com>
Cc: Leonid Yegoshin <Leonid.Yegoshin@imgtec.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/12327/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-05-13 14:01:47 +02:00
Maciej W. Rozycki 9519ef37a4 MIPS: Define the legacy-NaN and 2008-NaN features
Allocate CPU option bits and define macros for the legacy-NaN and
2008-NaN IEEE Std 754 MIPS architecture features.  Unconditionally mark
the legacy-NaN feature as present across hardware and emulated
floating-point configurations.

Signed-off-by: Maciej W. Rozycki <macro@imgtec.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Matthew Fortune <Matthew.Fortune@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/11475/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-01-20 00:39:20 +01:00
Paul Burton 033549c609 MIPS: Allow RIXI for 32-bit kernels on MIPS64
Commit a68d09a156 ("MIPS: Don't use RI/XI with 32-bit kernels on
64-bit CPUs") prevented use of RIXI on MIPS64 systems, stating that the
"TLB handlers cannot handle this case". What they actually couldn't
handle was cases where there were less fill bits in the Entry{Lo,Hi}
registers than bits used by software in PTEs. The handlers can now deal
with this case, so enable RIXI for MIPS32 kernels on MIPS64 systems.

Note that beyond the obvious benefits provided by having RIXI on such
systems, this is required for systems implementing MIPSr6 where RIXI
cannot be disabled.

This reverts commit a68d09a156 ("MIPS: Don't use RI/XI with 32-bit
kernels on 64-bit CPUs").

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: Joshua Kinard <kumba@gentoo.org>
Cc: Leonid Yegoshin <Leonid.Yegoshin@imgtec.com>
Cc: Maciej W. Rozycki <macro@linux-mips.org>
Cc: linux-kernel@vger.kernel.org
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Markos Chandras <markos.chandras@imgtec.com>
Patchwork: https://patchwork.linux-mips.org/patch/11219/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2015-11-11 08:35:39 +01:00
James Hogan 2f6f31363c MIPS: cpu-features: Add cpu_has_ftlb
Add cpu_has_ftlb, which specifies that an FTLB is present in addition to
the VTLB, probed based on whether Config.MT == 4 (rather than 1 for
standard JTLB).

This is necessary since MIPS release 6 removes Config4.MMUExtDef, so the
presence of the FTLB fields in Config4 must be determined from Config.MT
instead.

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Markos Chandras <markos.chandras@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/11159/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2015-09-22 21:11:05 +02:00
James Hogan aaa7be48fd MIPS: Probe for small (1KiB) page support
Probe Config3 for small page support. This will be useful to give clues
as to whether the PageGrain register exists.

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Maciej W. Rozycki <macro@linux-mips.org>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/10722/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2015-09-03 12:07:47 +02:00
Joshua Kinard 8d5ded16ee MIPS: R12000: Enable branch prediction global history
The R12000 added a new feature to enhance branch prediction called
"global history".  Per the Vr10000 Series User Manual (U10278EJ4V0UM),
Coprocessor 0, Diagnostic Register (22):

"""
If bit 26 is set, branch prediction uses all eight bits of the global
history register.  If bit 26 is not set, then bits 25:23 specify a count
of the number of bits of global history to be used. Thus if bits 26:23
are all zero, global history is disabled.

The global history contains a record of the taken/not-taken status of
recently executed branches, and when used is XOR'ed with the PC of a
branch being predicted to produce a hashed value for indexing the BPT.
Some programs with small "working set of conditional branches" benefit
significantly from the use of such hashing, some see slight performance
degradation.
"""

This patch enables global history on R12000 CPUs and up by setting bit
26 in the branch prediction diagnostic register (CP0 $22) to '1'.  Bits
25:23 are left alone so that all eight bits of the global history
register are available for branch prediction.

Signed-off-by: Joshua Kinard <kumba@gentoo.org>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2015-06-21 21:54:13 +02:00
Ralf Baechle 3e20a26b02 Merge branch '4.0-fixes' into mips-for-linux-next 2015-04-13 16:03:32 +02:00
Ralf Baechle 9cdf30bd3b MIPS: Fix cpu_has_mips_r2_exec_hazard.
Returns a non-zero value if the current processor implementation requires
an IHB instruction to deal with an instruction hazard as per MIPS R2
architecture specification, zero otherwise.

For a discussion, see http://patchwork.linux-mips.org/patch/9539/.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2015-04-10 15:41:47 +02:00
Maciej W. Rozycki 2d83fea786 MIPS: Correct FP ISA requirements
Correct ISA requirements for floating-point instructions:

* the CU3 exception signifies a real COP3 instruction in MIPS I & II,

* the BC1FL and BC1TL instructions are not supported in MIPS I,

* the SQRT.fmt instructions are indeed supported in MIPS II,

* the LDC1 and SDC1 instructions are indeed supported in MIPS32r1,

* the CEIL.W.fmt, FLOOR.W.fmt, ROUND.W.fmt and TRUNC.W.fmt instructions
  are indeed supported in MIPS32,

* the CVT.L.fmt and CVT.fmt.L instructions are indeed supported in
  MIPS32r2 and MIPS32r6,

* the CEIL.L.fmt, FLOOR.L.fmt, ROUND.L.fmt and TRUNC.L.fmt instructions
  are indeed supported in MIPS32r2 and MIPS32r6,

* the RSQRT.fmt and RECIP.fmt instructions are indeed supported in
  MIPS64r1,

Also simplify conditionals for MIPS III and MIPS IV FPU instructions and
the handling of the MOVCI minor opcode.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/9700/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2015-04-08 01:10:05 +02:00
Maciej W. Rozycki 18a2c2c6b9 MIPS: Correct `nofpu' non-functionality
The `cpu_has_fpu' feature flag must not be hardcoded to 1 or the `nofpu'
kernel option will be ignored.  Remove any such overrides and add a
cautionary note.  Hardcoding to 0 is fine for FPU-less platforms.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/9694/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2015-04-08 01:09:49 +02:00
James Hogan 9b3274bd58 MIPS: Add arch CDMM definitions and probing
Add architectural definitions and probing for the MIPS Common Device
Memory Map (CDMM) region. When supported and enabled at a particular
physical address, this region allows some number of per-CPU devices to
be discovered and controlled via MMIO.

A bit exists in Config3 to determine whether the feature is present, and
a CDMMBase CP0 register allows the region to be enabled at a particular
physical address.

[ralf@linux-mips.org: Sort conflict with other patches.]

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/9178/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2015-03-31 12:04:12 +02:00
Steven J. Hill c5b367835c MIPS: Add support for XPA.
Add support for extended physical addressing (XPA) so that
32-bit platforms can access equal to or greater than 40 bits
of physical addresses.

NOTE:
      1) XPA and EVA are not the same and cannot be used
         simultaneously.
      2) If you configure your kernel for XPA, the PTEs
         and all address sizes become 64-bit.
      3) Your platform MUST have working HIGHMEM support.

Signed-off-by: Steven J. Hill <Steven.Hill@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/9355/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2015-03-19 17:39:49 +01:00
Markos Chandras e0d32f33e6 MIPS: Handle MIPS IV, V and R2 FPU instructions on MIPS R6 as well
MIPS R2 FPU instructions are also present in MIPS R6 so amend the
preprocessor definitions to take MIPS R6 into consideration.

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
2015-02-17 15:37:37 +00:00
Markos Chandras 5aed9da128 MIPS: Add LLB bit and related feature for the Config 5 CP0 register
The LLBIT (bit 4) in the Config5 CP0 register indicates the software
availability of the Load-Linked bit. This bit is only set by hardware
and it has the following meaning:

0: LLB functionality is not supported
1: LLB functionality is supported. The following feature are also
supported:

- ERETNC instruction. Similar to ERET but it does not clear the LLB
bit in the LLAddr register.
- CP0 LLAddr/LLB bit must be set
- LLbit is software accessible through the LLAddr[0]

This will be used later on to emulate R2 LL/SC instructions.

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
2015-02-17 15:37:36 +00:00
Markos Chandras 515a6393db MIPS: kernel: proc: Add MIPS R6 support to /proc/cpuinfo
Print 'mips64r6' and/or 'mips32r6' if the kernel is running on
a MIPS R6 core.

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
2015-02-17 15:37:26 +00:00
Leonid Yegoshin 34c56fc1c1 MIPS: asm: cpu: Add MIPSR6 ISA definitions
Add MIPS R6 to the ISA definitions

Signed-off-by: Leonid Yegoshin <Leonid.Yegoshin@imgtec.com>
Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
2015-02-17 15:37:19 +00:00
Paul Burton adac5d535d MIPS: detect presence of the FRE & UFR bits
Detect the presence of the Config5 FRE & UFE bits, as indicated by the
FREP bit in FPIR. Record this as a CPU option bit, and provide a
cpu_has_fre macro to ease checking of that option bit.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/7678/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2014-11-24 07:45:06 +01:00
Chen Jie 3c09bae43b MIPS: Use WSBH/DSBH/DSHD on Loongson 3A
Signed-off-by: chenj <chenj@lemote.com>
Cc: linux-mips@linux-mips.org
Cc: chenhc@lemote.com
Patchwork: https://patchwork.linux-mips.org/patch/7542/
Patchwork: https://patchwork.linux-mips.org/patch/7550/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2014-09-22 13:35:46 +02:00
Paul Burton 1f6c52ff72 MIPS: detect presence of MAARs
Detect the presence of MAAR using the MRP bit in Config5, and record
that presence using a CPU option bit. A cpu_has_maar macro will then
allow code to conditionalise upon the presence of MAARs.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/7330/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2014-08-02 00:06:45 +02:00
Leonid Yegoshin 6ee729aa6c MIPS: Add new option for unique RI/XI exceptions
MIPSr5 added support for unique exception codes for the Read-Inhibit
and Execute-Inhibit exceptions.

Signed-off-by: Leonid Yegoshin <Leonid.Yegoshin@imgtec.com>
Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/7338/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2014-08-02 00:06:39 +02:00
Markos Chandras e647e6b5b3 MIPS: cpu: Add new cpu option for Hardware Table Walker.
Moreover, report hardware page table walker support as 'htw' in the ASE
list of /proc/cpuinfo, if the core implements this feature.

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/7334/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2014-08-02 00:06:38 +02:00
David Daney a68d09a156 MIPS: Don't use RI/XI with 32-bit kernels on 64-bit CPUs
The TLB handlers cannot handle this case, so disable it for now.

Signed-off-by: David Daney <david.daney@cavium.com>
Signed-off-by: Andreas Herrmann <andreas.herrmann@caviumnetworks.com>
Cc: linux-mips@linux-mips.org
Cc: James Hogan <james.hogan@imgtec.com>
Cc: kvm@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/7007/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2014-05-30 21:01:10 +02:00
Ralf Baechle 08a07904e1 MIPS: math-emu: Remove most ifdefery.
Most of these tests should be runtime tests.  This also finally means
that on a MIPS III systems MIPS IV opcodes are going to result in an
exception as they're supposed to.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2014-05-21 11:12:58 +02:00
Markos Chandras 7ae6696656 MIPS: asm: cpu: Add cpu flag for Enhanced Virtual Addressing
The MIPS *Aptiv family uses bit 28 in Config5 CP0 register to
indicate whether the core supports EVA or not.

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
2014-03-26 23:09:18 +01:00
Paul Burton a5e9a69e2c MIPS: Detect the MSA ASE
This patch adds support for probing the MSAP bit within the Config3
register in order to detect the presence of the MSA ASE. Presence of the
ASE will be indicated in /proc/cpuinfo. The value of the MSA
implementation register will be displayed at boot to aid debugging and
verification of a correct setup, as is done for the FPU.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/6430/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2014-03-26 23:09:10 +01:00
Steven J. Hill 4a0156fbfb MIPS: features: Add initial support for Segmentation Control registers
MIPS32R3 introduced a new set of Segmentation Control registers which
increase the flexibility of the segmented-based memory scheme.

Signed-off-by: Steven J. Hill <Steven.Hill@imgtec.com>
Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Signed-off-by: John Crispin <blogic@openwrt.org>
Patchwork: http://patchwork.linux-mips.org/patch/6131/
2014-01-22 20:18:58 +01:00
Leonid Yegoshin 1745c1ef88 MIPS: features: Add initial support for TLBINVF capable cores
New Aptiv cores support the TLBINVF instruction for flushing
the VTLB.

Signed-off-by: Leonid Yegoshin <Leonid.Yegoshin@imgtec.com>
Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Signed-off-by: John Crispin <blogic@openwrt.org>
Patchwork: http://patchwork.linux-mips.org/patch/6130/
2014-01-22 20:18:58 +01:00
Maciej W. Rozycki becee6b8c7 MIPS: cpu-features.h: s/MIPS53/MIPS64/
No support for MIPS53 processors yet.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/5876/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2013-09-24 11:07:18 +02:00
Ralf Baechle 69f24d1784 MIPS: Optimize current_cpu_type() for better code.
o Move current_cpu_type() to a separate header file
 o #ifdefing on supported CPU types lets modern GCC know that certain
   code in callers may be discarded ideally turning current_cpu_type() into
   a function returning a constant.
 o Use current_cpu_type() rather than direct access to struct cpuinfo_mips.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Cc: Steven J. Hill <Steven.Hill@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/5833/
2013-09-17 18:50:53 +02:00
Ralf Baechle cf5b2d23a7 MIPS: oprofile: Fix BUG due to smp_processor_id() in preemptible code.
current_cpu_type() is not preemption-safe.
If CONFIG_PREEMPT is enabled then mipsxx_reg_setup() can be called from preemptible state.
Added get_cpu()/put_cpu() pair to make it preemption-safe.

This was found while testing oprofile with CONFIG_DEBUG_PREEMPT enable.

/usr/zntestsuite # opcontrol --init
/usr/zntestsuite # opcontrol --setup --event=L2_CACHE_ACCESSES:500 --event=L2_CACHE_MISSES:500 --no-vmlinux
/usr/zntestsuite # opcontrol --start
Using 2.6+ OProfile kernel interface.
BUG: using smp_processor_id() in preemptible [00000000] code: oprofiled/1362
caller is mipsxx_reg_setup+0x11c/0x164
CPU: 0 PID: 1362 Comm: oprofiled Not tainted 3.10.4 #18
Stack : 00000006 70757465 00000000 00000000 00000000 00000000 80b173f6 00000037
          80b10000 00000000 80b21614 88f5a220 00000000 00000000 00000000 00000000
          00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
          00000000 00000000 00000000 89c49c00 89c49c2c 80721254 807b7927 8012c1d0
          80b10000 80721254 00000000 00000552 88f5a220 80b1335c 807b78e6 89c49ba8
          ...
Call Trace:
[<801099a4>] show_stack+0x64/0x7c
[<80665520>] dump_stack+0x20/0x2c
[<803a2250>] debug_smp_processor_id+0xe0/0xf0
[<8052df24>] mipsxx_reg_setup+0x11c/0x164
[<8052cd70>] op_mips_setup+0x24/0x4c
[<80529cfc>] oprofile_setup+0x5c/0x12c
[<8052b9f8>] event_buffer_open+0x78/0xf8
[<801c3150>] do_dentry_open.isra.15+0x2b8/0x3b0
[<801c3270>] finish_open+0x28/0x4c
[<801d49b8>] do_last.isra.41+0x2cc/0xd00
[<801d54a0>] path_openat+0xb4/0x4c4
[<801d5c44>] do_filp_open+0x3c/0xac
[<801c4744>] do_sys_open+0x110/0x1f4
[<8010f47c>] stack_done+0x20/0x44

Bug reported and original patch by Jerin Jacob <jerinjacobk@gmail.com>.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Acked-by: Jerin Jacob <jerinjacobk@gmail.com>
2013-08-05 13:34:22 +02:00
Tony Wu fc192e50f8 MIPS: Cleanup indentation and whitespace
Signed-off-by: Tony Wu <tung7970@gmail.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/5536/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2013-07-01 15:10:57 +02:00
David Daney 3ddc14add5 MIPS: Only set cpu_has_mmips if SYS_SUPPORTS_MICROMIPS
As Jonas Gorske said in his patch:

   Disable cpu_has_mmips for everything but SEAD3 and MALTA. Most of
   these platforms are from before the micromips introduction, so they
   are very unlikely to implement it.

   Reduces an -Os compiled, uncompressed kernel image by 8KiB for
   BCM63XX.

This patch taks a different approach than his, we gate the runtime
test for microMIPS by the config symbol SYS_SUPPORTS_MICROMIPS.

Signed-off-by: David Daney <david.daney@cavium.com>
Cc: Jonas Gorski <jogo@openwrt.org>
Cc: Steven J. Hill <Steven.Hill@imgtec.com>
Acked-by: Steven J. Hill <Steven.Hill@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/5327/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2013-07-01 15:10:57 +02:00
Ralf Baechle 1990e5429c MIPS: Get rid of MIPS I flag and test macros.
MIPS I is the ancestor of all MIPS ISA and architecture variants.  Anything
ever build in the MIPS empire is either MIPS I or at least contains MIPS I.
If it's running Linux, that is.

So there is little point in having cpu_has_mips_1 because it will always
evaluate as true - though usually only at runtime.  Thus there is no
point in having the MIPS_CPU_ISA_I ISA flag, so get rid of it.

Little complication: traps.c was using a test for a pure MIPS I ISA as
a test for an R3000-style cp0.  To deal with that, use a check for
cpu_has_3kex or cpu_has_4kex instead.

cpu_has_3kex is a new macro.  At the moment its default implementation is
!cpu_has_4kex but this may eventually change if Linux is ever going to
support the oddball MIPS processors R6000 and R8000 so users of either
of these macros should not make any assumptions.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Patchwork: https://patchwork.linux-mips.org/patch/5551/
2013-07-01 15:10:56 +02:00
Huacai Chen 8759934e2b MIPS: Build uasm-generated code only once to avoid CPU Hotplug problem
This and the next patch resolve memory corruption problems while CPU
hotplug. Without these patches, memory corruption can triggered easily
as below:

On a quad-core MIPS platform, use "spawn" of UnixBench-5.1.3 (http://
code.google.com/p/byte-unixbench/) and a CPU hotplug script like this
(hotplug.sh):
while true; do
echo 0 >/sys/devices/system/cpu/cpu1/online
echo 0 >/sys/devices/system/cpu/cpu2/online
echo 0 >/sys/devices/system/cpu/cpu3/online
sleep 1
echo 1 >/sys/devices/system/cpu/cpu1/online
echo 1 >/sys/devices/system/cpu/cpu2/online
echo 1 >/sys/devices/system/cpu/cpu3/online
sleep 1
done

Run "hotplug.sh" and then run "spawn 10000", spawn will get segfault
after a few minutes.

This patch:
Currently, clear_page()/copy_page() are generated by Micro-assembler
dynamically. But they are unavailable until uasm_resolve_relocs() has
finished because jump labels are illegal before that. Since these
functions are shared by every CPU, we only call build_clear_page()/
build_copy_page() only once at boot time. Without this patch, programs
will get random memory corruption (segmentation fault, bus error, etc.)
while CPU Hotplug (e.g. one CPU is using clear_page() while another is
generating it in cpu_cache_init()).

For similar reasons we modify build_tlb_refill_handler()'s invocation.

V2:
1, Rework the code to make CPU#0 can be online/offline.
2, Introduce cpu_has_local_ebase feature since some types of MIPS CPU
   need a per-CPU tlb_refill_handler().

Signed-off-by: Huacai Chen <chenhc@lemote.com>
Signed-off-by: Hongbing Hu <huhb@lemote.com>
Acked-by: David Daney <david.daney@cavium.com>
Patchwork: http://patchwork.linux-mips.org/patch/4994/
Acked-by: John Crispin <blogic@openwrt.org>
2013-05-08 01:19:06 +02:00
Ralf Baechle 8bfc245f9a Merge branch 'mips-next-3.9' of git://git.linux-mips.org/pub/scm/john/linux-john into mips-for-linux-next 2013-02-21 12:51:33 +01:00
David Daney 1e7decdb27 MIPS: Probe for and report hardware virtualization support.
The presence of the MIPS Virtualization Application-Specific Extension
is indicated by CP0_Config3[23].  Probe for this and report it in
/proc/cpuinfo.

Signed-off-by: David Daney <david.daney@cavium.com>
Patchwork: http://patchwork.linux-mips.org/patch/4904/
Signed-off-by: John Crispin <blogic@openwrt.org>
2013-02-19 09:36:36 +01:00
Steven J. Hill f8fa4811db MIPS: Add support for the M14KEc core.
Signed-off-by: Steven J. Hill <sjhill@mips.com>
Patchwork: http://patchwork.linux-mips.org/patch/4682/
Signed-off-by: John Crispin <blogic@openwrt.org>
2013-02-17 00:15:23 +01:00
Steven J. Hill a96102be70 MIPS: Add printing of ISA version in cpuinfo.
Display the MIPS ISA version release in the /proc/cpuinfo file.

[ralf@linux-mips.org: Add support for MIPS I ... IV legacy architecture
revisions.  Also differenciate between MIPS32 and MIPS64 versions instead
of lumping them together as just r1 and r2.

Note to application programmers: this indicates the CPU's ISA level
It does not imply the current execution environment does support it.  For
example an O32 application seeing "mips64r2" would still be restricted by
by the execution environment to 32-bit - but the kernel could run mips64r2
code.  The same for a 32-bit kernel running on a 64-bit processor.  This
field doesn't include ASEs or optional architecture modules nor other
detailed flags such as the availability of an FPU.]

Signed-off-by: Steven J. Hill <sjhill@mips.com>
Cc: linux-mips@linux-mips.org
Patchwork: http://patchwork.linux-mips.org/patch/4714/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2013-02-15 23:07:38 +01:00
Ralf Baechle 7034228792 MIPS: Whitespace cleanup.
Having received another series of whitespace patches I decided to do this
once and for all rather than dealing with this kind of patches trickling
in forever.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2013-02-01 10:00:22 +01:00
Steven J. Hill ee80f7c73d MIPS: Add detection of DSP ASE Revision 2.
[ralf@linux-mips.org: This patch really only detects the ASE and passes its
existence on to userland via /proc/cpuinfo.  The DSP ASE Rev 2. adds new
resources but no resources that would need management by the kernel.]

Signed-off-by: Steven J. Hill <sjhill@mips.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/4165/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2012-10-11 11:05:03 +02:00
Al Cooper da4b62cd67 MIPS: perf: Add cpu feature bit for PCI (performance counter interrupt)
The PCI (Program Counter Interrupt) bit in the "cause" register
is mandatory for MIPS32R2 cores, but has also been added to some R1
cores (BMIPS5000). This change adds a cpu feature bit to make it
easier to check for and use this feature.

Signed-off-by: Al Cooper <alcooperx@gmail.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/4106/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2012-10-11 11:04:34 +02:00