alistair23-linux/lib/raid6/altivec.uc

/* -*- linux-c -*- ------------------------------------------------------- *
 *
 *   Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
 *   Boston MA 02111-1307, USA; either version 2 of the License, or
 *   (at your option) any later version; incorporated herein by reference.
 *
 * ----------------------------------------------------------------------- */

/*
 * raid6altivec$#.c
 *
 * $#-way unrolled Altivec RAID-6 instruction set
 *
 * This file is postprocessed using unroll.awk
 *
 * <benh> hpa: in process,
 * you can just "steal" the vec unit with enable_kernel_altivec() (but
 * bracket this with preempt_disable/enable or in a lock)
 */

#include <linux/raid/pq.h>

#ifdef CONFIG_ALTIVEC

#include <altivec.h>
#ifdef __KERNEL__
# include <asm/cputable.h>
# include <asm/switch_to.h>
#endif /* __KERNEL__ */

/*
 * This is the C data type to use.  We use a vector of
 * signed char so vec_cmpgt() will generate the right
 * instruction.
 */

typedef vector signed char unative_t;

/* Vector constant with the value x replicated in all 16 byte lanes. */
#define NBYTES(x) ((vector signed char) {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x})
/* Size in bytes of one unative_t, i.e. the amount handled per vector op. */
#define NSIZE	sizeof(unative_t)

/*
 * SHLBYTE() doubles every byte lane of the vector independently.
 * Adding a lane to itself is a left shift by one bit, and nothing is
 * carried over from one byte into the next.
 */
static inline __attribute_const__ unative_t SHLBYTE(unative_t v)
{
	unative_t doubled = vec_add(v, v);

	return doubled;
}

/*
 * MASK() spreads the top bit of every byte across that whole byte:
 * a byte whose high bit is set becomes 0xFF, any other byte becomes 0x00.
 */
static inline __attribute_const__ unative_t MASK(unative_t v)
{
	unative_t all_zero = NBYTES(0);

	/*
	 * The lanes are signed, so "0 > lane" is true exactly when the high
	 * bit is set.  vec_cmpgt yields a vector bool char, which is why the
	 * result is cast back to unative_t.
	 */
	return (unative_t)vec_cmpgt(all_zero, v);
}


/*
 * Compute the P (XOR parity) and Q (RS syndrome) blocks from the data
 * blocks, NSIZE*$# bytes per outer loop iteration.
 *
 * disks: number of entries in ptrs: the data blocks, then P, then Q
 * bytes: length of each block in bytes
 *        NOTE(review): the loop always handles whole NSIZE*$# chunks, so
 *        this is presumably a multiple of that -- confirm against callers
 * ptrs:  block pointers; entries 0 .. disks-3 are read as data, entry
 *        disks-2 is written with P and entry disks-1 with Q
 *        NOTE(review): blocks are accessed through unative_t pointers, so
 *        they are assumed to be suitably aligned -- not checked here
 *
 * The data blocks are walked from the highest index down to 0.  At every
 * step P accumulates the XOR of the data.  Q is shifted left by one bit in
 * each byte, 0x1d is XORed into every byte whose high bit was set before
 * the shift, and the data is then XORed in.
 *
 * This is noinline to make sure that gcc doesn't move any of the Altivec
 * code around the enable/disable code in the caller.
 */
static void noinline
raid6_altivec$#_gen_syndrome_real(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	/*
	 * The byte offset has the same type as bytes, so the loop test is not
	 * a signed/unsigned comparison and cannot overflow a signed int.
	 */
	size_t d;
	int z, z0;

	unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
	unative_t x1d = NBYTES(0x1d);

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
		/* Seed both accumulators with the highest data disk */
		wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
		for ( z = z0-1 ; z >= 0 ; z-- ) {
			wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			wp$$ = vec_xor(wp$$, wd$$);
			w2$$ = MASK(wq$$);
			w1$$ = SHLBYTE(wq$$);
			w2$$ = vec_and(w2$$, x1d);
			w1$$ = vec_xor(w1$$, w2$$);
			wq$$ = vec_xor(w1$$, wd$$);
		}
		*(unative_t *)&p[d+NSIZE*$$] = wp$$;
		*(unative_t *)&q[d+NSIZE*$$] = wq$$;
	}
}

/*
 * Syndrome generation entry point for this unroll factor.  Wraps the real
 * worker so that the vector unit is enabled for kernel use only while the
 * worker runs.  Arguments are passed through unchanged.
 */
static void raid6_altivec$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	/*
	 * Kernel use of the vector unit has to be bracketed by
	 * preempt_disable/enable (or done under a lock).
	 */
	preempt_disable();
	enable_kernel_altivec();

	raid6_altivec$#_gen_syndrome_real(disks, bytes, ptrs);

	/* Release in the reverse order of acquisition */
	disable_kernel_altivec();
	preempt_enable();
}

/*
 * Reports whether the Altivec routines may be used.  The prototype is
 * always visible, but the definition is only emitted by the 1-way
 * unrolled variant of this template -- presumably so that a single
 * definition exists when several unroll factors are built together.
 */
int raid6_have_altivec(void);
#if $# == 1
int raid6_have_altivec(void)
{
	int usable;

# ifndef __KERNEL__
	/* Outside the kernel no CPU feature query is made */
	usable = 1;
# else
	/* Assumes that either every CPU has Altivec or none of them does */
	usable = cpu_has_feature(CPU_FTR_ALTIVEC);
# endif
	return usable;
}
#endif

/*
 * Routine-set descriptor exported for this unroll factor.
 * NOTE(review): the initializers are positional; the field order must match
 * struct raid6_calls, which is declared outside this file -- confirm there.
 */
const struct raid6_calls raid6_altivec$# = {
	/* Syndrome generation entry point defined above */
	raid6_altivec$#_gen_syndrome,
	NULL,			/* XOR not yet implemented */
	/* Check for whether Altivec can be used */
	raid6_have_altivec,
	/* Name string of this variant */
	"altivecx$#",
	/* NOTE(review): meaning of this last field is not visible here -- confirm */
	0
};

#endif /* CONFIG_ALTIVEC */
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 16:20:36 -06:00			`/* -- linux-c -- ------------------------------------------------------- *`
			`*`
			`* Copyright 2002-2004 H. Peter Anvin - All Rights Reserved`
			`*`
			`* This program is free software; you can redistribute it and/or modify`
			`* it under the terms of the GNU General Public License as published by`
			`* the Free Software Foundation, Inc., 53 Temple Place Ste 330,`
md: fix typo in FSF address Hello, I found a typo Bosto"m" in FSF address. And I am checking around linux source code. Here is the only place which uses Bosto"m" (not Boston). Signed-off-by: Atsushi SAKAI <sakaia@jp.fujitsu.com> Signed-off-by: NeilBrown <neilb@suse.de> 2009-03-30 21:57:37 -06:00			`* Boston MA 02111-1307, USA; either version 2 of the License, or`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 16:20:36 -06:00			`* (at your option) any later version; incorporated herein by reference.`
			`*`
			`* ----------------------------------------------------------------------- */`

			`/*`
			`* raid6altivec$#.c`
			`*`
			`* $#-way unrolled portable integer math RAID-6 instruction set`
			`*`
md: drivers/md/unroll.pl replaced with awk analog drivers/md/unroll.pl replaced by awk script to drop build-time dependency on perl Signed-off-by: Vladimir Dronnikov <dronnikov@gmail.com> Signed-off-by: NeilBrown <neilb@suse.de> 2009-10-15 23:25:19 -06:00			`* This file is postprocessed using unroll.awk`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 16:20:36 -06:00			`*`
			`* <benh> hpa: in process,`
			`* you can just "steal" the vec unit with enable_kernel_altivec() (but`
			`* bracked this with preempt_disable/enable or in a lock)`
			`*/`

md/raid6: move raid6 data processing to raid6_pq.ko Move the raid6 data processing routines into a standalone module (raid6_pq) to prepare them to be called from async_tx wrappers and other non-md drivers/modules. This precludes a circular dependency of raid456 needing the async modules for data processing while those modules in turn depend on raid456 for the base level synchronous raid6 routines. To support this move: 1/ The exportable definitions in raid6.h move to include/linux/raid/pq.h 2/ The raid6_call, recovery calls, and table symbols are exported 3/ Extra #ifdef __KERNEL__ statements to enable the userspace raid6test to compile Signed-off-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: NeilBrown <neilb@suse.de> 2009-03-30 22:09:39 -06:00			`#include <linux/raid/pq.h>`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 16:20:36 -06:00
lib/raid6: Build proper raid6test files on powerpc Previously the raid6 test Makefile did not build the POWER specific files (altivec and vpermxor). This patch fixes the bug, so that all appropriate files for powerpc are built. This patch also fixes the missing and mismatched ifdef statements to allow the altivec.uc file to be built correctly. Signed-off-by: Matt Brown <matthew.brown.dev@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> 2017-08-03 21:42:33 -06:00			`#ifdef CONFIG_ALTIVEC`

Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 16:20:36 -06:00			`#include <altivec.h>`
[PATCH] RAID6 Altivec fix This patch fixes a signedness bug with RAID6 for Altivec, and makes the Altivec code testable in userspace. Signed-off-by: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-09-16 20:27:29 -06:00			`#ifdef __KERNEL__`
			`# include <asm/cputable.h>`
Disintegrate asm/system.h for PowerPC Disintegrate asm/system.h for PowerPC. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> cc: linuxppc-dev@lists.ozlabs.org 2012-03-28 11:30:02 -06:00			`# include <asm/switch_to.h>`
lib/raid6: Build proper raid6test files on powerpc Previously the raid6 test Makefile did not build the POWER specific files (altivec and vpermxor). This patch fixes the bug, so that all appropriate files for powerpc are built. This patch also fixes the missing and mismatched ifdef statements to allow the altivec.uc file to be built correctly. Signed-off-by: Matt Brown <matthew.brown.dev@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> 2017-08-03 21:42:33 -06:00			`#endif /* __KERNEL__ */`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 16:20:36 -06:00
			`/*`
[PATCH] RAID6 Altivec fix This patch fixes a signedness bug with RAID6 for Altivec, and makes the Altivec code testable in userspace. Signed-off-by: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-09-16 20:27:29 -06:00			`* This is the C data type to use. We use a vector of`
			`* signed char so vec_cmpgt() will generate the right`
			`* instruction.`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 16:20:36 -06:00			`*/`

[PATCH] RAID6 Altivec fix This patch fixes a signedness bug with RAID6 for Altivec, and makes the Altivec code testable in userspace. Signed-off-by: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-09-16 20:27:29 -06:00			`typedef vector signed char unative_t;`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 16:20:36 -06:00
[PATCH] RAID6 Altivec fix This patch fixes a signedness bug with RAID6 for Altivec, and makes the Altivec code testable in userspace. Signed-off-by: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-09-16 20:27:29 -06:00			`#define NBYTES(x) ((vector signed char) {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x})`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 16:20:36 -06:00			`#define NSIZE sizeof(unative_t)`

			`/*`
			`* The SHLBYTE() operation shifts each byte left by 1, not`
			`* rolling over into the next byte`
			`*/`
			`static inline __attribute_const__ unative_t SHLBYTE(unative_t v)`
			`{`
			`return vec_add(v,v);`
			`}`

			`/*`
			`* The MASK() operation returns 0xFF in any byte for which the high`
			`* bit is 1, 0x00 for any byte for which the high bit is 0.`
			`*/`
			`static inline __attribute_const__ unative_t MASK(unative_t v)`
			`{`
			`unative_t zv = NBYTES(0);`

			`/* vec_cmpgt returns a vector bool char; thus the need for the cast */`
			`return (unative_t)vec_cmpgt(zv, v);`
			`}`


			`/* This is noinline to make damned sure that gcc doesn't move any of the`
			`Altivec code around the enable/disable code */`
			`static void noinline`
			`raid6_altivec$#_gen_syndrome_real(int disks, size_t bytes, void **ptrs)`
			`{`
			`u8 dptr = (u8 )ptrs;`
			`u8 p, q;`
			`int d, z, z0;`

			`unative_t wd$$, wq$$, wp$$, w1$$, w2$$;`
			`unative_t x1d = NBYTES(0x1d);`

			`z0 = disks - 3; /* Highest data disk */`
			`p = dptr[z0+1]; /* XOR parity */`
			`q = dptr[z0+2]; /* RS syndrome */`

			`for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {`
			`wq$$ = wp$$ = (unative_t )&dptr[z0][d+$$*NSIZE];`
			`for ( z = z0-1 ; z >= 0 ; z-- ) {`
			`wd$$ = (unative_t )&dptr[z][d+$$*NSIZE];`
			`wp$$ = vec_xor(wp$$, wd$$);`
			`w2$$ = MASK(wq$$);`
			`w1$$ = SHLBYTE(wq$$);`
			`w2$$ = vec_and(w2$$, x1d);`
			`w1$$ = vec_xor(w1$$, w2$$);`
			`wq$$ = vec_xor(w1$$, wd$$);`
			`}`
			`(unative_t )&p[d+NSIZE*$$] = wp$$;`
			`(unative_t )&q[d+NSIZE*$$] = wq$$;`
			`}`
			`}`

			`static void raid6_altivec$#_gen_syndrome(int disks, size_t bytes, void **ptrs)`
			`{`
			`preempt_disable();`
			`enable_kernel_altivec();`

			`raid6_altivec$#_gen_syndrome_real(disks, bytes, ptrs);`

powerpc: Create disable_kernel_{fp,altivec,vsx,spe}() The enable_kernel_*() functions leave the relevant MSR bits enabled until we exit the kernel sometime later. Create disable versions that wrap the kernel use of FP, Altivec VSX or SPE. While we don't want to disable it normally for performance reasons (MSR writes are slow), it will be used for a debug boot option that does this and catches bad uses in other areas of the kernel. Signed-off-by: Anton Blanchard <anton@samba.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> 2015-10-28 18:44:05 -06:00			`disable_kernel_altivec();`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 16:20:36 -06:00			`preempt_enable();`
			`}`

			`int raid6_have_altivec(void);`
			`#if $# == 1`
			`int raid6_have_altivec(void)`
			`{`
			`/* This assumes either all CPUs have Altivec or none does */`
[PATCH] RAID6 Altivec fix This patch fixes a signedness bug with RAID6 for Altivec, and makes the Altivec code testable in userspace. Signed-off-by: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-09-16 20:27:29 -06:00			`# ifdef __KERNEL__`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 16:20:36 -06:00			`return cpu_has_feature(CPU_FTR_ALTIVEC);`
[PATCH] RAID6 Altivec fix This patch fixes a signedness bug with RAID6 for Altivec, and makes the Altivec code testable in userspace. Signed-off-by: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-09-16 20:27:29 -06:00			`# else`
			`return 1;`
			`# endif`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 16:20:36 -06:00			`}`
			`#endif`

			`const struct raid6_calls raid6_altivec$# = {`
			`raid6_altivec$#_gen_syndrome,`
md/raid6 algorithms: delta syndrome functions v3: s-o-b comment, explanation of performance and descision for the start/stop implementation Implementing rmw functionality for RAID6 requires optimized syndrome calculation. Up to now we can only generate a complete syndrome. The target P/Q pages are always overwritten. With this patch we provide a framework for inplace P/Q modification. In the first place simply fill those functions with NULL values. xor_syndrome() has two additional parameters: start & stop. These will indicate the first and last page that are changing during a rmw run. That makes it possible to avoid several unneccessary loops and speed up calculation. The caller needs to implement the following logic to make the functions work. 1) xor_syndrome(disks, start, stop, ...): "Remove" all data of source blocks inside P/Q between (and including) start and end. 2) modify any block with start <= block <= stop 3) xor_syndrome(disks, start, stop, ...): "Reinsert" all data of source blocks into P/Q between (and including) start and end. Pages between start and stop that won't be changed should be filled with a pointer to the kernel zero page. The reasons for not taking NULL pages are: 1) Algorithms cross the whole source data line by line. Thus avoid additional branches. 2) Having a NULL page avoids calculating the XOR P parity but still need calulation steps for the Q parity. Depending on the algorithm unrolling that might be only a difference of 2 instructions per loop. The benchmark numbers of the gen_syndrome() functions are displayed in the kernel log. Do the same for the xor_syndrome() functions. This will help to analyze performance problems and give an rough estimate how well the algorithm works. The choice of the fastest algorithm will still depend on the gen_syndrome() performance. With the start/stop page implementation the speed can vary a lot in real life. E.g. 
a change of page 0 & page 15 on a stripe will be harder to compute than the case where page 0 & page 1 are XOR candidates. To be not to enthusiatic about the expected speeds we will run a worse case test that simulates a change on the upper half of the stripe. So we do: 1) calculation of P/Q for the upper pages 2) continuation of Q for the lower (empty) pages Signed-off-by: Markus Stockhausen <stockhausen@collogia.de> Signed-off-by: NeilBrown <neilb@suse.de> 2014-12-14 18:57:04 -07:00			`NULL, /* XOR not yet implemented */`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 16:20:36 -06:00			`raid6_have_altivec,`
			`"altivecx$#",`
			`0`
			`};`

			`#endif /* CONFIG_ALTIVEC */`