diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 5f07333bb224..9c903a420cda 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -15,7 +15,6 @@ obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
 
 obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
 obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
-obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
 obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
 
 obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
@@ -24,7 +23,6 @@ obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
 obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
-obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
 obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha20-x86_64.o
 obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
 obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
@@ -59,7 +57,6 @@ endif
 
 aes-i586-y := aes-i586-asm_32.o aes_glue.o
 twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
-salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
 serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
 
 aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
@@ -68,7 +65,6 @@ camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
 blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
 twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
-salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
 chacha20-x86_64-y := chacha20-ssse3-x86_64.o chacha20_glue.o
 serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
 
diff --git a/arch/x86/crypto/salsa20-i586-asm_32.S b/arch/x86/crypto/salsa20-i586-asm_32.S
deleted file mode 100644
index 329452b8f794..000000000000
--- a/arch/x86/crypto/salsa20-i586-asm_32.S
+++ /dev/null
@@ -1,1114 +0,0 @@
-# salsa20_pm.s version 20051229
-# D. J. Bernstein
-# Public domain.
-
-#include <linux/linkage.h>
-
-.text
-
-# enter salsa20_encrypt_bytes
-ENTRY(salsa20_encrypt_bytes)
-	mov	%esp,%eax
-	and	$31,%eax
-	add	$256,%eax
-	sub	%eax,%esp
-	# eax_stack = eax
-	movl	%eax,80(%esp)
-	# ebx_stack = ebx
-	movl	%ebx,84(%esp)
-	# esi_stack = esi
-	movl	%esi,88(%esp)
-	# edi_stack = edi
-	movl	%edi,92(%esp)
-	# ebp_stack = ebp
-	movl	%ebp,96(%esp)
-	# x = arg1
-	movl	4(%esp,%eax),%edx
-	# m = arg2
-	movl	8(%esp,%eax),%esi
-	# out = arg3
-	movl	12(%esp,%eax),%edi
-	# bytes = arg4
-	movl	16(%esp,%eax),%ebx
-	# bytes -= 0
-	sub	$0,%ebx
-	# goto done if unsigned<=
-	jbe	._done
-._start:
-	# in0 = *(uint32 *) (x + 0)
-	movl	0(%edx),%eax
-	# in1 = *(uint32 *) (x + 4)
-	movl	4(%edx),%ecx
-	# in2 = *(uint32 *) (x + 8)
-	movl	8(%edx),%ebp
-	# j0 = in0
-	movl	%eax,164(%esp)
-	# in3 = *(uint32 *) (x + 12)
-	movl	12(%edx),%eax
-	# j1 = in1
-	movl	%ecx,168(%esp)
-	# in4 = *(uint32 *) (x + 16)
-	movl	16(%edx),%ecx
-	# j2 = in2
-	movl	%ebp,172(%esp)
-	# in5 = *(uint32 *) (x + 20)
-	movl	20(%edx),%ebp
-	# j3 = in3
-	movl	%eax,176(%esp)
-	# in6 = *(uint32 *) (x + 24)
-	movl	24(%edx),%eax
-	# j4 = in4
-	movl	%ecx,180(%esp)
-	# in7 = *(uint32 *) (x + 28)
-	movl	28(%edx),%ecx
-	# j5 = in5
-	movl	%ebp,184(%esp)
-	# in8 = *(uint32 *) (x + 32)
-	movl	32(%edx),%ebp
-	# j6 = in6
-	movl	%eax,188(%esp)
-	# in9 = *(uint32 *) (x + 36)
-	movl	36(%edx),%eax
-	# j7 = in7
-	movl	%ecx,192(%esp)
-	# in10 = *(uint32 *) (x + 40)
-	movl	40(%edx),%ecx
-	# j8 = in8
-	movl	%ebp,196(%esp)
-	# in11 = *(uint32 *) (x + 44)
-	movl	44(%edx),%ebp
-	# j9 = in9
-	movl	%eax,200(%esp)
-	# in12 = *(uint32 *) (x + 48)
-	movl	48(%edx),%eax
-	# j10 = in10
-	movl	%ecx,204(%esp)
-	# in13 = *(uint32 *) (x + 52)
-	movl	52(%edx),%ecx
-	# j11 = in11
-	movl	%ebp,208(%esp)
-	# in14 = *(uint32 *) (x + 56)
-	movl	56(%edx),%ebp
-	# j12 = in12
-	movl	%eax,212(%esp)
-	# in15 = *(uint32 *) (x + 60)
-	movl	60(%edx),%eax
-	# j13 = in13
-	movl	%ecx,216(%esp)
-	# j14 = in14
-	movl	%ebp,220(%esp)
-	# j15 = in15
-	movl	%eax,224(%esp)
-	# x_backup = x
-	movl	%edx,64(%esp)
-._bytesatleast1:
-	#   bytes - 64
-	cmp	$64,%ebx
-	#   goto nocopy if unsigned>=
-	jae	._nocopy
-	#     ctarget = out
-	movl	%edi,228(%esp)
-	#     out = &tmp
-	leal	0(%esp),%edi
-	#     i = bytes
-	mov	%ebx,%ecx
-	#     while (i) { *out++ = *m++; --i }
-	rep	movsb
-	#     out = &tmp
-	leal	0(%esp),%edi
-	#     m = &tmp
-	leal	0(%esp),%esi
-._nocopy:
-	#   out_backup = out
-	movl	%edi,72(%esp)
-	#   m_backup = m
-	movl	%esi,68(%esp)
-	#   bytes_backup = bytes
-	movl	%ebx,76(%esp)
-	#   in0 = j0
-	movl	164(%esp),%eax
-	#   in1 = j1
-	movl	168(%esp),%ecx
-	#   in2 = j2
-	movl	172(%esp),%edx
-	#   in3 = j3
-	movl	176(%esp),%ebx
-	#   x0 = in0
-	movl	%eax,100(%esp)
-	#   x1 = in1
-	movl	%ecx,104(%esp)
-	#   x2 = in2
-	movl	%edx,108(%esp)
-	#   x3 = in3
-	movl	%ebx,112(%esp)
-	#   in4 = j4
-	movl	180(%esp),%eax
-	#   in5 = j5
-	movl	184(%esp),%ecx
-	#   in6 = j6
-	movl	188(%esp),%edx
-	#   in7 = j7
-	movl	192(%esp),%ebx
-	#   x4 = in4
-	movl	%eax,116(%esp)
-	#   x5 = in5
-	movl	%ecx,120(%esp)
-	#   x6 = in6
-	movl	%edx,124(%esp)
-	#   x7 = in7
-	movl	%ebx,128(%esp)
-	#   in8 = j8
-	movl	196(%esp),%eax
-	#   in9 = j9
-	movl	200(%esp),%ecx
-	#   in10 = j10
-	movl	204(%esp),%edx
-	#   in11 = j11
-	movl	208(%esp),%ebx
-	#   x8 = in8
-	movl	%eax,132(%esp)
-	#   x9 = in9
-	movl	%ecx,136(%esp)
-	#   x10 = in10
-	movl	%edx,140(%esp)
-	#   x11 = in11
-	movl	%ebx,144(%esp)
-	#   in12 = j12
-	movl	212(%esp),%eax
-	#   in13 = j13
-	movl	216(%esp),%ecx
-	#   in14 = j14
-	movl	220(%esp),%edx
-	#   in15 = j15
-	movl	224(%esp),%ebx
-	#   x12 = in12
-	movl	%eax,148(%esp)
-	#   x13 = in13
-	movl	%ecx,152(%esp)
-	#   x14 = in14
-	movl	%edx,156(%esp)
-	#   x15 = in15
-	movl	%ebx,160(%esp)
-	#   i = 20
-	mov	$20,%ebp
-	# p = x0
-	movl	100(%esp),%eax
-	# s = x5
-	movl	120(%esp),%ecx
-	# t = x10
-	movl	140(%esp),%edx
-	# w = x15
-	movl	160(%esp),%ebx
-._mainloop:
-	# x0 = p
-	movl	%eax,100(%esp)
-	# 				x10 = t
-	movl	%edx,140(%esp)
-	# p += x12
-	addl	148(%esp),%eax
-	# 		x5 = s
-	movl	%ecx,120(%esp)
-	# 				t += x6
-	addl	124(%esp),%edx
-	# 						x15 = w
-	movl	%ebx,160(%esp)
-	# 		r = x1
-	movl	104(%esp),%esi
-	# 		r += s
-	add	%ecx,%esi
-	# 						v = x11
-	movl	144(%esp),%edi
-	# 						v += w
-	add	%ebx,%edi
-	# p <<<= 7
-	rol	$7,%eax
-	# p ^= x4
-	xorl	116(%esp),%eax
-	# 				t <<<= 7
-	rol	$7,%edx
-	# 				t ^= x14
-	xorl	156(%esp),%edx
-	# 		r <<<= 7
-	rol	$7,%esi
-	# 		r ^= x9
-	xorl	136(%esp),%esi
-	# 						v <<<= 7
-	rol	$7,%edi
-	# 						v ^= x3
-	xorl	112(%esp),%edi
-	# x4 = p
-	movl	%eax,116(%esp)
-	# 				x14 = t
-	movl	%edx,156(%esp)
-	# p += x0
-	addl	100(%esp),%eax
-	# 		x9 = r
-	movl	%esi,136(%esp)
-	# 				t += x10
-	addl	140(%esp),%edx
-	# 						x3 = v
-	movl	%edi,112(%esp)
-	# p <<<= 9
-	rol	$9,%eax
-	# p ^= x8
-	xorl	132(%esp),%eax
-	# 				t <<<= 9
-	rol	$9,%edx
-	# 				t ^= x2
-	xorl	108(%esp),%edx
-	# 		s += r
-	add	%esi,%ecx
-	# 		s <<<= 9
-	rol	$9,%ecx
-	# 		s ^= x13
-	xorl	152(%esp),%ecx
-	# 						w += v
-	add	%edi,%ebx
-	# 						w <<<= 9
-	rol	$9,%ebx
-	# 						w ^= x7
-	xorl	128(%esp),%ebx
-	# x8 = p
-	movl	%eax,132(%esp)
-	# 				x2 = t
-	movl	%edx,108(%esp)
-	# p += x4
-	addl	116(%esp),%eax
-	# 		x13 = s
-	movl	%ecx,152(%esp)
-	# 				t += x14
-	addl	156(%esp),%edx
-	# 						x7 = w
-	movl	%ebx,128(%esp)
-	# p <<<= 13
-	rol	$13,%eax
-	# p ^= x12
-	xorl	148(%esp),%eax
-	# 				t <<<= 13
-	rol	$13,%edx
-	# 				t ^= x6
-	xorl	124(%esp),%edx
-	# 		r += s
-	add	%ecx,%esi
-	# 		r <<<= 13
-	rol	$13,%esi
-	# 		r ^= x1
-	xorl	104(%esp),%esi
-	# 						v += w
-	add	%ebx,%edi
-	# 						v <<<= 13
-	rol	$13,%edi
-	# 						v ^= x11
-	xorl	144(%esp),%edi
-	# x12 = p
-	movl	%eax,148(%esp)
-	# 				x6 = t
-	movl	%edx,124(%esp)
-	# p += x8
-	addl	132(%esp),%eax
-	# 		x1 = r
-	movl	%esi,104(%esp)
-	# 				t += x2
-	addl	108(%esp),%edx
-	# 						x11 = v
-	movl	%edi,144(%esp)
-	# p <<<= 18
-	rol	$18,%eax
-	# p ^= x0
-	xorl	100(%esp),%eax
-	# 				t <<<= 18
-	rol	$18,%edx
-	# 				t ^= x10
-	xorl	140(%esp),%edx
-	# 		s += r
-	add	%esi,%ecx
-	# 		s <<<= 18
-	rol	$18,%ecx
-	# 		s ^= x5
-	xorl	120(%esp),%ecx
-	# 						w += v
-	add	%edi,%ebx
-	# 						w <<<= 18
-	rol	$18,%ebx
-	# 						w ^= x15
-	xorl	160(%esp),%ebx
-	# x0 = p
-	movl	%eax,100(%esp)
-	# 				x10 = t
-	movl	%edx,140(%esp)
-	# p += x3
-	addl	112(%esp),%eax
-	# p <<<= 7
-	rol	$7,%eax
-	# 		x5 = s
-	movl	%ecx,120(%esp)
-	# 				t += x9
-	addl	136(%esp),%edx
-	# 						x15 = w
-	movl	%ebx,160(%esp)
-	# 		r = x4
-	movl	116(%esp),%esi
-	# 		r += s
-	add	%ecx,%esi
-	# 						v = x14
-	movl	156(%esp),%edi
-	# 						v += w
-	add	%ebx,%edi
-	# p ^= x1
-	xorl	104(%esp),%eax
-	# 				t <<<= 7
-	rol	$7,%edx
-	# 				t ^= x11
-	xorl	144(%esp),%edx
-	# 		r <<<= 7
-	rol	$7,%esi
-	# 		r ^= x6
-	xorl	124(%esp),%esi
-	# 						v <<<= 7
-	rol	$7,%edi
-	# 						v ^= x12
-	xorl	148(%esp),%edi
-	# x1 = p
-	movl	%eax,104(%esp)
-	# 				x11 = t
-	movl	%edx,144(%esp)
-	# p += x0
-	addl	100(%esp),%eax
-	# 		x6 = r
-	movl	%esi,124(%esp)
-	# 				t += x10
-	addl	140(%esp),%edx
-	# 						x12 = v
-	movl	%edi,148(%esp)
-	# p <<<= 9
-	rol	$9,%eax
-	# p ^= x2
-	xorl	108(%esp),%eax
-	# 				t <<<= 9
-	rol	$9,%edx
-	# 				t ^= x8
-	xorl	132(%esp),%edx
-	# 		s += r
-	add	%esi,%ecx
-	# 		s <<<= 9
-	rol	$9,%ecx
-	# 		s ^= x7
-	xorl	128(%esp),%ecx
-	# 						w += v
-	add	%edi,%ebx
-	# 						w <<<= 9
-	rol	$9,%ebx
-	# 						w ^= x13
-	xorl	152(%esp),%ebx
-	# x2 = p
-	movl	%eax,108(%esp)
-	# 				x8 = t
-	movl	%edx,132(%esp)
-	# p += x1
-	addl	104(%esp),%eax
-	# 		x7 = s
-	movl	%ecx,128(%esp)
-	# 				t += x11
-	addl	144(%esp),%edx
-	# 						x13 = w
-	movl	%ebx,152(%esp)
-	# p <<<= 13
-	rol	$13,%eax
-	# p ^= x3
-	xorl	112(%esp),%eax
-	# 				t <<<= 13
-	rol	$13,%edx
-	# 				t ^= x9
-	xorl	136(%esp),%edx
-	# 		r += s
-	add	%ecx,%esi
-	# 		r <<<= 13
-	rol	$13,%esi
-	# 		r ^= x4
-	xorl	116(%esp),%esi
-	# 						v += w
-	add	%ebx,%edi
-	# 						v <<<= 13
-	rol	$13,%edi
-	# 						v ^= x14
-	xorl	156(%esp),%edi
-	# x3 = p
-	movl	%eax,112(%esp)
-	# 				x9 = t
-	movl	%edx,136(%esp)
-	# p += x2
-	addl	108(%esp),%eax
-	# 		x4 = r
-	movl	%esi,116(%esp)
-	# 				t += x8
-	addl	132(%esp),%edx
-	# 						x14 = v
-	movl	%edi,156(%esp)
-	# p <<<= 18
-	rol	$18,%eax
-	# p ^= x0
-	xorl	100(%esp),%eax
-	# 				t <<<= 18
-	rol	$18,%edx
-	# 				t ^= x10
-	xorl	140(%esp),%edx
-	# 		s += r
-	add	%esi,%ecx
-	# 		s <<<= 18
-	rol	$18,%ecx
-	# 		s ^= x5
-	xorl	120(%esp),%ecx
-	# 						w += v
-	add	%edi,%ebx
-	# 						w <<<= 18
-	rol	$18,%ebx
-	# 						w ^= x15
-	xorl	160(%esp),%ebx
-	# x0 = p
-	movl	%eax,100(%esp)
-	# 				x10 = t
-	movl	%edx,140(%esp)
-	# p += x12
-	addl	148(%esp),%eax
-	# 		x5 = s
-	movl	%ecx,120(%esp)
-	# 				t += x6
-	addl	124(%esp),%edx
-	# 						x15 = w
-	movl	%ebx,160(%esp)
-	# 		r = x1
-	movl	104(%esp),%esi
-	# 		r += s
-	add	%ecx,%esi
-	# 						v = x11
-	movl	144(%esp),%edi
-	# 						v += w
-	add	%ebx,%edi
-	# p <<<= 7
-	rol	$7,%eax
-	# p ^= x4
-	xorl	116(%esp),%eax
-	# 				t <<<= 7
-	rol	$7,%edx
-	# 				t ^= x14
-	xorl	156(%esp),%edx
-	# 		r <<<= 7
-	rol	$7,%esi
-	# 		r ^= x9
-	xorl	136(%esp),%esi
-	# 						v <<<= 7
-	rol	$7,%edi
-	# 						v ^= x3
-	xorl	112(%esp),%edi
-	# x4 = p
-	movl	%eax,116(%esp)
-	# 				x14 = t
-	movl	%edx,156(%esp)
-	# p += x0
-	addl	100(%esp),%eax
-	# 		x9 = r
-	movl	%esi,136(%esp)
-	# 				t += x10
-	addl	140(%esp),%edx
-	# 						x3 = v
-	movl	%edi,112(%esp)
-	# p <<<= 9
-	rol	$9,%eax
-	# p ^= x8
-	xorl	132(%esp),%eax
-	# 				t <<<= 9
-	rol	$9,%edx
-	# 				t ^= x2
-	xorl	108(%esp),%edx
-	# 		s += r
-	add	%esi,%ecx
-	# 		s <<<= 9
-	rol	$9,%ecx
-	# 		s ^= x13
-	xorl	152(%esp),%ecx
-	# 						w += v
-	add	%edi,%ebx
-	# 						w <<<= 9
-	rol	$9,%ebx
-	# 						w ^= x7
-	xorl	128(%esp),%ebx
-	# x8 = p
-	movl	%eax,132(%esp)
-	# 				x2 = t
-	movl	%edx,108(%esp)
-	# p += x4
-	addl	116(%esp),%eax
-	# 		x13 = s
-	movl	%ecx,152(%esp)
-	# 				t += x14
-	addl	156(%esp),%edx
-	# 						x7 = w
-	movl	%ebx,128(%esp)
-	# p <<<= 13
-	rol	$13,%eax
-	# p ^= x12
-	xorl	148(%esp),%eax
-	# 				t <<<= 13
-	rol	$13,%edx
-	# 				t ^= x6
-	xorl	124(%esp),%edx
-	# 		r += s
-	add	%ecx,%esi
-	# 		r <<<= 13
-	rol	$13,%esi
-	# 		r ^= x1
-	xorl	104(%esp),%esi
-	# 						v += w
-	add	%ebx,%edi
-	# 						v <<<= 13
-	rol	$13,%edi
-	# 						v ^= x11
-	xorl	144(%esp),%edi
-	# x12 = p
-	movl	%eax,148(%esp)
-	# 				x6 = t
-	movl	%edx,124(%esp)
-	# p += x8
-	addl	132(%esp),%eax
-	# 		x1 = r
-	movl	%esi,104(%esp)
-	# 				t += x2
-	addl	108(%esp),%edx
-	# 						x11 = v
-	movl	%edi,144(%esp)
-	# p <<<= 18
-	rol	$18,%eax
-	# p ^= x0
-	xorl	100(%esp),%eax
-	# 				t <<<= 18
-	rol	$18,%edx
-	# 				t ^= x10
-	xorl	140(%esp),%edx
-	# 		s += r
-	add	%esi,%ecx
-	# 		s <<<= 18
-	rol	$18,%ecx
-	# 		s ^= x5
-	xorl	120(%esp),%ecx
-	# 						w += v
-	add	%edi,%ebx
-	# 						w <<<= 18
-	rol	$18,%ebx
-	# 						w ^= x15
-	xorl	160(%esp),%ebx
-	# x0 = p
-	movl	%eax,100(%esp)
-	# 				x10 = t
-	movl	%edx,140(%esp)
-	# p += x3
-	addl	112(%esp),%eax
-	# p <<<= 7
-	rol	$7,%eax
-	# 		x5 = s
-	movl	%ecx,120(%esp)
-	# 				t += x9
-	addl	136(%esp),%edx
-	# 						x15 = w
-	movl	%ebx,160(%esp)
-	# 		r = x4
-	movl	116(%esp),%esi
-	# 		r += s
-	add	%ecx,%esi
-	# 						v = x14
-	movl	156(%esp),%edi
-	# 						v += w
-	add	%ebx,%edi
-	# p ^= x1
-	xorl	104(%esp),%eax
-	# 				t <<<= 7
-	rol	$7,%edx
-	# 				t ^= x11
-	xorl	144(%esp),%edx
-	# 		r <<<= 7
-	rol	$7,%esi
-	# 		r ^= x6
-	xorl	124(%esp),%esi
-	# 						v <<<= 7
-	rol	$7,%edi
-	# 						v ^= x12
-	xorl	148(%esp),%edi
-	# x1 = p
-	movl	%eax,104(%esp)
-	# 				x11 = t
-	movl	%edx,144(%esp)
-	# p += x0
-	addl	100(%esp),%eax
-	# 		x6 = r
-	movl	%esi,124(%esp)
-	# 				t += x10
-	addl	140(%esp),%edx
-	# 						x12 = v
-	movl	%edi,148(%esp)
-	# p <<<= 9
-	rol	$9,%eax
-	# p ^= x2
-	xorl	108(%esp),%eax
-	# 				t <<<= 9
-	rol	$9,%edx
-	# 				t ^= x8
-	xorl	132(%esp),%edx
-	# 		s += r
-	add	%esi,%ecx
-	# 		s <<<= 9
-	rol	$9,%ecx
-	# 		s ^= x7
-	xorl	128(%esp),%ecx
-	# 						w += v
-	add	%edi,%ebx
-	# 						w <<<= 9
-	rol	$9,%ebx
-	# 						w ^= x13
-	xorl	152(%esp),%ebx
-	# x2 = p
-	movl	%eax,108(%esp)
-	# 				x8 = t
-	movl	%edx,132(%esp)
-	# p += x1
-	addl	104(%esp),%eax
-	# 		x7 = s
-	movl	%ecx,128(%esp)
-	# 				t += x11
-	addl	144(%esp),%edx
-	# 						x13 = w
-	movl	%ebx,152(%esp)
-	# p <<<= 13
-	rol	$13,%eax
-	# p ^= x3
-	xorl	112(%esp),%eax
-	# 				t <<<= 13
-	rol	$13,%edx
-	# 				t ^= x9
-	xorl	136(%esp),%edx
-	# 		r += s
-	add	%ecx,%esi
-	# 		r <<<= 13
-	rol	$13,%esi
-	# 		r ^= x4
-	xorl	116(%esp),%esi
-	# 						v += w
-	add	%ebx,%edi
-	# 						v <<<= 13
-	rol	$13,%edi
-	# 						v ^= x14
-	xorl	156(%esp),%edi
-	# x3 = p
-	movl	%eax,112(%esp)
-	# 				x9 = t
-	movl	%edx,136(%esp)
-	# p += x2
-	addl	108(%esp),%eax
-	# 		x4 = r
-	movl	%esi,116(%esp)
-	# 				t += x8
-	addl	132(%esp),%edx
-	# 						x14 = v
-	movl	%edi,156(%esp)
-	# p <<<= 18
-	rol	$18,%eax
-	# p ^= x0
-	xorl	100(%esp),%eax
-	# 				t <<<= 18
-	rol	$18,%edx
-	# 				t ^= x10
-	xorl	140(%esp),%edx
-	# 		s += r
-	add	%esi,%ecx
-	# 		s <<<= 18
-	rol	$18,%ecx
-	# 		s ^= x5
-	xorl	120(%esp),%ecx
-	# 						w += v
-	add	%edi,%ebx
-	# 						w <<<= 18
-	rol	$18,%ebx
-	# 						w ^= x15
-	xorl	160(%esp),%ebx
-	# i -= 4
-	sub	$4,%ebp
-	# goto mainloop if unsigned >
-	ja	._mainloop
-	# x0 = p
-	movl	%eax,100(%esp)
-	# x5 = s
-	movl	%ecx,120(%esp)
-	# x10 = t
-	movl	%edx,140(%esp)
-	# x15 = w
-	movl	%ebx,160(%esp)
-	#   out = out_backup
-	movl	72(%esp),%edi
-	#   m = m_backup
-	movl	68(%esp),%esi
-	#   in0 = x0
-	movl	100(%esp),%eax
-	#   in1 = x1
-	movl	104(%esp),%ecx
-	#   in0 += j0
-	addl	164(%esp),%eax
-	#   in1 += j1
-	addl	168(%esp),%ecx
-	#   in0 ^= *(uint32 *) (m + 0)
-	xorl	0(%esi),%eax
-	#   in1 ^= *(uint32 *) (m + 4)
-	xorl	4(%esi),%ecx
-	#   *(uint32 *) (out + 0) = in0
-	movl	%eax,0(%edi)
-	#   *(uint32 *) (out + 4) = in1
-	movl	%ecx,4(%edi)
-	#   in2 = x2
-	movl	108(%esp),%eax
-	#   in3 = x3
-	movl	112(%esp),%ecx
-	#   in2 += j2
-	addl	172(%esp),%eax
-	#   in3 += j3
-	addl	176(%esp),%ecx
-	#   in2 ^= *(uint32 *) (m + 8)
-	xorl	8(%esi),%eax
-	#   in3 ^= *(uint32 *) (m + 12)
-	xorl	12(%esi),%ecx
-	#   *(uint32 *) (out + 8) = in2
-	movl	%eax,8(%edi)
-	#   *(uint32 *) (out + 12) = in3
-	movl	%ecx,12(%edi)
-	#   in4 = x4
-	movl	116(%esp),%eax
-	#   in5 = x5
-	movl	120(%esp),%ecx
-	#   in4 += j4
-	addl	180(%esp),%eax
-	#   in5 += j5
-	addl	184(%esp),%ecx
-	#   in4 ^= *(uint32 *) (m + 16)
-	xorl	16(%esi),%eax
-	#   in5 ^= *(uint32 *) (m + 20)
-	xorl	20(%esi),%ecx
-	#   *(uint32 *) (out + 16) = in4
-	movl	%eax,16(%edi)
-	#   *(uint32 *) (out + 20) = in5
-	movl	%ecx,20(%edi)
-	#   in6 = x6
-	movl	124(%esp),%eax
-	#   in7 = x7
-	movl	128(%esp),%ecx
-	#   in6 += j6
-	addl	188(%esp),%eax
-	#   in7 += j7
-	addl	192(%esp),%ecx
-	#   in6 ^= *(uint32 *) (m + 24)
-	xorl	24(%esi),%eax
-	#   in7 ^= *(uint32 *) (m + 28)
-	xorl	28(%esi),%ecx
-	#   *(uint32 *) (out + 24) = in6
-	movl	%eax,24(%edi)
-	#   *(uint32 *) (out + 28) = in7
-	movl	%ecx,28(%edi)
-	#   in8 = x8
-	movl	132(%esp),%eax
-	#   in9 = x9
-	movl	136(%esp),%ecx
-	#   in8 += j8
-	addl	196(%esp),%eax
-	#   in9 += j9
-	addl	200(%esp),%ecx
-	#   in8 ^= *(uint32 *) (m + 32)
-	xorl	32(%esi),%eax
-	#   in9 ^= *(uint32 *) (m + 36)
-	xorl	36(%esi),%ecx
-	#   *(uint32 *) (out + 32) = in8
-	movl	%eax,32(%edi)
-	#   *(uint32 *) (out + 36) = in9
-	movl	%ecx,36(%edi)
-	#   in10 = x10
-	movl	140(%esp),%eax
-	#   in11 = x11
-	movl	144(%esp),%ecx
-	#   in10 += j10
-	addl	204(%esp),%eax
-	#   in11 += j11
-	addl	208(%esp),%ecx
-	#   in10 ^= *(uint32 *) (m + 40)
-	xorl	40(%esi),%eax
-	#   in11 ^= *(uint32 *) (m + 44)
-	xorl	44(%esi),%ecx
-	#   *(uint32 *) (out + 40) = in10
-	movl	%eax,40(%edi)
-	#   *(uint32 *) (out + 44) = in11
-	movl	%ecx,44(%edi)
-	#   in12 = x12
-	movl	148(%esp),%eax
-	#   in13 = x13
-	movl	152(%esp),%ecx
-	#   in12 += j12
-	addl	212(%esp),%eax
-	#   in13 += j13
-	addl	216(%esp),%ecx
-	#   in12 ^= *(uint32 *) (m + 48)
-	xorl	48(%esi),%eax
-	#   in13 ^= *(uint32 *) (m + 52)
-	xorl	52(%esi),%ecx
-	#   *(uint32 *) (out + 48) = in12
-	movl	%eax,48(%edi)
-	#   *(uint32 *) (out + 52) = in13
-	movl	%ecx,52(%edi)
-	#   in14 = x14
-	movl	156(%esp),%eax
-	#   in15 = x15
-	movl	160(%esp),%ecx
-	#   in14 += j14
-	addl	220(%esp),%eax
-	#   in15 += j15
-	addl	224(%esp),%ecx
-	#   in14 ^= *(uint32 *) (m + 56)
-	xorl	56(%esi),%eax
-	#   in15 ^= *(uint32 *) (m + 60)
-	xorl	60(%esi),%ecx
-	#   *(uint32 *) (out + 56) = in14
-	movl	%eax,56(%edi)
-	#   *(uint32 *) (out + 60) = in15
-	movl	%ecx,60(%edi)
-	#   bytes = bytes_backup
-	movl	76(%esp),%ebx
-	#   in8 = j8
-	movl	196(%esp),%eax
-	#   in9 = j9
-	movl	200(%esp),%ecx
-	#   in8 += 1
-	add	$1,%eax
-	#   in9 += 0 + carry
-	adc	$0,%ecx
-	#   j8 = in8
-	movl	%eax,196(%esp)
-	#   j9 = in9
-	movl	%ecx,200(%esp)
-	#   bytes - 64
-	cmp	$64,%ebx
-	#   goto bytesatleast65 if unsigned>
-	ja	._bytesatleast65
-	#     goto bytesatleast64 if unsigned>=
-	jae	._bytesatleast64
-	#       m = out
-	mov	%edi,%esi
-	#       out = ctarget
-	movl	228(%esp),%edi
-	#       i = bytes
-	mov	%ebx,%ecx
-	#       while (i) { *out++ = *m++; --i }
-	rep	movsb
-._bytesatleast64:
-	#     x = x_backup
-	movl	64(%esp),%eax
-	#     in8 = j8
-	movl	196(%esp),%ecx
-	#     in9 = j9
-	movl	200(%esp),%edx
-	#     *(uint32 *) (x + 32) = in8
-	movl	%ecx,32(%eax)
-	#     *(uint32 *) (x + 36) = in9
-	movl	%edx,36(%eax)
-._done:
-	#     eax = eax_stack
-	movl	80(%esp),%eax
-	#     ebx = ebx_stack
-	movl	84(%esp),%ebx
-	#     esi = esi_stack
-	movl	88(%esp),%esi
-	#     edi = edi_stack
-	movl	92(%esp),%edi
-	#     ebp = ebp_stack
-	movl	96(%esp),%ebp
-	#     leave
-	add	%eax,%esp
-	ret
-._bytesatleast65:
-	#   bytes -= 64
-	sub	$64,%ebx
-	#   out += 64
-	add	$64,%edi
-	#   m += 64
-	add	$64,%esi
-	# goto bytesatleast1
-	jmp	._bytesatleast1
-ENDPROC(salsa20_encrypt_bytes)
-
-# enter salsa20_keysetup
-ENTRY(salsa20_keysetup)
-	mov	%esp,%eax
-	and	$31,%eax
-	add	$256,%eax
-	sub	%eax,%esp
-	#   eax_stack = eax
-	movl	%eax,64(%esp)
-	#   ebx_stack = ebx
-	movl	%ebx,68(%esp)
-	#   esi_stack = esi
-	movl	%esi,72(%esp)
-	#   edi_stack = edi
-	movl	%edi,76(%esp)
-	#   ebp_stack = ebp
-	movl	%ebp,80(%esp)
-	#   k = arg2
-	movl	8(%esp,%eax),%ecx
-	#   kbits = arg3
-	movl	12(%esp,%eax),%edx
-	#   x = arg1
-	movl	4(%esp,%eax),%eax
-	#   in1 = *(uint32 *) (k + 0)
-	movl	0(%ecx),%ebx
-	#   in2 = *(uint32 *) (k + 4)
-	movl	4(%ecx),%esi
-	#   in3 = *(uint32 *) (k + 8)
-	movl	8(%ecx),%edi
-	#   in4 = *(uint32 *) (k + 12)
-	movl	12(%ecx),%ebp
-	#   *(uint32 *) (x + 4) = in1
-	movl	%ebx,4(%eax)
-	#   *(uint32 *) (x + 8) = in2
-	movl	%esi,8(%eax)
-	#   *(uint32 *) (x + 12) = in3
-	movl	%edi,12(%eax)
-	#   *(uint32 *) (x + 16) = in4
-	movl	%ebp,16(%eax)
-	#   kbits - 256
-	cmp	$256,%edx
-	#   goto kbits128 if unsigned<
-	jb	._kbits128
-._kbits256:
-	#     in11 = *(uint32 *) (k + 16)
-	movl	16(%ecx),%edx
-	#     in12 = *(uint32 *) (k + 20)
-	movl	20(%ecx),%ebx
-	#     in13 = *(uint32 *) (k + 24)
-	movl	24(%ecx),%esi
-	#     in14 = *(uint32 *) (k + 28)
-	movl	28(%ecx),%ecx
-	#     *(uint32 *) (x + 44) = in11
-	movl	%edx,44(%eax)
-	#     *(uint32 *) (x + 48) = in12
-	movl	%ebx,48(%eax)
-	#     *(uint32 *) (x + 52) = in13
-	movl	%esi,52(%eax)
-	#     *(uint32 *) (x + 56) = in14
-	movl	%ecx,56(%eax)
-	#     in0 = 1634760805
-	mov	$1634760805,%ecx
-	#     in5 = 857760878
-	mov	$857760878,%edx
-	#     in10 = 2036477234
-	mov	$2036477234,%ebx
-	#     in15 = 1797285236
-	mov	$1797285236,%esi
-	#     *(uint32 *) (x + 0) = in0
-	movl	%ecx,0(%eax)
-	#     *(uint32 *) (x + 20) = in5
-	movl	%edx,20(%eax)
-	#     *(uint32 *) (x + 40) = in10
-	movl	%ebx,40(%eax)
-	#     *(uint32 *) (x + 60) = in15
-	movl	%esi,60(%eax)
-	#   goto keysetupdone
-	jmp	._keysetupdone
-._kbits128:
-	#     in11 = *(uint32 *) (k + 0)
-	movl	0(%ecx),%edx
-	#     in12 = *(uint32 *) (k + 4)
-	movl	4(%ecx),%ebx
-	#     in13 = *(uint32 *) (k + 8)
-	movl	8(%ecx),%esi
-	#     in14 = *(uint32 *) (k + 12)
-	movl	12(%ecx),%ecx
-	#     *(uint32 *) (x + 44) = in11
-	movl	%edx,44(%eax)
-	#     *(uint32 *) (x + 48) = in12
-	movl	%ebx,48(%eax)
-	#     *(uint32 *) (x + 52) = in13
-	movl	%esi,52(%eax)
-	#     *(uint32 *) (x + 56) = in14
-	movl	%ecx,56(%eax)
-	#     in0 = 1634760805
-	mov	$1634760805,%ecx
-	#     in5 = 824206446
-	mov	$824206446,%edx
-	#     in10 = 2036477238
-	mov	$2036477238,%ebx
-	#     in15 = 1797285236
-	mov	$1797285236,%esi
-	#     *(uint32 *) (x + 0) = in0
-	movl	%ecx,0(%eax)
-	#     *(uint32 *) (x + 20) = in5
-	movl	%edx,20(%eax)
-	#     *(uint32 *) (x + 40) = in10
-	movl	%ebx,40(%eax)
-	#     *(uint32 *) (x + 60) = in15
-	movl	%esi,60(%eax)
-._keysetupdone:
-	#   eax = eax_stack
-	movl	64(%esp),%eax
-	#   ebx = ebx_stack
-	movl	68(%esp),%ebx
-	#   esi = esi_stack
-	movl	72(%esp),%esi
-	#   edi = edi_stack
-	movl	76(%esp),%edi
-	#   ebp = ebp_stack
-	movl	80(%esp),%ebp
-	# leave
-	add	%eax,%esp
-	ret
-ENDPROC(salsa20_keysetup)
-
-# enter salsa20_ivsetup
-ENTRY(salsa20_ivsetup)
-	mov	%esp,%eax
-	and	$31,%eax
-	add	$256,%eax
-	sub	%eax,%esp
-	#   eax_stack = eax
-	movl	%eax,64(%esp)
-	#   ebx_stack = ebx
-	movl	%ebx,68(%esp)
-	#   esi_stack = esi
-	movl	%esi,72(%esp)
-	#   edi_stack = edi
-	movl	%edi,76(%esp)
-	#   ebp_stack = ebp
-	movl	%ebp,80(%esp)
-	#   iv = arg2
-	movl	8(%esp,%eax),%ecx
-	#   x = arg1
-	movl	4(%esp,%eax),%eax
-	#   in6 = *(uint32 *) (iv + 0)
-	movl	0(%ecx),%edx
-	#   in7 = *(uint32 *) (iv + 4)
-	movl	4(%ecx),%ecx
-	#   in8 = 0
-	mov	$0,%ebx
-	#   in9 = 0
-	mov	$0,%esi
-	#   *(uint32 *) (x + 24) = in6
-	movl	%edx,24(%eax)
-	#   *(uint32 *) (x + 28) = in7
-	movl	%ecx,28(%eax)
-	#   *(uint32 *) (x + 32) = in8
-	movl	%ebx,32(%eax)
-	#   *(uint32 *) (x + 36) = in9
-	movl	%esi,36(%eax)
-	#   eax = eax_stack
-	movl	64(%esp),%eax
-	#   ebx = ebx_stack
-	movl	68(%esp),%ebx
-	#   esi = esi_stack
-	movl	72(%esp),%esi
-	#   edi = edi_stack
-	movl	76(%esp),%edi
-	#   ebp = ebp_stack
-	movl	80(%esp),%ebp
-	# leave
-	add	%eax,%esp
-	ret
-ENDPROC(salsa20_ivsetup)
diff --git a/arch/x86/crypto/salsa20-x86_64-asm_64.S b/arch/x86/crypto/salsa20-x86_64-asm_64.S
deleted file mode 100644
index 10db30d58006..000000000000
--- a/arch/x86/crypto/salsa20-x86_64-asm_64.S
+++ /dev/null
@@ -1,919 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <linux/linkage.h>
-
-# enter salsa20_encrypt_bytes
-ENTRY(salsa20_encrypt_bytes)
-	mov	%rsp,%r11
-	and	$31,%r11
-	add	$256,%r11
-	sub	%r11,%rsp
-	# x = arg1
-	mov	%rdi,%r8
-	# m = arg2
-	mov	%rsi,%rsi
-	# out = arg3
-	mov	%rdx,%rdi
-	# bytes = arg4
-	mov	%rcx,%rdx
-	#               unsigned>? bytes - 0
-	cmp	$0,%rdx
-	# comment:fp stack unchanged by jump
-	# goto done if !unsigned>
-	jbe	._done
-	# comment:fp stack unchanged by fallthrough
-# start:
-._start:
-	# r11_stack = r11
-	movq	%r11,0(%rsp)
-	# r12_stack = r12
-	movq	%r12,8(%rsp)
-	# r13_stack = r13
-	movq	%r13,16(%rsp)
-	# r14_stack = r14
-	movq	%r14,24(%rsp)
-	# r15_stack = r15
-	movq	%r15,32(%rsp)
-	# rbx_stack = rbx
-	movq	%rbx,40(%rsp)
-	# rbp_stack = rbp
-	movq	%rbp,48(%rsp)
-	# in0 = *(uint64 *) (x + 0)
-	movq	0(%r8),%rcx
-	# in2 = *(uint64 *) (x + 8)
-	movq	8(%r8),%r9
-	# in4 = *(uint64 *) (x + 16)
-	movq	16(%r8),%rax
-	# in6 = *(uint64 *) (x + 24)
-	movq	24(%r8),%r10
-	# in8 = *(uint64 *) (x + 32)
-	movq	32(%r8),%r11
-	# in10 = *(uint64 *) (x + 40)
-	movq	40(%r8),%r12
-	# in12 = *(uint64 *) (x + 48)
-	movq	48(%r8),%r13
-	# in14 = *(uint64 *) (x + 56)
-	movq	56(%r8),%r14
-	# j0 = in0
-	movq	%rcx,56(%rsp)
-	# j2 = in2
-	movq	%r9,64(%rsp)
-	# j4 = in4
-	movq	%rax,72(%rsp)
-	# j6 = in6
-	movq	%r10,80(%rsp)
-	# j8 = in8
-	movq	%r11,88(%rsp)
-	# j10 = in10
-	movq	%r12,96(%rsp)
-	# j12 = in12
-	movq	%r13,104(%rsp)
-	# j14 = in14
-	movq	%r14,112(%rsp)
-	# x_backup = x
-	movq	%r8,120(%rsp)
-# bytesatleast1:
-._bytesatleast1:
-	#                   unsigned<? bytes - 64
-	cmp	$64,%rdx
-	# comment:fp stack unchanged by jump
-	#   goto nocopy if !unsigned<
-	jae	._nocopy
-	#     ctarget = out
-	movq	%rdi,128(%rsp)
-	#     out = &tmp
-	leaq	192(%rsp),%rdi
-	#     i = bytes
-	mov	%rdx,%rcx
-	#     while (i) { *out++ = *m++; --i }
-	rep	movsb
-	#     out = &tmp
-	leaq	192(%rsp),%rdi
-	#     m = &tmp
-	leaq	192(%rsp),%rsi
-	# comment:fp stack unchanged by fallthrough
-#   nocopy:
-._nocopy:
-	#   out_backup = out
-	movq	%rdi,136(%rsp)
-	#   m_backup = m
-	movq	%rsi,144(%rsp)
-	#   bytes_backup = bytes
-	movq	%rdx,152(%rsp)
-	#   x1 = j0
-	movq	56(%rsp),%rdi
-	#   x0 = x1
-	mov	%rdi,%rdx
-	#   (uint64) x1 >>= 32
-	shr	$32,%rdi
-	#   		x3 = j2
-	movq	64(%rsp),%rsi
-	#   		x2 = x3
-	mov	%rsi,%rcx
-	#   		(uint64) x3 >>= 32
-	shr	$32,%rsi
-	#   x5 = j4
-	movq	72(%rsp),%r8
-	#   x4 = x5
-	mov	%r8,%r9
-	#   (uint64) x5 >>= 32
-	shr	$32,%r8
-	#   x5_stack = x5
-	movq	%r8,160(%rsp)
-	#   		x7 = j6
-	movq	80(%rsp),%r8
-	#   		x6 = x7
-	mov	%r8,%rax
-	#   		(uint64) x7 >>= 32
-	shr	$32,%r8
-	#   x9 = j8
-	movq	88(%rsp),%r10
-	#   x8 = x9
-	mov	%r10,%r11
-	#   (uint64) x9 >>= 32
-	shr	$32,%r10
-	#   		x11 = j10
-	movq	96(%rsp),%r12
-	#   		x10 = x11
-	mov	%r12,%r13
-	#   		x10_stack = x10
-	movq	%r13,168(%rsp)
-	#   		(uint64) x11 >>= 32
-	shr	$32,%r12
-	#   x13 = j12
-	movq	104(%rsp),%r13
-	#   x12 = x13
-	mov	%r13,%r14
-	#   (uint64) x13 >>= 32
-	shr	$32,%r13
-	#   		x15 = j14
-	movq	112(%rsp),%r15
-	#   		x14 = x15
-	mov	%r15,%rbx
-	#   		(uint64) x15 >>= 32
-	shr	$32,%r15
-	#   		x15_stack = x15
-	movq	%r15,176(%rsp)
-	#   i = 20
-	mov	$20,%r15
-#   mainloop:
-._mainloop:
-	#   i_backup = i
-	movq	%r15,184(%rsp)
-	# 		x5 = x5_stack
-	movq	160(%rsp),%r15
-	# a = x12 + x0
-	lea	(%r14,%rdx),%rbp
-	# (uint32) a <<<= 7
-	rol	$7,%ebp
-	# x4 ^= a
-	xor	%rbp,%r9
-	# 		b = x1 + x5
-	lea	(%rdi,%r15),%rbp
-	# 		(uint32) b <<<= 7
-	rol	$7,%ebp
-	# 		x9 ^= b
-	xor	%rbp,%r10
-	# a = x0 + x4
-	lea	(%rdx,%r9),%rbp
-	# (uint32) a <<<= 9
-	rol	$9,%ebp
-	# x8 ^= a
-	xor	%rbp,%r11
-	# 		b = x5 + x9
-	lea	(%r15,%r10),%rbp
-	# 		(uint32) b <<<= 9
-	rol	$9,%ebp
-	# 		x13 ^= b
-	xor	%rbp,%r13
-	# a = x4 + x8
-	lea	(%r9,%r11),%rbp
-	# (uint32) a <<<= 13
-	rol	$13,%ebp
-	# x12 ^= a
-	xor	%rbp,%r14
-	# 		b = x9 + x13
-	lea	(%r10,%r13),%rbp
-	# 		(uint32) b <<<= 13
-	rol	$13,%ebp
-	# 		x1 ^= b
-	xor	%rbp,%rdi
-	# a = x8 + x12
-	lea	(%r11,%r14),%rbp
-	# (uint32) a <<<= 18
-	rol	$18,%ebp
-	# x0 ^= a
-	xor	%rbp,%rdx
-	# 		b = x13 + x1
-	lea	(%r13,%rdi),%rbp
-	# 		(uint32) b <<<= 18
-	rol	$18,%ebp
-	# 		x5 ^= b
-	xor	%rbp,%r15
-	# 				x10 = x10_stack
-	movq	168(%rsp),%rbp
-	# 		x5_stack = x5
-	movq	%r15,160(%rsp)
-	# 				c = x6 + x10
-	lea	(%rax,%rbp),%r15
-	# 				(uint32) c <<<= 7
-	rol	$7,%r15d
-	# 				x14 ^= c
-	xor	%r15,%rbx
-	# 				c = x10 + x14
-	lea	(%rbp,%rbx),%r15
-	# 				(uint32) c <<<= 9
-	rol	$9,%r15d
-	# 				x2 ^= c
-	xor	%r15,%rcx
-	# 				c = x14 + x2
-	lea	(%rbx,%rcx),%r15
-	# 				(uint32) c <<<= 13
-	rol	$13,%r15d
-	# 				x6 ^= c
-	xor	%r15,%rax
-	# 				c = x2 + x6
-	lea	(%rcx,%rax),%r15
-	# 				(uint32) c <<<= 18
-	rol	$18,%r15d
-	# 				x10 ^= c
-	xor	%r15,%rbp
-	# 						x15 = x15_stack
-	movq	176(%rsp),%r15
-	# 				x10_stack = x10
-	movq	%rbp,168(%rsp)
-	# 						d = x11 + x15
-	lea	(%r12,%r15),%rbp
-	# 						(uint32) d <<<= 7
-	rol	$7,%ebp
-	# 						x3 ^= d
-	xor	%rbp,%rsi
-	# 						d = x15 + x3
-	lea	(%r15,%rsi),%rbp
-	# 						(uint32) d <<<= 9
-	rol	$9,%ebp
-	# 						x7 ^= d
-	xor	%rbp,%r8
-	# 						d = x3 + x7
-	lea	(%rsi,%r8),%rbp
-	# 						(uint32) d <<<= 13
-	rol	$13,%ebp
-	# 						x11 ^= d
-	xor	%rbp,%r12
-	# 						d = x7 + x11
-	lea	(%r8,%r12),%rbp
-	# 						(uint32) d <<<= 18
-	rol	$18,%ebp
-	# 						x15 ^= d
-	xor	%rbp,%r15
-	# 						x15_stack = x15
-	movq	%r15,176(%rsp)
-	# 		x5 = x5_stack
-	movq	160(%rsp),%r15
-	# a = x3 + x0
-	lea	(%rsi,%rdx),%rbp
-	# (uint32) a <<<= 7
-	rol	$7,%ebp
-	# x1 ^= a
-	xor	%rbp,%rdi
-	# 		b = x4 + x5
-	lea	(%r9,%r15),%rbp
-	# 		(uint32) b <<<= 7
-	rol	$7,%ebp
-	# 		x6 ^= b
-	xor	%rbp,%rax
-	# a = x0 + x1
-	lea	(%rdx,%rdi),%rbp
-	# (uint32) a <<<= 9
-	rol	$9,%ebp
-	# x2 ^= a
-	xor	%rbp,%rcx
-	# 		b = x5 + x6
-	lea	(%r15,%rax),%rbp
-	# 		(uint32) b <<<= 9
-	rol	$9,%ebp
-	# 		x7 ^= b
-	xor	%rbp,%r8
-	# a = x1 + x2
-	lea	(%rdi,%rcx),%rbp
-	# (uint32) a <<<= 13
-	rol	$13,%ebp
-	# x3 ^= a
-	xor	%rbp,%rsi
-	# 		b = x6 + x7
-	lea	(%rax,%r8),%rbp
-	# 		(uint32) b <<<= 13
-	rol	$13,%ebp
-	# 		x4 ^= b
-	xor	%rbp,%r9
-	# a = x2 + x3
-	lea	(%rcx,%rsi),%rbp
-	# (uint32) a <<<= 18
-	rol	$18,%ebp
-	# x0 ^= a
-	xor	%rbp,%rdx
-	# 		b = x7 + x4
-	lea	(%r8,%r9),%rbp
-	# 		(uint32) b <<<= 18
-	rol	$18,%ebp
-	# 		x5 ^= b
-	xor	%rbp,%r15
-	# 				x10 = x10_stack
-	movq	168(%rsp),%rbp
-	# 		x5_stack = x5
-	movq	%r15,160(%rsp)
-	# 				c = x9 + x10
-	lea	(%r10,%rbp),%r15
-	# 				(uint32) c <<<= 7
-	rol	$7,%r15d
-	# 				x11 ^= c
-	xor	%r15,%r12
-	# 				c = x10 + x11
-	lea	(%rbp,%r12),%r15
-	# 				(uint32) c <<<= 9
-	rol	$9,%r15d
-	# 				x8 ^= c
-	xor	%r15,%r11
-	# 				c = x11 + x8
-	lea	(%r12,%r11),%r15
-	# 				(uint32) c <<<= 13
-	rol	$13,%r15d
-	# 				x9 ^= c
-	xor	%r15,%r10
-	# 				c = x8 + x9
-	lea	(%r11,%r10),%r15
-	# 				(uint32) c <<<= 18
-	rol	$18,%r15d
-	# 				x10 ^= c
-	xor	%r15,%rbp
-	# 						x15 = x15_stack
-	movq	176(%rsp),%r15
-	# 				x10_stack = x10
-	movq	%rbp,168(%rsp)
-	# 						d = x14 + x15
-	lea	(%rbx,%r15),%rbp
-	# 						(uint32) d <<<= 7
-	rol	$7,%ebp
-	# 						x12 ^= d
-	xor	%rbp,%r14
-	# 						d = x15 + x12
-	lea	(%r15,%r14),%rbp
-	# 						(uint32) d <<<= 9
-	rol	$9,%ebp
-	# 						x13 ^= d
-	xor	%rbp,%r13
-	# 						d = x12 + x13
-	lea	(%r14,%r13),%rbp
-	# 						(uint32) d <<<= 13
-	rol	$13,%ebp
-	# 						x14 ^= d
-	xor	%rbp,%rbx
-	# 						d = x13 + x14
-	lea	(%r13,%rbx),%rbp
-	# 						(uint32) d <<<= 18
-	rol	$18,%ebp
-	# 						x15 ^= d
-	xor	%rbp,%r15
-	# 						x15_stack = x15
-	movq	%r15,176(%rsp)
-	# 		x5 = x5_stack
-	movq	160(%rsp),%r15
-	# a = x12 + x0
-	lea	(%r14,%rdx),%rbp
-	# (uint32) a <<<= 7
-	rol	$7,%ebp
-	# x4 ^= a
-	xor	%rbp,%r9
-	# 		b = x1 + x5
-	lea	(%rdi,%r15),%rbp
-	# 		(uint32) b <<<= 7
-	rol	$7,%ebp
-	# 		x9 ^= b
-	xor	%rbp,%r10
-	# a = x0 + x4
-	lea	(%rdx,%r9),%rbp
-	# (uint32) a <<<= 9
-	rol	$9,%ebp
-	# x8 ^= a
-	xor	%rbp,%r11
-	# 		b = x5 + x9
-	lea	(%r15,%r10),%rbp
-	# 		(uint32) b <<<= 9
-	rol	$9,%ebp
-	# 		x13 ^= b
-	xor	%rbp,%r13
-	# a = x4 + x8
-	lea	(%r9,%r11),%rbp
-	# (uint32) a <<<= 13
-	rol	$13,%ebp
-	# x12 ^= a
-	xor	%rbp,%r14
-	# 		b = x9 + x13
-	lea	(%r10,%r13),%rbp
-	# 		(uint32) b <<<= 13
-	rol	$13,%ebp
-	# 		x1 ^= b
-	xor	%rbp,%rdi
-	# a = x8 + x12
-	lea	(%r11,%r14),%rbp
-	# (uint32) a <<<= 18
-	rol	$18,%ebp
-	# x0 ^= a
-	xor	%rbp,%rdx
-	# 		b = x13 + x1
-	lea	(%r13,%rdi),%rbp
-	# 		(uint32) b <<<= 18
-	rol	$18,%ebp
-	# 		x5 ^= b
-	xor	%rbp,%r15
-	# 				x10 = x10_stack
-	movq	168(%rsp),%rbp
-	# 		x5_stack = x5
-	movq	%r15,160(%rsp)
-	# 				c = x6 + x10
-	lea	(%rax,%rbp),%r15
-	# 				(uint32) c <<<= 7
-	rol	$7,%r15d
-	# 				x14 ^= c
-	xor	%r15,%rbx
-	# 				c = x10 + x14
-	lea	(%rbp,%rbx),%r15
-	# 				(uint32) c <<<= 9
-	rol	$9,%r15d
-	# 				x2 ^= c
-	xor	%r15,%rcx
-	# 				c = x14 + x2
-	lea	(%rbx,%rcx),%r15
-	# 				(uint32) c <<<= 13
-	rol	$13,%r15d
-	# 				x6 ^= c
-	xor	%r15,%rax
-	# 				c = x2 + x6
-	lea	(%rcx,%rax),%r15
-	# 				(uint32) c <<<= 18
-	rol	$18,%r15d
-	# 				x10 ^= c
-	xor	%r15,%rbp
-	# 						x15 = x15_stack
-	movq	176(%rsp),%r15
-	# 				x10_stack = x10
-	movq	%rbp,168(%rsp)
-	# 						d = x11 + x15
-	lea	(%r12,%r15),%rbp
-	# 						(uint32) d <<<= 7
-	rol	$7,%ebp
-	# 						x3 ^= d
-	xor	%rbp,%rsi
-	# 						d = x15 + x3
-	lea	(%r15,%rsi),%rbp
-	# 						(uint32) d <<<= 9
-	rol	$9,%ebp
-	# 						x7 ^= d
-	xor	%rbp,%r8
-	# 						d = x3 + x7
-	lea	(%rsi,%r8),%rbp
-	# 						(uint32) d <<<= 13
-	rol	$13,%ebp
-	# 						x11 ^= d
-	xor	%rbp,%r12
-	# 						d = x7 + x11
-	lea	(%r8,%r12),%rbp
-	# 						(uint32) d <<<= 18
-	rol	$18,%ebp
-	# 						x15 ^= d
-	xor	%rbp,%r15
-	# 						x15_stack = x15
-	movq	%r15,176(%rsp)
-	# 		x5 = x5_stack
-	movq	160(%rsp),%r15
-	# a = x3 + x0
-	lea	(%rsi,%rdx),%rbp
-	# (uint32) a <<<= 7
-	rol	$7,%ebp
-	# x1 ^= a
-	xor	%rbp,%rdi
-	# 		b = x4 + x5
-	lea	(%r9,%r15),%rbp
-	# 		(uint32) b <<<= 7
-	rol	$7,%ebp
-	# 		x6 ^= b
-	xor	%rbp,%rax
-	# a = x0 + x1
-	lea	(%rdx,%rdi),%rbp
-	# (uint32) a <<<= 9
-	rol	$9,%ebp
-	# x2 ^= a
-	xor	%rbp,%rcx
-	# 		b = x5 + x6
-	lea	(%r15,%rax),%rbp
-	# 		(uint32) b <<<= 9
-	rol	$9,%ebp
-	# 		x7 ^= b
-	xor	%rbp,%r8
-	# a = x1 + x2
-	lea	(%rdi,%rcx),%rbp
-	# (uint32) a <<<= 13
-	rol	$13,%ebp
-	# x3 ^= a
-	xor	%rbp,%rsi
-	# 		b = x6 + x7
-	lea	(%rax,%r8),%rbp
-	# 		(uint32) b <<<= 13
-	rol	$13,%ebp
-	# 		x4 ^= b
-	xor	%rbp,%r9
-	# a = x2 + x3
-	lea	(%rcx,%rsi),%rbp
-	# (uint32) a <<<= 18
-	rol	$18,%ebp
-	# x0 ^= a
-	xor	%rbp,%rdx
-	# 		b = x7 + x4
-	lea	(%r8,%r9),%rbp
-	# 		(uint32) b <<<= 18
-	rol	$18,%ebp
-	# 		x5 ^= b
-	xor	%rbp,%r15
-	# 				x10 = x10_stack
-	movq	168(%rsp),%rbp
-	# 		x5_stack = x5
-	movq	%r15,160(%rsp)
-	# 				c = x9 + x10
-	lea	(%r10,%rbp),%r15
-	# 				(uint32) c <<<= 7
-	rol	$7,%r15d
-	# 				x11 ^= c
-	xor	%r15,%r12
-	# 				c = x10 + x11
-	lea	(%rbp,%r12),%r15
-	# 				(uint32) c <<<= 9
-	rol	$9,%r15d
-	# 				x8 ^= c
-	xor	%r15,%r11
-	# 				c = x11 + x8
-	lea	(%r12,%r11),%r15
-	# 				(uint32) c <<<= 13
-	rol	$13,%r15d
-	# 				x9 ^= c
-	xor	%r15,%r10
-	# 				c = x8 + x9
-	lea	(%r11,%r10),%r15
-	# 				(uint32) c <<<= 18
-	rol	$18,%r15d
-	# 				x10 ^= c
-	xor	%r15,%rbp
-	# 						x15 = x15_stack
-	movq	176(%rsp),%r15
-	# 				x10_stack = x10
-	movq	%rbp,168(%rsp)
-	# 						d = x14 + x15
-	lea	(%rbx,%r15),%rbp
-	# 						(uint32) d <<<= 7
-	rol	$7,%ebp
-	# 						x12 ^= d
-	xor	%rbp,%r14
-	# 						d = x15 + x12
-	lea	(%r15,%r14),%rbp
-	# 						(uint32) d <<<= 9
-	rol	$9,%ebp
-	# 						x13 ^= d
-	xor	%rbp,%r13
-	# 						d = x12 + x13
-	lea	(%r14,%r13),%rbp
-	# 						(uint32) d <<<= 13
-	rol	$13,%ebp
-	# 						x14 ^= d
-	xor	%rbp,%rbx
-	# 						d = x13 + x14
-	lea	(%r13,%rbx),%rbp
-	# 						(uint32) d <<<= 18
-	rol	$18,%ebp
-	# 						x15 ^= d
-	xor	%rbp,%r15
-	# 						x15_stack = x15
-	movq	%r15,176(%rsp)
-	#   i = i_backup
-	movq	184(%rsp),%r15
-	#                  unsigned>? i -= 4
-	sub	$4,%r15
-	# comment:fp stack unchanged by jump
-	# goto mainloop if unsigned>
-	ja	._mainloop
-	#   (uint32) x2 += j2
-	addl	64(%rsp),%ecx
-	#   x3 <<= 32
-	shl	$32,%rsi
-	#   x3 += j2
-	addq	64(%rsp),%rsi
-	#   (uint64) x3 >>= 32
-	shr	$32,%rsi
-	#   x3 <<= 32
-	shl	$32,%rsi
-	#   x2 += x3
-	add	%rsi,%rcx
-	#   (uint32) x6 += j6
-	addl	80(%rsp),%eax
-	#   x7 <<= 32
-	shl	$32,%r8
-	#   x7 += j6
-	addq	80(%rsp),%r8
-	#   (uint64) x7 >>= 32
-	shr	$32,%r8
-	#   x7 <<= 32
-	shl	$32,%r8
-	#   x6 += x7
-	add	%r8,%rax
-	#   (uint32) x8 += j8
-	addl	88(%rsp),%r11d
-	#   x9 <<= 32
-	shl	$32,%r10
-	#   x9 += j8
-	addq	88(%rsp),%r10
-	#   (uint64) x9 >>= 32
-	shr	$32,%r10
-	#   x9 <<= 32
-	shl	$32,%r10
-	#   x8 += x9
-	add	%r10,%r11
-	#   (uint32) x12 += j12
-	addl	104(%rsp),%r14d
-	#   x13 <<= 32
-	shl	$32,%r13
-	#   x13 += j12
-	addq	104(%rsp),%r13
-	#   (uint64) x13 >>= 32
-	shr	$32,%r13
-	#   x13 <<= 32
-	shl	$32,%r13
-	#   x12 += x13
-	add	%r13,%r14
-	#   (uint32) x0 += j0
-	addl	56(%rsp),%edx
-	#   x1 <<= 32
-	shl	$32,%rdi
-	#   x1 += j0
-	addq	56(%rsp),%rdi
-	#   (uint64) x1 >>= 32
-	shr	$32,%rdi
-	#   x1 <<= 32
-	shl	$32,%rdi
-	#   x0 += x1
-	add	%rdi,%rdx
-	#   x5 = x5_stack
-	movq	160(%rsp),%rdi
-	#   (uint32) x4 += j4
-	addl	72(%rsp),%r9d
-	#   x5 <<= 32
-	shl	$32,%rdi
-	#   x5 += j4
-	addq	72(%rsp),%rdi
-	#   (uint64) x5 >>= 32
-	shr	$32,%rdi
-	#   x5 <<= 32
-	shl	$32,%rdi
-	#   x4 += x5
-	add	%rdi,%r9
-	#   x10 = x10_stack
-	movq	168(%rsp),%r8
-	#   (uint32) x10 += j10
-	addl	96(%rsp),%r8d
-	#   x11 <<= 32
-	shl	$32,%r12
-	#   x11 += j10
-	addq	96(%rsp),%r12
-	#   (uint64) x11 >>= 32
-	shr	$32,%r12
-	#   x11 <<= 32
-	shl	$32,%r12
-	#   x10 += x11
-	add	%r12,%r8
-	#   x15 = x15_stack
-	movq	176(%rsp),%rdi
-	#   (uint32) x14 += j14
-	addl	112(%rsp),%ebx
-	#   x15 <<= 32
-	shl	$32,%rdi
-	#   x15 += j14
-	addq	112(%rsp),%rdi
-	#   (uint64) x15 >>= 32
-	shr	$32,%rdi
-	#   x15 <<= 32
-	shl	$32,%rdi
-	#   x14 += x15
-	add	%rdi,%rbx
-	#   out = out_backup
-	movq	136(%rsp),%rdi
-	#   m = m_backup
-	movq	144(%rsp),%rsi
-	#   x0 ^= *(uint64 *) (m + 0)
-	xorq	0(%rsi),%rdx
-	#   *(uint64 *) (out + 0) = x0
-	movq	%rdx,0(%rdi)
-	#   x2 ^= *(uint64 *) (m + 8)
-	xorq	8(%rsi),%rcx
-	#   *(uint64 *) (out + 8) = x2
-	movq	%rcx,8(%rdi)
-	#   x4 ^= *(uint64 *) (m + 16)
-	xorq	16(%rsi),%r9
-	#   *(uint64 *) (out + 16) = x4
-	movq	%r9,16(%rdi)
-	#   x6 ^= *(uint64 *) (m + 24)
-	xorq	24(%rsi),%rax
-	#   *(uint64 *) (out + 24) = x6
-	movq	%rax,24(%rdi)
-	#   x8 ^= *(uint64 *) (m + 32)
-	xorq	32(%rsi),%r11
-	#   *(uint64 *) (out + 32) = x8
-	movq	%r11,32(%rdi)
-	#   x10 ^= *(uint64 *) (m + 40)
-	xorq	40(%rsi),%r8
-	#   *(uint64 *) (out + 40) = x10
-	movq	%r8,40(%rdi)
-	#   x12 ^= *(uint64 *) (m + 48)
-	xorq	48(%rsi),%r14
-	#   *(uint64 *) (out + 48) = x12
-	movq	%r14,48(%rdi)
-	#   x14 ^= *(uint64 *) (m + 56)
-	xorq	56(%rsi),%rbx
-	#   *(uint64 *) (out + 56) = x14
-	movq	%rbx,56(%rdi)
-	#   bytes = bytes_backup
-	movq	152(%rsp),%rdx
-	#   in8 = j8
-	movq	88(%rsp),%rcx
-	#   in8 += 1
-	add	$1,%rcx
-	#   j8 = in8
-	movq	%rcx,88(%rsp)
-	#                          unsigned>? unsigned<? bytes - 64
-	cmp	$64,%rdx
-	# comment:fp stack unchanged by jump
-	#   goto bytesatleast65 if unsigned>
-	ja	._bytesatleast65
-	# comment:fp stack unchanged by jump
-	#     goto bytesatleast64 if !unsigned<
-	jae	._bytesatleast64
-	#       m = out
-	mov	%rdi,%rsi
-	#       out = ctarget
-	movq	128(%rsp),%rdi
-	#       i = bytes
-	mov	%rdx,%rcx
-	#       while (i) { *out++ = *m++; --i }
-	rep	movsb
-	# comment:fp stack unchanged by fallthrough
-#     bytesatleast64:
-._bytesatleast64:
-	#     x = x_backup
-	movq	120(%rsp),%rdi
-	#     in8 = j8
-	movq	88(%rsp),%rsi
-	#     *(uint64 *) (x + 32) = in8
-	movq	%rsi,32(%rdi)
-	#     r11 = r11_stack
-	movq	0(%rsp),%r11
-	#     r12 = r12_stack
-	movq	8(%rsp),%r12
-	#     r13 = r13_stack
-	movq	16(%rsp),%r13
-	#     r14 = r14_stack
-	movq	24(%rsp),%r14
-	#     r15 = r15_stack
-	movq	32(%rsp),%r15
-	#     rbx = rbx_stack
-	movq	40(%rsp),%rbx
-	#     rbp = rbp_stack
-	movq	48(%rsp),%rbp
-	# comment:fp stack unchanged by fallthrough
-#     done:
-._done:
-	#     leave
-	add	%r11,%rsp
-	mov	%rdi,%rax
-	mov	%rsi,%rdx
-	ret
-#   bytesatleast65:
-._bytesatleast65:
-	#   bytes -= 64
-	sub	$64,%rdx
-	#   out += 64
-	add	$64,%rdi
-	#   m += 64
-	add	$64,%rsi
-	# comment:fp stack unchanged by jump
-	# goto bytesatleast1
-	jmp	._bytesatleast1
-ENDPROC(salsa20_encrypt_bytes)
-
-# enter salsa20_keysetup
-ENTRY(salsa20_keysetup)
-	mov	%rsp,%r11
-	and	$31,%r11
-	add	$256,%r11
-	sub	%r11,%rsp
-	#   k = arg2
-	mov	%rsi,%rsi
-	#   kbits = arg3
-	mov	%rdx,%rdx
-	#   x = arg1
-	mov	%rdi,%rdi
-	#   in0 = *(uint64 *) (k + 0)
-	movq	0(%rsi),%r8
-	#   in2 = *(uint64 *) (k + 8)
-	movq	8(%rsi),%r9
-	#   *(uint64 *) (x + 4) = in0
-	movq	%r8,4(%rdi)
-	#   *(uint64 *) (x + 12) = in2
-	movq	%r9,12(%rdi)
-	#                    unsigned<? kbits - 256
-	cmp	$256,%rdx
-	# comment:fp stack unchanged by jump
-	#   goto kbits128 if unsigned<
-	jb	._kbits128
-#   kbits256:
-._kbits256:
-	#     in10 = *(uint64 *) (k + 16)
-	movq	16(%rsi),%rdx
-	#     in12 = *(uint64 *) (k + 24)
-	movq	24(%rsi),%rsi
-	#     *(uint64 *) (x + 44) = in10
-	movq	%rdx,44(%rdi)
-	#     *(uint64 *) (x + 52) = in12
-	movq	%rsi,52(%rdi)
-	#     in0 = 1634760805
-	mov	$1634760805,%rsi
-	#     in4 = 857760878
-	mov	$857760878,%rdx
-	#     in10 = 2036477234
-	mov	$2036477234,%rcx
-	#     in14 = 1797285236
-	mov	$1797285236,%r8
-	#     *(uint32 *) (x + 0) = in0
-	movl	%esi,0(%rdi)
-	#     *(uint32 *) (x + 20) = in4
-	movl	%edx,20(%rdi)
-	#     *(uint32 *) (x + 40) = in10
-	movl	%ecx,40(%rdi)
-	#     *(uint32 *) (x + 60) = in14
-	movl	%r8d,60(%rdi)
-	# comment:fp stack unchanged by jump
-	#   goto keysetupdone
-	jmp	._keysetupdone
-#   kbits128:
-._kbits128:
-	#     in10 = *(uint64 *) (k + 0)
-	movq	0(%rsi),%rdx
-	#     in12 = *(uint64 *) (k + 8)
-	movq	8(%rsi),%rsi
-	#     *(uint64 *) (x + 44) = in10
-	movq	%rdx,44(%rdi)
-	#     *(uint64 *) (x + 52) = in12
-	movq	%rsi,52(%rdi)
-	#     in0 = 1634760805
-	mov	$1634760805,%rsi
-	#     in4 = 824206446
-	mov	$824206446,%rdx
-	#     in10 = 2036477238
-	mov	$2036477238,%rcx
-	#     in14 = 1797285236
-	mov	$1797285236,%r8
-	#     *(uint32 *) (x + 0) = in0
-	movl	%esi,0(%rdi)
-	#     *(uint32 *) (x + 20) = in4
-	movl	%edx,20(%rdi)
-	#     *(uint32 *) (x + 40) = in10
-	movl	%ecx,40(%rdi)
-	#     *(uint32 *) (x + 60) = in14
-	movl	%r8d,60(%rdi)
-#   keysetupdone:
-._keysetupdone:
-	# leave
-	add	%r11,%rsp
-	mov	%rdi,%rax
-	mov	%rsi,%rdx
-	ret
-ENDPROC(salsa20_keysetup)
-
-# enter salsa20_ivsetup
-ENTRY(salsa20_ivsetup)
-	mov	%rsp,%r11
-	and	$31,%r11
-	add	$256,%r11
-	sub	%r11,%rsp
-	#   iv = arg2
-	mov	%rsi,%rsi
-	#   x = arg1
-	mov	%rdi,%rdi
-	#   in6 = *(uint64 *) (iv + 0)
-	movq	0(%rsi),%rsi
-	#   in8 = 0
-	mov	$0,%r8
-	#   *(uint64 *) (x + 24) = in6
-	movq	%rsi,24(%rdi)
-	#   *(uint64 *) (x + 32) = in8
-	movq	%r8,32(%rdi)
-	# leave
-	add	%r11,%rsp
-	mov	%rdi,%rax
-	mov	%rsi,%rdx
-	ret
-ENDPROC(salsa20_ivsetup)
diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c
deleted file mode 100644
index cb91a64a99e7..000000000000
--- a/arch/x86/crypto/salsa20_glue.c
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Glue code for optimized assembly version of  Salsa20.
- *
- * Copyright (c) 2007 Tan Swee Heng <thesweeheng@gmail.com>
- *
- * The assembly codes are public domain assembly codes written by Daniel. J.
- * Bernstein <djb@cr.yp.to>. The codes are modified to include indentation
- * and to remove extraneous comments and functions that are not needed.
- * - i586 version, renamed as salsa20-i586-asm_32.S
- *   available from <http://cr.yp.to/snuffle/salsa20/x86-pm/salsa20.s>
- * - x86-64 version, renamed as salsa20-x86_64-asm_64.S
- *   available from <http://cr.yp.to/snuffle/salsa20/amd64-3/salsa20.s>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include <crypto/algapi.h>
-#include <linux/module.h>
-#include <linux/crypto.h>
-
-#define SALSA20_IV_SIZE        8U
-#define SALSA20_MIN_KEY_SIZE  16U
-#define SALSA20_MAX_KEY_SIZE  32U
-
-struct salsa20_ctx
-{
-	u32 input[16];
-};
-
-asmlinkage void salsa20_keysetup(struct salsa20_ctx *ctx, const u8 *k,
-				 u32 keysize, u32 ivsize);
-asmlinkage void salsa20_ivsetup(struct salsa20_ctx *ctx, const u8 *iv);
-asmlinkage void salsa20_encrypt_bytes(struct salsa20_ctx *ctx,
-				      const u8 *src, u8 *dst, u32 bytes);
-
-static int setkey(struct crypto_tfm *tfm, const u8 *key,
-		  unsigned int keysize)
-{
-	struct salsa20_ctx *ctx = crypto_tfm_ctx(tfm);
-	salsa20_keysetup(ctx, key, keysize*8, SALSA20_IV_SIZE*8);
-	return 0;
-}
-
-static int encrypt(struct blkcipher_desc *desc,
-		   struct scatterlist *dst, struct scatterlist *src,
-		   unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
-	struct crypto_blkcipher *tfm = desc->tfm;
-	struct salsa20_ctx *ctx = crypto_blkcipher_ctx(tfm);
-	int err;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, 64);
-
-	salsa20_ivsetup(ctx, walk.iv);
-
-	while (walk.nbytes >= 64) {
-		salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
-				      walk.dst.virt.addr,
-				      walk.nbytes - (walk.nbytes % 64));
-		err = blkcipher_walk_done(desc, &walk, walk.nbytes % 64);
-	}
-
-	if (walk.nbytes) {
-		salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
-				      walk.dst.virt.addr, walk.nbytes);
-		err = blkcipher_walk_done(desc, &walk, 0);
-	}
-
-	return err;
-}
-
-static struct crypto_alg alg = {
-	.cra_name           =   "salsa20",
-	.cra_driver_name    =   "salsa20-asm",
-	.cra_priority       =   200,
-	.cra_flags          =   CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_type           =   &crypto_blkcipher_type,
-	.cra_blocksize      =   1,
-	.cra_ctxsize        =   sizeof(struct salsa20_ctx),
-	.cra_alignmask      =	3,
-	.cra_module         =   THIS_MODULE,
-	.cra_u              =   {
-		.blkcipher = {
-			.setkey         =   setkey,
-			.encrypt        =   encrypt,
-			.decrypt        =   encrypt,
-			.min_keysize    =   SALSA20_MIN_KEY_SIZE,
-			.max_keysize    =   SALSA20_MAX_KEY_SIZE,
-			.ivsize         =   SALSA20_IV_SIZE,
-		}
-	}
-};
-
-static int __init init(void)
-{
-	return crypto_register_alg(&alg);
-}
-
-static void __exit fini(void)
-{
-	crypto_unregister_alg(&alg);
-}
-
-module_init(init);
-module_exit(fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm (optimized assembly version)");
-MODULE_ALIAS_CRYPTO("salsa20");
-MODULE_ALIAS_CRYPTO("salsa20-asm");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 42212b60a0ee..5579eb88d460 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1324,32 +1324,6 @@ config CRYPTO_SALSA20
 	  The Salsa20 stream cipher algorithm is designed by Daniel J.
 	  Bernstein <djb@cr.yp.to>. See <http://cr.yp.to/snuffle.html>
 
-config CRYPTO_SALSA20_586
-	tristate "Salsa20 stream cipher algorithm (i586)"
-	depends on (X86 || UML_X86) && !64BIT
-	select CRYPTO_BLKCIPHER
-	help
-	  Salsa20 stream cipher algorithm.
-
-	  Salsa20 is a stream cipher submitted to eSTREAM, the ECRYPT
-	  Stream Cipher Project. See <http://www.ecrypt.eu.org/stream/>
-
-	  The Salsa20 stream cipher algorithm is designed by Daniel J.
-	  Bernstein <djb@cr.yp.to>. See <http://cr.yp.to/snuffle.html>
-
-config CRYPTO_SALSA20_X86_64
-	tristate "Salsa20 stream cipher algorithm (x86_64)"
-	depends on (X86 || UML_X86) && 64BIT
-	select CRYPTO_BLKCIPHER
-	help
-	  Salsa20 stream cipher algorithm.
-
-	  Salsa20 is a stream cipher submitted to eSTREAM, the ECRYPT
-	  Stream Cipher Project. See <http://www.ecrypt.eu.org/stream/>
-
-	  The Salsa20 stream cipher algorithm is designed by Daniel J.
-	  Bernstein <djb@cr.yp.to>. See <http://cr.yp.to/snuffle.html>
-
 config CRYPTO_CHACHA20
 	tristate "ChaCha20 cipher algorithm"
 	select CRYPTO_BLKCIPHER