diff --git a/source/alignedseqmemcpy.h b/source/alignedseqmemcpy.h
new file mode 100644
index 0000000..dc915bf
--- /dev/null
+++ b/source/alignedseqmemcpy.h
@@ -0,0 +1,28 @@
+/*
+* This file is part of Luma3DS
+* Copyright (C) 2016-2019 Aurora Wright, TuxSH
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+* Additional Terms 7.b and 7.c of GPLv3 apply to this file:
+* * Requiring preservation of specified reasonable legal notices or
+* author attributions in that material or in the Appropriate Legal
+* Notices displayed by works containing it.
+* * Prohibiting misrepresentation of the origin of that material,
+* or requiring that modified versions of such material be marked in
+* reasonable ways as different from the original version.
+*/
+#include "types.h"
+/* Copies len bytes from src to dst in ascending address order (32-byte blocks, then words, then a halfword and a byte); dst and src are expected to be 4-byte-aligned. NOTE(review): the assembly returns with r0 advanced past the copied data (dst + len) rather than dst -- confirm before relying on the return value. */
+void *alignedseqmemcpy(void *dst, const void *src, u32 len);
diff --git a/source/alignedseqmemcpy.s b/source/alignedseqmemcpy.s
new file mode 100644
index 0000000..6784b69
--- /dev/null
+++ b/source/alignedseqmemcpy.s
@@ -0,0 +1,65 @@
+@ This file is part of Luma3DS
+@ Copyright (C) 2016-2019 Aurora Wright, TuxSH
+@
+@ This program is free software: you can redistribute it and/or modify
+@ it under the terms of the GNU General Public License as published by
+@ the Free Software Foundation, either version 3 of the License, or
+@ (at your option) any later version.
+@
+@ This program is distributed in the hope that it will be useful,
+@ but WITHOUT ANY WARRANTY; without even the implied warranty of
+@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+@ GNU General Public License for more details.
+@
+@ You should have received a copy of the GNU General Public License
+@ along with this program. If not, see <http://www.gnu.org/licenses/>.
+@
+@ Additional Terms 7.b and 7.c of GPLv3 apply to this file:
+@ * Requiring preservation of specified reasonable legal notices or
+@ author attributions in that material or in the Appropriate Legal
+@ Notices displayed by works containing it.
+@ * Prohibiting misrepresentation of the origin of that material,
+@ or requiring that modified versions of such material be marked in
+@ reasonable ways as different from the original version.
+
+.section .text.alignedseqmemcpy, "ax", %progbits @ dedicated allocatable + executable section for this routine
+.arm
+@ Align on cache line boundaries & make sure the loops don't cross them.
+.align 5 @ on ARM, .align takes a power of two: 2^5 = 32-byte alignment
+.global alignedseqmemcpy
+.type alignedseqmemcpy, %function
+alignedseqmemcpy:
+ @ Copies len=r2 bytes from src=r1 to dst=r0, lowest address first; src=r1 and dst=r0 are expected to be 4-byte-aligned
+ push {r4-r10, lr} @ save the registers used as copy scratch (r4-r10) and the return address
+
+ lsrs r12, r2, #5 @ r12 = len / 32 = number of 32-byte blocks; sets Z when there are none
+ sub r2, r2, r12, lsl #5 @ r2 = len % 32; plain sub (no 's') leaves the flags from lsrs intact
+ beq 2f @ no full 32-byte block: skip the block loop
+
+1: @ block loop: 8 words per iteration
+ ldmia r1!, {r3-r10} @ load 8 words (32 bytes) from src, advancing r1
+ stmia r0!, {r3-r10} @ store them to dst, advancing r0
+ subs r12, #1 @ one block done
+ bne 1b
+
+2: @ word phase: fewer than 32 bytes remain in r2
+ lsrs r12, r2, #2 @ r12 = number of whole words left; sets Z when there are none
+ sub r2, r2, r12, lsl #2 @ r2 = leftover byte count (0-3); flags from lsrs untouched
+ beq 4f @ no whole word left: go straight to the tail
+
+3: @ word loop
+ ldr r3, [r1], #4 @ copy one word, post-incrementing both pointers
+ str r3, [r0], #4
+ subs r12, #1
+ bne 3b
+
+4: @ tail: at most 3 bytes remain
+ tst r2, #2 @ bit 1 of the leftover count set: copy one halfword
+ ldrneh r3, [r1], #2
+ strneh r3, [r0], #2
+
+ tst r2, #1 @ bit 0 set: copy the final byte
+ ldrneb r3, [r1], #1
+ strneb r3, [r0], #1
+
+ pop {r4-r10, pc} @ restore r4-r10 and return; r0 was advanced and never restored, so it holds dst + len here. NOTE(review): confirm callers do not expect dst back
diff --git a/source/cache.s b/source/cache.s
index 996608e..0990022 100644
--- a/source/cache.s
+++ b/source/cache.s
@@ -22,7 +22,7 @@
@ or requiring that modified versions of such material be marked in
@ reasonable ways as different from the original version.
-.text
+.section .text.cache, "ax", %progbits
.arm
.align 4
diff --git a/source/crypto.c b/source/crypto.c
index 74e69a1..300df4b 100755
--- a/source/crypto.c
+++ b/source/crypto.c
@@ -36,6 +36,7 @@
#include "emunand.h"
#include "strings.h"
#include "utils.h"
+#include "alignedseqmemcpy.h"
#include "fatfs/sdmmc/sdmmc.h"
/****************************************************************
@@ -288,24 +289,18 @@ void sha(void *res, const void *src, u32 size, u32 mode)
sha_wait_idle();
*REG_SHA_CNT = mode | SHA_CNT_OUTPUT_ENDIAN | SHA_NORMAL_ROUND;
- const u32 *src32 = (const u32 *)src;
- int i;
+ const u8 *src8 = (const u8 *)src;
while(size >= 0x40)
{
sha_wait_idle();
- for(i = 0; i < 4; ++i)
- {
- *REG_SHA_INFIFO = *src32++;
- *REG_SHA_INFIFO = *src32++;
- *REG_SHA_INFIFO = *src32++;
- *REG_SHA_INFIFO = *src32++;
- }
+ alignedseqmemcpy((void *)REG_SHA_INFIFO, src8, 0x40);
+ src8 += 0x40;
size -= 0x40;
}
sha_wait_idle();
- memcpy((void *)REG_SHA_INFIFO, src32, size);
+ alignedseqmemcpy((void *)REG_SHA_INFIFO, src8, size);
*REG_SHA_CNT = (*REG_SHA_CNT & ~SHA_NORMAL_ROUND) | SHA_FINAL_ROUND;
diff --git a/source/start.s b/source/start.s
index 1af74dd..aa5e37f 100644
--- a/source/start.s
+++ b/source/start.s
@@ -22,7 +22,7 @@
@ or requiring that modified versions of such material be marked in
@ reasonable ways as different from the original version.
-.section .text.start
+.section .text.start, "ax", %progbits
.align 4
.global _start
_start: