#pragma once

// Based on: https://github.com/ARM-software/CMSIS_5/blob/master/CMSIS/Core/Include/cmsis_gcc.h

#include "types.h"


// u32 result, u32 op1.
#define MAKE_INTR_U32_1OP(isVolatile, inst) \
ALWAYS_INLINE u32 __##inst(u32 op1) \
{ \
    u32 res; \
    if(isVolatile == 1) \
        __asm__ volatile(#inst " %0, %1" : "=r" (res) : "r" (op1) : ); \
    else \
        __asm__(#inst " %0, %1" : "=r" (res) : "r" (op1) : ); \
    return res; \
}
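
// Illustrative note (not from the original header): instantiating this macro, for example
// MAKE_INTR_U32_1OP(0, rev), yields an inline wrapper roughly equivalent to
//
//     ALWAYS_INLINE u32 __rev(u32 op1)
//     {
//         u32 res;
//         __asm__("rev %0, %1" : "=r" (res) : "r" (op1) : );
//         return res;
//     }
//
// so a call such as __rev(0x11223344u) is expected to return 0x44332211u.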

// u32 result, u32 op1, u32 op2.
#define MAKE_INTR_U32_2OPS(isVolatile, inst) \
ALWAYS_INLINE u32 __##inst(u32 op1, u32 op2) \
{ \
    u32 res; \
    if(isVolatile == 1) \
        __asm__ volatile(#inst " %0, %1, %2" : "=r" (res) : "r" (op1), "r" (op2) : ); \
    else \
        __asm__(#inst " %0, %1, %2" : "=r" (res) : "r" (op1), "r" (op2) : ); \
    return res; \
}

// s32 result, s32 op1, s32 op2.
#define MAKE_INTR_S32_2OPS(isVolatile, inst) \
ALWAYS_INLINE s32 __##inst(s32 op1, s32 op2) \
{ \
    s32 res; \
    if(isVolatile == 1) \
        __asm__ volatile(#inst " %0, %1, %2" : "=r" (res) : "r" (op1), "r" (op2) : ); \
    else \
        __asm__(#inst " %0, %1, %2" : "=r" (res) : "r" (op1), "r" (op2) : ); \
    return res; \
}

// u32 result, u32 op1, u32 op2, u32 op3.
#define MAKE_INTR_U32_3OPS(isVolatile, inst) \
ALWAYS_INLINE u32 __##inst(u32 op1, u32 op2, u32 op3) \
{ \
    u32 res; \
    if(isVolatile == 1) \
        __asm__ volatile(#inst " %0, %1, %2, %3" : "=r" (res) : "r" (op1), "r" (op2), "r" (op3) : ); \
    else \
        __asm__(#inst " %0, %1, %2, %3" : "=r" (res) : "r" (op1), "r" (op2), "r" (op3) : ); \
    return res; \
}

// s32 result, s32 op1, s32 op2, s32 op3.
#define MAKE_INTR_S32_3OPS(isVolatile, inst) \
ALWAYS_INLINE s32 __##inst(s32 op1, s32 op2, s32 op3) \
{ \
    s32 res; \
    if(isVolatile == 1) \
        __asm__ volatile(#inst " %0, %1, %2, %3" : "=r" (res) : "r" (op1), "r" (op2), "r" (op3) : ); \
    else \
        __asm__(#inst " %0, %1, %2, %3" : "=r" (res) : "r" (op1), "r" (op2), "r" (op3) : ); \
    return res; \
}
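
// Illustrative note (not from the original header): the three-operand wrappers are used for
// multiply-accumulate style instructions. For instance, __smlabb(a, b, acc) generated below
// should compute acc + (s32)(s16)a * (s32)(s16)b, i.e. a 16x16+32 multiply-accumulate.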

#ifndef __ARMEB__ // Little endian
// Special instruction with shared u64 input and output.
// u64 result, u32 op1, u32 op2, u64 acc.
#define MAKE_INTR_U64_U32_U32_U64(isVolatile, inst) \
ALWAYS_INLINE u64 __##inst(u32 op1, u32 op2, u64 acc) \
{ \
    union \
    { \
        u32 r32[2]; \
        u64 r64; \
    } r; \
    r.r64 = acc; \
 \
    if(isVolatile == 1) \
        __asm__ volatile(#inst " %0, %1, %2, %3" : "=r" (r.r32[0]), "=r" (r.r32[1]) : "r" (op1), "r" (op2), "0" (r.r32[0]), "1" (r.r32[1]) : ); \
    else \
        __asm__(#inst " %0, %1, %2, %3" : "=r" (r.r32[0]), "=r" (r.r32[1]) : "r" (op1), "r" (op2), "0" (r.r32[0]), "1" (r.r32[1]) : ); \
 \
    return r.r64; \
}
#else // Big endian
// Special instruction with shared u64 input and output.
// u64 result, u32 op1, u32 op2, u64 acc.
#define MAKE_INTR_U64_U32_U32_U64(isVolatile, inst) \
ALWAYS_INLINE u64 __##inst(u32 op1, u32 op2, u64 acc) \
{ \
    union \
    { \
        u32 r32[2]; \
        u64 r64; \
    } r; \
    r.r64 = acc; \
 \
    if(isVolatile == 1) \
        __asm__ volatile(#inst " %0, %1, %2, %3" : "=r" (r.r32[1]), "=r" (r.r32[0]) : "r" (op1), "r" (op2), "0" (r.r32[1]), "1" (r.r32[0]) : ); \
    else \
        __asm__(#inst " %0, %1, %2, %3" : "=r" (r.r32[1]), "=r" (r.r32[0]) : "r" (op1), "r" (op2), "0" (r.r32[1]), "1" (r.r32[0]) : ); \
 \
    return r.r64; \
}
#endif // #ifndef __ARMEB__
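
// Illustrative note (not from the original header): the union splits the 64-bit accumulator
// into the two 32-bit registers an instruction like SMLAL expects (RdLo/RdHi). On little-endian
// targets the low word is r32[0] and the high word is r32[1]; the big-endian variant simply
// swaps the indices so the same register operands still receive the correct halves. As an
// example, __smlal(a, b, acc) generated below should behave like
// acc + (s64)(s32)a * (s64)(s32)b, returned as a u64 bit pattern.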

// Pack Halfword Bottom Top.
#define __pkhbt(op1, op2, sh) \
({ \
    u32 __res; \
    __asm__("pkhbt %0, %1, %2, lsl %3" : "=r" (__res) : "r" (op1), "r" (op2), "I" (sh) : ); \
    __res; \
})

// Pack Halfword Top Bottom.
#define __pkhtb(op1, op2, sh) \
({ \
    u32 __res, __sh = (sh); \
    if(__sh == 0) \
        __asm__("pkhtb %0, %1, %2" : "=r" (__res) : "r" (op1), "r" (op2) : ); \
    else \
        __asm__("pkhtb %0, %1, %2, asr %3" : "=r" (__res) : "r" (op1), "r" (op2), "I" (__sh) : ); \
    __res; \
})
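
// Illustrative note (not from the original header): PKHBT keeps the bottom halfword of op1 and
// takes the top halfword of the shifted op2, so __pkhbt(0x0000AAAAu, 0x0000BBBBu, 16) is
// expected to return 0xBBBBAAAAu. __pkhtb special-cases a shift of 0 because in the PKHTB
// encoding an immediate of 0 means "asr #32" rather than no shift.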

MAKE_INTR_S32_2OPS(1, qadd) // Signed saturating addition.
MAKE_INTR_U32_2OPS(0, qadd8) // Signed saturating parallel byte-wise addition.
MAKE_INTR_U32_2OPS(0, qadd16) // Signed saturating parallel halfword-wise addition.
MAKE_INTR_U32_2OPS(0, qasx) // Signed saturating parallel add and subtract halfwords with exchange.
MAKE_INTR_S32_2OPS(1, qdadd) // Signed saturating Double and Add.
MAKE_INTR_S32_2OPS(1, qdsub) // Signed saturating Double and Subtract.
MAKE_INTR_U32_2OPS(0, qsax) // Signed saturating parallel subtract and add halfwords with exchange.
MAKE_INTR_S32_2OPS(1, qsub) // Signed saturating Subtract.
MAKE_INTR_U32_2OPS(0, qsub8) // Signed saturating parallel byte-wise subtraction.
MAKE_INTR_U32_2OPS(0, qsub16) // Signed saturating parallel halfword-wise subtraction.
MAKE_INTR_U32_1OP(0, rev) // Reverse the byte order in a word.
MAKE_INTR_U32_1OP(0, rev16) // Reverse the byte order in each halfword independently.
MAKE_INTR_U32_1OP(0, revsh) // Reverse the byte order in the bottom halfword, and sign extend to 32 bits.
MAKE_INTR_U32_2OPS(1, sadd8) // Signed parallel byte-wise addition.
MAKE_INTR_U32_2OPS(1, sadd16) // Signed parallel halfword-wise addition.
MAKE_INTR_U32_2OPS(1, sasx) // Signed parallel add and subtract halfwords with exchange.
MAKE_INTR_U32_2OPS(1, sel) // Select bytes from each operand according to the state of the APSR GE flags.
MAKE_INTR_U32_2OPS(0, shadd8) // Signed halving parallel byte-wise addition.
MAKE_INTR_U32_2OPS(0, shadd16) // Signed halving parallel halfword-wise addition.
MAKE_INTR_U32_2OPS(0, shasx) // Signed halving parallel add and subtract halfwords with exchange.
MAKE_INTR_U32_2OPS(0, shsax) // Signed halving parallel subtract and add halfwords with exchange.
MAKE_INTR_U32_2OPS(0, shsub8) // Signed halving parallel byte-wise subtraction.
MAKE_INTR_U32_2OPS(0, shsub16) // Signed halving parallel halfword-wise subtraction.
MAKE_INTR_U32_3OPS(1, smlabb) // Signed Multiply Accumulate, with 16-bit operands (bottom, bottom) and a 32-bit result and accumulator.
MAKE_INTR_U32_3OPS(1, smlabt) // Signed Multiply Accumulate, with 16-bit operands (bottom, top) and a 32-bit result and accumulator.
MAKE_INTR_U32_3OPS(1, smlatb) // Signed Multiply Accumulate, with 16-bit operands (top, bottom) and a 32-bit result and accumulator.
MAKE_INTR_U32_3OPS(1, smlatt) // Signed Multiply Accumulate, with 16-bit operands (top, top) and a 32-bit result and accumulator.
MAKE_INTR_U32_3OPS(0, smlad) // Dual 16-bit Signed Multiply with Addition of products and 32-bit accumulation.
MAKE_INTR_U32_3OPS(0, smladx) // Dual 16-bit Signed exchange Multiply with Addition of products and 32-bit accumulation.
MAKE_INTR_U64_U32_U32_U64(0, smlal) // Signed Long Multiply, with optional Accumulate, with 32-bit operands, and 64-bit result and accumulator.
MAKE_INTR_U64_U32_U32_U64(0, smlald) // Dual 16-bit Signed Multiply with Addition of products and 64-bit Accumulation.
MAKE_INTR_U64_U32_U32_U64(0, smlaldx) // Dual 16-bit Signed exchange Multiply with Addition of products and 64-bit Accumulation.
MAKE_INTR_U64_U32_U32_U64(0, smlalbb) // Signed Multiply-Accumulate with 16-bit operands (bottom, bottom) and a 64-bit accumulator.
MAKE_INTR_U64_U32_U32_U64(0, smlalbt) // Signed Multiply-Accumulate with 16-bit operands (bottom, top) and a 64-bit accumulator.
MAKE_INTR_U64_U32_U32_U64(0, smlaltb) // Signed Multiply-Accumulate with 16-bit operands (top, bottom) and a 64-bit accumulator.
MAKE_INTR_U64_U32_U32_U64(0, smlaltt) // Signed Multiply-Accumulate with 16-bit operands (top, top) and a 64-bit accumulator.
MAKE_INTR_U32_3OPS(1, smlawb) // Signed Multiply-Accumulate Wide, with one 32-bit operand and one 16-bit operand (bottom half), and a 32-bit accumulate value, providing the top 32 bits of the result.
MAKE_INTR_U32_3OPS(1, smlawt) // Signed Multiply-Accumulate Wide, with one 32-bit operand and one 16-bit operand (top half), and a 32-bit accumulate value, providing the top 32 bits of the result.
MAKE_INTR_U32_3OPS(0, smlsd) // Dual 16-bit Signed Multiply with Subtraction of products and 32-bit accumulation.
MAKE_INTR_U32_3OPS(0, smlsdx) // Dual 16-bit Signed exchange Multiply with Subtraction of products and 32-bit accumulation.
MAKE_INTR_U64_U32_U32_U64(0, smlsld) // Dual 16-bit Signed Multiply with Subtraction of products and 64-bit Accumulation.
MAKE_INTR_U64_U32_U32_U64(0, smlsldx) // Dual 16-bit Signed exchange Multiply with Subtraction of products and 64-bit Accumulation.
MAKE_INTR_S32_3OPS(0, smmla) // Signed Most significant word Multiply with Accumulation.
MAKE_INTR_S32_3OPS(0, smmlar) // Signed Most significant word Multiply with Accumulation and rounding.
MAKE_INTR_S32_3OPS(0, smmls) // Signed Most significant word Multiply with Subtraction.
MAKE_INTR_S32_3OPS(0, smmlsr) // Signed Most significant word Multiply with Subtraction and rounding.
MAKE_INTR_S32_2OPS(0, smmul) // Signed Most significant word Multiply.
MAKE_INTR_S32_2OPS(0, smmulr) // Signed Most significant word Multiply and round.
MAKE_INTR_U32_2OPS(1, smuad) // Dual 16-bit Signed Multiply with Addition of products.
MAKE_INTR_U32_2OPS(1, smuadx) // Dual 16-bit Signed Multiply with Addition of products with exchange.
MAKE_INTR_U32_2OPS(0, smulbb) // Signed Multiply, with 16-bit operands (bottom, bottom) and a 32-bit result.
MAKE_INTR_U32_2OPS(0, smulbt) // Signed Multiply, with 16-bit operands (bottom, top) and a 32-bit result.
MAKE_INTR_U32_2OPS(0, smultb) // Signed Multiply, with 16-bit operands (top, bottom) and a 32-bit result.
MAKE_INTR_U32_2OPS(0, smultt) // Signed Multiply, with 16-bit operands (top, top) and a 32-bit result.
// TODO: smull // Signed Long Multiply, with 32-bit operands and 64-bit result.
MAKE_INTR_U32_2OPS(0, smulwb) // Signed Multiply Wide, with one 32-bit and one 16-bit operand (bottom half), providing the top 32 bits of the result.
MAKE_INTR_U32_2OPS(0, smulwt) // Signed Multiply Wide, with one 32-bit and one 16-bit operand (top half), providing the top 32 bits of the result.
/* Doesn't affect any flags? */ MAKE_INTR_U32_2OPS(0, smusd) // Dual 16-bit Signed Multiply with Subtraction of products.
/* Doesn't affect any flags? */ MAKE_INTR_U32_2OPS(0, smusdx) // Dual 16-bit Signed Multiply with Subtraction of products with exchange.


// Signed Saturate to any bit position, with optional shift before saturating.
#define __ssat(op1, op2) \
({ \
    s32 __res; \
    __asm__ volatile("ssat %0, %1, %2" : "=r" (__res) : "I" (op1), "r" (op2) : "cc"); \
    __res; \
})
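
// Illustrative note (not from the original header): the first argument is the saturation bit
// width (an assembler immediate), the second is the value. For example, __ssat(8, 1000) should
// return 127 because the value is clamped to the signed 8-bit range [-128, 127]; the Q flag is
// set when saturation occurs.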

// Parallel halfword Saturate.
#define __ssat16(op1, op2) \
({ \
    u32 __res; \
    __asm__ volatile("ssat16 %0, %1, %2" : "=r" (__res) : "I" (op1), "r" (op2) : "cc"); \
    __res; \
})

MAKE_INTR_U32_2OPS(1, ssax) // Signed parallel subtract and add halfwords with exchange.
MAKE_INTR_U32_2OPS(1, ssub8) // Signed parallel byte-wise subtraction.
MAKE_INTR_U32_2OPS(1, ssub16) // Signed parallel halfword-wise subtraction.
MAKE_INTR_U32_2OPS(0, sxtab) // Sign extend Byte with Add, to extend an 8-bit value to a 32-bit value.
MAKE_INTR_U32_2OPS(0, sxtab16) // Sign extend two Bytes with Add, to extend two 8-bit values to two 16-bit values.
MAKE_INTR_U32_2OPS(0, sxtah) // Sign extend Halfword with Add, to extend a 16-bit value to a 32-bit value.
MAKE_INTR_U32_1OP(0, sxtb) // Sign extend Byte, to extend an 8-bit value to a 32-bit value.
MAKE_INTR_U32_1OP(0, sxtb16) // Sign extend two bytes.
MAKE_INTR_U32_1OP(0, sxth) // Sign extend Halfword.
MAKE_INTR_U32_2OPS(1, uadd8) // Unsigned parallel byte-wise addition.
MAKE_INTR_U32_2OPS(1, uadd16) // Unsigned parallel halfword-wise addition.
MAKE_INTR_U32_2OPS(1, uasx) // Unsigned parallel add and subtract halfwords with exchange.
MAKE_INTR_U32_2OPS(0, uhadd8) // Unsigned halving parallel byte-wise addition.
MAKE_INTR_U32_2OPS(0, uhadd16) // Unsigned halving parallel halfword-wise addition.
MAKE_INTR_U32_2OPS(0, uhasx) // Unsigned halving parallel add and subtract halfwords with exchange.
MAKE_INTR_U32_2OPS(0, uhsax) // Unsigned halving parallel subtract and add halfwords with exchange.
MAKE_INTR_U32_2OPS(0, uhsub8) // Unsigned halving parallel byte-wise subtraction.
MAKE_INTR_U32_2OPS(0, uhsub16) // Unsigned halving parallel halfword-wise subtraction.
MAKE_INTR_U32_2OPS(0, uqadd8) // Unsigned saturating parallel byte-wise addition.
MAKE_INTR_U32_2OPS(0, uqadd16) // Unsigned saturating parallel halfword-wise addition.
MAKE_INTR_U32_2OPS(0, uqasx) // Unsigned saturating parallel add and subtract halfwords with exchange.
MAKE_INTR_U32_2OPS(0, uqsax) // Unsigned saturating parallel subtract and add halfwords with exchange.
MAKE_INTR_U32_2OPS(0, uqsub8) // Unsigned saturating parallel byte-wise subtraction.
MAKE_INTR_U32_2OPS(0, uqsub16) // Unsigned saturating parallel halfword-wise subtraction.
MAKE_INTR_U32_2OPS(0, usad8) // Unsigned Sum of Absolute Differences.
MAKE_INTR_U32_3OPS(0, usada8) // Unsigned Sum of Absolute Differences and Accumulate.


// Unsigned Saturate to any bit position, with optional shift before saturating.
#define __usat(op1, op2) \
({ \
    u32 __res; \
    __asm__ volatile("usat %0, %1, %2" : "=r" (__res) : "I" (op1), "r" (op2) : "cc"); \
    __res; \
})
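
// Illustrative note (not from the original header): __usat(8, -5) should return 0 and
// __usat(8, 300) should return 255, since USAT clamps to the unsigned range [0, 2^n - 1].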

// Parallel halfword Saturate.
#define __usat16(op1, op2) \
({ \
    u32 __res; \
    __asm__ volatile("usat16 %0, %1, %2" : "=r" (__res) : "I" (op1), "r" (op2) : "cc"); \
    __res; \
})

MAKE_INTR_U32_2OPS(1, usax) // Unsigned parallel subtract and add halfwords with exchange.
MAKE_INTR_U32_2OPS(1, usub8) // Unsigned parallel byte-wise subtraction.
MAKE_INTR_U32_2OPS(1, usub16) // Unsigned parallel halfword-wise subtraction.
//MAKE_INTR_U32_2OPS(0, uxtab) // Zero extend Byte and Add.
MAKE_INTR_U32_2OPS(0, uxtab16) // Zero extend two Bytes and Add.
//MAKE_INTR_U32_2OPS(0, uxtah) // Zero extend Halfword and Add.
//MAKE_INTR_U32_1OP(0, uxtb) // Zero extend Byte.
MAKE_INTR_U32_1OP(0, uxtb16) // Zero extend two Bytes.
//MAKE_INTR_U32_1OP(0, uxth) // Zero extend Halfword.


#undef MAKE_INTR_U32_1OP
#undef MAKE_INTR_U32_2OPS
#undef MAKE_INTR_U32_3OPS