用 SIMD 对混色三巨头进行了改写,不过效率提高得确实不明显
This commit is contained in:
parent
4da9434d46
commit
4725daa5ec
@ -4,7 +4,7 @@
|
|||||||
* Licensed under GPLv3 or any later version.
|
* Licensed under GPLv3 or any later version.
|
||||||
* Refer to the included LICENSE file.
|
* Refer to the included LICENSE file.
|
||||||
*/
|
*/
|
||||||
|
#include <wasm_simd128.h>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
||||||
#include "ppu.hpp"
|
#include "ppu.hpp"
|
||||||
@ -24,9 +24,28 @@ ALWAYS_INLINE static u16 Blend(u16 color_a, u16 color_b, int eva, int evb) {
|
|||||||
eva = std::min<int>(16, eva);
|
eva = std::min<int>(16, eva);
|
||||||
evb = std::min<int>(16, evb);
|
evb = std::min<int>(16, evb);
|
||||||
|
|
||||||
const int r = std::min<u8>((r_a * eva + r_b * evb + 8) >> 4, 31);
|
auto colora = wasm_i32x4_make(r_a, g_a, b_a, 0);
|
||||||
const int g = std::min<u8>((g_a * eva + g_b * evb + 8) >> 4, 63) >> 1;
|
auto colorb = wasm_i32x4_make(r_b, g_b, b_b, 0);
|
||||||
const int b = std::min<u8>((b_a * eva + b_b * evb + 8) >> 4, 31);
|
auto evavec = wasm_i32x4_make(eva, eva, eva, 0);
|
||||||
|
auto evbvec = wasm_i32x4_make(evb, evb, evb, 0);
|
||||||
|
auto number = wasm_i32x4_make(8, 8, 8, 0);
|
||||||
|
auto factor = wasm_i32x4_mul( colora, evavec );
|
||||||
|
auto result = wasm_i32x4_mul( colorb, evbvec );
|
||||||
|
result = wasm_i32x4_add( result, factor );
|
||||||
|
result = wasm_i32x4_add( result, number );
|
||||||
|
result = wasm_i32x4_shr( result, 4 );
|
||||||
|
number = wasm_i32x4_make( 31, 63, 31, 0 );
|
||||||
|
result = wasm_i32x4_min( result, number );
|
||||||
|
|
||||||
|
int r = wasm_i32x4_extract_lane( result, 0 );
|
||||||
|
int g = wasm_i32x4_extract_lane( result, 1 );
|
||||||
|
int b = wasm_i32x4_extract_lane( result, 2 );
|
||||||
|
|
||||||
|
g >>= 1;
|
||||||
|
|
||||||
|
// const int r = std::min<u8>((r_a * eva + r_b * evb + 8) >> 4, 31);
|
||||||
|
// const int g = std::min<u8>((g_a * eva + g_b * evb + 8) >> 4, 63) >> 1;
|
||||||
|
// const int b = std::min<u8>((b_a * eva + b_b * evb + 8) >> 4, 31);
|
||||||
|
|
||||||
return (u16)((b << 10) | (g << 5) | r);
|
return (u16)((b << 10) | (g << 5) | r);
|
||||||
}
|
}
|
||||||
@ -38,9 +57,24 @@ ALWAYS_INLINE static u16 Brighten(u16 color, int evy) {
|
|||||||
int g = ((color >> 4) & 62) | (color >> 15);
|
int g = ((color >> 4) & 62) | (color >> 15);
|
||||||
int b = (color >> 10) & 31;
|
int b = (color >> 10) & 31;
|
||||||
|
|
||||||
r += ((31 - r) * evy + 8) >> 4;
|
auto source = wasm_i32x4_make(r, g, b, 0);
|
||||||
g += ((63 - g) * evy + 8) >> 4;
|
auto maxume = wasm_i32x4_make(31, 63, 31, 0);
|
||||||
b += ((31 - b) * evy + 8) >> 4;
|
auto factor = wasm_i32x4_make(r, g, b, 0);
|
||||||
|
auto evyvec = wasm_i32x4_make(evy, evy, evy, 0);
|
||||||
|
auto number = wasm_i32x4_make(8, 8, 8, 0);
|
||||||
|
auto result = wasm_i32x4_sub( maxume, factor );
|
||||||
|
result = wasm_i32x4_mul( result, evyvec );
|
||||||
|
result = wasm_i32x4_add( result, number );
|
||||||
|
result = wasm_i32x4_shr( result, 4 );
|
||||||
|
result = wasm_i32x4_add( source, result );
|
||||||
|
|
||||||
|
r = wasm_i32x4_extract_lane( result, 0 );
|
||||||
|
g = wasm_i32x4_extract_lane( result, 1 );
|
||||||
|
b = wasm_i32x4_extract_lane( result, 2 );
|
||||||
|
|
||||||
|
// r += ((31 - r) * evy + 8) >> 4;
|
||||||
|
// g += ((63 - g) * evy + 8) >> 4;
|
||||||
|
// b += ((31 - b) * evy + 8) >> 4;
|
||||||
|
|
||||||
g >>= 1;
|
g >>= 1;
|
||||||
|
|
||||||
@ -54,9 +88,22 @@ ALWAYS_INLINE static u16 Darken(u16 color, int evy) {
|
|||||||
int g = ((color >> 4) & 62) | (color >> 15);
|
int g = ((color >> 4) & 62) | (color >> 15);
|
||||||
int b = (color >> 10) & 31;
|
int b = (color >> 10) & 31;
|
||||||
|
|
||||||
r -= (r * evy + 7) >> 4;
|
auto source = wasm_i32x4_make(r, g, b, 0);
|
||||||
g -= (g * evy + 7) >> 4;
|
auto factor = wasm_i32x4_make(r, g, b, 0);
|
||||||
b -= (b * evy + 7) >> 4;
|
auto evyvec = wasm_i32x4_make(evy, evy, evy, 0);
|
||||||
|
auto number = wasm_i32x4_make(7, 7, 7, 0);
|
||||||
|
auto result = wasm_i32x4_mul( factor, evyvec );
|
||||||
|
result = wasm_i32x4_add( result, number );
|
||||||
|
result = wasm_i32x4_shr( result, 4 );
|
||||||
|
result = wasm_i32x4_sub( source, result );
|
||||||
|
|
||||||
|
r = wasm_i32x4_extract_lane( result, 0 );
|
||||||
|
g = wasm_i32x4_extract_lane( result, 1 );
|
||||||
|
b = wasm_i32x4_extract_lane( result, 2 );
|
||||||
|
|
||||||
|
// r -= (r * evy + 7) >> 4;
|
||||||
|
// g -= (g * evy + 7) >> 4;
|
||||||
|
// b -= (b * evy + 7) >> 4;
|
||||||
|
|
||||||
g >>= 1;
|
g >>= 1;
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user