diff options
author | Fox Caminiti <fox@foxcam.net> | 2022-08-21 22:05:10 -0400 |
---|---|---|
committer | Fox Caminiti <fox@foxcam.net> | 2022-08-21 22:05:10 -0400 |
commit | d03d7187c1881237b1a98404a125507d33d85a0e (patch) | |
tree | e0409ebb4bec224b1231317e3e7743fb9349b989 /bitmap_calls.cpp | |
parent | ed51dab429e467fc144f0bfbed70a5291c8a0a27 (diff) |
a bit of housekeeping
Diffstat (limited to 'bitmap_calls.cpp')
-rw-r--r-- | bitmap_calls.cpp | 45 |
1 files changed, 45 insertions, 0 deletions
diff --git a/bitmap_calls.cpp b/bitmap_calls.cpp index a67cd0b..f37f493 100644 --- a/bitmap_calls.cpp +++ b/bitmap_calls.cpp @@ -238,6 +238,51 @@ Bitmap_CopyToPointer(void *Input, void *Output, uint16 BytesPerPixel, uint64 Tot } } +static void +Bitmap_StencilAlpha(void *Input, void *Output, uint16 BytesPerPixel, uint64 TotalBytes) +{ + uint8 *Row = (uint8 *)Input; + uint8 *Row2 = (uint8 *)Output; + + uint64 bytes = 0; + uint16 ByteOffset = Bitmap_CalcByteOffset(BytesPerPixel); + uint64 RemainderBytes = TotalBytes % ByteOffset; + + __m256i AlphaBytes = _mm256_set1_epi32(0x00FFFFFF); + __m256 Zeroi = _mm256_set1_epi32(0); + + while (bytes <= TotalBytes - RemainderBytes) { + uint8 *Pixel = (uint8 *)Row + bytes; + uint8 *Pixel2 = (uint8 *)Row2 + bytes; +#if ARM + if (InstructionMode == instruction_mode_neon) { + uint32x2x4_t OutputPixel = vld4_u32((uint32 *)Pixel); + vst4_u32((uint32 *)Pixel2, OutputPixel); +#else + if (InstructionMode == instruction_mode_avx) { + __m256i InputPixel = _mm256_loadu_si256((__m256i *)Pixel); + __m256i OutputPixel = _mm256_loadu_si256((__m256i *)Pixel2); + if (_mm256_movemask_epi8(OutputPixel)) { + OutputPixel = _mm256_blendv_epi8(OutputPixel, InputPixel, AlphaBytes); + _mm256_storeu_si256((__m256i *)Pixel2, OutputPixel); + } + } else if (InstructionMode == instruction_mode_sse) { + __m128i OutputPixel = _mm_loadu_si128((__m128i *)Pixel); + _mm_storeu_si128((__m128i *)Pixel2, OutputPixel); +#endif + } else { + *(uint32 *)Pixel2 = *(uint32 *)Pixel; + } + bytes += ByteOffset; + } + while (bytes <= TotalBytes) { + uint8 *Pixel = (uint8 *)Row + bytes; + uint8 *Pixel2 = (uint8 *)Row2 + bytes; + *(uint32 *)Pixel2 = *(uint32 *)Pixel; + bytes += BytesPerPixel; + } +} + // This would be an easy SIMD if only AVX had a scatter call... // NOTE(fox): Only works with unpacked bitmaps for now. static void |