From d03d7187c1881237b1a98404a125507d33d85a0e Mon Sep 17 00:00:00 2001 From: Fox Caminiti Date: Sun, 21 Aug 2022 22:05:10 -0400 Subject: a bit of housekeeping --- bitmap_calls.cpp | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'bitmap_calls.cpp') diff --git a/bitmap_calls.cpp b/bitmap_calls.cpp index a67cd0b..f37f493 100644 --- a/bitmap_calls.cpp +++ b/bitmap_calls.cpp @@ -238,6 +238,51 @@ Bitmap_CopyToPointer(void *Input, void *Output, uint16 BytesPerPixel, uint64 Tot } } +static void +Bitmap_StencilAlpha(void *Input, void *Output, uint16 BytesPerPixel, uint64 TotalBytes) +{ + uint8 *Row = (uint8 *)Input; + uint8 *Row2 = (uint8 *)Output; + + uint64 bytes = 0; + uint16 ByteOffset = Bitmap_CalcByteOffset(BytesPerPixel); + uint64 RemainderBytes = TotalBytes % ByteOffset; + + __m256i AlphaBytes = _mm256_set1_epi32(0x00FFFFFF); + __m256 Zeroi = _mm256_set1_epi32(0); + + while (bytes <= TotalBytes - RemainderBytes) { + uint8 *Pixel = (uint8 *)Row + bytes; + uint8 *Pixel2 = (uint8 *)Row2 + bytes; +#if ARM + if (InstructionMode == instruction_mode_neon) { + uint32x2x4_t OutputPixel = vld4_u32((uint32 *)Pixel); + vst4_u32((uint32 *)Pixel2, OutputPixel); +#else + if (InstructionMode == instruction_mode_avx) { + __m256i InputPixel = _mm256_loadu_si256((__m256i *)Pixel); + __m256i OutputPixel = _mm256_loadu_si256((__m256i *)Pixel2); + if (_mm256_movemask_epi8(OutputPixel)) { + OutputPixel = _mm256_blendv_epi8(OutputPixel, InputPixel, AlphaBytes); + _mm256_storeu_si256((__m256i *)Pixel2, OutputPixel); + } + } else if (InstructionMode == instruction_mode_sse) { + __m128i OutputPixel = _mm_loadu_si128((__m128i *)Pixel); + _mm_storeu_si128((__m128i *)Pixel2, OutputPixel); +#endif + } else { + *(uint32 *)Pixel2 = *(uint32 *)Pixel; + } + bytes += ByteOffset; + } + while (bytes <= TotalBytes) { + uint8 *Pixel = (uint8 *)Row + bytes; + uint8 *Pixel2 = (uint8 *)Row2 + bytes; + *(uint32 *)Pixel2 = *(uint32 *)Pixel; + bytes += BytesPerPixel; + } +} + // This would be an easy SIMD if only AVX had a scatter call... // NOTE(fox): Only works with unpacked bitmaps for now. static void -- cgit v1.2.3