From 04b7ccfd87d802e6b9a22b86c8d098979164b8ba Mon Sep 17 00:00:00 2001
From: Fox Caminiti
Date: Mon, 15 Aug 2022 23:03:30 -0400
Subject: undo started

---
 bitmap_calls.cpp | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'bitmap_calls.cpp')

diff --git a/bitmap_calls.cpp b/bitmap_calls.cpp
index dd5c793..6425e6d 100644
--- a/bitmap_calls.cpp
+++ b/bitmap_calls.cpp
@@ -31,10 +31,17 @@ void Bitmap_ConvertPacking(void *Buffer, void *DestBuffer, uint16 Width, uint16
                 DPixel = Temp + Y*Width*4 + X*BytesPerPixel;
             }
 
+#if ARM
+            if (InstructionMode == instruction_mode_neon) {
+                uint32x2x2_t Row = vld2_u32((uint32 *)Pixel);
+                vst2_u32((uint32 *)DPixel, Row);
+                X += 4;
+#else
             if (InstructionMode == instruction_mode_sse || InstructionMode == instruction_mode_avx) {
                 __m128i Row = _mm_loadu_si128((__m128i *)Pixel);
                 _mm_storeu_si128((__m128i *)DPixel, Row);
                 X+=4;
+#endif
             } else {
                 *(uint32 *)DPixel = *(uint32 *)Pixel;
                 X++;
@@ -101,8 +108,13 @@ static void
 Bitmap_Clear(void *Buffer, uint16 Width, uint16 Height, uint16 BytesPerPixel)
 {
     uint8 *Row = (uint8 *)Buffer;
+#if ARM
+    uint32 Zero[4] = {0};
+    uint32x2x4_t Zero8 = vld4_dup_u32(Zero);
+#else
     __m256i Zero8 = _mm256_setzero_si256();
     __m128i Zero = _mm_setzero_si128();
+#endif
 
     uint64 bytes = 0;
     uint16 ByteOffset = Bitmap_CalcByteOffset(BytesPerPixel);
@@ -110,10 +122,15 @@ Bitmap_Clear(void *Buffer, uint16 Width, uint16 Height, uint16 BytesPerPixel)
 
     while (bytes < TotalBytes) {
         uint8 *Pixel = Row + bytes;
+#if ARM
+        if (InstructionMode == instruction_mode_neon) {
+            vst4_u32((uint32 *)Pixel, Zero8);
+#else
         if (InstructionMode == instruction_mode_avx) {
             _mm256_storeu_si256((__m256i *)Pixel, Zero8);
         } else if (InstructionMode == instruction_mode_sse) {
             _mm_storeu_si128((__m128i *)Pixel, Zero);
+#endif
         } else {
             *(uint32 *)Pixel = 0x00000000;
         }
@@ -136,10 +153,15 @@ Bitmap_CalcPackedDimensions(uint16 Width, uint16 Height, uint16 *WidthP, uint16
 static uint16 Bitmap_CalcByteOffset(uint16 BytesPerPixel)
 {
     uint16 ByteOffset = BytesPerPixel;
+#if ARM
+    if (InstructionMode == instruction_mode_neon)
+        ByteOffset = 8*BytesPerPixel;
+#else
     if (InstructionMode == instruction_mode_avx)
         ByteOffset = 8*BytesPerPixel;
     if (InstructionMode == instruction_mode_sse)
         ByteOffset = 4*BytesPerPixel;
+#endif
     return ByteOffset;
 }
 
@@ -174,12 +196,18 @@ Bitmap_CopyToPointer(void *Input, void *Output, uint16 BytesPerPixel, uint64 Tot
         int pp = 0;
         uint8 *Pixel = (uint8 *)Row + bytes;
         uint8 *Pixel2 = (uint8 *)Row2 + bytes;
+#if ARM
+        if (InstructionMode == instruction_mode_neon) {
+            uint32x2x4_t OutputPixel = vld4_u32((uint32 *)Pixel);
+            vst4_u32((uint32 *)Pixel2, OutputPixel);
+#else
         if (InstructionMode == instruction_mode_avx) {
             __m256i OutputPixel = _mm256_loadu_si256((__m256i *)Pixel);
             _mm256_storeu_si256((__m256i *)Pixel2, OutputPixel);
         } else if (InstructionMode == instruction_mode_sse) {
             __m128i OutputPixel = _mm_loadu_si128((__m128i *)Pixel);
             _mm_storeu_si128((__m128i *)Pixel2, OutputPixel);
+#endif
         } else {
             *(uint32 *)Pixel2 = *(uint32 *)Pixel;
         }
-- 
cgit v1.2.3