summary | refs | log | tree | commit | diff
path: root/bitmap_calls.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'bitmap_calls.cpp')
-rw-r--r-- bitmap_calls.cpp 28
1 files changed, 28 insertions, 0 deletions
diff --git a/bitmap_calls.cpp b/bitmap_calls.cpp
index dd5c793..6425e6d 100644
--- a/bitmap_calls.cpp
+++ b/bitmap_calls.cpp
@@ -31,10 +31,17 @@ void Bitmap_ConvertPacking(void *Buffer, void *DestBuffer, uint16 Width, uint16
DPixel = Temp + Y*Width*4 + X*BytesPerPixel;
}
+#if ARM
+ if (InstructionMode == instruction_mode_neon) {
+ uint32x2x2_t Row = vld2_u32((uint32 *)Pixel);
+ vst2_u32((uint32 *)DPixel, Row);
+ X += 4;
+#else
if (InstructionMode == instruction_mode_sse || InstructionMode == instruction_mode_avx) {
__m128i Row = _mm_loadu_si128((__m128i *)Pixel);
_mm_storeu_si128((__m128i *)DPixel, Row);
X+=4;
+#endif
} else {
*(uint32 *)DPixel = *(uint32 *)Pixel;
X++;
@@ -101,8 +108,13 @@ static void
Bitmap_Clear(void *Buffer, uint16 Width, uint16 Height, uint16 BytesPerPixel)
{
uint8 *Row = (uint8 *)Buffer;
+#if ARM
+ uint32 Zero[4] = {0};
+ uint32x2x4_t Zero8 = vld4_dup_u32(Zero);
+#else
__m256i Zero8 = _mm256_setzero_si256();
__m128i Zero = _mm_setzero_si128();
+#endif
uint64 bytes = 0;
uint16 ByteOffset = Bitmap_CalcByteOffset(BytesPerPixel);
@@ -110,10 +122,15 @@ Bitmap_Clear(void *Buffer, uint16 Width, uint16 Height, uint16 BytesPerPixel)
while (bytes < TotalBytes) {
uint8 *Pixel = Row + bytes;
+#if ARM
+ if (InstructionMode == instruction_mode_neon) {
+ vst4_u32((uint32 *)Pixel, Zero8);
+#else
if (InstructionMode == instruction_mode_avx) {
_mm256_storeu_si256((__m256i *)Pixel, Zero8);
} else if (InstructionMode == instruction_mode_sse) {
_mm_storeu_si128((__m128i *)Pixel, Zero);
+#endif
} else {
*(uint32 *)Pixel = 0x00000000;
}
@@ -136,10 +153,15 @@ Bitmap_CalcPackedDimensions(uint16 Width, uint16 Height, uint16 *WidthP, uint16
static uint16
Bitmap_CalcByteOffset(uint16 BytesPerPixel) {
uint16 ByteOffset = BytesPerPixel;
+#if ARM
+ if (InstructionMode == instruction_mode_neon)
+ ByteOffset = 8*BytesPerPixel;
+#else
if (InstructionMode == instruction_mode_avx)
ByteOffset = 8*BytesPerPixel;
if (InstructionMode == instruction_mode_sse)
ByteOffset = 4*BytesPerPixel;
+#endif
return ByteOffset;
}
@@ -174,12 +196,18 @@ Bitmap_CopyToPointer(void *Input, void *Output, uint16 BytesPerPixel, uint64 Tot
int pp = 0;
uint8 *Pixel = (uint8 *)Row + bytes;
uint8 *Pixel2 = (uint8 *)Row2 + bytes;
+#if ARM
+ if (InstructionMode == instruction_mode_neon) {
+ uint32x2x4_t OutputPixel = vld4_u32((uint32 *)Pixel);
+ vst4_u32((uint32 *)Pixel2, OutputPixel);
+#else
if (InstructionMode == instruction_mode_avx) {
__m256i OutputPixel = _mm256_loadu_si256((__m256i *)Pixel);
_mm256_storeu_si256((__m256i *)Pixel2, OutputPixel);
} else if (InstructionMode == instruction_mode_sse) {
__m128i OutputPixel = _mm_loadu_si128((__m128i *)Pixel);
_mm_storeu_si128((__m128i *)Pixel2, OutputPixel);
+#endif
} else {
*(uint32 *)Pixel2 = *(uint32 *)Pixel;
}