path: root/bitmap_calls.cpp
diff options
Diffstat (limited to 'bitmap_calls.cpp')
1 files changed, 0 insertions, 402 deletions
diff --git a/bitmap_calls.cpp b/bitmap_calls.cpp
deleted file mode 100644
index 0e76039..0000000
--- a/bitmap_calls.cpp
+++ /dev/null
@@ -1,402 +0,0 @@
-// Bitmaps are currently stored two ways in this program, which I'm calling
-// "packed" and "unpacked." Both are 0xAAGGBBRR little endian. Unpacked bitmaps
-// use the typical method of storage, rows of X that you increment by
-// Width*BytesPerPixel to look up the Y coordinate. "Packed" bitmaps encode
-// pixels as 4x4 chunks. To illustrate this clearly with an 8x4 bitmap:
-// A1 A2 A3 A4 E1 E2 E3 E4
-// B1 B2 B3 B4 F1 F2 F3 F4
-// C1 C2 C3 C4 G1 G2 G3 G4
-// D1 D2 D3 D4 H1 H2 H3 H4
-// Unpacked would be stored in memory order as A1 A2 A3 A4 E1 E2 E3 E4...
-// while packed would be stored as A1 A2 A3 A4 B1 B2 B3 B4...
-// In cases where the bitmap is a non-divisible-by-four size, we simply treat
-// the bitmap as if it's the right size and add the extra pixels in the allocation.
-// This wasn't an optimization I necessarily _needed_ to make this early on--I
-// never even did any measuring to see if there was any speedup-- but I
-// couldn't resist it. I like doing the software rendering stuff.
-// TODO(fox): I could write an AVX version of this function, but it may not be
-// that much faster since we have to do a bit of uninterleaving.
-// Which == 0 - read unpacked rows from Buffer and store them in 4x4 chunks in DestBuffer
-// Which != 0 - read 4x4 chunks from Buffer and unpack them to 1xwidth rows in DestBuffer
-void Bitmap_ConvertPacking(void *Buffer, void *DestBuffer, uint16 Width, uint16 Height, uint16 BytesPerPixel, uint16 Which)
- uint8 *Src = (uint8 *)Buffer; // pixels are only read through Src
- uint8 *Temp = (uint8 *)DestBuffer; // pixels are only written through Temp
- uint32 RemainderPixels = Width % 4; // pixels at the end of each row that do not fill a group of four
- uint16 WidthP, HeightP; // filled in by Bitmap_CalcPackedDimensions; only WidthP is used below
- Bitmap_CalcPackedDimensions(Width, Height, &WidthP, &HeightP);
- for (uint32 Y = 0; Y < Height; Y++) {
- uint32 X = 0;
- while (X < Width - RemainderPixels) { // main loop: the part of the row made of whole groups of four pixels
- uint32 XLookup = (X >> 2)*16 + (X % 4); // 16 pixels per chunk, plus the column inside the chunk
- uint32 YLookup = (Y >> 2)*(WidthP*4) + (Y % 4)*4; // WidthP*4 pixels per band of chunks, plus 4 pixels per row inside the chunk
- uint32 PixelToSeek = XLookup + YLookup; // pixel index into the packed buffer
- if (Y == 48 && X == 0)
- uint8 war = 0; // NOTE(review): has no effect on the copy; looks like a leftover debugger anchor
- // if (YLookup == 2500 && XLookup == 1)
- uint8 *DPixel, *Pixel; // DPixel = destination address, Pixel = source address
- if (Which == 0) {
- DPixel = Temp + PixelToSeek*BytesPerPixel; // packing: destination is addressed in chunk order
- Pixel = Src + Y*Width*4 + X*BytesPerPixel; // NOTE(review): row stride is Width*4, not Width*BytesPerPixel
- } else {
- Pixel = Src + PixelToSeek*BytesPerPixel; // unpacking: source is addressed in chunk order
- DPixel = Temp + Y*Width*4 + X*BytesPerPixel; // NOTE(review): row stride is Width*4, not Width*BytesPerPixel
- }
-#if ARM
- if (InstructionMode == instruction_mode_neon) {
- uint32x2x2_t Row = vld2_u32((uint32 *)Pixel); // loads four uint32 values (16 bytes)
- vst2_u32((uint32 *)DPixel, Row); // stores the same four values back in their original order
- X += 4; // NOTE(review): no #else / #endif for this #if ARM is present in this listing
- if (InstructionMode == instruction_mode_sse || InstructionMode == instruction_mode_avx) {
- __m128i Row = _mm_loadu_si128((__m128i *)Pixel); // unaligned 16-byte load
- _mm_storeu_si128((__m128i *)DPixel, Row); // unaligned 16-byte store
- X+=4;
- } else {
- *(uint32 *)DPixel = *(uint32 *)Pixel; // scalar path: copies one 32-bit value
- X++;
- }
- }
- while (X < Width) { // tail loop: the remaining RemainderPixels of the row, one 32-bit value at a time
- uint16 WidthP, HeightP; // NOTE(review): shadows the outer WidthP/HeightP and is recomputed on every iteration
- Bitmap_CalcPackedDimensions(Width, Height, &WidthP, &HeightP);
- uint32 XLookup = (X >> 2)*16 + (X % 4); // same addressing as the main loop
- uint32 YLookup = (Y >> 2)*(WidthP*4) + (Y % 4)*4;
- uint32 PixelToSeek = XLookup + YLookup;
- uint8 *DPixel, *Pixel;
- if (Which == 0) {
- DPixel = Temp + PixelToSeek*BytesPerPixel;
- Pixel = Src + Y*Width*4 + X*BytesPerPixel;
- } else {
- Pixel = Src + PixelToSeek*BytesPerPixel;
- DPixel = Temp + Y*Width*4 + X*BytesPerPixel;
- }
- *(uint32 *)DPixel = *(uint32 *)Pixel;
- X++;
- }
- }
-// TODO(fox): Replace this in the future.
-#if 0
-static void * // Compiled out by the preceding "#if 0": copies bytes from Input to Output and returns Output
-MoveImportToBitmap(memory *Memory, pixel_buffer *Raster, void *Input)
- uint8 *Row = ((uint8 *)Input); // source bytes
- // void *Output = AllocateMemory(Memory, Bitmap_CalcTotalBytes(Raster->Width, Raster->Height, Raster->BytesPerPixel), B_Layers);
- uint8 *Row2 = ((uint8 *)Output); // NOTE(review): Output is only declared in the commented-out line above
- uint64 bytes = 0; // running byte offset shared by source and destination
- uint16 ByteOffset = Bitmap_CalculateByteOffset(BytesPerPixel); // NOTE(review): this file defines Bitmap_CalcByteOffset; BytesPerPixel is not declared in this function
- uint64 TotalBytes = Bitmap_CalculateTotalBytes(Width, Height, BytesPerPixel); // NOTE(review): this file defines Bitmap_CalcTotalBytes; Width/Height are not declared in this function
- uint64 RemainderBytes = TotalBytes % ByteOffset; // bytes left over after whole ByteOffset-sized steps
- while (bytes <= TotalBytes - RemainderBytes) { // NOTE(review): "<=" also runs the body at the end offset itself
- uint8 *Pixel = (uint8 *)Row + bytes;
- uint8 *Pixel2 = (uint8 *)Row2 + bytes;
- if (InstructionMode == instruction_mode_sse || InstructionMode == instruction_mode_avx) {
- __m128i OutputPixel = _mm_loadu_si128((__m128i *)Pixel); // unaligned 16-byte load
- _mm_storeu_si128((__m128i *)Pixel2, OutputPixel); // unaligned 16-byte store
- bytes += 4*Raster->BytesPerPixel;
- } else {
- *(uint32 *)Pixel2 = *(uint32 *)Pixel; // scalar path: copies one 32-bit value
- bytes += Raster->BytesPerPixel;
- }
- }
- while (bytes <= TotalBytes) { // tail loop; NOTE(review): "<=" also copies a 32-bit value starting at offset TotalBytes
- uint8 *Pixel = (uint8 *)Row + bytes;
- uint8 *Pixel2 = (uint8 *)Row2 + bytes;
- *(uint32 *)Pixel2 = *(uint32 *)Pixel;
- bytes += Raster->BytesPerPixel;
- }
- return Output;
-static void // Writes zeros over Bitmap_CalcTotalBytes(Width, Height, BytesPerPixel) bytes starting at Buffer
-Bitmap_Clear(void *Buffer, uint16 Width, uint16 Height, uint16 BytesPerPixel)
- uint8 *Row = (uint8 *)Buffer; // base address of the region being cleared
-#if ARM
- uint32 Zero[4] = {0};
- uint32x2x4_t Zero8 = vld4_dup_u32(Zero); // NEON zero value (8 x uint32) used by the vst4_u32 store below
- __m256i Zero8 = _mm256_setzero_si256(); // 32 zero bytes for the AVX store; NOTE(review): no #else / #endif for the #if ARM above is present in this listing
- __m128i Zero = _mm_setzero_si128(); // 16 zero bytes for the SSE store
- uint64 bytes = 0; // running byte offset into Buffer
- uint16 ByteOffset = Bitmap_CalcByteOffset(BytesPerPixel); // bytes advanced per iteration
- uint64 TotalBytes = Bitmap_CalcTotalBytes(Width, Height, BytesPerPixel); // size computed from the padded (packed) dimensions
- while (bytes < TotalBytes) {
- uint8 *Pixel = Row + bytes;
-#if ARM
- if (InstructionMode == instruction_mode_neon) {
- vst4_u32((uint32 *)Pixel, Zero8); // stores 8 x uint32 (32 bytes)
- if (InstructionMode == instruction_mode_avx) {
- _mm256_storeu_si256((__m256i *)Pixel, Zero8); // unaligned 32-byte store
- } else if (InstructionMode == instruction_mode_sse) {
- _mm_storeu_si128((__m128i *)Pixel, Zero); // unaligned 16-byte store
- } else {
- *(uint32 *)Pixel = 0x00000000; // scalar path: 4-byte store
- }
- bytes += ByteOffset; // NOTE(review): store widths equal this step only when BytesPerPixel is 4 - presumably always the case; confirm
- }
-static void // Rounds Width and Height up to the next multiple of four
-Bitmap_CalcPackedDimensions(uint16 Width, uint16 Height, uint16 *WidthP, uint16 *HeightP) { // results are written to *WidthP and *HeightP
- uint16 ExtraWidth = 4 - (Width % 4); // padding needed to reach a multiple of four (1..4 at this point)
- if (ExtraWidth == 4)
- ExtraWidth = 0; // Width is already a multiple of four
- uint16 ExtraHeight = 4 - (Height % 4); // same computation for the vertical axis
- if (ExtraHeight == 4)
- ExtraHeight = 0; // Height is already a multiple of four
- *WidthP = Width + ExtraWidth; // NOTE(review): stored as uint16, so a Width above 65532 wraps around
- *HeightP = Height + ExtraHeight; // NOTE(review): stored as uint16, so a Height above 65532 wraps around
-static uint16 // Returns the byte step that Bitmap_Clear, Bitmap_CopyToPointer and Bitmap_StencilAlpha advance per iteration for the current InstructionMode
-Bitmap_CalcByteOffset(uint16 BytesPerPixel) {
- uint16 ByteOffset = BytesPerPixel; // default: one pixel per step
-#if ARM
- if (InstructionMode == instruction_mode_neon)
- ByteOffset = 8*BytesPerPixel; // eight pixels per step; NOTE(review): no #else / #endif for the #if ARM above is present in this listing
- if (InstructionMode == instruction_mode_avx)
- ByteOffset = 8*BytesPerPixel; // eight pixels per step
- if (InstructionMode == instruction_mode_sse)
- ByteOffset = 4*BytesPerPixel; // four pixels per step
- return ByteOffset;
-static uint64 // Returns Width*Height*BytesPerPixel, i.e. the byte size without any padding to multiples of four
-Bitmap_CalcUnpackedBytes(uint16 Width, uint16 Height, uint16 BytesPerPixel) {
- uint64 TotalBytes = (uint64)Width*Height*BytesPerPixel; // the cast makes the whole product a 64-bit computation
- return TotalBytes;
-static uint64 // Returns the byte size of a bitmap whose dimensions are first padded by Bitmap_CalcPackedDimensions
-Bitmap_CalcTotalBytes(uint16 Width, uint16 Height, uint16 BytesPerPixel) {
- uint16 WidthP, HeightP; // padded dimensions
- Bitmap_CalcPackedDimensions(Width, Height, &WidthP, &HeightP);
- uint64 TotalBytes = (uint64)WidthP*HeightP*BytesPerPixel; // the cast makes the whole product a 64-bit computation
- return TotalBytes;
-// TODO(fox): Maybe turn this into a generic memory copy; we don't need to care
-// about pixels for any particular reason here.
-static void // Copies bytes from Input to Output in ByteOffset-sized steps, then finishes with 32-bit copies
-Bitmap_CopyToPointer(void *Input, void *Output, uint16 BytesPerPixel, uint64 TotalBytes)
- uint8 *Row = (uint8 *)Input; // source bytes
- uint8 *Row2 = (uint8 *)Output; // destination bytes
- uint64 bytes = 0; // running byte offset shared by source and destination
- uint16 ByteOffset = Bitmap_CalcByteOffset(BytesPerPixel); // bytes advanced per iteration of the first loop
- uint64 RemainderBytes = TotalBytes % ByteOffset; // bytes left over after whole ByteOffset-sized steps
- while (bytes <= TotalBytes - RemainderBytes) { // NOTE(review): "<=" also runs a full step starting at TotalBytes - RemainderBytes, which extends past TotalBytes - confirm the buffers are padded
- uint8 *Pixel = (uint8 *)Row + bytes;
- uint8 *Pixel2 = (uint8 *)Row2 + bytes;
-#if ARM
- if (InstructionMode == instruction_mode_neon) {
- uint32x2x4_t OutputPixel = vld4_u32((uint32 *)Pixel); // loads 8 x uint32 (32 bytes)
- vst4_u32((uint32 *)Pixel2, OutputPixel); // stores the same 32 bytes; NOTE(review): no #else / #endif for the #if ARM above is present in this listing
- if (InstructionMode == instruction_mode_avx) {
- __m256i OutputPixel = _mm256_loadu_si256((__m256i *)Pixel); // unaligned 32-byte load
- _mm256_storeu_si256((__m256i *)Pixel2, OutputPixel); // unaligned 32-byte store
- } else if (InstructionMode == instruction_mode_sse) {
- __m128i OutputPixel = _mm_loadu_si128((__m128i *)Pixel); // unaligned 16-byte load
- _mm_storeu_si128((__m128i *)Pixel2, OutputPixel); // unaligned 16-byte store
- } else {
- *(uint32 *)Pixel2 = *(uint32 *)Pixel; // scalar path: copies one 32-bit value
- }
- bytes += ByteOffset;
- }
- while (bytes <= TotalBytes) { // tail loop; NOTE(review): "<=" also copies a 32-bit value starting at offset TotalBytes
- uint8 *Pixel = (uint8 *)Row + bytes;
- uint8 *Pixel2 = (uint8 *)Row2 + bytes;
- *(uint32 *)Pixel2 = *(uint32 *)Pixel;
- bytes += BytesPerPixel;
- }
-static void // Copies pixels from Input to Output; the AVX and scalar paths keep the alpha byte already stored in Output
-Bitmap_StencilAlpha(void *Input, void *Output, uint16 BytesPerPixel, uint64 TotalBytes)
- uint8 *Row = (uint8 *)Input; // source pixels
- uint8 *Row2 = (uint8 *)Output; // destination pixels; read as well as written
- uint64 bytes = 0; // running byte offset shared by source and destination
- uint16 ByteOffset = Bitmap_CalcByteOffset(BytesPerPixel); // step used by the AVX and SSE paths
- uint64 RemainderBytes = TotalBytes % ByteOffset; // bytes left over after whole ByteOffset-sized steps
-#if ARM
- __m256i AlphaBytes = _mm256_set1_epi32(0x00FFFFFF); // blend mask: the low three bytes of every 32-bit lane are selected
- __m256i Zeroi = _mm256_set1_epi32(0); // NOTE(review): not referenced anywhere in the code shown
- while (bytes <= TotalBytes - RemainderBytes) { // NOTE(review): "<=" also runs the body at TotalBytes - RemainderBytes, so a full-width step there extends past TotalBytes
- uint8 *Pixel = (uint8 *)Row + bytes;
- uint8 *Pixel2 = (uint8 *)Row2 + bytes;
-#if ARM
- if (InstructionMode == instruction_mode_neon) {
- // TODO(fox): Optimize and write NEON!
- uint8 *DestAlpha = Pixel2 + (BytesPerPixel/4)*3; // byte offset 3 of the destination pixel when BytesPerPixel is 4
- uint8 Alpha = *DestAlpha; // remember the destination's alpha byte
- uint32 *DestPixel = (uint32 *)Pixel2;
- uint32 *SrcPixel = (uint32 *)Pixel;
- *DestPixel = *SrcPixel; // overwrite all four bytes with the source pixel
- *DestAlpha = Alpha; // then put the remembered alpha byte back
- bytes += BytesPerPixel; // this path advances one pixel at a time
- if (InstructionMode == instruction_mode_avx) {
- __m256i InputPixel = _mm256_loadu_si256((__m256i *)Pixel); // 32 bytes of source
- __m256i OutputPixel = _mm256_loadu_si256((__m256i *)Pixel2); // 32 bytes of destination
- if (_mm256_movemask_epi8(OutputPixel)) { // true when any destination byte has its top bit set
- OutputPixel = _mm256_blendv_epi8(OutputPixel, InputPixel, AlphaBytes); // low three bytes per lane from the source, top byte kept from the destination
- _mm256_storeu_si256((__m256i *)Pixel2, OutputPixel);
- }
- bytes += ByteOffset;
- } else if (InstructionMode == instruction_mode_sse) {
- __m128i OutputPixel = _mm_loadu_si128((__m128i *)Pixel); // 16 bytes of source
- _mm_storeu_si128((__m128i *)Pixel2, OutputPixel); // NOTE(review): plain copy - unlike the other paths, the destination alpha is overwritten
- bytes += ByteOffset;
- } else {
- // TODO(fox): Optimize and write NEON!
- uint8 *DestAlpha = Pixel2 + (BytesPerPixel/4)*3; // byte offset 3 of the destination pixel when BytesPerPixel is 4
- uint8 Alpha = *DestAlpha; // remember the destination's alpha byte
- uint32 *DestPixel = (uint32 *)Pixel2;
- uint32 *SrcPixel = (uint32 *)Pixel;
- *DestPixel = *SrcPixel; // overwrite all four bytes with the source pixel
- *DestAlpha = Alpha; // then put the remembered alpha byte back
- bytes += BytesPerPixel;
- }
- }
- while (bytes <= TotalBytes) { // tail loop; NOTE(review): "<=" also processes a pixel starting at offset TotalBytes
- uint8 *Pixel = (uint8 *)Row + bytes;
- uint8 *Pixel2 = (uint8 *)Row2 + bytes;
- // TODO(fox): Optimize and write NEON!
- uint8 *DestAlpha = Pixel2 + (BytesPerPixel/4)*3; // byte offset 3 of the destination pixel when BytesPerPixel is 4
- uint8 Alpha = *DestAlpha; // remember the destination's alpha byte
- uint32 *DestPixel = (uint32 *)Pixel2;
- uint32 *SrcPixel = (uint32 *)Pixel;
- *DestPixel = *SrcPixel; // overwrite all four bytes with the source pixel
- *DestAlpha = Alpha; // then put the remembered alpha byte back
- bytes += BytesPerPixel;
- }
-// This would be an easy SIMD if only AVX had a scatter call...
-// NOTE(fox): Only works with unpacked bitmaps for now.
-static void // Fills Data with five 256-entry real32 tables counting the pixels of Input: average, then one per extracted channel
-Bitmap_CalcHistogram(void *Data, void *Input, uint16 BytesPerPixel, uint64 TotalBytes)
- uint32 *Slot = (uint32 *)Data; // typed uint32 here, but every access below goes through a real32 pointer
- uint8 *Row = (uint8 *)Input; // source pixels
- uint64 bytes = 0; // running byte offset into Input
- uint16 ByteOffset = Bitmap_CalcByteOffset(BytesPerPixel); // NOTE(review): not used by the code shown
- uint64 RemainderBytes = TotalBytes % ByteOffset; // NOTE(review): not used by the code shown
- for (int i = 0; i < 256*5; i++) {
- *(real32 *)((uint8 *)Slot + i*sizeof(real32)) = 0; // reset all 1280 counters before counting
- }
- while (bytes <= TotalBytes) { // NOTE(review): "<=" also reads a pixel starting at offset TotalBytes
- uint8 *Pixel = (uint8 *)Row + bytes;
- uint8 A = (*(uint32 *)Pixel >> 24); // NOTE(review): these shifts read the pixel as 0xAARRGGBB, while the file's header comment says 0xAAGGBBRR - confirm which is intended
- uint8 R = (*(uint32 *)Pixel >> 16);
- uint8 G = (*(uint32 *)Pixel >> 8);
- uint8 B = (*(uint32 *)Pixel >> 0);
- uint8 Avg = (uint8)((real32)(R + G + B) / 3.0f); // mean of the three colour values, truncated to 8 bits
- *(real32 *)((uint8 *)Slot + Avg*sizeof(real32)) += 1; // table 0 (entries 0..255): average
- *(real32 *)((uint8 *)Slot + (256 + R)*sizeof(real32)) += 1; // table 1 (entries 256..511): R
- *(real32 *)((uint8 *)Slot + (256*2 + G)*sizeof(real32)) += 1; // table 2 (entries 512..767): G
- *(real32 *)((uint8 *)Slot + (256*3 + B)*sizeof(real32)) += 1; // table 3 (entries 768..1023): B
- *(real32 *)((uint8 *)Slot + (256*4 + A)*sizeof(real32)) += 1; // table 4 (entries 1024..1279): A
- bytes += BytesPerPixel; // one pixel per iteration
- }
-#if 0
-static void // Compiled out by the preceding "#if 0": runs three buffer helpers on Buffer in sequence
-BitmapPackRGB(pixel_buffer *Buffer) {
- Assert(Buffer->Pitch); // requires a non-zero Pitch
- Convert4x4Chunk(Buffer, 0); // NOTE(review): helper not defined in the code shown; presumably packs into 4x4 chunks - confirm
- CopyToBuffer(Buffer, 1); // NOTE(review): helper not defined in the code shown
- ClearBuffer(Buffer, Buffer->EffectBuffer); // NOTE(review): helper not defined in the code shown
-static void // Fills Raster->OriginalBuffer with one colour value, using AVX, SSE or scalar stores depending on InstructionMode
-DebugFillSolid(pixel_buffer *Raster, v4 Color)
- uint32 ColS = ColToUint32(Color); // 32-bit value written by every store below
- __m256i Col8 = _mm256_set1_epi32(ColS); // ColS repeated in 8 lanes for the AVX store
- __m128i Col = _mm_set1_epi32(ColS); // ColS repeated in 4 lanes for the SSE store
- uint8 *Row = (uint8 *)Raster->OriginalBuffer; // base address of the fill
- uint64 bytes = 0; // running byte offset into the buffer
- uint16 ByteOffset = Raster->BytesPerPixel; // default step: one pixel
- if (InstructionMode == instruction_mode_avx)
- ByteOffset = 8*Raster->BytesPerPixel; // eight pixels per step
- else if (InstructionMode == instruction_mode_sse)
- ByteOffset = 4*Raster->BytesPerPixel; // four pixels per step
- uint64 TotalBytes = Raster->FullHeight*Raster->FullWidth*Raster->BytesPerPixel; // NOTE(review): the product is evaluated in the fields' own type before widening - field types are not visible here
- while (bytes < TotalBytes) {
- uint8 *Pixel = Row + bytes;
- if (InstructionMode == instruction_mode_avx) {
- _mm256_storeu_si256((__m256i *)Pixel, Col8); // unaligned 32-byte store
- } else if (InstructionMode == instruction_mode_sse) {
- _mm_storeu_si128((__m128i *)Pixel, Col); // unaligned 16-byte store
- } else {
- *(uint32 *)Pixel = ColS; // scalar path: 4-byte store
- }
- bytes += ByteOffset;
- }
-static void // Writes a test pattern into Raster->OriginalBuffer whose two low bytes ramp with X and Y
-DebugBitmap(pixel_buffer *Raster)
- uint8 asda = 0x0; // value placed in bits 16..23; switches to 0xff once Y exceeds 3 and is never reset
- uint8 *Row = ((uint8 *)Raster->OriginalBuffer); // base address of the pattern
- real32 XInc = 255.0f / Raster->Width; // per-column increment so X spans 0..255 across the width
- real32 YInc = 255.0f / Raster->Height; // per-row increment so Y spans 0..255 across the height
- for (uint8 Y = 0; Y < Raster->Height; Y++) { // NOTE(review): uint8 counter - if Height can exceed 255 it wraps before the loop ends; Height's type is not visible here
- for (uint8 X = 0; X < Raster->Width; X++) { // NOTE(review): same uint8 concern for Width
- uint8 *Pixel = (uint8 *)Row + Raster->FullWidth*Y*4 + X*4; // rows are FullWidth pixels apart, 4 bytes per pixel
- // *(uint32 *)Pixel = 0xffffffff;
- if (Y > 3) { asda = 0xff; }
- *(uint32 *)Pixel = ((0xff << 24) | // bits 24..31: constant 0xff
- (asda << 16) | // bits 16..23: asda
- (RoundReal32ToInt32((YInc * Y)) << 8) | // bits 8..15: vertical ramp
- (RoundReal32ToInt32((XInc * X))) ); // bits 0..7: horizontal ramp
- }
- }