summaryrefslogtreecommitdiff
path: root/bitmap_calls.cpp
diff options
context:
space:
mode:
authorFox Caminiti <fox@foxcam.net>2022-07-27 11:00:45 -0400
committerFox Caminiti <fox@foxcam.net>2022-07-27 11:00:45 -0400
commit83ce428d8bb5f4a762abf879adec076bc34cf36a (patch)
treec1500f027d9eec514ba1a2912e7a4763e7be26b2 /bitmap_calls.cpp
parentc6bd84c356b6aaa029b9708d7b99a4aba1673b6b (diff)
full support for odd-dimension bitmaps and comps
Diffstat (limited to 'bitmap_calls.cpp')
-rw-r--r--bitmap_calls.cpp238
1 files changed, 238 insertions, 0 deletions
diff --git a/bitmap_calls.cpp b/bitmap_calls.cpp
new file mode 100644
index 0000000..2cdb463
--- /dev/null
+++ b/bitmap_calls.cpp
@@ -0,0 +1,238 @@
+// NOTE(fox): Pay attention to how the Y pitch differs between the unpacked
+// bitmaps and the 4x4 packed bitmaps, since odd-sized bitmaps are padded.
+
+// TODO(fox): I could write an AVX version of this function, but it may not be
+// that much faster since we have to do a bit of uninterleaving.
+
+// 0 - store in 4x4 chunks
+// 1 - unpack to 1xwidth
+// Copies OriginalBuffer into EffectBuffer while converting between the two
+// pixel layouts used by this file:
+//   Which == 0: read row-major pixels (row pitch Width*4 bytes) and write
+//               them in the 4x4-chunk packed layout.
+//   otherwise:  read the 4x4-chunk packed layout and write row-major pixels.
+// The packed pixel index of (X, Y) is
+//   (X >> 2)*16 + (X % 4) + (Y >> 2)*(FullWidth*4) + (Y % 4)*4.
+// When SSE or AVX is enabled, runs of four horizontal pixels are moved with
+// one 16-byte load/store; the last Width % 4 pixels of each row, and every
+// pixel in the scalar mode, are moved one uint32 at a time.
+// NOTE: both copy paths move 4 bytes per pixel, so this presumably expects
+// BytesPerPixel == 4 -- confirm against the pixel_buffer definition.
+internal void
+Convert4x4Chunk(pixel_buffer *Buffer, uint8 Which)
+{
+    uint8 *From = (uint8 *)Buffer->OriginalBuffer;
+    uint8 *To = (uint8 *)Buffer->EffectBuffer;
+    uint32 TailPixels = Buffer->Width % 4;
+
+    for (uint32 Y = 0; Y < Buffer->Height; Y++) {
+        // Packed-layout contribution of this row; it does not depend on X.
+        uint32 ChunkRowBase = (Y >> 2)*(Buffer->FullWidth*4) + (Y % 4)*4;
+        uint32 X = 0;
+        while (X < Buffer->Width) {
+            uint32 PackedIndex = (X >> 2)*16 + (X % 4) + ChunkRowBase;
+            uint8 *Read, *Write;
+            if (Which != 0) {
+                // packed -> row-major
+                Read = From + PackedIndex*Buffer->BytesPerPixel;
+                Write = To + Y*Buffer->Width*4 + X*Buffer->BytesPerPixel;
+            } else {
+                // row-major -> packed
+                Write = To + PackedIndex*Buffer->BytesPerPixel;
+                Read = From + Y*Buffer->Width*4 + X*Buffer->BytesPerPixel;
+            }
+
+            if (X < Buffer->Width - TailPixels &&
+                (InstructionMode == sse_enabled || InstructionMode == avx_enabled)) {
+                // Four horizontally adjacent pixels are contiguous in both layouts.
+                __m128i FourPixels = _mm_loadu_si128((__m128i *)Read);
+                _mm_storeu_si128((__m128i *)Write, FourPixels);
+                X += 4;
+            } else {
+                *(uint32 *)Write = *(uint32 *)Read;
+                X++;
+            }
+        }
+    }
+}
+
+// TODO(fox): Replace this in the future.
+// Copies an imported image into a freshly allocated scratch bitmap.
+//
+// Memory - passed through to AllocateMemory (B_Scratch).
+// Raster - supplies the sizes: FullWidth*FullHeight*BytesPerPixel bytes are
+//          allocated, and Width*Height*BytesPerPixel bytes are copied.
+// Input  - source bytes; must hold at least Width*Height*BytesPerPixel bytes.
+//
+// Returns the pointer obtained from AllocateMemory. Bytes of the allocation
+// beyond the copied range are not written here.
+//
+// Every transfer is bounds-checked against the byte count, so nothing is
+// read past the end of Input or written past the copied range.
+internal void *
+MoveImportToBitmap(memory *Memory, pixel_buffer *Raster, void *Input)
+{
+    uint8 *Src = (uint8 *)Input;
+    void *Output = AllocateMemory(Memory, Raster->FullWidth * Raster->FullHeight * Raster->BytesPerPixel, B_Scratch);
+    uint8 *Dest = (uint8 *)Output;
+
+    // Widen before multiplying so large images cannot wrap a narrower type.
+    uint64 TotalBytes = (uint64)Raster->Height * Raster->Width * Raster->BytesPerPixel;
+    uint64 bytes = 0;
+
+    // 128-bit copies are used for both SSE and AVX modes.
+    if (InstructionMode == sse_enabled || InstructionMode == avx_enabled) {
+        while (bytes + 16 <= TotalBytes) {
+            __m128i Chunk = _mm_loadu_si128((__m128i *)(Src + bytes));
+            _mm_storeu_si128((__m128i *)(Dest + bytes), Chunk);
+            bytes += 16;
+        }
+    }
+    // Scalar path, and the tail left over by the 16-byte loop.
+    while (bytes + 4 <= TotalBytes) {
+        *(uint32 *)(Dest + bytes) = *(uint32 *)(Src + bytes);
+        bytes += 4;
+    }
+    // Final 0-3 bytes when the total is not a multiple of four.
+    while (bytes < TotalBytes) {
+        Dest[bytes] = Src[bytes];
+        bytes++;
+    }
+    return Output;
+}
+
+// Zeroes FullWidth*FullHeight*BytesPerPixel bytes starting at Buffer.
+//
+// Raster - supplies the dimensions only; none of its buffers are touched
+//          unless the caller passes one of them as Buffer.
+// Buffer - destination; must hold at least that many bytes.
+//
+// Uses 32-byte stores in AVX mode and 16-byte stores in SSE mode, then
+// finishes with scalar stores, so no write lands past the end of the buffer
+// whatever the total size is.
+internal void
+ClearBuffer(pixel_buffer *Raster, void *Buffer)
+{
+    uint8 *Dest = (uint8 *)Buffer;
+
+    // Widen before multiplying so large buffers cannot wrap a narrower type.
+    uint64 TotalBytes = (uint64)Raster->FullHeight * Raster->FullWidth * Raster->BytesPerPixel;
+    uint64 bytes = 0;
+
+    if (InstructionMode == avx_enabled) {
+        // Built only on this path so other modes never execute AVX code here.
+        __m256i Zero8 = _mm256_setzero_si256();
+        while (bytes + 32 <= TotalBytes) {
+            _mm256_storeu_si256((__m256i *)(Dest + bytes), Zero8);
+            bytes += 32;
+        }
+    } else if (InstructionMode == sse_enabled) {
+        __m128i Zero = _mm_setzero_si128();
+        while (bytes + 16 <= TotalBytes) {
+            _mm_storeu_si128((__m128i *)(Dest + bytes), Zero);
+            bytes += 16;
+        }
+    }
+    // Scalar path, and the tail left over by the wide stores.
+    while (bytes + 4 <= TotalBytes) {
+        *(uint32 *)(Dest + bytes) = 0x00000000;
+        bytes += 4;
+    }
+    while (bytes < TotalBytes) {
+        Dest[bytes] = 0;
+        bytes++;
+    }
+}
+
+// 0 - original -> effect
+// 1 - effect -> original
+// Copies the whole padded bitmap (FullWidth*FullHeight*BytesPerPixel bytes)
+// between the two buffers of Raster.
+//   Which == 0: OriginalBuffer -> EffectBuffer
+//   otherwise:  EffectBuffer   -> OriginalBuffer
+//
+// Every transfer is bounds-checked against the byte count, so neither buffer
+// is read or written past its end.
+internal void
+CopyToBuffer(pixel_buffer *Raster, uint16 Which)
+{
+    uint8 *Src, *Dest;
+    if (Which == 0) {
+        Src = (uint8 *)Raster->OriginalBuffer;
+        Dest = (uint8 *)Raster->EffectBuffer;
+    } else {
+        Src = (uint8 *)Raster->EffectBuffer;
+        Dest = (uint8 *)Raster->OriginalBuffer;
+    }
+
+    // Widen before multiplying so large buffers cannot wrap a narrower type.
+    uint64 TotalBytes = (uint64)Raster->FullHeight * Raster->FullWidth * Raster->BytesPerPixel;
+    uint64 bytes = 0;
+
+    // 128-bit copies are used for both SSE and AVX modes.
+    if (InstructionMode == sse_enabled || InstructionMode == avx_enabled) {
+        while (bytes + 16 <= TotalBytes) {
+            __m128i Chunk = _mm_loadu_si128((__m128i *)(Src + bytes));
+            _mm_storeu_si128((__m128i *)(Dest + bytes), Chunk);
+            bytes += 16;
+        }
+    }
+    // Scalar path, and the tail left over by the 16-byte loop.
+    while (bytes + 4 <= TotalBytes) {
+        *(uint32 *)(Dest + bytes) = *(uint32 *)(Src + bytes);
+        bytes += 4;
+    }
+    // Final 0-3 bytes when the total is not a multiple of four.
+    while (bytes < TotalBytes) {
+        Dest[bytes] = Src[bytes];
+        bytes++;
+    }
+}
+
+// Rewrites Buffer->OriginalBuffer in the 4x4-chunk layout, using
+// Buffer->EffectBuffer as scratch space; EffectBuffer is zeroed on return.
+// Asserts that Buffer->Pitch is non-zero.
+internal void
+BitmapPackRGB(pixel_buffer *Buffer)
+{
+    Assert(Buffer->Pitch);
+
+    // original (row-major) -> effect (4x4 chunks)
+    Convert4x4Chunk(Buffer, 0);
+    // effect -> original
+    CopyToBuffer(Buffer, 1);
+    // leave the scratch buffer empty
+    ClearBuffer(Buffer, Buffer->EffectBuffer);
+}
+
+// Fills CompBuffer->EffectBuffer from CompBuffer->OriginalBuffer, ends the
+// render state, and uploads Width x Height RGBA8 pixels from EffectBuffer
+// into the GL texture textureID.
+// When D is set the original is unpacked from 4x4 chunks on the way;
+// otherwise it is copied verbatim.
+internal void
+OutputToViewport(pixel_buffer *CompBuffer, project_state *State, GLuint textureID)
+{
+    if (D) {
+        Convert4x4Chunk(CompBuffer, 1);
+    } else {
+        CopyToBuffer(CompBuffer, 0);
+    }
+
+    EndRenderState(State);
+
+    glBindTexture(GL_TEXTURE_2D, textureID);
+    glTexSubImage2D(GL_TEXTURE_2D,
+                    0,                      // mip level
+                    0, 0,                   // x/y offset
+                    CompBuffer->Width,
+                    CompBuffer->Height,
+                    GL_RGBA,
+                    GL_UNSIGNED_BYTE,
+                    CompBuffer->EffectBuffer);
+}
+
+// Debug helper: fills the whole padded OriginalBuffer
+// (FullWidth*FullHeight*BytesPerPixel bytes) with one colour.
+//
+// Raster - buffer to fill.
+// Color  - converted to a 32-bit pixel value with ColToUint32 and repeated.
+//
+// Uses 32-byte stores in AVX mode and 16-byte stores in SSE mode, then
+// finishes with 32-bit stores, so no write lands past the end of the buffer.
+// NOTE: the fill pattern is always 4 bytes wide, so this presumably expects
+// BytesPerPixel == 4 -- confirm against the pixel_buffer definition.
+internal void
+DebugFillSolid(pixel_buffer *Raster, v4 Color)
+{
+    uint32 ColS = ColToUint32(Color);
+    uint8 *Dest = (uint8 *)Raster->OriginalBuffer;
+
+    // Widen before multiplying so large buffers cannot wrap a narrower type.
+    uint64 TotalBytes = (uint64)Raster->FullHeight * Raster->FullWidth * Raster->BytesPerPixel;
+    uint64 bytes = 0;
+
+    if (InstructionMode == avx_enabled) {
+        // Built only on this path so other modes never execute AVX code here.
+        __m256i Col8 = _mm256_set1_epi32(ColS);
+        while (bytes + 32 <= TotalBytes) {
+            _mm256_storeu_si256((__m256i *)(Dest + bytes), Col8);
+            bytes += 32;
+        }
+    } else if (InstructionMode == sse_enabled) {
+        __m128i Col = _mm_set1_epi32(ColS);
+        while (bytes + 16 <= TotalBytes) {
+            _mm_storeu_si128((__m128i *)(Dest + bytes), Col);
+            bytes += 16;
+        }
+    }
+    // Scalar path, and the tail left over by the wide stores.
+    while (bytes + 4 <= TotalBytes) {
+        *(uint32 *)(Dest + bytes) = ColS;
+        bytes += 4;
+    }
+}
+
+// Debug helper: writes a test gradient into Raster->OriginalBuffer using a
+// row pitch of FullWidth*4 bytes. Each 32-bit pixel is built as:
+//   bits 24-31: 0xff
+//   bits 16-23: 0x00 for rows 0-3, 0xff for every later row
+//   bits  8-15: vertical ramp,   round(Y * 255 / Height)
+//   bits  0-7 : horizontal ramp, round(X * 255 / Width)
+//
+// The loop counters are 32-bit so bitmaps larger than 255 pixels in either
+// dimension terminate; 8-bit counters would wrap and never reach the bound.
+internal void
+DebugBitmap(pixel_buffer *Raster)
+{
+    uint8 *Base = (uint8 *)Raster->OriginalBuffer;
+    real32 XInc = 255.0f / Raster->Width;
+    real32 YInc = 255.0f / Raster->Height;
+    for (uint32 Y = 0; Y < Raster->Height; Y++) {
+        // Marks every row after the first four.
+        uint32 RowMarker = (Y > 3) ? 0xffu : 0x00u;
+        uint32 YRamp = (uint32)RoundReal32ToInt32((YInc * Y));
+        for (uint32 X = 0; X < Raster->Width; X++) {
+            uint8 *Pixel = Base + Raster->FullWidth*Y*4 + X*4;
+            uint32 XRamp = (uint32)RoundReal32ToInt32((XInc * X));
+            // Unsigned operands keep the << 24 out of the sign bit.
+            *(uint32 *)Pixel = ((0xffu << 24) |
+                                (RowMarker << 16) |
+                                (YRamp << 8) |
+                                (XRamp));
+        }
+    }
+}