From 83ce428d8bb5f4a762abf879adec076bc34cf36a Mon Sep 17 00:00:00 2001 From: Fox Caminiti Date: Wed, 27 Jul 2022 11:00:45 -0400 Subject: full support for odd-dimension bitmaps and comps --- bitmap_calls.cpp | 238 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 238 insertions(+) create mode 100644 bitmap_calls.cpp (limited to 'bitmap_calls.cpp') diff --git a/bitmap_calls.cpp b/bitmap_calls.cpp new file mode 100644 index 0000000..2cdb463 --- /dev/null +++ b/bitmap_calls.cpp @@ -0,0 +1,238 @@ +// NOTE(fox): Pay attention to how the Y pitch differs between the unpacked +// bitmaps and the 4x4 packed bitmaps, since odd-sized bitmaps are padded. + +// TODO(fox): I could write an AVX version of this function, but it may not be +// that much faster since we have to do a bit of uninterleaving. + +// 0 - store in 4x4 chunks +// 1 - unpack to 1xwidth +internal void +Convert4x4Chunk(pixel_buffer *Buffer, uint8 Which) +{ + uint8 *Src = (uint8 *)Buffer->OriginalBuffer; + uint8 *Temp = (uint8 *)Buffer->EffectBuffer; + uint32 RemainderPixels = Buffer->Width % 4; + for (uint32 Y = 0; Y < Buffer->Height; Y++) { + uint32 X = 0; + while (X < Buffer->Width - RemainderPixels) { + uint32 XLookup = (X >> 2)*16 + (X % 4); + uint32 YLookup = (Y >> 2)*(Buffer->FullWidth*4) + (Y % 4)*4; + uint32 PixelToSeek = XLookup + YLookup; + uint8 *DPixel, *Pixel; + if (Which == 0) { + DPixel = Temp + PixelToSeek*Buffer->BytesPerPixel; + Pixel = Src + Y*Buffer->Width*4 + X*Buffer->BytesPerPixel; + } else { + Pixel = Src + PixelToSeek*Buffer->BytesPerPixel; + DPixel = Temp + Y*Buffer->Width*4 + X*Buffer->BytesPerPixel; + } + + if (InstructionMode == sse_enabled || InstructionMode == avx_enabled) { + __m128i Row = _mm_loadu_si128((__m128i *)Pixel); + _mm_storeu_si128((__m128i *)DPixel, Row); + X+=4; + } else { + *(uint32 *)DPixel = *(uint32 *)Pixel; + X++; + } + } + while (X < Buffer->Width) { + uint32 XLookup = (X >> 2)*16 + (X % 4); + uint32 YLookup = (Y >> 2)*(Buffer->FullWidth*4) + (Y % 4)*4; + uint32 PixelToSeek = XLookup + YLookup; + uint8 *DPixel, *Pixel; + if (Which == 0) { + DPixel = Temp + PixelToSeek*Buffer->BytesPerPixel; + Pixel = Src + Y*Buffer->Width*4 + X*Buffer->BytesPerPixel; + } else { + Pixel = Src + PixelToSeek*Buffer->BytesPerPixel; + DPixel = Temp + Y*Buffer->Width*4 + X*Buffer->BytesPerPixel; + } + + *(uint32 *)DPixel = *(uint32 *)Pixel; + X++; + } + } +} + +// TODO(fox): Replace this in the future. +internal void * +MoveImportToBitmap(memory *Memory, pixel_buffer *Raster, void *Input) +{ + uint8 *Row = ((uint8 *)Input); + void *Output = AllocateMemory(Memory, Raster->FullWidth * Raster->FullHeight * Raster->BytesPerPixel, B_Scratch); + uint8 *Row2 = ((uint8 *)Output); + + uint64 bytes = 0; + uint16 ByteOffset = Raster->BytesPerPixel; + if (InstructionMode == avx_enabled) + ByteOffset = 8*Raster->BytesPerPixel; + else if (InstructionMode == avx_enabled) + ByteOffset = 4*Raster->BytesPerPixel; + + uint64 TotalBytes = Raster->Height*Raster->Width*Raster->BytesPerPixel; + uint64 RemainderBytes = TotalBytes % ByteOffset; + + while (bytes <= TotalBytes - RemainderBytes) { + uint8 *Pixel = (uint8 *)Row + bytes; + uint8 *Pixel2 = (uint8 *)Row2 + bytes; + if (InstructionMode == sse_enabled || InstructionMode == avx_enabled) { + __m128i OutputPixel = _mm_loadu_si128((__m128i *)Pixel); + _mm_storeu_si128((__m128i *)Pixel2, OutputPixel); + bytes += 4*Raster->BytesPerPixel; + } else { + *(uint32 *)Pixel2 = *(uint32 *)Pixel; + bytes += Raster->BytesPerPixel; + } + } + while (bytes <= TotalBytes) { + uint8 *Pixel = (uint8 *)Row + bytes; + uint8 *Pixel2 = (uint8 *)Row2 + bytes; + *(uint32 *)Pixel2 = *(uint32 *)Pixel; + bytes += Raster->BytesPerPixel; + } + return Output; +} + +internal void +ClearBuffer(pixel_buffer *Raster, void *Buffer) +{ + uint8 *Row = (uint8 *)Buffer; + __m256i Zero8 = _mm256_setzero_si256(); + __m128i Zero = _mm_setzero_si128(); + + uint64 bytes = 0; + uint16 ByteOffset = Raster->BytesPerPixel; + if (InstructionMode == avx_enabled) + ByteOffset = 8*Raster->BytesPerPixel; + else if (InstructionMode == avx_enabled) + ByteOffset = 4*Raster->BytesPerPixel; + + uint64 TotalBytes = Raster->FullHeight*Raster->FullWidth*Raster->BytesPerPixel; + + while (bytes < TotalBytes) { + uint8 *Pixel = Row + bytes; + if (InstructionMode == avx_enabled) { + _mm256_storeu_si256((__m256i *)Pixel, Zero8); + } else if (InstructionMode == sse_enabled) { + _mm_storeu_si128((__m128i *)Pixel, Zero); + } else { + *(uint32 *)Pixel = 0x00000000; + } + bytes += ByteOffset; + } +} + +// 0 - original -> effect +// 1 - effect -> original +internal void +CopyToBuffer(pixel_buffer *Raster, uint16 Which) +{ + uint8 *Row, *Row2; + if (Which == 0) { + Row = ((uint8 *)Raster->OriginalBuffer); + Row2 = ((uint8 *)Raster->EffectBuffer); + } else { + Row = ((uint8 *)Raster->EffectBuffer); + Row2 = ((uint8 *)Raster->OriginalBuffer); + } + + uint64 bytes = 0; + uint16 ByteOffset = Raster->BytesPerPixel; + if (InstructionMode == avx_enabled) + ByteOffset = 8*Raster->BytesPerPixel; + else if (InstructionMode == avx_enabled) + ByteOffset = 4*Raster->BytesPerPixel; + + uint64 TotalBytes = Raster->FullHeight*Raster->FullWidth*Raster->BytesPerPixel; + uint64 RemainderBytes = TotalBytes % ByteOffset; + + while (bytes <= TotalBytes - RemainderBytes) { + uint8 *Pixel = (uint8 *)Row + bytes; + uint8 *Pixel2 = (uint8 *)Row2 + bytes; + if (InstructionMode == sse_enabled || InstructionMode == avx_enabled) { + __m128i OutputPixel = _mm_loadu_si128((__m128i *)Pixel); + _mm_storeu_si128((__m128i *)Pixel2, OutputPixel); + bytes += 4*Raster->BytesPerPixel; + } else { + *(uint32 *)Pixel2 = *(uint32 *)Pixel; + bytes += Raster->BytesPerPixel; + } + } + while (bytes <= TotalBytes) { + uint8 *Pixel = (uint8 *)Row + bytes; + uint8 *Pixel2 = (uint8 *)Row2 + bytes; + *(uint32 *)Pixel2 = *(uint32 *)Pixel; + bytes += Raster->BytesPerPixel; + } +} + +internal void +BitmapPackRGB(pixel_buffer *Buffer) { + Assert(Buffer->Pitch); + Convert4x4Chunk(Buffer, 0); + CopyToBuffer(Buffer, 1); + ClearBuffer(Buffer, Buffer->EffectBuffer); +} + +internal void +OutputToViewport(pixel_buffer *CompBuffer, project_state *State, GLuint textureID) { + if (D) + Convert4x4Chunk(CompBuffer, 1); + else + CopyToBuffer(CompBuffer, 0); + EndRenderState(State); + glBindTexture(GL_TEXTURE_2D, textureID); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, CompBuffer->Width, CompBuffer->Height, GL_RGBA, GL_UNSIGNED_BYTE, + CompBuffer->EffectBuffer); +} + +internal void +DebugFillSolid(pixel_buffer *Raster, v4 Color) +{ + uint32 ColS = ColToUint32(Color); + __m256i Col8 = _mm256_set1_epi32(ColS); + __m128i Col = _mm_set1_epi32(ColS); + uint8 *Row = (uint8 *)Raster->OriginalBuffer; + + uint64 bytes = 0; + uint16 ByteOffset = Raster->BytesPerPixel; + if (InstructionMode == avx_enabled) + ByteOffset = 8*Raster->BytesPerPixel; + else if (InstructionMode == avx_enabled) + ByteOffset = 4*Raster->BytesPerPixel; + + uint64 TotalBytes = Raster->FullHeight*Raster->FullWidth*Raster->BytesPerPixel; + + while (bytes < TotalBytes) { + uint8 *Pixel = Row + bytes; + if (InstructionMode == avx_enabled) { + _mm256_storeu_si256((__m256i *)Pixel, Col8); + } else if (InstructionMode == sse_enabled) { + _mm_storeu_si128((__m128i *)Pixel, Col); + } else { + *(uint32 *)Pixel = ColS; + } + bytes += ByteOffset; + } +} + +internal void +DebugBitmap(pixel_buffer *Raster) +{ + uint8 asda = 0x0; + uint8 *Row = ((uint8 *)Raster->OriginalBuffer); + real32 XInc = 255.0f / Raster->Width; + real32 YInc = 255.0f / Raster->Height; + for (uint8 Y = 0; Y < Raster->Height; Y++) { + for (uint8 X = 0; X < Raster->Width; X++) { + uint8 *Pixel = (uint8 *)Row + Raster->FullWidth*Y*4 + X*4; + // *(uint32 *)Pixel = 0xffffffff; + if (Y > 3) { asda = 0xff; } + *(uint32 *)Pixel = ((0xff << 24) | + (asda << 16) | + (RoundReal32ToInt32((YInc * Y)) << 8) | + (RoundReal32ToInt32((XInc * X))) ); + } + } +} -- cgit v1.2.3