// NOTE(fox): Pay attention to how the Y pitch differs between the unpacked // bitmaps and the 4x4 packed bitmaps, since odd-sized bitmaps are padded. // TODO(fox): I could write an AVX version of this function, but it may not be // that much faster since we have to do a bit of uninterleaving. // 0 - store in 4x4 chunks // 1 - unpack to 1xwidth void Bitmap_ConvertPacking(void *Buffer, void *DestBuffer, uint16 Width, uint16 Height, uint16 BytesPerPixel, uint16 Which) { uint8 *Src = (uint8 *)Buffer; uint8 *Temp = (uint8 *)DestBuffer; uint32 RemainderPixels = Width % 4; for (uint32 Y = 0; Y < Height; Y++) { uint32 X = 0; while (X < Width - RemainderPixels) { uint16 WidthP, HeightP; Bitmap_CalcPackedDimensions(Width, Height, &WidthP, &HeightP); uint32 XLookup = (X >> 2)*16 + (X % 4); uint32 YLookup = (Y >> 2)*(WidthP*4) + (Y % 4)*4; uint32 PixelToSeek = XLookup + YLookup; uint8 *DPixel, *Pixel; if (Which == 0) { DPixel = Temp + PixelToSeek*BytesPerPixel; Pixel = Src + Y*Width*4 + X*BytesPerPixel; } else { Pixel = Src + PixelToSeek*BytesPerPixel; DPixel = Temp + Y*Width*4 + X*BytesPerPixel; } if (InstructionMode == instruction_mode_sse || InstructionMode == instruction_mode_avx) { __m128i Row = _mm_loadu_si128((__m128i *)Pixel); _mm_storeu_si128((__m128i *)DPixel, Row); X+=4; } else { *(uint32 *)DPixel = *(uint32 *)Pixel; X++; } } while (X < Width) { uint16 WidthP, HeightP; Bitmap_CalcPackedDimensions(Width, Height, &WidthP, &HeightP); uint32 XLookup = (X >> 2)*16 + (X % 4); uint32 YLookup = (Y >> 2)*(WidthP*4) + (Y % 4)*4; uint32 PixelToSeek = XLookup + YLookup; uint8 *DPixel, *Pixel; if (Which == 0) { DPixel = Temp + PixelToSeek*BytesPerPixel; Pixel = Src + Y*Width*4 + X*BytesPerPixel; } else { Pixel = Src + PixelToSeek*BytesPerPixel; DPixel = Temp + Y*Width*4 + X*BytesPerPixel; } *(uint32 *)DPixel = *(uint32 *)Pixel; X++; } } } // TODO(fox): Replace this in the future. #if 0 static void * MoveImportToBitmap(memory *Memory, pixel_buffer *Raster, void *Input) { uint8 *Row = ((uint8 *)Input); // void *Output = AllocateMemory(Memory, Bitmap_CalcTotalBytes(Raster->Width, Raster->Height, Raster->BytesPerPixel), B_Layers); uint8 *Row2 = ((uint8 *)Output); uint64 bytes = 0; uint16 ByteOffset = Bitmap_CalculateByteOffset(BytesPerPixel); uint64 TotalBytes = Bitmap_CalculateTotalBytes(Width, Height, BytesPerPixel); uint64 RemainderBytes = TotalBytes % ByteOffset; while (bytes <= TotalBytes - RemainderBytes) { uint8 *Pixel = (uint8 *)Row + bytes; uint8 *Pixel2 = (uint8 *)Row2 + bytes; if (InstructionMode == instruction_mode_sse || InstructionMode == instruction_mode_avx) { __m128i OutputPixel = _mm_loadu_si128((__m128i *)Pixel); _mm_storeu_si128((__m128i *)Pixel2, OutputPixel); bytes += 4*Raster->BytesPerPixel; } else { *(uint32 *)Pixel2 = *(uint32 *)Pixel; bytes += Raster->BytesPerPixel; } } while (bytes <= TotalBytes) { uint8 *Pixel = (uint8 *)Row + bytes; uint8 *Pixel2 = (uint8 *)Row2 + bytes; *(uint32 *)Pixel2 = *(uint32 *)Pixel; bytes += Raster->BytesPerPixel; } return Output; } #endif static void Bitmap_Clear(void *Buffer, uint16 Width, uint16 Height, uint16 BytesPerPixel) { uint8 *Row = (uint8 *)Buffer; __m256i Zero8 = _mm256_setzero_si256(); __m128i Zero = _mm_setzero_si128(); uint64 bytes = 0; uint16 ByteOffset = Bitmap_CalcByteOffset(BytesPerPixel); uint64 TotalBytes = Bitmap_CalcTotalBytes(Width, Height, BytesPerPixel); while (bytes < TotalBytes) { uint8 *Pixel = Row + bytes; if (InstructionMode == instruction_mode_avx) { _mm256_storeu_si256((__m256i *)Pixel, Zero8); } else if (InstructionMode == instruction_mode_sse) { _mm_storeu_si128((__m128i *)Pixel, Zero); } else { *(uint32 *)Pixel = 0x00000000; } bytes += ByteOffset; } } #if 0 // 0 - original -> effect // 1 - effect -> original static void CopyToBuffer(pixel_buffer *Raster, uint16 Which) { uint8 *Row, *Row2; if (Which == 0) { Row = ((uint8 *)Raster->OriginalBuffer); Row2 = ((uint8 *)Raster->EffectBuffer); } else { Row = ((uint8 *)Raster->EffectBuffer); Row2 = ((uint8 *)Raster->OriginalBuffer); } uint64 bytes = 0; uint16 ByteOffset = Bitmap_CalculateByteOffset(BytesPerPixel); uint64 TotalBytes = Bitmap_CalculateTotalBytes(Width, Height, BytesPerPixel); uint64 RemainderBytes = TotalBytes % ByteOffset; while (bytes <= TotalBytes - RemainderBytes) { uint8 *Pixel = (uint8 *)Row + bytes; uint8 *Pixel2 = (uint8 *)Row2 + bytes; if (InstructionMode == instruction_mode_sse || InstructionMode == instruction_mode_avx) { __m128i OutputPixel = _mm_loadu_si128((__m128i *)Pixel); _mm_storeu_si128((__m128i *)Pixel2, OutputPixel); bytes += 4*Raster->BytesPerPixel; } else { *(uint32 *)Pixel2 = *(uint32 *)Pixel; bytes += Raster->BytesPerPixel; } } while (bytes <= TotalBytes) { uint8 *Pixel = (uint8 *)Row + bytes; uint8 *Pixel2 = (uint8 *)Row2 + bytes; *(uint32 *)Pixel2 = *(uint32 *)Pixel; bytes += Raster->BytesPerPixel; } } static void BitmapPackRGB(pixel_buffer *Buffer) { Assert(Buffer->Pitch); Convert4x4Chunk(Buffer, 0); CopyToBuffer(Buffer, 1); ClearBuffer(Buffer, Buffer->EffectBuffer); } static void OutputToViewport(pixel_buffer *CompBuffer, project_state *State, GLuint textureID) { Convert4x4Chunk(CompBuffer, 1); EndRenderState(State); glBindTexture(GL_TEXTURE_2D, textureID); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, CompBuffer->Width, CompBuffer->Height, GL_RGBA, GL_UNSIGNED_BYTE, CompBuffer->EffectBuffer); } static void DebugFillSolid(pixel_buffer *Raster, v4 Color) { uint32 ColS = ColToUint32(Color); __m256i Col8 = _mm256_set1_epi32(ColS); __m128i Col = _mm_set1_epi32(ColS); uint8 *Row = (uint8 *)Raster->OriginalBuffer; uint64 bytes = 0; uint16 ByteOffset = Raster->BytesPerPixel; if (InstructionMode == instruction_mode_avx) ByteOffset = 8*Raster->BytesPerPixel; else if (InstructionMode == instruction_mode_sse) ByteOffset = 4*Raster->BytesPerPixel; uint64 TotalBytes = Raster->FullHeight*Raster->FullWidth*Raster->BytesPerPixel; while (bytes < TotalBytes) { uint8 *Pixel = Row + bytes; if (InstructionMode == instruction_mode_avx) { _mm256_storeu_si256((__m256i *)Pixel, Col8); } else if (InstructionMode == instruction_mode_sse) { _mm_storeu_si128((__m128i *)Pixel, Col); } else { *(uint32 *)Pixel = ColS; } bytes += ByteOffset; } } static void DebugBitmap(pixel_buffer *Raster) { uint8 asda = 0x0; uint8 *Row = ((uint8 *)Raster->OriginalBuffer); real32 XInc = 255.0f / Raster->Width; real32 YInc = 255.0f / Raster->Height; for (uint8 Y = 0; Y < Raster->Height; Y++) { for (uint8 X = 0; X < Raster->Width; X++) { uint8 *Pixel = (uint8 *)Row + Raster->FullWidth*Y*4 + X*4; // *(uint32 *)Pixel = 0xffffffff; if (Y > 3) { asda = 0xff; } *(uint32 *)Pixel = ((0xff << 24) | (asda << 16) | (RoundReal32ToInt32((YInc * Y)) << 8) | (RoundReal32ToInt32((XInc * X))) ); } } } #endif