summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Fox Caminiti <fox@foxcam.net>, 2022-07-28 17:28:13 -0400
committer: Fox Caminiti <fox@foxcam.net>, 2022-07-28 17:28:13 -0400
commit: 7d3dcee5b370c05065eb409ad5c21d0bc64790b1 (patch)
tree: ff8a849022d40a775f3649a15166649639f95f5e
parent: 313ca58550163380e072880b360bc6076d27c8e5 (diff)
blend modes implemented in renderers
-rw-r--r-- createcalls.cpp 6
-rw-r--r-- effects.cpp 15
-rw-r--r-- main.cpp 14
-rw-r--r-- main.h 6
-rw-r--r-- my_imgui_widgets.cpp 117
-rw-r--r-- prenderer.cpp 734
-rw-r--r-- strings.cpp 12
7 files changed, 690 insertions, 214 deletions
diff --git a/createcalls.cpp b/createcalls.cpp
index 1c20b1a..7088694 100644
--- a/createcalls.cpp
+++ b/createcalls.cpp
@@ -300,12 +300,12 @@ CreateSolidLayer(project_data *File, memory *Memory, uint16 Width, uint16 Height
}
internal project_layer *
-CreateDebugLayer(project_data *File, memory *Memory, uint16 Width, uint16 Height)
+CreateDebugLayer(project_data *File, memory *Memory, uint16 Width, uint16 Height, int i)
{
project_layer *Layer = CreateLayer(File, Memory);
Layer->RenderInfo = AllocateMemory(Memory, sizeof(image_source), P_SourceData);
image_source *Source = (image_source *)Layer->RenderInfo;
- Source->Raster = CreateDebugBitmap(Memory, Width, Height);
+ Source->Raster = CreateDebugBitmap(Memory, Width, Height);
Layer->SourceType = source_image;
return Layer;
}
@@ -359,7 +359,7 @@ CreateGrid(project_data *File, memory *Memory) {
real32 YInc = File->Height / Amount;
for (int16 j = 0; j < 8; j++) {
for (int16 i = 0; i < 8; i++) {
- project_layer *Layer = CreateSolidLayer(File, Memory, 200, 200, V4(0.6, 0.3, 0.4, 1.0));
+ project_layer *Layer = CreateSolidLayer(File, Memory, 400, 400, V4(0.6, 0.3, 0.4, 1.0));
Layer->x.CurrentValue.f = (XInc*i);
Layer->y.CurrentValue.f = (XInc*j);
Layer->opacity.CurrentValue.f = 0.25;
diff --git a/effects.cpp b/effects.cpp
index 733e4d6..6c3c946 100644
--- a/effects.cpp
+++ b/effects.cpp
@@ -185,7 +185,7 @@ DrawGradient(pixel_buffer *Buffer, memory *Memory, property_channel Property[])
}
#if WINDOWS
-global_variable effect_header EffectList[2];
+global_variable effect_header EffectList[3];
#else
global_variable effect_header EffectList[] {
{
@@ -202,7 +202,18 @@ global_variable effect_header EffectList[] {
{"End Color", {.col = V4(1.0f, 0.0f, 0.0f, 1.0f)}, type_color, NORMALIZED_COL_MIN, NORMALIZED_COL_MAX},
{"Opacity", {1.0f}, type_real, NORMALIZED_REAL_MIN, NORMALIZED_REAL_MAX}
}
- }
+ },
+ {
+ "Levels",
+ &DrawColor, 6, levels, {
+ {"Start point", {0.0f}, type_real},
+ {"Mid point", {1.0f}, type_real},
+ {"End point", {1.0f}, type_real},
+ {"Start Col", {.col = V4(0.0f)}, type_color},
+ {"Mid Col", {.col = V4(1.0f)}, type_color},
+ {"End Col", {.col = V4(1.0f)}, type_color},
+ }
+ },
};
#endif
#if 0
diff --git a/main.cpp b/main.cpp
index 4d513c4..a575ef4 100644
--- a/main.cpp
+++ b/main.cpp
@@ -108,6 +108,7 @@ SDL_sem *Semaphore;
#include "effects.cpp"
#include "keyframes.cpp"
#include "layer.cpp"
+#include "strings.cpp"
#if THREADED
#include "threading.cpp"
#else
@@ -293,7 +294,7 @@ int main(int argc, char *argv[]) {
InitMemoryTable(&GlobalMemory, &Memory, 10 * 1024 * 1024, F_Effects, "Effects");
InitMemoryTable(&GlobalMemory, &Memory, 10 * 1024 * 1024, F_Keyframes, "Keyframe blocks");
InitMemoryTable(&GlobalMemory, &Memory, 10 * 1024 * 1024, F_Strings, "Strings");
- InitMemoryTable(&GlobalMemory, &Memory, 1024 * 1024 * 1024, B_Scratch, "Scratch buffer");
+ InitMemoryTable(&GlobalMemory, &Memory, (uint64)2 * 1024 * 1024 * 1024, B_Scratch, "Scratch buffer");
project_state State = {};
@@ -305,14 +306,17 @@ int main(int argc, char *argv[]) {
}
project_data File = {};
- File.Width = 1283;
- File.Height = 723;
+ File.Width = 1280;
+ File.Height = 720;
+ // File.Width = 1923;
+ // File.Height = 1083;
File.NumberOfFrames = 65;
File.FPS = 30;
File.CurrentFrame = 1;
File.StartFrame = 0;
File.EndFrame = 65;
+ // CreateLayerFromSource(&File, &State, &Memory, "../asset/b.jpg");
// char String[1024];
// uint16 Size = 1024;
// getcwd(String, Size);
@@ -430,7 +434,7 @@ int main(int argc, char *argv[]) {
// default saves window position to an external .ini file, which can be
// loaded from disk or memory.
io.IniFilename = NULL;
- ImGui::LoadIniSettingsFromMemory(ImGuiPrefs, 1146);
+ ImGui::LoadIniSettingsFromMemory(ImGuiPrefs);
ImGui::StyleColorsDark();
@@ -481,6 +485,8 @@ int main(int argc, char *argv[]) {
ImGui_File(&File, &State, &Memory, &UI, io);
+ ImGui_EffectsPanel(&File, &State, &Memory, &UI, io);
+
ImGui_PropertiesPanel(&File, &State, &UI, &Memory);
ImGui_Timeline(&File, &State, &Memory, &UI, io);
diff --git a/main.h b/main.h
index 3c81c43..612e24f 100644
--- a/main.h
+++ b/main.h
@@ -257,6 +257,7 @@ struct transform_info {
uint32 FullLayerWidth;
uint32 FullLayerHeight;
real32 LayerOpacity;
+ blend_mode BlendMode;
real32 OriginX;
real32 OriginY;
uint32 BufferPitch;
@@ -397,6 +398,9 @@ struct project_state
int32 MsgTime; // currently in "frames"
char *Msg;
+
+ ImGuiTextFilter filter; // This filter API is pretty ballin'.
+ bool32 RerouteEffects; // Allows shift+space hotkey to gain focus on the effects panel.
};
struct brush_tool
@@ -415,7 +419,7 @@ enum focused_window
struct ui
{
- real32 TimelineSplit = 200;
+ real32 TimelineSplit = 600;
real32 TimelineZoom;
ImVec2 CompZoom;
diff --git a/my_imgui_widgets.cpp b/my_imgui_widgets.cpp
index 440694b..f94e47c 100644
--- a/my_imgui_widgets.cpp
+++ b/my_imgui_widgets.cpp
@@ -202,9 +202,9 @@ ImGui_Viewport(project_data File, project_state *State, ui *UI, pixel_buffer Com
// }
ImGui::OpenPopupOnItemClick("context", ImGuiPopupFlags_MouseButtonMiddle);
if (ImGui::BeginPopup("context")) {
- if (ImGui::MenuItem("Scalar", NULL, false, InstructionMode != scalar_only)) { InstructionMode = scalar_only; }
- if (ImGui::MenuItem("SSE", NULL, false, InstructionMode != sse_enabled)) { InstructionMode = sse_enabled; }
- if (ImGui::MenuItem("AVX2", NULL, false, InstructionMode != avx_enabled)) { InstructionMode = avx_enabled; }
+ if (ImGui::MenuItem("Scalar", NULL, false, InstructionMode != scalar_only)) { InstructionMode = scalar_only; State->UpdateFrame = true; }
+ if (ImGui::MenuItem("SSE", NULL, false, InstructionMode != sse_enabled)) { InstructionMode = sse_enabled; State->UpdateFrame = true; }
+ if (ImGui::MenuItem("AVX2", NULL, false, InstructionMode != avx_enabled)) { InstructionMode = avx_enabled; State->UpdateFrame = true; }
ImGui::EndPopup();
}
if (IsActive && ImGui::IsMouseDragging(ImGuiMouseButton_Left, -1.0f) && ImGui::IsKeyDown(ImGuiKey_Z))
@@ -327,6 +327,43 @@ ImGui_File(project_data *File, project_state *State, memory *Memory, ui *UI, ImG
ImGui::End();
}
+internal void
+ImGui_EffectsPanel(project_data *File, project_state *State, memory *Memory, ui *UI, ImGuiIO io)
+{ // Draws the "Effects list" window: a filter text box, then one selectable row per EffectList entry that passes the filter. File, Memory and UI are not used in this body.
+ ImGui::Begin("Effects list", NULL);
+ if (State->RerouteEffects) { // One-shot request to move keyboard focus into this panel.
+ ImGui::SetKeyboardFocusHere(); // Applies to the next submitted widget, i.e. the InputText below.
+ State->RerouteEffects = 0;
+ }
+ int value_changed = ImGui::InputText("Effect name...", State->filter.InputBuf, IM_ARRAYSIZE(State->filter.InputBuf),
+ ImGuiInputTextFlags_CallbackCompletion, EffectConsoleCallback); // Edits the filter's own buffer in place; the callback is registered for the completion key.
+
+ if (Hacko) { // Hacko and EffectSel are defined outside this function. NOTE(review): presumably Hacko is raised by EffectConsoleCallback - confirm.
+ if (!io.KeyShift) // Holding Shift steps the selection index backwards instead of forwards.
+ EffectSel++;
+ else
+ EffectSel--;
+ Hacko = 0; // Consume the request so the step is applied only once.
+ }
+ if (value_changed) { // The text was edited this frame: rebuild the filter and drop the current selection.
+ State->filter.Build();
+ EffectSel = -1;
+ }
+ for (int32 i = 0; i < AmountOf(EffectList); i++) { // NOTE(review): EffectSel indexes the unfiltered list and is not clamped in this function.
+ if (State->filter.PassFilter(EffectList[i].Name)) {
+ if (EffectSel == i) { // The bool handed to Selectable only drives highlighting; neither it nor the return value is read back.
+ bool t = true;
+ ImGui::Selectable(EffectList[i].Name, &t);
+ } else {
+ bool s = false;
+ ImGui::Selectable(EffectList[i].Name, &s);
+ }
+ // ImGui::Text(EffectList[i].Name);
+ }
+ }
+ ImGui::End();
+}
+
internal void
ImGui_Timeline(project_data *File, project_state *State, memory *Memory, ui *UI, ImGuiIO io)
@@ -458,6 +495,24 @@ ImGui_Timeline(project_data *File, project_state *State, memory *Memory, ui *UI,
ImGui::Button("V"); ImGui::SameLine();
ImGui::Button("I"); ImGui::SameLine();
ImGui::Text(Layer->Name); ImGui::SameLine();
+ ImGui::Button(BlendmodeNames[Layer->BlendMode]);
+ ImGui::OpenPopupOnItemClick("blendmode_picker", ImGuiPopupFlags_MouseButtonLeft);
+ if (ImGui::BeginPopup("blendmode_picker")) {
+ for (int16 b = 0; b < AmountOf(BlendmodeNames); b++) {
+ if (ImGui::MenuItem(BlendmodeNames[b], NULL, false, Layer->BlendMode != b)) {
+ Layer->BlendMode = (blend_mode)b;
+ State->UpdateFrame = true;
+ }
+ // using IsActivated here instead of above loop doesn't seem to
+ // work; the popup gets closed instead
+ if (ImGui::IsItemHovered() && io.KeyCtrl) {
+ Layer->BlendMode = (blend_mode)b;
+ State->UpdateFrame = true;
+ }
+ }
+ ImGui::EndPopup();
+ }
+ ImGui::SameLine();
ImGui::SetCursorScreenPos(ImVec2(SidebarStartingPos.x, ImGui::GetCursorScreenPos().y));
ImGui::Button("##mover", ImVec2(SidebarSizeWithBorder.x, FontHeight + FramePadding.y*2));
@@ -534,6 +589,9 @@ ImGui_Timeline(project_data *File, project_state *State, memory *Memory, ui *UI,
ImGui::SetCursorScreenPos(ImVec2(WindowMinAbs.x + UI->TimelineSplit - TimelineBorderPadding.x, TimelineAbsolutePos.y));
ImGui::InvisibleButton("##SplitMove", ImVec2(TimelineBorderPadding.x, SidebarSizeWithBorder.y), ImGuiButtonFlags_MouseButtonLeft);
+ if (ImGui::IsItemHovered()) {
+ ImGui::SetMouseCursor(ImGuiMouseCursor_ResizeEW);
+ }
if (ImGui::IsItemActive() && ImGui::IsMouseDragging(ImGuiMouseButton_Left, -1))
{
UI->TimelineSplit += io.MouseDelta.x;
@@ -980,9 +1038,14 @@ ImGui_ProcessInputs(project_data *File, project_state *State, pixel_buffer *Comp
if (ImGui::IsKeyPressed(ImGuiKey_Space)) {
- SwitchBool(State->IsPlaying);
+ if (io.KeyShift) {
+ State->RerouteEffects = true;
+ } else {
+ SwitchBool(State->IsPlaying);
+ }
}
+
if (State->IsPlaying && !IsRendering) {
IncrementFrame(File, 1);
State->UpdateFrame = true;
@@ -1024,6 +1087,10 @@ ImGui_ProcessInputs(project_data *File, project_state *State, pixel_buffer *Comp
}
#if DEBUG
+ if (ImGui::IsKeyPressed(ImGuiKey_W))
+ {
+ SwitchBool(Debug.ToggleWindow);
+ }
if (ImGui::IsKeyPressed(ImGuiKey_M))
{
Debug.Markers[Debug.MarkerIndex] = File->CurrentFrame;
@@ -1065,9 +1132,9 @@ ImGui_ProcessInputs(project_data *File, project_state *State, pixel_buffer *Comp
}
}
-
global_variable char ImGuiPrefs[] = "[Window][DockSpaceViewport_11111111]"
-"\nSize=3153,1837"
+"\nPos=0,0"
+"\nSize=3200,1800"
"\nCollapsed=0"
"\n"
"\n[Window][Debug##Default]"
@@ -1077,18 +1144,19 @@ global_variable char ImGuiPrefs[] = "[Window][DockSpaceViewport_11111111]"
"\n"
"\n[Window][Viewport]"
"\nPos=528,0"
-"\nSize=2121,1208"
+"\nSize=2168,1171"
"\nCollapsed=0"
"\nDockId=0x00000005,0"
"\n"
"\n[Window][###Properties]"
-"\nSize=526,1208"
+"\nPos=0,0"
+"\nSize=526,1171"
"\nCollapsed=0"
"\nDockId=0x00000003,0"
"\n"
"\n[Window][Timeline]"
-"\nPos=0,1210"
-"\nSize=3153,627"
+"\nPos=0,1173"
+"\nSize=3200,627"
"\nCollapsed=0"
"\nDockId=0x00000002,0"
"\n"
@@ -1098,17 +1166,24 @@ global_variable char ImGuiPrefs[] = "[Window][DockSpaceViewport_11111111]"
"\nCollapsed=0"
"\n"
"\n[Window][Files]"
-"\nPos=2651,0"
-"\nSize=502,1208"
+"\nPos=2698,0"
+"\nSize=502,913"
+"\nCollapsed=0"
+"\nDockId=0x00000007,0"
+"\n"
+"\n[Window][Effects list]"
+"\nPos=2698,915"
+"\nSize=502,256"
"\nCollapsed=0"
-"\nDockId=0x00000006,0"
+"\nDockId=0x00000008,0"
"\n"
"\n[Docking][Data]"
-"\nDockSpace ID=0x8B93E3BD Pos=0,0 Size=3153,1837 Split=Y Selected=0x13926F0B"
-"\n DockNode ID=0x00000001 Parent=0x8B93E3BD SizeRef=3200,1171 Split=X Selected=0x13926F0B"
-"\n DockNode ID=0x00000003 Parent=0x00000001 SizeRef=526,1171 Selected=0xDBB8CEFA"
-"\n DockNode ID=0x00000004 Parent=0x00000001 SizeRef=2672,1171 Split=X Selected=0x13926F0B"
-"\n DockNode ID=0x00000005 Parent=0x00000004 SizeRef=2115,1171 CentralNode=1 Selected=0x13926F0B"
-"\n DockNode ID=0x00000006 Parent=0x00000004 SizeRef=502,1171 Selected=0x86FA2F90"
-"\n DockNode ID=0x00000002 Parent=0x8B93E3BD SizeRef=3200,627 HiddenTabBar=1 Selected=0x0F18B61B"
-"\n";
+"\nDockSpace ID=0x8B93E3BD Window=0xA787BDB4 Pos=0,0 Size=3200,1800 Split=Y Selected=0x13926F0B"
+"\n DockNode ID=0x00000001 Parent=0x8B93E3BD SizeRef=3200,1171 Split=X Selected=0x13926F0B"
+"\n DockNode ID=0x00000003 Parent=0x00000001 SizeRef=526,1171 Selected=0xDBB8CEFA"
+"\n DockNode ID=0x00000004 Parent=0x00000001 SizeRef=2672,1171 Split=X Selected=0x13926F0B"
+"\n DockNode ID=0x00000005 Parent=0x00000004 SizeRef=2115,1171 CentralNode=1 Selected=0x13926F0B"
+"\n DockNode ID=0x00000006 Parent=0x00000004 SizeRef=502,1171 Split=Y Selected=0x86FA2F90"
+"\n DockNode ID=0x00000007 Parent=0x00000006 SizeRef=502,913 Selected=0x86FA2F90"
+"\n DockNode ID=0x00000008 Parent=0x00000006 SizeRef=502,256 Selected=0x812F222D"
+"\n DockNode ID=0x00000002 Parent=0x8B93E3BD SizeRef=3200,627 HiddenTabBar=1 Selected=0x0F18B61B";
diff --git a/prenderer.cpp b/prenderer.cpp
index 72c2893..5df28f4 100644
--- a/prenderer.cpp
+++ b/prenderer.cpp
@@ -81,7 +81,8 @@ CalculateTransforms(project_layer *Layer, pixel_buffer *Buffer)
TransformInfo.LayerHeight = (real32)Source->Raster.Height;
TransformInfo.FullLayerWidth = Source->Raster.FullWidth;
TransformInfo.FullLayerHeight = Source->Raster.FullHeight;
- TransformInfo.LayerOpacity = 1.0f - Layer->opacity.CurrentValue.f;
+ TransformInfo.LayerOpacity = Layer->opacity.CurrentValue.f;
+ TransformInfo.BlendMode =Layer->BlendMode;
TransformInfo.OriginX = Origin.x;
TransformInfo.OriginY = Origin.y;
TransformInfo.BufferPitch = Buffer->Pitch;
@@ -152,6 +153,15 @@ QueueCurrentFrame(project_data *File, pixel_buffer *CompBuffer, project_state *S
for (int x = 0; x < 4; x++) {
// if (x == y) {
rectangle RenderRegion = {TileWidth*x, TileHeight*y, TileWidth + TileWidth*x, TileHeight + TileHeight*y};
+ // The render regions always have to be aligned to the top left of
+ // a 4x4 chunk (at least for AVX2) and cannot exceed the bounds of
+ // the comp.
+ // It seems we don't need any special math to guarantee this aside
+ // from snapping each edge down to a multiple of 4 (x -= x % 4).
+ RenderRegion.Min.x -= RenderRegion.Min.x % 4;
+ RenderRegion.Min.y -= RenderRegion.Min.y % 4;
+ RenderRegion.Max.x -= RenderRegion.Max.x % 4;
+ RenderRegion.Max.y -= RenderRegion.Max.y % 4;
if (RenderRegion.Max.x > CompBuffer->Width)
RenderRegion.Max.x = CompBuffer->Width;
if (RenderRegion.Max.y > CompBuffer->Height)
@@ -379,8 +389,11 @@ AVX2_RenderLayer(transform_info T, pixel_buffer *Buffer, rectangle RenderRegion)
__m256 OriginX = _mm256_set1_ps(T.OriginX);
__m256 OriginY = _mm256_set1_ps(T.OriginY);
+ __m256 ClipPrevent = _mm256_set1_ps(0.001f);
__m256 One = _mm256_set1_ps(1);
+ __m256 Two = _mm256_set1_ps(2);
__m256 Zero = _mm256_set1_ps(0);
+ __m256 ZeroPointFive = _mm256_set1_ps(0.5);
__m256i Zeroi = _mm256_set1_epi32(0);
__m256i Onei = _mm256_set1_epi32(1);
__m256 Four = _mm256_set1_ps(4);
@@ -389,7 +402,7 @@ AVX2_RenderLayer(transform_info T, pixel_buffer *Buffer, rectangle RenderRegion)
__m256i BottomTwoBits = _mm256_set1_epi32(0x03);
__m256i Fouri = _mm256_set1_epi32(4);
__m256i Sixteeni = _mm256_set1_epi32(16);
- __m256 Reg255 = _mm256_set1_ps(255.0f);
+ __m256 Real255 = _mm256_set1_ps(255.0f);
__m256i Int255 = _mm256_set1_epi32(255);
__m256 Norm255 = _mm256_set1_ps(1/255.0f);
// __m256i White = _mm256_setr_epi32(0xFFFFFFFF, 0, 0, 0, 0xFFFFFFFF, 0, 0, 0);
@@ -450,7 +463,7 @@ AVX2_RenderLayer(transform_info T, pixel_buffer *Buffer, rectangle RenderRegion)
__m256i TexXIntPlusOne = _mm256_add_epi32(TexXInt, _mm256_and_si256(_mm256_cmpgt_epi32(LayerWidthMinusOne, TexXInt), Onei));
__m256i TexYInt = _mm256_cvttps_epi32(TexYFull);
__m256i TexYIntPlusOne = _mm256_add_epi32(TexYInt, _mm256_and_si256(_mm256_cmpgt_epi32(LayerHeightMinusOne, TexYInt), Onei));
- // NOTE(fox): The comparison is for when we're on the last pixel.
+ // NOTE(fox): The comparison is for when we're on the last pixel of the texel.
__m256 TexX = _mm256_sub_ps(TexXFull, _mm256_cvtepi32_ps(TexXInt));
__m256 TexY = _mm256_sub_ps(TexYFull, _mm256_cvtepi32_ps(TexYInt));
@@ -481,71 +494,190 @@ AVX2_RenderLayer(transform_info T, pixel_buffer *Buffer, rectangle RenderRegion)
__m256i PixelsBL = _mm256_i32gather_epi32((const int32 *)TexPTR, PixelLookupBL, 4);
__m256i PixelsBR = _mm256_i32gather_epi32((const int32 *)TexPTR, PixelLookupBR, 4);
- __m256i R_TexTL = _mm256_and_si256( PixelsTL, FF);
- __m256i G_TexTL = _mm256_and_si256(_mm256_srli_epi32(PixelsTL, 8), FF);
- __m256i B_TexTL = _mm256_and_si256(_mm256_srli_epi32(PixelsTL, 16), FF);
- __m256i A_TexTL = _mm256_and_si256(_mm256_srli_epi32(PixelsTL, 24), FF);
-
- __m256i R_TexTR = _mm256_and_si256( PixelsTR, FF);
- __m256i G_TexTR = _mm256_and_si256(_mm256_srli_epi32(PixelsTR, 8), FF);
- __m256i B_TexTR = _mm256_and_si256(_mm256_srli_epi32(PixelsTR, 16), FF);
- __m256i A_TexTR = _mm256_and_si256(_mm256_srli_epi32(PixelsTR, 24), FF);
-
- __m256i R_TexBL = _mm256_and_si256( PixelsBL, FF);
- __m256i G_TexBL = _mm256_and_si256(_mm256_srli_epi32(PixelsBL, 8), FF);
- __m256i B_TexBL = _mm256_and_si256(_mm256_srli_epi32(PixelsBL, 16), FF);
- __m256i A_TexBL = _mm256_and_si256(_mm256_srli_epi32(PixelsBL, 24), FF);
-
- __m256i R_TexBR = _mm256_and_si256( PixelsBR, FF);
- __m256i G_TexBR = _mm256_and_si256(_mm256_srli_epi32(PixelsBR, 8), FF);
- __m256i B_TexBR = _mm256_and_si256(_mm256_srli_epi32(PixelsBR, 16), FF);
- __m256i A_TexBR = _mm256_and_si256(_mm256_srli_epi32(PixelsBR, 24), FF);
-
- __m256 R_PixelBlend = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(TexBothInv, _mm256_cvtepi32_ps(R_TexTL)),
- _mm256_mul_ps(TexBothYInv, _mm256_cvtepi32_ps(R_TexTR))),
- _mm256_add_ps(_mm256_mul_ps(TexBothXInv, _mm256_cvtepi32_ps(R_TexBL)),
- _mm256_mul_ps(TexBoth, _mm256_cvtepi32_ps(R_TexBR))));
- __m256 G_PixelBlend = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(TexBothInv, _mm256_cvtepi32_ps(G_TexTL)),
- _mm256_mul_ps(TexBothYInv, _mm256_cvtepi32_ps(G_TexTR))),
- _mm256_add_ps(_mm256_mul_ps(TexBothXInv, _mm256_cvtepi32_ps(G_TexBL)),
- _mm256_mul_ps(TexBoth, _mm256_cvtepi32_ps(G_TexBR))));
- __m256 B_PixelBlend = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(TexBothInv, _mm256_cvtepi32_ps(B_TexTL)),
- _mm256_mul_ps(TexBothYInv, _mm256_cvtepi32_ps(B_TexTR))),
- _mm256_add_ps(_mm256_mul_ps(TexBothXInv, _mm256_cvtepi32_ps(B_TexBL)),
- _mm256_mul_ps(TexBoth, _mm256_cvtepi32_ps(B_TexBR))));
- __m256 A_PixelBlend = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(TexBothInv, _mm256_cvtepi32_ps(A_TexTL)),
- _mm256_mul_ps(TexBothYInv, _mm256_cvtepi32_ps(A_TexTR))),
- _mm256_add_ps(_mm256_mul_ps(TexBothXInv, _mm256_cvtepi32_ps(A_TexBL)),
- _mm256_mul_ps(TexBoth, _mm256_cvtepi32_ps(A_TexBR))));
-
- A_PixelBlend = _mm256_sub_ps(A_PixelBlend, _mm256_mul_ps(A_PixelBlend, LayerOpacity));
-
- __m256i R_Out, G_Out, B_Out, A_Out;
- // Only do alpha blending if a pixel's value doesn't equal 255
- if (_mm256_movemask_epi8(_mm256_sub_epi32(_mm256_cvtps_epi32(A_PixelBlend), Int255)))
+ __m256 R_TexTL = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256( PixelsTL, FF)), Norm255);
+ __m256 G_TexTL = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(PixelsTL, 8), FF)), Norm255);
+ __m256 B_TexTL = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(PixelsTL, 16), FF)), Norm255);
+ __m256 A_TexTL = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(PixelsTL, 24), FF)), Norm255);
+
+ __m256 R_TexTR = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256( PixelsTR, FF)), Norm255);
+ __m256 G_TexTR = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(PixelsTR, 8), FF)), Norm255);
+ __m256 B_TexTR = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(PixelsTR, 16), FF)), Norm255);
+ __m256 A_TexTR = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(PixelsTR, 24), FF)), Norm255);
+
+ __m256 R_TexBL = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256( PixelsBL, FF)), Norm255);
+ __m256 G_TexBL = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(PixelsBL, 8), FF)), Norm255);
+ __m256 B_TexBL = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(PixelsBL, 16), FF)), Norm255);
+ __m256 A_TexBL = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(PixelsBL, 24), FF)), Norm255);
+
+ __m256 R_TexBR = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256( PixelsBR, FF)), Norm255);
+ __m256 G_TexBR = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(PixelsBR, 8), FF)), Norm255);
+ __m256 B_TexBR = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(PixelsBR, 16), FF)), Norm255);
+ __m256 A_TexBR = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(PixelsBR, 24), FF)), Norm255);
+
+ __m256 R_Col = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(TexBothInv, R_TexTL),
+ _mm256_mul_ps(TexBothYInv, R_TexTR)),
+ _mm256_add_ps(_mm256_mul_ps(TexBothXInv, R_TexBL),
+ _mm256_mul_ps(TexBoth, R_TexBR)));
+ __m256 G_Col = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(TexBothInv, G_TexTL),
+ _mm256_mul_ps(TexBothYInv, G_TexTR)),
+ _mm256_add_ps(_mm256_mul_ps(TexBothXInv, G_TexBL),
+ _mm256_mul_ps(TexBoth, G_TexBR)));
+ __m256 B_Col = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(TexBothInv, B_TexTL),
+ _mm256_mul_ps(TexBothYInv, B_TexTR)),
+ _mm256_add_ps(_mm256_mul_ps(TexBothXInv, B_TexBL),
+ _mm256_mul_ps(TexBoth, B_TexBR)));
+ __m256 A_Col = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(TexBothInv, A_TexTL),
+ _mm256_mul_ps(TexBothYInv, A_TexTR)),
+ _mm256_add_ps(_mm256_mul_ps(TexBothXInv, A_TexBL),
+ _mm256_mul_ps(TexBoth, A_TexBR)));
+
+ __m256 LayerAlpha = _mm256_mul_ps(A_Col, LayerOpacity);
+ __m256 LayerAlphaInv = _mm256_sub_ps(One, LayerAlpha);
+
+ // Hoisted out of some blend modes; maybe it'd be better to just keep them in there.
+ __m256 R_Colx2 = _mm256_mul_ps(R_Col, Two);
+ __m256 R_ColInv = _mm256_sub_ps(One, R_Col);
+
+ __m256 G_Colx2 = _mm256_mul_ps(G_Col, Two);
+ __m256 G_ColInv = _mm256_sub_ps(One, G_Col);
+
+ __m256 B_Colx2 = _mm256_mul_ps(B_Col, Two);
+ __m256 B_ColInv = _mm256_sub_ps(One, B_Col);
+
+ __m256 R_Blend = R_Col;
+ __m256 G_Blend = G_Col;
+ __m256 B_Blend = B_Col;
+ __m256 A_Blend = LayerAlpha;
+
+ // Only load the dest pixel if we actually need to: at least one of the 8 lanes isn't fully opaque (per-lane equality mask != 0xFF), or the blend mode requires it.
+ if (_mm256_movemask_ps(_mm256_cmp_ps(LayerAlpha, One, _CMP_EQ_OQ)) != 0xFF || T.BlendMode != blend_normal)
{
- __m256 LayerAlpha = _mm256_mul_ps(A_PixelBlend, Norm255);
- __m256 LayerAlphaInv = _mm256_mul_ps(_mm256_sub_ps(Reg255, A_PixelBlend), Norm255);
-
__m256i DestPixel = _mm256_loadu_si256((const __m256i *)Pixel);
- __m256i R_Dest = _mm256_and_si256( DestPixel, FF);
- __m256i G_Dest = _mm256_and_si256(_mm256_srli_epi32(DestPixel, 8), FF);
- __m256i B_Dest = _mm256_and_si256(_mm256_srli_epi32(DestPixel, 16), FF);
- __m256i A_Dest = _mm256_and_si256(_mm256_srli_epi32(DestPixel, 24), FF);
-
- R_Out = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(_mm256_cvtepi32_ps(R_Dest), LayerAlphaInv), _mm256_mul_ps(R_PixelBlend, LayerAlpha)));
- G_Out = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(_mm256_cvtepi32_ps(G_Dest), LayerAlphaInv), _mm256_mul_ps(G_PixelBlend, LayerAlpha)));
- B_Out = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(_mm256_cvtepi32_ps(B_Dest), LayerAlphaInv), _mm256_mul_ps(B_PixelBlend, LayerAlpha)));
- A_Out = _mm256_cvtps_epi32(_mm256_min_ps(_mm256_add_ps(_mm256_cvtepi32_ps(A_Dest), A_PixelBlend), Reg255));
- }
- else
- {
- R_Out = _mm256_cvtps_epi32(R_PixelBlend);
- G_Out = _mm256_cvtps_epi32(G_PixelBlend);
- B_Out = _mm256_cvtps_epi32(B_PixelBlend);
- A_Out = _mm256_cvtps_epi32(A_PixelBlend);
+ __m256 R_Dest = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256( DestPixel, FF)), Norm255);
+ __m256 G_Dest = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(DestPixel, 8), FF)), Norm255);
+ __m256 B_Dest = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(DestPixel, 16), FF)), Norm255);
+ __m256 A_Dest = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(DestPixel, 24), FF)), Norm255);
+
+ switch (T.BlendMode)
+ {
+ case blend_normal:
+ {
+ } break;
+ case blend_multiply:
+ {
+ R_Blend = _mm256_mul_ps(R_Dest, R_Col);
+ G_Blend = _mm256_mul_ps(G_Dest, G_Col);
+ B_Blend = _mm256_mul_ps(B_Dest, B_Col);
+ } break;
+ case blend_colorburn:
+ {
+ // NOTE(fox): A small amount is added to Col since images with zero for alpha may also zero out the
+ // color channels, causing black clipping.
+ R_Blend = _mm256_sub_ps(One, _mm256_div_ps(_mm256_sub_ps(One, R_Dest), _mm256_add_ps(R_Col, ClipPrevent)));
+ G_Blend = _mm256_sub_ps(One, _mm256_div_ps(_mm256_sub_ps(One, G_Dest), _mm256_add_ps(G_Col, ClipPrevent)));
+ B_Blend = _mm256_sub_ps(One, _mm256_div_ps(_mm256_sub_ps(One, B_Dest), _mm256_add_ps(B_Col, ClipPrevent)));
+ } break;
+ case blend_linearburn:
+ {
+ R_Blend = _mm256_sub_ps(_mm256_add_ps(R_Dest, R_Col), One);
+ G_Blend = _mm256_sub_ps(_mm256_add_ps(G_Dest, G_Col), One);
+ B_Blend = _mm256_sub_ps(_mm256_add_ps(B_Dest, B_Col), One);
+ } break;
+ case blend_add:
+ {
+ R_Blend = _mm256_add_ps(R_Dest, R_Col);
+ G_Blend = _mm256_add_ps(G_Dest, G_Col);
+ B_Blend = _mm256_add_ps(B_Dest, B_Col);
+ } break;
+ case blend_screen:
+ {
+ R_Blend = _mm256_sub_ps(One, _mm256_mul_ps(_mm256_sub_ps(One, R_Dest), R_ColInv));
+ G_Blend = _mm256_sub_ps(One, _mm256_mul_ps(_mm256_sub_ps(One, G_Dest), G_ColInv));
+ B_Blend = _mm256_sub_ps(One, _mm256_mul_ps(_mm256_sub_ps(One, B_Dest), B_ColInv));
+ } break;
+ case blend_overlay:
+ {
+ __m256 R_Mask = _mm256_cmp_ps(R_Dest, ZeroPointFive, 1);
+ __m256 G_Mask = _mm256_cmp_ps(G_Dest, ZeroPointFive, 1);
+ __m256 B_Mask = _mm256_cmp_ps(B_Dest, ZeroPointFive, 1);
+ __m256 R_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(R_Dest, R_Col));
+ __m256 G_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(G_Dest, G_Col));
+ __m256 B_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(B_Dest, B_Col));
+ __m256 R_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, R_Dest), R_ColInv)));
+ __m256 G_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, G_Dest), G_ColInv)));
+ __m256 B_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, B_Dest), B_ColInv)));
+ R_Blend = _mm256_blendv_ps(R_Upper, R_Lower, R_Mask);
+ G_Blend = _mm256_blendv_ps(G_Upper, G_Lower, G_Mask);
+ B_Blend = _mm256_blendv_ps(B_Upper, B_Lower, B_Mask);
+ } break;
+ case blend_softlight:
+ {
+ // using Pegtop's equation
+ R_Blend = _mm256_add_ps(_mm256_mul_ps(_mm256_sub_ps(One, R_Colx2), _mm256_mul_ps(R_Dest, R_Dest)), _mm256_mul_ps(R_Colx2, R_Dest));
+ G_Blend = _mm256_add_ps(_mm256_mul_ps(_mm256_sub_ps(One, G_Colx2), _mm256_mul_ps(G_Dest, G_Dest)), _mm256_mul_ps(G_Colx2, G_Dest));
+ B_Blend = _mm256_add_ps(_mm256_mul_ps(_mm256_sub_ps(One, B_Colx2), _mm256_mul_ps(B_Dest, B_Dest)), _mm256_mul_ps(B_Colx2, B_Dest));
+ } break;
+ case blend_hardlight:
+ {
+ __m256 R_Mask = _mm256_cmp_ps(R_Col, ZeroPointFive, 1); // Hard light is overlay with the layers swapped: the branch is chosen by the layer colour (< 0.5 -> multiply, else screen).
+ __m256 G_Mask = _mm256_cmp_ps(G_Col, ZeroPointFive, 1);
+ __m256 B_Mask = _mm256_cmp_ps(B_Col, ZeroPointFive, 1);
+ __m256 R_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(R_Dest, R_Col));
+ __m256 G_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(G_Dest, G_Col));
+ __m256 B_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(B_Dest, B_Col));
+ __m256 R_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, R_Dest), R_ColInv)));
+ __m256 G_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, G_Dest), G_ColInv)));
+ __m256 B_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, B_Dest), B_ColInv)));
+ R_Blend = _mm256_blendv_ps(R_Upper, R_Lower, R_Mask);
+ G_Blend = _mm256_blendv_ps(G_Upper, G_Lower, G_Mask);
+ B_Blend = _mm256_blendv_ps(B_Upper, B_Lower, B_Mask);
+ } break;
+ case blend_subtract:
+ {
+ R_Blend = _mm256_sub_ps(R_Dest, R_Col);
+ G_Blend = _mm256_sub_ps(G_Dest, G_Col);
+ B_Blend = _mm256_sub_ps(B_Dest, B_Col);
+ } break;
+ case blend_divide:
+ {
+ R_Blend = _mm256_div_ps(R_Dest, _mm256_add_ps(R_Col, ClipPrevent));
+ G_Blend = _mm256_div_ps(G_Dest, _mm256_add_ps(G_Col, ClipPrevent));
+ B_Blend = _mm256_div_ps(B_Dest, _mm256_add_ps(B_Col, ClipPrevent));
+ } break;
+ case blend_difference:
+ {
+ __m256 R_Lower = _mm256_sub_ps(R_Col, R_Dest);
+ __m256 G_Lower = _mm256_sub_ps(G_Col, G_Dest);
+ __m256 B_Lower = _mm256_sub_ps(B_Col, B_Dest);
+ __m256 R_Upper = _mm256_sub_ps(R_Dest, R_Col);
+ __m256 G_Upper = _mm256_sub_ps(G_Dest, G_Col);
+ __m256 B_Upper = _mm256_sub_ps(B_Dest, B_Col);
+ __m256 R_Mask = _mm256_cmp_ps(R_Lower, Zero, 14);
+ __m256 G_Mask = _mm256_cmp_ps(G_Lower, Zero, 14);
+ __m256 B_Mask = _mm256_cmp_ps(B_Lower, Zero, 14);
+ R_Blend = _mm256_blendv_ps(R_Upper, R_Lower, R_Mask);
+ G_Blend = _mm256_blendv_ps(G_Upper, G_Lower, G_Mask);
+ B_Blend = _mm256_blendv_ps(B_Upper, B_Lower, B_Mask);
+ } break;
+ }
+
+ R_Blend = _mm256_add_ps(_mm256_mul_ps(R_Dest, LayerAlphaInv), _mm256_mul_ps(R_Blend, LayerAlpha));
+ G_Blend = _mm256_add_ps(_mm256_mul_ps(G_Dest, LayerAlphaInv), _mm256_mul_ps(G_Blend, LayerAlpha));
+ B_Blend = _mm256_add_ps(_mm256_mul_ps(B_Dest, LayerAlphaInv), _mm256_mul_ps(B_Blend, LayerAlpha));
+
+ // Standard behavior in photo apps is for blend modes to
+ // inherit underlying opacity instead of adding to it.
+ if (T.BlendMode == blend_normal)
+ A_Blend = _mm256_add_ps(A_Dest, LayerAlpha);
+ else
+ A_Blend = A_Dest;
}
+ __m256i R_Out = _mm256_cvtps_epi32(_mm256_mul_ps(_mm256_max_ps(_mm256_min_ps(One, R_Blend), Zero), Real255));
+ __m256i G_Out = _mm256_cvtps_epi32(_mm256_mul_ps(_mm256_max_ps(_mm256_min_ps(One, G_Blend), Zero), Real255));
+ __m256i B_Out = _mm256_cvtps_epi32(_mm256_mul_ps(_mm256_max_ps(_mm256_min_ps(One, B_Blend), Zero), Real255));
+ __m256i A_Out = _mm256_cvtps_epi32(_mm256_mul_ps(_mm256_max_ps(_mm256_min_ps(One, A_Blend), Zero), Real255));
+
__m256i OutputPixel = _mm256_or_si256(
_mm256_or_si256(R_Out, _mm256_slli_epi32(G_Out, 8)),
_mm256_or_si256(_mm256_slli_epi32(B_Out, 16), _mm256_slli_epi32(A_Out, 24)));
@@ -585,8 +717,11 @@ SSE2_RenderLayer(transform_info T, pixel_buffer *Buffer, rectangle RenderRegion)
__m128 OriginX = _mm_set1_ps(T.OriginX);
__m128 OriginY = _mm_set1_ps(T.OriginY);
+ __m128 ClipPrevent = _mm_set1_ps(0.001f);
__m128 One = _mm_set1_ps(1);
+ __m128 Two = _mm_set1_ps(2);
__m128 Zero = _mm_set1_ps(0);
+ __m128 ZeroPointFive = _mm_set1_ps(0.5);
__m128i Zeroi = _mm_set1_epi32(0);
__m128i Onei = _mm_set1_epi32(1);
__m128 Four = _mm_set1_ps(4);
@@ -707,71 +842,191 @@ SSE2_RenderLayer(transform_info T, pixel_buffer *Buffer, rectangle RenderRegion)
__m128i PixelsBL = _mm_setr_epi32(S_PixelsBL0, S_PixelsBL1, S_PixelsBL2, S_PixelsBL3);
__m128i PixelsBR = _mm_setr_epi32(S_PixelsBR0, S_PixelsBR1, S_PixelsBR2, S_PixelsBR3);
- __m128i R_TexTL = _mm_and_si128( PixelsTL, FF);
- __m128i G_TexTL = _mm_and_si128(_mm_srli_epi32(PixelsTL, 8), FF);
- __m128i B_TexTL = _mm_and_si128(_mm_srli_epi32(PixelsTL, 16), FF);
- __m128i A_TexTL = _mm_and_si128(_mm_srli_epi32(PixelsTL, 24), FF);
-
- __m128i R_TexTR = _mm_and_si128( PixelsTR, FF);
- __m128i G_TexTR = _mm_and_si128(_mm_srli_epi32(PixelsTR, 8), FF);
- __m128i B_TexTR = _mm_and_si128(_mm_srli_epi32(PixelsTR, 16), FF);
- __m128i A_TexTR = _mm_and_si128(_mm_srli_epi32(PixelsTR, 24), FF);
-
- __m128i R_TexBL = _mm_and_si128( PixelsBL, FF);
- __m128i G_TexBL = _mm_and_si128(_mm_srli_epi32(PixelsBL, 8), FF);
- __m128i B_TexBL = _mm_and_si128(_mm_srli_epi32(PixelsBL, 16), FF);
- __m128i A_TexBL = _mm_and_si128(_mm_srli_epi32(PixelsBL, 24), FF);
-
- __m128i R_TexBR = _mm_and_si128( PixelsBR, FF);
- __m128i G_TexBR = _mm_and_si128(_mm_srli_epi32(PixelsBR, 8), FF);
- __m128i B_TexBR = _mm_and_si128(_mm_srli_epi32(PixelsBR, 16), FF);
- __m128i A_TexBR = _mm_and_si128(_mm_srli_epi32(PixelsBR, 24), FF);
-
- __m128 R_PixelBlend = _mm_add_ps(_mm_add_ps(_mm_mul_ps(TexBothInv, _mm_cvtepi32_ps(R_TexTL)),
- _mm_mul_ps(TexBothYInv, _mm_cvtepi32_ps(R_TexTR))),
- _mm_add_ps(_mm_mul_ps(TexBothXInv, _mm_cvtepi32_ps(R_TexBL)),
- _mm_mul_ps(TexBoth, _mm_cvtepi32_ps(R_TexBR))));
- __m128 G_PixelBlend = _mm_add_ps(_mm_add_ps(_mm_mul_ps(TexBothInv, _mm_cvtepi32_ps(G_TexTL)),
- _mm_mul_ps(TexBothYInv, _mm_cvtepi32_ps(G_TexTR))),
- _mm_add_ps(_mm_mul_ps(TexBothXInv, _mm_cvtepi32_ps(G_TexBL)),
- _mm_mul_ps(TexBoth, _mm_cvtepi32_ps(G_TexBR))));
- __m128 B_PixelBlend = _mm_add_ps(_mm_add_ps(_mm_mul_ps(TexBothInv, _mm_cvtepi32_ps(B_TexTL)),
- _mm_mul_ps(TexBothYInv, _mm_cvtepi32_ps(B_TexTR))),
- _mm_add_ps(_mm_mul_ps(TexBothXInv, _mm_cvtepi32_ps(B_TexBL)),
- _mm_mul_ps(TexBoth, _mm_cvtepi32_ps(B_TexBR))));
- __m128 A_PixelBlend = _mm_add_ps(_mm_add_ps(_mm_mul_ps(TexBothInv, _mm_cvtepi32_ps(A_TexTL)),
- _mm_mul_ps(TexBothYInv, _mm_cvtepi32_ps(A_TexTR))),
- _mm_add_ps(_mm_mul_ps(TexBothXInv, _mm_cvtepi32_ps(A_TexBL)),
- _mm_mul_ps(TexBoth, _mm_cvtepi32_ps(A_TexBR))));
-
- A_PixelBlend = _mm_sub_ps(A_PixelBlend, _mm_mul_ps(A_PixelBlend, LayerOpacity));
+ __m128 R_TexTL = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128( PixelsTL, FF)), Norm255);
+ __m128 G_TexTL = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(PixelsTL, 8), FF)), Norm255);
+ __m128 B_TexTL = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(PixelsTL, 16), FF)), Norm255);
+ __m128 A_TexTL = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(PixelsTL, 24), FF)), Norm255);
+
+ __m128 R_TexTR = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128( PixelsTR, FF)), Norm255);
+ __m128 G_TexTR = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(PixelsTR, 8), FF)), Norm255);
+ __m128 B_TexTR = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(PixelsTR, 16), FF)), Norm255);
+ __m128 A_TexTR = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(PixelsTR, 24), FF)), Norm255);
+
+ __m128 R_TexBL = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128( PixelsBL, FF)), Norm255);
+ __m128 G_TexBL = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(PixelsBL, 8), FF)), Norm255);
+ __m128 B_TexBL = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(PixelsBL, 16), FF)), Norm255);
+ __m128 A_TexBL = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(PixelsBL, 24), FF)), Norm255);
+
+ __m128 R_TexBR = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128( PixelsBR, FF)), Norm255);
+ __m128 G_TexBR = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(PixelsBR, 8), FF)), Norm255);
+ __m128 B_TexBR = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(PixelsBR, 16), FF)), Norm255);
+ __m128 A_TexBR = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(PixelsBR, 24), FF)), Norm255);
+
+ __m128 R_Col = _mm_add_ps(_mm_add_ps(_mm_mul_ps(TexBothInv, R_TexTL),
+ _mm_mul_ps(TexBothYInv, R_TexTR)),
+ _mm_add_ps(_mm_mul_ps(TexBothXInv, R_TexBL),
+ _mm_mul_ps(TexBoth, R_TexBR)));
+ __m128 G_Col = _mm_add_ps(_mm_add_ps(_mm_mul_ps(TexBothInv, G_TexTL),
+ _mm_mul_ps(TexBothYInv, G_TexTR)),
+ _mm_add_ps(_mm_mul_ps(TexBothXInv, G_TexBL),
+ _mm_mul_ps(TexBoth, G_TexBR)));
+ __m128 B_Col = _mm_add_ps(_mm_add_ps(_mm_mul_ps(TexBothInv, B_TexTL),
+ _mm_mul_ps(TexBothYInv, B_TexTR)),
+ _mm_add_ps(_mm_mul_ps(TexBothXInv, B_TexBL),
+ _mm_mul_ps(TexBoth, B_TexBR)));
+ __m128 A_Col = _mm_add_ps(_mm_add_ps(_mm_mul_ps(TexBothInv, A_TexTL),
+ _mm_mul_ps(TexBothYInv, A_TexTR)),
+ _mm_add_ps(_mm_mul_ps(TexBothXInv, A_TexBL),
+ _mm_mul_ps(TexBoth, A_TexBR)));
+
__m128i R_Out, G_Out, B_Out, A_Out;
- // Only do alpha blending if a pixel's value doesn't equal 255
- if (_mm_movemask_epi8(_mm_sub_epi32(_mm_cvtps_epi32(A_PixelBlend), Int255)))
- {
- __m128 LayerAlpha = _mm_mul_ps(A_PixelBlend, Norm255);
- __m128 LayerAlphaInv = _mm_mul_ps(_mm_sub_ps(Reg255, A_PixelBlend), Norm255);
- __m128i DestPixel = _mm_loadu_si128((const __m128i *)Pixel);
- __m128i R_Dest = _mm_and_si128( DestPixel, FF);
- __m128i G_Dest = _mm_and_si128(_mm_srli_epi32(DestPixel, 8), FF);
- __m128i B_Dest = _mm_and_si128(_mm_srli_epi32(DestPixel, 16), FF);
- __m128i A_Dest = _mm_and_si128(_mm_srli_epi32(DestPixel, 24), FF);
-
- R_Out = _mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(R_Dest), LayerAlphaInv), _mm_mul_ps(R_PixelBlend, LayerAlpha)));
- G_Out = _mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(G_Dest), LayerAlphaInv), _mm_mul_ps(G_PixelBlend, LayerAlpha)));
- B_Out = _mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(B_Dest), LayerAlphaInv), _mm_mul_ps(B_PixelBlend, LayerAlpha)));
- A_Out = _mm_cvtps_epi32(_mm_min_ps(_mm_add_ps(_mm_cvtepi32_ps(A_Dest), A_PixelBlend), Reg255));
- }
- else
+ __m128 LayerAlpha = _mm_mul_ps(A_Col, LayerOpacity);
+ __m128 LayerAlphaInv = _mm_sub_ps(One, LayerAlpha);
+
+ __m128 R_Colx2 = _mm_mul_ps(R_Col, Two);
+ __m128 R_ColInv = _mm_sub_ps(One, R_Col);
+
+ __m128 G_Colx2 = _mm_mul_ps(G_Col, Two);
+ __m128 G_ColInv = _mm_sub_ps(One, G_Col);
+
+ __m128 B_Colx2 = _mm_mul_ps(B_Col, Two);
+ __m128 B_ColInv = _mm_sub_ps(One, B_Col);
+
+ __m128 R_Blend = R_Col;
+ __m128 G_Blend = G_Col;
+ __m128 B_Blend = B_Col;
+ __m128 A_Blend = LayerAlpha;
+
+ if (!_mm_movemask_epi8(_mm_cmpeq_ps(LayerAlpha, One)) || T.BlendMode != blend_normal)
{
- R_Out = _mm_cvtps_epi32(R_PixelBlend);
- G_Out = _mm_cvtps_epi32(G_PixelBlend);
- B_Out = _mm_cvtps_epi32(B_PixelBlend);
- A_Out = _mm_cvtps_epi32(A_PixelBlend);
+ __m128i DestPixel = _mm_loadu_si128((const __m128i *)Pixel);
+ __m128 R_Dest = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128( DestPixel, FF)), Norm255);
+ __m128 G_Dest = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(DestPixel, 8), FF)), Norm255);
+ __m128 B_Dest = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(DestPixel, 16), FF)), Norm255);
+ __m128 A_Dest = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(DestPixel, 24), FF)), Norm255);
+
+ switch (T.BlendMode)
+ {
+ case blend_normal:
+ {
+ } break;
+ case blend_multiply:
+ {
+ R_Blend = _mm_mul_ps(R_Dest, R_Col);
+ G_Blend = _mm_mul_ps(G_Dest, G_Col);
+ B_Blend = _mm_mul_ps(B_Dest, B_Col);
+ } break;
+ case blend_colorburn:
+ {
+ // NOTE(fox): A small amount is added to Col since images with zero for alpha may also zero out the
+ // color channels, causing black clipping.
+ R_Blend = _mm_sub_ps(One, _mm_div_ps(_mm_sub_ps(One, R_Dest), _mm_add_ps(R_Col, ClipPrevent)));
+ G_Blend = _mm_sub_ps(One, _mm_div_ps(_mm_sub_ps(One, G_Dest), _mm_add_ps(G_Col, ClipPrevent)));
+ B_Blend = _mm_sub_ps(One, _mm_div_ps(_mm_sub_ps(One, B_Dest), _mm_add_ps(B_Col, ClipPrevent)));
+ } break;
+ case blend_linearburn:
+ {
+ R_Blend = _mm_sub_ps(_mm_add_ps(R_Dest, R_Col), One);
+ G_Blend = _mm_sub_ps(_mm_add_ps(G_Dest, G_Col), One);
+ B_Blend = _mm_sub_ps(_mm_add_ps(B_Dest, B_Col), One);
+ } break;
+ case blend_add:
+ {
+ R_Blend = _mm_add_ps(R_Dest, R_Col);
+ G_Blend = _mm_add_ps(G_Dest, G_Col);
+ B_Blend = _mm_add_ps(B_Dest, B_Col);
+ } break;
+ case blend_screen:
+ {
+ R_Blend = _mm_sub_ps(One, _mm_mul_ps(_mm_sub_ps(One, R_Dest), R_ColInv));
+ G_Blend = _mm_sub_ps(One, _mm_mul_ps(_mm_sub_ps(One, G_Dest), G_ColInv));
+ B_Blend = _mm_sub_ps(One, _mm_mul_ps(_mm_sub_ps(One, B_Dest), B_ColInv));
+ } break;
+ case blend_overlay:
+ {
+ __m128 R_Mask = _mm_cmp_ps(R_Dest, ZeroPointFive, 1);
+ __m128 G_Mask = _mm_cmp_ps(G_Dest, ZeroPointFive, 1);
+ __m128 B_Mask = _mm_cmp_ps(B_Dest, ZeroPointFive, 1);
+ __m128 R_Lower = _mm_mul_ps(Two, _mm_mul_ps(R_Dest, R_Col));
+ __m128 G_Lower = _mm_mul_ps(Two, _mm_mul_ps(G_Dest, G_Col));
+ __m128 B_Lower = _mm_mul_ps(Two, _mm_mul_ps(B_Dest, B_Col));
+ __m128 R_Upper = _mm_sub_ps(One, _mm_mul_ps(Two, _mm_mul_ps(_mm_sub_ps(One, R_Dest), R_ColInv)));
+ __m128 G_Upper = _mm_sub_ps(One, _mm_mul_ps(Two, _mm_mul_ps(_mm_sub_ps(One, G_Dest), G_ColInv)));
+ __m128 B_Upper = _mm_sub_ps(One, _mm_mul_ps(Two, _mm_mul_ps(_mm_sub_ps(One, B_Dest), B_ColInv)));
+ R_Blend = _mm_blendv_ps(R_Upper, R_Lower, R_Mask);
+ G_Blend = _mm_blendv_ps(G_Upper, G_Lower, G_Mask);
+ B_Blend = _mm_blendv_ps(B_Upper, B_Lower, B_Mask);
+ } break;
+ case blend_softlight:
+ {
+ // using Pegtop's equation
+ R_Blend = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(One, R_Colx2), _mm_mul_ps(R_Dest, R_Dest)), _mm_mul_ps(R_Colx2, R_Dest));
+ G_Blend = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(One, G_Colx2), _mm_mul_ps(G_Dest, G_Dest)), _mm_mul_ps(G_Colx2, G_Dest));
+ B_Blend = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(One, B_Colx2), _mm_mul_ps(B_Dest, B_Dest)), _mm_mul_ps(B_Colx2, B_Dest));
+ } break;
+ case blend_hardlight:
+ {
+ __m128 R_Mask = _mm_cmp_ps(R_Dest, ZeroPointFive, 13);
+ __m128 G_Mask = _mm_cmp_ps(G_Dest, ZeroPointFive, 13);
+ __m128 B_Mask = _mm_cmp_ps(B_Dest, ZeroPointFive, 13);
+ __m128 R_Lower = _mm_mul_ps(Two, _mm_mul_ps(R_Dest, R_Col));
+ __m128 G_Lower = _mm_mul_ps(Two, _mm_mul_ps(G_Dest, G_Col));
+ __m128 B_Lower = _mm_mul_ps(Two, _mm_mul_ps(B_Dest, B_Col));
+ __m128 R_Upper = _mm_sub_ps(One, _mm_mul_ps(Two, _mm_mul_ps(_mm_sub_ps(One, R_Dest), R_ColInv)));
+ __m128 G_Upper = _mm_sub_ps(One, _mm_mul_ps(Two, _mm_mul_ps(_mm_sub_ps(One, G_Dest), G_ColInv)));
+ __m128 B_Upper = _mm_sub_ps(One, _mm_mul_ps(Two, _mm_mul_ps(_mm_sub_ps(One, B_Dest), B_ColInv)));
+ R_Blend = _mm_blendv_ps(R_Upper, R_Lower, R_Mask);
+ G_Blend = _mm_blendv_ps(G_Upper, G_Lower, G_Mask);
+ B_Blend = _mm_blendv_ps(B_Upper, B_Lower, B_Mask);
+ } break;
+ case blend_subtract:
+ {
+ R_Blend = _mm_sub_ps(R_Dest, R_Col);
+ G_Blend = _mm_sub_ps(G_Dest, G_Col);
+ B_Blend = _mm_sub_ps(B_Dest, B_Col);
+ } break;
+ case blend_divide:
+ {
+ R_Blend = _mm_div_ps(R_Dest, _mm_add_ps(R_Col, ClipPrevent));
+ G_Blend = _mm_div_ps(G_Dest, _mm_add_ps(G_Col, ClipPrevent));
+ B_Blend = _mm_div_ps(B_Dest, _mm_add_ps(B_Col, ClipPrevent));
+ } break;
+ case blend_difference:
+ {
+ __m128 R_Lower = _mm_sub_ps(R_Col, R_Dest);
+ __m128 G_Lower = _mm_sub_ps(G_Col, G_Dest);
+ __m128 B_Lower = _mm_sub_ps(B_Col, B_Dest);
+ __m128 R_Upper = _mm_sub_ps(R_Dest, R_Col);
+ __m128 G_Upper = _mm_sub_ps(G_Dest, G_Col);
+ __m128 B_Upper = _mm_sub_ps(B_Dest, B_Col);
+ __m128 R_Mask = _mm_cmp_ps(R_Lower, Zero, 14);
+ __m128 G_Mask = _mm_cmp_ps(G_Lower, Zero, 14);
+ __m128 B_Mask = _mm_cmp_ps(B_Lower, Zero, 14);
+ R_Blend = _mm_blendv_ps(R_Upper, R_Lower, R_Mask);
+ G_Blend = _mm_blendv_ps(G_Upper, G_Lower, G_Mask);
+ B_Blend = _mm_blendv_ps(B_Upper, B_Lower, B_Mask);
+ } break;
+ }
+
+ R_Blend = _mm_add_ps(_mm_mul_ps(R_Dest, LayerAlphaInv), _mm_mul_ps(R_Blend, LayerAlpha));
+ G_Blend = _mm_add_ps(_mm_mul_ps(G_Dest, LayerAlphaInv), _mm_mul_ps(G_Blend, LayerAlpha));
+ B_Blend = _mm_add_ps(_mm_mul_ps(B_Dest, LayerAlphaInv), _mm_mul_ps(B_Blend, LayerAlpha));
+
+ // Standard behavior in photo apps is for blend modes to
+ // inherit underlying opacity instead of adding to it.
+ if (T.BlendMode == blend_normal)
+ A_Blend = _mm_add_ps(A_Dest, LayerAlpha);
+ else
+ A_Blend = A_Dest;
}
+ R_Out = _mm_cvtps_epi32(_mm_mul_ps(_mm_max_ps(_mm_min_ps(One, R_Blend), Zero), Reg255));
+ G_Out = _mm_cvtps_epi32(_mm_mul_ps(_mm_max_ps(_mm_min_ps(One, G_Blend), Zero), Reg255));
+ B_Out = _mm_cvtps_epi32(_mm_mul_ps(_mm_max_ps(_mm_min_ps(One, B_Blend), Zero), Reg255));
+ A_Out = _mm_cvtps_epi32(_mm_mul_ps(_mm_max_ps(_mm_min_ps(One, A_Blend), Zero), Reg255));
+
__m128i OutputPixel = _mm_or_si128(
_mm_or_si128(R_Out, _mm_slli_epi32(G_Out, 8)),
_mm_or_si128(_mm_slli_epi32(B_Out, 16), _mm_slli_epi32(A_Out, 24)));
@@ -796,9 +1051,7 @@ Fallback_RenderLayer(transform_info T, pixel_buffer *Buffer, rectangle RenderReg
uint8 *Row = ((uint8 *)Buffer->OriginalBuffer + Buffer->Pitch*(int16)(LayerBounds.Min.y) );
uint32 Channel = (T.LayerWidth * T.LayerHeight);
- // uint32 pp1 = 2;
- // uint32 pp2 = 3;
- // bool32 real = true;
+ real32 Normalized255 = 1 / 255.0f;
for (int16 Y = LayerBounds.Min.y; Y < LayerBounds.Max.y; Y++)
{
@@ -813,6 +1066,7 @@ Fallback_RenderLayer(transform_info T, pixel_buffer *Buffer, rectangle RenderReg
real32 V = (StartVectorX * T.YAxisPX) + (StartVectorY * T.YAxisPY);
if (U <= 1.0f && U >= 0.0f && V <= 1.0f && V >= 0.0f) {
+
real32 TexXFull = U * T.LayerWidth;
uint32 TexXInt = (uint32)TexXFull;
real32 TexX = TexXFull - TexXInt;
@@ -868,70 +1122,184 @@ Fallback_RenderLayer(transform_info T, pixel_buffer *Buffer, rectangle RenderReg
PixelToSeek = XLookup + YLookup;
uint32 PixelD = *(uint32 *)((uint8 *)T.SourceBuffer + PixelToSeek*Buffer->BytesPerPixel);
#endif
-
- uint8 TexRA = (PixelA & 0xFF);
- uint8 TexRB = (PixelB & 0xFF);
- uint8 TexRC = (PixelC & 0xFF);
- uint8 TexRD = (PixelD & 0xFF);
-
- uint8 TexGA = ((PixelA >> 8) & 0xFF);
- uint8 TexGB = ((PixelB >> 8) & 0xFF);
- uint8 TexGC = ((PixelC >> 8) & 0xFF);
- uint8 TexGD = ((PixelD >> 8) & 0xFF);
-
- uint8 TexBA = ((PixelA >> 16) & 0xFF);
- uint8 TexBB = ((PixelB >> 16) & 0xFF);
- uint8 TexBC = ((PixelC >> 16) & 0xFF);
- uint8 TexBD = ((PixelD >> 16) & 0xFF);
-
- uint8 TexAA = ((PixelA >> 24) & 0xFF);
- uint8 TexAB = ((PixelB >> 24) & 0xFF);
- uint8 TexAC = ((PixelC >> 24) & 0xFF);
- uint8 TexAD = ((PixelD >> 24) & 0xFF);
-
- real32 PixelBlendR = (TexBothInv * TexRA) + (TexBothYInv * TexRB)
- + (TexBothXInv * TexRC) + (TexBoth * TexRD);
- real32 PixelBlendG = (TexBothInv * TexGA) + (TexBothYInv * TexGB)
- + (TexBothXInv * TexGC) + (TexBoth * TexGD);
- real32 PixelBlendB = (TexBothInv * TexBA) + (TexBothYInv * TexBB)
- + (TexBothXInv * TexBC) + (TexBoth * TexBD);
- real32 PixelBlendA = (TexBothInv * TexAA) + (TexBothYInv * TexAB)
- + (TexBothXInv * TexAC) + (TexBoth * TexAD);
- PixelBlendA = PixelBlendA - (PixelBlendA * T.LayerOpacity);
-
- uint8 R = (uint8)PixelBlendR;
- uint8 G = (uint8)PixelBlendG;
- uint8 B = (uint8)PixelBlendB;
- uint8 A = (uint8)PixelBlendA;
-
XLookup = (X >> 2)*16 + (X % 4);
YLookup = (Y >> 2)*(Buffer->FullWidth*4) + (Y % 4)*4;
-
- // if (real) {
- // real = false;
- // printf("XLook: %i, YLook: %i\n", XLookup, YLookup);
- // printf("X: %i, Y: %i\n", X, Y);
- // }
PixelToSeek = XLookup + YLookup;
uint32 *Pixel = (uint32 *)((uint8 *)Buffer->OriginalBuffer + PixelToSeek*Buffer->BytesPerPixel);
- uint8 R1 = (*Pixel >> 0);
- uint8 G1 = (*Pixel >> 8);
- uint8 B1 = (*Pixel >> 16);
- uint8 A1 = (*Pixel >> 24);
-
- if (A != 255) {
- real32 LayerAlpha = (255 - A) / 255.0f;
- R = (R1 * LayerAlpha) - (R * LayerAlpha) + R;
- G = (G1 * LayerAlpha) - (G * LayerAlpha) + G;
- B = (B1 * LayerAlpha) - (B * LayerAlpha) + B;
- A = ClipAdd(A1, A);
+ real32 TexRA = (real32)(PixelA & 0xFF) * Normalized255;
+ real32 TexRB = (real32)(PixelB & 0xFF) * Normalized255;
+ real32 TexRC = (real32)(PixelC & 0xFF) * Normalized255;
+ real32 TexRD = (real32)(PixelD & 0xFF) * Normalized255;
+
+ real32 TexGA = (real32)((PixelA >> 8) & 0xFF) * Normalized255;
+ real32 TexGB = (real32)((PixelB >> 8) & 0xFF) * Normalized255;
+ real32 TexGC = (real32)((PixelC >> 8) & 0xFF) * Normalized255;
+ real32 TexGD = (real32)((PixelD >> 8) & 0xFF) * Normalized255;
+
+ real32 TexBA = (real32)((PixelA >> 16) & 0xFF) * Normalized255;
+ real32 TexBB = (real32)((PixelB >> 16) & 0xFF) * Normalized255;
+ real32 TexBC = (real32)((PixelC >> 16) & 0xFF) * Normalized255;
+ real32 TexBD = (real32)((PixelD >> 16) & 0xFF) * Normalized255;
+
+ real32 TexAA = (real32)((PixelA >> 24) & 0xFF) * Normalized255;
+ real32 TexAB = (real32)((PixelB >> 24) & 0xFF) * Normalized255;
+ real32 TexAC = (real32)((PixelC >> 24) & 0xFF) * Normalized255;
+ real32 TexAD = (real32)((PixelD >> 24) & 0xFF) * Normalized255;
+
+ real32 R_Col = (TexBothInv * TexRA) + (TexBothYInv * TexRB)
+ + (TexBothXInv * TexRC) + (TexBoth * TexRD);
+ real32 G_Col = (TexBothInv * TexGA) + (TexBothYInv * TexGB)
+ + (TexBothXInv * TexGC) + (TexBoth * TexGD);
+ real32 B_Col = (TexBothInv * TexBA) + (TexBothYInv * TexBB)
+ + (TexBothXInv * TexBC) + (TexBoth * TexBD);
+ real32 A_Col = (TexBothInv * TexAA) + (TexBothYInv * TexAB)
+ + (TexBothXInv * TexAC) + (TexBoth * TexAD);
+
+ real32 LayerAlpha = A_Col * T.LayerOpacity;
+
+ real32 R_Blend = R_Col;
+ real32 G_Blend = G_Col;
+ real32 B_Blend = B_Col;
+ real32 A_Blend = A_Col;
+
+ if (LayerAlpha != 1.0f || T.BlendMode != blend_normal) {
+
+ real32 R_Dest = (real32)((*Pixel >> 0) & 0xFF) * Normalized255;
+ real32 G_Dest = (real32)((*Pixel >> 8) & 0xFF) * Normalized255;
+ real32 B_Dest = (real32)((*Pixel >> 16) & 0xFF) * Normalized255;
+ real32 A_Dest = (real32)((*Pixel >> 24) & 0xFF) * Normalized255;
+
+ switch (T.BlendMode)
+ {
+ case blend_normal:
+ {
+ } break;
+ case blend_multiply:
+ {
+ R_Blend = R_Dest * R_Col;
+ G_Blend = G_Dest * G_Col;
+ B_Blend = B_Dest * B_Col;
+ } break;
+ case blend_colorburn:
+ {
+ // NOTE(fox): Padding to prevent actual crashing from zero division
+ R_Blend = 1.0f - ((1.0f - R_Dest) / (R_Col + 0.001f));
+ G_Blend = 1.0f - ((1.0f - G_Dest) / (G_Col + 0.001f));
+ B_Blend = 1.0f - ((1.0f - B_Dest) / (B_Col + 0.001f));
+ } break;
+ case blend_linearburn:
+ {
+ R_Blend = (R_Dest + R_Col) - 1.0f;
+ G_Blend = (G_Dest + G_Col) - 1.0f;
+ B_Blend = (B_Dest + B_Col) - 1.0f;
+ } break;
+ case blend_add:
+ {
+ R_Blend = R_Dest + R_Col;
+ G_Blend = G_Dest + G_Col;
+ B_Blend = B_Dest + B_Col;
+ } break;
+ case blend_screen:
+ {
+ R_Blend = 1.0f - ((1.0f - R_Dest) * (1.0f - R_Col));
+ G_Blend = 1.0f - ((1.0f - G_Dest) * (1.0f - G_Col));
+ B_Blend = 1.0f - ((1.0f - B_Dest) * (1.0f - B_Col));
+ } break;
+ case blend_overlay:
+ {
+ if (R_Dest < 0.5) {
+ R_Blend = 2.0f * R_Dest * R_Col;
+ } else {
+ R_Blend = 1.0f - (2.0f * (1.0f - R_Dest) * (1.0f - R_Col));
+ }
+ if (G_Dest < 0.5) {
+ G_Blend = 2.0f * G_Dest * G_Col;
+ } else {
+ G_Blend = 1.0f - (2.0f * (1.0f - G_Dest) * (1.0f - G_Col));
+ }
+ if (B_Dest < 0.5) {
+ B_Blend = 2.0f * B_Dest * B_Col;
+ } else {
+ B_Blend = 1.0f - (2.0f * (1.0f - B_Dest) * (1.0f - B_Col));
+ }
+ } break;
+ case blend_softlight:
+ {
+ // using Pegtop's equation
+ R_Blend = ((1.0f - R_Col * 2) * R_Dest * R_Dest) + (R_Col * 2 * R_Dest);
+ G_Blend = ((1.0f - G_Col * 2) * G_Dest * G_Dest) + (G_Col * 2 * G_Dest);
+ B_Blend = ((1.0f - B_Col * 2) * B_Dest * B_Dest) + (B_Col * 2 * B_Dest);
+ } break;
+ case blend_hardlight:
+ {
+ if (R_Dest > 0.5) {
+ R_Blend = 2.0f * R_Dest * R_Col;
+ } else {
+ R_Blend = 1.0f - (2.0f * (1.0f - R_Dest) * (1.0f - R_Col));
+ }
+ if (G_Dest > 0.5) {
+ G_Blend = 2.0f * G_Dest * G_Col;
+ } else {
+ G_Blend = 1.0f - (2.0f * (1.0f - G_Dest) * (1.0f - G_Col));
+ }
+ if (B_Dest > 0.5) {
+ B_Blend = 2.0f * B_Dest * B_Col;
+ } else {
+ B_Blend = 1.0f - (2.0f * (1.0f - B_Dest) * (1.0f - B_Col));
+ }
+ } break;
+ case blend_subtract:
+ {
+ R_Blend = R_Dest - R_Col;
+ G_Blend = G_Dest - G_Col;
+ B_Blend = B_Dest - B_Col;
+ } break;
+ case blend_divide:
+ {
+ R_Blend = R_Dest / (R_Col + 0.001f);
+ G_Blend = G_Dest / (G_Col + 0.001f);
+ B_Blend = B_Dest / (B_Col + 0.001f);
+ } break;
+ case blend_difference:
+ {
+ if (R_Col - R_Dest > 0) {
+ R_Blend = R_Col - R_Dest;
+ } else {
+ R_Blend = R_Dest - R_Col;
+ }
+ if (G_Col - G_Dest > 0) {
+ G_Blend = G_Col - G_Dest;
+ } else {
+ G_Blend = G_Dest - G_Col;
+ }
+ if (B_Col - B_Dest > 0) {
+ B_Blend = B_Col - B_Dest;
+ } else {
+ B_Blend = B_Dest - B_Col;
+ }
+ } break;
+ }
+
+ R_Blend = (R_Dest * (1.0f - LayerAlpha)) + (R_Blend * LayerAlpha);
+ G_Blend = (G_Dest * (1.0f - LayerAlpha)) + (G_Blend * LayerAlpha);
+ B_Blend = (B_Dest * (1.0f - LayerAlpha)) + (B_Blend * LayerAlpha);
+
+ if (T.BlendMode == blend_normal)
+ A_Blend = A_Dest + LayerAlpha;
+ else
+ A_Blend = A_Dest;
}
- *Pixel = ((A << 24) |
- (B << 16) |
- (G << 8) |
- (R << 0));
+ uint8 R_Out = (uint8)(Normalize(R_Blend) * 255.0f);
+ uint8 G_Out = (uint8)(Normalize(G_Blend) * 255.0f);
+ uint8 B_Out = (uint8)(Normalize(B_Blend) * 255.0f);
+ uint8 A_Out = (uint8)(Normalize(A_Blend) * 255.0f);
+
+ *Pixel = ((A_Out << 24) |
+ (B_Out << 16) |
+ (G_Out << 8) |
+ (R_Out << 0));
}
}
}
diff --git a/strings.cpp b/strings.cpp
new file mode 100644
index 0000000..814a52f
--- /dev/null
+++ b/strings.cpp
@@ -0,0 +1,12 @@
+global_variable bool32 Hacko = false; // Set to true by EffectConsoleCallback when it sees a completion event.
+global_variable int32 EffectSel = -1; // NOTE(review): presumably the selected effect index, with -1 meaning none; confirm against callers.
+
+internal int
+EffectConsoleCallback(ImGuiInputTextCallbackData* data)
+{
+    // Only the CallbackCompletion event is handled: it latches Hacko. Always returns 0.
+    if (data->EventFlag != ImGuiInputTextFlags_CallbackCompletion)
+        return 0;
+    Hacko = true;
+    return 0;
+}