diff options
-rw-r--r-- | createcalls.cpp | 6 | ||||
-rw-r--r-- | effects.cpp | 15 | ||||
-rw-r--r-- | main.cpp | 14 | ||||
-rw-r--r-- | main.h | 6 | ||||
-rw-r--r-- | my_imgui_widgets.cpp | 117 | ||||
-rw-r--r-- | prenderer.cpp | 734 | ||||
-rw-r--r-- | strings.cpp | 12 |
7 files changed, 690 insertions, 214 deletions
diff --git a/createcalls.cpp b/createcalls.cpp index 1c20b1a..7088694 100644 --- a/createcalls.cpp +++ b/createcalls.cpp @@ -300,12 +300,12 @@ CreateSolidLayer(project_data *File, memory *Memory, uint16 Width, uint16 Height } internal project_layer * -CreateDebugLayer(project_data *File, memory *Memory, uint16 Width, uint16 Height) +CreateDebugLayer(project_data *File, memory *Memory, uint16 Width, uint16 Height, int i) { project_layer *Layer = CreateLayer(File, Memory); Layer->RenderInfo = AllocateMemory(Memory, sizeof(image_source), P_SourceData); image_source *Source = (image_source *)Layer->RenderInfo; - Source->Raster = CreateDebugBitmap(Memory, Width, Height); + Source->Raster = CreateDebugBitmap(Memory, Width, Height); Layer->SourceType = source_image; return Layer; } @@ -359,7 +359,7 @@ CreateGrid(project_data *File, memory *Memory) { real32 YInc = File->Height / Amount; for (int16 j = 0; j < 8; j++) { for (int16 i = 0; i < 8; i++) { - project_layer *Layer = CreateSolidLayer(File, Memory, 200, 200, V4(0.6, 0.3, 0.4, 1.0)); + project_layer *Layer = CreateSolidLayer(File, Memory, 400, 400, V4(0.6, 0.3, 0.4, 1.0)); Layer->x.CurrentValue.f = (XInc*i); Layer->y.CurrentValue.f = (XInc*j); Layer->opacity.CurrentValue.f = 0.25; diff --git a/effects.cpp b/effects.cpp index 733e4d6..6c3c946 100644 --- a/effects.cpp +++ b/effects.cpp @@ -185,7 +185,7 @@ DrawGradient(pixel_buffer *Buffer, memory *Memory, property_channel Property[]) } #if WINDOWS -global_variable effect_header EffectList[2]; +global_variable effect_header EffectList[3]; #else global_variable effect_header EffectList[] { { @@ -202,7 +202,18 @@ global_variable effect_header EffectList[] { {"End Color", {.col = V4(1.0f, 0.0f, 0.0f, 1.0f)}, type_color, NORMALIZED_COL_MIN, NORMALIZED_COL_MAX}, {"Opacity", {1.0f}, type_real, NORMALIZED_REAL_MIN, NORMALIZED_REAL_MAX} } - } + }, + { + "Levels", + &DrawColor, 6, levels, { + {"Start point", {0.0f}, type_real}, + {"Mid point", {1.0f}, type_real}, + {"End point", {1.0f}, type_real}, + {"Start Col", {.col = V4(0.0f)}, type_color}, + {"Mid Col", {.col = V4(1.0f)}, type_color}, + {"End Col", {.col = V4(1.0f)}, type_color}, + } + }, }; #endif #if 0 @@ -108,6 +108,7 @@ SDL_sem *Semaphore; #include "effects.cpp" #include "keyframes.cpp" #include "layer.cpp" +#include "strings.cpp" #if THREADED #include "threading.cpp" #else @@ -293,7 +294,7 @@ int main(int argc, char *argv[]) { InitMemoryTable(&GlobalMemory, &Memory, 10 * 1024 * 1024, F_Effects, "Effects"); InitMemoryTable(&GlobalMemory, &Memory, 10 * 1024 * 1024, F_Keyframes, "Keyframe blocks"); InitMemoryTable(&GlobalMemory, &Memory, 10 * 1024 * 1024, F_Strings, "Strings"); - InitMemoryTable(&GlobalMemory, &Memory, 1024 * 1024 * 1024, B_Scratch, "Scratch buffer"); + InitMemoryTable(&GlobalMemory, &Memory, (uint64)2 * 1024 * 1024 * 1024, B_Scratch, "Scratch buffer"); project_state State = {}; @@ -305,14 +306,17 @@ int main(int argc, char *argv[]) { } project_data File = {}; - File.Width = 1283; - File.Height = 723; + File.Width = 1280; + File.Height = 720; + // File.Width = 1923; + // File.Height = 1083; File.NumberOfFrames = 65; File.FPS = 30; File.CurrentFrame = 1; File.StartFrame = 0; File.EndFrame = 65; + // CreateLayerFromSource(&File, &State, &Memory, "../asset/b.jpg"); // char String[1024]; // uint16 Size = 1024; // getcwd(String, Size); @@ -430,7 +434,7 @@ int main(int argc, char *argv[]) { // default saves window position to an external .ini file, which can be // loaded from disk or memory. io.IniFilename = NULL; - ImGui::LoadIniSettingsFromMemory(ImGuiPrefs, 1146); + ImGui::LoadIniSettingsFromMemory(ImGuiPrefs); ImGui::StyleColorsDark(); @@ -481,6 +485,8 @@ int main(int argc, char *argv[]) { ImGui_File(&File, &State, &Memory, &UI, io); + ImGui_EffectsPanel(&File, &State, &Memory, &UI, io); + ImGui_PropertiesPanel(&File, &State, &UI, &Memory); ImGui_Timeline(&File, &State, &Memory, &UI, io); @@ -257,6 +257,7 @@ struct transform_info { uint32 FullLayerWidth; uint32 FullLayerHeight; real32 LayerOpacity; + blend_mode BlendMode; real32 OriginX; real32 OriginY; uint32 BufferPitch; @@ -397,6 +398,9 @@ struct project_state int32 MsgTime; // currently in "frames" char *Msg; + + ImGuiTextFilter filter; // This filter API is pretty ballin'. + bool32 RerouteEffects; // Allows shift+space hotkey to gain focus on the effects panel. }; struct brush_tool @@ -415,7 +419,7 @@ enum focused_window struct ui { - real32 TimelineSplit = 200; + real32 TimelineSplit = 600; real32 TimelineZoom; ImVec2 CompZoom; diff --git a/my_imgui_widgets.cpp b/my_imgui_widgets.cpp index 440694b..f94e47c 100644 --- a/my_imgui_widgets.cpp +++ b/my_imgui_widgets.cpp @@ -202,9 +202,9 @@ ImGui_Viewport(project_data File, project_state *State, ui *UI, pixel_buffer Com // } ImGui::OpenPopupOnItemClick("context", ImGuiPopupFlags_MouseButtonMiddle); if (ImGui::BeginPopup("context")) { - if (ImGui::MenuItem("Scalar", NULL, false, InstructionMode != scalar_only)) { InstructionMode = scalar_only; } - if (ImGui::MenuItem("SSE", NULL, false, InstructionMode != sse_enabled)) { InstructionMode = sse_enabled; } - if (ImGui::MenuItem("AVX2", NULL, false, InstructionMode != avx_enabled)) { InstructionMode = avx_enabled; } + if (ImGui::MenuItem("Scalar", NULL, false, InstructionMode != scalar_only)) { InstructionMode = scalar_only; State->UpdateFrame = true; } + if (ImGui::MenuItem("SSE", NULL, false, InstructionMode != sse_enabled)) { InstructionMode = sse_enabled; State->UpdateFrame = true; } + if (ImGui::MenuItem("AVX2", NULL, false, InstructionMode != avx_enabled)) { InstructionMode = avx_enabled; State->UpdateFrame = true; } ImGui::EndPopup(); } if (IsActive && ImGui::IsMouseDragging(ImGuiMouseButton_Left, -1.0f) && ImGui::IsKeyDown(ImGuiKey_Z)) @@ -327,6 +327,43 @@ ImGui_File(project_data *File, project_state *State, memory *Memory, ui *UI, ImG ImGui::End(); } +internal void +ImGui_EffectsPanel(project_data *File, project_state *State, memory *Memory, ui *UI, ImGuiIO io) +{ + ImGui::Begin("Effects list", NULL); + if (State->RerouteEffects) { + ImGui::SetKeyboardFocusHere(); + State->RerouteEffects = 0; + } + int value_changed = ImGui::InputText("Effect name...", State->filter.InputBuf, IM_ARRAYSIZE(State->filter.InputBuf), + ImGuiInputTextFlags_CallbackCompletion, EffectConsoleCallback); + + if (Hacko) { + if (!io.KeyShift) + EffectSel++; + else + EffectSel--; + Hacko = 0; + } + if (value_changed) { + State->filter.Build(); + EffectSel = -1; + } + for (int32 i = 0; i < AmountOf(EffectList); i++) { + if (State->filter.PassFilter(EffectList[i].Name)) { + if (EffectSel == i) { + bool t = true; + ImGui::Selectable(EffectList[i].Name, &t); + } else { + bool s = false; + ImGui::Selectable(EffectList[i].Name, &s); + } + // ImGui::Text(EffectList[i].Name); + } + } + ImGui::End(); +} + internal void ImGui_Timeline(project_data *File, project_state *State, memory *Memory, ui *UI, ImGuiIO io) @@ -458,6 +495,24 @@ ImGui_Timeline(project_data *File, project_state *State, memory *Memory, ui *UI, ImGui::Button("V"); ImGui::SameLine(); ImGui::Button("I"); ImGui::SameLine(); ImGui::Text(Layer->Name); ImGui::SameLine(); + ImGui::Button(BlendmodeNames[Layer->BlendMode]); + ImGui::OpenPopupOnItemClick("blendmode_picker", ImGuiPopupFlags_MouseButtonLeft); + if (ImGui::BeginPopup("blendmode_picker")) { + for (int16 b = 0; b < AmountOf(BlendmodeNames); b++) { + if (ImGui::MenuItem(BlendmodeNames[b], NULL, false, Layer->BlendMode != b)) { + Layer->BlendMode = (blend_mode)b; + State->UpdateFrame = true; + } + // using IsActivated here instead of above loop doesn't seem to + // work; the popup gets closed instead + if (ImGui::IsItemHovered() && io.KeyCtrl) { + Layer->BlendMode = (blend_mode)b; + State->UpdateFrame = true; + } + } + ImGui::EndPopup(); + } + ImGui::SameLine(); ImGui::SetCursorScreenPos(ImVec2(SidebarStartingPos.x, ImGui::GetCursorScreenPos().y)); ImGui::Button("##mover", ImVec2(SidebarSizeWithBorder.x, FontHeight + FramePadding.y*2)); @@ -534,6 +589,9 @@ ImGui_Timeline(project_data *File, project_state *State, memory *Memory, ui *UI, ImGui::SetCursorScreenPos(ImVec2(WindowMinAbs.x + UI->TimelineSplit - TimelineBorderPadding.x, TimelineAbsolutePos.y)); ImGui::InvisibleButton("##SplitMove", ImVec2(TimelineBorderPadding.x, SidebarSizeWithBorder.y), ImGuiButtonFlags_MouseButtonLeft); + if (ImGui::IsItemHovered()) { + ImGui::SetMouseCursor(ImGuiMouseCursor_ResizeEW); + } if (ImGui::IsItemActive() && ImGui::IsMouseDragging(ImGuiMouseButton_Left, -1)) { UI->TimelineSplit += io.MouseDelta.x; @@ -980,9 +1038,14 @@ ImGui_ProcessInputs(project_data *File, project_state *State, pixel_buffer *Comp if (ImGui::IsKeyPressed(ImGuiKey_Space)) { - SwitchBool(State->IsPlaying); + if (io.KeyShift) { + State->RerouteEffects = true; + } else { + SwitchBool(State->IsPlaying); + } } + if (State->IsPlaying && !IsRendering) { IncrementFrame(File, 1); State->UpdateFrame = true; @@ -1024,6 +1087,10 @@ ImGui_ProcessInputs(project_data *File, project_state *State, pixel_buffer *Comp } #if DEBUG + if (ImGui::IsKeyPressed(ImGuiKey_W)) + { + SwitchBool(Debug.ToggleWindow); + } if (ImGui::IsKeyPressed(ImGuiKey_M)) { Debug.Markers[Debug.MarkerIndex] = File->CurrentFrame; @@ -1065,9 +1132,9 @@ ImGui_ProcessInputs(project_data *File, project_state *State, pixel_buffer *Comp } } - global_variable char ImGuiPrefs[] = "[Window][DockSpaceViewport_11111111]" -"\nSize=3153,1837" +"\nPos=0,0" +"\nSize=3200,1800" "\nCollapsed=0" "\n" "\n[Window][Debug##Default]" @@ -1077,18 +1144,19 @@ global_variable char ImGuiPrefs[] = "[Window][DockSpaceViewport_11111111]" "\n" "\n[Window][Viewport]" "\nPos=528,0" -"\nSize=2121,1208" +"\nSize=2168,1171" "\nCollapsed=0" "\nDockId=0x00000005,0" "\n" "\n[Window][###Properties]" -"\nSize=526,1208" +"\nPos=0,0" +"\nSize=526,1171" "\nCollapsed=0" "\nDockId=0x00000003,0" "\n" "\n[Window][Timeline]" -"\nPos=0,1210" -"\nSize=3153,627" +"\nPos=0,1173" +"\nSize=3200,627" "\nCollapsed=0" "\nDockId=0x00000002,0" "\n" @@ -1098,17 +1166,24 @@ global_variable char ImGuiPrefs[] = "[Window][DockSpaceViewport_11111111]" "\nCollapsed=0" "\n" "\n[Window][Files]" -"\nPos=2651,0" -"\nSize=502,1208" +"\nPos=2698,0" +"\nSize=502,913" +"\nCollapsed=0" +"\nDockId=0x00000007,0" +"\n" +"\n[Window][Effects list]" +"\nPos=2698,915" +"\nSize=502,256" "\nCollapsed=0" -"\nDockId=0x00000006,0" +"\nDockId=0x00000008,0" "\n" "\n[Docking][Data]" -"\nDockSpace ID=0x8B93E3BD Pos=0,0 Size=3153,1837 Split=Y Selected=0x13926F0B" -"\n DockNode ID=0x00000001 Parent=0x8B93E3BD SizeRef=3200,1171 Split=X Selected=0x13926F0B" -"\n DockNode ID=0x00000003 Parent=0x00000001 SizeRef=526,1171 Selected=0xDBB8CEFA" -"\n DockNode ID=0x00000004 Parent=0x00000001 SizeRef=2672,1171 Split=X Selected=0x13926F0B" -"\n DockNode ID=0x00000005 Parent=0x00000004 SizeRef=2115,1171 CentralNode=1 Selected=0x13926F0B" -"\n DockNode ID=0x00000006 Parent=0x00000004 SizeRef=502,1171 Selected=0x86FA2F90" -"\n DockNode ID=0x00000002 Parent=0x8B93E3BD SizeRef=3200,627 HiddenTabBar=1 Selected=0x0F18B61B" -"\n"; +"\nDockSpace ID=0x8B93E3BD Window=0xA787BDB4 Pos=0,0 Size=3200,1800 Split=Y Selected=0x13926F0B" +"\n DockNode ID=0x00000001 Parent=0x8B93E3BD SizeRef=3200,1171 Split=X Selected=0x13926F0B" +"\n DockNode ID=0x00000003 Parent=0x00000001 SizeRef=526,1171 Selected=0xDBB8CEFA" +"\n DockNode ID=0x00000004 Parent=0x00000001 SizeRef=2672,1171 Split=X Selected=0x13926F0B" +"\n DockNode ID=0x00000005 Parent=0x00000004 SizeRef=2115,1171 CentralNode=1 Selected=0x13926F0B" +"\n DockNode ID=0x00000006 Parent=0x00000004 SizeRef=502,1171 Split=Y Selected=0x86FA2F90" +"\n DockNode ID=0x00000007 Parent=0x00000006 SizeRef=502,913 Selected=0x86FA2F90" +"\n DockNode ID=0x00000008 Parent=0x00000006 SizeRef=502,256 Selected=0x812F222D" +"\n DockNode ID=0x00000002 Parent=0x8B93E3BD SizeRef=3200,627 HiddenTabBar=1 Selected=0x0F18B61B"; diff --git a/prenderer.cpp b/prenderer.cpp index 72c2893..5df28f4 100644 --- a/prenderer.cpp +++ b/prenderer.cpp @@ -81,7 +81,8 @@ CalculateTransforms(project_layer *Layer, pixel_buffer *Buffer) TransformInfo.LayerHeight = (real32)Source->Raster.Height; TransformInfo.FullLayerWidth = Source->Raster.FullWidth; TransformInfo.FullLayerHeight = Source->Raster.FullHeight; - TransformInfo.LayerOpacity = 1.0f - Layer->opacity.CurrentValue.f; + TransformInfo.LayerOpacity = Layer->opacity.CurrentValue.f; + TransformInfo.BlendMode =Layer->BlendMode; TransformInfo.OriginX = Origin.x; TransformInfo.OriginY = Origin.y; TransformInfo.BufferPitch = Buffer->Pitch; @@ -152,6 +153,15 @@ QueueCurrentFrame(project_data *File, pixel_buffer *CompBuffer, project_state *S for (int x = 0; x < 4; x++) { // if (x == y) { rectangle RenderRegion = {TileWidth*x, TileHeight*y, TileWidth + TileWidth*x, TileHeight + TileHeight*y}; + // The render regions always have to be aligned to the top left of + // a 4x4 chunk (at least for AVX2) and cannot exceed the bounds of + // the comp. + // It seems we don't need any special math to guarantee this aside + // from dividing by 4 and modulating. + RenderRegion.Min.x -= RenderRegion.Min.x % 4; + RenderRegion.Min.y -= RenderRegion.Min.y % 4; + RenderRegion.Max.x -= RenderRegion.Max.x % 4; + RenderRegion.Max.y -= RenderRegion.Max.y % 4; if (RenderRegion.Max.x > CompBuffer->Width) RenderRegion.Max.x = CompBuffer->Width; if (RenderRegion.Max.y > CompBuffer->Height) @@ -379,8 +389,11 @@ AVX2_RenderLayer(transform_info T, pixel_buffer *Buffer, rectangle RenderRegion) __m256 OriginX = _mm256_set1_ps(T.OriginX); __m256 OriginY = _mm256_set1_ps(T.OriginY); + __m256 ClipPrevent = _mm256_set1_ps(0.001f); __m256 One = _mm256_set1_ps(1); + __m256 Two = _mm256_set1_ps(2); __m256 Zero = _mm256_set1_ps(0); + __m256 ZeroPointFive = _mm256_set1_ps(0.5); __m256i Zeroi = _mm256_set1_epi32(0); __m256i Onei = _mm256_set1_epi32(1); __m256 Four = _mm256_set1_ps(4); @@ -389,7 +402,7 @@ AVX2_RenderLayer(transform_info T, pixel_buffer *Buffer, rectangle RenderRegion) __m256i BottomTwoBits = _mm256_set1_epi32(0x03); __m256i Fouri = _mm256_set1_epi32(4); __m256i Sixteeni = _mm256_set1_epi32(16); - __m256 Reg255 = _mm256_set1_ps(255.0f); + __m256 Real255 = _mm256_set1_ps(255.0f); __m256i Int255 = _mm256_set1_epi32(255); __m256 Norm255 = _mm256_set1_ps(1/255.0f); // __m256i White = _mm256_setr_epi32(0xFFFFFFFF, 0, 0, 0, 0xFFFFFFFF, 0, 0, 0); @@ -450,7 +463,7 @@ AVX2_RenderLayer(transform_info T, pixel_buffer *Buffer, rectangle RenderRegion) __m256i TexXIntPlusOne = _mm256_add_epi32(TexXInt, _mm256_and_si256(_mm256_cmpgt_epi32(LayerWidthMinusOne, TexXInt), Onei)); __m256i TexYInt = _mm256_cvttps_epi32(TexYFull); __m256i TexYIntPlusOne = _mm256_add_epi32(TexYInt, _mm256_and_si256(_mm256_cmpgt_epi32(LayerHeightMinusOne, TexYInt), Onei)); - // NOTE(fox): The comparison is for when we're on the last pixel. + // NOTE(fox): The comparison is for when we're on the last pixel of the texel. __m256 TexX = _mm256_sub_ps(TexXFull, _mm256_cvtepi32_ps(TexXInt)); __m256 TexY = _mm256_sub_ps(TexYFull, _mm256_cvtepi32_ps(TexYInt)); @@ -481,71 +494,190 @@ AVX2_RenderLayer(transform_info T, pixel_buffer *Buffer, rectangle RenderRegion) __m256i PixelsBL = _mm256_i32gather_epi32((const int32 *)TexPTR, PixelLookupBL, 4); __m256i PixelsBR = _mm256_i32gather_epi32((const int32 *)TexPTR, PixelLookupBR, 4); - __m256i R_TexTL = _mm256_and_si256( PixelsTL, FF); - __m256i G_TexTL = _mm256_and_si256(_mm256_srli_epi32(PixelsTL, 8), FF); - __m256i B_TexTL = _mm256_and_si256(_mm256_srli_epi32(PixelsTL, 16), FF); - __m256i A_TexTL = _mm256_and_si256(_mm256_srli_epi32(PixelsTL, 24), FF); - - __m256i R_TexTR = _mm256_and_si256( PixelsTR, FF); - __m256i G_TexTR = _mm256_and_si256(_mm256_srli_epi32(PixelsTR, 8), FF); - __m256i B_TexTR = _mm256_and_si256(_mm256_srli_epi32(PixelsTR, 16), FF); - __m256i A_TexTR = _mm256_and_si256(_mm256_srli_epi32(PixelsTR, 24), FF); - - __m256i R_TexBL = _mm256_and_si256( PixelsBL, FF); - __m256i G_TexBL = _mm256_and_si256(_mm256_srli_epi32(PixelsBL, 8), FF); - __m256i B_TexBL = _mm256_and_si256(_mm256_srli_epi32(PixelsBL, 16), FF); - __m256i A_TexBL = _mm256_and_si256(_mm256_srli_epi32(PixelsBL, 24), FF); - - __m256i R_TexBR = _mm256_and_si256( PixelsBR, FF); - __m256i G_TexBR = _mm256_and_si256(_mm256_srli_epi32(PixelsBR, 8), FF); - __m256i B_TexBR = _mm256_and_si256(_mm256_srli_epi32(PixelsBR, 16), FF); - __m256i A_TexBR = _mm256_and_si256(_mm256_srli_epi32(PixelsBR, 24), FF); - - __m256 R_PixelBlend = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(TexBothInv, _mm256_cvtepi32_ps(R_TexTL)), - _mm256_mul_ps(TexBothYInv, _mm256_cvtepi32_ps(R_TexTR))), - _mm256_add_ps(_mm256_mul_ps(TexBothXInv, _mm256_cvtepi32_ps(R_TexBL)), - _mm256_mul_ps(TexBoth, _mm256_cvtepi32_ps(R_TexBR)))); - __m256 G_PixelBlend = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(TexBothInv, _mm256_cvtepi32_ps(G_TexTL)), - _mm256_mul_ps(TexBothYInv, _mm256_cvtepi32_ps(G_TexTR))), - _mm256_add_ps(_mm256_mul_ps(TexBothXInv, _mm256_cvtepi32_ps(G_TexBL)), - _mm256_mul_ps(TexBoth, _mm256_cvtepi32_ps(G_TexBR)))); - __m256 B_PixelBlend = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(TexBothInv, _mm256_cvtepi32_ps(B_TexTL)), - _mm256_mul_ps(TexBothYInv, _mm256_cvtepi32_ps(B_TexTR))), - _mm256_add_ps(_mm256_mul_ps(TexBothXInv, _mm256_cvtepi32_ps(B_TexBL)), - _mm256_mul_ps(TexBoth, _mm256_cvtepi32_ps(B_TexBR)))); - __m256 A_PixelBlend = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(TexBothInv, _mm256_cvtepi32_ps(A_TexTL)), - _mm256_mul_ps(TexBothYInv, _mm256_cvtepi32_ps(A_TexTR))), - _mm256_add_ps(_mm256_mul_ps(TexBothXInv, _mm256_cvtepi32_ps(A_TexBL)), - _mm256_mul_ps(TexBoth, _mm256_cvtepi32_ps(A_TexBR)))); - - A_PixelBlend = _mm256_sub_ps(A_PixelBlend, _mm256_mul_ps(A_PixelBlend, LayerOpacity)); - - __m256i R_Out, G_Out, B_Out, A_Out; - // Only do alpha blending if a pixel's value doesn't equal 255 - if (_mm256_movemask_epi8(_mm256_sub_epi32(_mm256_cvtps_epi32(A_PixelBlend), Int255))) + __m256 R_TexTL = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256( PixelsTL, FF)), Norm255); + __m256 G_TexTL = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(PixelsTL, 8), FF)), Norm255); + __m256 B_TexTL = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(PixelsTL, 16), FF)), Norm255); + __m256 A_TexTL = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(PixelsTL, 24), FF)), Norm255); + + __m256 R_TexTR = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256( PixelsTR, FF)), Norm255); + __m256 G_TexTR = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(PixelsTR, 8), FF)), Norm255); + __m256 B_TexTR = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(PixelsTR, 16), FF)), Norm255); + __m256 A_TexTR = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(PixelsTR, 24), FF)), Norm255); + + __m256 R_TexBL = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256( PixelsBL, FF)), Norm255); + __m256 G_TexBL = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(PixelsBL, 8), FF)), Norm255); + __m256 B_TexBL = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(PixelsBL, 16), FF)), Norm255); + __m256 A_TexBL = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(PixelsBL, 24), FF)), Norm255); + + __m256 R_TexBR = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256( PixelsBR, FF)), Norm255); + __m256 G_TexBR = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(PixelsBR, 8), FF)), Norm255); + __m256 B_TexBR = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(PixelsBR, 16), FF)), Norm255); + __m256 A_TexBR = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(PixelsBR, 24), FF)), Norm255); + + __m256 R_Col = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(TexBothInv, R_TexTL), + _mm256_mul_ps(TexBothYInv, R_TexTR)), + _mm256_add_ps(_mm256_mul_ps(TexBothXInv, R_TexBL), + _mm256_mul_ps(TexBoth, R_TexBR))); + __m256 G_Col = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(TexBothInv, G_TexTL), + _mm256_mul_ps(TexBothYInv, G_TexTR)), + _mm256_add_ps(_mm256_mul_ps(TexBothXInv, G_TexBL), + _mm256_mul_ps(TexBoth, G_TexBR))); + __m256 B_Col = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(TexBothInv, B_TexTL), + _mm256_mul_ps(TexBothYInv, B_TexTR)), + _mm256_add_ps(_mm256_mul_ps(TexBothXInv, B_TexBL), + _mm256_mul_ps(TexBoth, B_TexBR))); + __m256 A_Col = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(TexBothInv, A_TexTL), + _mm256_mul_ps(TexBothYInv, A_TexTR)), + _mm256_add_ps(_mm256_mul_ps(TexBothXInv, A_TexBL), + _mm256_mul_ps(TexBoth, A_TexBR))); + + __m256 LayerAlpha = _mm256_mul_ps(A_Col, LayerOpacity); + __m256 LayerAlphaInv = _mm256_sub_ps(One, LayerAlpha); + + // Hoisted out of some blend modes; maybe it'd be better to just keep them in there. + __m256 R_Colx2 = _mm256_mul_ps(R_Col, Two); + __m256 R_ColInv = _mm256_sub_ps(One, R_Col); + + __m256 G_Colx2 = _mm256_mul_ps(G_Col, Two); + __m256 G_ColInv = _mm256_sub_ps(One, G_Col); + + __m256 B_Colx2 = _mm256_mul_ps(B_Col, Two); + __m256 B_ColInv = _mm256_sub_ps(One, B_Col); + + __m256 R_Blend = R_Col; + __m256 G_Blend = G_Col; + __m256 B_Blend = B_Col; + __m256 A_Blend = LayerAlpha; + + // Only load the dest pixel if we actually need to (a pixel's opacity isn't 255 or the blend mode requires it). + if (!_mm256_movemask_epi8(_mm256_cmp_ps(LayerAlpha, One, 0)) || T.BlendMode != blend_normal) { - __m256 LayerAlpha = _mm256_mul_ps(A_PixelBlend, Norm255); - __m256 LayerAlphaInv = _mm256_mul_ps(_mm256_sub_ps(Reg255, A_PixelBlend), Norm255); - __m256i DestPixel = _mm256_loadu_si256((const __m256i *)Pixel); - __m256i R_Dest = _mm256_and_si256( DestPixel, FF); - __m256i G_Dest = _mm256_and_si256(_mm256_srli_epi32(DestPixel, 8), FF); - __m256i B_Dest = _mm256_and_si256(_mm256_srli_epi32(DestPixel, 16), FF); - __m256i A_Dest = _mm256_and_si256(_mm256_srli_epi32(DestPixel, 24), FF); - - R_Out = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(_mm256_cvtepi32_ps(R_Dest), LayerAlphaInv), _mm256_mul_ps(R_PixelBlend, LayerAlpha))); - G_Out = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(_mm256_cvtepi32_ps(G_Dest), LayerAlphaInv), _mm256_mul_ps(G_PixelBlend, LayerAlpha))); - B_Out = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(_mm256_cvtepi32_ps(B_Dest), LayerAlphaInv), _mm256_mul_ps(B_PixelBlend, LayerAlpha))); - A_Out = _mm256_cvtps_epi32(_mm256_min_ps(_mm256_add_ps(_mm256_cvtepi32_ps(A_Dest), A_PixelBlend), Reg255)); - } - else - { - R_Out = _mm256_cvtps_epi32(R_PixelBlend); - G_Out = _mm256_cvtps_epi32(G_PixelBlend); - B_Out = _mm256_cvtps_epi32(B_PixelBlend); - A_Out = _mm256_cvtps_epi32(A_PixelBlend); + __m256 R_Dest = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256( DestPixel, FF)), Norm255); + __m256 G_Dest = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(DestPixel, 8), FF)), Norm255); + __m256 B_Dest = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(DestPixel, 16), FF)), Norm255); + __m256 A_Dest = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(DestPixel, 24), FF)), Norm255); + + switch (T.BlendMode) + { + case blend_normal: + { + } break; + case blend_multiply: + { + R_Blend = _mm256_mul_ps(R_Dest, R_Col); + G_Blend = _mm256_mul_ps(G_Dest, G_Col); + B_Blend = _mm256_mul_ps(B_Dest, B_Col); + } break; + case blend_colorburn: + { + // NOTE(fox): A small amount is added to Col since images with zero for alpha may also zero out the + // color channels, causing black clipping. + R_Blend = _mm256_sub_ps(One, _mm256_div_ps(_mm256_sub_ps(One, R_Dest), _mm256_add_ps(R_Col, ClipPrevent))); + G_Blend = _mm256_sub_ps(One, _mm256_div_ps(_mm256_sub_ps(One, G_Dest), _mm256_add_ps(G_Col, ClipPrevent))); + B_Blend = _mm256_sub_ps(One, _mm256_div_ps(_mm256_sub_ps(One, B_Dest), _mm256_add_ps(B_Col, ClipPrevent))); + } break; + case blend_linearburn: + { + R_Blend = _mm256_sub_ps(_mm256_add_ps(R_Dest, R_Col), One); + G_Blend = _mm256_sub_ps(_mm256_add_ps(G_Dest, G_Col), One); + B_Blend = _mm256_sub_ps(_mm256_add_ps(B_Dest, B_Col), One); + } break; + case blend_add: + { + R_Blend = _mm256_add_ps(R_Dest, R_Col); + G_Blend = _mm256_add_ps(G_Dest, G_Col); + B_Blend = _mm256_add_ps(B_Dest, B_Col); + } break; + case blend_screen: + { + R_Blend = _mm256_sub_ps(One, _mm256_mul_ps(_mm256_sub_ps(One, R_Dest), R_ColInv)); + G_Blend = _mm256_sub_ps(One, _mm256_mul_ps(_mm256_sub_ps(One, G_Dest), G_ColInv)); + B_Blend = _mm256_sub_ps(One, _mm256_mul_ps(_mm256_sub_ps(One, B_Dest), B_ColInv)); + } break; + case blend_overlay: + { + __m256 R_Mask = _mm256_cmp_ps(R_Dest, ZeroPointFive, 1); + __m256 G_Mask = _mm256_cmp_ps(G_Dest, ZeroPointFive, 1); + __m256 B_Mask = _mm256_cmp_ps(B_Dest, ZeroPointFive, 1); + __m256 R_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(R_Dest, R_Col)); + __m256 G_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(G_Dest, G_Col)); + __m256 B_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(B_Dest, B_Col)); + __m256 R_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, R_Dest), R_ColInv))); + __m256 G_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, G_Dest), G_ColInv))); + __m256 B_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, B_Dest), B_ColInv))); + R_Blend = _mm256_blendv_ps(R_Upper, R_Lower, R_Mask); + G_Blend = _mm256_blendv_ps(G_Upper, G_Lower, G_Mask); + B_Blend = _mm256_blendv_ps(B_Upper, B_Lower, B_Mask); + } break; + case blend_softlight: + { + // using Pegtop's equation + R_Blend = _mm256_add_ps(_mm256_mul_ps(_mm256_sub_ps(One, R_Colx2), _mm256_mul_ps(R_Dest, R_Dest)), _mm256_mul_ps(R_Colx2, R_Dest)); + G_Blend = _mm256_add_ps(_mm256_mul_ps(_mm256_sub_ps(One, G_Colx2), _mm256_mul_ps(G_Dest, G_Dest)), _mm256_mul_ps(G_Colx2, G_Dest)); + B_Blend = _mm256_add_ps(_mm256_mul_ps(_mm256_sub_ps(One, B_Colx2), _mm256_mul_ps(B_Dest, B_Dest)), _mm256_mul_ps(B_Colx2, B_Dest)); + } break; + case blend_hardlight: + { + __m256 R_Mask = _mm256_cmp_ps(R_Dest, ZeroPointFive, 13); + __m256 G_Mask = _mm256_cmp_ps(G_Dest, ZeroPointFive, 13); + __m256 B_Mask = _mm256_cmp_ps(B_Dest, ZeroPointFive, 13); + __m256 R_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(R_Dest, R_Col)); + __m256 G_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(G_Dest, G_Col)); + __m256 B_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(B_Dest, B_Col)); + __m256 R_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, R_Dest), R_ColInv))); + __m256 G_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, G_Dest), G_ColInv))); + __m256 B_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, B_Dest), B_ColInv))); + R_Blend = _mm256_blendv_ps(R_Upper, R_Lower, R_Mask); + G_Blend = _mm256_blendv_ps(G_Upper, G_Lower, G_Mask); + B_Blend = _mm256_blendv_ps(B_Upper, B_Lower, B_Mask); + } break; + case blend_subtract: + { + R_Blend = _mm256_sub_ps(R_Dest, R_Col); + G_Blend = _mm256_sub_ps(G_Dest, G_Col); + B_Blend = _mm256_sub_ps(B_Dest, B_Col); + } break; + case blend_divide: + { + R_Blend = _mm256_div_ps(R_Dest, _mm256_add_ps(R_Col, ClipPrevent)); + G_Blend = _mm256_div_ps(G_Dest, _mm256_add_ps(G_Col, ClipPrevent)); + B_Blend = _mm256_div_ps(B_Dest, _mm256_add_ps(B_Col, ClipPrevent)); + } break; + case blend_difference: + { + __m256 R_Lower = _mm256_sub_ps(R_Col, R_Dest); + __m256 G_Lower = _mm256_sub_ps(G_Col, G_Dest); + __m256 B_Lower = _mm256_sub_ps(B_Col, B_Dest); + __m256 R_Upper = _mm256_sub_ps(R_Dest, R_Col); + __m256 G_Upper = _mm256_sub_ps(G_Dest, G_Col); + __m256 B_Upper = _mm256_sub_ps(B_Dest, B_Col); + __m256 R_Mask = _mm256_cmp_ps(R_Lower, Zero, 14); + __m256 G_Mask = _mm256_cmp_ps(G_Lower, Zero, 14); + __m256 B_Mask = _mm256_cmp_ps(B_Lower, Zero, 14); + R_Blend = _mm256_blendv_ps(R_Upper, R_Lower, R_Mask); + G_Blend = _mm256_blendv_ps(G_Upper, G_Lower, G_Mask); + B_Blend = _mm256_blendv_ps(B_Upper, B_Lower, B_Mask); + } break; + } + + R_Blend = _mm256_add_ps(_mm256_mul_ps(R_Dest, LayerAlphaInv), _mm256_mul_ps(R_Blend, LayerAlpha)); + G_Blend = _mm256_add_ps(_mm256_mul_ps(G_Dest, LayerAlphaInv), _mm256_mul_ps(G_Blend, LayerAlpha)); + B_Blend = _mm256_add_ps(_mm256_mul_ps(B_Dest, LayerAlphaInv), _mm256_mul_ps(B_Blend, LayerAlpha)); + + // Standard behavior in photo apps is for blend modes to + // inherit underlying opacity instead of adding to it. + if (T.BlendMode == blend_normal) + A_Blend = _mm256_add_ps(A_Dest, LayerAlpha); + else + A_Blend = A_Dest; } + __m256i R_Out = _mm256_cvtps_epi32(_mm256_mul_ps(_mm256_max_ps(_mm256_min_ps(One, R_Blend), Zero), Real255)); + __m256i G_Out = _mm256_cvtps_epi32(_mm256_mul_ps(_mm256_max_ps(_mm256_min_ps(One, G_Blend), Zero), Real255)); + __m256i B_Out = _mm256_cvtps_epi32(_mm256_mul_ps(_mm256_max_ps(_mm256_min_ps(One, B_Blend), Zero), Real255)); + __m256i A_Out = _mm256_cvtps_epi32(_mm256_mul_ps(_mm256_max_ps(_mm256_min_ps(One, A_Blend), Zero), Real255)); + __m256i OutputPixel = _mm256_or_si256( _mm256_or_si256(R_Out, _mm256_slli_epi32(G_Out, 8)), _mm256_or_si256(_mm256_slli_epi32(B_Out, 16), _mm256_slli_epi32(A_Out, 24))); @@ -585,8 +717,11 @@ SSE2_RenderLayer(transform_info T, pixel_buffer *Buffer, rectangle RenderRegion) __m128 OriginX = _mm_set1_ps(T.OriginX); __m128 OriginY = _mm_set1_ps(T.OriginY); + __m128 ClipPrevent = _mm_set1_ps(0.001f); __m128 One = _mm_set1_ps(1); + __m128 Two = _mm_set1_ps(2); __m128 Zero = _mm_set1_ps(0); + __m128 ZeroPointFive = _mm_set1_ps(0.5); __m128i Zeroi = _mm_set1_epi32(0); __m128i Onei = _mm_set1_epi32(1); __m128 Four = _mm_set1_ps(4); @@ -707,71 +842,191 @@ SSE2_RenderLayer(transform_info T, pixel_buffer *Buffer, rectangle RenderRegion) __m128i PixelsBL = _mm_setr_epi32(S_PixelsBL0, S_PixelsBL1, S_PixelsBL2, S_PixelsBL3); __m128i PixelsBR = _mm_setr_epi32(S_PixelsBR0, S_PixelsBR1, S_PixelsBR2, S_PixelsBR3); - __m128i R_TexTL = _mm_and_si128( PixelsTL, FF); - __m128i G_TexTL = _mm_and_si128(_mm_srli_epi32(PixelsTL, 8), FF); - __m128i B_TexTL = _mm_and_si128(_mm_srli_epi32(PixelsTL, 16), FF); - __m128i A_TexTL = _mm_and_si128(_mm_srli_epi32(PixelsTL, 24), FF); - - __m128i R_TexTR = _mm_and_si128( PixelsTR, FF); - __m128i G_TexTR = _mm_and_si128(_mm_srli_epi32(PixelsTR, 8), FF); - __m128i B_TexTR = _mm_and_si128(_mm_srli_epi32(PixelsTR, 16), FF); - __m128i A_TexTR = _mm_and_si128(_mm_srli_epi32(PixelsTR, 24), FF); - - __m128i R_TexBL = _mm_and_si128( PixelsBL, FF); - __m128i G_TexBL = _mm_and_si128(_mm_srli_epi32(PixelsBL, 8), FF); - __m128i B_TexBL = _mm_and_si128(_mm_srli_epi32(PixelsBL, 16), FF); - __m128i A_TexBL = _mm_and_si128(_mm_srli_epi32(PixelsBL, 24), FF); - - __m128i R_TexBR = _mm_and_si128( PixelsBR, FF); - __m128i G_TexBR = _mm_and_si128(_mm_srli_epi32(PixelsBR, 8), FF); - __m128i B_TexBR = _mm_and_si128(_mm_srli_epi32(PixelsBR, 16), FF); - __m128i A_TexBR = _mm_and_si128(_mm_srli_epi32(PixelsBR, 24), FF); - - __m128 R_PixelBlend = _mm_add_ps(_mm_add_ps(_mm_mul_ps(TexBothInv, _mm_cvtepi32_ps(R_TexTL)), - _mm_mul_ps(TexBothYInv, _mm_cvtepi32_ps(R_TexTR))), - _mm_add_ps(_mm_mul_ps(TexBothXInv, _mm_cvtepi32_ps(R_TexBL)), - _mm_mul_ps(TexBoth, _mm_cvtepi32_ps(R_TexBR)))); - __m128 G_PixelBlend = _mm_add_ps(_mm_add_ps(_mm_mul_ps(TexBothInv, _mm_cvtepi32_ps(G_TexTL)), - _mm_mul_ps(TexBothYInv, _mm_cvtepi32_ps(G_TexTR))), - _mm_add_ps(_mm_mul_ps(TexBothXInv, _mm_cvtepi32_ps(G_TexBL)), - _mm_mul_ps(TexBoth, _mm_cvtepi32_ps(G_TexBR)))); - __m128 B_PixelBlend = _mm_add_ps(_mm_add_ps(_mm_mul_ps(TexBothInv, _mm_cvtepi32_ps(B_TexTL)), - _mm_mul_ps(TexBothYInv, _mm_cvtepi32_ps(B_TexTR))), - _mm_add_ps(_mm_mul_ps(TexBothXInv, _mm_cvtepi32_ps(B_TexBL)), - _mm_mul_ps(TexBoth, _mm_cvtepi32_ps(B_TexBR)))); - __m128 A_PixelBlend = _mm_add_ps(_mm_add_ps(_mm_mul_ps(TexBothInv, _mm_cvtepi32_ps(A_TexTL)), - _mm_mul_ps(TexBothYInv, _mm_cvtepi32_ps(A_TexTR))), - _mm_add_ps(_mm_mul_ps(TexBothXInv, _mm_cvtepi32_ps(A_TexBL)), - _mm_mul_ps(TexBoth, _mm_cvtepi32_ps(A_TexBR)))); - - A_PixelBlend = _mm_sub_ps(A_PixelBlend, _mm_mul_ps(A_PixelBlend, LayerOpacity)); + __m128 R_TexTL = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128( PixelsTL, FF)), Norm255); + __m128 G_TexTL = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(PixelsTL, 8), FF)), Norm255); + __m128 B_TexTL = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(PixelsTL, 16), FF)), Norm255); + __m128 A_TexTL = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(PixelsTL, 24), FF)), Norm255); + + __m128 R_TexTR = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128( PixelsTR, FF)), Norm255); + __m128 G_TexTR = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(PixelsTR, 8), FF)), Norm255); + __m128 B_TexTR = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(PixelsTR, 16), FF)), Norm255); + __m128 A_TexTR = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(PixelsTR, 24), FF)), Norm255); + + __m128 R_TexBL = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128( PixelsBL, FF)), Norm255); + __m128 G_TexBL = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(PixelsBL, 8), FF)), Norm255); + __m128 B_TexBL = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(PixelsBL, 16), FF)), Norm255); + __m128 A_TexBL = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(PixelsBL, 24), FF)), Norm255); + + __m128 R_TexBR = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128( PixelsBR, FF)), Norm255); + __m128 G_TexBR = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(PixelsBR, 8), FF)), Norm255); + __m128 B_TexBR = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(PixelsBR, 16), FF)), Norm255); + __m128 A_TexBR = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(PixelsBR, 24), FF)), Norm255); + + __m128 R_Col = _mm_add_ps(_mm_add_ps(_mm_mul_ps(TexBothInv, R_TexTL), + _mm_mul_ps(TexBothYInv, R_TexTR)), + _mm_add_ps(_mm_mul_ps(TexBothXInv, R_TexBL), + _mm_mul_ps(TexBoth, R_TexBR))); + __m128 G_Col = _mm_add_ps(_mm_add_ps(_mm_mul_ps(TexBothInv, G_TexTL), + _mm_mul_ps(TexBothYInv, G_TexTR)), + _mm_add_ps(_mm_mul_ps(TexBothXInv, G_TexBL), + _mm_mul_ps(TexBoth, G_TexBR))); + __m128 B_Col = _mm_add_ps(_mm_add_ps(_mm_mul_ps(TexBothInv, B_TexTL), + _mm_mul_ps(TexBothYInv, B_TexTR)), + _mm_add_ps(_mm_mul_ps(TexBothXInv, B_TexBL), + _mm_mul_ps(TexBoth, B_TexBR))); + __m128 A_Col = _mm_add_ps(_mm_add_ps(_mm_mul_ps(TexBothInv, A_TexTL), + _mm_mul_ps(TexBothYInv, A_TexTR)), + _mm_add_ps(_mm_mul_ps(TexBothXInv, A_TexBL), + _mm_mul_ps(TexBoth, A_TexBR))); + __m128i R_Out, G_Out, B_Out, A_Out; - // Only do alpha blending if a pixel's value doesn't equal 255 - if (_mm_movemask_epi8(_mm_sub_epi32(_mm_cvtps_epi32(A_PixelBlend), Int255))) - { - __m128 LayerAlpha = _mm_mul_ps(A_PixelBlend, Norm255); - __m128 LayerAlphaInv = _mm_mul_ps(_mm_sub_ps(Reg255, A_PixelBlend), Norm255); - __m128i DestPixel = _mm_loadu_si128((const __m128i *)Pixel); - __m128i R_Dest = _mm_and_si128( DestPixel, FF); - __m128i G_Dest = _mm_and_si128(_mm_srli_epi32(DestPixel, 8), FF); - __m128i B_Dest = _mm_and_si128(_mm_srli_epi32(DestPixel, 16), FF); - __m128i A_Dest = _mm_and_si128(_mm_srli_epi32(DestPixel, 24), FF); - - R_Out = _mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(R_Dest), LayerAlphaInv), _mm_mul_ps(R_PixelBlend, LayerAlpha))); - G_Out = _mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(G_Dest), LayerAlphaInv), _mm_mul_ps(G_PixelBlend, LayerAlpha))); - B_Out = _mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(B_Dest), LayerAlphaInv), _mm_mul_ps(B_PixelBlend, LayerAlpha))); - A_Out = _mm_cvtps_epi32(_mm_min_ps(_mm_add_ps(_mm_cvtepi32_ps(A_Dest), A_PixelBlend), Reg255)); - } - else + __m128 LayerAlpha = _mm_mul_ps(A_Col, LayerOpacity); + __m128 LayerAlphaInv = _mm_sub_ps(One, LayerAlpha); + + __m128 R_Colx2 = _mm_mul_ps(R_Col, Two); + __m128 R_ColInv = _mm_sub_ps(One, R_Col); + + __m128 G_Colx2 = _mm_mul_ps(G_Col, Two); + __m128 G_ColInv = _mm_sub_ps(One, G_Col); + + __m128 B_Colx2 = _mm_mul_ps(B_Col, Two); + __m128 B_ColInv = _mm_sub_ps(One, B_Col); + + __m128 R_Blend = R_Col; + __m128 G_Blend = G_Col; + __m128 B_Blend = B_Col; + __m128 A_Blend = LayerAlpha; + + if (!_mm_movemask_epi8(_mm_cmpeq_ps(LayerAlpha, One)) || T.BlendMode != blend_normal) { - R_Out = _mm_cvtps_epi32(R_PixelBlend); - G_Out = _mm_cvtps_epi32(G_PixelBlend); - B_Out = _mm_cvtps_epi32(B_PixelBlend); - A_Out = _mm_cvtps_epi32(A_PixelBlend); + __m128i DestPixel = _mm_loadu_si128((const __m128i *)Pixel); + __m128 R_Dest = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128( DestPixel, FF)), Norm255); + __m128 G_Dest = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(DestPixel, 8), FF)), Norm255); + __m128 B_Dest = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(DestPixel, 16), FF)), Norm255); + __m128 A_Dest = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(DestPixel, 24), FF)), Norm255); + + switch (T.BlendMode) + { + case blend_normal: + { + } break; + case blend_multiply: + { + R_Blend = _mm_mul_ps(R_Dest, R_Col); + G_Blend = _mm_mul_ps(G_Dest, G_Col); + B_Blend = _mm_mul_ps(B_Dest, B_Col); + } break; + case blend_colorburn: + { + // NOTE(fox): A small amount is added to Col since images with zero for alpha may also zero out the + // color channels, causing black clipping. + R_Blend = _mm_sub_ps(One, _mm_div_ps(_mm_sub_ps(One, R_Dest), _mm_add_ps(R_Col, ClipPrevent))); + G_Blend = _mm_sub_ps(One, _mm_div_ps(_mm_sub_ps(One, G_Dest), _mm_add_ps(G_Col, ClipPrevent))); + B_Blend = _mm_sub_ps(One, _mm_div_ps(_mm_sub_ps(One, B_Dest), _mm_add_ps(B_Col, ClipPrevent))); + } break; + case blend_linearburn: + { + R_Blend = _mm_sub_ps(_mm_add_ps(R_Dest, R_Col), One); + G_Blend = _mm_sub_ps(_mm_add_ps(G_Dest, G_Col), One); + B_Blend = _mm_sub_ps(_mm_add_ps(B_Dest, B_Col), One); + } break; + case blend_add: + { + R_Blend = _mm_add_ps(R_Dest, R_Col); + G_Blend = _mm_add_ps(G_Dest, G_Col); + B_Blend = _mm_add_ps(B_Dest, B_Col); + } break; + case blend_screen: + { + R_Blend = _mm_sub_ps(One, _mm_mul_ps(_mm_sub_ps(One, R_Dest), R_ColInv)); + G_Blend = _mm_sub_ps(One, _mm_mul_ps(_mm_sub_ps(One, G_Dest), G_ColInv)); + B_Blend = _mm_sub_ps(One, _mm_mul_ps(_mm_sub_ps(One, B_Dest), B_ColInv)); + } break; + case blend_overlay: + { + __m128 R_Mask = _mm_cmp_ps(R_Dest, ZeroPointFive, 1); + __m128 G_Mask = _mm_cmp_ps(G_Dest, ZeroPointFive, 1); + __m128 B_Mask = _mm_cmp_ps(B_Dest, ZeroPointFive, 1); + __m128 R_Lower = _mm_mul_ps(Two, _mm_mul_ps(R_Dest, R_Col)); + __m128 G_Lower = _mm_mul_ps(Two, _mm_mul_ps(G_Dest, G_Col)); + __m128 B_Lower = _mm_mul_ps(Two, _mm_mul_ps(B_Dest, B_Col)); + __m128 R_Upper = _mm_sub_ps(One, _mm_mul_ps(Two, _mm_mul_ps(_mm_sub_ps(One, R_Dest), R_ColInv))); + __m128 G_Upper = _mm_sub_ps(One, _mm_mul_ps(Two, _mm_mul_ps(_mm_sub_ps(One, G_Dest), G_ColInv))); + __m128 B_Upper = _mm_sub_ps(One, _mm_mul_ps(Two, _mm_mul_ps(_mm_sub_ps(One, B_Dest), B_ColInv))); + R_Blend = _mm_blendv_ps(R_Upper, R_Lower, R_Mask); + G_Blend = _mm_blendv_ps(G_Upper, G_Lower, G_Mask); + B_Blend = _mm_blendv_ps(B_Upper, B_Lower, B_Mask); + } break; + case blend_softlight: + { + // using Pegtop's equation + R_Blend = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(One, R_Colx2), _mm_mul_ps(R_Dest, R_Dest)), _mm_mul_ps(R_Colx2, R_Dest)); + G_Blend = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(One, G_Colx2), _mm_mul_ps(G_Dest, G_Dest)), _mm_mul_ps(G_Colx2, G_Dest)); + B_Blend = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(One, B_Colx2), _mm_mul_ps(B_Dest, B_Dest)), _mm_mul_ps(B_Colx2, B_Dest)); + } break; + case blend_hardlight: + { + __m128 R_Mask = _mm_cmp_ps(R_Dest, ZeroPointFive, 13); + __m128 G_Mask = _mm_cmp_ps(G_Dest, ZeroPointFive, 13); + __m128 B_Mask = _mm_cmp_ps(B_Dest, ZeroPointFive, 13); + __m128 R_Lower = _mm_mul_ps(Two, _mm_mul_ps(R_Dest, R_Col)); + __m128 G_Lower = _mm_mul_ps(Two, _mm_mul_ps(G_Dest, G_Col)); + __m128 B_Lower = _mm_mul_ps(Two, _mm_mul_ps(B_Dest, B_Col)); + __m128 R_Upper = _mm_sub_ps(One, _mm_mul_ps(Two, _mm_mul_ps(_mm_sub_ps(One, R_Dest), R_ColInv))); + __m128 G_Upper = _mm_sub_ps(One, _mm_mul_ps(Two, _mm_mul_ps(_mm_sub_ps(One, G_Dest), G_ColInv))); + __m128 B_Upper = _mm_sub_ps(One, _mm_mul_ps(Two, _mm_mul_ps(_mm_sub_ps(One, B_Dest), B_ColInv))); + R_Blend = _mm_blendv_ps(R_Upper, R_Lower, R_Mask); + G_Blend = _mm_blendv_ps(G_Upper, G_Lower, G_Mask); + B_Blend = _mm_blendv_ps(B_Upper, B_Lower, B_Mask); + } break; + case blend_subtract: + { + R_Blend = _mm_sub_ps(R_Dest, R_Col); + G_Blend = _mm_sub_ps(G_Dest, G_Col); + B_Blend = _mm_sub_ps(B_Dest, B_Col); + } break; + case blend_divide: + { + R_Blend = _mm_div_ps(R_Dest, _mm_add_ps(R_Col, ClipPrevent)); + G_Blend = _mm_div_ps(G_Dest, _mm_add_ps(G_Col, ClipPrevent)); + B_Blend = _mm_div_ps(B_Dest, _mm_add_ps(B_Col, ClipPrevent)); + } break; + case blend_difference: + { + __m128 R_Lower = _mm_sub_ps(R_Col, R_Dest); + __m128 G_Lower = _mm_sub_ps(G_Col, G_Dest); + __m128 B_Lower = _mm_sub_ps(B_Col, B_Dest); + __m128 R_Upper = _mm_sub_ps(R_Dest, R_Col); + __m128 G_Upper = _mm_sub_ps(G_Dest, G_Col); + __m128 B_Upper = _mm_sub_ps(B_Dest, B_Col); + __m128 R_Mask = _mm_cmp_ps(R_Lower, Zero, 14); + __m128 G_Mask = _mm_cmp_ps(G_Lower, Zero, 14); + __m128 B_Mask = _mm_cmp_ps(B_Lower, Zero, 14); + R_Blend = _mm_blendv_ps(R_Upper, R_Lower, R_Mask); + G_Blend = _mm_blendv_ps(G_Upper, G_Lower, G_Mask); + B_Blend = _mm_blendv_ps(B_Upper, B_Lower, B_Mask); + } break; + } + + R_Blend = _mm_add_ps(_mm_mul_ps(R_Dest, LayerAlphaInv), _mm_mul_ps(R_Blend, LayerAlpha)); + G_Blend = _mm_add_ps(_mm_mul_ps(G_Dest, LayerAlphaInv), _mm_mul_ps(G_Blend, LayerAlpha)); + B_Blend = _mm_add_ps(_mm_mul_ps(B_Dest, LayerAlphaInv), _mm_mul_ps(B_Blend, LayerAlpha)); + + // Standard behavior in photo apps is for blend modes to + // inherit underlying opacity instead of adding to it. + if (T.BlendMode == blend_normal) + A_Blend = _mm_add_ps(A_Dest, LayerAlpha); + else + A_Blend = A_Dest; } + R_Out = _mm_cvtps_epi32(_mm_mul_ps(_mm_max_ps(_mm_min_ps(One, R_Blend), Zero), Reg255)); + G_Out = _mm_cvtps_epi32(_mm_mul_ps(_mm_max_ps(_mm_min_ps(One, G_Blend), Zero), Reg255)); + B_Out = _mm_cvtps_epi32(_mm_mul_ps(_mm_max_ps(_mm_min_ps(One, B_Blend), Zero), Reg255)); + A_Out = _mm_cvtps_epi32(_mm_mul_ps(_mm_max_ps(_mm_min_ps(One, A_Blend), Zero), Reg255)); + __m128i OutputPixel = _mm_or_si128( _mm_or_si128(R_Out, _mm_slli_epi32(G_Out, 8)), _mm_or_si128(_mm_slli_epi32(B_Out, 16), _mm_slli_epi32(A_Out, 24))); @@ -796,9 +1051,7 @@ Fallback_RenderLayer(transform_info T, pixel_buffer *Buffer, rectangle RenderReg uint8 *Row = ((uint8 *)Buffer->OriginalBuffer + Buffer->Pitch*(int16)(LayerBounds.Min.y) ); uint32 Channel = (T.LayerWidth * T.LayerHeight); - // uint32 pp1 = 2; - // uint32 pp2 = 3; - // bool32 real = true; + real32 Normalized255 = 1 / 255.0f; for (int16 Y = LayerBounds.Min.y; Y < LayerBounds.Max.y; Y++) { @@ -813,6 +1066,7 @@ Fallback_RenderLayer(transform_info T, pixel_buffer *Buffer, rectangle RenderReg real32 V = (StartVectorX * T.YAxisPX) + (StartVectorY * T.YAxisPY); if (U <= 1.0f && U >= 0.0f && V <= 1.0f && V >= 0.0f) { + real32 TexXFull = U * T.LayerWidth; uint32 TexXInt = (uint32)TexXFull; real32 TexX = TexXFull - TexXInt; @@ -868,70 +1122,184 @@ Fallback_RenderLayer(transform_info T, pixel_buffer *Buffer, rectangle RenderReg PixelToSeek = XLookup + YLookup; uint32 PixelD = *(uint32 *)((uint8 *)T.SourceBuffer + PixelToSeek*Buffer->BytesPerPixel); #endif - - uint8 TexRA = (PixelA & 0xFF); - uint8 TexRB = (PixelB & 0xFF); - uint8 TexRC = (PixelC & 0xFF); - uint8 TexRD = (PixelD & 0xFF); - - uint8 TexGA = ((PixelA >> 8) & 0xFF); - uint8 TexGB = ((PixelB >> 8) & 0xFF); - uint8 TexGC = ((PixelC >> 8) & 0xFF); - uint8 TexGD = ((PixelD >> 8) & 0xFF); - - uint8 TexBA = ((PixelA >> 16) & 0xFF); - uint8 TexBB = ((PixelB >> 16) & 0xFF); - uint8 TexBC = ((PixelC >> 16) & 0xFF); - uint8 TexBD = ((PixelD >> 16) & 0xFF); - - uint8 TexAA = ((PixelA >> 24) & 0xFF); - uint8 TexAB = ((PixelB >> 24) & 0xFF); - uint8 TexAC = ((PixelC >> 24) & 0xFF); - uint8 TexAD = ((PixelD >> 24) & 0xFF); - - real32 PixelBlendR = (TexBothInv * TexRA) + (TexBothYInv * TexRB) - + (TexBothXInv * TexRC) + (TexBoth * TexRD); - real32 PixelBlendG = (TexBothInv * TexGA) + (TexBothYInv * TexGB) - + (TexBothXInv * TexGC) + (TexBoth * TexGD); - real32 PixelBlendB = (TexBothInv * TexBA) + (TexBothYInv * TexBB) - + (TexBothXInv * TexBC) + (TexBoth * TexBD); - real32 PixelBlendA = (TexBothInv * TexAA) + (TexBothYInv * TexAB) - + (TexBothXInv * TexAC) + (TexBoth * TexAD); - PixelBlendA = PixelBlendA - (PixelBlendA * T.LayerOpacity); - - uint8 R = (uint8)PixelBlendR; - uint8 G = (uint8)PixelBlendG; - uint8 B = (uint8)PixelBlendB; - uint8 A = (uint8)PixelBlendA; - XLookup = (X >> 2)*16 + (X % 4); YLookup = (Y >> 2)*(Buffer->FullWidth*4) + (Y % 4)*4; - - // if (real) { - // real = false; - // printf("XLook: %i, YLook: %i\n", XLookup, YLookup); - // printf("X: %i, Y: %i\n", X, Y); - // } PixelToSeek = XLookup + YLookup; uint32 *Pixel = (uint32 *)((uint8 *)Buffer->OriginalBuffer + PixelToSeek*Buffer->BytesPerPixel); - uint8 R1 = (*Pixel >> 0); - uint8 G1 = (*Pixel >> 8); - uint8 B1 = (*Pixel >> 16); - uint8 A1 = (*Pixel >> 24); - - if (A != 255) { - real32 LayerAlpha = (255 - A) / 255.0f; - R = (R1 * LayerAlpha) - (R * LayerAlpha) + R; - G = (G1 * LayerAlpha) - (G * LayerAlpha) + G; - B = (B1 * LayerAlpha) - (B * LayerAlpha) + B; - A = ClipAdd(A1, A); + real32 TexRA = (real32)(PixelA & 0xFF) * Normalized255; + real32 TexRB = (real32)(PixelB & 0xFF) * Normalized255; + real32 TexRC = (real32)(PixelC & 0xFF) * Normalized255; + real32 TexRD = (real32)(PixelD & 0xFF) * Normalized255; + + real32 TexGA = (real32)((PixelA >> 8) & 0xFF) * Normalized255; + real32 TexGB = (real32)((PixelB >> 8) & 0xFF) * Normalized255; + real32 TexGC = (real32)((PixelC >> 8) & 0xFF) * Normalized255; + real32 TexGD = (real32)((PixelD >> 8) & 0xFF) * Normalized255; + + real32 TexBA = (real32)((PixelA >> 16) & 0xFF) * Normalized255; + real32 TexBB = (real32)((PixelB >> 16) & 0xFF) * Normalized255; + real32 TexBC = (real32)((PixelC >> 16) & 0xFF) * Normalized255; + real32 TexBD = (real32)((PixelD >> 16) & 0xFF) * Normalized255; + + real32 TexAA = (real32)((PixelA >> 24) & 0xFF) * Normalized255; + real32 TexAB = (real32)((PixelB >> 24) & 0xFF) * Normalized255; + real32 TexAC = (real32)((PixelC >> 24) & 0xFF) * Normalized255; + real32 TexAD = (real32)((PixelD >> 24) & 0xFF) * Normalized255; + + real32 R_Col = (TexBothInv * TexRA) + (TexBothYInv * TexRB) + + (TexBothXInv * TexRC) + (TexBoth * TexRD); + real32 G_Col = (TexBothInv * TexGA) + (TexBothYInv * TexGB) + + (TexBothXInv * TexGC) + (TexBoth * TexGD); + real32 B_Col = (TexBothInv * TexBA) + (TexBothYInv * TexBB) + + (TexBothXInv * TexBC) + (TexBoth * TexBD); + real32 A_Col = (TexBothInv * TexAA) + (TexBothYInv * TexAB) + + (TexBothXInv * TexAC) + (TexBoth * TexAD); + + real32 LayerAlpha = A_Col * T.LayerOpacity; + + real32 R_Blend = R_Col; + real32 G_Blend = G_Col; + real32 B_Blend = B_Col; + real32 A_Blend = A_Col; + + if (LayerAlpha != 1.0f || T.BlendMode != blend_normal) { + + real32 R_Dest = (real32)((*Pixel >> 0) & 0xFF) * Normalized255; + real32 G_Dest = (real32)((*Pixel >> 8) & 0xFF) * Normalized255; + real32 B_Dest = (real32)((*Pixel >> 16) & 0xFF) * Normalized255; + real32 A_Dest = (real32)((*Pixel >> 24) & 0xFF) * Normalized255; + + switch (T.BlendMode) + { + case blend_normal: + { + } break; + case blend_multiply: + { + R_Blend = R_Dest * R_Col; + G_Blend = G_Dest * G_Col; + B_Blend = B_Dest * B_Col; + } break; + case blend_colorburn: + { + // NOTE(fox): Padding to prevent actual crashing from zero division + R_Blend = 1.0f - ((1.0f - R_Dest) / (R_Col + 0.001f)); + G_Blend = 1.0f - ((1.0f - G_Dest) / (G_Col + 0.001f)); + B_Blend = 1.0f - ((1.0f - B_Dest) / (B_Col + 0.001f)); + } break; + case blend_linearburn: + { + R_Blend = (R_Dest + R_Col) - 1.0f; + G_Blend = (G_Dest + G_Col) - 1.0f; + B_Blend = (B_Dest + B_Col) - 1.0f; + } break; + case blend_add: + { + R_Blend = R_Dest + R_Col; + G_Blend = G_Dest + G_Col; + B_Blend = B_Dest + B_Col; + } break; + case blend_screen: + { + R_Blend = 1.0f - ((1.0f - R_Dest) * (1.0f - R_Col)); + G_Blend = 1.0f - ((1.0f - G_Dest) * (1.0f - G_Col)); + B_Blend = 1.0f - ((1.0f - B_Dest) * (1.0f - B_Col)); + } break; + case blend_overlay: + { + if (R_Dest < 0.5) { + R_Blend = 2.0f * R_Dest * R_Col; + } else { + R_Blend = 1.0f - (2.0f * (1.0f - R_Dest) * (1.0f - R_Col)); + } + if (G_Dest < 0.5) { + G_Blend = 2.0f * G_Dest * G_Col; + } else { + G_Blend = 1.0f - (2.0f * (1.0f - G_Dest) * (1.0f - G_Col)); + } + if (B_Dest < 0.5) { + B_Blend = 2.0f * B_Dest * B_Col; + } else { + B_Blend = 1.0f - (2.0f * (1.0f - B_Dest) * (1.0f - B_Col)); + } + } break; + case blend_softlight: + { + // using Pegtop's equation + R_Blend = ((1.0f - R_Col * 2) * R_Dest * R_Dest) + (R_Col * 2 * R_Dest); + G_Blend = ((1.0f - G_Col * 2) * G_Dest * G_Dest) + (G_Col * 2 * G_Dest); + B_Blend = ((1.0f - B_Col * 2) * B_Dest * B_Dest) + (B_Col * 2 * B_Dest); + } break; + case blend_hardlight: + { + if (R_Dest > 0.5) { + R_Blend = 2.0f * R_Dest * R_Col; + } else { + R_Blend = 1.0f - (2.0f * (1.0f - R_Dest) * (1.0f - R_Col)); + } + if (G_Dest > 0.5) { + G_Blend = 2.0f * G_Dest * G_Col; + } else { + G_Blend = 1.0f - (2.0f * (1.0f - G_Dest) * (1.0f - G_Col)); + } + if (B_Dest > 0.5) { + B_Blend = 2.0f * B_Dest * B_Col; + } else { + B_Blend = 1.0f - (2.0f * (1.0f - B_Dest) * (1.0f - B_Col)); + } + } break; + case blend_subtract: + { + R_Blend = R_Dest - R_Col; + G_Blend = G_Dest - G_Col; + B_Blend = B_Dest - B_Col; + } break; + case blend_divide: + { + R_Blend = R_Dest / (R_Col + 0.001f); + G_Blend = G_Dest / (G_Col + 0.001f); + B_Blend = B_Dest / (B_Col + 0.001f); + } break; + case blend_difference: + { + if (R_Col - R_Dest > 0) { + R_Blend = R_Col - R_Dest; + } else { + R_Blend = R_Dest - R_Col; + } + if (G_Col - G_Dest > 0) { + G_Blend = G_Col - G_Dest; + } else { + G_Blend = G_Dest - G_Col; + } + if (B_Col - B_Dest > 0) { + B_Blend = B_Col - B_Dest; + } else { + B_Blend = B_Dest - B_Col; + } + } break; + } + + R_Blend = (R_Dest * (1.0f - LayerAlpha)) + (R_Blend * LayerAlpha); + G_Blend = (G_Dest * (1.0f - LayerAlpha)) + (G_Blend * LayerAlpha); + B_Blend = (B_Dest * (1.0f - LayerAlpha)) + (B_Blend * LayerAlpha); + + if (T.BlendMode == blend_normal) + A_Blend = A_Dest + LayerAlpha; + else + A_Blend = A_Dest; } - *Pixel = ((A << 24) | - (B << 16) | - (G << 8) | - (R << 0)); + uint8 R_Out = (uint8)(Normalize(R_Blend) * 255.0f); + uint8 G_Out = (uint8)(Normalize(G_Blend) * 255.0f); + uint8 B_Out = (uint8)(Normalize(B_Blend) * 255.0f); + uint8 A_Out = (uint8)(Normalize(A_Blend) * 255.0f); + + *Pixel = ((A_Out << 24) | + (B_Out << 16) | + (G_Out << 8) | + (R_Out << 0)); } } } diff --git a/strings.cpp b/strings.cpp new file mode 100644 index 0000000..814a52f --- /dev/null +++ b/strings.cpp @@ -0,0 +1,12 @@ +global_variable bool32 Hacko = false; +global_variable int32 EffectSel = -1; + +internal int +EffectConsoleCallback(ImGuiInputTextCallbackData* data) +{ + if (data->EventFlag == ImGuiInputTextFlags_CallbackCompletion) + { + Hacko = true; + } + return 0; +} |