diff options
-rw-r--r-- | bitmap_calls.cpp | 22 | ||||
-rwxr-xr-x | build.sh | 108 | ||||
-rw-r--r-- | createcalls.cpp | 7 | ||||
-rw-r--r-- | effects.cpp | 2 | ||||
-rw-r--r-- | functions.h | 12 | ||||
-rw-r--r-- | layer.cpp | 46 | ||||
-rw-r--r-- | main.cpp | 12 | ||||
-rw-r--r-- | main.h | 15 | ||||
-rw-r--r-- | my_imgui_widgets.cpp | 252 | ||||
-rw-r--r-- | prenderer.cpp | 263 | ||||
-rw-r--r-- | undo.cpp | 82 |
11 files changed, 458 insertions, 363 deletions
diff --git a/bitmap_calls.cpp b/bitmap_calls.cpp index 6425e6d..5d549c2 100644 --- a/bitmap_calls.cpp +++ b/bitmap_calls.cpp @@ -1,5 +1,23 @@ -// NOTE(fox): Pay attention to how the Y pitch differs between the unpacked -// bitmaps and the 4x4 packed bitmaps, since odd-sized bitmaps are padded. +// Bitmaps are currently stored two ways in this program, which I'm calling +// "packed" and "unpacked." Both are 0xAAGGBBRR little endian. Unpacked bitmaps +// use the typical method of storage, rows of X that you increment by +// Width*BytesPerPixel to look up the Y coordinate. "Packed" bitmaps encode +// pixels as 4x4 chunks. To illustrate this clearly with an 8x4 bitmap: + +// A1 A2 A3 A4 E1 E2 E3 E4 +// B1 B2 B3 B4 F1 F2 F3 F4 +// C1 C2 C3 C4 G1 G2 G3 G4 +// D1 D2 D3 D4 H1 H2 H3 H4 + +// Unpacked would be stored in memory order as A1 A2 A3 A4 E1 E2 E3 E4... +// while packed would be stored as A1 A2 A3 A4 B1 B2 B3 B4... + +// In cases where the bitmap is a non-divisible-by-four size, we simply treat +// the bitmap as if it's the right size and add the extra pixels in the allocation. + +// This wasn't an optimization I necessarily _needed_ to make this early on--I +// never even did any measuring to see if there was any speedup-- but I +// couldn't resist it. I like doing the software rendering stuff. // TODO(fox): I could write an AVX version of this function, but it may not be // that much faster since we have to do a bit of uninterleaving. @@ -1,21 +1,10 @@ #!/bin/bash -WINDOWS=0 - -WARNING_FLAGS=" - -Wall -Wextra \ - -Wno-unused-function -Wno-unused-variable -Wno-unused-parameter -Wno-unused-but-set-variable \ - -Wno-missing-field-initializers -Wno-sign-compare -Wno-unused-but-set-parameter \ - -Wno-missing-braces -Wno-format-security \ - -fno-exceptions -Wno-strict-aliasing \ - -Wno-write-strings - -DDEBUG=1 -DARM=0 -DTHREADED=1 \ -" - -if [[ "$WINDOWS" == 1 ]]; then -WARNING_FLAGS="$WARNING_FLAGS -DWINDOWS=1" -fi - +DEBUG=1 # Compile with debug symbols. 
+IMGUI=0 # Compile ImGui libs. Our custom ImGui functions still compile on zero. +THREADED=1 # Compile with threading. Useful to disable when stepping through the renderer. +WINDOWS=0 # Compile for Windows with Mingw. +ARM=0 # Compile on ARM machines. FFMPEG_LIBS=" libavdevice @@ -27,17 +16,12 @@ FFMPEG_LIBS=" libavutil " -# IMGUI_FILES=" -# imgui/imgui -# imgui/imgui_demo -# imgui/imgui_draw -# imgui/imgui_tables -# imgui/imgui_widgets -# my_imgui_internal_widgets -# " - IMGUI_FILES=" - my_imgui_internal_widgets + imgui + imgui_demo + imgui_draw + imgui_tables + imgui_widgets " IMGUI_FILES_IMPL=" @@ -45,25 +29,47 @@ IMGUI_FILES_IMPL=" imgui_impl_opengl3 " +WARNING_FLAGS=" + -Wall -Wextra \ + -Wno-unused-function -Wno-unused-variable -Wno-unused-parameter -Wno-unused-but-set-variable \ + -Wno-missing-field-initializers -Wno-sign-compare -Wno-unused-but-set-parameter \ + -Wno-missing-braces -Wno-format-security \ + -fno-exceptions -Wno-strict-aliasing \ + -Wno-write-strings +" + +if [[ "$DEBUG" == 1 ]]; then +WARNING_FLAGS="$WARNING_FLAGS -DDEBUG=1" +DEBUG="-g" +else +DEBUG="-O2" +fi +if [[ "$THREADED" == 1 ]]; then +WARNING_FLAGS="$WARNING_FLAGS -DTHREADED=1" +fi +if [[ "$WINDOWS" == 1 ]]; then +WARNING_FLAGS="$WARNING_FLAGS -DWINDOWS=1" +fi +if [[ "$ARM" == 1 ]]; then +WARNING_FLAGS="$WARNING_FLAGS -DARM=1" +ADDITIONAL_FLAGS=" + -march=armv8.5-a+sve +" +else +ADDITIONAL_FLAGS=" + -march=native +" +fi + if [[ "$OSTYPE" =~ ^darwin ]]; then IMGUI_FLAGS=" - -std=c++11 -Iimgui -Iimgui/backends -g -Wall -Wformat `sdl2-config --cflags` -I/usr/local/include -I/opt/local/include -c + -std=c++11 -Iimgui -Iimgui/backends $DEBUG -Wall -Wformat `sdl2-config --cflags` -I/usr/local/include -I/opt/local/include -c " else IMGUI_FLAGS=" - -Iimgui -Iimgui/backends -g -Wall -Wformat `sdl2-config --cflags` -c + -Iimgui -Iimgui/backends $DEBUG -Wall -Wformat `sdl2-config --cflags` -c " fi -imgui() { - for i in $IMGUI_FILES - do - clang $IMGUI_FLAGS -o bin/$i.o $i.cpp - done -# for i in 
$IMGUI_FILES_IMPL -# do -# clang $IMGUI_FLAGS -o bin/$i.o imgui/backends/$i.cpp -# done -} if [[ "$OSTYPE" =~ ^darwin ]]; then SDL_ARGS="`sdl2-config --cflags` -framework OpenGL -ldl `sdl2-config --libs`" @@ -71,29 +77,37 @@ else SDL_ARGS="`sdl2-config --cflags` -lGL -ldl `sdl2-config --libs`" fi -imgui - GLAD_FLAGS=" -Ilib/glad/include " - # -lm $(pkg-config --cflags --libs $FFMPEG_LIBS) -MJresult.json + +clang $IMGUI_FLAGS -o bin/my_imgui_internal_widgets.o my_imgui_internal_widgets.cpp + +if [[ "$IMGUI" == 1 ]]; then + for i in $IMGUI_FILES + do + clang $IMGUI_FLAGS -o bin/$i.o imgui/$i.cpp + done + for i in $IMGUI_FILES_IMPL + do + clang $IMGUI_FLAGS -o bin/$i.o imgui/backends/$i.cpp + done +fi if [[ "$WINDOWS" == 1 ]]; then -clang++ -g $WARNING_FLAGS -target x86_64-pc-windows-gnu -march=x86-64-v3 -I .. -Iimgui -Iimgui/backends \ +clang++ $DEBUG $WARNING_FLAGS -target x86_64-pc-windows-gnu -march=x86-64-v3 -I .. -Iimgui -Iimgui/backends \ main.cpp imgui/imgui*.cpp imgui/backends/imgui_impl_sdl.cpp imgui/backends/imgui_impl_opengl3.cpp \ -I/usr/x86_64-w64-mingw32/include/SDL2 -I/usr/x86_64-w64-mingw32/include/GL \ -lmingw32 -lopengl32 -lSDL2main -lSDL2 -llibavcodec -llibswscale -llibavformat -llibavutil \ -o bin/real2d else clang lib/glad.c $GLAD_FLAGS -I/usr/local/include -I/opt/local/include -c \ - $WARNING_FLAGS -g -march=native -o bin/glad.o -# clang gl_calls.cpp $GLAD_FLAGS -I/usr/local/include -I/opt/local/include -c \ -# $WARNING_FLAGS -g -march=native -o bin/gl.o -clang main.cpp $WARNING_FLAGS -g -march=native -o bin/real2d bin/*.o \ + $WARNING_FLAGS $DEBUG $ADDITIONAL_FLAGS -o bin/glad.o +clang main.cpp $WARNING_FLAGS $DEBUG $ADDITIONAL_FLAGS -o bin/real2d bin/*.o \ $GLAD_FLAGS \ -std=c++11 -lstdc++ -Iimgui -Iimgui/backends \ $SDL_ARGS \ -I . 
\ - -lm -I /usr/local/include ~/.local/src/ffmpeg/bin/*.so + -lm -I /usr/local/include $(pkg-config --cflags --libs $FFMPEG_LIBS) fi diff --git a/createcalls.cpp b/createcalls.cpp index f2a173f..5564264 100644 --- a/createcalls.cpp +++ b/createcalls.cpp @@ -31,7 +31,7 @@ Source_Generate(project_data *File, memory *Memory, void *Path) } if (Found) { - Action_Entry_Begin(Memory, action_entry_default, "Add source"); + Action_Entry_Commit(Memory, action_entry_default, "Add source"); Action_Change_Commit(Memory, &Source->Path, &Source->Path, &Path, action_change_ptr); uint32 i = File->NumberOfSources + 1; Action_Change_Commit(Memory, &File->NumberOfSources, &File->NumberOfSources, &i, action_change_u16); @@ -141,7 +141,7 @@ project_layer * Layer_Init(project_data *File, memory *Memory) // from this index in the Action tree since all we need to do to "delete" // the layer is to unset this. The layer that gets made here is always at // the top of the index. - Action_Entry_Begin(Memory, action_entry_layerinit, "Create layer"); + Action_Entry_Commit(Memory, action_entry_layerinit, "Create layer"); Action_Change_Commit(Memory, &File->NumberOfLayers, &Index, &NextIndex, action_change_u16); File->Layer[Index] = (project_layer *)AllocateMemory(Memory, sizeof(project_layer), F_Layers); @@ -372,7 +372,8 @@ Layer_UpdateBitmap(project_data *File, project_layer *Layer, memory *Memory, int if (Layer->NumberOfMasks) { for (int i = 0; i < Layer->NumberOfMasks; i++) { mask *Mask = &Layer->Mask[i]; - Mask_TriangulateAndRasterize(Memory, Layer, Mask); + if (Mask->IsClosed) + Mask_TriangulateAndRasterize(Memory, Layer, Mask); } } diff --git a/effects.cpp b/effects.cpp index b15b0fb..0820cab 100644 --- a/effects.cpp +++ b/effects.cpp @@ -1,6 +1,7 @@ static void DrawColor(source *Source, layer_bitmap_info *BitmapInfo, memory *Memory, property_channel Property[]) { +#if 0 v4 FloatColor = Property[0].CurrentValue.col; blend_mode BlendMode = Property[1].CurrentValue.blendmode; @@ -171,6 +172,7 
@@ DrawColor(source *Source, layer_bitmap_info *BitmapInfo, memory *Memory, propert _mm256_storeu_si256((__m256i *)Pixel, OutputPixel); } } +#endif } static void diff --git a/functions.h b/functions.h index 6cecb76..654c709 100644 --- a/functions.h +++ b/functions.h @@ -34,8 +34,18 @@ void Bezier_CubicCalcPoints(v2 p1, v2 p2, v2 p3, v2 p4, void *Data, uint32 *Incr static void Action_Change_Commit(memory *Memory, void *DataLocation, void *OriginalData, void *NewData, action_change_type ActionChange); static void Action_Entry_SetPointer(memory *Memory, void *Data); -static void Action_Entry_Begin(memory *Memory, action_entry_type Type, char *Name); +static void Action_Entry_Commit(memory *Memory, action_entry_type Type, char *Name); static void Action_Entry_End(memory *Memory); static void Action_Undo(memory *Memory); static void Action_Redo(memory *Memor); +#if ARM +static void NEON_RenderLayer(transform_info T, comp_buffer *Buffer, rectangle RenderRegion); +#else +static void AVX2_RenderLayer(transform_info TransformInfo, comp_buffer *Buffer, rectangle RenderRegion); +static void SSE2_RenderLayer(transform_info TransformInfo, comp_buffer *Buffer, rectangle RenderRegion); +#endif +static void Fallback_RenderLayer(transform_info TransformInfo, comp_buffer *Buffer, rectangle RenderRegion); +static void PushRect(rectangle RenderRegion); +static bool32 CheckQueue(render_queue RenderInfo, uint16 Index); + @@ -35,45 +35,6 @@ CompUVToLayerUV(project_layer *Layer, comp_buffer *CompBuffer, v2 CompUV) } static void -CalculateAnchorOffset(project_layer *, real32, uint16); - -static void -InteractProperty(int16 a, project_data *File, project_state *State, bool32 Ended, real32 Value, memory *Memory) -{ - temp_layer_list List = FindSelectedLayerIndex(File, State->NumberOfSelectedLayers); - for (int i = 0; i < State->NumberOfSelectedLayers; i++) { - project_layer *Layer = File->Layer[List.LayerIndex[i]]; - // keyframe *Keyframe = 
InsertKeyframeAtFrame(&File->LayerPTR[State->SelectedLayerIndex[r]]->Property[Index], *State, File->CurrentFrame, Memory, Cache); - if (a == 2) { - CalculateAnchorOffset(Layer, Value, 0); - } else if (a == 3) { - CalculateAnchorOffset(Layer, Value, 1); - } else { - Layer->Property[a].CurrentValue.f += Value; - } - } - // Cache->Interact = Active; - // Cache->InteractIndex = State->SelectedLayerIndex[0]; - if (Ended) - { - project_layer *Layer = File->Layer[State->MostRecentlySelectedLayer]; - // We can't end for pos/anchor point until we commit both channels. Should think of something better. - if ((a == 1) || (a == 3)) { - Action_Change_Commit(Memory, &Layer->Property[a].CurrentValue.f, &State->InteractCache[1], &Layer->Property[a].CurrentValue.f, action_change_r32); - } else { - Action_Change_Commit(Memory, &Layer->Property[a].CurrentValue.f, &State->InteractCache[0], &Layer->Property[a].CurrentValue.f, action_change_r32); - } - if (!(a == 0) && !(a == 2)) { - Action_Entry_End(Memory); - } - State->IsInteracting = false; - // Cache->Interact = Clear; - } - State->UpdateFrame = true; - // Cache->Frame[File->CurrentFrame].Cached = false; -} - -static void TransformsInteract(project_data *File, project_state *State, memory *Memory, ui *UI, transforms_hotkey_interact Mode) { if (UI->FocusedWindow == focus_timeline) { @@ -95,13 +56,14 @@ TransformsInteract(project_data *File, project_state *State, memory *Memory, ui } else if (UI->FocusedWindow == focus_viewport) { // TODO(fox): Make multi-select possible! 
project_layer *Layer = File->Layer[State->MostRecentlySelectedLayer]; - Action_Entry_Begin(Memory, action_entry_default, "Tranforms interact"); if (Mode == sliding_position) { State->InteractCache[0] = Layer->x.CurrentValue.f; State->InteractCache[1] = Layer->y.CurrentValue.f; } else if (Mode == sliding_anchorpoint) { - State->InteractCache[0] = Layer->ax.CurrentValue.f; - State->InteractCache[1] = Layer->ay.CurrentValue.f; + State->InteractCache[0] = Layer->x.CurrentValue.f; + State->InteractCache[1] = Layer->y.CurrentValue.f; + State->InteractCache[2] = Layer->ax.CurrentValue.f; + State->InteractCache[3] = Layer->ay.CurrentValue.f; } else if (Mode == sliding_rotation) { State->InteractCache[0] = Layer->rotation.CurrentValue.f; } else if (Mode == sliding_scale) { @@ -9,6 +9,7 @@ #if ARM #include <arm_neon.h> +#include <arm_sve.h> #else #include <smmintrin.h> #endif @@ -244,12 +245,16 @@ int main(int argc, char *argv[]) { project_state State = {}; +#if ARM + InstructionMode = instruction_mode_neon; +#else if (SDL_HasSSE2()) { InstructionMode = instruction_mode_sse; } if (SDL_HasAVX2()) { InstructionMode = instruction_mode_avx; } +#endif project_data File = {}; File.Width = 1280; @@ -360,8 +365,11 @@ int main(int argc, char *argv[]) { SDL_GL_SetAttribute(SDL_GL_STENCIL_SIZE, 8); SDL_WindowFlags window_flags = (SDL_WindowFlags)(SDL_WINDOW_OPENGL | SDL_WINDOW_RESIZABLE | SDL_WINDOW_ALLOW_HIGHDPI); #if DEBUG - // uint32 ScreenSize[2] = {2560/1.2, 1600/1.2}; +#if ARM + uint32 ScreenSize[2] = {(uint32)(2560/1.2), (uint32)(1600/1.2)}; +#else real32 ScreenSize[2] = {3840/1.2, 2160/1.2}; +#endif #else real32 ScreenSize[2]; SDL_DisplayMode current; @@ -452,7 +460,7 @@ int main(int argc, char *argv[]) { ImGui::DockSpaceOverViewport(); - ImGui_Viewport(File, &State, &UI, CompBuffer, io, textureID); + ImGui_Viewport(File, &State, &UI, &Memory, CompBuffer, io, textureID); ImGui_File(&File, &State, &Memory, &UI, io); @@ -1,7 +1,11 @@ enum instruction_mode { 
instruction_mode_scalar, +#if ARM + instruction_mode_neon, +#else instruction_mode_sse, instruction_mode_avx +#endif }; struct cache { @@ -67,8 +71,6 @@ struct cached_bitmap { uint32 Frame; // What frame it is. }; -// Some actions may require a higher-level function to undo to a satisfactory -// standard, so actions are allowed to do more than just set/swap single values. enum action_entry_type { action_entry_layerinit, action_entry_default @@ -207,6 +209,8 @@ union val { struct keyframe { val Value; + // NOTE(fox): Frame values are relative to the layer's FrameOffset! This is + // done to reduce the footprint of layer moving in the undo tree. int32 FrameNumber; keyframe_type Type; bool32 IsSelected; @@ -291,17 +295,17 @@ struct source { struct layer_bitmap_info { // Image and video void *BitmapBuffer; // Each layer has a persistent bitmap that the source data gets packed into. + int32 FrameOffset; // The "true" position of the layer, separate from StartFrame. Starts at zero and only gets incremented when the layer is moved. bool32 ToUpdate = 1; gl_effect_layer Test; // TODO(fox): Find a better place to store this. Either give effects a more // fleshed-out API to add things to a struct like this or integrate into ImGui. - void *HistogramVals; // 256*5 packed floats (all channel average + RGBA). + void *HistogramVals; // 256*5 floats (all channel average + RGBA). uint16 LevelsSelector; // Which channel is currently active // Video only - int32 FrameOffset; // the "true" position of video layers, separate from StartFrame - int32 CurrentFrame = -1; // The last frame number rendered to the bitmap. + int32 CurrentFrame = -1; // The last frame number rendered to the bitmap. -1 guarantees a load upon layer creation. 
void *AVInfo; // Internal data containing current frame info }; @@ -364,6 +368,7 @@ struct mask_point { struct mask { mask_point Point[16]; + bool32 IsClosed; uint16 NumberOfPoints; uint16 NumberOfSelectedPoints; void *TriangulatedPointCache; diff --git a/my_imgui_widgets.cpp b/my_imgui_widgets.cpp index b924d55..4bf6b5e 100644 --- a/my_imgui_widgets.cpp +++ b/my_imgui_widgets.cpp @@ -76,16 +76,20 @@ ImGui_InteractSliderProperty(project_state *State, memory *Memory, property_chan ImGui::DragScalar(Property->Name, ImGuiDataType_Float, &Property->CurrentValue.f, Property->ScrubVal.f, &Property->MinVal.f, &Property->MaxVal.f, "%f"); if (ImGui::IsItemActivated()) { - Action_Entry_Begin(Memory, action_entry_default, "Tranforms interact"); State->InteractCache[0] = Property->CurrentValue.f; } if (ImGui::IsItemActive()) { State->UpdateFrame = true; } - if (ImGui::IsItemDeactivated()) { - Action_Change_Commit(Memory, &Property->CurrentValue.f, &State->InteractCache[0], - &Property->CurrentValue.f, action_change_r32); - Action_Entry_End(Memory); + if (ImGui::IsItemDeactivatedAfterEdit()) { + if (ImGui::IsKeyPressed(ImGuiKey_Escape)) { + Property->CurrentValue.f = State->InteractCache[0]; + } else { + Action_Entry_Commit(Memory, action_entry_default, "Tranforms interact"); + Action_Change_Commit(Memory, &Property->CurrentValue.f, &State->InteractCache[0], + &Property->CurrentValue.f, action_change_r32); + Action_Entry_End(Memory); + } State->UpdateFrame = true; } } @@ -105,6 +109,7 @@ static void ImGui_DebugUndoTree(project_data *File, memory *Memory) { ImGui::SetNextWindowSize(ImVec2(200, 800)); + ImGui::SetNextWindowPos(ImVec2(2498, 10)); ImGui::Begin("undotree"); for (int i = 0; i < Memory->Action.NumberOfEntries; i++) { action_entry Entry = Memory->Action.Entry[i]; @@ -361,7 +366,7 @@ ImGui_PropertiesPanel(project_data *File, project_state *State, ui *UI, memory * } static void -ImGui_Viewport(project_data File, project_state *State, ui *UI, comp_buffer CompBuffer, 
+ImGui_Viewport(project_data File, project_state *State, ui *UI, memory *Memory, comp_buffer CompBuffer, ImGuiIO io, GLuint textureID) { bool open = true; @@ -445,13 +450,13 @@ ImGui_Viewport(project_data File, project_state *State, ui *UI, comp_buffer Comp // The handle itself - if (Point0->IsSelected) { + // if (Point0->IsSelected || State->Tool == tool_pen) { col = ImGui::GetColorU32(ImGuiCol_ButtonHovered); draw_list->AddNgon(Point0_ScreenPos_Left, 10, col, 8, 5.0f); draw_list->AddNgon(Point0_ScreenPos_Right, 10, col, 8, 5.0f); draw_list->AddLine(Point0_ScreenPos, Point0_ScreenPos_Left, col, 2.0f); draw_list->AddLine(Point0_ScreenPos, Point0_ScreenPos_Right, col, 2.0f); - } + // } draw_list->AddNgon(Point0_ScreenPos, 10, col, 8, 5.0f); @@ -478,31 +483,34 @@ ImGui_Viewport(project_data File, project_state *State, ui *UI, comp_buffer Comp ImGui::SetMouseCursor(ImGuiMouseCursor_ResizeEW); } if (ImGui::IsItemActivated() && b == 0) { + if (p == 0 && State->Pen.IsActive) { + // TODO(fox): I think we need some alternate + // Change functions to make these types of + // value sets more easy to read... + bool32 SetFalse = false; + bool32 SetTrue = true; + Action_Entry_Commit(Memory, action_entry_default, "Close mask path"); + Action_Change_Commit(Memory, &State->Pen.IsActive, + &State->Pen.IsActive, &SetFalse, action_change_i32); + Action_Change_Commit(Memory, &Mask->IsClosed, + &Mask->IsClosed, &SetTrue, action_change_i32); + Action_Entry_End(Memory); + // State->Pen.IsActive = false; + // Mask->IsClosed = true; + } Point0->IsSelected = 1; } if (ImGui::IsItemActive()) { - // TODO(fox): Combine this with the anchor point code. 
ImVec2 MouseIncrement = io.MouseDelta * (ImVec2(CompBuffer.Width, CompBuffer.Height) / UI->CompZoom); - real32 Rad = (Layer->rotation.CurrentValue.f * (PI / 180)); - real32 s = Layer->scale.CurrentValue.f; - v2 XAxis = V2(cos(Rad), sin(Rad)) * (MouseIncrement.x / s); - v2 YAxis = V2(sin(Rad), -cos(Rad)) * (MouseIncrement.y / -s); - if (b == 0) { - Point0->Pos.x += XAxis.x; - Point0->Pos.y -= XAxis.y; - Point0->Pos.x -= YAxis.x; - Point0->Pos.y += YAxis.y; + Layer_CalcRotatedOffset(Layer, V2(MouseIncrement), V2(1, 1), + &Point0->Pos.x, &Point0->Pos.y); } else if (b == 1) { - Point0->TangentLeft.x += XAxis.x; - Point0->TangentLeft.y -= XAxis.y; - Point0->TangentLeft.x -= YAxis.x; - Point0->TangentLeft.y += YAxis.y; + Layer_CalcRotatedOffset(Layer, V2(MouseIncrement), V2(1, 1), + &Point0->TangentLeft.x, &Point0->TangentLeft.y); } else { - Point0->TangentRight.x += XAxis.x; - Point0->TangentRight.y -= XAxis.y; - Point0->TangentRight.x -= YAxis.x; - Point0->TangentRight.y += YAxis.y; + Layer_CalcRotatedOffset(Layer, V2(MouseIncrement), V2(1, 1), + &Point0->TangentRight.x, &Point0->TangentRight.y); } State->UpdateFrame = true; } @@ -511,7 +519,8 @@ ImGui_Viewport(project_data File, project_state *State, ui *UI, comp_buffer Comp // The bezier path - if (Mask->NumberOfPoints == 1) { + + if (Mask->NumberOfPoints == 1 || (p+1 == Mask->NumberOfPoints && !Mask->IsClosed)) { ImGui::PopID(); continue; } @@ -598,18 +607,19 @@ ImGui_Viewport(project_data File, project_state *State, ui *UI, comp_buffer Comp bool32 IsHovered = ImGui::IsItemHovered(); bool32 IsActive = ImGui::IsItemActive(); bool32 IsActivated = ImGui::IsItemActivated(); + bool32 IsDeactivated = ImGui::IsItemDeactivated(); - /* if (State->MostRecentlySelectedLayer > -1) { project_layer *Layer = File.Layer[State->MostRecentlySelectedLayer]; - if (IsActivated && ImGui::IsMouseDown(ImGuiMouseButton_Left)) { - if (State->Tool == tool_pen && !State->Pen.IsActive) { - State->Pen.IsActive = true; - Layer->NumberOfMasks++; + 
if (Layer->NumberOfMasks == 0) { + if (IsActivated && ImGui::IsMouseDown(ImGuiMouseButton_Left)) { + if (State->Tool == tool_pen && !State->Pen.IsActive) { + State->Pen.IsActive = true; + Layer->NumberOfMasks++; + } } } - if (State->Pen.IsActive && !ImGui::IsKeyDown(ImGuiKey_Z)) { if (IsActivated) { v2 LayerPos = Layer_ScreenSpaceToLocal(Layer, UI, CompBuffer, ViewportMin, io.MousePos); @@ -622,21 +632,67 @@ ImGui_Viewport(project_data File, project_state *State, ui *UI, comp_buffer Comp v2 LayerUV = CompUVToLayerUV(Layer, &CompBuffer, CompUV); v2 LayerPos = LayerUV * V2(Layer->Source->Info.Width, Layer->Source->Info.Height); v2 OffsetPos = CurrentPoint->Pos - LayerPos; + CurrentPoint->HandleBezier = true; CurrentPoint->TangentRight = -OffsetPos; CurrentPoint->TangentLeft = OffsetPos; } + if (ImGui::IsKeyPressed(ImGuiKey_Escape) && IsActive) { + mask *Mask = &Layer->Mask[Layer->NumberOfMasks-1]; + if (Mask->NumberOfPoints == 1) { + Layer->NumberOfMasks--; + Mask->NumberOfPoints = 0; + } + State->Pen.IsActive = false; + IsDeactivated = false; // just in case escape and mouse release happen simultaneously + } + if (IsDeactivated) { + mask *Mask = &Layer->Mask[Layer->NumberOfMasks-1]; + if (Mask->NumberOfPoints == 1) { + uint16 PreviousNumberOfMasks = Layer->NumberOfMasks - 1; + uint16 PreviousNumberOfPoints = Mask->NumberOfPoints - 1; + Action_Entry_Commit(Memory, action_entry_default, "Create mask"); + Action_Change_Commit(Memory, &Layer->NumberOfMasks, &PreviousNumberOfMasks, + &Layer->NumberOfMasks, action_change_u16); + Action_Change_Commit(Memory, &Mask->NumberOfPoints, &PreviousNumberOfPoints, + &Mask->NumberOfPoints, action_change_u16); + Action_Entry_End(Memory); + } else { + uint16 PreviousNumberOfPoints = Mask->NumberOfPoints - 1; + uint16 Empty = 0; + mask_point *CurrentPoint = &Mask->Point[Mask->NumberOfPoints-1]; + Action_Entry_Commit(Memory, action_entry_default, "Add point"); + Action_Change_Commit(Memory, &Mask->NumberOfPoints, 
&PreviousNumberOfPoints, + &Mask->NumberOfPoints, action_change_u16); + Action_Change_Commit(Memory, &CurrentPoint->Pos.x, &Empty, + &CurrentPoint->Pos.x, action_change_r32); + Action_Change_Commit(Memory, &CurrentPoint->Pos.y, &Empty, + &CurrentPoint->Pos.y, action_change_r32); + Action_Change_Commit(Memory, &CurrentPoint->TangentLeft.x, &Empty, + &CurrentPoint->TangentLeft.x, action_change_r32); + Action_Change_Commit(Memory, &CurrentPoint->TangentLeft.y, &Empty, + &CurrentPoint->TangentLeft.y, action_change_r32); + Action_Change_Commit(Memory, &CurrentPoint->TangentRight.x, &Empty, + &CurrentPoint->TangentRight.x, action_change_r32); + Action_Change_Commit(Memory, &CurrentPoint->TangentRight.y, &Empty, + &CurrentPoint->TangentRight.y, action_change_r32); + Action_Entry_End(Memory); + } + } if (State->Tool != tool_pen) { State->Pen.IsActive = false; } } } - */ ImGui::OpenPopupOnItemClick("context", ImGuiPopupFlags_MouseButtonMiddle); if (ImGui::BeginPopup("context")) { if (ImGui::MenuItem("Scalar", NULL, false, InstructionMode != instruction_mode_scalar)) { InstructionMode = instruction_mode_scalar; State->UpdateFrame = true; } +#if ARM + if (ImGui::MenuItem("NEON", NULL, false, InstructionMode != instruction_mode_neon)) { InstructionMode = instruction_mode_neon; State->UpdateFrame = true; } +#else if (ImGui::MenuItem("SSE", NULL, false, InstructionMode != instruction_mode_sse)) { InstructionMode = instruction_mode_sse; State->UpdateFrame = true; } if (ImGui::MenuItem("AVX2", NULL, false, InstructionMode != instruction_mode_avx)) { InstructionMode = instruction_mode_avx; State->UpdateFrame = true; } +#endif ImGui::EndPopup(); } @@ -644,8 +700,6 @@ ImGui_Viewport(project_data File, project_state *State, ui *UI, comp_buffer Comp { // Point to zoom in on if Z is held UI->TempZoomRatio = ImGui_ScreenPointToCompUV(ViewportMin, UI->CompPos, UI->CompZoom, io.MousePos); - DebugWatchVar("MouseScreenUV", &UI->TempZoomRatio.x, d_float); - DebugWatchVar("MouseScreenUV", 
&UI->TempZoomRatio.y, d_float); // Layer selection if (!ImGui::IsKeyDown(ImGuiKey_Z) || !State->Pen.IsActive) { @@ -694,6 +748,14 @@ static bool32 ImGui_SlidingLayer(project_layer *Layer, real32 *DraggingThreshold, real32 Delta, int16 TimelineZoom, int16 Side) { bool32 Result = 0; + if (ImGui::IsItemActivated()) + { + // if (Side & 1) + // Layer->StartFrame += Increment; + // if (Side & 2) + // Layer->EndFrame += Increment; + // if (Side == 3) { + } if (ImGui::IsItemActive() && ImGui::IsMouseDragging(ImGuiMouseButton_Left, -1)) { ImGui::SetMouseCursor(ImGuiMouseCursor_ResizeEW); @@ -701,19 +763,15 @@ ImGui_SlidingLayer(project_layer *Layer, real32 *DraggingThreshold, real32 Delta if (abs(*DraggingThreshold) >= TimelineZoom) { int16 Increment = *DraggingThreshold/TimelineZoom; - // TODO(fox): Properly handle the start and end points wrapping. - - if (!(Increment < 0 && Layer->StartFrame == 0 && Side & 1)) - { - if (Side & 1) - Layer->StartFrame += Increment; - if (Side & 2) - Layer->EndFrame += Increment; - if (Side == 3) { - IncrementKeyframesInLayer(Layer, Increment); - if (Layer->Source->SourceType == source_type_video) { - Layer->BitmapInfo.FrameOffset += Increment; - } + if (Side & 1) + Layer->StartFrame += Increment; + if (Side & 2) + Layer->EndFrame += Increment; + if (Side == 3) { + // TODO(fox): Make frame offset in keyframes local! 
+ IncrementKeyframesInLayer(Layer, Increment); + if (Layer->Source->SourceType == source_type_video) { + Layer->BitmapInfo.FrameOffset += Increment; } } *DraggingThreshold += -1*Increment*TimelineZoom; @@ -1488,8 +1546,10 @@ ImGui_Timeline(project_data *File, project_state *State, memory *Memory, ui *UI, real32 ZoomAmount = io.MouseWheel*16; real32 LocalMousePos = ImGui::GetMousePos().x - TimelineStartingPos.x; real32 ZoomRatio = LocalMousePos / UI->TimelineZoom; - UI->TimelineZoom += ZoomAmount; - UI->ScrollXOffset -= ZoomAmount*ZoomRatio; + if (UI->TimelineZoom + ZoomAmount > 0) { + UI->TimelineZoom += ZoomAmount; + UI->ScrollXOffset -= ZoomAmount*ZoomRatio; + } } else if (io.KeyShift && io.MouseWheel) { UI->ScrollXOffset += io.MouseWheel*16; } else { @@ -1532,6 +1592,14 @@ ImGui_ProcessInputs(project_data *File, project_state *State, comp_buffer *CompB State->UpdateKeyframes = true; } + if (ImGui::IsKeyPressed(ImGuiKey_V)) { + if (State->Tool == tool_pen) { + State->Tool = tool_default; + } else { + State->Tool = tool_pen; + } + } + if (ImGui::IsKeyPressed(ImGuiKey_Space)) { if (io.KeyShift) { State->RerouteEffects = true; @@ -1595,33 +1663,99 @@ ImGui_ProcessInputs(project_data *File, project_state *State, comp_buffer *CompB } #endif - bool32 Ended = ImGui::IsMouseDown(ImGuiMouseButton_Left); if (State->IsInteracting) { ImVec2 MouseIncrement = io.MouseDelta * (ImVec2(CompBuffer->Width, CompBuffer->Height) / UI->CompZoom); + project_layer *Layer = File->Layer[State->MostRecentlySelectedLayer]; switch (State->TransformsHotkeyInteract) { case sliding_position: { - InteractProperty(0, File, State, Ended, MouseIncrement.x, Memory); - InteractProperty(1, File, State, Ended, MouseIncrement.y, Memory); + Layer->x.CurrentValue.f += MouseIncrement.x; + Layer->y.CurrentValue.f += MouseIncrement.y; } break; case sliding_anchorpoint: { - InteractProperty(2, File, State, Ended, MouseIncrement.x, Memory); - InteractProperty(3, File, State, Ended, MouseIncrement.y, Memory); + 
source *Source = Layer->Source; + Layer->x.CurrentValue.f += MouseIncrement.x; + Layer->y.CurrentValue.f += MouseIncrement.y; + Layer_CalcRotatedOffset(Layer, V2(MouseIncrement), V2(Source->Info.Width, Source->Info.Height), + &Layer->ax.CurrentValue.f, &Layer->ay.CurrentValue.f); } break; case sliding_rotation: { - InteractProperty(4, File, State, Ended, MouseIncrement.x / 10.0, Memory); + Layer->rotation.CurrentValue.f += MouseIncrement.x / 10.0; } break; case sliding_scale: { - InteractProperty(5, File, State, Ended, MouseIncrement.x / 200.0, Memory); + Layer->scale.CurrentValue.f += MouseIncrement.x / 200.0; } break; } + if (ImGui::IsKeyPressed(ImGuiKey_Escape)) { + switch (State->TransformsHotkeyInteract) + { + case sliding_position: + { + Layer->x.CurrentValue.f = State->InteractCache[0]; + Layer->y.CurrentValue.f = State->InteractCache[1]; + } break; + case sliding_anchorpoint: + { + Layer->x.CurrentValue.f = State->InteractCache[0]; + Layer->y.CurrentValue.f = State->InteractCache[1]; + Layer->ax.CurrentValue.f = State->InteractCache[2]; + Layer->ay.CurrentValue.f = State->InteractCache[3]; + } break; + case sliding_rotation: + { + Layer->rotation.CurrentValue.f = State->InteractCache[0]; + } break; + case sliding_scale: + { + Layer->scale.CurrentValue.f = State->InteractCache[0]; + } break; + } + State->IsInteracting = false; + State->UpdateFrame = true; + } + if (ImGui::IsMouseDown(ImGuiMouseButton_Left)) { + Action_Entry_Commit(Memory, action_entry_default, "Tranforms interact"); + switch (State->TransformsHotkeyInteract) + { + case sliding_position: + { + Action_Change_Commit(Memory, &Layer->x.CurrentValue.f, &State->InteractCache[0], + &Layer->x.CurrentValue.f, action_change_r32); + Action_Change_Commit(Memory, &Layer->y.CurrentValue.f, &State->InteractCache[1], + &Layer->y.CurrentValue.f, action_change_r32); + } break; + case sliding_anchorpoint: + { + Action_Change_Commit(Memory, &Layer->x.CurrentValue.f, &State->InteractCache[0], + 
&Layer->x.CurrentValue.f, action_change_r32); + Action_Change_Commit(Memory, &Layer->y.CurrentValue.f, &State->InteractCache[1], + &Layer->y.CurrentValue.f, action_change_r32); + Action_Change_Commit(Memory, &Layer->ax.CurrentValue.f, &State->InteractCache[2], + &Layer->ax.CurrentValue.f, action_change_r32); + Action_Change_Commit(Memory, &Layer->ay.CurrentValue.f, &State->InteractCache[3], + &Layer->ay.CurrentValue.f, action_change_r32); + } break; + case sliding_rotation: + { + Action_Change_Commit(Memory, &Layer->rotation.CurrentValue.f, &State->InteractCache[0], + &Layer->rotation.CurrentValue.f, action_change_r32); + } break; + case sliding_scale: + { + Action_Change_Commit(Memory, &Layer->scale.CurrentValue.f, &State->InteractCache[0], + &Layer->scale.CurrentValue.f, action_change_r32); + } break; + } + Action_Entry_End(Memory); + State->IsInteracting = false; + } + State->UpdateFrame = true; } - if (!ImGui::IsMouseDown(ImGuiMouseButton_Left)) { UI->DraggingLayerThreshold = 0; UI->DraggingTimelineThreshold = 0; diff --git a/prenderer.cpp b/prenderer.cpp index a93fa90..e755fe7 100644 --- a/prenderer.cpp +++ b/prenderer.cpp @@ -1,39 +1,17 @@ - -static void -PushRect(rectangle RenderRegion); - -static void -RenderLayerNeon(project_layer *Layer, comp_buffer *Buffer, rectangle RenderRegion); static void -AVX2_RenderLayer(transform_info TransformInfo, comp_buffer *Buffer, rectangle RenderRegion); -static void -SSE2_RenderLayer(transform_info TransformInfo, comp_buffer *Buffer, rectangle RenderRegion); -static void -Fallback_RenderLayer(transform_info TransformInfo, comp_buffer *Buffer, rectangle RenderRegion); - -static bool32 -CheckQueue(render_queue RenderInfo, uint16 Index); - -// for the anchor point moving UI -static void -CalculateAnchorOffset(project_layer *Layer, real32 Value, uint16 Dir) +Layer_CalcRotatedOffset(project_layer *Layer, v2 Increment, v2 Divisor, real32 *ValueX, real32 *ValueY) { - source *Source = Layer->Source; real32 Rad = 
(Layer->rotation.CurrentValue.f * (PI / 180)); real32 s = Layer->scale.CurrentValue.f; - if (Dir == 0) { - v2 XAxis = V2(cos(Rad), sin(Rad)) * (Value / s); - Layer->x.CurrentValue.f += Value; - Layer->ax.CurrentValue.f += XAxis.x/Source->Info.Width; - Layer->ay.CurrentValue.f -= XAxis.y/Source->Info.Height; - } else { - v2 YAxis = V2(sin(Rad), -cos(Rad)) * (Value / -s); - Layer->y.CurrentValue.f += Value; - Layer->ax.CurrentValue.f -= YAxis.x/Source->Info.Width; - Layer->ay.CurrentValue.f += YAxis.y/Source->Info.Height; - } + v2 XAxis = V2(cos(Rad), sin(Rad)) * (Increment.x / s); + v2 YAxis = V2(sin(Rad), -cos(Rad)) * (Increment.y / -s); + + *ValueX += XAxis.x/Divisor.x; + *ValueY -= XAxis.y/Divisor.y; + *ValueX -= YAxis.x/Divisor.x; + *ValueY += YAxis.y/Divisor.y; } static transform_info @@ -121,10 +99,15 @@ static void RenderLayers(render_queue *RenderInfo, rectangle RenderRegion) { for (int16 i = 0; i < RenderInfo->State->NumberOfLayersToRender; i++) { int16 Idx = RenderInfo->State->LayersToRender[i]; +#if ARM + if (InstructionMode == instruction_mode_neon) + Fallback_RenderLayer(RenderInfo->File->Layer[Idx]->TransformInfo, RenderInfo->CompBuffer, RenderRegion); +#else if (InstructionMode == instruction_mode_avx) AVX2_RenderLayer(RenderInfo->File->Layer[Idx]->TransformInfo, RenderInfo->CompBuffer, RenderRegion); else if (InstructionMode == instruction_mode_sse) SSE2_RenderLayer(RenderInfo->File->Layer[Idx]->TransformInfo, RenderInfo->CompBuffer, RenderRegion); +#endif else Fallback_RenderLayer(RenderInfo->File->Layer[Idx]->TransformInfo, RenderInfo->CompBuffer, RenderRegion); } @@ -134,7 +117,7 @@ static void QueueCurrentFrame(project_data *File, comp_buffer *CompBuffer, project_state *State) { IsRendering = true; - // render_queue RenderInfo = {File, State, CompBuffer}; + render_queue RenderInfo = {File, State, CompBuffer}; for (int16 i = 0; i < File->NumberOfLayers; i++) { @@ -182,74 +165,100 @@ QueueCurrentFrame(project_data *File, comp_buffer *CompBuffer, 
project_state *St #endif } - #if ARM + static void -RenderLayerNeon(project_layer *Layer, pixel_buffer *Buffer, rectangle RenderRegion) +NEON_RenderLayer(transform_info T, comp_buffer *Buffer, rectangle RenderRegion) { - float32x4_t XAxisPX = vdupq_n_f32(XAxisP.x); - float32x4_t XAxisPY = vdupq_n_f32(XAxisP.y); - float32x4_t YAxisPX = vdupq_n_f32(YAxisP.x); - float32x4_t YAxisPY = vdupq_n_f32(YAxisP.y); - float32x4_t LayerWidth = vdupq_n_f32(); - float32x4_t LayerHeight = vdupq_n_f32(); - float32x4_t LayerOpacity = vdupq_n_f32(); - float32x4_t OriginX = vdupq_n_f32(Origin.x); - float32x4_t OriginY = vdupq_n_f32(Origin.y); + rectangle LayerBounds = ClipRectangle( T.ClipRect, + RenderRegion ); + // Remember: since bitmaps are packed in 4x4 cubes, we always need to be aligned. + LayerBounds.Min.x -= LayerBounds.Min.x % 4; + LayerBounds.Min.y -= LayerBounds.Min.y % 4; + uint16 WidthP, HeightP; + Bitmap_CalcPackedDimensions(Buffer->Width, Buffer->Height, &WidthP, &HeightP); + uint8 *TexPTR = (uint8 *)T.SourceBuffer; + Assert(LayerBounds.Max.x <= Buffer->Width); + Assert(LayerBounds.Max.y <= Buffer->Height); + + float32x4_t XAxisPX = vdupq_n_f32(T.XAxisPX); + float32x4_t XAxisPY = vdupq_n_f32(T.XAxisPY); + float32x4_t YAxisPX = vdupq_n_f32(T.YAxisPX); + float32x4_t YAxisPY = vdupq_n_f32(T.YAxisPY); + + float32x4_t LayerWidth = vdupq_n_f32(T.LayerWidth); + int32x4_t FullLayerWidth4i = vdupq_n_s32(T.FullLayerWidth*4); + int32x4_t LayerWidthMinusOne = vdupq_n_s32(T.LayerWidth - 1); + int32x4_t LayerHeightMinusOne = vdupq_n_s32(T.LayerHeight - 1); + float32x4_t LayerHeight = vdupq_n_f32(T.LayerHeight); + float32x4_t LayerOpacity = vdupq_n_f32(T.LayerOpacity); + float32x4_t OriginX = vdupq_n_f32(T.OriginX); + float32x4_t OriginY = vdupq_n_f32(T.OriginY); + + float32x4_t ClipPrevent = vdupq_n_f32(0.001f); float32x4_t One = vdupq_n_f32(1); + float32x4_t Two = vdupq_n_f32(2); float32x4_t Zero = vdupq_n_f32(0); + + float32x4_t ZeroPoint25 = vdupq_n_f32(0.25); + float32x4_t 
ZeroPointFive = vdupq_n_f32(0.5); + int32x4_t Onei = vdupq_n_s32(1); float32x4_t Four = vdupq_n_f32(4); - int32x4_t FourInt = vdupq_n_s32(4); - int32x4_t EightInt = vdupq_n_s32(8); - int32x4_t SixteenInt = vdupq_n_s32(16); - int32x4_t TwentyFourInt = vdupq_n_s32(24); - float32x4_t Float255 = vdupq_n_f32(255.0f); - int32x4_t Int255 = vdupq_n_s32(255); + int32x4_t FF = vdupq_n_s32(0xFF); + int32x4_t BottomTwoBits = vdupq_n_s32(0x03); + int32x4_t Fouri = vdupq_n_s32(4); + int32x4_t Sixteeni = vdupq_n_s32(16); + float32x4_t Real255 = vdupq_n_f32(255.0f); float32x4_t Norm255 = vdupq_n_f32(1/255.0f); - for(int16 Y = LayerBounds.Min.y; - Y < LayerBounds.Max.y; - Y++) + // NOTE(fox): Each loop operates on 4 pixels, 4 horizontal by 1 vertical. + + // TODO(fox): A possible optimization could be made by using the 32x4x4 + // load intrinsic and a loop that repeats four times. + + for (int32 Y = LayerBounds.Min.y; Y < LayerBounds.Max.y; Y++) { - uint32 *Pixel = (uint32 *)Row + LayerBounds.Min.x; + real32 xvals[4] = { (real32)LayerBounds.Min.x, (real32)LayerBounds.Min.x+1, + (real32)LayerBounds.Min.x+2, (real32)LayerBounds.Min.x+3 }; + float32x4_t PixelX = vld1q_f32(xvals); - real32 ScalarPixelX[4] = {(real32)LayerBounds.Min.x, - (real32)LayerBounds.Min.x+1, - (real32)LayerBounds.Min.x+2, - (real32)LayerBounds.Min.x+3}; - float32x4_t PixelX = vld1q_f32(ScalarPixelX); float32x4_t PixelY = vdupq_n_f32((real32)Y); float32x4_t StartVectorY = vsubq_f32(PixelY, OriginY); - for(int16 XI = LayerBounds.Min.x; - XI < LayerBounds.Max.x; - XI += 1) + for (int32 X = LayerBounds.Min.x; X < LayerBounds.Max.x; X += 4) { + float32x4_t StartVectorX = vsubq_f32(PixelX, OriginX); + + uint32 XLookup = (X >> 2)*16 + (X % 4); + uint32 YLookup = (Y >> 2)*(WidthP*4) + (Y % 4)*4; + uint32 PixelToSeek = XLookup + YLookup; + uint8 *Pixel = (uint8 *)Buffer->PackedBuffer + PixelToSeek*Buffer->BytesPerPixel; + float32x4_t U = vaddq_f32(vmulq_f32(StartVectorX, XAxisPX), vmulq_f32(StartVectorY, XAxisPY)); 
float32x4_t V = vaddq_f32(vmulq_f32(StartVectorX, YAxisPX), vmulq_f32(StartVectorY, YAxisPY)); - uint32x4_t R = vandq_u32(vandq_u32(vcleq_f32(U, One), vcgezq_f32(U)), - vandq_u32(vcleq_f32(V, One), vcgezq_f32(V))); + uint32x4_t LayerMask = vandq_u32(vandq_u32(vcgeq_f32(U, Zero), vcltq_f32(U, One)), + vandq_u32(vcgeq_f32(V, Zero), vcltq_f32(V, One))); // TODO(fox): Make more efficient with some sort of truncation uint32 comp[4]; - vst1q_u32(comp, R); + vst1q_u32(comp, LayerMask); if (comp[0] || comp[1] || comp[2] || comp[3]) { U = vmaxq_f32(vminq_f32(One, U), Zero); V = vmaxq_f32(vminq_f32(One, V), Zero); float32x4_t TexXFull = vmulq_f32(U, LayerWidth); float32x4_t TexYFull = vmulq_f32(V, LayerHeight); - int32x4_t TexXInt = vcvtq_s32_f32(TexXFull); + int32x4_t TexXIntPlusOne = vaddq_f32(TexXInt, vandq_u32(vcltq_u32(TexXInt, LayerWidthMinusOne), Onei)); int32x4_t TexYInt = vcvtq_s32_f32(TexYFull); + int32x4_t TexYIntPlusOne = vaddq_f32(TexYInt, vandq_u32(vcltq_u32(TexYInt, LayerWidthMinusOne), Onei)); - // fractions - float32x4_t TexX = vsubq_f32(TexXFull, vcvtq_f32_s32(TexXInt)); - float32x4_t TexY = vsubq_f32(TexYFull, vcvtq_f32_s32(TexYInt)); + float32x4_t TexX = vsubq_f32(TexXFull, vcvtq_f32_u32(TexXInt)); + float32x4_t TexY = vsubq_f32(TexYFull, vcvtq_f32_u32(TexYInt)); float32x4_t TexXInv = vsubq_f32(One, TexX); float32x4_t TexYInv = vsubq_f32(One, TexY); float32x4_t TexBothXInv = vmulq_f32(TexXInv, TexY); @@ -257,112 +266,28 @@ RenderLayerNeon(project_layer *Layer, pixel_buffer *Buffer, rectangle RenderRegi float32x4_t TexBoth = vmulq_f32(TexY, TexX); float32x4_t TexBothInv = vmulq_f32(TexXInv, TexYInv); - int32 TexXP[4]; - vst1q_s32(TexXP, TexXInt); - int32 TexYP[4]; - vst1q_s32(TexYP, TexYInt); - - uint8 *TexPTR0 = ((uint8 *)Source->Raster.MainBuffer + LayerPitch*TexYP[0] + TexXP[0]*sizeof(uint32)); - uint8 *TexPTR1 = ((uint8 *)Source->Raster.MainBuffer + LayerPitch*TexYP[1] + TexXP[1]*sizeof(uint32)); - uint8 *TexPTR2 = ((uint8 *)Source->Raster.MainBuffer + 
LayerPitch*TexYP[2] + TexXP[2]*sizeof(uint32)); - uint8 *TexPTR3 = ((uint8 *)Source->Raster.MainBuffer + LayerPitch*TexYP[3] + TexXP[3]*sizeof(uint32)); - - // TexRGBA = vld4_u8(TexPTR0); - // TexRGBA = vld4q_lane_u8(TexPTR0, TexRGBA, 0); - // TexRGBA = vld4q_lane_u8(TexPTR1, TexRGBA, 4); - // TexRGBA = vld4q_lane_u8(TexPTR2, TexRGBA, 8); - // TexRGBA = vld4q_lane_u8(TexPTR3, TexRGBA, 12); - // TexRGBA = vld4q_lane_u8(TexPTR0 + sizeof(uint32), TexRGBA, 1); - // TexRGBA = vld4q_lane_u8(TexPTR1 + sizeof(uint32), TexRGBA, 5); - // TexRGBA = vld4q_lane_u8(TexPTR2 + sizeof(uint32), TexRGBA, 9); - // TexRGBA = vld4q_lane_u8(TexPTR3 + sizeof(uint32), TexRGBA, 13); - // TexRGBA = vld4q_lane_u8(TexPTR0 + LayerPitch, TexRGBA, 2); - // TexRGBA = vld4q_lane_u8(TexPTR1 + LayerPitch, TexRGBA, 6); - // TexRGBA = vld4q_lane_u8(TexPTR2 + LayerPitch, TexRGBA, 10); - // TexRGBA = vld4q_lane_u8(TexPTR3 + LayerPitch, TexRGBA, 14); - // TexRGBA = vld4q_lane_u8(TexPTR0 + LayerPitch + sizeof(uint32), TexRGBA, 3); - // TexRGBA = vld4q_lane_u8(TexPTR1 + LayerPitch + sizeof(uint32), TexRGBA, 7); - // TexRGBA = vld4q_lane_u8(TexPTR2 + LayerPitch + sizeof(uint32), TexRGBA, 11); - // TexRGBA = vld4q_lane_u8(TexPTR3 + LayerPitch + sizeof(uint32), TexRGBA, 15); - uint8x16x4_t TexRGBA_A = {}; - uint8x16x4_t TexRGBA_B = {}; - uint8x16x4_t TexRGBA_C = {}; - uint8x16x4_t TexRGBA_D = {}; - TexRGBA_A = vld4q_lane_u8(TexPTR0, TexRGBA_A, 0); - TexRGBA_B = vld4q_lane_u8(TexPTR1, TexRGBA_B, 0); - TexRGBA_C = vld4q_lane_u8(TexPTR2, TexRGBA_C, 0); - TexRGBA_D = vld4q_lane_u8(TexPTR3, TexRGBA_D, 0); - TexRGBA_A = vld4q_lane_u8(TexPTR0 + sizeof(uint32), TexRGBA_A, 4); - TexRGBA_B = vld4q_lane_u8(TexPTR1 + sizeof(uint32), TexRGBA_B, 4); - TexRGBA_C = vld4q_lane_u8(TexPTR2 + sizeof(uint32), TexRGBA_C, 4); - TexRGBA_D = vld4q_lane_u8(TexPTR3 + sizeof(uint32), TexRGBA_D, 4); - TexRGBA_A = vld4q_lane_u8(TexPTR0 + LayerPitch, TexRGBA_A, 8); - TexRGBA_B = vld4q_lane_u8(TexPTR1 + LayerPitch, TexRGBA_B, 8); - TexRGBA_C 
= vld4q_lane_u8(TexPTR2 + LayerPitch, TexRGBA_C, 8); - TexRGBA_D = vld4q_lane_u8(TexPTR3 + LayerPitch, TexRGBA_D, 8); - TexRGBA_A = vld4q_lane_u8(TexPTR0 + LayerPitch + sizeof(uint32), TexRGBA_A, 12); - TexRGBA_B = vld4q_lane_u8(TexPTR1 + LayerPitch + sizeof(uint32), TexRGBA_B, 12); - TexRGBA_C = vld4q_lane_u8(TexPTR2 + LayerPitch + sizeof(uint32), TexRGBA_C, 12); - TexRGBA_D = vld4q_lane_u8(TexPTR3 + LayerPitch + sizeof(uint32), TexRGBA_D, 12); - - uint32x4_t test = (uint32x4_t)TexRGBA_A.val[0]; - - float32x4_t asd = vcvtq_f32_u32((uint32x4_t)TexRGBA_A.val[0]); - float32x4_t pp = vaddq_f32(vmulq_f32(TexBothInv, vcvtq_f32_u32((uint32x4_t)TexRGBA_A.val[0])), - vmulq_f32(TexBothYInv, vcvtq_f32_u32((uint32x4_t)TexRGBA_B.val[0]))); - - uint32x4_t test2 = (uint32x4_t)TexRGBA_A.val[0]; - -#if 0 - float32x4_t PixelBlendR = vaddq_f32(vaddq_f32(vmulq_f32(TexBothInv, vcvtq_f32_u32((uint32x4_t)TexRGBA_A.val[0])), - vmulq_f32(TexBothYInv, vcvtq_f32_u32((uint32x4_t)TexRGBA_B.val[0]))), - vaddq_f32(vmulq_f32(TexBothXInv, vcvtq_f32_u32((uint32x4_t)TexRGBA_C.val[0])), - vmulq_f32(TexBoth, vcvtq_f32_u32((uint32x4_t)TexRGBA_D.val[0])))); - - float32x4_t PixelBlendG = vaddq_f32(vaddq_f32(vmulq_f32(TexBothInv, vcvtq_f32_u32((uint32x4_t)TexRGBA_A.val[1])), - vmulq_f32(TexBothYInv, vcvtq_f32_u32((uint32x4_t)TexRGBA_B.val[1]))), - vaddq_f32(vmulq_f32(TexBothXInv, vcvtq_f32_u32((uint32x4_t)TexRGBA_C.val[1])), - vmulq_f32(TexBoth, vcvtq_f32_u32((uint32x4_t)TexRGBA_D.val[1])))); - - float32x4_t PixelBlendB = vaddq_f32(vaddq_f32(vmulq_f32(TexBothInv, vcvtq_f32_u32((uint32x4_t)TexRGBA_A.val[2])), - vmulq_f32(TexBothYInv, vcvtq_f32_u32((uint32x4_t)TexRGBA_B.val[2]))), - vaddq_f32(vmulq_f32(TexBothXInv, vcvtq_f32_u32((uint32x4_t)TexRGBA_C.val[2])), - vmulq_f32(TexBoth, vcvtq_f32_u32((uint32x4_t)TexRGBA_D.val[2])))); - - float32x4_t PixelBlendA = vaddq_f32(vaddq_f32(vmulq_f32(TexBothInv, vcvtq_f32_u32((uint32x4_t)TexRGBA_A.val[3])), - vmulq_f32(TexBothYInv, 
vcvtq_f32_u32((uint32x4_t)TexRGBA_B.val[3]))), - vaddq_f32(vmulq_f32(TexBothXInv, vcvtq_f32_u32((uint32x4_t)TexRGBA_C.val[3])), - vmulq_f32(TexBoth, vcvtq_f32_u32((uint32x4_t)TexRGBA_D.val[3])))); -#endif - float32x4_t PixelBlendR = vcvtq_f32_u32((uint32x4_t)TexRGBA_A.val[0]); - float32x4_t PixelBlendG = vcvtq_f32_u32((uint32x4_t)TexRGBA_A.val[1]); - float32x4_t PixelBlendB = vcvtq_f32_u32((uint32x4_t)TexRGBA_A.val[2]); - float32x4_t PixelBlendA = vcvtq_f32_u32((uint32x4_t)TexRGBA_A.val[3]); - - // __m128 PixelBlendR = _mm_add_ps(_mm_add_ps(_mm_mul_ps(TexBothInv, TexARx4), - // _mm_mul_ps(TexBothYInv, TexBRx4)), - // _mm_add_ps(_mm_mul_ps(TexBothXInv, TexCRx4), - // _mm_mul_ps(TexBoth, TexDRx4))); - - PixelBlendA = vsubq_f32(PixelBlendA, vmulq_f32(PixelBlendA, LayerOpacity)); - uint32x4_t Output = vorrq_u32(vorrq_u32(vshlq_n_u32(vcvtq_u32_f32(PixelBlendR), 16), - vshlq_n_u32(vcvtq_u32_f32(PixelBlendA), 24)), - (vorrq_u32(vshlq_n_u32(vcvtq_u32_f32(PixelBlendG), 8), - vcvtq_u32_f32(PixelBlendB)))); - - uint32 ma[4] = {0xFFFFFFFF, 0, 0, 0}; - uint32x4_t mask = vld1q_u32(ma); - Output = vandq_u32(Output, mask); - vst1q_u32(Pixel, Output); - + int32x4_t XLookup = vaddq_u32(vmulq_u32(vshrq_n_u32(TexXInt, 2), Sixteeni), + vandq_u32(TexXInt, BottomTwoBits)); + int32x4_t YLookup = vaddq_u32(vmulq_u32(vshrq_n_u32(TexYInt, 2), FullLayerWidth4i), + vmulq_u32(vandq_u32(TexYInt, BottomTwoBits), Fouri)); + int32x4_t XLookupPlusOne = vaddq_u32(vmulq_u32(vshrq_n_u32(TexXIntPlusOne, 2), Sixteeni), + vandq_u32(TexXIntPlusOne, BottomTwoBits)); + int32x4_t YLookupPlusOne = vaddq_u32(vmulq_u32(vshrq_n_u32(TexYIntPlusOne, 2), FullLayerWidth4i), + vmulq_u32(vandq_u32(TexYIntPlusOne, BottomTwoBits), Fouri)); + + int32x4_t PixelLookupTL = vaddq_u32(XLookup, YLookup); + int32x4_t PixelLookupTR = vaddq_u32(XLookupPlusOne, YLookup); + int32x4_t PixelLookupBL = vaddq_u32(XLookup, YLookupPlusOne); + int32x4_t PixelLookupBR = vaddq_u32(XLookupPlusOne, YLookupPlusOne); + + // I thought NEON had 
gather/scatter, but it appears it doesn't... } - Pixel++; - PixelX = vaddq_f32(PixelX, One); + + PixelX = vaddq_f32(PixelX, Four); } - Row += BufferPitch; } - } + #else static void @@ -1,5 +1,16 @@ // get ready for some MLG... +// The undo system currently works in two layers: a lower-level structless +// stack that records "actions" and a higher-level array of "entries" that +// bundle actions together. An action can either record a single change to a +// specific memory address or a shift of pointers. Entries are what the user +// sees and can contain multiple actions (i.e. adding a source changes the +// value of the string and the increment of how many sources there are). +// Entries are allowed to call functions in case there's something that can't +// be incorporated into this memory model (i.e. deallocating libav contexts +// when a layer's creation is undone), though they should be used only +// when necessary. + // These get four things pushed together: the address of what's being // changed, what type the data is, the original data, and the type again. // The type is encoded twice so we always know how big the data is whether @@ -92,18 +103,19 @@ void Action_Change_Commit(memory *Memory, void *DataAddress, void *OriginalData, *(action_change_type *)Data = ActionChange; } -void Action_Entry_Begin(memory *Memory, action_entry_type Type, char *Name) -{ - Memory->Action.Entry[Memory->Action.Index].Name = Name; - Memory->Action.Entry[Memory->Action.Index].Type = Type; -} - -void Action_Entry_Begin2(memory *Memory, action_entry_type Type, char *Name, char *Name2) +// This is only called when we're certain the action is going to be taken. +void Action_Entry_Commit(memory *Memory, action_entry_type Type, char *Name) { + // We need to at least clear NumberOfActions in case this index is being reused. 
+ Memory->Action.Entry[Memory->Action.Index] = {}; Memory->Action.Entry[Memory->Action.Index].Name = Name; Memory->Action.Entry[Memory->Action.Index].Type = Type; - Memory->Action.Entry[Memory->Action.Index + 1].Name = Name2; - Memory->Action.Entry[Memory->Action.Index + 1].Type = Type; + // Effectively deletes entries in front if we're beginning out of an undo. + // It wouldn't be all that much more difficult to support branched undoing + // now that I think about it... (would anyone use it though?) + if (Memory->Action.Index != Memory->Action.NumberOfEntries) { + Memory->Action.NumberOfEntries = Memory->Action.Index; + } } void Action_Entry_SetPointer(memory *Memory, void *Data) @@ -247,35 +259,39 @@ void Action_Change_Redo(memory *Memory) { } void Action_Undo(memory *Memory) { - Memory->Action.Index--; - action_entry Entry = Memory->Action.Entry[Memory->Action.Index]; - switch (Entry.Type) - { - case action_entry_layerinit: + if (Memory->Action.Index != 0) { + Memory->Action.Index--; + action_entry Entry = Memory->Action.Entry[Memory->Action.Index]; + switch (Entry.Type) { - AV_Dealloc((av_info *)*(ptrsize *)Entry.ExtraPointer); - *(ptrsize *)Entry.ExtraPointer = 0x0; // what actually dereferences the pointer - } break; - case action_entry_default: - { - } break; + case action_entry_layerinit: + { + AV_Dealloc((av_info *)*(ptrsize *)Entry.ExtraPointer); + *(ptrsize *)Entry.ExtraPointer = 0x0; // what actually dereferences the pointer + } break; + case action_entry_default: + { + } break; + } + for (int i = 0; i < Entry.NumberOfActions; i++) + Action_Change_Undo(Memory); } - for (int i = 0; i < Entry.NumberOfActions; i++) - Action_Change_Undo(Memory); } void Action_Redo(memory *Memory) { - action_entry Entry = Memory->Action.Entry[Memory->Action.Index]; - switch (Entry.Type) - { - case action_entry_layerinit: + if (Memory->Action.Index != Memory->Action.NumberOfEntries) { + action_entry Entry = Memory->Action.Entry[Memory->Action.Index]; + switch (Entry.Type) { - 
} break; - case action_entry_default: - { - } break; + case action_entry_layerinit: + { + } break; + case action_entry_default: + { + } break; + } + for (int i = 0; i < Entry.NumberOfActions; i++) + Action_Change_Redo(Memory); + Memory->Action.Index++; } - for (int i = 0; i < Entry.NumberOfActions; i++) - Action_Change_Redo(Memory); - Memory->Action.Index++; } |