diff options
-rw-r--r-- | bezier.cpp | 4 | ||||
-rw-r--r-- | bitmap_calls.cpp | 7 | ||||
-rwxr-xr-x | build.sh | 8 | ||||
-rw-r--r-- | createcalls.cpp | 132 | ||||
-rw-r--r-- | debug.h | 34 | ||||
-rw-r--r-- | defines.h | 12 | ||||
-rw-r--r-- | ffmpeg_backend.cpp | 33 | ||||
-rw-r--r-- | functions.h | 3 | ||||
-rw-r--r-- | keyframes.cpp | 78 | ||||
-rw-r--r-- | layer.cpp | 6 | ||||
-rw-r--r-- | main.cpp | 30 | ||||
-rw-r--r-- | main.h | 5 | ||||
-rw-r--r-- | my_imgui_widgets.cpp | 64 | ||||
-rw-r--r-- | prenderer.cpp | 149 | ||||
-rw-r--r-- | undo.cpp | 7 |
15 files changed, 366 insertions, 206 deletions
@@ -177,6 +177,10 @@ Mask_PushPoint(mask *Mask, v2 Pos) Mask->NumberOfPoints++; } +Mask_DeletePoint(mask *Mask, uint32 p) +{ +} + static void Mask_AddPointToCurve(mask *Mask, uint16 Index, real32 ratio) { diff --git a/bitmap_calls.cpp b/bitmap_calls.cpp index 5d549c2..a67cd0b 100644 --- a/bitmap_calls.cpp +++ b/bitmap_calls.cpp @@ -19,6 +19,7 @@ // never even did any measuring to see if there was any speedup-- but I // couldn't resist it. I like doing the software rendering stuff. + // TODO(fox): I could write an AVX version of this function, but it may not be // that much faster since we have to do a bit of uninterleaving. @@ -29,11 +30,11 @@ void Bitmap_ConvertPacking(void *Buffer, void *DestBuffer, uint16 Width, uint16 uint8 *Src = (uint8 *)Buffer; uint8 *Temp = (uint8 *)DestBuffer; uint32 RemainderPixels = Width % 4; + uint16 WidthP, HeightP; + Bitmap_CalcPackedDimensions(Width, Height, &WidthP, &HeightP); for (uint32 Y = 0; Y < Height; Y++) { uint32 X = 0; while (X < Width - RemainderPixels) { - uint16 WidthP, HeightP; - Bitmap_CalcPackedDimensions(Width, Height, &WidthP, &HeightP); uint32 XLookup = (X >> 2)*16 + (X % 4); uint32 YLookup = (Y >> 2)*(WidthP*4) + (Y % 4)*4; uint32 PixelToSeek = XLookup + YLookup; @@ -210,8 +211,6 @@ Bitmap_CopyToPointer(void *Input, void *Output, uint16 BytesPerPixel, uint64 Tot uint64 RemainderBytes = TotalBytes % ByteOffset; while (bytes <= TotalBytes - RemainderBytes) { - if (bytes > 2496*4) - int pp = 0; uint8 *Pixel = (uint8 *)Row + bytes; uint8 *Pixel2 = (uint8 *)Row2 + bytes; #if ARM @@ -5,6 +5,8 @@ IMGUI=0 # Compile ImGui libs. Our custom ImGui functions still compi THREADED=1 # Compile with threading. Useful to disable when stepping through the renderer. WINDOWS=0 # Compile for Windows with Mingw. ARM=0 # Compile on ARM machines. +PACKEDRGB=0 # Use 4x4 chunks for the software rasterizer. +PERF=0 # Print cycle stats. FFMPEG_LIBS=" libavdevice @@ -60,6 +62,12 @@ ADDITIONAL_FLAGS=" -march=native " fi +if [[ "$PACKEDRGB" == 1 ]]; then +WARNING_FLAGS="$WARNING_FLAGS -DPACKEDRGB=1" +fi +if [[ "$PERF" == 1 ]]; then +WARNING_FLAGS="$WARNING_FLAGS -DPERF=1" +fi if [[ "$OSTYPE" =~ ^darwin ]]; then IMGUI_FLAGS=" diff --git a/createcalls.cpp b/createcalls.cpp index 5564264..2e754af 100644 --- a/createcalls.cpp +++ b/createcalls.cpp @@ -12,25 +12,13 @@ Source_Generate(project_data *File, memory *Memory, void *Path) { Assert(File->NumberOfSources < MAX_SOURCES); source *Source = &File->Source[File->NumberOfSources]; + bool32 IsVideo = 0; - bool32 Found = 0; - - /* - // TODO(fox): Unbreak stbi! - int w, h; - if (stbi_info(Path, &w, &h, NULL)) { - Source->SourceType = source_type_image; - Found = true; - } - */ - - // TODO(fox): This cast won't work above STRING_MAX. - if (!Found && AV_IsFileSupported((char *)Path)) { - Source->SourceType = source_type_video; - Found = true; - } - - if (Found) { + if (AV_IsFileSupported((char *)Path, &IsVideo)) { + if (IsVideo) + Source->SourceType = source_type_video; + else + Source->SourceType = source_type_image; Action_Entry_Commit(Memory, action_entry_default, "Add source"); Action_Change_Commit(Memory, &Source->Path, &Source->Path, &Path, action_change_ptr); uint32 i = File->NumberOfSources + 1; @@ -44,32 +32,6 @@ Source_Generate(project_data *File, memory *Memory, void *Path) return 0; } - /* -static pixel_buffer -LoadImage(memory *Memory, char *filename) -{ - pixel_buffer Buffer = {}; - Buffer.BytesPerPixel = 4; - - int n = 0; - int h, w; - void *temp = stbi_load(filename, &w, &h, &n, 4); - // printf("%s", stbi_failure_reason()); - Buffer.Height = h; - Buffer.Width = w; - CalculateFull(&Buffer); - Buffer.Pitch = Buffer.FullWidth*Buffer.BytesPerPixel; - // TODO(fox): Implement custom malloc in stbi so we don't have to do this. - Buffer.OriginalBuffer = MoveImportToBitmap(Memory, &Buffer, temp); - stbi_image_free(temp); - Buffer.EffectBuffer = AllocateMemory(Memory, Buffer.FullWidth * Buffer.FullHeight * Buffer.BytesPerPixel, B_Scratch); - BitmapPackRGB(&Buffer); - Buffer.ToUpdate = true; - return Buffer; -} - - */ - static property_channel InitFloatProperty(char *Name, real32 Val, real32 ScrubVal, real32 MinVal = PROPERTY_REAL_MIN, real32 MaxVal = PROPERTY_REAL_MAX) { property_channel Property = {}; @@ -83,7 +45,6 @@ InitFloatProperty(char *Name, real32 Val, real32 ScrubVal, real32 MinVal = PROPE return Property; } - static void CreateKeyframeBlock(property_channel *Property, memory *Memory) { @@ -211,15 +172,29 @@ Mask_RasterizePoints(mask *Mask) if (i+1 == Mask->NumberOfPoints) Point1 = Mask->Point[0]; - if (Point0.HandleBezier || Point1.HandleBezier) { + if (Point0.HandleBezier && Point1.HandleBezier) { Bezier_CubicCalcPoints(Point0.Pos, Point0.Pos + Point0.TangentRight, Point1.Pos + Point1.TangentLeft, Point1.Pos, Mask->TriangulatedPointCache, &Mask->NumberOfVerts); + } else if (Point0.HandleBezier) { + Bezier_CubicCalcPoints(Point0.Pos, Point0.Pos + Point0.TangentRight, Point1.Pos, Point1.Pos, + Mask->TriangulatedPointCache, &Mask->NumberOfVerts); + } else if (Point1.HandleBezier) { + Bezier_CubicCalcPoints(Point0.Pos, Point0.Pos, Point1.Pos + Point1.TangentLeft, Point1.Pos, + Mask->TriangulatedPointCache, &Mask->NumberOfVerts); } else { real32 *Data = (real32 *)Mask->TriangulatedPointCache + Mask->NumberOfVerts*3; *(Data++) = Point0.Pos.x; *(Data++) = Point0.Pos.y; *(Data++) = 0; - Mask->NumberOfVerts += 1; + // NOTE(fox): CubicCalcPoints sometimes misses generating the start + // point of the next path in the above two cases, so I'm making + // straight lines always add both points as a hotfix. This leads + // to cases of duplicate verts, but it doesn't seem like it harms + // the rendering in any way. + *(Data++) = Point1.Pos.x; + *(Data++) = Point1.Pos.y; + *(Data++) = 0; + Mask->NumberOfVerts += 2; } } } @@ -354,7 +329,7 @@ Layer_UpdateBitmap(project_data *File, project_layer *Layer, memory *Memory, int cached_bitmap *Bitmap = Cache_CheckBitmap(Source, BitmapInfo, Memory, CurrentFrame); if (!Bitmap) { if (Source->SourceType == source_type_image) { - Bitmap = STB_LoadStill(Source, BitmapInfo, Memory); + Bitmap = AV_LoadVideoFrame(Source, BitmapInfo, Memory, 1); } else { Bitmap = AV_LoadVideoFrame(Source, BitmapInfo, Memory, CurrentFrame); } @@ -364,26 +339,35 @@ Layer_UpdateBitmap(project_data *File, project_layer *Layer, memory *Memory, int uint16 BytesPerPixel = Source->Info.BytesPerPixel; void *DestBuffer = BitmapInfo->BitmapBuffer; uint64 UnpackedSize = Bitmap_CalcUnpackedBytes(Source->Info.Width, Source->Info.Height, Source->Info.BytesPerPixel); +#if PACKEDRGB uint64 PackedSize = Bitmap_CalcTotalBytes(Source->Info.Width, Source->Info.Height, Source->Info.BytesPerPixel); - Bitmap_CopyToPointer(Bitmap->Data, DestBuffer, BytesPerPixel, UnpackedSize); - - TestGL_InitTexture(&BitmapInfo->Test, DestBuffer, Width, Height); - if (Layer->NumberOfMasks) { - for (int i = 0; i < Layer->NumberOfMasks; i++) { - mask *Mask = &Layer->Mask[i]; - if (Mask->IsClosed) - Mask_TriangulateAndRasterize(Memory, Layer, Mask); + if (Layer->NumberOfMasks == 0 && Layer->NumberOfEffects == 0) { + Bitmap_ConvertPacking(Bitmap->Data, Memory->Scratch, Width, Height, BytesPerPixel, 0); + Bitmap_CopyToPointer(Memory->Scratch, DestBuffer, BytesPerPixel, PackedSize); + } else { + Bitmap_CopyToPointer(Bitmap->Data, DestBuffer, BytesPerPixel, UnpackedSize); + TestGL_InitTexture(&BitmapInfo->Test, DestBuffer, Width, Height); + + if (Layer->NumberOfMasks) { + for (int i = 0; i < Layer->NumberOfMasks; i++) { + mask *Mask = &Layer->Mask[i]; + if (Mask->IsClosed) + Mask_TriangulateAndRasterize(Memory, Layer, Mask); + } } - } - for (int i = 0; i < Layer->NumberOfEffects; i++) - { - if (Layer->Effect[i]->IsActive) - Layer->Effect[i]->func(Source, BitmapInfo, Memory, Layer->Effect[i]->Property); + for (int i = 0; i < Layer->NumberOfEffects; i++) + { + if (Layer->Effect[i]->IsActive) + Layer->Effect[i]->func(Source, BitmapInfo, Memory, Layer->Effect[i]->Property); + } + Bitmap_ConvertPacking(DestBuffer, Memory->Scratch, Width, Height, BytesPerPixel, 0); + Bitmap_CopyToPointer(Memory->Scratch, DestBuffer, BytesPerPixel, PackedSize); } - Bitmap_ConvertPacking(DestBuffer, Memory->Scratch, Width, Height, BytesPerPixel, 0); - Bitmap_CopyToPointer(Memory->Scratch, DestBuffer, BytesPerPixel, PackedSize); +#else + Bitmap_CopyToPointer(Bitmap->Data, DestBuffer, BytesPerPixel, UnpackedSize); +#endif } static ImVec2 @@ -422,10 +406,19 @@ LoadTestFootage(project_data *File, project_state *State, memory *Memory) Source_Generate(File, Memory, SourceString); source *Source = &File->Source[0]; Layer_CreateFromSource(File, State, Memory, Source); - // Action_Undo(Memory); - // Action_Redo(Memory); - SelectLayer(File->Layer[0], State, 0); + property_channel *Property = &File->Layer[0]->x; + // for (int i = 0; i < 15; i++) + // ManualKeyframeInsertF(Property, Memory, i*2, i*2*100); + ManualKeyframeInsertF(Property, Memory, 1, 100); + ManualKeyframeInsertF(Property, Memory, 3, 300); + ManualKeyframeInsertF(Property, Memory, 23, 2300); + ManualKeyframeInsertF(Property, Memory, 5, 500); + Property->IsToggled = true; + Property->IsGraphToggled = true; + Property->GraphLength = 150; + Property->GraphYOffset = (Property->GraphWindowHeight - Property->GraphLength)/2; + // AddEffect(File->Layer[0], Memory, 3); /* @@ -484,9 +477,12 @@ LoadTestFootage(project_data *File, project_state *State, memory *Memory) // Layer2->StartFrame = 11; // Layer2->EndFrame = 23; - // if (!Source_Generate(File, Memory, "../asset/b.jpg")) + // void *SourceString1 = String_GenerateFromChar(Memory, "../asset/b.jpg"); + // if (!Source_Generate(File, Memory, SourceString1)) // PostMsg(State, "File open fail..."); - // source *Source2 = &File->Source[2]; + // source *Source1 = &File->Source[1]; + // for (int i = 0; i < 25; i++) + // Layer_CreateFromSource(File, State, Memory, Source1); // project_layer *Layer2 = Layer_Init(File, Memory); // Layer_InitSource(Layer2, Source2, Memory); @@ -15,6 +15,10 @@ union debugval { int32 i; }; +enum PerfID { + perf_renderframe +}; + // things that get cleared every frame with the UI struct debug_temp { @@ -28,16 +32,22 @@ struct project_debug { debug_temp Temp; bool32 ToggleWindow; + uint64 PixelCountTransparent; + uint64 PixelCountRendered; + uint64 PixelCountChecked; + // NOTE(fox): Pixel count isn't thread safe; don't use with multithreading! + uint64 LayerCycleCount[64]; }; static project_debug Debug; -#if ARM -#define DEBUG_CycleCountStart(ID) -#define DEBUG_CycleCountEnd(ID) + +#if DEBUG +#define Debug_CycleCountStart(ID) Debug.CycleCount[ID] = __rdtsc(); +#define Debug_CycleCountEnd(ID) Debug.EndCycleCount[ID] += __rdtsc() - Debug.CycleCount[ID]; Debug.ExecutionAmount[ID]++; #else -#define DEBUG_CycleCountStart(ID) Debug.CycleCount[ID] = __rdtsc(); -#define DEBUG_CycleCountEnd(ID) Debug.EndCycleCount[ID] += __rdtsc() - Debug.CycleCount[ID]; Debug.ExecutionAmount[ID]++; +#define Debug_CycleCountStart(ID) +#define Debug_CycleCountEnd(ID) #endif static void @@ -80,3 +90,17 @@ DebugWatchVar(char *Name, void *Address, valtype Type) { } #endif +#ifdef PERF + +struct perf_stats +{ + uint64 PixelCountTransparent; + uint64 PixelCountRendered; + uint64 PixelCountChecked; +}; + +static perf_stats Perf; +static uint64 Test; + +#endif + @@ -1,5 +1,3 @@ -#define AmountOf(Array) sizeof((Array)) / sizeof((Array)[1]) - typedef int8_t int8; typedef int16_t int16; typedef int32_t int32; @@ -15,8 +13,7 @@ typedef uint64_t uint64; typedef float real32; typedef double real64; -// is there a compiler variable for 32 vs 64 bit like this? -typedef uint64 ptrsize; +typedef uint64 ptrsize; // is there a compiler variable for 32 vs 64 bit like this? #define NORMALIZED_COL_MIN { .col = V4(0.0f, 0.0f, 0.0f, 0.0f) } #define NORMALIZED_COL_MAX { .col = V4(1.0f, 1.0f, 1.0f, 1.0f) } @@ -36,10 +33,11 @@ typedef uint64 ptrsize; #define MAX_SOURCES 1024 #define MAX_MASKS 8 #define MAX_PROPERTIES_PER_EFFECT 16 -// max keyframes on a single channel is 2048 #define MAX_KEYFRAME_BLOCKS 64 -#define MAX_KEYFRAMES_PER_BLOCK 32 -#define STRING_SIZE 1024 +#define MAX_KEYFRAMES_PER_BLOCK 32 // max keyframes on a single channel is 2048 +#define STRING_SIZE 1024 // TODO(fox): Paths above STRING_SIZE length aren't handled properly. #define MAX_SELECTED_PROPERTIES 16 +#define AmountOf(Array) sizeof((Array)) / sizeof((Array)[1]) + diff --git a/ffmpeg_backend.cpp b/ffmpeg_backend.cpp index 6f0aeb5..03d3214 100644 --- a/ffmpeg_backend.cpp +++ b/ffmpeg_backend.cpp @@ -6,6 +6,7 @@ extern "C" { #include <libswscale/swscale.h> } +/* // workaround to make libav error printing work #ifdef av_err2str #undef av_err2str @@ -16,6 +17,7 @@ av_always_inline std::string av_err2string(int errnum) { } #define av_err2str(err) av_err2string(err).c_str() #endif // av_err2str +*/ #include "ffmpeg_backend.h" @@ -54,9 +56,7 @@ bool32 AV_TryFrame(av_info *AV, int32 *err) return 0; } -// TODO(fox): Could be combined into AV_Init once we have dealloc functions for -// the AVInfo allocation. -bool32 AV_IsFileSupported(char *filename) +bool32 AV_IsFileSupported(char *filename, bool32 *IsVideo) { int32 err = 0; @@ -81,9 +81,22 @@ bool32 AV_IsFileSupported(char *filename) return 0; } - avformat_free_context(temp); + for (uint32 i = 0; i < temp->nb_streams; i++) + { + AVCodecParameters *LocalCodecParameters = NULL; + LocalCodecParameters = temp->streams[i]->codecpar; + if (LocalCodecParameters->codec_type == AVMEDIA_TYPE_VIDEO) { + if (temp->streams[i]->duration > 1) { + *IsVideo = true; + } + avformat_free_context(temp); + return 1; + } + } - return 1; + printf("Libav error: No video track found."); + + return 0; } // Note that we can't get away with not having to keep track of the AV pointer @@ -142,7 +155,7 @@ void AV_Init(source *Source, av_info *AV, memory *Memory) AV->VideoCodec = avcodec_find_decoder(AV->VideoCodecParameters->codec_id); if (!AV->VideoCodec) { - printf("Libav error: Video codec could not be identified."); + printf("Libav error: Suitable decoder could not be identified for codec:\n"); } /* int16 codecs = 0; @@ -261,7 +274,7 @@ cached_bitmap * AV_LoadVideoFrame(source *Source, layer_bitmap_info *BitmapInfo, // first frame, so we need to check until it's correct. if (FrameToSeek == 0 && AV->VideoFrame->pts != AV->VideoStream->start_time) { av_frame_unref(AV->VideoFrame); - printf("NON-START: avg: %li, real pts: %li", SeekPTS, AV->VideoFrame->pts); + // printf("NON-START: avg: %li, real pts: %li", SeekPTS, AV->VideoFrame->pts); continue; } @@ -270,9 +283,9 @@ cached_bitmap * AV_LoadVideoFrame(source *Source, layer_bitmap_info *BitmapInfo, { if (AV->PreviousPTS == -1) { AV->PreviousPTS = AV->VideoFrame->pts; - printf("avg: %li, real pts: %li, difference: %li\n", SeekPTS, AV->VideoFrame->pts, Difference); + // printf("avg: %li, real pts: %li, difference: %li\n", SeekPTS, AV->VideoFrame->pts, Difference); } else { - printf("avg: %li, real pts: %li, difference: %li difference from last pts: %li\n", SeekPTS, AV->VideoFrame->pts, AV->VideoFrame->pts - SeekPTS, AV->VideoFrame->pts - AV->PreviousPTS); + // printf("avg: %li, real pts: %li, difference: %li difference from last pts: %li\n", SeekPTS, AV->VideoFrame->pts, AV->VideoFrame->pts - SeekPTS, AV->VideoFrame->pts - AV->PreviousPTS); AV->PreviousPTS = AV->VideoFrame->pts; } @@ -308,7 +321,7 @@ cached_bitmap * AV_LoadVideoFrame(source *Source, layer_bitmap_info *BitmapInfo, else { // If this gets printed when not seeking, a frame has been skipped! - printf("FRAME SKIP: avg: %li, real pts: %li, difference: %li\n", SeekPTS, AV->VideoFrame->pts, Difference); + // printf("FRAME SKIP: avg: %li, real pts: %li, difference: %li\n", SeekPTS, AV->VideoFrame->pts, Difference); } av_frame_unref(AV->VideoFrame); } diff --git a/functions.h b/functions.h index 654c709..39e4f8d 100644 --- a/functions.h +++ b/functions.h @@ -16,7 +16,7 @@ static bool32 Source_Generate(project_data *File, memory *Memory, void *Path); / // Libav (ffmpeg) backend for decoding video -static bool32 AV_IsFileSupported(char *filename); // Tests whether a decoder is available for a given file. +static bool32 AV_IsFileSupported(char *filename, bool32 *IsVideo); static void AV_Init(char *filename, source *Source, memory *Memory); // Initializes all internal structs and calculates average PTS. static cached_bitmap * AV_LoadVideoFrame(source *Source, memory *Memory, int32 TimelineFrame); // Loads video frame at TimelineFrame. @@ -33,6 +33,7 @@ static v2 ImGui_ScreenPointToCompUV(ImVec2 ViewportMin, ImVec2 CompPos, ImVec2 C void Bezier_CubicCalcPoints(v2 p1, v2 p2, v2 p3, v2 p4, void *Data, uint32 *Increment); static void Action_Change_Commit(memory *Memory, void *DataLocation, void *OriginalData, void *NewData, action_change_type ActionChange); +static void Action_Change_Commit_SwapBool(memory *Memory, bool32 *Bool); static void Action_Entry_SetPointer(memory *Memory, void *Data); static void Action_Entry_Commit(memory *Memory, action_entry_type Type, char *Name); static void Action_Entry_End(memory *Memory); diff --git a/keyframes.cpp b/keyframes.cpp index 25c41c0..c9f7813 100644 --- a/keyframes.cpp +++ b/keyframes.cpp @@ -1,5 +1,5 @@ static keyframe* -KeyframeLookupMemory(property_channel *Property, int16 i) { +KeyframeLookup(property_channel *Property, int16 i) { int16 b = i / MAX_KEYFRAMES_PER_BLOCK; int16 k = i - b*MAX_KEYFRAMES_PER_BLOCK; return &Property->KeyframeBlock[b]->Keyframe[k]; @@ -21,7 +21,7 @@ GetSelectedKeyframes(project_data *File) for (int i = 0; i < File->NumberOfLayers; i++) { for (int a = 0; a < AmountOf(File->Layer[i]->Property); a++) { for (int l = 0; l < File->Layer[i]->Property[a].NumberOfTotalKeyframes; l++) { - keyframe *Keyframe = KeyframeLookupMemory(&File->Layer[i]->Property[a], l); + keyframe *Keyframe = KeyframeLookup(&File->Layer[i]->Property[a], l); if (Keyframe->IsSelected) { KeyframeList.SelectedKeyframe[z] = Keyframe; z++; @@ -104,8 +104,8 @@ DeleteKeyframeFromMemory(property_channel *Property, int16 Increment, int16 Stop if (Increment > 0) { int16 i = Property->NumberOfTotalKeyframes - 1; while (i > StopAt) { - keyframe *CurrentKeyframe = KeyframeLookupMemory(Property, i); - keyframe *NextKeyframe = KeyframeLookupMemory(Property, i + Increment); + keyframe *CurrentKeyframe = KeyframeLookup(Property, i); + keyframe *NextKeyframe = KeyframeLookup(Property, i + Increment); *NextKeyframe = *CurrentKeyframe; i--; } @@ -113,8 +113,8 @@ DeleteKeyframeFromMemory(property_channel *Property, int16 Increment, int16 Stop } else { int16 i = StopAt; while (i < Property->NumberOfTotalKeyframes - 1) { - keyframe *CurrentKeyframe = KeyframeLookupMemory(Property, i); - keyframe *NextKeyframe = KeyframeLookupMemory(Property, i - Increment); + keyframe *CurrentKeyframe = KeyframeLookup(Property, i); + keyframe *NextKeyframe = KeyframeLookup(Property, i - Increment); *CurrentKeyframe = *NextKeyframe; i++; } @@ -155,12 +155,12 @@ DeleteSelectedKeyframes(project_data *File, memory *Memory) for (int a = 0; a < AmountOf(File->Layer[i]->Property); a++) { property_channel *Property = &File->Layer[i]->Property[a]; for (int l = 0; l < Property->NumberOfTotalKeyframes; l++) { - keyframe *Keyframe = KeyframeLookupMemory(Property, l); + keyframe *Keyframe = KeyframeLookup(Property, l); if (Keyframe->IsSelected) { int16 ToShift = 1; bool32 Until = true; while (Until) { - keyframe *KeyframeN = KeyframeLookupMemory(Property, l + ToShift); + keyframe *KeyframeN = KeyframeLookup(Property, l + ToShift); if (KeyframeN->IsSelected) { ToShift += 1; } else { @@ -180,7 +180,7 @@ CalculatePropertyMinMax(property_channel *Property) { Property->LocalMaxVal = Property->MinVal; Property->LocalMinVal = Property->MaxVal; for (int16 i = 0; i < Property->NumberOfTotalKeyframes; i++) { - keyframe *Keyframe = KeyframeLookupMemory(Property, i); + keyframe *Keyframe = KeyframeLookup(Property, i); Property->LocalMinVal.f = Ceil(Property->LocalMinVal.f, Keyframe->Value.f); Property->LocalMaxVal.f = Floor(Property->LocalMaxVal.f, Keyframe->Value.f); } @@ -220,8 +220,8 @@ static void ClampKeyframeHandles(property_channel *Property, int16 b, int16 dir) { if (dir == 0) { if (b > 0) { - keyframe *Keyframe = KeyframeLookupMemory(Property, b - 1); - keyframe *NextKeyframe = KeyframeLookupMemory(Property, b); + keyframe *Keyframe = KeyframeLookup(Property, b - 1); + keyframe *NextKeyframe = KeyframeLookup(Property, b); real32 XSpan = NextKeyframe->FrameNumber - Keyframe->FrameNumber; // TODO(fox): Fix this! #if WINDOWS @@ -236,8 +236,8 @@ ClampKeyframeHandles(property_channel *Property, int16 b, int16 dir) { } if (dir == 1) { if (b < Property->NumberOfTotalKeyframes - 1) { - keyframe *Keyframe = KeyframeLookupMemory(Property, b); - keyframe *NextKeyframe = KeyframeLookupMemory(Property, b + 1); + keyframe *Keyframe = KeyframeLookup(Property, b); + keyframe *NextKeyframe = KeyframeLookup(Property, b + 1); real32 XSpan = NextKeyframe->FrameNumber - Keyframe->FrameNumber; if (Keyframe->TangentRight.x > XSpan) Keyframe->TangentRight.x = XSpan; @@ -266,36 +266,55 @@ ClampSurroundingKeyframeHandles(property_channel *Property, int16 b) { static void ManualKeyframeInsertF(property_channel *Property, memory *Memory, int32 CurrentFrame, real32 Val) { - /* if (!(Property->NumberOfTotalKeyframes % MAX_KEYFRAMES_PER_BLOCK)) { CreateKeyframeBlock(Property, Memory); } keyframe *Keyframe = NULL; if (Property->NumberOfTotalKeyframes == 0) { Keyframe = &Property->KeyframeBlock[0]->Keyframe[0]; - } else if (Property->NumberOfTotalKeyframes == 1) { - keyframe *FirstKeyframe = &Property->KeyframeBlock[0]->Keyframe[0]; - if (CurrentFrame > FirstKeyframe->FrameNumber) { - Keyframe = &Property->KeyframeBlock[0]->Keyframe[1]; - } else { - Keyframe = &Property->KeyframeBlock[0]->Keyframe[0]; - } + Property->NumberOfTotalKeyframes++; } else { - uint32 Index = Property->NumberOfTotalKeyframes - 1; + uint32 Index = Property->NumberOfTotalKeyframes; bool32 Found = false; while (!Found) { - keyframe *CurrentKeyframe = KeyframeLookupMemory(Property, Index); - if (CurrentKeyframe->FrameNumber < CurrentFrame) { - ShiftKeyframes(Property, 1, i - 1); + keyframe *PreviousKeyframe = KeyframeLookup(Property, Index - 1); + keyframe *NextKeyframe = KeyframeLookup(Property, Index + 1); + if (PreviousKeyframe->FrameNumber < CurrentFrame) { + if (NextKeyframe->FrameNumber >= CurrentFrame || Index == Property->NumberOfTotalKeyframes) { + keyframe *CurrentKeyframe = KeyframeLookup(Property, Index); + if (CurrentKeyframe->FrameNumber == CurrentFrame || Index == Property->NumberOfTotalKeyframes) { + Keyframe = CurrentKeyframe; + Property->NumberOfTotalKeyframes++; + } else { + if (CurrentKeyframe->FrameNumber > CurrentFrame) { + Keyframe = KeyframeLookup(Property, Index); + } else { + Keyframe = KeyframeLookup(Property, Index + 1); + } + } + Found = true; + } else { + Index += (Property->NumberOfTotalKeyframes - Index) / 2; + } + // We can only progress from this first if statement if + // NextKeyframe is valid, so we need to check for these conditions. + } else if (Property->NumberOfTotalKeyframes == 1) { + Property->NumberOfTotalKeyframes++; + Keyframe = KeyframeLookup(Property, 0); + Found = true; + } else if (Property->NumberOfTotalKeyframes == 2) { + keyframe *FirstKeyframe = KeyframeLookup(Property, Index + 1); + if (FirstKeyframe->FrameNumber > CurrentFrame) + Keyframe = KeyframeLookup(Property, 0); + else + Keyframe = KeyframeLookup(Property, 1); + Property->NumberOfTotalKeyframes++; Found = true; } else { - Index--; + Index = Index / 2; } } - Keyframe = PushKeyframe(Property); } - if (Keyframe->FrameNumber != CurrentFrame) - Property->NumberOfTotalKeyframes++; Assert(!(Keyframe == NULL)) Keyframe->FrameNumber = CurrentFrame; @@ -305,7 +324,6 @@ ManualKeyframeInsertF(property_channel *Property, memory *Memory, int32 CurrentF Keyframe->TangentRight = V2(1, 0); Keyframe->ImguiID = RandomGlobalIncrement++; CalculatePropertyMinMax(Property); - */ } @@ -83,6 +83,12 @@ SelectLayer(project_layer *Layer, project_state *State, int32 i) } static void +DuplicateLayer(project_data *File, project_state *State) +{ + project_layer *Layer = File->Layer[State->MostRecentlySelectedLayer]; +} + +static void DeselectAllLayers(project_data *File, project_state *State) { temp_layer_list List = FindSelectedLayerIndex(File, State->NumberOfSelectedLayers); @@ -14,7 +14,6 @@ #include <smmintrin.h> #endif - #include "imgui/imgui.h" #include "imgui/backends/imgui_impl_sdl.h" #include "imgui/backends/imgui_impl_opengl3.h" @@ -33,13 +32,6 @@ #define STBI_FAILURE_USERMSG #include "lib/stb_image.h" -#if 0 -#include <iacaMarks.h> -#else -#define IACA_START -#define IACA_END -#endif - #include "defines.h" #include "my_math.h" #include "main.h" @@ -67,7 +59,6 @@ SDL_sem *Semaphore; #include "bezier.cpp" #if THREADED #include "threading.cpp" -#else #endif #include "prenderer.cpp" #include "ffmpeg_backend.cpp" @@ -508,26 +499,13 @@ int main(int argc, char *argv[]) { } C = SDL_AtomicGet(&CompletedEntries); if (C == 16) { - Bitmap_ConvertPacking(CompBuffer.PackedBuffer, CompBuffer.UnpackedBuffer, - CompBuffer.Width, CompBuffer.Height, CompBuffer.BytesPerPixel, 1); - EndRenderState(&State); - glBindTexture(GL_TEXTURE_2D, textureID); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, CompBuffer.Width, CompBuffer.Height, GL_RGBA, GL_UNSIGNED_BYTE, - CompBuffer.UnpackedBuffer); - - // shmp->shared_framenumber = File.CurrentFrame; - // if (sem_post(&shmp->sem2) == -1) - // Assert(0); - + FinishRenderAndUpload(&State, &CompBuffer, textureID); } } #else - Bitmap_ConvertPacking(CompBuffer.PackedBuffer, CompBuffer.UnpackedBuffer, - CompBuffer.Width, CompBuffer.Height, CompBuffer.BytesPerPixel, 1); - EndRenderState(&State); - glBindTexture(GL_TEXTURE_2D, textureID); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, CompBuffer.Width, CompBuffer.Height, GL_RGBA, GL_UNSIGNED_BYTE, - CompBuffer.UnpackedBuffer); + if (IsRendering) { + FinishRenderAndUpload(&State, &CompBuffer, textureID); + } #endif ImGui::Render(); @@ -480,6 +480,11 @@ enum selection_type selection_source }; +char *ToolName[] { + "Move", + "Pen" +}; + enum tool { tool_default, tool_pen, diff --git a/my_imgui_widgets.cpp b/my_imgui_widgets.cpp index 4bf6b5e..54fa9c1 100644 --- a/my_imgui_widgets.cpp +++ b/my_imgui_widgets.cpp @@ -6,7 +6,7 @@ static void ImGui_KeyframeDragging(project_data *File, project_state *State, ui *UI, property_channel *Property, int32 b, ImGuiIO io, int16 Type) { - keyframe *Keyframe = KeyframeLookupMemory(Property, b); + keyframe *Keyframe = KeyframeLookup(Property, b); if (ImGui::IsItemActive()) { if (!Keyframe->IsSelected && ImGui::IsItemActivated()) @@ -442,7 +442,7 @@ ImGui_Viewport(project_data File, project_state *State, ui *UI, memory *Memory, ImVec2 Point1_ScreenPos = Layer_LocalToScreenSpace(Layer, UI, CompBuffer, Point1_Pos); ImVec2 Point1_ScreenPos_Left = Layer_LocalToScreenSpace(Layer, UI, CompBuffer, Point1_Pos_Left); - ImVec2 Point1_ScreenPos_Right = Layer_LocalToScreenSpace(Layer, UI, CompBuffer, Point1_Pos_Right); + // ImVec2 Point1_ScreenPos_Right = Layer_LocalToScreenSpace(Layer, UI, CompBuffer, Point1_Pos_Right); ImGui::PushID(p); @@ -450,13 +450,13 @@ ImGui_Viewport(project_data File, project_state *State, ui *UI, memory *Memory, // The handle itself - // if (Point0->IsSelected || State->Tool == tool_pen) { - col = ImGui::GetColorU32(ImGuiCol_ButtonHovered); + col = ImGui::GetColorU32(ImGuiCol_ButtonHovered); + if (Point0->HandleBezier) { draw_list->AddNgon(Point0_ScreenPos_Left, 10, col, 8, 5.0f); draw_list->AddNgon(Point0_ScreenPos_Right, 10, col, 8, 5.0f); draw_list->AddLine(Point0_ScreenPos, Point0_ScreenPos_Left, col, 2.0f); draw_list->AddLine(Point0_ScreenPos, Point0_ScreenPos_Right, col, 2.0f); - // } + } draw_list->AddNgon(Point0_ScreenPos, 10, col, 8, 5.0f); @@ -484,21 +484,20 @@ ImGui_Viewport(project_data File, project_state *State, ui *UI, memory *Memory, } if (ImGui::IsItemActivated() && b == 0) { if (p == 0 && State->Pen.IsActive) { - // TODO(fox): I think we need some alternate - // Change functions to make these types of - // value sets more easy to read... - bool32 SetFalse = false; - bool32 SetTrue = true; Action_Entry_Commit(Memory, action_entry_default, "Close mask path"); - Action_Change_Commit(Memory, &State->Pen.IsActive, - &State->Pen.IsActive, &SetFalse, action_change_i32); - Action_Change_Commit(Memory, &Mask->IsClosed, - &Mask->IsClosed, &SetTrue, action_change_i32); - Action_Entry_End(Memory); + Action_Change_Commit_SwapBool(Memory, &State->Pen.IsActive); + Action_Change_Commit_SwapBool(Memory, &Mask->IsClosed); // State->Pen.IsActive = false; // Mask->IsClosed = true; + Action_Entry_End(Memory); + } else if (io.KeyCtrl) { + // TODO(fox): Mask delete! + } else if (io.KeyAlt) { + Action_Entry_Commit(Memory, action_entry_default, "Switch handles on point"); + Action_Change_Commit_SwapBool(Memory, &Point0->HandleBezier); + Action_Entry_End(Memory); } - Point0->IsSelected = 1; + Point0->IsSelected = true; } if (ImGui::IsItemActive()) { ImVec2 MouseIncrement = io.MouseDelta * (ImVec2(CompBuffer.Width, CompBuffer.Height) / UI->CompZoom); @@ -527,8 +526,14 @@ ImGui_Viewport(project_data File, project_state *State, ui *UI, memory *Memory, ImU32 col2 = ImGui::GetColorU32(ImGuiCol_Button); - if (Point0->HandleBezier || Point1->HandleBezier) { + if (Point0->HandleBezier && Point1->HandleBezier) { + draw_list->AddBezierCubic(Point0_ScreenPos, Point0_ScreenPos_Right, + Point1_ScreenPos_Left, Point1_ScreenPos, col2, 6.0f, 0); + } else if (Point0->HandleBezier) { draw_list->AddBezierCubic(Point0_ScreenPos, Point0_ScreenPos_Right, + Point1_ScreenPos, Point1_ScreenPos, col2, 6.0f, 0); + } else if (Point1->HandleBezier) { + draw_list->AddBezierCubic(Point0_ScreenPos, Point0_ScreenPos, Point1_ScreenPos_Left, Point1_ScreenPos, col2, 6.0f, 0); } else { draw_list->AddLine(Point0_ScreenPos, Point1_ScreenPos, col2, 6.0f); @@ -547,6 +552,7 @@ ImGui_Viewport(project_data File, project_state *State, ui *UI, memory *Memory, if (ImGui::IsItemHovered()) { #if DEBUG + // Code that basically mimics Mask_AddPointToCurve but visualized in screen space, for checking bugs. v2 LayerPoint = Layer_ScreenSpaceToLocal(Layer, UI, CompBuffer, ViewportMin, io.MousePos); real32 ratio = Bezier_CubicRatioOfPoint(Point0_Pos, Point0_Pos_Right, Point1_Pos_Left, Point1_Pos, LayerPoint); draw_list->AddNgon(io.MousePos, 2, col, 8, 5.0f); @@ -597,7 +603,7 @@ ImGui_Viewport(project_data File, project_state *State, ui *UI, memory *Memory, for (int t = 0; t < tool_count; t++) { ImGui::PushID(t); bool32 Selected = (State->Tool == t); - if (ImGui::Selectable("##tool", Selected, 0, ImVec2(ButtonSize, ButtonSize))) { + if (ImGui::Selectable(ToolName[t], Selected, 0, ImVec2(ButtonSize*2, ButtonSize))) { State->Tool = (tool)t; } ImGui::PopID(); @@ -636,13 +642,19 @@ ImGui_Viewport(project_data File, project_state *State, ui *UI, memory *Memory, CurrentPoint->TangentRight = -OffsetPos; CurrentPoint->TangentLeft = OffsetPos; } - if (ImGui::IsKeyPressed(ImGuiKey_Escape) && IsActive) { + // Escape can be pressed to exit point-adding mode or to delete the + // mask if it was just created. + if (ImGui::IsKeyPressed(ImGuiKey_Escape)) { mask *Mask = &Layer->Mask[Layer->NumberOfMasks-1]; - if (Mask->NumberOfPoints == 1) { + if (IsActive && Mask->NumberOfPoints == 1) { Layer->NumberOfMasks--; Mask->NumberOfPoints = 0; + State->Pen.IsActive = false; + } else { + Action_Entry_Commit(Memory, action_entry_default, "Path adding exited"); + Action_Change_Commit_SwapBool(Memory, &State->Pen.IsActive); + Action_Entry_End(Memory); } - State->Pen.IsActive = false; IsDeactivated = false; // just in case escape and mouse release happen simultaneously } if (IsDeactivated) { @@ -1216,7 +1228,7 @@ ImGui_Timeline(project_data *File, project_state *State, memory *Memory, ui *UI, ImGui::PushID(Property); for (int b = 0; b < Layer->Property[a].NumberOfTotalKeyframes; b++) { - keyframe *Keyframe = KeyframeLookupMemory(Property, b); + keyframe *Keyframe = KeyframeLookup(Property, b); real32 KeyframeOrigin = TimelineStartingPos.x + UI->TimelineZoom*Keyframe->FrameNumber; ImVec2 KeyframePosition = ImVec2(KeyframeOrigin - FontHeight/2, InitialY); @@ -1270,7 +1282,7 @@ ImGui_Timeline(project_data *File, project_state *State, memory *Memory, ui *UI, ImU32 col = ImGui::GetColorU32(ImGuiCol_ScrollbarGrab); for (int b = 0; b < Property->NumberOfTotalKeyframes; b++) { - keyframe *Keyframe = KeyframeLookupMemory(Property, b); + keyframe *Keyframe = KeyframeLookup(Property, b); // int32 Index = KeyframeMemoryToIndex(Property, b); ImGui::PushID(Keyframe); @@ -1351,7 +1363,7 @@ ImGui_Timeline(project_data *File, project_state *State, memory *Memory, ui *UI, // TODO(fox): Reformat this so it's all done in one loop. for (int b = 0; b < Property->NumberOfTotalKeyframes; b++) { - keyframe *Keyframe = KeyframeLookupMemory(Property, b); + keyframe *Keyframe = KeyframeLookup(Property, b); real32 MinVal = Property->LocalMinVal.f; real32 MaxVal = Property->LocalMaxVal.f; @@ -1627,6 +1639,10 @@ ImGui_ProcessInputs(project_data *File, project_state *State, comp_buffer *CompB if (ImGui::IsKeyPressed(ImGuiKey_1)) LoadTestFootage(File, State, Memory); + if (ImGui::IsKeyPressed(ImGuiKey_D) && io.KeyCtrl) + { + } + if (ImGui::IsKeyPressed(ImGuiKey_Delete)) { switch (State->RecentSelectionType) diff --git a/prenderer.cpp b/prenderer.cpp index e755fe7..909fc4c 100644 --- a/prenderer.cpp +++ b/prenderer.cpp @@ -99,6 +99,7 @@ static void RenderLayers(render_queue *RenderInfo, rectangle RenderRegion) { for (int16 i = 0; i < RenderInfo->State->NumberOfLayersToRender; i++) { int16 Idx = RenderInfo->State->LayersToRender[i]; + #if ARM if (InstructionMode == instruction_mode_neon) Fallback_RenderLayer(RenderInfo->File->Layer[Idx]->TransformInfo, RenderInfo->CompBuffer, RenderRegion); @@ -114,10 +115,45 @@ RenderLayers(render_queue *RenderInfo, rectangle RenderRegion) { } static void +FinishRenderAndUpload(project_state *State, comp_buffer *CompBuffer, GLuint textureID) +{ +#if PERF + Test = __rdtsc() - Test; + + Debug.PixelCountRendered = 1280*720*5; + printf("Cycles per pixel rendered: %li ", Test / Debug.PixelCountRendered); + printf("Pixels rendered: %li ", Debug.PixelCountRendered); + printf("Cycles: %li\n", Test); + + Test = 0; + Debug.PixelCountTransparent = 0; + Debug.PixelCountRendered = 0; + Debug.PixelCountChecked = 0; +#endif + + +#if PACKEDRGB + Bitmap_ConvertPacking(CompBuffer->PackedBuffer, CompBuffer->UnpackedBuffer, + CompBuffer->Width, CompBuffer->Height, CompBuffer->BytesPerPixel, 1); +#endif + EndRenderState(State); + glBindTexture(GL_TEXTURE_2D, textureID); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, CompBuffer->Width, CompBuffer->Height, GL_RGBA, GL_UNSIGNED_BYTE, + CompBuffer->UnpackedBuffer); + + // shmp->shared_framenumber = File.CurrentFrame; + // if (sem_post(&shmp->sem2) == -1) + // Assert(0); +} + +static void QueueCurrentFrame(project_data *File, comp_buffer *CompBuffer, project_state *State) { IsRendering = true; render_queue RenderInfo = {File, State, CompBuffer}; +#if PERF + Test = __rdtsc(); +#endif for (int16 i = 0; i < File->NumberOfLayers; i++) { @@ -290,6 +326,13 @@ NEON_RenderLayer(transform_info T, comp_buffer *Buffer, rectangle RenderRegion) #else +#if 0 +#include "iacaMarks.h" +#else +#define IACA_START +#define IACA_END +#endif + static void AVX2_RenderLayer(transform_info T, comp_buffer *Buffer, rectangle RenderRegion) { @@ -329,6 +372,7 @@ AVX2_RenderLayer(transform_info T, comp_buffer *Buffer, rectangle RenderRegion) __m256 ZeroPointFive = _mm256_set1_ps(0.5); __m256i Onei = _mm256_set1_epi32(1); __m256 Four = _mm256_set1_ps(4); + __m256 Eight = _mm256_set1_ps(8); __m256i FF = _mm256_set1_epi32(0xFF); __m256i BottomTwoBits = _mm256_set1_epi32(0x03); __m256i Fouri = _mm256_set1_epi32(4); @@ -338,9 +382,24 @@ AVX2_RenderLayer(transform_info T, comp_buffer *Buffer, rectangle RenderRegion) // __m256i White = _mm256_setr_epi32(0xFFFFFFFF, 0, 0, 0, 0xFFFFFFFF, 0, 0, 0); // __m256i White2 = _mm256_set1_epi32(0xFFFFFFFF); - // NOTE(fox): Each loop operates on 8 pixels, 4 horizontal by 2 vertical, - // as per the bitmap packing scheme in memory. + // TODO(fox): Tried an MSAA technique for anti aliasing, but it still looks pretty sucky. + __m256 X0 = _mm256_set1_ps(0.30); + __m256 Y0 = _mm256_set1_ps(0.10); + __m256 X1 = _mm256_set1_ps(0.80); + __m256 Y1 = _mm256_set1_ps(0.35); + __m256 X2 = _mm256_set1_ps(0.05); + __m256 Y2 = _mm256_set1_ps(0.60); + __m256 X3 = _mm256_set1_ps(0.55); + __m256 Y3 = _mm256_set1_ps(0.85); + +#if PACKEDRGB +#else + __m256i LayerPitch = _mm256_set1_epi32(T.LayerPitch); + __m256i BytesPerPixel = _mm256_set1_epi32(Buffer->BytesPerPixel); +#endif + +#if PACKEDRGB for (int32 Y = LayerBounds.Min.y; Y < LayerBounds.Max.y; Y+=2) { __m256 PixelX = _mm256_setr_ps((real32)LayerBounds.Min.x, @@ -360,22 +419,31 @@ AVX2_RenderLayer(transform_info T, comp_buffer *Buffer, rectangle RenderRegion) (real32)Y+1, (real32)Y+1, (real32)Y+1); +#else + for (int32 Y = LayerBounds.Min.y; Y < LayerBounds.Max.y; Y++) + { + __m256 PixelX = _mm256_setr_ps((real32)LayerBounds.Min.x, + (real32)LayerBounds.Min.x+1, + (real32)LayerBounds.Min.x+2, + (real32)LayerBounds.Min.x+3, + (real32)LayerBounds.Min.x+4, + (real32)LayerBounds.Min.x+5, + (real32)LayerBounds.Min.x+6, + (real32)LayerBounds.Min.x+7); + + __m256 PixelY = _mm256_set1_ps((real32)Y); +#endif __m256 StartVectorY = _mm256_sub_ps(PixelY, OriginY); +#if PACKEDRGB for (int32 X = LayerBounds.Min.x; X < LayerBounds.Max.x; X += 4) +#else + for (int32 X = LayerBounds.Min.x; X < LayerBounds.Max.x; X += 8) +#endif { - IACA_START; - // TODO(fox): Tried an MSAA technique for anti aliasing, but it still looks pretty sucky. - __m256 X0 = _mm256_set1_ps(0.30); - __m256 Y0 = _mm256_set1_ps(0.10); - __m256 X1 = _mm256_set1_ps(0.80); - __m256 Y1 = _mm256_set1_ps(0.35); - __m256 X2 = _mm256_set1_ps(0.05); - __m256 Y2 = _mm256_set1_ps(0.60); - __m256 X3 = _mm256_set1_ps(0.55); - __m256 Y3 = _mm256_set1_ps(0.85); + IACA_START; __m256 StartVectorX = _mm256_sub_ps(PixelX, OriginX); __m256 StartVectorX0 = _mm256_add_ps(StartVectorX, X0); @@ -387,10 +455,14 @@ AVX2_RenderLayer(transform_info T, comp_buffer *Buffer, rectangle RenderRegion) __m256 StartVectorX3 = _mm256_add_ps(StartVectorX, X3); __m256 StartVectorY3 = _mm256_add_ps(StartVectorY, Y3); +#if PACKEDRGB uint32 XLookup = (X >> 2)*16 + (X % 4); uint32 YLookup = (Y >> 2)*(WidthP*4) + (Y % 4)*4; uint32 PixelToSeek = XLookup + YLookup; uint8 *Pixel = (uint8 *)Buffer->PackedBuffer + PixelToSeek*Buffer->BytesPerPixel; +#else + uint8 *Pixel = (uint8 *)Buffer->UnpackedBuffer + Y*T.BufferPitch + X*Buffer->BytesPerPixel; +#endif __m256 U = _mm256_add_ps(_mm256_mul_ps(StartVectorX, XAxisPX), _mm256_mul_ps(StartVectorY, XAxisPY)); __m256 V = _mm256_add_ps(_mm256_mul_ps(StartVectorX, YAxisPX), _mm256_mul_ps(StartVectorY, YAxisPY)); @@ -422,9 +494,12 @@ AVX2_RenderLayer(transform_info T, comp_buffer *Buffer, rectangle RenderRegion) __m256i Mask = _mm256_cmp_ps(Avg, Zero, 14); __m256i NonEdge = _mm256_cmp_ps(Avg, One, 13); + __m256 LayerMask = _mm256_and_ps(_mm256_and_ps(_mm256_cmp_ps(U, Zero, 13), _mm256_cmp_ps(U, One, 1)), + _mm256_and_ps(_mm256_cmp_ps(V, Zero, 13), _mm256_cmp_ps(V, One, 1))); + // If all of the pixels are zeroed in the mask (aka fall outside // the UV lookup), we can skip the iteration. - if (_mm256_movemask_epi8(Mask)) + if (_mm256_movemask_epi8(LayerMask)) { __m256i EdgeMask = _mm256_andnot_si256(NonEdge, Mask); @@ -434,10 +509,9 @@ AVX2_RenderLayer(transform_info T, comp_buffer *Buffer, rectangle RenderRegion) __m256 TexXFull = _mm256_mul_ps(U, LayerWidth); __m256 TexYFull = _mm256_mul_ps(V, LayerHeight); __m256i TexXInt = _mm256_cvttps_epi32(TexXFull); - __m256i TexXIntPlusOne = _mm256_add_epi32(TexXInt, _mm256_and_si256(_mm256_cmpgt_epi32(LayerWidthMinusOne, TexXInt), Onei)); __m256i TexYInt = _mm256_cvttps_epi32(TexYFull); + __m256i TexXIntPlusOne = _mm256_add_epi32(TexXInt, _mm256_and_si256(_mm256_cmpgt_epi32(LayerWidthMinusOne, TexXInt), Onei)); __m256i TexYIntPlusOne = _mm256_add_epi32(TexYInt, _mm256_and_si256(_mm256_cmpgt_epi32(LayerHeightMinusOne, TexYInt), Onei)); - // NOTE(fox): The comparison is for when we're on the last pixel of the texel. __m256 TexX = _mm256_sub_ps(TexXFull, _mm256_cvtepi32_ps(TexXInt)); @@ -449,6 +523,7 @@ AVX2_RenderLayer(transform_info T, comp_buffer *Buffer, rectangle RenderRegion) __m256 TexBoth = _mm256_mul_ps(TexY, TexX); __m256 TexBothInv = _mm256_mul_ps(TexXInv, TexYInv); +#if PACKEDRGB __m256i XLookup = _mm256_add_epi32(_mm256_mullo_epi32(_mm256_srli_epi32(TexXInt, 2), Sixteeni), _mm256_and_si256(TexXInt, BottomTwoBits)); __m256i YLookup = _mm256_add_epi32(_mm256_mullo_epi32(_mm256_srli_epi32(TexYInt, 2), FullLayerWidth4i), @@ -457,6 +532,12 @@ AVX2_RenderLayer(transform_info T, comp_buffer *Buffer, rectangle RenderRegion) _mm256_and_si256(TexXIntPlusOne, BottomTwoBits)); __m256i YLookupPlusOne = _mm256_add_epi32(_mm256_mullo_epi32(_mm256_srli_epi32(TexYIntPlusOne, 2), FullLayerWidth4i), _mm256_mullo_epi32(_mm256_and_si256(TexYIntPlusOne, BottomTwoBits), Fouri)); +#else + __m256i XLookup = TexXInt; + __m256i YLookup = _mm256_cvtps_epi32(_mm256_mul_ps(_mm256_cvtepi32_ps(TexYInt), LayerWidth)); + __m256i XLookupPlusOne = TexXIntPlusOne; + __m256i YLookupPlusOne = _mm256_cvtps_epi32(_mm256_mul_ps(_mm256_cvtepi32_ps(TexYIntPlusOne), LayerWidth)); +#endif __m256i PixelLookupTL = _mm256_add_epi32(XLookup, YLookup); __m256i PixelLookupTR = _mm256_add_epi32(XLookupPlusOne, YLookup); @@ -512,6 +593,7 @@ AVX2_RenderLayer(transform_info T, comp_buffer *Buffer, rectangle RenderRegion) A_Col = _mm256_blendv_ps(A_Col, _mm256_mul_ps(A_Col, Avg), EdgeMask); } + IACA_END; __m256 LayerAlpha = _mm256_mul_ps(A_Col, LayerOpacity); __m256 LayerAlphaInv = _mm256_sub_ps(One, LayerAlpha); @@ -531,7 +613,7 @@ AVX2_RenderLayer(transform_info T, comp_buffer *Buffer, rectangle RenderRegion) __m256 A_Blend = LayerAlpha; // Only load the dest pixel if we actually need to (a pixel's opacity isn't 255 or the blend mode requires it). - if (_mm256_movemask_epi8(_mm256_cmp_ps(LayerAlpha, One, 2)) || T.BlendMode != blend_normal) + if (T.BlendMode != blend_normal || _mm256_movemask_epi8(_mm256_cmp_ps(LayerAlpha, One, 2))) { __m256i DestPixel = _mm256_loadu_si256((const __m256i *)Pixel); __m256 R_Dest = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256( DestPixel, FF)), Norm255); @@ -663,10 +745,13 @@ AVX2_RenderLayer(transform_info T, comp_buffer *Buffer, rectangle RenderRegion) _mm256_or_si256(R_Out, _mm256_slli_epi32(G_Out, 8)), _mm256_or_si256(_mm256_slli_epi32(B_Out, 16), _mm256_slli_epi32(A_Out, 24))); - - _mm256_maskstore_epi32((int *)Pixel, Mask, OutputPixel); + _mm256_maskstore_epi32((int *)Pixel, LayerMask, OutputPixel); } +#if PACKEDRGB PixelX = _mm256_add_ps(PixelX, Four); +#else + PixelX = _mm256_add_ps(PixelX, Eight); +#endif } } } @@ -729,7 +814,6 @@ SSE2_RenderLayer(transform_info T, comp_buffer *Buffer, rectangle RenderRegion) for (int32 X = LayerBounds.Min.x; X < LayerBounds.Max.x; X += 4) { - IACA_START; __m128 StartVectorX = _mm_sub_ps(PixelX, OriginX); @@ -1040,7 +1124,6 @@ Fallback_RenderLayer(transform_info T, comp_buffer *Buffer, rectangle RenderRegi for (int16 X = LayerBounds.Min.x; X < LayerBounds.Max.x; X++) { - IACA_START; real32 StartVectorX = X - T.OriginX; real32 U = (StartVectorX * T.XAxisPX) + (StartVectorY * T.XAxisPY); @@ -1063,23 +1146,14 @@ Fallback_RenderLayer(transform_info T, comp_buffer *Buffer, rectangle RenderRegi real32 TexBoth = TexY * TexX; real32 TexBothInv = TexXInv * TexYInv; -#if 0 - uint8 *TexPTR0 = ((uint8 *)T.SourceBuffer + (uint16)T.LayerPitch*TexYInt + TexXInt*Buffer->BytesPerPixel); - uint8 *TexPTR1 = ((uint8 *)T.SourceBuffer + (uint16)T.LayerPitch*(TexYInt+1) + TexXInt*Buffer->BytesPerPixel); - - uint32 PixelA = *(uint32 *)TexPTR0; - uint32 PixelB = *((uint32 *)TexPTR0 + 1); - uint32 PixelC = *(uint32 *)TexPTR1; - uint32 PixelD = *((uint32 *)TexPTR1 + 1); -#else uint32 XLookup, YLookup, PixelToSeek; - // TODO(fox): Anti-aliasing on edges uint16 LX = TexXInt; uint16 LY = TexYInt; uint16 LXPlus = Ceil(TexXInt+1, (uint32)T.LayerWidth - 1); uint16 LYPlus = Ceil(TexYInt+1, (uint32)T.LayerHeight - 1); +#if PACKEDRGB // TODO(fox): Be careful with the BytesPerPixel here! It's the // buffer's, not the layer's (currently everything is 4 bytes // per pixel). @@ -1102,12 +1176,25 @@ Fallback_RenderLayer(transform_info T, comp_buffer *Buffer, rectangle RenderRegi YLookup = (LYPlus >> 2)*(T.FullLayerWidth*4) + (LYPlus % 4)*4; PixelToSeek = XLookup + YLookup; uint32 PixelD = *(uint32 *)((uint8 *)T.SourceBuffer + PixelToSeek*Buffer->BytesPerPixel); -#endif + XLookup = (X >> 2)*16 + (X % 4); YLookup = (Y >> 2)*(WidthP*4) + (Y % 4)*4; PixelToSeek = XLookup + YLookup; uint32 *Pixel = (uint32 *)((uint8 *)Buffer->PackedBuffer + PixelToSeek*Buffer->BytesPerPixel); +#else + uint8 *TexPTR0 = ((uint8 *)T.SourceBuffer + (uint16)T.LayerPitch*LY + LX*Buffer->BytesPerPixel); + uint8 *TexPTR1 = ((uint8 *)T.SourceBuffer + (uint16)T.LayerPitch*LY + LXPlus*Buffer->BytesPerPixel); + uint8 *TexPTR2 = ((uint8 *)T.SourceBuffer + (uint16)T.LayerPitch*LYPlus + LX*Buffer->BytesPerPixel); + uint8 *TexPTR3 = ((uint8 *)T.SourceBuffer + (uint16)T.LayerPitch*LYPlus + LXPlus*Buffer->BytesPerPixel); + + uint32 PixelA = *(uint32 *)TexPTR0; + uint32 PixelB = *(uint32 *)TexPTR1; + uint32 PixelC = *(uint32 *)TexPTR2; + uint32 PixelD = *(uint32 *)TexPTR3; + + uint32 *Pixel = (uint32 *)((uint8 *)Buffer->UnpackedBuffer + Y*T.BufferPitch + X*Buffer->BytesPerPixel); +#endif real32 TexRA = (real32)(PixelA & 0xFF) * Normalized255; real32 TexRB = (real32)(PixelB & 0xFF) * Normalized255; real32 TexRC = (real32)(PixelC & 0xFF) * Normalized255; @@ -103,6 +103,13 @@ void Action_Change_Commit(memory *Memory, void *DataAddress, void *OriginalData, *(action_change_type *)Data = ActionChange; } +// Helper functions for common types of data changes. +void Action_Change_Commit_SwapBool(memory *Memory, bool32 *Bool) +{ + bool32 OppositeBool = *Bool ^ 1; + Action_Change_Commit(Memory, Bool, Bool, &OppositeBool, action_change_i32); +} + // This is only called when we're certain the action is going to be taken. void Action_Entry_Commit(memory *Memory, action_entry_type Type, char *Name) { |