summary | refs | log | tree | commit | diff
path: root/effects.cpp
diff options
context:
space:
mode:
author: Fox Caminiti <fox@foxcam.net> 2022-07-22 20:45:08 -0400
committer: Fox Caminiti <fox@foxcam.net> 2022-07-22 20:45:08 -0400
commit: fc8040d695644aaca4596adebeca4ea1369ef630 (patch)
tree: aea6979da97c43df8f03f3a2d7b421ee71bef370 /effects.cpp
first
Diffstat (limited to 'effects.cpp')
-rw-r--r-- effects.cpp 777
1 files changed, 777 insertions, 0 deletions
diff --git a/effects.cpp b/effects.cpp
new file mode 100644
index 0000000..fe593a4
--- /dev/null
+++ b/effects.cpp
@@ -0,0 +1,777 @@
+// Composites a constant colour over Buffer->EffectBuffer with AVX2, eight
+// 32-bit pixels per iteration.
+//
+//   Property[0].CurrentValue.col       - colour to apply; .a is the opacity
+//   Property[1].CurrentValue.blendmode - how the colour combines with the pixel
+//   Memory                             - unused
+//
+// Pixels are addressed as 4x4 tiles of 16 consecutive pixels; one 32-byte
+// load/store covers two 4-pixel rows of a tile, which is why X advances by 4
+// and Y by 2. Channels are unpacked as R = bits 0-7, G = 8-15, B = 16-23 and
+// A = 24-31; the destination alpha byte is written back unchanged.
+// NOTE(review): assumes BytesPerPixel == 4 and Width/Height are multiples of
+// the tile size - confirm against the buffer allocation.
+internal void
+DrawColor(pixel_buffer *Buffer, memory *Memory, property_channel Property[])
+{
+    v4 FloatColor = Property[0].CurrentValue.col;
+    blend_mode BlendMode = Property[1].CurrentValue.blendmode;
+
+    __m256 ZeroReal = _mm256_set1_ps(0);
+    __m256 ZeroPointFive = _mm256_set1_ps(0.5);
+    __m256 One = _mm256_set1_ps(1);
+    __m256 Two = _mm256_set1_ps(2);
+
+    __m256 Fraction255 = _mm256_set1_ps(1/255.0f);
+    __m256 Real255 = _mm256_set1_ps(255);
+
+    __m256i FF = _mm256_set1_epi32(0xFF);
+
+    __m256 Alpha = _mm256_set1_ps(FloatColor.a);
+    __m256 AlphaInv = _mm256_set1_ps(1.0f - FloatColor.a);
+
+    __m256 R_Col = _mm256_set1_ps(FloatColor.E[0]);
+    __m256 R_Colx2 = _mm256_mul_ps(R_Col, Two);
+    __m256 R_ColInv = _mm256_set1_ps(1.0f - FloatColor.E[0]);
+
+    __m256 G_Col = _mm256_set1_ps(FloatColor.E[1]);
+    __m256 G_Colx2 = _mm256_mul_ps(G_Col, Two);
+    __m256 G_ColInv = _mm256_set1_ps(1.0f - FloatColor.E[1]);
+
+    __m256 B_Col = _mm256_set1_ps(FloatColor.E[2]);
+    __m256 B_Colx2 = _mm256_mul_ps(B_Col, Two);
+    __m256 B_ColInv = _mm256_set1_ps(1.0f - FloatColor.E[2]);
+
+    for (int16 Y = 0; Y < Buffer->Height; Y += 2)
+    {
+        for (int16 X = 0; X < Buffer->Width; X += 4)
+        {
+            // Tile-major addressing: 16 pixels per 4x4 tile, 4 pixels per tile row.
+            uint32 XLookup = (X >> 2)*16 + (X % 4);
+            uint32 YLookup = (Y >> 2)*(Buffer->Width*4) + (Y % 4)*4;
+            uint32 PixelToSeek = XLookup + YLookup;
+            uint8 *Pixel = (uint8 *)Buffer->EffectBuffer + PixelToSeek*Buffer->BytesPerPixel;
+            __m256i DestPixel = _mm256_loadu_si256((const __m256i *)Pixel);
+
+            // Destination channels normalised to [0, 1].
+            __m256 R_Dest = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(DestPixel, FF)), Fraction255);
+            __m256 G_Dest = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(DestPixel, 8), FF)), Fraction255);
+            __m256 B_Dest = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(DestPixel, 16), FF)), Fraction255);
+            // Alpha stays as integers; it is re-packed untouched below.
+            __m256i A_Out = _mm256_and_si256(_mm256_srli_epi32(DestPixel, 24), FF);
+
+            __m256 R_Blend = _mm256_setzero_ps();
+            __m256 G_Blend = _mm256_setzero_ps();
+            __m256 B_Blend = _mm256_setzero_ps();
+            switch (BlendMode)
+            {
+                case blend_normal:
+                {
+                    // Normal mode: the blend result is the colour itself.
+                    // (Previously left at zero, which faded towards black.)
+                    R_Blend = R_Col;
+                    G_Blend = G_Col;
+                    B_Blend = B_Col;
+                } break;
+                case blend_multiply:
+                {
+                    R_Blend = _mm256_mul_ps(R_Dest, R_Col);
+                    G_Blend = _mm256_mul_ps(G_Dest, G_Col);
+                    B_Blend = _mm256_mul_ps(B_Dest, B_Col);
+                } break;
+                case blend_colorburn:
+                {
+                    // 1 - (1 - dest) / col; a zero colour channel yields
+                    // inf/NaN here, which the final min/max turns into 0.
+                    R_Blend = _mm256_sub_ps(One, _mm256_div_ps(_mm256_sub_ps(One, R_Dest), R_Col));
+                    G_Blend = _mm256_sub_ps(One, _mm256_div_ps(_mm256_sub_ps(One, G_Dest), G_Col));
+                    B_Blend = _mm256_sub_ps(One, _mm256_div_ps(_mm256_sub_ps(One, B_Dest), B_Col));
+                } break;
+                case blend_linearburn:
+                {
+                    R_Blend = _mm256_sub_ps(_mm256_add_ps(R_Dest, R_Col), One);
+                    G_Blend = _mm256_sub_ps(_mm256_add_ps(G_Dest, G_Col), One);
+                    B_Blend = _mm256_sub_ps(_mm256_add_ps(B_Dest, B_Col), One);
+                } break;
+                case blend_add:
+                {
+                    R_Blend = _mm256_add_ps(R_Dest, R_Col);
+                    G_Blend = _mm256_add_ps(G_Dest, G_Col);
+                    B_Blend = _mm256_add_ps(B_Dest, B_Col);
+                } break;
+                case blend_screen:
+                {
+                    R_Blend = _mm256_sub_ps(One, _mm256_mul_ps(_mm256_sub_ps(One, R_Dest), R_ColInv));
+                    G_Blend = _mm256_sub_ps(One, _mm256_mul_ps(_mm256_sub_ps(One, G_Dest), G_ColInv));
+                    B_Blend = _mm256_sub_ps(One, _mm256_mul_ps(_mm256_sub_ps(One, B_Dest), B_ColInv));
+                } break;
+                case blend_overlay:
+                {
+                    // Overlay branches on the destination: dark pixels are
+                    // multiplied, bright pixels are screened.
+                    __m256 R_Mask = _mm256_cmp_ps(R_Dest, ZeroPointFive, _CMP_LT_OS);
+                    __m256 G_Mask = _mm256_cmp_ps(G_Dest, ZeroPointFive, _CMP_LT_OS);
+                    __m256 B_Mask = _mm256_cmp_ps(B_Dest, ZeroPointFive, _CMP_LT_OS);
+                    __m256 R_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(R_Dest, R_Col));
+                    __m256 G_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(G_Dest, G_Col));
+                    __m256 B_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(B_Dest, B_Col));
+                    __m256 R_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, R_Dest), R_ColInv)));
+                    __m256 G_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, G_Dest), G_ColInv)));
+                    __m256 B_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, B_Dest), B_ColInv)));
+                    // blendv picks the second operand where the mask is set.
+                    R_Blend = _mm256_blendv_ps(R_Upper, R_Lower, R_Mask);
+                    G_Blend = _mm256_blendv_ps(G_Upper, G_Lower, G_Mask);
+                    B_Blend = _mm256_blendv_ps(B_Upper, B_Lower, B_Mask);
+                } break;
+                case blend_softlight:
+                {
+                    // using Pegtop's equation
+                    R_Blend = _mm256_add_ps(_mm256_mul_ps(_mm256_sub_ps(One, R_Colx2), _mm256_mul_ps(R_Dest, R_Dest)), _mm256_mul_ps(R_Colx2, R_Dest));
+                    G_Blend = _mm256_add_ps(_mm256_mul_ps(_mm256_sub_ps(One, G_Colx2), _mm256_mul_ps(G_Dest, G_Dest)), _mm256_mul_ps(G_Colx2, G_Dest));
+                    B_Blend = _mm256_add_ps(_mm256_mul_ps(_mm256_sub_ps(One, B_Colx2), _mm256_mul_ps(B_Dest, B_Dest)), _mm256_mul_ps(B_Colx2, B_Dest));
+                } break;
+                case blend_hardlight:
+                {
+                    // Hard light is overlay with the layers swapped, so the
+                    // branch is chosen by the applied colour, not by the
+                    // destination pixel.
+                    __m256 R_Mask = _mm256_cmp_ps(R_Col, ZeroPointFive, _CMP_LT_OS);
+                    __m256 G_Mask = _mm256_cmp_ps(G_Col, ZeroPointFive, _CMP_LT_OS);
+                    __m256 B_Mask = _mm256_cmp_ps(B_Col, ZeroPointFive, _CMP_LT_OS);
+                    __m256 R_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(R_Dest, R_Col));
+                    __m256 G_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(G_Dest, G_Col));
+                    __m256 B_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(B_Dest, B_Col));
+                    __m256 R_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, R_Dest), R_ColInv)));
+                    __m256 G_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, G_Dest), G_ColInv)));
+                    __m256 B_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, B_Dest), B_ColInv)));
+                    R_Blend = _mm256_blendv_ps(R_Upper, R_Lower, R_Mask);
+                    G_Blend = _mm256_blendv_ps(G_Upper, G_Lower, G_Mask);
+                    B_Blend = _mm256_blendv_ps(B_Upper, B_Lower, B_Mask);
+                } break;
+                case blend_subtract:
+                {
+                    R_Blend = _mm256_sub_ps(R_Dest, R_Col);
+                    G_Blend = _mm256_sub_ps(G_Dest, G_Col);
+                    B_Blend = _mm256_sub_ps(B_Dest, B_Col);
+                } break;
+                case blend_divide:
+                {
+                    R_Blend = _mm256_div_ps(R_Dest, R_Col);
+                    G_Blend = _mm256_div_ps(G_Dest, G_Col);
+                    B_Blend = _mm256_div_ps(B_Dest, B_Col);
+                } break;
+                case blend_difference:
+                {
+                    // |col - dest|: take whichever subtraction is positive.
+                    __m256 R_Lower = _mm256_sub_ps(R_Col, R_Dest);
+                    __m256 G_Lower = _mm256_sub_ps(G_Col, G_Dest);
+                    __m256 B_Lower = _mm256_sub_ps(B_Col, B_Dest);
+                    __m256 R_Upper = _mm256_sub_ps(R_Dest, R_Col);
+                    __m256 G_Upper = _mm256_sub_ps(G_Dest, G_Col);
+                    __m256 B_Upper = _mm256_sub_ps(B_Dest, B_Col);
+                    __m256 R_Mask = _mm256_cmp_ps(R_Lower, ZeroReal, _CMP_GT_OS);
+                    __m256 G_Mask = _mm256_cmp_ps(G_Lower, ZeroReal, _CMP_GT_OS);
+                    __m256 B_Mask = _mm256_cmp_ps(B_Lower, ZeroReal, _CMP_GT_OS);
+                    R_Blend = _mm256_blendv_ps(R_Upper, R_Lower, R_Mask);
+                    G_Blend = _mm256_blendv_ps(G_Upper, G_Lower, G_Mask);
+                    B_Blend = _mm256_blendv_ps(B_Upper, B_Lower, B_Mask);
+                } break;
+                default:
+                {
+                    // Unknown mode: blend term stays zero, as before.
+                } break;
+            }
+
+            // Mix the blend result with the original pixel by the colour's alpha.
+            R_Blend = _mm256_add_ps(_mm256_mul_ps(R_Dest, AlphaInv),
+                                    _mm256_mul_ps(R_Blend, Alpha));
+            G_Blend = _mm256_add_ps(_mm256_mul_ps(G_Dest, AlphaInv),
+                                    _mm256_mul_ps(G_Blend, Alpha));
+            B_Blend = _mm256_add_ps(_mm256_mul_ps(B_Dest, AlphaInv),
+                                    _mm256_mul_ps(B_Blend, Alpha));
+
+            // Clamp to [0, 1] before converting back to bytes.
+            R_Blend = _mm256_max_ps(_mm256_min_ps(One, R_Blend), ZeroReal);
+            G_Blend = _mm256_max_ps(_mm256_min_ps(One, G_Blend), ZeroReal);
+            B_Blend = _mm256_max_ps(_mm256_min_ps(One, B_Blend), ZeroReal);
+
+            __m256i R_Out = _mm256_cvttps_epi32(_mm256_mul_ps(R_Blend, Real255));
+            __m256i G_Out = _mm256_cvttps_epi32(_mm256_mul_ps(G_Blend, Real255));
+            __m256i B_Out = _mm256_cvttps_epi32(_mm256_mul_ps(B_Blend, Real255));
+
+            __m256i OutputPixel = _mm256_or_si256(
+                _mm256_or_si256(R_Out, _mm256_slli_epi32(G_Out, 8)),
+                _mm256_or_si256(_mm256_slli_epi32(B_Out, 16), _mm256_slli_epi32(A_Out, 24)));
+
+            _mm256_storeu_si256((__m256i *)Pixel, OutputPixel);
+        }
+    }
+}
+
+// Placeholder for the gradient effect: it fetches the two endpoint colours
+// from Property[0] and Property[1] and then returns without touching Buffer.
+internal void
+DrawGradient(pixel_buffer *Buffer, memory *Memory, property_channel Property[])
+{
+    v4 GradientStart = Property[0].CurrentValue.col;
+    v4 GradientEnd   = Property[1].CurrentValue.col;
+}
+
+// Table of effect templates; AddEffect() copies the entry selected by its
+// EffectListIndex argument into a newly allocated effect.
+// Entries are initialised positionally. NOTE(review): judging by the fields
+// AddEffect() reads, the order appears to be name, callback, property count,
+// display type, then per-property {name, default value, variable type, min,
+// max} - confirm against the effect_header declaration, which is not in this
+// file.
+// NOTE(review): "Linear Gradient" declares three properties, but the active
+// DrawGradient currently performs no drawing.
+global_variable effect_header EffectList[] {
+ {
+ "Solid Color",
+ &DrawColor, 2, standard, {
+ {"Color", {.col = V4(0.0f, 0.0f, 0.0f, 0.0f)}, type_color, NORMALIZED_COL_MIN, NORMALIZED_COL_MAX},
+ {"Blend mode", {.blendmode = blend_normal}, type_blendmode},
+ }
+ },
+ {
+ "Linear Gradient",
+ &DrawGradient, 3, standard, {
+ {"Start Color", {.col = V4(0.0f, 1.0f, 0.0f, 0.0f)}, type_color, NORMALIZED_COL_MIN, NORMALIZED_COL_MAX},
+ {"End Color", {.col = V4(1.0f, 0.0f, 0.0f, 1.0f)}, type_color, NORMALIZED_COL_MIN, NORMALIZED_COL_MAX},
+ {"Opacity", {1.0f}, type_real, NORMALIZED_REAL_MIN, NORMALIZED_REAL_MAX}
+ }
+ }
+};
+// Disabled effect-table entries, kept for reference only (compiled out by
+// "#if 0"). Most of them omit the property-count field that the live entries
+// carry and use the type names color/real rather than type_color/type_real,
+// so they would need updating before being moved back into the table.
+#if 0
+ {
+ "Solid Color",
+ &DrawColor, standard, {
+ {"Color", {.col = V4(0.5f, 1.0f, 0.4f, 0.5f)}, color},
+ }
+ },
+ {
+ "Test Grid",
+ &DrawGrid, standard, {
+ {"Color 1", {.col = V4(0.5f, 1.0f, 0.4f, 1.0f)}, color},
+ {"Color 2", {.col = V4(0.0f, 0.0f, 0.0f, 1.0f)}, color}
+ }
+ },
+ {
+ "Gaussian Blur",
+ &GaussianBlur, standard, {
+ {"Radius", {2.0f}, real},
+ }
+ },
+ {
+ "Canny edges",
+ &Canny, standard, {
+ {"Blur Radius", {1.0f}, real},
+ {"Threshold", {5.0f}, real},
+ }
+ },
+ {
+ "Levels",
+ &Levels, levels, {
+ {"Start point", {0.0f}, real},
+ {"Mid point", {1.0f}, real},
+ {"End point", {1.0f}, real},
+ {"Start Col", {.col = V4(0.0f)}, color},
+ {"Mid Col", {.col = V4(1.0f)}, color},
+ {"End Col", {.col = V4(1.0f)}, color},
+ }
+ },
+ {
+ "Kernel",
+ &SpacialFilter, standard, {
+ {"V1", {-1.0f}, real},
+ {"V2", {0.0f}, real},
+ {"V3", {1.0f}, real},
+ {"V4", {-2.0f}, real},
+ {"V5", {0.0f}, real},
+ {"V6", {2.0f}, real},
+ {"V7", {-1.0f}, real},
+ {"V8", {0.0f}, real},
+ {"V9", {1.0f}, real},
+ }
+ },
+ {
+ "Invert",
+ &Invert, 0, standard, {
+ }
+ }
+#endif
+
+// Allocates a new effect, stores it in the next free slot of Layer->Effect and
+// fills it from the template EffectList[EffectListIndex]: name, callback,
+// property count, display type and each property's name, default value,
+// limits and variable type. The new effect starts active.
+// NOTE(review): neither EffectListIndex nor the slot index is range-checked
+// here - callers presumably guarantee both.
+internal void
+AddEffect(project_layer *Layer, memory *Memory, uint16 EffectListIndex)
+{
+    effect *NewEffect = (effect *)AllocateMemory(Memory, sizeof(effect), F_Effects);
+    Layer->Effect[Layer->NumberOfEffects] = NewEffect;
+
+    effect_header Template = EffectList[EffectListIndex];
+
+    NewEffect->Name               = Template.Name;
+    NewEffect->func               = Template.func;
+    NewEffect->NumberOfProperties = Template.NumberOfProperties;
+    NewEffect->DisplayType        = Template.DisplayType;
+    NewEffect->IsActive           = true;
+
+    int16 PropIndex = 0;
+    while (PropIndex < NewEffect->NumberOfProperties)
+    {
+        property_channel *Target = &NewEffect->Property[PropIndex];
+        Target->Name         = Template.PropertyHeader[PropIndex].Name;
+        Target->CurrentValue = Template.PropertyHeader[PropIndex].Value;
+        Target->MinVal       = Template.PropertyHeader[PropIndex].MinVal;
+        Target->MaxVal       = Template.PropertyHeader[PropIndex].MaxVal;
+        Target->VarType      = Template.PropertyHeader[PropIndex].VarType;
+        PropIndex++;
+    }
+
+    Layer->NumberOfEffects += 1;
+}
+
+// Forward declaration; the definition is not part of this file section.
+// UpdateEffects() calls it with only the buffer argument, relying on the
+// default value of the second parameter.
+internal void
+SSE_CopyToBuffer(pixel_buffer *, uint16 asda = 0);
+
+// Rebuilds the layer's effect buffer: allocates it on first use, refreshes it
+// through SSE_CopyToBuffer(), then runs every active effect of the layer over
+// it in list order.
+internal void
+UpdateEffects(project_layer *Layer, memory *Memory)
+{
+    image_source *Source = (image_source *)Layer->RenderInfo;
+
+    // Lazily allocate one full-size buffer for the effect chain to draw into.
+    if (Source->Raster.EffectBuffer == 0)
+    {
+        Source->Raster.EffectBuffer =
+            AllocateMemory(Memory,
+                           Source->Raster.Width * Source->Raster.Height * Source->Raster.BytesPerPixel,
+                           B_Scratch);
+    }
+
+    SSE_CopyToBuffer(&Source->Raster);
+
+    int EffectIndex = 0;
+    while (EffectIndex < Layer->NumberOfEffects)
+    {
+        effect *Current = Layer->Effect[EffectIndex];
+        if (Current->IsActive)
+        {
+            Current->func(&Source->Raster, Memory, Current->Property);
+        }
+        ++EffectIndex;
+    }
+}
+
+#if 0
+
+// Scalar solid-colour fill: packs Property[0]'s colour into a uint32 and
+// applies it to every pixel of Buffer->EffectBuffer through RenderAlpha(),
+// walking the buffer row by row using Buffer->Pitch. Memory is unused.
+// The previously computed Origin/MaxLength values were never read and have
+// been dropped.
+internal void
+DrawColor(pixel_buffer *Buffer, memory *Memory, property_channel Property[])
+{
+    v4 FloatColor = Property[0].CurrentValue.col;
+    uint32 Color = ColToUint32(FloatColor);
+
+    uint8 *Row = ((uint8 *)Buffer->EffectBuffer);
+
+    for(int Y = 0;
+        Y < Buffer->Height;
+        ++Y)
+    {
+        uint32 *Pixel = (uint32 *)Row;
+        for(int X = 0;
+            X < Buffer->Width;
+            ++X)
+        {
+            RenderAlpha(Pixel, Color);
+            Pixel++;
+        }
+        Row += Buffer->Pitch;
+    }
+}
+
+// Inverts the r, g and b channels (255 - value) of every pixel in
+// Buffer->EffectBuffer; the remaining channel of the unpacked colour is
+// written back as read. Memory and Property are unused.
+internal void
+Invert(pixel_buffer *Buffer, memory *Memory, property_channel Property[])
+{
+    uint8 *Base = (uint8 *)Buffer->EffectBuffer;
+
+    for (int Line = 0; Line < Buffer->Height; ++Line)
+    {
+        uint32 *Scan = (uint32 *)(Base + Line*Buffer->Pitch);
+        for (int Column = 0; Column < Buffer->Width; ++Column)
+        {
+            v4 Unpacked = Uint32ToCol8(Scan[Column]);
+            Unpacked.r = 255 - Unpacked.r;
+            Unpacked.g = 255 - Unpacked.g;
+            Unpacked.b = 255 - Unpacked.b;
+            Scan[Column] = Col8ToUint32(Unpacked);
+        }
+    }
+}
+
+// Scalar horizontal gradient: for each pixel, t = X / Width and the written
+// colour is Clip(Clip(Start - t) + Clip(End - (1 - t))), with t applied to
+// the first three channels and 1.0 to the fourth. Existing pixel contents are
+// overwritten.
+// Property[0] = start colour, Property[1] = end colour; Property[2] is read
+// into a local but not used. Memory is unused.
+// NOTE(review): writing begins one pixel and one row past the start of
+// EffectBuffer while the loops still cover Width x Height - confirm the
+// buffer has the padding this implies.
+internal void
+DrawGradient(pixel_buffer *Buffer, memory *Memory, property_channel Property[])
+{
+    v4 StartColor = Property[0].CurrentValue.col;
+    v4 EndColor = Property[1].CurrentValue.col;
+    real32 Alpha = Property[2].CurrentValue.f;
+
+    uint8 *FirstRow = (uint8 *)Buffer->EffectBuffer + Buffer->BytesPerPixel + Buffer->Pitch;
+
+    for (int Line = 0; Line < Buffer->Height; ++Line)
+    {
+        uint32 *Scan = (uint32 *)(FirstRow + Line*Buffer->Pitch);
+        for (int Column = 0; Column < Buffer->Width; ++Column)
+        {
+            real32 T = ((real32)Column / Buffer->Width);
+            v4 Ramp = V4(V3(T), 1.0f);
+            v4 FromStart = ClipV4((StartColor - Ramp));
+            v4 FromEnd = ClipV4( (EndColor - (1 - Ramp) ) );
+            v4 Mixed = ClipV4( FromStart + FromEnd );
+
+            Scan[Column] = ColToUint32(Mixed);
+        }
+    }
+}
+
+// Fills Buffer->EffectBuffer with a two-colour test pattern: a pixel gets
+// Property[0]'s colour when bit 2 of X or of Y is set, otherwise Property[1]'s
+// colour. Memory is unused.
+internal void
+DrawGrid(pixel_buffer *Buffer, memory *Memory, property_channel Property[])
+{
+    uint32 Color1 = ColToUint32(Property[0].CurrentValue.col);
+    uint32 Color2 = ColToUint32(Property[1].CurrentValue.col);
+
+    uint8 *Base = (uint8 *)Buffer->EffectBuffer;
+
+    for (int Line = 0; Line < Buffer->Height; ++Line)
+    {
+        uint32 *Scan = (uint32 *)(Base + Line*Buffer->Pitch);
+        for (int Column = 0; Column < Buffer->Width; ++Column)
+        {
+            bool32 UseFirst = ((Column & 4) || (Line & 4));
+            Scan[Column] = UseFirst ? Color1 : Color2;
+        }
+    }
+}
+
+// Applies a 3x3 kernel to the grey value of the pixels around (Xp, Yp) in
+// Buffer->EffectBuffer and returns the weighted sum.
+//
+//   Value - nine weights in row-major order (top-left first)
+//
+// The window is centred on (Xp, Yp) in both directions, so callers must keep
+// 1 <= Xp <= Width-2 and 1 <= Yp <= Height-2. Previously the window started
+// at column Xp rather than Xp-1, which shifted the result by one pixel and
+// read past the end of the row at Xp == Width-2.
+internal real32
+KernLoop(pixel_buffer *Buffer, int16 Xp, int16 Yp, real32 Value[9])
+{
+    // Start on the row above the centre pixel.
+    uint8 *Row = ((uint8 *)Buffer->EffectBuffer +
+                  (Buffer->Pitch*Yp));
+    Row -= Buffer->Pitch;
+
+    real32 Sum = 0.0f;
+    int16 n = 0;
+    for(int Y = 0;
+        Y < 3;
+        ++Y)
+    {
+        // Start on the column left of the centre pixel.
+        uint32 *Pixel = (uint32 *)Row + Xp - 1;
+        for(int X = 0;
+            X < 3;
+            ++X)
+        {
+            real32 BW = Uint32ToNormalizedBW(*Pixel);
+            Sum += BW * Value[n];
+            Pixel++;
+            n++;
+        }
+        Row += Buffer->Pitch;
+    }
+    return Sum;
+}
+
+// Runs a user-defined 3x3 kernel (Property[0..8], row-major) over the interior
+// of Buffer->EffectBuffer. The first pass stores the KernLoop() result for
+// each pixel as a real32 in Buffer->Scratch; the second pass writes
+// ColToUint32(abs(value / 4)) back into EffectBuffer.
+// NOTE(review): both passes address Scratch/EffectBuffer with an extra offset
+// of one pixel plus one row on top of Pitch*Y - confirm the buffers are
+// padded accordingly.
+internal void
+SpacialFilter(pixel_buffer *Buffer, memory *Memory, property_channel Property[])
+{
+ real32 P[9];
+ P[0] = Property[0].CurrentValue.f;
+ P[1] = Property[1].CurrentValue.f;
+ P[2] = Property[2].CurrentValue.f;
+ P[3] = Property[3].CurrentValue.f;
+ P[4] = Property[4].CurrentValue.f;
+ P[5] = Property[5].CurrentValue.f;
+ P[6] = Property[6].CurrentValue.f;
+ P[7] = Property[7].CurrentValue.f;
+ P[8] = Property[8].CurrentValue.f;
+
+ // Carve the scratch buffer out of the arena the first time it is needed.
+ // NOTE(review): the address is computed in uint64 units while
+ // CurrentPosition is advanced by a byte count - confirm the intended unit.
+ if (!Buffer->Scratch) {
+ Buffer->Scratch = (uint64 *)Memory->Address + Memory->CurrentPosition;
+ Memory->CurrentPosition += Buffer->Width * Buffer->Height * Buffer->BytesPerPixel;
+ }
+
+ // Pass 1: kernel response per interior pixel, stored as real32 in Scratch.
+ for(int Y = 1;
+ Y < Buffer->Height - 1;
+ ++Y)
+ {
+ for(int X = 1;
+ X < Buffer->Width - 1;
+ ++X)
+ {
+ real32 Sum = KernLoop(Buffer, X, Y, P);
+ uint8 *FloatRow = ((uint8 *)Buffer->Scratch +
+ Buffer->BytesPerPixel +
+ Buffer->Pitch + (Buffer->Pitch*Y));
+ real32 *FloatValue = (real32 *)FloatRow + X;
+ *(real32 *)FloatValue++ = Sum;
+ }
+ }
+ // Pass 2: convert the stored responses back into packed pixels.
+ for(int Y = 1;
+ Y < Buffer->Height - 1;
+ ++Y)
+ {
+ for(int X = 1;
+ X < Buffer->Width - 1;
+ ++X)
+ {
+ uint8 *Row = ((uint8 *)Buffer->EffectBuffer +
+ Buffer->BytesPerPixel +
+ Buffer->Pitch + (Buffer->Pitch*Y));
+ uint32 *Pixel = (uint32 *)Row + X;
+ uint8 *RowR = ((uint8 *)Buffer->Scratch +
+ Buffer->BytesPerPixel +
+ Buffer->Pitch + (Buffer->Pitch*Y));
+ real32 *PixelR = (real32 *)RowR + X;
+ // NOTE(review): abs() on a real32 may resolve to the integer overload
+ // depending on the headers in scope - confirm fabsf is not needed.
+ *(uint32 *)Pixel= ColToUint32(abs(*PixelR / 4.0f));
+ PixelR++;
+ }
+ }
+}
+
+
+// Gaussian blur of Buffer->EffectBuffer using FloatStorage as a per-pixel v4
+// accumulator. Radius is clamped to at least 1. The first pass adds each
+// source pixel, weighted by exp(-(dx^2 + dy^2) / (2 * (Radius/2)^2)) / Total,
+// into every accumulator cell within Radius of it; the second pass clamps
+// each accumulated colour, writes it back to EffectBuffer with alpha forced
+// to 1, and zeroes the accumulator cell.
+// NOTE(review): FloatStorage is presumably zero on entry (the second pass
+// leaves it zeroed for the cells it visits) - confirm for the first call.
+internal void
+Gaussian(pixel_buffer *Buffer, void *FloatStorage, real32 Radius)
+{
+ if (Radius < 1.0f)
+ Radius = 1.0f;
+ real32 Omega = Radius / 2;
+ // Normalisation divisor: (2*Radius + 1)^2 / 2.
+ real32 Total = pow((Radius + Radius + 1), 2) / 2;
+ // Row stride of the v4 accumulator in uint16 units: Pitch*2 uint16s is
+ // Pitch*4 bytes, i.e. four times the byte pitch of the pixel rows.
+ int32 ColorPitch = Buffer->Pitch * 2;
+
+ real32 P2 = 2*(Omega*Omega);
+ // Pass 1: scatter every interior source pixel into its neighbourhood.
+ for(int16 Y = Radius;
+ Y < Buffer->Height - Radius;
+ ++Y)
+ {
+ uint8 *Row = ((uint8 *)Buffer->EffectBuffer +
+ Buffer->BytesPerPixel +
+ Buffer->Pitch + Buffer->Pitch*(Y));
+ for(int16 X = Radius;
+ X < Buffer->Width - Radius;
+ ++X)
+ {
+ uint32 *Pixel = (uint32 *)Row + X;
+ v4 FloatCol = Uint32ToNormalizedCol(*Pixel);
+ for(int16 Y2 = -Radius;
+ Y2 <= Radius;
+ ++Y2)
+ {
+ uint16 *TempRow = ((uint16 *)FloatStorage +
+ Buffer->BytesPerPixel +
+ ColorPitch + (ColorPitch*(Y + Y2)));
+ for(int16 X2 = -Radius;
+ X2 <= Radius;
+ ++X2)
+ {
+ v4 *TempValue = (v4 *)TempRow + (X + X2);
+ // Squared distance from the source pixel.
+ real32 P1 = ((X2 * X2) + (Y2 * Y2));
+ real32 G = exp(-(P1/P2));
+ *TempValue = *TempValue + (FloatCol*V4(G) / V4(Total));
+ }
+ }
+ }
+ }
+ // Pass 2: write the accumulated colours back and reset the accumulator.
+ for(int Y = Radius;
+ Y < Buffer->Height - Radius;
+ ++Y)
+ {
+ for(int X = Radius;
+ X < Buffer->Width - Radius;
+ ++X)
+ {
+ uint8 *Row = ((uint8 *)Buffer->EffectBuffer +
+ Buffer->BytesPerPixel +
+ Buffer->Pitch + (Buffer->Pitch*Y));
+ uint32 *Pixel = (uint32 *)Row + X;
+ uint16 *TempRow = ((uint16 *)FloatStorage +
+ Buffer->BytesPerPixel +
+ ColorPitch + (ColorPitch*Y));
+ v4 *TempValue = (v4 *)TempRow + X;
+ TempValue->a = 1.0f;
+ uint32 Color = ColToUint32(Clamp(0.0, *TempValue, 1.0));
+ *Pixel = Color;
+ *TempValue = {0};
+ }
+ }
+}
+
+// Canny-style edge detector over Buffer->EffectBuffer.
+//
+//   Property[0] - blur radius passed to Gaussian()
+//   Property[1] - lower threshold in percent; the upper threshold is 1.5x it
+//
+// Steps: blur; Sobel magnitude/angle per pixel (stored in the r/g fields of a
+// v4 scratch cell); non-maximum suppression along the gradient direction
+// (flag in b); thresholding that paints strong edges and weak edges with a
+// strong neighbour.
+//
+// Fixes relative to the previous revision:
+//  - the suppression loop was missing its closing brace, so the function
+//    body never closed;
+//  - the direction tests are now a single if/else-if chain, so the vertical
+//    fallback no longer also runs for the first two direction bins;
+//  - the 3x3 neighbour scan now reads through TempRow2 and therefore really
+//    visits the rows above and below instead of the same row three times.
+internal void
+Canny(pixel_buffer *Buffer, memory *Memory, property_channel Property[])
+{
+    real32 SH[9] = { -1, 0, 1,
+                     -2, 0, 2,
+                     -1, 0, 1 };
+    real32 SV[9] = { -1, -2, -1,
+                      0,  0,  0,
+                      1,  2,  1 };
+
+    real32 Radius = Property[0].CurrentValue.f;
+    real32 Threshold = Property[1].CurrentValue.f / 100;
+    real32 UpperThreshold = Threshold * 1.5;
+    // Row stride of the v4 scratch buffer, in uint16 units.
+    int32 ColorPitch = Buffer->Pitch * 2;
+
+    if (!Buffer->Scratch) {
+        Buffer->Scratch = (uint64 *)Memory->Address + Memory->CurrentPosition;
+        // NOTE(fox): this buffer is four times as large to store four real32s
+        Memory->CurrentPosition += Buffer->Width * Buffer->Height * Buffer->BytesPerPixel * 4;
+    }
+
+    Gaussian(Buffer, Buffer->Scratch, Radius);
+
+    // Pass 1: gradient magnitude (r) and angle in degrees (g) from Sobel sums.
+    for(int Y = 1;
+        Y < Buffer->Height - 1;
+        ++Y)
+    {
+        uint16 *TempRow = ((uint16 *)Buffer->Scratch +
+                           Buffer->BytesPerPixel +
+                           ColorPitch + (ColorPitch*(Y)));
+        for(int X = 1;
+            X < Buffer->Width - 1;
+            ++X)
+        {
+            real32 HSum = KernLoop(Buffer, X, Y, SH);
+            real32 VSum = KernLoop(Buffer, X, Y, SV);
+            real32 Mag = sqrt((HSum*HSum) + (VSum*VSum));
+            real32 Angle = atan(VSum/HSum) * (180 / PI);
+            v4 *TempValue = (v4 *)TempRow + (X);
+            TempValue->r = Mag;
+            TempValue->g = Angle;
+        }
+    }
+
+    // Pass 2: non-maximum suppression. A pixel is flagged (b = 1) when its
+    // magnitude exceeds both neighbours along the direction of its angle bin.
+    for(int Y = 1;
+        Y < Buffer->Height - 1;
+        ++Y)
+    {
+        uint16 *TempRow = ((uint16 *)Buffer->Scratch +
+                           Buffer->BytesPerPixel +
+                           ColorPitch + (ColorPitch*(Y)));
+        for(int X = 1;
+            X < Buffer->Width - 1;
+            ++X)
+        {
+            v4 *TempValue = (v4 *)TempRow + X;
+            v4 *Mag1;
+            v4 *Mag2;
+            if (TempValue->g < 45 && TempValue->g > -45) {
+                Mag1 = (v4 *)TempRow + X + 1;
+                Mag2 = (v4 *)TempRow + X - 1;
+            } else if (TempValue->g < 90 && TempValue->g > 45) {
+                Mag1 = (v4 *)(TempRow + ColorPitch) + X + 1;
+                Mag2 = (v4 *)(TempRow - ColorPitch) + X - 1;
+            } else if (TempValue->g < -45 && TempValue->g > -90) {
+                Mag1 = (v4 *)(TempRow - ColorPitch) + X + 1;
+                Mag2 = (v4 *)(TempRow + ColorPitch) + X - 1;
+            } else {
+                Mag1 = (v4 *)(TempRow + ColorPitch) + X;
+                Mag2 = (v4 *)(TempRow - ColorPitch) + X;
+            }
+            if (TempValue->r > Mag1->r && TempValue->r > Mag2->r)
+                TempValue->b = 1;
+        }
+    }
+
+    // Pass 3: thresholding. Flagged pixels above the upper threshold are
+    // painted 0xFF0000FF; unflagged pixels above the lower threshold are
+    // painted 0xFFFFFF00 when any cell of their 3x3 neighbourhood exceeds the
+    // upper threshold.
+    for(int Y = 1;
+        Y < Buffer->Height - 1;
+        ++Y)
+    {
+        uint16 *TempRow = ((uint16 *)Buffer->Scratch +
+                           Buffer->BytesPerPixel +
+                           ColorPitch + (ColorPitch*(Y)));
+        uint8 *Row = ((uint8 *)Buffer->EffectBuffer +
+                      Buffer->BytesPerPixel +
+                      Buffer->Pitch + (Buffer->Pitch*Y));
+        for(int X = 1;
+            X < Buffer->Width - 1;
+            ++X)
+        {
+            uint32 *Pixel = (uint32 *)Row + X;
+            v4 *TempValue = (v4 *)TempRow + (X);
+            if (TempValue->b == 1) {
+                if (TempValue->r > UpperThreshold)
+                    *Pixel = 0xFF0000FF;
+            }
+            else if (TempValue->r > Threshold)
+            {
+                bool32 pp = false;
+                uint16 *TempRow2 = TempRow - ColorPitch;
+                for(int Y2 = 0;
+                    Y2 < 3;
+                    ++Y2)
+                {
+                    v4 *TempValue2 = (v4 *)TempRow2 + (X - 1);
+                    for(int X2 = 0;
+                        X2 < 3;
+                        ++X2)
+                    {
+                        if (TempValue2->r > UpperThreshold)
+                            pp = true;
+                        TempValue2++;
+                    }
+                    TempRow2 += ColorPitch;
+                }
+                if (pp)
+                    *Pixel = 0xFFFFFF00;
+            }
+        }
+    }
+}
+
+// Levels adjustment over Buffer->EffectBuffer.
+//
+//   Property[0..2] - global start / mid / end points (real)
+//   Property[3..5] - per-channel start / mid / end colours
+//
+// On the first call (Property[0].Scratch unset) it also builds histograms
+// from Buffer->OriginalBuffer: 256 uint16 bins each for the rounded r/g/b
+// average, r, g, b and a, laid out back to back in Property[0].Scratch. The
+// histograms are not read anywhere in this function.
+// NOTE(review): the histogram memory is incremented without being cleared
+// here, and uint16 bins can wrap on large images - confirm both are handled
+// elsewhere.
+internal void
+Levels(pixel_buffer *Buffer, memory *Memory, property_channel Property[])
+{
+ real32 Min = Property[0].CurrentValue.f;
+ real32 Mid = Property[1].CurrentValue.f;
+ real32 Max = Property[2].CurrentValue.f;
+
+ v4 ColMin = Property[3].CurrentValue.col;
+ v4 ColMid = Property[4].CurrentValue.col;
+ v4 ColMax = Property[5].CurrentValue.col;
+
+ if (!Property[0].Scratch) {
+ // NOTE(review): address computed in uint64 units, position advanced by a
+ // byte count - confirm the intended unit of CurrentPosition.
+ Property[0].Scratch = (uint64 *)Memory->Address + Memory->CurrentPosition;
+ Memory->CurrentPosition += Buffer->Width * Buffer->Height * Buffer->BytesPerPixel;
+
+ uint16 *Levels = (uint16 *)Property[0].Scratch;
+ uint8 *Row = ((uint8 *)Buffer->OriginalBuffer);
+
+ for(int Y = 0;
+ Y < Buffer->Height;
+ ++Y)
+ {
+ uint32 *Pixel = (uint32 *)Row;
+ for(int X = 0;
+ X < Buffer->Width;
+ ++X)
+ {
+ v4 Col = Uint32ToCol8(*Pixel);
+ // Bin 0..255: rounded average of r, g and b.
+ uint16 Global = (uint16)(RoundReal32ToUint32((Col.r + Col.g + Col.b)/3));
+ *(Levels + Global) += 1;
+ *(Levels + 256 + (uint16)Col.r) += 1;
+ *(Levels + 256*2 + (uint16)Col.g) += 1;
+ *(Levels + 256*3 + (uint16)Col.b) += 1;
+ *(Levels + 256*4 + (uint16)Col.a) += 1;
+ Pixel++;
+ }
+ Row += Buffer->Pitch;
+ }
+ }
+
+
+ uint8 *Row = ((uint8 *)Buffer->EffectBuffer);
+ for(int Y = 0;
+ Y < Buffer->Height;
+ ++Y)
+ {
+ uint32 *Pixel = (uint32 *)Row;
+ for(int X = 0;
+ X < Buffer->Width;
+ ++X)
+ {
+ // individual channels
+ // Raise to the per-channel mid colour, then remap [ColMin, ColMax] to [0, 1].
+ v4 ColorI = powv4(Uint32ToNormalizedCol(*Pixel), ColMid);
+ v4 ValI = 1.0f/(ColMax-ColMin) * (ColorI - ColMin);
+
+ // global channel
+ // Same remap with the scalar Min/Mid/Max applied to all channels.
+ v4 ColorG = powv4(ValI, Mid);
+ v4 ValG = 1.0f/(Max-Min) * (ColorG - Min);
+
+ *Pixel++ = ColToUint32(Clamp(0.0f, ValG, 1.0f));
+ }
+ Row += Buffer->Pitch;
+ }
+
+}
+
+// Effect entry point for the blur: reserves Buffer->Scratch from the arena on
+// first use, then runs Gaussian() with the radius held in Property[0].
+internal void
+GaussianBlur(pixel_buffer *Buffer, memory *Memory, property_channel Property[])
+{
+    real32 BlurRadius = Property[0].CurrentValue.f;
+
+    // First call only: claim a Width * Height * BytesPerPixel region.
+    if (!Buffer->Scratch)
+    {
+        Buffer->Scratch = (uint64 *)Memory->Address + Memory->CurrentPosition;
+        Memory->CurrentPosition += Buffer->Width * Buffer->Height * Buffer->BytesPerPixel;
+    }
+
+    Gaussian(Buffer, Buffer->Scratch, BlurRadius);
+}
+#endif