From fc8040d695644aaca4596adebeca4ea1369ef630 Mon Sep 17 00:00:00 2001 From: Fox Caminiti Date: Fri, 22 Jul 2022 20:45:08 -0400 Subject: first --- effects.cpp | 777 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 777 insertions(+) create mode 100644 effects.cpp (limited to 'effects.cpp') diff --git a/effects.cpp b/effects.cpp new file mode 100644 index 0000000..fe593a4 --- /dev/null +++ b/effects.cpp @@ -0,0 +1,777 @@ +internal void +DrawColor(pixel_buffer *Buffer, memory *Memory, property_channel Property[]) +{ + v4 FloatColor = Property[0].CurrentValue.col; + blend_mode BlendMode = Property[1].CurrentValue.blendmode; + + __m256 ZeroReal = _mm256_set1_ps(0); + __m256 ZeroPointFive = _mm256_set1_ps(0.5); + __m256 One = _mm256_set1_ps(1); + __m256 Two = _mm256_set1_ps(2); + __m256 Four = _mm256_set1_ps(4); + + __m256 Fraction255 = _mm256_set1_ps(1/255.0f); + __m256 Real255 = _mm256_set1_ps(255); + + __m256i Zero = _mm256_set1_epi8(0); + __m256i FF = _mm256_set1_epi32(0xFF); + __m256i Int255 = _mm256_set1_epi8((uint8)255); + + __m256 Alpha = _mm256_set1_ps(FloatColor.a); + __m256 AlphaInv = _mm256_set1_ps(1.0f - FloatColor.a); + + __m256 R_Col = _mm256_set1_ps(FloatColor.E[0]); + __m256 R_Colx2 = _mm256_mul_ps(R_Col, Two); + __m256 R_ColInv = _mm256_set1_ps(1.0f - FloatColor.E[0]); + + __m256 G_Col = _mm256_set1_ps(FloatColor.E[1]); + __m256 G_Colx2 = _mm256_mul_ps(G_Col, Two); + __m256 G_ColInv = _mm256_set1_ps(1.0f - FloatColor.E[1]); + + __m256 B_Col = _mm256_set1_ps(FloatColor.E[2]); + __m256 B_Colx2 = _mm256_mul_ps(B_Col, Two); + __m256 B_ColInv = _mm256_set1_ps(1.0f - FloatColor.E[2]); + + for (int16 Y = 0; Y < Buffer->Height; Y += 2) + { + for (int16 X = 0; X < Buffer->Width; X += 4) + { + uint32 XLookup = (X >> 2)*16 + (X % 4); + uint32 YLookup = (Y >> 2)*(Buffer->Width*4) + (Y % 4)*4; + uint32 PixelToSeek = XLookup + YLookup; + uint8 *Pixel = (uint8 *)Buffer->EffectBuffer + PixelToSeek*Buffer->BytesPerPixel; + __m256i DestPixel = _mm256_loadu_si256((const __m256i *)Pixel); + + // normalized values + __m256 R_Dest = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256( DestPixel, FF)), Fraction255); + __m256 G_Dest = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(DestPixel, 8), FF)), Fraction255); + __m256 B_Dest = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(DestPixel, 16), FF)), Fraction255); + __m256i A_Out = _mm256_and_si256(_mm256_srli_epi32(DestPixel, 24), FF); + __m256 A_Dest = _mm256_mul_ps(_mm256_cvtepi32_ps(A_Out), Fraction255); + + __m256 R_Blend = _mm256_setzero_ps(); + __m256 G_Blend = _mm256_setzero_ps(); + __m256 B_Blend = _mm256_setzero_ps(); + switch (BlendMode) + { + case blend_normal: + { + } break; + case blend_multiply: + { + R_Blend = _mm256_mul_ps(R_Dest, R_Col); + G_Blend = _mm256_mul_ps(G_Dest, G_Col); + B_Blend = _mm256_mul_ps(B_Dest, B_Col); + } break; + case blend_colorburn: + { + R_Blend = _mm256_sub_ps(One, _mm256_div_ps(_mm256_sub_ps(One, R_Dest), R_Col)); + G_Blend = _mm256_sub_ps(One, _mm256_div_ps(_mm256_sub_ps(One, G_Dest), G_Col)); + B_Blend = _mm256_sub_ps(One, _mm256_div_ps(_mm256_sub_ps(One, B_Dest), B_Col)); + } break; + case blend_linearburn: + { + R_Blend = _mm256_sub_ps(_mm256_add_ps(R_Dest, R_Col), One); + G_Blend = _mm256_sub_ps(_mm256_add_ps(G_Dest, G_Col), One); + B_Blend = _mm256_sub_ps(_mm256_add_ps(B_Dest, B_Col), One); + } break; + case blend_add: + { + R_Blend = _mm256_add_ps(R_Dest, R_Col); + G_Blend = _mm256_add_ps(G_Dest, G_Col); + B_Blend = _mm256_add_ps(B_Dest, B_Col); + } break; + case blend_screen: + { + R_Blend = _mm256_sub_ps(One, _mm256_mul_ps(_mm256_sub_ps(One, R_Dest), R_ColInv)); + G_Blend = _mm256_sub_ps(One, _mm256_mul_ps(_mm256_sub_ps(One, G_Dest), G_ColInv)); + B_Blend = _mm256_sub_ps(One, _mm256_mul_ps(_mm256_sub_ps(One, B_Dest), B_ColInv)); + } break; + case blend_overlay: + { + __m256 R_Mask = _mm256_cmp_ps(R_Dest, ZeroPointFive, 1); + __m256 G_Mask = _mm256_cmp_ps(G_Dest, ZeroPointFive, 1); + __m256 B_Mask = _mm256_cmp_ps(B_Dest, ZeroPointFive, 1); + __m256 R_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(R_Dest, R_Col)); + __m256 G_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(G_Dest, G_Col)); + __m256 B_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(B_Dest, B_Col)); + __m256 R_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, R_Dest), R_ColInv))); + __m256 G_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, G_Dest), G_ColInv))); + __m256 B_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, B_Dest), B_ColInv))); + R_Blend = _mm256_blendv_ps(R_Upper, R_Lower, R_Mask); + G_Blend = _mm256_blendv_ps(G_Upper, G_Lower, G_Mask); + B_Blend = _mm256_blendv_ps(B_Upper, B_Lower, B_Mask); + } break; + case blend_softlight: + { + // using Pegtop's equation + R_Blend = _mm256_add_ps(_mm256_mul_ps(_mm256_sub_ps(One, R_Colx2), _mm256_mul_ps(R_Dest, R_Dest)), _mm256_mul_ps(R_Colx2, R_Dest)); + G_Blend = _mm256_add_ps(_mm256_mul_ps(_mm256_sub_ps(One, G_Colx2), _mm256_mul_ps(G_Dest, G_Dest)), _mm256_mul_ps(G_Colx2, G_Dest)); + B_Blend = _mm256_add_ps(_mm256_mul_ps(_mm256_sub_ps(One, B_Colx2), _mm256_mul_ps(B_Dest, B_Dest)), _mm256_mul_ps(B_Colx2, B_Dest)); + } break; + case blend_hardlight: + { + __m256 R_Mask = _mm256_cmp_ps(R_Dest, ZeroPointFive, 13); + __m256 G_Mask = _mm256_cmp_ps(G_Dest, ZeroPointFive, 13); + __m256 B_Mask = _mm256_cmp_ps(B_Dest, ZeroPointFive, 13); + __m256 R_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(R_Dest, R_Col)); + __m256 G_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(G_Dest, G_Col)); + __m256 B_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(B_Dest, B_Col)); + __m256 R_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, R_Dest), R_ColInv))); + __m256 G_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, G_Dest), G_ColInv))); + __m256 B_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, B_Dest), B_ColInv))); + R_Blend = _mm256_blendv_ps(R_Upper, R_Lower, R_Mask); + G_Blend = _mm256_blendv_ps(G_Upper, G_Lower, G_Mask); + B_Blend = _mm256_blendv_ps(B_Upper, B_Lower, B_Mask); + } break; + case blend_subtract: + { + R_Blend = _mm256_sub_ps(R_Dest, R_Col); + G_Blend = _mm256_sub_ps(G_Dest, G_Col); + B_Blend = _mm256_sub_ps(B_Dest, B_Col); + } break; + case blend_divide: + { + R_Blend = _mm256_div_ps(R_Dest, R_Col); + G_Blend = _mm256_div_ps(G_Dest, G_Col); + B_Blend = _mm256_div_ps(B_Dest, B_Col); + } break; + case blend_difference: + { + __m256 R_Lower = _mm256_sub_ps(R_Col, R_Dest); + __m256 G_Lower = _mm256_sub_ps(G_Col, G_Dest); + __m256 B_Lower = _mm256_sub_ps(B_Col, B_Dest); + __m256 R_Upper = _mm256_sub_ps(R_Dest, R_Col); + __m256 G_Upper = _mm256_sub_ps(G_Dest, G_Col); + __m256 B_Upper = _mm256_sub_ps(B_Dest, B_Col); + __m256 R_Mask = _mm256_cmp_ps(R_Lower, ZeroReal, 14); + __m256 G_Mask = _mm256_cmp_ps(G_Lower, ZeroReal, 14); + __m256 B_Mask = _mm256_cmp_ps(B_Lower, ZeroReal, 14); + R_Blend = _mm256_blendv_ps(R_Upper, R_Lower, R_Mask); + G_Blend = _mm256_blendv_ps(G_Upper, G_Lower, G_Mask); + B_Blend = _mm256_blendv_ps(B_Upper, B_Lower, B_Mask); + } break; + } + + R_Blend = _mm256_add_ps(_mm256_mul_ps(R_Dest, AlphaInv), + _mm256_mul_ps(R_Blend, Alpha)); + G_Blend = _mm256_add_ps(_mm256_mul_ps(G_Dest, AlphaInv), + _mm256_mul_ps(G_Blend, Alpha)); + B_Blend = _mm256_add_ps(_mm256_mul_ps(B_Dest, AlphaInv), + _mm256_mul_ps(B_Blend, Alpha)); + + R_Blend = _mm256_max_ps(_mm256_min_ps(One, R_Blend), ZeroReal); + G_Blend = _mm256_max_ps(_mm256_min_ps(One, G_Blend), ZeroReal); + B_Blend = _mm256_max_ps(_mm256_min_ps(One, B_Blend), ZeroReal); + + __m256i R_Out = _mm256_cvttps_epi32(_mm256_mul_ps(R_Blend, Real255)); + __m256i G_Out = _mm256_cvttps_epi32(_mm256_mul_ps(G_Blend, Real255)); + __m256i B_Out = _mm256_cvttps_epi32(_mm256_mul_ps(B_Blend, Real255)); + + __m256i OutputPixel = _mm256_or_si256( + _mm256_or_si256(R_Out, _mm256_slli_epi32(G_Out, 8)), + _mm256_or_si256(_mm256_slli_epi32(B_Out, 16), _mm256_slli_epi32(A_Out, 24))); + + _mm256_storeu_si256((__m256i *)Pixel, OutputPixel); + } + } +} + +internal void +DrawGradient(pixel_buffer *Buffer, memory *Memory, property_channel Property[]) +{ + v4 StartColor = Property[0].CurrentValue.col; + v4 EndColor = Property[1].CurrentValue.col; +} + +global_variable effect_header EffectList[] { + { + "Solid Color", + &DrawColor, 2, standard, { + {"Color", {.col = V4(0.0f, 0.0f, 0.0f, 0.0f)}, type_color, NORMALIZED_COL_MIN, NORMALIZED_COL_MAX}, + {"Blend mode", {.blendmode = blend_normal}, type_blendmode}, + } + }, + { + "Linear Gradient", + &DrawGradient, 3, standard, { + {"Start Color", {.col = V4(0.0f, 1.0f, 0.0f, 0.0f)}, type_color, NORMALIZED_COL_MIN, NORMALIZED_COL_MAX}, + {"End Color", {.col = V4(1.0f, 0.0f, 0.0f, 1.0f)}, type_color, NORMALIZED_COL_MIN, NORMALIZED_COL_MAX}, + {"Opacity", {1.0f}, type_real, NORMALIZED_REAL_MIN, NORMALIZED_REAL_MAX} + } + } +}; +#if 0 + { + "Solid Color", + &DrawColor, standard, { + {"Color", {.col = V4(0.5f, 1.0f, 0.4f, 0.5f)}, color}, + } + }, + { + "Test Grid", + &DrawGrid, standard, { + {"Color 1", {.col = V4(0.5f, 1.0f, 0.4f, 1.0f)}, color}, + {"Color 2", {.col = V4(0.0f, 0.0f, 0.0f, 1.0f)}, color} + } + }, + { + "Gaussian Blur", + &GaussianBlur, standard, { + {"Radius", {2.0f}, real}, + } + }, + { + "Canny edges", + &Canny, standard, { + {"Blur Radius", {1.0f}, real}, + {"Threshold", {5.0f}, real}, + } + }, + { + "Levels", + &Levels, levels, { + {"Start point", {0.0f}, real}, + {"Mid point", {1.0f}, real}, + {"End point", {1.0f}, real}, + {"Start Col", {.col = V4(0.0f)}, color}, + {"Mid Col", {.col = V4(1.0f)}, color}, + {"End Col", {.col = V4(1.0f)}, color}, + } + }, + { + "Kernel", + &SpacialFilter, standard, { + {"V1", {-1.0f}, real}, + {"V2", {0.0f}, real}, + {"V3", {1.0f}, real}, + {"V4", {-2.0f}, real}, + {"V5", {0.0f}, real}, + {"V6", {2.0f}, real}, + {"V7", {-1.0f}, real}, + {"V8", {0.0f}, real}, + {"V9", {1.0f}, real}, + } + }, + { + "Invert", + &Invert, 0, standard, { + } + } +#endif + +internal void +AddEffect(project_layer *Layer, memory *Memory, uint16 EffectListIndex) +{ + Layer->Effect[Layer->NumberOfEffects] = (effect *)AllocateMemory(Memory, sizeof(effect), F_Effects); + effect *Effect = Layer->Effect[Layer->NumberOfEffects]; + effect_header EffectHeader = EffectList[EffectListIndex]; + Effect->Name = EffectHeader.Name; + Effect->func = EffectHeader.func; + Effect->NumberOfProperties = EffectHeader.NumberOfProperties; + Effect->DisplayType = EffectHeader.DisplayType; + Effect->IsActive = true; + for (int16 i = 0; i < Effect->NumberOfProperties; i++) { + Effect->Property[i].Name = EffectHeader.PropertyHeader[i].Name; + Effect->Property[i].CurrentValue = EffectHeader.PropertyHeader[i].Value; + Effect->Property[i].MinVal = EffectHeader.PropertyHeader[i].MinVal; + Effect->Property[i].MaxVal = EffectHeader.PropertyHeader[i].MaxVal; + Effect->Property[i].VarType = EffectHeader.PropertyHeader[i].VarType; + } + Layer->NumberOfEffects++; +} + +internal void +SSE_CopyToBuffer(pixel_buffer *, uint16 asda = 0); + +internal void +UpdateEffects(project_layer *Layer, memory *Memory) +{ + image_source *Source = (image_source *)Layer->RenderInfo; + if (!Source->Raster.EffectBuffer) { + Source->Raster.EffectBuffer = AllocateMemory(Memory, Source->Raster.Width * Source->Raster.Height * Source->Raster.BytesPerPixel, + B_Scratch); + } + SSE_CopyToBuffer(&Source->Raster); + for (int i = 0; i < Layer->NumberOfEffects; i++) + { + if (Layer->Effect[i]->IsActive) + Layer->Effect[i]->func(&Source->Raster, Memory, Layer->Effect[i]->Property); + } +} + +#if 0 + +internal void +DrawColor(pixel_buffer *Buffer, memory *Memory, property_channel Property[]) +{ + v4 FloatColor = Property[0].CurrentValue.col; + uint32 Color = ColToUint32(FloatColor); + + uint8 *Row = ((uint8 *)Buffer->EffectBuffer); + v2 Origin = {(real32)Buffer->Width / 2.0f, (real32)Buffer->Height / 2.0f}; + + real32 MaxLength = sqrt(LengthSq(Origin)); + + for(int Y = 0; + Y < Buffer->Height; + ++Y) + { + uint32 *Pixel = (uint32 *)Row; + for(int X = 0; + X < Buffer->Width; + ++X) + { + RenderAlpha(Pixel, Color); + Pixel++; + } + Row += Buffer->Pitch; + } +} + +internal void +Invert(pixel_buffer *Buffer, memory *Memory, property_channel Property[]) +{ + uint8 *Row = ((uint8 *)Buffer->EffectBuffer); + + for(int Y = 0; + Y < Buffer->Height; + ++Y) + { + uint32 *Pixel = (uint32 *)Row; + for(int X = 0; + X < Buffer->Width; + ++X) + { + v4 col = Uint32ToCol8(*Pixel); + col.r = 255 - col.r; + col.g = 255 - col.g; + col.b = 255 - col.b; + *Pixel++ = Col8ToUint32(col); + } + Row += Buffer->Pitch; + } +} + +internal void +DrawGradient(pixel_buffer *Buffer, memory *Memory, property_channel Property[]) +{ + v4 StartColor = Property[0].CurrentValue.col; + v4 EndColor = Property[1].CurrentValue.col; + real32 Alpha = Property[2].CurrentValue.f; + uint8 *Row = ((uint8 *)Buffer->EffectBuffer + + Buffer->BytesPerPixel + + Buffer->Pitch); + for(int Y = 0; + Y < Buffer->Height; + ++Y) + { + uint32 *Pixel = (uint32 *)Row; + for(int X = 0; + X < Buffer->Width; + ++X) + { + real32 PlusAlpha = ((real32)X / Buffer->Width); + v4 PL = V4(V3(PlusAlpha), 1.0f); + v4 C1 = ClipV4((StartColor - PL)); + v4 C2 = ClipV4( (EndColor - (1 - PL) ) ); + v4 FloatColor = ClipV4( C1 + C2 ); + + uint32 Color = ColToUint32(FloatColor); + *(uint32 *)Pixel++ = Color; + } + Row += Buffer->Pitch; + } +} + +internal void +DrawGrid(pixel_buffer *Buffer, memory *Memory, property_channel Property[]) +{ + v4 StartColor = Property[0].CurrentValue.col; + v4 EndColor = Property[1].CurrentValue.col; + uint32 Color1 = ColToUint32(StartColor); + uint32 Color2 = ColToUint32(EndColor); + + uint8 *Row = ((uint8 *)Buffer->EffectBuffer); + for(int Y = 0; + Y < Buffer->Height; + ++Y) + { + uint32 *Pixel = (uint32 *)Row; + for(int X = 0; + X < Buffer->Width; + ++X) + { + if (X & 4 || Y & 4) { + *(uint32 *)Pixel++ = Color1; + } else { + *(uint32 *)Pixel++ = Color2; + } + } + Row += Buffer->Pitch; + } +} + +internal real32 +KernLoop(pixel_buffer *Buffer, int16 Xp, int16 Yp, real32 Value[8]) +{ + real32 P[9]; + uint8 *Row = ((uint8 *)Buffer->EffectBuffer + + (Buffer->Pitch*Yp)); + Row -= Buffer->Pitch; + int16 n = 0; + for(int Y = 0; + Y < 3; + ++Y) + { + uint32 *Pixel = (uint32 *)Row + Xp; + for(int X = 0; + X < 3; + ++X) + { + real32 BW = Uint32ToNormalizedBW(*Pixel); + P[n] = BW * Value[n]; + Pixel++; + n++; + } + Row += Buffer->Pitch; + } + real32 Sum = P[0] + P[1] + P[2] + + P[3] + P[4] + P[5] + + P[6] + P[7] + P[8] ; + return Sum; +} + +internal void +SpacialFilter(pixel_buffer *Buffer, memory *Memory, property_channel Property[]) +{ + real32 P[9]; + P[0] = Property[0].CurrentValue.f; + P[1] = Property[1].CurrentValue.f; + P[2] = Property[2].CurrentValue.f; + P[3] = Property[3].CurrentValue.f; + P[4] = Property[4].CurrentValue.f; + P[5] = Property[5].CurrentValue.f; + P[6] = Property[6].CurrentValue.f; + P[7] = Property[7].CurrentValue.f; + P[8] = Property[8].CurrentValue.f; + + if (!Buffer->Scratch) { + Buffer->Scratch = (uint64 *)Memory->Address + Memory->CurrentPosition; + Memory->CurrentPosition += Buffer->Width * Buffer->Height * Buffer->BytesPerPixel; + } + + for(int Y = 1; + Y < Buffer->Height - 1; + ++Y) + { + for(int X = 1; + X < Buffer->Width - 1; + ++X) + { + real32 Sum = KernLoop(Buffer, X, Y, P); + uint8 *FloatRow = ((uint8 *)Buffer->Scratch + + Buffer->BytesPerPixel + + Buffer->Pitch + (Buffer->Pitch*Y)); + real32 *FloatValue = (real32 *)FloatRow + X; + *(real32 *)FloatValue++ = Sum; + } + } + for(int Y = 1; + Y < Buffer->Height - 1; + ++Y) + { + for(int X = 1; + X < Buffer->Width - 1; + ++X) + { + uint8 *Row = ((uint8 *)Buffer->EffectBuffer + + Buffer->BytesPerPixel + + Buffer->Pitch + (Buffer->Pitch*Y)); + uint32 *Pixel = (uint32 *)Row + X; + uint8 *RowR = ((uint8 *)Buffer->Scratch + + Buffer->BytesPerPixel + + Buffer->Pitch + (Buffer->Pitch*Y)); + real32 *PixelR = (real32 *)RowR + X; + *(uint32 *)Pixel= ColToUint32(abs(*PixelR / 4.0f)); + PixelR++; + } + } +} + + +internal void +Gaussian(pixel_buffer *Buffer, void *FloatStorage, real32 Radius) +{ + if (Radius < 1.0f) + Radius = 1.0f; + real32 Omega = Radius / 2; + real32 Total = pow((Radius + Radius + 1), 2) / 2; + int32 ColorPitch = Buffer->Pitch * 2; + + real32 P2 = 2*(Omega*Omega); + for(int16 Y = Radius; + Y < Buffer->Height - Radius; + ++Y) + { + uint8 *Row = ((uint8 *)Buffer->EffectBuffer + + Buffer->BytesPerPixel + + Buffer->Pitch + Buffer->Pitch*(Y)); + for(int16 X = Radius; + X < Buffer->Width - Radius; + ++X) + { + uint32 *Pixel = (uint32 *)Row + X; + v4 FloatCol = Uint32ToNormalizedCol(*Pixel); + for(int16 Y2 = -Radius; + Y2 <= Radius; + ++Y2) + { + uint16 *TempRow = ((uint16 *)FloatStorage + + Buffer->BytesPerPixel + + ColorPitch + (ColorPitch*(Y + Y2))); + for(int16 X2 = -Radius; + X2 <= Radius; + ++X2) + { + v4 *TempValue = (v4 *)TempRow + (X + X2); + real32 P1 = ((X2 * X2) + (Y2 * Y2)); + real32 G = exp(-(P1/P2)); + *TempValue = *TempValue + (FloatCol*V4(G) / V4(Total)); + } + } + } + } + for(int Y = Radius; + Y < Buffer->Height - Radius; + ++Y) + { + for(int X = Radius; + X < Buffer->Width - Radius; + ++X) + { + uint8 *Row = ((uint8 *)Buffer->EffectBuffer + + Buffer->BytesPerPixel + + Buffer->Pitch + (Buffer->Pitch*Y)); + uint32 *Pixel = (uint32 *)Row + X; + uint16 *TempRow = ((uint16 *)FloatStorage + + Buffer->BytesPerPixel + + ColorPitch + (ColorPitch*Y)); + v4 *TempValue = (v4 *)TempRow + X; + TempValue->a = 1.0f; + uint32 Color = ColToUint32(Clamp(0.0, *TempValue, 1.0)); + *Pixel = Color; + *TempValue = {0}; + } + } +} + +internal void +Canny(pixel_buffer *Buffer, memory *Memory, property_channel Property[]) +{ + real32 SH[9] = { -1, 0, 1, + -2, 0, 2, + -1, 0, 1 }; + real32 SV[9] = { -1, -2, -1, + 0, 0, 0, + 1, 2, 1 }; + + real32 Radius = Property[0].CurrentValue.f; + real32 Threshold = Property[1].CurrentValue.f / 100; + real32 UpperThreshold = Threshold * 1.5; + int32 ColorPitch = Buffer->Pitch * 2; + + if (!Buffer->Scratch) { + Buffer->Scratch = (uint64 *)Memory->Address + Memory->CurrentPosition; + // NOTE(fox): this buffer is four times as large to store four real32s + Memory->CurrentPosition += Buffer->Width * Buffer->Height * Buffer->BytesPerPixel * 4; + } + + Gaussian(Buffer, Buffer->Scratch, Radius); + + for(int Y = 1; + Y < Buffer->Height - 1; + ++Y) + { + uint16 *TempRow = ((uint16 *)Buffer->Scratch + + Buffer->BytesPerPixel + + ColorPitch + (ColorPitch*(Y))); + for(int X = 1; + X < Buffer->Width - 1; + ++X) + { + real32 HSum = KernLoop(Buffer, X, Y, SH); + real32 VSum = KernLoop(Buffer, X, Y, SV); + real32 Mag = sqrt((HSum*HSum) + (VSum*VSum)); + real32 Angle = atan(VSum/HSum) * (180 / PI); + v4 *TempValue = (v4 *)TempRow + (X); + TempValue->r = Mag; + TempValue->g = Angle; + } + } + for(int Y = 1; + Y < Buffer->Height - 1; + ++Y) + { + uint16 *TempRow = ((uint16 *)Buffer->Scratch + + Buffer->BytesPerPixel + + ColorPitch + (ColorPitch*(Y))); + uint16 *Row = ((uint16 *)Buffer->EffectBuffer + + Buffer->BytesPerPixel + + Buffer->Pitch + (Buffer->Pitch*Y)); + for(int X = 1; + X < Buffer->Width - 1; + ++X) + { + uint32 *Pixel = (uint32 *)Row + X; + v4 *TempValue = (v4 *)TempRow + X; + if (TempValue->g < 45 && TempValue->g > -45) { + v4 *Mag1 = (v4 *)TempRow + X + 1; + v4 *Mag2 = (v4 *)TempRow + X - 1; + if (TempValue->r > Mag1->r && TempValue->r > Mag2->r) + TempValue->b = 1; + } + if (TempValue->g < 90 && TempValue->g > 45) { + v4 *Mag1 = (v4 *)(TempRow + ColorPitch) + X + 1; + v4 *Mag2 = (v4 *)(TempRow - ColorPitch) + X - 1; + if (TempValue->r > Mag1->r && TempValue->r > Mag2->r) + TempValue->b = 1; + } + if (TempValue->g < -45 && TempValue->g > -90) { + v4 *Mag1 = (v4 *)(TempRow - ColorPitch) + X + 1; + v4 *Mag2 = (v4 *)(TempRow + ColorPitch) + X - 1; + if (TempValue->r > Mag1->r && TempValue->r > Mag2->r) + TempValue->b = 1; + } else { + v4 *Mag1 = (v4 *)(TempRow + ColorPitch) + X; + v4 *Mag2 = (v4 *)(TempRow - ColorPitch) + X; + if (TempValue->r > Mag1->r && TempValue->r > Mag2->r) + TempValue->b = 1; + } + } + for(int Y = 1; + Y < Buffer->Height - 1; + ++Y) + { + uint16 *TempRow = ((uint16 *)Buffer->Scratch + + Buffer->BytesPerPixel + + ColorPitch + (ColorPitch*(Y))); + uint8 *Row = ((uint8 *)Buffer->EffectBuffer + + Buffer->BytesPerPixel + + Buffer->Pitch + (Buffer->Pitch*Y)); + for(int X = 1; + X < Buffer->Width - 1; + ++X) + { + uint32 *Pixel = (uint32 *)Row + X; + v4 *TempValue = (v4 *)TempRow + (X); + if (TempValue->b == 1) { + if (TempValue->r > UpperThreshold) + *Pixel = 0xFF0000FF; + } + else if (TempValue->r > Threshold) + { + bool32 pp = false; + uint16 *TempRow2 = TempRow - ColorPitch; + for(int Y2 = 0; + Y2 < 3; + ++Y2) + { + v4 *TempValue2 = (v4 *)TempRow + (X - 1); + for(int X2 = 0; + X2 < 3; + ++X2) + { + if (TempValue2->r > UpperThreshold) + pp = true; + TempValue2++; + } + TempRow2 += ColorPitch; + } + if (pp) + *Pixel = 0xFFFFFF00; + } + } + } + } +} + +internal void +Levels(pixel_buffer *Buffer, memory *Memory, property_channel Property[]) +{ + real32 Min = Property[0].CurrentValue.f; + real32 Mid = Property[1].CurrentValue.f; + real32 Max = Property[2].CurrentValue.f; + + v4 ColMin = Property[3].CurrentValue.col; + v4 ColMid = Property[4].CurrentValue.col; + v4 ColMax = Property[5].CurrentValue.col; + + if (!Property[0].Scratch) { + Property[0].Scratch = (uint64 *)Memory->Address + Memory->CurrentPosition; + Memory->CurrentPosition += Buffer->Width * Buffer->Height * Buffer->BytesPerPixel; + + uint16 *Levels = (uint16 *)Property[0].Scratch; + uint8 *Row = ((uint8 *)Buffer->OriginalBuffer); + + for(int Y = 0; + Y < Buffer->Height; + ++Y) + { + uint32 *Pixel = (uint32 *)Row; + for(int X = 0; + X < Buffer->Width; + ++X) + { + v4 Col = Uint32ToCol8(*Pixel); + uint16 Global = (uint16)(RoundReal32ToUint32((Col.r + Col.g + Col.b)/3)); + *(Levels + Global) += 1; + *(Levels + 256 + (uint16)Col.r) += 1; + *(Levels + 256*2 + (uint16)Col.g) += 1; + *(Levels + 256*3 + (uint16)Col.b) += 1; + *(Levels + 256*4 + (uint16)Col.a) += 1; + Pixel++; + } + Row += Buffer->Pitch; + } + } + + + uint8 *Row = ((uint8 *)Buffer->EffectBuffer); + for(int Y = 0; + Y < Buffer->Height; + ++Y) + { + uint32 *Pixel = (uint32 *)Row; + for(int X = 0; + X < Buffer->Width; + ++X) + { + // individual channels + v4 ColorI = powv4(Uint32ToNormalizedCol(*Pixel), ColMid); + v4 ValI = 1.0f/(ColMax-ColMin) * (ColorI - ColMin); + + // global channel + v4 ColorG = powv4(ValI, Mid); + v4 ValG = 1.0f/(Max-Min) * (ColorG - Min); + + *Pixel++ = ColToUint32(Clamp(0.0f, ValG, 1.0f)); + } + Row += Buffer->Pitch; + } + +} + +internal void +GaussianBlur(pixel_buffer *Buffer, memory *Memory, property_channel Property[]) +{ + real32 Radius = Property[0].CurrentValue.f; + + if (!Buffer->Scratch) { + Buffer->Scratch = (uint64 *)Memory->Address + Memory->CurrentPosition; + Memory->CurrentPosition += Buffer->Width * Buffer->Height * Buffer->BytesPerPixel; + } + + Gaussian(Buffer, Buffer->Scratch, Radius); +} +#endif -- cgit v1.2.3