internal void DrawColor(pixel_buffer *Buffer, memory *Memory, property_channel Property[]) { v4 FloatColor = Property[0].CurrentValue.col; blend_mode BlendMode = Property[1].CurrentValue.blendmode; __m256 ZeroReal = _mm256_set1_ps(0); __m256 ZeroPointFive = _mm256_set1_ps(0.5); __m256 One = _mm256_set1_ps(1); __m256 Two = _mm256_set1_ps(2); __m256 Four = _mm256_set1_ps(4); __m256 Fraction255 = _mm256_set1_ps(1/255.0f); __m256 Real255 = _mm256_set1_ps(255); __m256i Zero = _mm256_set1_epi8(0); __m256i FF = _mm256_set1_epi32(0xFF); __m256i Int255 = _mm256_set1_epi8((uint8)255); __m256 Alpha = _mm256_set1_ps(FloatColor.a); __m256 AlphaInv = _mm256_set1_ps(1.0f - FloatColor.a); __m256 R_Col = _mm256_set1_ps(FloatColor.E[0]); __m256 R_Colx2 = _mm256_mul_ps(R_Col, Two); __m256 R_ColInv = _mm256_set1_ps(1.0f - FloatColor.E[0]); __m256 G_Col = _mm256_set1_ps(FloatColor.E[1]); __m256 G_Colx2 = _mm256_mul_ps(G_Col, Two); __m256 G_ColInv = _mm256_set1_ps(1.0f - FloatColor.E[1]); __m256 B_Col = _mm256_set1_ps(FloatColor.E[2]); __m256 B_Colx2 = _mm256_mul_ps(B_Col, Two); __m256 B_ColInv = _mm256_set1_ps(1.0f - FloatColor.E[2]); for (int16 Y = 0; Y < Buffer->Height; Y += 2) { for (int16 X = 0; X < Buffer->Width; X += 4) { uint32 XLookup = (X >> 2)*16 + (X % 4); uint32 YLookup = (Y >> 2)*(Buffer->Width*4) + (Y % 4)*4; uint32 PixelToSeek = XLookup + YLookup; uint8 *Pixel = (uint8 *)Buffer->EffectBuffer + PixelToSeek*Buffer->BytesPerPixel; __m256i DestPixel = _mm256_loadu_si256((const __m256i *)Pixel); // normalized values __m256 R_Dest = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256( DestPixel, FF)), Fraction255); __m256 G_Dest = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(DestPixel, 8), FF)), Fraction255); __m256 B_Dest = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(DestPixel, 16), FF)), Fraction255); __m256i A_Out = _mm256_and_si256(_mm256_srli_epi32(DestPixel, 24), FF); __m256 A_Dest = _mm256_mul_ps(_mm256_cvtepi32_ps(A_Out), Fraction255); __m256 R_Blend = _mm256_setzero_ps(); __m256 G_Blend = _mm256_setzero_ps(); __m256 B_Blend = _mm256_setzero_ps(); switch (BlendMode) { case blend_normal: { } break; case blend_multiply: { R_Blend = _mm256_mul_ps(R_Dest, R_Col); G_Blend = _mm256_mul_ps(G_Dest, G_Col); B_Blend = _mm256_mul_ps(B_Dest, B_Col); } break; case blend_colorburn: { R_Blend = _mm256_sub_ps(One, _mm256_div_ps(_mm256_sub_ps(One, R_Dest), R_Col)); G_Blend = _mm256_sub_ps(One, _mm256_div_ps(_mm256_sub_ps(One, G_Dest), G_Col)); B_Blend = _mm256_sub_ps(One, _mm256_div_ps(_mm256_sub_ps(One, B_Dest), B_Col)); } break; case blend_linearburn: { R_Blend = _mm256_sub_ps(_mm256_add_ps(R_Dest, R_Col), One); G_Blend = _mm256_sub_ps(_mm256_add_ps(G_Dest, G_Col), One); B_Blend = _mm256_sub_ps(_mm256_add_ps(B_Dest, B_Col), One); } break; case blend_add: { R_Blend = _mm256_add_ps(R_Dest, R_Col); G_Blend = _mm256_add_ps(G_Dest, G_Col); B_Blend = _mm256_add_ps(B_Dest, B_Col); } break; case blend_screen: { R_Blend = _mm256_sub_ps(One, _mm256_mul_ps(_mm256_sub_ps(One, R_Dest), R_ColInv)); G_Blend = _mm256_sub_ps(One, _mm256_mul_ps(_mm256_sub_ps(One, G_Dest), G_ColInv)); B_Blend = _mm256_sub_ps(One, _mm256_mul_ps(_mm256_sub_ps(One, B_Dest), B_ColInv)); } break; case blend_overlay: { __m256 R_Mask = _mm256_cmp_ps(R_Dest, ZeroPointFive, 1); __m256 G_Mask = _mm256_cmp_ps(G_Dest, ZeroPointFive, 1); __m256 B_Mask = _mm256_cmp_ps(B_Dest, ZeroPointFive, 1); __m256 R_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(R_Dest, R_Col)); __m256 G_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(G_Dest, G_Col)); __m256 B_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(B_Dest, B_Col)); __m256 R_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, R_Dest), R_ColInv))); __m256 G_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, G_Dest), G_ColInv))); __m256 B_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, B_Dest), B_ColInv))); R_Blend = _mm256_blendv_ps(R_Upper, R_Lower, R_Mask); G_Blend = _mm256_blendv_ps(G_Upper, G_Lower, G_Mask); B_Blend = _mm256_blendv_ps(B_Upper, B_Lower, B_Mask); } break; case blend_softlight: { // using Pegtop's equation R_Blend = _mm256_add_ps(_mm256_mul_ps(_mm256_sub_ps(One, R_Colx2), _mm256_mul_ps(R_Dest, R_Dest)), _mm256_mul_ps(R_Colx2, R_Dest)); G_Blend = _mm256_add_ps(_mm256_mul_ps(_mm256_sub_ps(One, G_Colx2), _mm256_mul_ps(G_Dest, G_Dest)), _mm256_mul_ps(G_Colx2, G_Dest)); B_Blend = _mm256_add_ps(_mm256_mul_ps(_mm256_sub_ps(One, B_Colx2), _mm256_mul_ps(B_Dest, B_Dest)), _mm256_mul_ps(B_Colx2, B_Dest)); } break; case blend_hardlight: { __m256 R_Mask = _mm256_cmp_ps(R_Dest, ZeroPointFive, 13); __m256 G_Mask = _mm256_cmp_ps(G_Dest, ZeroPointFive, 13); __m256 B_Mask = _mm256_cmp_ps(B_Dest, ZeroPointFive, 13); __m256 R_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(R_Dest, R_Col)); __m256 G_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(G_Dest, G_Col)); __m256 B_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(B_Dest, B_Col)); __m256 R_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, R_Dest), R_ColInv))); __m256 G_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, G_Dest), G_ColInv))); __m256 B_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, B_Dest), B_ColInv))); R_Blend = _mm256_blendv_ps(R_Upper, R_Lower, R_Mask); G_Blend = _mm256_blendv_ps(G_Upper, G_Lower, G_Mask); B_Blend = _mm256_blendv_ps(B_Upper, B_Lower, B_Mask); } break; case blend_subtract: { R_Blend = _mm256_sub_ps(R_Dest, R_Col); G_Blend = _mm256_sub_ps(G_Dest, G_Col); B_Blend = _mm256_sub_ps(B_Dest, B_Col); } break; case blend_divide: { R_Blend = _mm256_div_ps(R_Dest, R_Col); G_Blend = _mm256_div_ps(G_Dest, G_Col); B_Blend = _mm256_div_ps(B_Dest, B_Col); } break; case blend_difference: { __m256 R_Lower = _mm256_sub_ps(R_Col, R_Dest); __m256 G_Lower = _mm256_sub_ps(G_Col, G_Dest); __m256 B_Lower = _mm256_sub_ps(B_Col, B_Dest); __m256 R_Upper = _mm256_sub_ps(R_Dest, R_Col); __m256 G_Upper = _mm256_sub_ps(G_Dest, G_Col); __m256 B_Upper = _mm256_sub_ps(B_Dest, B_Col); __m256 R_Mask = _mm256_cmp_ps(R_Lower, ZeroReal, 14); __m256 G_Mask = _mm256_cmp_ps(G_Lower, ZeroReal, 14); __m256 B_Mask = _mm256_cmp_ps(B_Lower, ZeroReal, 14); R_Blend = _mm256_blendv_ps(R_Upper, R_Lower, R_Mask); G_Blend = _mm256_blendv_ps(G_Upper, G_Lower, G_Mask); B_Blend = _mm256_blendv_ps(B_Upper, B_Lower, B_Mask); } break; } R_Blend = _mm256_add_ps(_mm256_mul_ps(R_Dest, AlphaInv), _mm256_mul_ps(R_Blend, Alpha)); G_Blend = _mm256_add_ps(_mm256_mul_ps(G_Dest, AlphaInv), _mm256_mul_ps(G_Blend, Alpha)); B_Blend = _mm256_add_ps(_mm256_mul_ps(B_Dest, AlphaInv), _mm256_mul_ps(B_Blend, Alpha)); R_Blend = _mm256_max_ps(_mm256_min_ps(One, R_Blend), ZeroReal); G_Blend = _mm256_max_ps(_mm256_min_ps(One, G_Blend), ZeroReal); B_Blend = _mm256_max_ps(_mm256_min_ps(One, B_Blend), ZeroReal); __m256i R_Out = _mm256_cvttps_epi32(_mm256_mul_ps(R_Blend, Real255)); __m256i G_Out = _mm256_cvttps_epi32(_mm256_mul_ps(G_Blend, Real255)); __m256i B_Out = _mm256_cvttps_epi32(_mm256_mul_ps(B_Blend, Real255)); __m256i OutputPixel = _mm256_or_si256( _mm256_or_si256(R_Out, _mm256_slli_epi32(G_Out, 8)), _mm256_or_si256(_mm256_slli_epi32(B_Out, 16), _mm256_slli_epi32(A_Out, 24))); _mm256_storeu_si256((__m256i *)Pixel, OutputPixel); } } } internal void DrawGradient(pixel_buffer *Buffer, memory *Memory, property_channel Property[]) { v4 StartColor = Property[0].CurrentValue.col; v4 EndColor = Property[1].CurrentValue.col; } #if WINDOWS global_variable effect_header EffectList[3]; #else global_variable effect_header EffectList[] { { "Solid Color", &DrawColor, 2, standard, { {"Color", {.col = V4(0.0f, 0.0f, 0.0f, 0.0f)}, type_color, NORMALIZED_COL_MIN, NORMALIZED_COL_MAX}, {"Blend mode", {.blendmode = blend_normal}, type_blendmode}, } }, { "Linear Gradient", &DrawGradient, 3, standard, { {"Start Color", {.col = V4(0.0f, 1.0f, 0.0f, 0.0f)}, type_color, NORMALIZED_COL_MIN, NORMALIZED_COL_MAX}, {"End Color", {.col = V4(1.0f, 0.0f, 0.0f, 1.0f)}, type_color, NORMALIZED_COL_MIN, NORMALIZED_COL_MAX}, {"Opacity", {1.0f}, type_real, NORMALIZED_REAL_MIN, NORMALIZED_REAL_MAX} } }, { "Levels", &DrawColor, 6, levels, { {"Start point", {0.0f}, type_real}, {"Mid point", {1.0f}, type_real}, {"End point", {1.0f}, type_real}, {"Start Col", {.col = V4(0.0f)}, type_color}, {"Mid Col", {.col = V4(1.0f)}, type_color}, {"End Col", {.col = V4(1.0f)}, type_color}, } }, }; #endif #if 0 { "Solid Color", &DrawColor, standard, { {"Color", {.col = V4(0.5f, 1.0f, 0.4f, 0.5f)}, color}, } }, { "Test Grid", &DrawGrid, standard, { {"Color 1", {.col = V4(0.5f, 1.0f, 0.4f, 1.0f)}, color}, {"Color 2", {.col = V4(0.0f, 0.0f, 0.0f, 1.0f)}, color} } }, { "Gaussian Blur", &GaussianBlur, standard, { {"Radius", {2.0f}, real}, } }, { "Canny edges", &Canny, standard, { {"Blur Radius", {1.0f}, real}, {"Threshold", {5.0f}, real}, } }, { "Levels", &Levels, levels, { {"Start point", {0.0f}, real}, {"Mid point", {1.0f}, real}, {"End point", {1.0f}, real}, {"Start Col", {.col = V4(0.0f)}, color}, {"Mid Col", {.col = V4(1.0f)}, color}, {"End Col", {.col = V4(1.0f)}, color}, } }, { "Kernel", &SpacialFilter, standard, { {"V1", {-1.0f}, real}, {"V2", {0.0f}, real}, {"V3", {1.0f}, real}, {"V4", {-2.0f}, real}, {"V5", {0.0f}, real}, {"V6", {2.0f}, real}, {"V7", {-1.0f}, real}, {"V8", {0.0f}, real}, {"V9", {1.0f}, real}, } }, { "Invert", &Invert, 0, standard, { } } #endif internal void AddEffect(project_layer *Layer, memory *Memory, uint16 EffectListIndex) { Layer->Effect[Layer->NumberOfEffects] = (effect *)AllocateMemory(Memory, sizeof(effect), F_Effects); effect *Effect = Layer->Effect[Layer->NumberOfEffects]; effect_header EffectHeader = EffectList[EffectListIndex]; Effect->Name = EffectHeader.Name; Effect->func = EffectHeader.func; Effect->NumberOfProperties = EffectHeader.NumberOfProperties; Effect->DisplayType = EffectHeader.DisplayType; Effect->IsActive = true; for (int16 i = 0; i < Effect->NumberOfProperties; i++) { Effect->Property[i].Name = EffectHeader.PropertyHeader[i].Name; Effect->Property[i].CurrentValue = EffectHeader.PropertyHeader[i].Value; Effect->Property[i].MinVal = EffectHeader.PropertyHeader[i].MinVal; Effect->Property[i].MaxVal = EffectHeader.PropertyHeader[i].MaxVal; Effect->Property[i].VarType = EffectHeader.PropertyHeader[i].VarType; } Layer->NumberOfEffects++; } internal void CopyToBuffer(pixel_buffer *, uint16 asda = 0); internal void UpdateEffects(project_layer *Layer, memory *Memory) { image_source *Source = (image_source *)Layer->RenderInfo; if (!Source->Raster.EffectBuffer) { Source->Raster.EffectBuffer = AllocateMemory(Memory, Source->Raster.Width * Source->Raster.Height * Source->Raster.BytesPerPixel, B_Scratch); } CopyToBuffer(&Source->Raster); for (int i = 0; i < Layer->NumberOfEffects; i++) { if (Layer->Effect[i]->IsActive) Layer->Effect[i]->func(&Source->Raster, Memory, Layer->Effect[i]->Property); } } #if 0 internal void DrawColor(pixel_buffer *Buffer, memory *Memory, property_channel Property[]) { v4 FloatColor = Property[0].CurrentValue.col; uint32 Color = ColToUint32(FloatColor); uint8 *Row = ((uint8 *)Buffer->EffectBuffer); v2 Origin = {(real32)Buffer->Width / 2.0f, (real32)Buffer->Height / 2.0f}; real32 MaxLength = sqrt(LengthSq(Origin)); for(int Y = 0; Y < Buffer->Height; ++Y) { uint32 *Pixel = (uint32 *)Row; for(int X = 0; X < Buffer->Width; ++X) { RenderAlpha(Pixel, Color); Pixel++; } Row += Buffer->Pitch; } } internal void Invert(pixel_buffer *Buffer, memory *Memory, property_channel Property[]) { uint8 *Row = ((uint8 *)Buffer->EffectBuffer); for(int Y = 0; Y < Buffer->Height; ++Y) { uint32 *Pixel = (uint32 *)Row; for(int X = 0; X < Buffer->Width; ++X) { v4 col = Uint32ToCol8(*Pixel); col.r = 255 - col.r; col.g = 255 - col.g; col.b = 255 - col.b; *Pixel++ = Col8ToUint32(col); } Row += Buffer->Pitch; } } internal void DrawGradient(pixel_buffer *Buffer, memory *Memory, property_channel Property[]) { v4 StartColor = Property[0].CurrentValue.col; v4 EndColor = Property[1].CurrentValue.col; real32 Alpha = Property[2].CurrentValue.f; uint8 *Row = ((uint8 *)Buffer->EffectBuffer + Buffer->BytesPerPixel + Buffer->Pitch); for(int Y = 0; Y < Buffer->Height; ++Y) { uint32 *Pixel = (uint32 *)Row; for(int X = 0; X < Buffer->Width; ++X) { real32 PlusAlpha = ((real32)X / Buffer->Width); v4 PL = V4(V3(PlusAlpha), 1.0f); v4 C1 = ClipV4((StartColor - PL)); v4 C2 = ClipV4( (EndColor - (1 - PL) ) ); v4 FloatColor = ClipV4( C1 + C2 ); uint32 Color = ColToUint32(FloatColor); *(uint32 *)Pixel++ = Color; } Row += Buffer->Pitch; } } internal void DrawGrid(pixel_buffer *Buffer, memory *Memory, property_channel Property[]) { v4 StartColor = Property[0].CurrentValue.col; v4 EndColor = Property[1].CurrentValue.col; uint32 Color1 = ColToUint32(StartColor); uint32 Color2 = ColToUint32(EndColor); uint8 *Row = ((uint8 *)Buffer->EffectBuffer); for(int Y = 0; Y < Buffer->Height; ++Y) { uint32 *Pixel = (uint32 *)Row; for(int X = 0; X < Buffer->Width; ++X) { if (X & 4 || Y & 4) { *(uint32 *)Pixel++ = Color1; } else { *(uint32 *)Pixel++ = Color2; } } Row += Buffer->Pitch; } } internal real32 KernLoop(pixel_buffer *Buffer, int16 Xp, int16 Yp, real32 Value[8]) { real32 P[9]; uint8 *Row = ((uint8 *)Buffer->EffectBuffer + (Buffer->Pitch*Yp)); Row -= Buffer->Pitch; int16 n = 0; for(int Y = 0; Y < 3; ++Y) { uint32 *Pixel = (uint32 *)Row + Xp; for(int X = 0; X < 3; ++X) { real32 BW = Uint32ToNormalizedBW(*Pixel); P[n] = BW * Value[n]; Pixel++; n++; } Row += Buffer->Pitch; } real32 Sum = P[0] + P[1] + P[2] + P[3] + P[4] + P[5] + P[6] + P[7] + P[8] ; return Sum; } internal void SpacialFilter(pixel_buffer *Buffer, memory *Memory, property_channel Property[]) { real32 P[9]; P[0] = Property[0].CurrentValue.f; P[1] = Property[1].CurrentValue.f; P[2] = Property[2].CurrentValue.f; P[3] = Property[3].CurrentValue.f; P[4] = Property[4].CurrentValue.f; P[5] = Property[5].CurrentValue.f; P[6] = Property[6].CurrentValue.f; P[7] = Property[7].CurrentValue.f; P[8] = Property[8].CurrentValue.f; if (!Buffer->Scratch) { Buffer->Scratch = (uint64 *)Memory->Address + Memory->CurrentPosition; Memory->CurrentPosition += Buffer->Width * Buffer->Height * Buffer->BytesPerPixel; } for(int Y = 1; Y < Buffer->Height - 1; ++Y) { for(int X = 1; X < Buffer->Width - 1; ++X) { real32 Sum = KernLoop(Buffer, X, Y, P); uint8 *FloatRow = ((uint8 *)Buffer->Scratch + Buffer->BytesPerPixel + Buffer->Pitch + (Buffer->Pitch*Y)); real32 *FloatValue = (real32 *)FloatRow + X; *(real32 *)FloatValue++ = Sum; } } for(int Y = 1; Y < Buffer->Height - 1; ++Y) { for(int X = 1; X < Buffer->Width - 1; ++X) { uint8 *Row = ((uint8 *)Buffer->EffectBuffer + Buffer->BytesPerPixel + Buffer->Pitch + (Buffer->Pitch*Y)); uint32 *Pixel = (uint32 *)Row + X; uint8 *RowR = ((uint8 *)Buffer->Scratch + Buffer->BytesPerPixel + Buffer->Pitch + (Buffer->Pitch*Y)); real32 *PixelR = (real32 *)RowR + X; *(uint32 *)Pixel= ColToUint32(abs(*PixelR / 4.0f)); PixelR++; } } } internal void Gaussian(pixel_buffer *Buffer, void *FloatStorage, real32 Radius) { if (Radius < 1.0f) Radius = 1.0f; real32 Omega = Radius / 2; real32 Total = pow((Radius + Radius + 1), 2) / 2; int32 ColorPitch = Buffer->Pitch * 2; real32 P2 = 2*(Omega*Omega); for(int16 Y = Radius; Y < Buffer->Height - Radius; ++Y) { uint8 *Row = ((uint8 *)Buffer->EffectBuffer + Buffer->BytesPerPixel + Buffer->Pitch + Buffer->Pitch*(Y)); for(int16 X = Radius; X < Buffer->Width - Radius; ++X) { uint32 *Pixel = (uint32 *)Row + X; v4 FloatCol = Uint32ToNormalizedCol(*Pixel); for(int16 Y2 = -Radius; Y2 <= Radius; ++Y2) { uint16 *TempRow = ((uint16 *)FloatStorage + Buffer->BytesPerPixel + ColorPitch + (ColorPitch*(Y + Y2))); for(int16 X2 = -Radius; X2 <= Radius; ++X2) { v4 *TempValue = (v4 *)TempRow + (X + X2); real32 P1 = ((X2 * X2) + (Y2 * Y2)); real32 G = exp(-(P1/P2)); *TempValue = *TempValue + (FloatCol*V4(G) / V4(Total)); } } } } for(int Y = Radius; Y < Buffer->Height - Radius; ++Y) { for(int X = Radius; X < Buffer->Width - Radius; ++X) { uint8 *Row = ((uint8 *)Buffer->EffectBuffer + Buffer->BytesPerPixel + Buffer->Pitch + (Buffer->Pitch*Y)); uint32 *Pixel = (uint32 *)Row + X; uint16 *TempRow = ((uint16 *)FloatStorage + Buffer->BytesPerPixel + ColorPitch + (ColorPitch*Y)); v4 *TempValue = (v4 *)TempRow + X; TempValue->a = 1.0f; uint32 Color = ColToUint32(Clamp(0.0, *TempValue, 1.0)); *Pixel = Color; *TempValue = {0}; } } } internal void Canny(pixel_buffer *Buffer, memory *Memory, property_channel Property[]) { real32 SH[9] = { -1, 0, 1, -2, 0, 2, -1, 0, 1 }; real32 SV[9] = { -1, -2, -1, 0, 0, 0, 1, 2, 1 }; real32 Radius = Property[0].CurrentValue.f; real32 Threshold = Property[1].CurrentValue.f / 100; real32 UpperThreshold = Threshold * 1.5; int32 ColorPitch = Buffer->Pitch * 2; if (!Buffer->Scratch) { Buffer->Scratch = (uint64 *)Memory->Address + Memory->CurrentPosition; // NOTE(fox): this buffer is four times as large to store four real32s Memory->CurrentPosition += Buffer->Width * Buffer->Height * Buffer->BytesPerPixel * 4; } Gaussian(Buffer, Buffer->Scratch, Radius); for(int Y = 1; Y < Buffer->Height - 1; ++Y) { uint16 *TempRow = ((uint16 *)Buffer->Scratch + Buffer->BytesPerPixel + ColorPitch + (ColorPitch*(Y))); for(int X = 1; X < Buffer->Width - 1; ++X) { real32 HSum = KernLoop(Buffer, X, Y, SH); real32 VSum = KernLoop(Buffer, X, Y, SV); real32 Mag = sqrt((HSum*HSum) + (VSum*VSum)); real32 Angle = atan(VSum/HSum) * (180 / PI); v4 *TempValue = (v4 *)TempRow + (X); TempValue->r = Mag; TempValue->g = Angle; } } for(int Y = 1; Y < Buffer->Height - 1; ++Y) { uint16 *TempRow = ((uint16 *)Buffer->Scratch + Buffer->BytesPerPixel + ColorPitch + (ColorPitch*(Y))); uint16 *Row = ((uint16 *)Buffer->EffectBuffer + Buffer->BytesPerPixel + Buffer->Pitch + (Buffer->Pitch*Y)); for(int X = 1; X < Buffer->Width - 1; ++X) { uint32 *Pixel = (uint32 *)Row + X; v4 *TempValue = (v4 *)TempRow + X; if (TempValue->g < 45 && TempValue->g > -45) { v4 *Mag1 = (v4 *)TempRow + X + 1; v4 *Mag2 = (v4 *)TempRow + X - 1; if (TempValue->r > Mag1->r && TempValue->r > Mag2->r) TempValue->b = 1; } if (TempValue->g < 90 && TempValue->g > 45) { v4 *Mag1 = (v4 *)(TempRow + ColorPitch) + X + 1; v4 *Mag2 = (v4 *)(TempRow - ColorPitch) + X - 1; if (TempValue->r > Mag1->r && TempValue->r > Mag2->r) TempValue->b = 1; } if (TempValue->g < -45 && TempValue->g > -90) { v4 *Mag1 = (v4 *)(TempRow - ColorPitch) + X + 1; v4 *Mag2 = (v4 *)(TempRow + ColorPitch) + X - 1; if (TempValue->r > Mag1->r && TempValue->r > Mag2->r) TempValue->b = 1; } else { v4 *Mag1 = (v4 *)(TempRow + ColorPitch) + X; v4 *Mag2 = (v4 *)(TempRow - ColorPitch) + X; if (TempValue->r > Mag1->r && TempValue->r > Mag2->r) TempValue->b = 1; } } for(int Y = 1; Y < Buffer->Height - 1; ++Y) { uint16 *TempRow = ((uint16 *)Buffer->Scratch + Buffer->BytesPerPixel + ColorPitch + (ColorPitch*(Y))); uint8 *Row = ((uint8 *)Buffer->EffectBuffer + Buffer->BytesPerPixel + Buffer->Pitch + (Buffer->Pitch*Y)); for(int X = 1; X < Buffer->Width - 1; ++X) { uint32 *Pixel = (uint32 *)Row + X; v4 *TempValue = (v4 *)TempRow + (X); if (TempValue->b == 1) { if (TempValue->r > UpperThreshold) *Pixel = 0xFF0000FF; } else if (TempValue->r > Threshold) { bool32 pp = false; uint16 *TempRow2 = TempRow - ColorPitch; for(int Y2 = 0; Y2 < 3; ++Y2) { v4 *TempValue2 = (v4 *)TempRow + (X - 1); for(int X2 = 0; X2 < 3; ++X2) { if (TempValue2->r > UpperThreshold) pp = true; TempValue2++; } TempRow2 += ColorPitch; } if (pp) *Pixel = 0xFFFFFF00; } } } } } internal void Levels(pixel_buffer *Buffer, memory *Memory, property_channel Property[]) { real32 Min = Property[0].CurrentValue.f; real32 Mid = Property[1].CurrentValue.f; real32 Max = Property[2].CurrentValue.f; v4 ColMin = Property[3].CurrentValue.col; v4 ColMid = Property[4].CurrentValue.col; v4 ColMax = Property[5].CurrentValue.col; if (!Property[0].Scratch) { Property[0].Scratch = (uint64 *)Memory->Address + Memory->CurrentPosition; Memory->CurrentPosition += Buffer->Width * Buffer->Height * Buffer->BytesPerPixel; uint16 *Levels = (uint16 *)Property[0].Scratch; uint8 *Row = ((uint8 *)Buffer->OriginalBuffer); for(int Y = 0; Y < Buffer->Height; ++Y) { uint32 *Pixel = (uint32 *)Row; for(int X = 0; X < Buffer->Width; ++X) { v4 Col = Uint32ToCol8(*Pixel); uint16 Global = (uint16)(RoundReal32ToUint32((Col.r + Col.g + Col.b)/3)); *(Levels + Global) += 1; *(Levels + 256 + (uint16)Col.r) += 1; *(Levels + 256*2 + (uint16)Col.g) += 1; *(Levels + 256*3 + (uint16)Col.b) += 1; *(Levels + 256*4 + (uint16)Col.a) += 1; Pixel++; } Row += Buffer->Pitch; } } uint8 *Row = ((uint8 *)Buffer->EffectBuffer); for(int Y = 0; Y < Buffer->Height; ++Y) { uint32 *Pixel = (uint32 *)Row; for(int X = 0; X < Buffer->Width; ++X) { // individual channels v4 ColorI = powv4(Uint32ToNormalizedCol(*Pixel), ColMid); v4 ValI = 1.0f/(ColMax-ColMin) * (ColorI - ColMin); // global channel v4 ColorG = powv4(ValI, Mid); v4 ValG = 1.0f/(Max-Min) * (ColorG - Min); *Pixel++ = ColToUint32(Clamp(0.0f, ValG, 1.0f)); } Row += Buffer->Pitch; } } internal void GaussianBlur(pixel_buffer *Buffer, memory *Memory, property_channel Property[]) { real32 Radius = Property[0].CurrentValue.f; if (!Buffer->Scratch) { Buffer->Scratch = (uint64 *)Memory->Address + Memory->CurrentPosition; Memory->CurrentPosition += Buffer->Width * Buffer->Height * Buffer->BytesPerPixel; } Gaussian(Buffer, Buffer->Scratch, Radius); } #endif