summary | refs | log | tree | commit | diff
path: root/effects_software.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'effects_software.cpp')
-rw-r--r-- effects_software.cpp 216
1 files changed, 44 insertions, 172 deletions
diff --git a/effects_software.cpp b/effects_software.cpp
index 06e8543..27023e6 100644
--- a/effects_software.cpp
+++ b/effects_software.cpp
@@ -1,177 +1,49 @@
+
static void
-Effect_Software_DrawColor(source *Source, layer_bitmap_info *BitmapInfo, memory *Memory, property_channel Property[])
+Effect_Software_DrawColor(int Width, int Height, int BytesPerPixel, void *EffectBitmapAddress, v4 Color, blend_mode BlendMode) // New scalar version: for every pixel of the Width x Height bitmap at EffectBitmapAddress, reads the four channels, seeds the blend values with Color, invokes Fallback_Blend(), and writes the channels back in place.
{
-#if ARM
-#else
- v4 FloatColor = Property[0].CurrentValue.col;
- blend_mode BlendMode = Property[1].CurrentValue.blendmode;
-
- __m256 ZeroReal = _mm256_set1_ps(0);
- __m256 ZeroPointFive = _mm256_set1_ps(0.5);
- __m256 One = _mm256_set1_ps(1);
- __m256 Two = _mm256_set1_ps(2);
-
- __m256 Fraction255 = _mm256_set1_ps(1/255.0f);
- __m256 Real255 = _mm256_set1_ps(255);
-
- __m256i FF = _mm256_set1_epi32(0xFF);
-
- __m256 Alpha = _mm256_set1_ps(FloatColor.a);
- __m256 AlphaInv = _mm256_set1_ps(1.0f - FloatColor.a);
-
- __m256 R_Col = _mm256_set1_ps(FloatColor.E[0]);
- __m256 R_Colx2 = _mm256_mul_ps(R_Col, Two);
- __m256 R_ColInv = _mm256_set1_ps(1.0f - FloatColor.E[0]);
-
- __m256 G_Col = _mm256_set1_ps(FloatColor.E[1]);
- __m256 G_Colx2 = _mm256_mul_ps(G_Col, Two);
- __m256 G_ColInv = _mm256_set1_ps(1.0f - FloatColor.E[1]);
-
- __m256 B_Col = _mm256_set1_ps(FloatColor.E[2]);
- __m256 B_Colx2 = _mm256_mul_ps(B_Col, Two);
- __m256 B_ColInv = _mm256_set1_ps(1.0f - FloatColor.E[2]);
-
- for (int16 Y = 0; Y < Source->Info.Height; Y += 2)
- {
- for (int16 X = 0; X < Source->Info.Width; X += 4)
- {
- uint32 XLookup = (X >> 2)*16 + (X % 4);
- uint32 YLookup = (Y >> 2)*(Source->Info.Width*4) + (Y % 4)*4;
- uint32 PixelToSeek = XLookup + YLookup;
- uint8 *Pixel = (uint8 *)BitmapInfo->BitmapBuffer + PixelToSeek*Source->Info.BytesPerPixel;
- __m256i DestPixel = _mm256_loadu_si256((const __m256i *)Pixel);
-
- // normalized values
- __m256 R_Dest = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256( DestPixel, FF)), Fraction255);
- __m256 G_Dest = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(DestPixel, 8), FF)), Fraction255);
- __m256 B_Dest = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(_mm256_srli_epi32(DestPixel, 16), FF)), Fraction255);
- __m256i A_Out = _mm256_and_si256(_mm256_srli_epi32(DestPixel, 24), FF);
-
- __m256 R_Blend = R_Col;
- __m256 G_Blend = G_Col;
- __m256 B_Blend = B_Col;
- switch (BlendMode)
- {
- case blend_normal:
- {
- } break;
- case blend_multiply:
- {
- R_Blend = _mm256_mul_ps(R_Dest, R_Col);
- G_Blend = _mm256_mul_ps(G_Dest, G_Col);
- B_Blend = _mm256_mul_ps(B_Dest, B_Col);
- } break;
- case blend_colorburn:
- {
- R_Blend = _mm256_sub_ps(One, _mm256_div_ps(_mm256_sub_ps(One, R_Dest), R_Col));
- G_Blend = _mm256_sub_ps(One, _mm256_div_ps(_mm256_sub_ps(One, G_Dest), G_Col));
- B_Blend = _mm256_sub_ps(One, _mm256_div_ps(_mm256_sub_ps(One, B_Dest), B_Col));
- } break;
- case blend_linearburn:
- {
- R_Blend = _mm256_sub_ps(_mm256_add_ps(R_Dest, R_Col), One);
- G_Blend = _mm256_sub_ps(_mm256_add_ps(G_Dest, G_Col), One);
- B_Blend = _mm256_sub_ps(_mm256_add_ps(B_Dest, B_Col), One);
- } break;
- case blend_add:
- {
- R_Blend = _mm256_add_ps(R_Dest, R_Col);
- G_Blend = _mm256_add_ps(G_Dest, G_Col);
- B_Blend = _mm256_add_ps(B_Dest, B_Col);
- } break;
- case blend_screen:
- {
- R_Blend = _mm256_sub_ps(One, _mm256_mul_ps(_mm256_sub_ps(One, R_Dest), R_ColInv));
- G_Blend = _mm256_sub_ps(One, _mm256_mul_ps(_mm256_sub_ps(One, G_Dest), G_ColInv));
- B_Blend = _mm256_sub_ps(One, _mm256_mul_ps(_mm256_sub_ps(One, B_Dest), B_ColInv));
- } break;
- case blend_overlay:
- {
- __m256 R_Mask = _mm256_cmp_ps(R_Dest, ZeroPointFive, 1);
- __m256 G_Mask = _mm256_cmp_ps(G_Dest, ZeroPointFive, 1);
- __m256 B_Mask = _mm256_cmp_ps(B_Dest, ZeroPointFive, 1);
- __m256 R_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(R_Dest, R_Col));
- __m256 G_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(G_Dest, G_Col));
- __m256 B_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(B_Dest, B_Col));
- __m256 R_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, R_Dest), R_ColInv)));
- __m256 G_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, G_Dest), G_ColInv)));
- __m256 B_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, B_Dest), B_ColInv)));
- R_Blend = _mm256_blendv_ps(R_Upper, R_Lower, R_Mask);
- G_Blend = _mm256_blendv_ps(G_Upper, G_Lower, G_Mask);
- B_Blend = _mm256_blendv_ps(B_Upper, B_Lower, B_Mask);
- } break;
- case blend_softlight:
- {
- // using Pegtop's equation
- R_Blend = _mm256_add_ps(_mm256_mul_ps(_mm256_sub_ps(One, R_Colx2), _mm256_mul_ps(R_Dest, R_Dest)), _mm256_mul_ps(R_Colx2, R_Dest));
- G_Blend = _mm256_add_ps(_mm256_mul_ps(_mm256_sub_ps(One, G_Colx2), _mm256_mul_ps(G_Dest, G_Dest)), _mm256_mul_ps(G_Colx2, G_Dest));
- B_Blend = _mm256_add_ps(_mm256_mul_ps(_mm256_sub_ps(One, B_Colx2), _mm256_mul_ps(B_Dest, B_Dest)), _mm256_mul_ps(B_Colx2, B_Dest));
- } break;
- case blend_hardlight:
- {
- __m256 R_Mask = _mm256_cmp_ps(R_Dest, ZeroPointFive, 13);
- __m256 G_Mask = _mm256_cmp_ps(G_Dest, ZeroPointFive, 13);
- __m256 B_Mask = _mm256_cmp_ps(B_Dest, ZeroPointFive, 13);
- __m256 R_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(R_Dest, R_Col));
- __m256 G_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(G_Dest, G_Col));
- __m256 B_Lower = _mm256_mul_ps(Two, _mm256_mul_ps(B_Dest, B_Col));
- __m256 R_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, R_Dest), R_ColInv)));
- __m256 G_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, G_Dest), G_ColInv)));
- __m256 B_Upper = _mm256_sub_ps(One, _mm256_mul_ps(Two, _mm256_mul_ps(_mm256_sub_ps(One, B_Dest), B_ColInv)));
- R_Blend = _mm256_blendv_ps(R_Upper, R_Lower, R_Mask);
- G_Blend = _mm256_blendv_ps(G_Upper, G_Lower, G_Mask);
- B_Blend = _mm256_blendv_ps(B_Upper, B_Lower, B_Mask);
- } break;
- case blend_subtract:
- {
- R_Blend = _mm256_sub_ps(R_Dest, R_Col);
- G_Blend = _mm256_sub_ps(G_Dest, G_Col);
- B_Blend = _mm256_sub_ps(B_Dest, B_Col);
- } break;
- case blend_divide:
- {
- R_Blend = _mm256_div_ps(R_Dest, R_Col);
- G_Blend = _mm256_div_ps(G_Dest, G_Col);
- B_Blend = _mm256_div_ps(B_Dest, B_Col);
- } break;
- case blend_difference:
- {
- __m256 R_Lower = _mm256_sub_ps(R_Col, R_Dest);
- __m256 G_Lower = _mm256_sub_ps(G_Col, G_Dest);
- __m256 B_Lower = _mm256_sub_ps(B_Col, B_Dest);
- __m256 R_Upper = _mm256_sub_ps(R_Dest, R_Col);
- __m256 G_Upper = _mm256_sub_ps(G_Dest, G_Col);
- __m256 B_Upper = _mm256_sub_ps(B_Dest, B_Col);
- __m256 R_Mask = _mm256_cmp_ps(R_Lower, ZeroReal, 14);
- __m256 G_Mask = _mm256_cmp_ps(G_Lower, ZeroReal, 14);
- __m256 B_Mask = _mm256_cmp_ps(B_Lower, ZeroReal, 14);
- R_Blend = _mm256_blendv_ps(R_Upper, R_Lower, R_Mask);
- G_Blend = _mm256_blendv_ps(G_Upper, G_Lower, G_Mask);
- B_Blend = _mm256_blendv_ps(B_Upper, B_Lower, B_Mask);
- } break;
- }
-
- R_Blend = _mm256_add_ps(_mm256_mul_ps(R_Dest, AlphaInv),
- _mm256_mul_ps(R_Blend, Alpha));
- G_Blend = _mm256_add_ps(_mm256_mul_ps(G_Dest, AlphaInv),
- _mm256_mul_ps(G_Blend, Alpha));
- B_Blend = _mm256_add_ps(_mm256_mul_ps(B_Dest, AlphaInv),
- _mm256_mul_ps(B_Blend, Alpha));
-
- R_Blend = _mm256_max_ps(_mm256_min_ps(One, R_Blend), ZeroReal);
- G_Blend = _mm256_max_ps(_mm256_min_ps(One, G_Blend), ZeroReal);
- B_Blend = _mm256_max_ps(_mm256_min_ps(One, B_Blend), ZeroReal);
-
- __m256i R_Out = _mm256_cvttps_epi32(_mm256_mul_ps(R_Blend, Real255));
- __m256i G_Out = _mm256_cvttps_epi32(_mm256_mul_ps(G_Blend, Real255));
- __m256i B_Out = _mm256_cvttps_epi32(_mm256_mul_ps(B_Blend, Real255));
-
- __m256i OutputPixel = _mm256_or_si256(
- _mm256_or_si256(R_Out, _mm256_slli_epi32(G_Out, 8)),
- _mm256_or_si256(_mm256_slli_epi32(B_Out, 16), _mm256_slli_epi32(A_Out, 24)));
-
- _mm256_storeu_si256((__m256i *)Pixel, OutputPixel);
+ render_byte_info Bits = Bitmap_ByteInfo(BytesPerPixel); // supplies ByteOffset, MaskPixel, Normalized and Bits used below; Bitmap_ByteInfo is defined elsewhere
+ rectangle RenderRegion = {0, 0, Width, Height}; // the loops below cover the whole bitmap
+ transform_info T; // NOTE(review): only T.BlendMode is assigned; any other fields stay uninitialized - confirm nothing else is read from T
+ T.BlendMode = BlendMode;
+ for (int32 Y = RenderRegion.Min.y; Y < RenderRegion.Max.y; Y++) {
+ for (int32 X = RenderRegion.Min.x; X < RenderRegion.Max.x; X++) {
+ uint32 Offset = Y*Width*BytesPerPixel + X*BytesPerPixel; // byte offset of pixel (X, Y), rows being Width*BytesPerPixel bytes apart; NOTE(review): the multiply is done in int before conversion - confirm it cannot overflow for the bitmap sizes in use
+ uint8 *LayerPixel = (uint8 *)EffectBitmapAddress + Offset;
+
+ uint32 *R_DestAddress = (uint32 *)(LayerPixel + Bits.ByteOffset * 0); // the four channel addresses are spaced Bits.ByteOffset bytes apart, in R, G, B, A order
+ uint32 *G_DestAddress = (uint32 *)(LayerPixel + Bits.ByteOffset * 1);
+ uint32 *B_DestAddress = (uint32 *)(LayerPixel + Bits.ByteOffset * 2);
+ uint32 *A_DestAddress = (uint32 *)(LayerPixel + Bits.ByteOffset * 3); // NOTE(review): every channel is read and written as a full uint32 through a cast pointer; if ByteOffset is smaller than 4 these accesses are unaligned and overlap neighbouring bytes - verify against Bitmap_ByteInfo and the buffer's padding
+
+ real32 R_Dest = (real32)(*R_DestAddress & Bits.MaskPixel) * Bits.Normalized; // keep only the masked channel bits, then scale by Bits.Normalized
+ real32 G_Dest = (real32)(*G_DestAddress & Bits.MaskPixel) * Bits.Normalized;
+ real32 B_Dest = (real32)(*B_DestAddress & Bits.MaskPixel) * Bits.Normalized;
+ real32 A_Dest = (real32)(*A_DestAddress & Bits.MaskPixel) * Bits.Normalized;
+
+ real32 R_Col = Color.r;
+ real32 G_Col = Color.g;
+ real32 B_Col = Color.b;
+ real32 A_Col = Color.a;
+
+ real32 LayerAlpha = A_Col * 1; // the "* 1" has no effect; LayerAlpha is not referenced again in the visible code - presumably consumed by Fallback_Blend(), confirm
+
+ real32 R_Blend = R_Col; // blend values start out as the fill color
+ real32 G_Blend = G_Col;
+ real32 B_Blend = B_Col;
+ real32 A_Blend = A_Col;
+
+ Fallback_Blend(); // takes no arguments, so presumably a macro operating on the locals above (T, *_Dest, *_Col, *_Blend, LayerAlpha); defined elsewhere - confirm
+
+ uint32 R_Out = (uint32)(Normalize(R_Blend) * Bits.Bits); // pass each result through Normalize() (defined elsewhere), scale by Bits.Bits and truncate to an integer
+ uint32 G_Out = (uint32)(Normalize(G_Blend) * Bits.Bits);
+ uint32 B_Out = (uint32)(Normalize(B_Blend) * Bits.Bits);
+ uint32 A_Out = (uint32)(Normalize(A_Blend) * Bits.Bits);
+
+ *R_DestAddress = (*R_DestAddress & ~Bits.MaskPixel) | R_Out; // clear the masked channel bits and OR in the new value; bits outside MaskPixel in each word are preserved
+ *G_DestAddress = (*G_DestAddress & ~Bits.MaskPixel) | G_Out;
+ *B_DestAddress = (*B_DestAddress & ~Bits.MaskPixel) | B_Out;
+ *A_DestAddress = (*A_DestAddress & ~Bits.MaskPixel) | A_Out;
}
}
-#endif
}