author     Fox Caminiti <fox@foxcam.net>    2022-08-22 15:57:38 -0400
committer  Fox Caminiti <fox@foxcam.net>    2022-08-22 15:57:38 -0400
commit     e25257a0431ef475e5106f7534319b161b81fcae (patch)
tree       0db25278f054eca42d5aff8346051a26536f7e8c /prenderer.cpp
parent     d03d7187c1881237b1a98404a125507d33d85a0e (diff)
windows buildable; needs debugging
Diffstat (limited to 'prenderer.cpp')
-rw-r--r--  prenderer.cpp  12
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/prenderer.cpp b/prenderer.cpp
index 1fa3e71..9752663 100644
--- a/prenderer.cpp
+++ b/prenderer.cpp
@@ -494,12 +494,12 @@ AVX2_RenderLayer(transform_info T, comp_buffer *Buffer, rectangle RenderRegion)
// Preventing overlap between threads for non-packed. One nice thing
// about packed is that the 4-padded bitmap means we can set up the
// boundaries so we don't have to check this ever.
- __m256i TileBarrier = _mm256_cmp_ps(PixelX, LayerBoundsMaxX, 13);
+ __m256i TileBarrier = _mm256_cvtps_epi32(_mm256_cmp_ps(PixelX, LayerBoundsMaxX, 13));
// Zero - no points pass
// One - all points pass; not an edge
- __m256i Mask = _mm256_cmp_ps(Avg, Zero, 14);
- __m256i NonEdge = _mm256_cmp_ps(Avg, One, 13);
+ __m256i Mask = _mm256_cvtps_epi32(_mm256_cmp_ps(Avg, Zero, 14));
+ __m256i NonEdge = _mm256_cvtps_epi32(_mm256_cmp_ps(Avg, One, 13));
__m256i TotalMask = _mm256_andnot_si256(TileBarrier, _mm256_and_si256(Mask, NonEdge));
// __m256 LayerMask = _mm256_and_ps(_mm256_and_ps(_mm256_cmp_ps(U, Zero, 13), _mm256_cmp_ps(U, One, 1)),
@@ -598,7 +598,7 @@ AVX2_RenderLayer(transform_info T, comp_buffer *Buffer, rectangle RenderRegion)
// Apply anti-aliasing to edges if there are any
if (_mm256_movemask_epi8(EdgeMask))
{
- A_Col = _mm256_blendv_ps(A_Col, _mm256_mul_ps(A_Col, Avg), EdgeMask);
+ A_Col = _mm256_blendv_ps(A_Col, _mm256_mul_ps(A_Col, Avg), _mm256_cvtepi32_ps(EdgeMask));
}
IACA_END;
@@ -621,7 +621,7 @@ AVX2_RenderLayer(transform_info T, comp_buffer *Buffer, rectangle RenderRegion)
__m256 A_Blend = LayerAlpha;
// Only load the dest pixel if we actually need to (a pixel's opacity isn't 255 or the blend mode requires it).
- if (T.BlendMode != blend_normal || _mm256_movemask_epi8(_mm256_cmp_ps(LayerAlpha, One, 2)))
+ if (T.BlendMode != blend_normal || _mm256_movemask_epi8(_mm256_cvtps_epi32(_mm256_cmp_ps(LayerAlpha, One, 2))))
{
__m256i DestPixel = _mm256_loadu_si256((const __m256i *)Pixel);
__m256 R_Dest = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256( DestPixel, FF)), Norm255);
@@ -972,7 +972,7 @@ SSE2_RenderLayer(transform_info T, comp_buffer *Buffer, rectangle RenderRegion)
__m128 B_Blend = B_Col;
__m128 A_Blend = LayerAlpha;
- if (!_mm_movemask_epi8(_mm_cmpeq_ps(LayerAlpha, One)) || T.BlendMode != blend_normal)
+ if (!_mm_movemask_epi8(_mm_cvtps_epi32(_mm_cmpeq_ps(LayerAlpha, One))) || T.BlendMode != blend_normal)
{
__m128i DestPixel = _mm_loadu_si128((const __m128i *)Pixel);
__m128 R_Dest = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128( DestPixel, FF)), Norm255);
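
A side note on the conversions introduced above (not part of the commit): _mm256_cmp_ps and _mm_cmpeq_ps produce all-ones lanes (0xFFFFFFFF) for elements that pass, which is a NaN bit pattern when read as a float. _mm256_cvtps_epi32 is a value conversion, so each all-ones lane comes out as the integer indefinite 0x80000000 instead of the full mask; only the sign bit survives. That is still enough for _mm256_blendv_ps and for the nonzero checks through _mm256_movemask_epi8, but it is not the original mask and it costs a real vcvtps2dq per use. If the conversions exist only to satisfy type checking (MSVC does not accept a __m256 where a __m256i is expected), the usual idiom is the free bitwise reinterpretation _mm256_castps_si256 / _mm256_castsi256_ps, and _mm_castps_si128 in the SSE2 path, which leaves the mask bits untouched. A minimal standalone sketch of the difference, assuming an AVX-enabled build (e.g. -mavx2 or /arch:AVX2); nothing below is taken from the repository:

#include <immintrin.h>
#include <cstdio>

int main()
{
    // A passing compare lane is all-ones (0xFFFFFFFF), i.e. a NaN bit pattern as a float.
    __m256 Mask = _mm256_cmp_ps(_mm256_set1_ps(1.0f), _mm256_set1_ps(0.0f), 14); // 14 = _CMP_GT_OS

    // Value conversion: NaN converts to the integer indefinite 0x80000000,
    // so only the top bit of each lane survives.
    __m256i Converted = _mm256_cvtps_epi32(Mask);

    // Bitwise reinterpretation: the full 0xFFFFFFFF mask is preserved and no
    // instruction is emitted for the cast.
    __m256i Cast = _mm256_castps_si256(Mask);

    unsigned int Out[8];
    _mm256_storeu_si256((__m256i *)Out, Converted);
    printf("cvtps_epi32:  %08x\n", Out[0]);   // prints 80000000
    _mm256_storeu_si256((__m256i *)Out, Cast);
    printf("castps_si256: %08x\n", Out[0]);   // prints ffffffff
    return 0;
}

Whether the narrower 0x80000000 encoding actually breaks anything depends on how the masks are consumed further down, so the cast form is mainly the cheaper and safer drop-in.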