From e25257a0431ef475e5106f7534319b161b81fcae Mon Sep 17 00:00:00 2001 From: Fox Caminiti Date: Mon, 22 Aug 2022 15:57:38 -0400 Subject: windows buildable; needs debugging --- prenderer.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'prenderer.cpp') diff --git a/prenderer.cpp b/prenderer.cpp index 1fa3e71..9752663 100644 --- a/prenderer.cpp +++ b/prenderer.cpp @@ -494,12 +494,12 @@ AVX2_RenderLayer(transform_info T, comp_buffer *Buffer, rectangle RenderRegion) // Preventing overlap between threads for non-packed. One nice thing // about packed is that the 4-padded bitmap means we can set up the // boundaries so we don't have to check this ever. - __m256i TileBarrier = _mm256_cmp_ps(PixelX, LayerBoundsMaxX, 13); + __m256i TileBarrier = _mm256_cvtps_epi32(_mm256_cmp_ps(PixelX, LayerBoundsMaxX, 13)); // Zero - no points pass // One - all points pass; not an edge - __m256i Mask = _mm256_cmp_ps(Avg, Zero, 14); - __m256i NonEdge = _mm256_cmp_ps(Avg, One, 13); + __m256i Mask = _mm256_cvtps_epi32(_mm256_cmp_ps(Avg, Zero, 14)); + __m256i NonEdge = _mm256_cvtps_epi32(_mm256_cmp_ps(Avg, One, 13)); __m256i TotalMask = _mm256_andnot_si256(TileBarrier, _mm256_and_si256(Mask, NonEdge)); // __m256 LayerMask = _mm256_and_ps(_mm256_and_ps(_mm256_cmp_ps(U, Zero, 13), _mm256_cmp_ps(U, One, 1)), @@ -598,7 +598,7 @@ AVX2_RenderLayer(transform_info T, comp_buffer *Buffer, rectangle RenderRegion) // Apply anti-aliasing to edges if there are any if (_mm256_movemask_epi8(EdgeMask)) { - A_Col = _mm256_blendv_ps(A_Col, _mm256_mul_ps(A_Col, Avg), EdgeMask); + A_Col = _mm256_blendv_ps(A_Col, _mm256_mul_ps(A_Col, Avg), _mm256_cvtepi32_ps(EdgeMask)); } IACA_END; @@ -621,7 +621,7 @@ AVX2_RenderLayer(transform_info T, comp_buffer *Buffer, rectangle RenderRegion) __m256 A_Blend = LayerAlpha; // Only load the dest pixel if we actually need to (a pixel's opacity isn't 255 or the blend mode requires it). - if (T.BlendMode != blend_normal || _mm256_movemask_epi8(_mm256_cmp_ps(LayerAlpha, One, 2))) + if (T.BlendMode != blend_normal || _mm256_movemask_epi8(_mm256_cvtps_epi32(_mm256_cmp_ps(LayerAlpha, One, 2)))) { __m256i DestPixel = _mm256_loadu_si256((const __m256i *)Pixel); __m256 R_Dest = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256( DestPixel, FF)), Norm255); @@ -972,7 +972,7 @@ SSE2_RenderLayer(transform_info T, comp_buffer *Buffer, rectangle RenderRegion) __m128 B_Blend = B_Col; __m128 A_Blend = LayerAlpha; - if (!_mm_movemask_epi8(_mm_cmpeq_ps(LayerAlpha, One)) || T.BlendMode != blend_normal) + if (!_mm_movemask_epi8(_mm_cvtps_epi32(_mm_cmpeq_ps(LayerAlpha, One))) || T.BlendMode != blend_normal) { __m128i DestPixel = _mm_loadu_si128((const __m128i *)Pixel); __m128 R_Dest = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128( DestPixel, FF)), Norm255); -- cgit v1.2.3