diff options
author | Fox Caminiti <fox@foxcam.net> | 2022-08-21 10:20:31 -0400 |
---|---|---|
committer | Fox Caminiti <fox@foxcam.net> | 2022-08-21 10:20:31 -0400 |
commit | ed51dab429e467fc144f0bfbed70a5291c8a0a27 (patch) | |
tree | f79fc3bb577ca996b49b34f1bad5bff4a40ce6a9 /prenderer.cpp | |
parent | 8c5f06c37f3c267ecd8f867cd49765c366b5f47c (diff) |
multisampled gl masks
Diffstat (limited to 'prenderer.cpp')
-rw-r--r-- | prenderer.cpp | 15 |
1 files changed, 11 insertions, 4 deletions
diff --git a/prenderer.cpp b/prenderer.cpp index 909fc4c..ccacbe8 100644 --- a/prenderer.cpp +++ b/prenderer.cpp @@ -355,6 +355,7 @@ AVX2_RenderLayer(transform_info T, comp_buffer *Buffer, rectangle RenderRegion) __m256 YAxisPY = _mm256_set1_ps(T.YAxisPY); __m256 LayerWidth = _mm256_set1_ps(T.LayerWidth); + __m256 LayerBoundsMaxX = _mm256_set1_ps(LayerBounds.Max.x); __m256i FullLayerWidth4i = _mm256_set1_epi32(T.FullLayerWidth*4); __m256i LayerWidthMinusOne = _mm256_set1_epi32(T.LayerWidth - 1); __m256i LayerHeightMinusOne = _mm256_set1_epi32(T.LayerHeight - 1); @@ -489,17 +490,23 @@ AVX2_RenderLayer(transform_info T, comp_buffer *Buffer, rectangle RenderRegion) __m256 Avg = _mm256_add_ps(_mm256_add_ps(_mm256_and_ps(LayerMask0, ZeroPoint25), _mm256_and_ps(LayerMask1, ZeroPoint25)), _mm256_add_ps(_mm256_and_ps(LayerMask2, ZeroPoint25), _mm256_and_ps(LayerMask3, ZeroPoint25))); + // Preventing overlap between threads for non-packed. One nice thing + // about packed is that the 4-padded bitmap means we can set up the + // boundaries so we don't have to check this ever. + __m256i TileBarrier = _mm256_cmp_ps(PixelX, LayerBoundsMaxX, 13); + // Zero - no points pass // One - all points pass; not an edge __m256i Mask = _mm256_cmp_ps(Avg, Zero, 14); __m256i NonEdge = _mm256_cmp_ps(Avg, One, 13); + __m256i TotalMask = _mm256_andnot_si256(TileBarrier, _mm256_and_si256(Mask, NonEdge)); - __m256 LayerMask = _mm256_and_ps(_mm256_and_ps(_mm256_cmp_ps(U, Zero, 13), _mm256_cmp_ps(U, One, 1)), - _mm256_and_ps(_mm256_cmp_ps(V, Zero, 13), _mm256_cmp_ps(V, One, 1))); + // __m256 LayerMask = _mm256_and_ps(_mm256_and_ps(_mm256_cmp_ps(U, Zero, 13), _mm256_cmp_ps(U, One, 1)), + // _mm256_and_ps(_mm256_cmp_ps(V, Zero, 13), _mm256_cmp_ps(V, One, 1))); // If all of the pixels are zeroed in the mask (aka fall outside // the UV lookup), we can skip the iteration. - if (_mm256_movemask_epi8(LayerMask)) + if (_mm256_movemask_epi8(TotalMask)) { __m256i EdgeMask = _mm256_andnot_si256(NonEdge, Mask); @@ -745,7 +752,7 @@ AVX2_RenderLayer(transform_info T, comp_buffer *Buffer, rectangle RenderRegion) _mm256_or_si256(R_Out, _mm256_slli_epi32(G_Out, 8)), _mm256_or_si256(_mm256_slli_epi32(B_Out, 16), _mm256_slli_epi32(A_Out, 24))); - _mm256_maskstore_epi32((int *)Pixel, LayerMask, OutputPixel); + _mm256_maskstore_epi32((int *)Pixel, TotalMask, OutputPixel); } #if PACKEDRGB PixelX = _mm256_add_ps(PixelX, Four); |