Reputation: 81
I am trying to shift values (including zero) from the unsigned range to the signed range,
i.e. [0, 255] -> [-128, 127],
in order to perform the operation below:
// XOR with 0x80 flips the top bit of every byte, so the unsigned ordering of
// the original bytes becomes the signed ordering of the result.
__m256i shifted = _mm256_xor_si256(a, _mm256_set1_epi8(-128));
// Signed per-byte compare: a lane is 0xFF where shifted > zerosShifted, else 0x00.
__m256i maskCount = _mm256_cmpgt_epi8(shifted, zerosShifted);
When I write the code like this, it runs:
// Build the constant in three steps: all-zero bytes, all-0x80 bytes, then
// their XOR (which leaves 0x80 in every byte).
__m256i temp1 = _mm256_setzero_si256();
__m256i temp2 = _mm256_set1_epi8(-128);
__m256i zerosShifted = _mm256_xor_si256(temp1,temp2);
But when I write it like this, it crashes with an "Illegal Instruction" error:
__m256i zerosShifted = _m256_xor_si256(_mm256_setzero_si256(),_mm256_set1_epi8(-128));
PS: I know there is no need to shift zeros to -128; I could set -128 directly in the register. I just want to understand the root cause.
The complete AVX2 code looks like this:
// Accumulates, over the interior of the image (rows 1..height-2, columns
// 1..width-2), the number of elements greater than 0 and the sum of those
// elements, into boundary_pixel_count_r and boundary_grad_image_agg.
// Assumes boundary_image_array holds unsigned 8-bit values — TODO confirm.
// NOTE(review): pIn0 and pOut0 are not used anywhere in the visible code.
__m256i *pIn0, *pOut0;
__m256i a;
// Every byte is 0x80: the value an unsigned 0 maps to once the data bytes get
// the same XOR applied in the loop below.
__m256i zerosShifted = _mm256_xor_si256(_mm256_setzero_si256(), _mm256_set1_epi8(-128));
__m256i ones = _mm256_set1_epi8(1);
__m256i zeros = _mm256_setzero_si256();
for (int x = 1; x < height - 1 ; x++)
{
int y = 1;
// Vector part: 32 bytes per iteration, only while the whole 32-byte group
// ends at or before column width-2.
for (; y < width - 32; y+=32)
{
// NOTE(review): _MM256_LOAD_SI256 is not defined in the visible code —
// presumably a project macro wrapping a 256-bit load. Confirm whether it is
// the aligned or unaligned form, since the address here starts at column 1.
a = _MM256_LOAD_SI256((__m256i*)&boundary_image_array[x*width + y]);
// Flip the top bit of every byte so the signed compare below orders the
// bytes as if they were unsigned.
__m256i shifted = _mm256_xor_si256(a, _mm256_set1_epi8(-128));
// 0xFF in each byte lane whose original byte is greater than 0, else 0x00.
__m256i maskCount = _mm256_cmpgt_epi8(shifted, zerosShifted);
// Reduce the mask to 1 per matching byte so it can be summed as a count.
maskCount = _mm256_and_si256(maskCount, ones);
// NOTE(review): _mm256_blendv_epi8 selects on the top bit of each mask byte;
// the mask holds only 0 or 1 at this point, so this appears to always yield
// `a` unchanged — confirm the intent.
__m256i imageAgg = _mm256_blendv_epi8(a, zeros, maskCount);
// Sum of absolute differences against zero: each group of 8 bytes is summed
// into one of the four 64-bit lanes.
__m256i pixelCount = _mm256_sad_epu8(maskCount, zeros);
__m256i imgAggSum = _mm256_sad_epu8(imageAgg, zeros);
// Fold the four 64-bit partial sums into the running totals.
boundary_pixel_count_r += _mm256_extract_epi64(pixelCount, 0) + _mm256_extract_epi64(pixelCount, 1)
+ _mm256_extract_epi64(pixelCount, 2) + _mm256_extract_epi64(pixelCount, 3);
boundary_grad_image_agg += _mm256_extract_epi64(imgAggSum, 0) + _mm256_extract_epi64(imgAggSum, 1)
+ _mm256_extract_epi64(imgAggSum, 2) + _mm256_extract_epi64(imgAggSum, 3);
}
// Scalar tail: the remaining columns of this row, up to and including width-2.
for (; y < width - 1 ; y++)
{
//! Determine the boundary stats:
if (boundary_image_array[x*width + y] > 0)
{
boundary_pixel_count_r++;
boundary_grad_image_agg += boundary_image_array[x*width + y];
}
}
}
Upvotes: 1
Views: 112