69,371
社区成员
发帖
与我相关
我的任务
分享
#include <stdio.h>      /* printf */
#include <emmintrin.h>  /* SSE2: __m128i, _mm_set_epi32, _mm_cvtepi32_ps, _mm_cvtps_epi32, ... */
#include <xmmintrin.h>  /* SSE: __m128, _mm_mul_ps, _mm_shuffle_ps, ... */
/*
 * Packs four integer channels into one SSE register as floats and scales them.
 *
 * The channels are placed r, g, b, a from the lowest lane to the highest.
 * Each lane is converted to float and then multiplied by the matching lane
 * of scaleFactor.
 *
 * Returns the scaled (r, g, b, a) vector.
 */
__m128 ConvertIntToFloatColor(__m128 scaleFactor, int r, int g, int b, int a)
{
    /* _mm_setr_epi32 takes its arguments lowest lane first. */
    const __m128i packedChannels = _mm_setr_epi32(r, g, b, a);

    return _mm_mul_ps(_mm_cvtepi32_ps(packedChannels), scaleFactor);
}
/*
 * Blends srcColor over dstColor. Both vectors hold (r, g, b, a) with alpha
 * in the highest lane.
 *
 * Computed per lane, before masking:
 *   color = src * srcA + (dst * dstA) * (1 - srcA)
 *   alpha = srcA + dstA * (1 - srcA)
 * The color lanes are NOT divided by the resulting alpha.
 *
 * oneOneOneZero   - bit mask ANDed with the blended color; expected to keep
 *                   lanes 0..2 and clear lane 3
 * zeroZeroZeroOne - bit mask ANDed with the computed alpha; expected to keep
 *                   lane 3 and clear lanes 0..2
 *
 * Returns the bitwise OR of the two masked vectors.
 */
__m128 AlphaBlend(__m128 srcColor, __m128 dstColor, __m128 oneOneOneZero, __m128 zeroZeroZeroOne)
{
    const __m128 ones = _mm_set1_ps(1.0f);

    /* Broadcast lane 3 (alpha) of each input across all four lanes. */
    const __m128 srcA = _mm_shuffle_ps(srcColor, srcColor, _MM_SHUFFLE(3, 3, 3, 3));
    const __m128 dstA = _mm_shuffle_ps(dstColor, dstColor, _MM_SHUFFLE(3, 3, 3, 3));
    const __m128 invSrcA = _mm_sub_ps(ones, srcA);

    /* Color contribution of each layer. */
    const __m128 srcTerm = _mm_mul_ps(srcColor, srcA);
    const __m128 dstTerm = _mm_mul_ps(_mm_mul_ps(dstColor, dstA), invSrcA);
    const __m128 colorLanes = _mm_and_ps(_mm_add_ps(srcTerm, dstTerm), oneOneOneZero);

    /* Combined coverage, kept only where the alpha mask allows. */
    const __m128 alphaSum = _mm_add_ps(srcA, _mm_mul_ps(dstA, invSrcA));
    const __m128 alphaLane = _mm_and_ps(alphaSum, zeroZeroZeroOne);

    return _mm_or_ps(colorLanes, alphaLane);
}
/*
 * Scales a float color vector and writes its four lanes out as integers.
 *
 * floatColor  - color vector; lanes 0..3 are written to r, g, b, a
 * scaleFactor - per-lane multiplier applied before the conversion
 * r, g, b, a  - outputs; every pointer is dereferenced unconditionally, so
 *               all four must be valid
 *
 * The conversion is done with _mm_cvtps_epi32, which rounds according to the
 * current MXCSR rounding mode (round-to-nearest-even by default) rather than
 * truncating.
 */
void ConvertFloatToIntColor(__m128 floatColor, __m128 scaleFactor, int* r, int* g, int* b, int* a)
{
    int lanes[4];
    __m128i intColor;

    floatColor = _mm_mul_ps(floatColor, scaleFactor);
    intColor = _mm_cvtps_epi32(floatColor);

    /*
     * Copy the lanes out with a store intrinsic instead of reading the vector
     * through an (int*) cast: the cast is type punning that standard C does
     * not permit. The unaligned store places no alignment requirement on
     * the local array.
     */
    _mm_storeu_si128((__m128i *)lanes, intColor);

    *r = lanes[0];
    *g = lanes[1];
    *b = lanes[2];
    *a = lanes[3];
}
/*
 * Demo: blends green (alpha 128) over opaque red and prints the resulting
 * 8-bit channels.
 *
 * Colors are converted from 0..255 integers to floats with a 1/255 scale,
 * blended with AlphaBlend, then scaled back by 255 and converted to integers.
 * With the default rounding mode this should print "r 127, g 128, b 0, a 255".
 */
void TestBlend(void)
{
    /*
     * Lane masks for AlphaBlend: an all-ones lane (-1) is kept, a zero lane is
     * cleared. They are built in an integer register and reinterpreted, rather
     * than written through (int*) casts into __m128 objects.
     * _mm_set_epi32 takes its arguments highest lane first.
     */
    const __m128 oneOneOneZero = _mm_castsi128_ps(_mm_set_epi32(0, -1, -1, -1));
    const __m128 zeroZeroZeroOne = _mm_castsi128_ps(_mm_set_epi32(-1, 0, 0, 0));

    /* scaleFactor0 maps 0..255 integers to floats; scaleFactor1 maps them back. */
    const __m128 scaleFactor0 = _mm_set1_ps(1.0f / 255.0f);
    const __m128 scaleFactor1 = _mm_set1_ps(255.0f);

    const __m128 red = ConvertIntToFloatColor(scaleFactor0, 255, 0, 0, 255);
    const __m128 green = ConvertIntToFloatColor(scaleFactor0, 0, 255, 0, 128);

    /* green is the source (top) layer, red the destination (bottom) layer. */
    const __m128 color = AlphaBlend(green, red, oneOneOneZero, zeroZeroZeroOne);

    int r, g, b, a;
    ConvertFloatToIntColor(color, scaleFactor1, &r, &g, &b, &a);
    printf("r %d, g %d, b %d, a %d\n", r, g, b, a);
}
这是使用 SSE 计算的版本。注意两个缩放系数 scaleFactor0 和 scaleFactor1:
因为浮点颜色是由 8 位整数(0~255)转换而来,所以 scaleFactor0 是 1.0f / 255.0f(把整数映射到 [0, 1]),而 scaleFactor1 是 255.0f(把结果还原为 0~255 的整数)。