关于16位色的Alpha,速度不理想,懂汇编的进来帮助优化一下
要实现的功能:把一张16位565格式位图alphablend到另一张16位565格式位图上。
没有alpha通道,把源位图每一个点的亮度作为alpha值(下标1表示目标像素,下标2表示源像素,下标3表示混合结果),即:
alphavalue := (r2 * 77 + g2 * 150 + b2 * 29) shr 8; // 权重 77+150+29 = 256,所以用 shr 8(即除以256)归一化
r3 := (r2 * alphavalue + r1 * (255-alphavalue)) shr 8;
g3 := (g2 * alphavalue + g1 * (255-alphavalue)) shr 8;
b3 := (b2 * alphavalue + b1 * (255-alphavalue)) shr 8;
下面是我写的汇编,效率很低,麻烦高人耐心看完~~~最好给点指导!先谢了!
const
// Value $4D42 is the two ASCII bytes 'B','M' read as a little-endian word.
BMP_FILE_SIGN = $4D42; // compared against tagBITMAPFILEHEADER.bfType
// The constants below are 64-bit patterns that repeat one 16-bit value in
// each of the four word lanes of an MMX register (four RGB565 pixels).
// Per-lane channel masks for RGB565: red = top 5 bits, green = middle 6 bits,
// blue = low 5 bits.
MASK_R: int64 = $f800f800f800f800;
MASK_G: int64 = $07e007e007e007e0;
MASK_B: int64 = $001f001f001f001f;
// 255 in every lane; used to form (255 - alpha).
MAX_A: int64 = $00ff00ff00ff00ff;
// Per-lane luminance weights: $4d = 77 (red), $96 = 150 (green), $1d = 29 (blue).
// The three weights sum to 256, which is why the weighted sum is shifted right by 8.
MUL_R: int64 = $004d004d004d004d;
MUL_G: int64 = $0096009600960096;
MUL_B: int64 = $001d001d001d001d;
// MMX body of the RGB565 luminance-keyed alpha blend: four pixels per pass.
// For every pixel:
//   av  = (r2*77 + g2*150 + b2*29) shr 8        (source luminance used as alpha)
//   out = (src*av + dst*(255 - av)) shr 8       (per channel, then repacked to 565)
// Source pixels equal to 0 leave the destination pixel untouched, exactly like
// the scalar loop that handles the remaining (width mod 4) pixels. Without that
// guard av = 0 yields dst*255 shr 8, which lowers every non-zero destination
// channel by one level.
// Every intermediate fits an unsigned 16-bit lane: the luminance sum is at most
// 64088 and the blend sum is at most 252*255 = 64260, so pmullw (low word)
// and paddusw never lose information.
// NOTE(review): Inc(ptr, 4) is expected to advance by four 16-bit pixels
// (8 bytes), i.e. ptrSrc/ptrDst are presumably PWord - confirm at the declaration.
for j := 1 to (nDstRight - nDstX) div 4 do
begin
  asm
    mov     ECX, ptrSrc          // ECX -> 4 source pixels (RGB565)
    mov     EDX, ptrDst          // EDX -> 4 destination pixels (RGB565)
    movq    mm0, [ECX]
    movq    mm7, [EDX]

    // Expand the source pixels from 565 to 8-bit channels (low bits zero).
    movq    mm1, mm0
    movq    mm2, mm0
    pand    mm0, MASK_R
    psrlw   mm0, 8               // mm0 = r2
    pand    mm1, MASK_G
    psrlw   mm1, 3               // mm1 = g2
    pand    mm2, MASK_B
    psllw   mm2, 3               // mm2 = b2

    // Source luminance -> alpha value per pixel.
    movq    mm3, mm0
    movq    mm4, mm1
    movq    mm5, mm2
    pmullw  mm3, MUL_R           // r2 * 77
    pmullw  mm4, MUL_G           // g2 * 150
    pmullw  mm5, MUL_B           // b2 * 29
    paddusw mm3, mm4
    paddusw mm3, mm5
    psrlw   mm3, 8               // mm3 = av

    // Source contribution: channel * av.
    pmullw  mm0, mm3
    pmullw  mm1, mm3
    pmullw  mm2, mm3

    // Expand the destination pixels from 565 to 8-bit channels.
    movq    mm4, mm7
    movq    mm5, mm7
    movq    mm6, mm7
    pand    mm4, MASK_R
    psrlw   mm4, 8               // mm4 = r1
    pand    mm5, MASK_G
    psrlw   mm5, 3               // mm5 = g1
    pand    mm6, MASK_B
    psllw   mm6, 3               // mm6 = b1

    // Destination contribution: channel * (255 - av), then add the source part.
    movq    mm7, MAX_A
    psubusw mm7, mm3             // mm7 = 255 - av
    pmullw  mm4, mm7
    pmullw  mm5, mm7
    pmullw  mm6, mm7
    paddusw mm4, mm0
    paddusw mm5, mm1
    paddusw mm6, mm2

    // "shr 8" (normalise) and "shr 3/2/3" (back to 5/6/5 bits) are merged
    // into one logical shift per channel; the result is identical.
    psrlw   mm4, 11              // 5-bit red
    psrlw   mm5, 10              // 6-bit green
    psrlw   mm6, 11              // 5-bit blue
    psllw   mm4, 11
    psllw   mm5, 5
    por     mm4, mm5
    por     mm4, mm6             // mm4 = blended RGB565 pixels

    // Keep the original destination pixel wherever the source pixel is 0.
    // Nothing has been stored yet, so reloading both operands is safe even
    // if the two buffers overlap.
    movq    mm1, [ECX]
    pxor    mm0, mm0
    pcmpeqw mm0, mm1             // mm0 = $FFFF in lanes where src = 0
    movq    mm2, [EDX]
    pand    mm2, mm0             // original dst in the src = 0 lanes
    pandn   mm0, mm4             // blended result in all other lanes
    por     mm0, mm2
    movq    [EDX], mm0           // write back
  end;
  Inc(ptrSrc, 4);
  Inc(ptrDst, 4);
end;
// MMX registers alias the x87 register stack. EMMS must run once after the
// MMX work so later floating-point code sees an empty FPU tag word. It is kept
// out of the loop because it is comparatively expensive.
asm
  emms
end;
关于上面alpha的计算思路如下(这段标量代码同时也用来处理每行末尾不足4个像素的剩余部分):
// Scalar version of the blend, one RGB565 pixel per pass; it processes the
// (width mod 4) pixels left over after the four-at-a-time loop.
// A source pixel of 0 is skipped, leaving the destination pixel unchanged.
for j := 1 to (nDstRight - nDstX) mod 4 do
begin
  if ptrSrc^ <> 0 then
  begin
    // Source channels widened to 8 bits (shift first, then mask).
    r2 := (ptrSrc^ shr 8) and $F8;
    g2 := (ptrSrc^ shr 3) and $FC;
    b2 := (ptrSrc^ shl 3) and $F8;
    // Destination channels widened the same way.
    r1 := (ptrDst^ shr 8) and $F8;
    g1 := (ptrDst^ shr 3) and $FC;
    b1 := (ptrDst^ shl 3) and $F8;
    // Source luminance acts as the alpha value.
    av := (r2 * 77 + g2 * 150 + b2 * 29) shr 8;
    // Blend, then drop straight to 5/6/5 bits: "shr 8" followed by
    // "shr 3" / "shr 2" / "shr 3" is folded into a single shift.
    r3 := (r2 * av + r1 * (255 - av)) shr 11;
    g3 := (g2 * av + g1 * (255 - av)) shr 10;
    b3 := (b2 * av + b1 * (255 - av)) shr 11;
    // Repack as RGB565.
    ptrDst^ := (r3 shl 11) or (g3 shl 5) or (b3);
  end;
  Inc(ptrDst);
  Inc(ptrSrc);
end;