• 主页

# 请教RGB => YUV的快速算法

Y = (0.257 * R) + (0.504 * G) + (0.098 * B) + 16
Cr = V = (0.439 * R) - (0.368 * G) - (0.071 * B) + 128
Cb = U = -(0.148 * R) - (0.291 * G) + (0.439 * B) + 128

...全文
229 点赞 收藏 10

10 条回复
huqiming 2002年05月24日

/* RGB to YUV MMX routine */

/*
 * Fixed-point colour-conversion coefficient tables.
 *
 * Each table holds 7 rows of 4 signed 16-bit values.  A row is laid out in
 * the same order as the bytes of one source pixel (b, g, r, a) so that a
 * single PMADDWD against an unpacked pixel yields the weighted sum; the
 * fourth entry is always 0, so the fourth pixel byte contributes nothing.
 *
 * Values are scaled by 32768: every ycoef row sums to 32768 (+/-1), and the
 * dominant ucoef/vcoef entry is 16384 (= 0.5 * 32768).
 *
 * init_rgb_to_yuv_mmx() selects row (coeffs - 1).
 *
 * NOTE(review): the rows look like per-standard luma weights, e.g.
 * {3736, 19235, 9798} / 32768 = {0.114, 0.587, 0.299} and
 * {2363, 23442, 6963} / 32768 = {0.072, 0.715, 0.212}; presumably the row
 * index follows the MPEG-2 matrix_coefficients code -- confirm with caller.
 */
static short int ycoef[7][4] = { /* 32768 scaled y table, bgra order */
{2363, 23442, 6963, 0},
{3736, 19235, 9798, 0},
{3736, 19235, 9798, 0},
{3604, 19333, 9830, 0},
{3736, 19235, 9798, 0},
{3736, 19235, 9798, 0},
{2851, 22970, 6947, 0}};

/* U weights, same layout and scale as ycoef: positive on b, negative on g and r. */
static short int ucoef[7][4] = {
{16384, -12648, -3768, 0},
{16384, -10846, -5538, 0},
{16384, -10846, -5538, 0},
{16384, -10846, -5538, 0},
{16384, -10846, -5538, 0},
{16384, -10846, -5538, 0},
{16384, -12583, -3801, 0}};

/* V weights, same layout and scale as ycoef: positive on r, negative on g and b. */
static short int vcoef[7][4] = {
{-1507, -14877, 16384, 0},
{-2654, -13730, 16384, 0},
{-2654, -13730, 16384, 0},
{-2589, -13795, 16384, 0},
{-2654, -13730, 16384, 0},
{-2654, -13730, 16384, 0},
{-1802, -14582, 16384, 0}};

/*
 * Currently selected rows of the three tables.  Set by init_rgb_to_yuv_mmx()
 * and dereferenced by RGBtoYUVmmx(); they have no initializer, so they are
 * null until init_rgb_to_yuv_mmx() has been called.
 */
static short int *ycoefs, *ucoefs, *vcoefs;

/*
 * init_rgb_to_yuv_mmx - select the coefficient rows used by RGBtoYUVmmx().
 *
 * coeffs: 1-based row number into ycoef/ucoef/vcoef.  Any value outside the
 *         range 1..(number of rows) falls back to row 3, the same fallback
 *         the routine has always used for over-range values.
 *
 * Must be called before RGBtoYUVmmx(), which dereferences the pointers set
 * here.
 *
 * The previous guard was "i > 8", but the tables have only 7 rows: coeffs == 8
 * read one row past the end of each table and coeffs <= 0 read before the
 * start.  The bound is now derived from the table itself.
 */
void init_rgb_to_yuv_mmx(int coeffs)
{
    const int row_count = (int)(sizeof(ycoef) / sizeof(ycoef[0]));
    int i;

    i = coeffs;
    if (i < 1 || i > row_count)
        i = 3;

    ycoefs = &ycoef[i - 1][0];
    ucoefs = &ucoef[i - 1][0];
    vcoefs = &vcoef[i - 1][0];
}

/*
 * RGBtoYUVmmx - convert 32-bit pixels (byte order b, g, r, x) into three
 * separate full-resolution 8-bit planes Y, U and V using MMX.
 *
 * src          first source row to convert.  Rows are walked by SUBTRACTING
 *              srcrowsize, i.e. towards lower addresses
 *              (presumably a bottom-up bitmap -- confirm with caller).
 * desty/u/v    first output row of each plane; one byte per pixel per plane.
 * srcrowsize   distance in bytes between consecutive source rows.
 * destrowsize  distance in bytes between consecutive rows of each output
 *              plane (the same stride is used for all three planes).
 * width        pixels per row.  Pixels are processed in pairs, so an odd
 *              trailing pixel is not converted; width < 2 converts nothing.
 * height       number of rows; height <= 0 converts nothing.
 *
 * init_rgb_to_yuv_mmx() must have been called first: ycoefs/ucoefs/vcoefs
 * are dereferenced without a null check.
 *
 * Arithmetic (coefficients are scaled by 32768):
 *     Y = ((sum_y * 219) >> 8 + 16.5  * 32768) >> 15
 *     U = ((sum_u * 224) >> 8 + 128.5 * 32768) >> 15      (V likewise)
 * which yields Y in 16..235 and U/V in 16..240.
 *
 * Fixes relative to the previous text of this routine:
 *   - the source pointer (ebx) is now advanced by 8 bytes per pixel pair;
 *     it was never advanced, so every iteration re-read the first two
 *     pixels of the row;
 *   - yp/up/vp are now advanced by destrowsize after each row; the stride
 *     was loaded into eax and immediately overwritten, so every row was
 *     written over the first output row;
 *   - the +16 / +128 offsets (with +0.5 rounding) are added between the two
 *     shifts; without them U and V were stored as signed values and Y was
 *     missing its black-level offset;
 *   - the v2 horizontal add now shifts the copy (MM2) like every other
 *     horizontal add in the routine (same result, consistent form).
 *
 * Uses eax, ebx, ecx, edx, esi, edi and MM0-MM2, MM5-MM7; executes EMMS
 * before returning.
 */
void RGBtoYUVmmx(unsigned char *src, unsigned char *desty, unsigned char *destu,
                 unsigned char *destv, int srcrowsize, int destrowsize,
                 int width, int height)
{
    unsigned char *yp, *up, *vp;    /* start of the current output row, per plane */
    unsigned char *prow;            /* start of the current source row */
    int i, j;                       /* i = rows remaining, j = pixel pairs per row */

    _asm {
        xor edx, edx
        mov eax, width
        sar eax, 1                  // pixel pairs per row
        cmp edx, eax
        jge yuvexit                 // no complete pair: nothing to do

        mov j, eax
        mov eax, height
        mov i, eax
        cmp edx, eax
        jge yuvexit                 // height <= 0: nothing to do

        mov eax, desty
        mov yp, eax
        mov eax, destu
        mov up, eax
        mov eax, destv
        mov vp, eax
        mov eax, src
        mov prow, eax
        pxor MM7, MM7               // MM7 = 0, used to zero-extend bytes to words
        mov eax, i

    heighttop:

        mov i, eax                  // rows remaining (including this one)
        mov edi, j                  // pair counter for this row
        mov ebx, prow               // ebx = source cursor
        mov ecx, yp                 // ecx = Y cursor
        mov edx, up                 // edx = U cursor
        mov esi, vp                 // esi = V cursor

    widthtop:
        movq MM5, [ebx]             // MM5 has 0 r2 g2 b2 0 r1 g1 b1, two pixels
        add ebx, 8                  // advance source cursor past the pair
        movq MM6, MM5
        punpcklbw MM5, MM7          // MM5 has 0 r1 g1 b1 as words
        punpckhbw MM6, MM7          // MM6 has 0 r2 g2 b2 as words

        // ---- Y ----
        movq MM0, MM5
        movq MM1, MM6
        mov eax, ycoefs
        pmaddwd MM0, [eax]          // MM0 has r1*cr and g1*cg+b1*cb
        movq MM2, MM0
        psrlq MM2, 32
        paddd MM0, MM2              // MM0 has y1 in lower 32 bits
        pmaddwd MM1, [eax]          // MM1 has r2*cr and g2*cg+b2*cb
        movq MM2, MM1
        psrlq MM2, 32
        paddd MM1, MM2              // MM1 has y2 in lower 32 bits
        movd eax, MM0
        imul eax, 219
        shr eax, 8
        add eax, 540672             // 16.5 * 32768: +16 offset, +0.5 rounding
        shr eax, 15
        mov [ecx], al
        inc ecx
        movd eax, MM1
        imul eax, 219
        shr eax, 8
        add eax, 540672             // 16.5 * 32768
        shr eax, 15
        mov [ecx], al
        inc ecx

        // ---- U ----
        movq MM0, MM5
        movq MM1, MM6
        mov eax, ucoefs
        pmaddwd MM0, [eax]          // MM0 has r1*cr and g1*cg+b1*cb
        movq MM2, MM0
        psrlq MM2, 32
        paddd MM0, MM2              // MM0 has u1 in lower 32 bits
        pmaddwd MM1, [eax]          // MM1 has r2*cr and g2*cg+b2*cb
        movq MM2, MM1
        psrlq MM2, 32
        paddd MM1, MM2              // MM1 has u2 in lower 32 bits
        movd eax, MM0
        imul eax, 224
        sar eax, 8                  // signed: the chroma sum may be negative
        add eax, 4210688            // 128.5 * 32768: result is now non-negative
        shr eax, 15
        mov [edx], al
        inc edx
        movd eax, MM1
        imul eax, 224
        sar eax, 8
        add eax, 4210688            // 128.5 * 32768
        shr eax, 15
        mov [edx], al
        inc edx

        // ---- V ---- (MM5/MM6 are consumed here; they are reloaded next pair)
        mov eax, vcoefs
        pmaddwd MM5, [eax]          // MM5 has r1*cr and g1*cg+b1*cb
        movq MM2, MM5
        psrlq MM2, 32
        paddd MM5, MM2              // MM5 has v1 in lower 32 bits
        pmaddwd MM6, [eax]          // MM6 has r2*cr and g2*cg+b2*cb
        movq MM2, MM6
        psrlq MM2, 32
        paddd MM6, MM2              // MM6 has v2 in lower 32 bits
        movd eax, MM5
        imul eax, 224
        sar eax, 8
        add eax, 4210688            // 128.5 * 32768
        shr eax, 15
        mov [esi], al
        inc esi
        movd eax, MM6
        imul eax, 224
        sar eax, 8
        add eax, 4210688            // 128.5 * 32768
        shr eax, 15
        mov [esi], al
        inc esi

        dec edi
        jnz widthtop

        mov eax, destrowsize        // step every output plane to its next row
        add yp, eax
        add up, eax
        add vp, eax
        mov eax, srcrowsize         // source rows are walked towards lower addresses
        sub prow, eax
        mov eax, i
        dec eax
        jnz heighttop

    yuvexit:
        emms
    }
}

hhoking 2002年05月23日

huqiming 2002年05月23日
hehe

winne_ll 2002年05月23日

opengl3d 2002年05月23日
Intel MMX

huqiming 2002年05月23日

/*
 * yuv2rgb_24 - convert planar YUV to packed 24-bit pixels using MMX.
 *
 * puc_y, stride_y     luma plane and its row stride in bytes.
 * puc_u, puc_v,
 * stride_uv           chroma planes and their row stride.  Four chroma
 *                     samples are consumed per eight luma samples and the
 *                     chroma row advances after every second luma row, i.e.
 *                     chroma is subsampled 2x horizontally and vertically.
 * puc_out             output buffer; each pixel is stored as the three bytes
 *                     b, g, r and rows are packed at width_y * 3 bytes.
 * width_y             luma width in pixels.  Pixels are converted in groups
 *                     of eight; a trailing partial group is not converted.
 * height_y            luma height; a negative value converts |height_y| rows
 *                     reading the source from its last row upwards (vertical
 *                     flip).
 * _stride_out         NOTE(review): accepted but ignored -- the output stride
 *                     is always width_y * 3.  Left as is because callers may
 *                     depend on it.
 *
 * Every 8-pixel group is stored with eight 4-byte MOVDs at 3-byte spacing,
 * so each group writes one byte past its own 24 bytes.  For that reason the
 * last output row is rendered into a scratch line and copied back.
 *
 * The conversion constants (mmw_0x00ff, mmb_0x10, mmw_0x0080, mmw_mult_*)
 * and MAXIMUM_Y_WIDTH are defined elsewhere in the project.
 *
 * Fixes relative to the previous version:
 *   - the scratch line used to be declared inside the "last line" if-block
 *     while puc_out kept pointing at it after that block had ended (use of
 *     an object outside its lifetime); it now lives at function scope;
 *   - width_y < 8 made the loop counter start at 0, so "inc edi / jne" ran
 *     about 2^32 iterations; such widths now return without converting;
 *   - width_y > MAXIMUM_Y_WIDTH would overflow the scratch line; such widths
 *     now return without converting;
 *   - the copy-back of the last row copies only the bytes that were actually
 *     converted instead of 3 * width_y, which included unwritten scratch
 *     bytes when width_y was not a multiple of 8.
 */
void yuv2rgb_24(uint8_t *puc_y, int stride_y,
                uint8_t *puc_u, uint8_t *puc_v, int stride_uv,
                uint8_t *puc_out, int width_y, int height_y,
                unsigned int _stride_out) {

    int y, horiz_count;
    uint8_t *puc_out_remembered;
    int stride_out = width_y * 3;
    /* scratch line for the last output row; +1 for the trailing MOVD byte */
    uint8_t temp_buff[3*MAXIMUM_Y_WIDTH+1];

    /* need at least one full 8-pixel group, and the scratch line must fit */
    if (width_y < 8 || width_y > MAXIMUM_Y_WIDTH)
        return;

    if (height_y < 0) {
        /* we are flipping our output upside-down */
        height_y = -height_y;
        puc_y += (height_y - 1) * stride_y ;
        puc_u += (height_y/2 - 1) * stride_uv;
        puc_v += (height_y/2 - 1) * stride_uv;
        stride_y = -stride_y;
        stride_uv = -stride_uv;
    }

    /* negative group count: the loop counts up to zero */
    horiz_count = -(width_y >> 3);

    for (y=0; y<height_y; y++) {

        if (y == height_y-1) {
            /* this is the last output line - we need to be careful not to overrun the end of this line */
            puc_out_remembered = puc_out;
            puc_out = temp_buff; /* write the RGB to a temporary store */
        }

        _asm {
            push eax
            push ebx
            push ecx
            push edx
            push edi

            mov eax, puc_out
            mov ebx, puc_y
            mov ecx, puc_u
            mov edx, puc_v
            mov edi, horiz_count

        horiz_loop:

            movd mm2, [ecx]
            pxor mm7, mm7

            movd mm3, [edx]
            punpcklbw mm2, mm7          ; mm2 = __u3__u2__u1__u0

            movq mm0, [ebx]             ; mm0 = y7y6y5y4y3y2y1y0
            punpcklbw mm3, mm7          ; mm3 = __v3__v2__v1__v0

            movq mm1, mmw_0x00ff        ; mm1 = 00ff00ff00ff00ff

            psubusb mm0, mmb_0x10       ; mm0 -= 16

            psubw mm2, mmw_0x0080       ; mm2 -= 128
            pand mm1, mm0               ; mm1 = __y6__y4__y2__y0

            psubw mm3, mmw_0x0080       ; mm3 -= 128
            psllw mm1, 3                ; mm1 *= 8

            psrlw mm0, 8                ; mm0 = __y7__y5__y3__y1
            psllw mm2, 3                ; mm2 *= 8

            pmulhw mm1, mmw_mult_Y      ; mm1 *= luma coeff
            psllw mm0, 3                ; mm0 *= 8

            psllw mm3, 3                ; mm3 *= 8
            movq mm5, mm3               ; mm5 = mm3 = v

            pmulhw mm5, mmw_mult_V_R    ; mm5 = red chroma
            movq mm4, mm2               ; mm4 = mm2 = u

            pmulhw mm0, mmw_mult_Y      ; mm0 *= luma coeff
            movq mm7, mm1               ; even luma part

            pmulhw mm2, mmw_mult_U_G    ; mm2 *= u green coeff
            paddsw mm7, mm5             ; mm7 = luma + chroma __r6__r4__r2__r0

            pmulhw mm3, mmw_mult_V_G    ; mm3 *= v green coeff
            packuswb mm7, mm7           ; mm7 = r6r4r2r0r6r4r2r0

            pmulhw mm4, mmw_mult_U_B    ; mm4 = blue chroma
            paddsw mm5, mm0             ; mm5 = luma + chroma __r7__r5__r3__r1

            packuswb mm5, mm5           ; mm5 = r7r5r3r1r7r5r3r1
            paddsw mm2, mm3             ; mm2 = green chroma

            movq mm3, mm1               ; mm3 = __y6__y4__y2__y0
            movq mm6, mm1               ; mm6 = __y6__y4__y2__y0

            paddsw mm3, mm4             ; mm3 = luma + chroma __b6__b4__b2__b0
            paddsw mm6, mm2             ; mm6 = luma + chroma __g6__g4__g2__g0

            punpcklbw mm7, mm5          ; mm7 = r7r6r5r4r3r2r1r0
            paddsw mm2, mm0             ; odd luma part plus chroma part __g7__g5__g3__g1

            packuswb mm6, mm6           ; mm6 = g6g4g2g0g6g4g2g0
            packuswb mm2, mm2           ; mm2 = g7g5g3g1g7g5g3g1

            packuswb mm3, mm3           ; mm3 = b6b4b2b0b6b4b2b0
            paddsw mm4, mm0             ; odd luma part plus chroma part __b7__b5__b3__b1

            packuswb mm4, mm4           ; mm4 = b7b5b3b1b7b5b3b1
            punpcklbw mm6, mm2          ; mm6 = g7g6g5g4g3g2g1g0

            punpcklbw mm3, mm4          ; mm3 = b7b6b5b4b3b2b1b0

            /* 32-bit shuffle.... */
            pxor mm0, mm0               ; is this needed?

            movq mm1, mm6               ; mm1 = g7g6g5g4g3g2g1g0
            punpcklbw mm1, mm0          ; mm1 = __g3__g2__g1__g0

            movq mm0, mm3               ; mm0 = b7b6b5b4b3b2b1b0
            punpcklbw mm0, mm7          ; mm0 = r3b3r2b2r1b1r0b0

            movq mm2, mm0               ; mm2 = r3b3r2b2r1b1r0b0

            punpcklbw mm0, mm1          ; mm0 = __r1g1b1__r0g0b0
            punpckhbw mm2, mm1          ; mm2 = __r3g3b3__r2g2b2

            /* 24-bit shuffle and save... */
            movd [eax], mm0             ; eax[0] = __r0g0b0
            psrlq mm0, 32               ; mm0 = __r1g1b1

            movd 3[eax], mm0            ; eax[3] = __r1g1b1

            movd 6[eax], mm2            ; eax[6] = __r2g2b2

            psrlq mm2, 32               ; mm2 = __r3g3b3

            movd 9[eax], mm2            ; eax[9] = __r3g3b3

            /* 32-bit shuffle.... */
            pxor mm0, mm0               ; is this needed?

            movq mm1, mm6               ; mm1 = g7g6g5g4g3g2g1g0
            punpckhbw mm1, mm0          ; mm1 = __g7__g6__g5__g4

            movq mm0, mm3               ; mm0 = b7b6b5b4b3b2b1b0
            punpckhbw mm0, mm7          ; mm0 = r7b7r6b6r5b5r4b4

            movq mm2, mm0               ; mm2 = r7b7r6b6r5b5r4b4

            punpcklbw mm0, mm1          ; mm0 = __r5g5b5__r4g4b4
            punpckhbw mm2, mm1          ; mm2 = __r7g7b7__r6g6b6

            /* 24-bit shuffle and save... */
            movd 12[eax], mm0           ; eax[12] = __r4g4b4
            psrlq mm0, 32               ; mm0 = __r5g5b5

            movd 15[eax], mm0           ; eax[15] = __r5g5b5
            add ebx, 8                  ; puc_y += 8;

            movd 18[eax], mm2           ; eax[18] = __r6g6b6
            psrlq mm2, 32               ; mm2 = __r7g7b7

            add ecx, 4                  ; puc_u += 4;
            add edx, 4                  ; puc_v += 4;

            movd 21[eax], mm2           ; eax[21] = __r7g7b7 (top byte lands at eax[24])
            add eax, 24                 ; puc_out += 24

            inc edi
            jne horiz_loop

            pop edi
            pop edx
            pop ecx
            pop ebx
            pop eax

            emms

        }

        if (y == height_y-1) {
            /* last line of output - we have used the temp_buff and need to copy... */
            int x = 24 * (width_y >> 3);      /* bytes actually converted on this line */
            uint8_t *ps = puc_out;            /* source pointer (temporary line store) */
            uint8_t *pd = puc_out_remembered; /* dest pointer */
            while (x--) *(pd++) = *(ps++);    /* copy the line */
        }

        puc_y += stride_y;
        if (y%2) {
            /* chroma rows are shared by two luma rows: step after each odd row */
            puc_u += stride_uv;
            puc_v += stride_uv;
        }
        puc_out += stride_out;

    }
}

jinbing 2002年05月23日

sunshine_djh 2002年05月22日

7205

2.3w+