/* 2004.02.01 first released source code for IOMP */

// Generic alpha renderers for all YUV modes and RGB depths.
// These are "reference implementations", should be optimized later (MMX, etc)
// Templating Code from Michael Niedermayer (michaelni@gmx.at) is under GPL

//#define FAST_OSD
//#define FAST_OSD_TABLE

#include "config.h"
#include "osd.h"
#include "mp_msg.h"
//#define ENABLE_PROFILE
#include "../my_profile.h"
#include <inttypes.h> // uint64_t, used by the MMX constants below
#include "../cpudetect.h"
#include "../mangle.h"

extern int verbose; // defined in mplayer.c

#ifdef ARCH_X86
#define CAN_COMPILE_X86_ASM
#endif

#ifdef CAN_COMPILE_X86_ASM
// 8-byte aligned constants; not referenced in this file itself
// (presumably consumed by the asm in osd_template.c -- TODO confirm).
static const uint64_t bFF __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL;
static const unsigned long long mask24lh __attribute__((aligned(8))) = 0xFFFF000000000000ULL;
static const unsigned long long mask24hl __attribute__((aligned(8))) = 0x0000FFFFFFFFFFFFULL;
#endif

// Note: we have C, X86-nommx, MMX, MMX2 and 3DNOW versions; there is no
// combined 3DNOW+MMX2 one.

// Plain C versions
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

#ifdef CAN_COMPILE_X86_ASM

#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif

#endif //CAN_COMPILE_X86_ASM

// From here on HAVE_* / ARCH_X86 no longer describe the build configuration:
// they are redefined before every inclusion of osd_template.c to select which
// variant the template emits.  The COMPILE_* macros above carry the real
// configuration.
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef ARCH_X86

#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef ARCH_X86
#define RENAME(a) a ## _C
#include "osd_template.c"
#endif

#ifdef CAN_COMPILE_X86_ASM

// X86 noMMX versions
#ifdef COMPILE_C
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _X86
#include "osd_template.c"
#endif

// MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _MMX
#include "osd_template.c"
#endif

// MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _MMX2
#include "osd_template.c"
#endif

// 3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _3DNow
#include "osd_template.c"
#endif

#endif //CAN_COMPILE_X86_ASM

/*
 * Public entry points for YV12, YUY2, RGB24 and RGB32.
 *
 * Each one forwards its arguments unchanged to one variant generated from
 * osd_template.c:
 *   - with RUNTIME_CPUDETECT the variant is picked from gCpuCaps at run time
 *     (fastest first: MMX2, 3DNow, MMX, plain x86), or the C version when no
 *     x86 asm can be compiled;
 *   - otherwise it is picked at compile time.  The HAVE_* / ARCH_X86 tests in
 *     that path see the values left behind by the LAST template inclusion
 *     above, not the original configuration, so the order of the template
 *     sections matters.
 */

void vo_draw_alpha_yv12(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
#ifdef RUNTIME_CPUDETECT
#ifdef CAN_COMPILE_X86_ASM
    // ordered per speed, fastest first
    if(gCpuCaps.hasMMX2)
        vo_draw_alpha_yv12_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    else if(gCpuCaps.has3DNow)
        vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    else if(gCpuCaps.hasMMX)
        vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    else
        vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //RUNTIME_CPUDETECT
#ifdef HAVE_MMX2
    vo_draw_alpha_yv12_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_3DNOW)
    vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_MMX)
    vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (ARCH_X86)
    vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!RUNTIME_CPUDETECT
}

void vo_draw_alpha_yuy2(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
#ifdef RUNTIME_CPUDETECT
#ifdef CAN_COMPILE_X86_ASM
    // ordered per speed, fastest first
    if(gCpuCaps.hasMMX2)
        vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    else if(gCpuCaps.has3DNow)
        vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    else if(gCpuCaps.hasMMX)
        vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    else
        vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //RUNTIME_CPUDETECT
#ifdef HAVE_MMX2
    vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_3DNOW)
    vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_MMX)
    vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (ARCH_X86)
    vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!RUNTIME_CPUDETECT
}

void vo_draw_alpha_rgb24(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
#ifdef RUNTIME_CPUDETECT
#ifdef CAN_COMPILE_X86_ASM
    // ordered per speed, fastest first
    if(gCpuCaps.hasMMX2)
        vo_draw_alpha_rgb24_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    else if(gCpuCaps.has3DNow)
        vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    else if(gCpuCaps.hasMMX)
        vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    else
        vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //RUNTIME_CPUDETECT
#ifdef HAVE_MMX2
    vo_draw_alpha_rgb24_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_3DNOW)
    vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_MMX)
    vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (ARCH_X86)
    vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!RUNTIME_CPUDETECT
}

void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
#ifdef RUNTIME_CPUDETECT
#ifdef CAN_COMPILE_X86_ASM
    // ordered per speed, fastest first
    if(gCpuCaps.hasMMX2)
        vo_draw_alpha_rgb32_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    else if(gCpuCaps.has3DNow)
        vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    else if(gCpuCaps.hasMMX)
        vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    else
        vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //RUNTIME_CPUDETECT
#ifdef HAVE_MMX2
    vo_draw_alpha_rgb32_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_3DNOW)
    vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_MMX)
    vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (ARCH_X86)
    vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!RUNTIME_CPUDETECT
}

#ifdef FAST_OSD_TABLE
// Grey level (0..255) -> packed 15/16 bpp pixel, filled by vo_draw_alpha_init().
static unsigned short fast_osd_15bpp_table[256];
static unsigned short fast_osd_16bpp_table[256];
#endif

/*
 * One-time setup: fills the FAST_OSD lookup tables (when compiled in) and,
 * if `verbose` is non-zero, logs which renderer variant the dispatchers
 * above will use.
 */
void vo_draw_alpha_init(){
#ifdef FAST_OSD_TABLE
    int i;
    for(i=0;i<256;i++){
        fast_osd_15bpp_table[i]=((i>>3)<<10)|((i>>3)<<5)|(i>>3);
        fast_osd_16bpp_table[i]=((i>>3)<<11)|((i>>2)<<5)|(i>>3);
    }
#endif
    //FIXME the optimized stuff is a lie for 15/16bpp as they aren't optimized yet
    if(verbose)
    {
#ifdef RUNTIME_CPUDETECT
#ifdef CAN_COMPILE_X86_ASM
        // ordered per speed, fastest first
        if(gCpuCaps.hasMMX2)
            mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n");
        else if(gCpuCaps.has3DNow)
            mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n");
        else if(gCpuCaps.hasMMX)
            mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX Optimized OnScreenDisplay\n");
        else
            mp_msg(MSGT_OSD,MSGL_INFO,"Using X86 Optimized OnScreenDisplay\n");
#else
        mp_msg(MSGT_OSD,MSGL_INFO,"Using Unoptimized OnScreenDisplay\n");
#endif
#else //RUNTIME_CPUDETECT
#ifdef HAVE_MMX2
        mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n");
#elif defined (HAVE_3DNOW)
        mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n");
#elif defined (HAVE_MMX)
        mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX Optimized OnScreenDisplay\n");
#elif defined (ARCH_X86)
        mp_msg(MSGT_OSD,MSGL_INFO,"Using X86 Optimized OnScreenDisplay\n");
#else
        mp_msg(MSGT_OSD,MSGL_INFO,"Using Unoptimized OnScreenDisplay\n");
#endif
#endif //!RUNTIME_CPUDETECT
    }
}

/*
 * Draws a w x h OSD bitmap onto a 15 bpp (5-5-5) destination.
 *
 *   src        per-pixel value added to every channel (8-bit scale)
 *   srca       per-pixel factor applied to the existing destination pixel;
 *              0 means "leave this pixel untouched"
 *   srcstride  bytes per row of both src and srca
 *   dstbase    first destination pixel; rows are dststride bytes apart
 *
 * With FAST_OSD the destination is simply overwritten with the grey level
 * taken from src instead of being blended.
 */
void vo_draw_alpha_rgb15(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
    for(y=0;y<h;y++){
        unsigned short *dst = (unsigned short*) dstbase;
        int x;
        for(x=0;x<w;x++){
            if(srca[x]){
#ifdef FAST_OSD
#ifdef FAST_OSD_TABLE
                dst[x]=fast_osd_15bpp_table[src[x]];
#else
                unsigned int a=src[x]>>3;
                dst[x]=(a<<10)|(a<<5)|a;
#endif
#else
                unsigned char r=dst[x]&0x1F;
                unsigned char g=(dst[x]>>5)&0x1F;
                unsigned char b=(dst[x]>>10)&0x1F;
                r=(((r*srca[x])>>5)+src[x])>>3;
                g=(((g*srca[x])>>5)+src[x])>>3;
                b=(((b*srca[x])>>5)+src[x])>>3;
                dst[x]=(b<<10)|(g<<5)|r;
#endif
            }
        }
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
}

// When osd_color_g is non-zero, vo_draw_alpha_rgb16() draws a green-tinted
// OSD.  osd_color_r and osd_color_b are not read anywhere in this file.
unsigned char osd_color_r = 0;
unsigned char osd_color_g = 0;
unsigned char osd_color_b = 0;

// NOTE(review): none of these are referenced in this file -- candidates for removal.
static unsigned char t3=0,t4,BR,BG,BB,DR,DG,DB;

// Packs 8-bit r/g/b into one RGB565 value.
#define rgb24torgb16(r,g,b) \
    ((((short)(b) & 0x00f8) >> 3) | \
     (((short)(g) & 0x00fc) << 3) | \
     (((short)(r) & 0x00f8) << 8))

// Highlight rectangle and colour used by vo_draw_alpha_rgb16() while
// avamax_var->my_spu_draw is set.  start_c_x is not read in this file.
int start_c_x;
int end_c_x;
int start_c_y;
int end_c_y;
int col_color;

/*
 * Blends one RGB565 pixel: every channel of `bg` is scaled by `alpha`
 * (bringing it to 8-bit range), the matching foreground term is added, and
 * the sum is reduced back to 5/6/5 bits.  The intermediate unsigned char
 * truncation mirrors the reference formula used for the other depths.
 *
 *   fg_lo / fg_mid / fg_hi  8-bit-scale values added to bits 0-4, 5-10, 11-15
 */
static unsigned short osd_blend_rgb16(unsigned short bg, unsigned char alpha,
                                      int fg_lo, int fg_mid, int fg_hi)
{
    unsigned char lo  = bg & 0x1F;
    unsigned char mid = (bg >> 5) & 0x3F;
    unsigned char hi  = (bg >> 11) & 0x1F;

    lo  = (((lo  * alpha) >> 5) + fg_lo)  >> 3;
    mid = (((mid * alpha) >> 6) + fg_mid) >> 2;
    hi  = (((hi  * alpha) >> 5) + fg_hi)  >> 3;

    return (hi << 11) | (mid << 5) | lo;
}

/*
 * Draws a w x h OSD bitmap onto a 16 bpp (5-6-5) destination.  Parameters
 * have the same meaning as for vo_draw_alpha_rgb15(); pixels whose srca
 * entry is 0 are left untouched.
 *
 * Three rendering modes:
 *   - avamax_var->my_spu_draw set: on rows strictly between start_c_y and
 *     end_c_y, columns [0, end_c_x) are tinted with col_color; everything
 *     else is blended untinted.
 *   - otherwise, osd_color_g non-zero: green tint (the outer two channels
 *     get a constant 0x0f instead of src).
 *   - otherwise: plain untinted blend.
 *
 * NOTE(review): avamax_var is not declared in this file; it presumably comes
 * from one of the project headers included at the top -- confirm.
 */
void vo_draw_alpha_rgb16(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
    //printf("draw_alpha %d:%d\n",start_c_x,start_c_y);
    if (avamax_var->my_spu_draw) {
        // Per-channel tint factors taken from col_color.
        // NOTE(review): the middle-channel mask is 0xf while the other two
        // use 0xf0, which makes that channel much weaker -- verify against
        // the layout of col_color.
        const unsigned char BC = (col_color >> 8) & 0xf0;
        const unsigned char GC = (col_color >> 4) & 0xf;
        const unsigned char RC = (col_color >> 0) & 0xf0;

        for(y=0;y<h;y++){
            unsigned short *dst = (unsigned short*) dstbase;
            int tinted = 0; // number of leading columns drawn with the tint
            int x;

            if ((y > start_c_y) && (y < end_c_y)) {
                tinted = (end_c_x < w) ? end_c_x : w;
                if (tinted < 0)
                    tinted = 0;
            }

            for (x = 0; x < tinted; x++) {
                if(srca[x]){
                    // Each channel blends its OWN background value, and the
                    // tinted source is scaled to 8 bits before being added.
                    dst[x]=osd_blend_rgb16(dst[x], srca[x],
                                           (src[x]*BC)>>8,
                                           (src[x]*GC)>>8,
                                           (src[x]*RC)>>8);
                }
            }
            for (; x < w; x++) {
                if(srca[x])
                    dst[x]=osd_blend_rgb16(dst[x], srca[x], src[x], src[x], src[x]);
            }

            src+=srcstride;
            srca+=srcstride;
            dstbase+=dststride;
        }
    } else { // !spu_draw
        const int green = (osd_color_g != 0);

        for(y=0;y<h;y++){
            unsigned short *dst = (unsigned short*) dstbase;
            int x;
            for(x=0;x<w;x++){
                if(srca[x]){
                    const int side = green ? 0x0f : src[x];
                    dst[x]=osd_blend_rgb16(dst[x], srca[x], side, src[x], side);
                }
            }
            src+=srcstride;
            srca+=srcstride;
            dstbase+=dststride;
        }
    }
}