diff --git a/MacGLide/MacGLide b/MacGLide/MacGLide index 5f17549..6751e1d 100644 Binary files a/MacGLide/MacGLide and b/MacGLide/MacGLide differ diff --git a/MacGLide/OpenGLide/Framebuffer.cpp b/MacGLide/OpenGLide/Framebuffer.cpp index 19cf29a..849267b 100644 --- a/MacGLide/OpenGLide/Framebuffer.cpp +++ b/MacGLide/OpenGLide/Framebuffer.cpp @@ -1 +1 @@ -//************************************************************** //* OpenGLide - Glide to OpenGL Wrapper //* http://openglide.sourceforge.net //* //* framebuffer emulation //* //* OpenGLide is OpenSource under LGPL license //* Mac version and additional features by Jens-Olaf Hemprich //************************************************************** #include "Framebuffer.h" #include "Glide.h" #include "GlideApplication.h" #include "GlideSettings.h" #include "GLRender.h" #include "GLRenderUpdateState.h" #include "GLColorAlphaCombineEnvTables.h" // check if tile needs to be displayed #define CHECK_RENDER_TILE // Display small dots at opposite corners of rendered framebuffer tiles //#define DEBUG_TILE_RENDERING Framebuffer::Framebuffer() : m_x_step_start(0) , m_y_step_start(0) , m_x_step_start_opaque(0) , m_y_step_start_opaque(0) , m_width(0) , m_height(0) , m_framebuffer(NULL) , m_texbuffer(NULL) , m_origin(GR_ORIGIN_UPPER_LEFT) , m_glInternalFormat(-1) , m_glFormat(-1) , m_glType(-1) , m_glDepth(1.0f) , m_format_valid(false) , m_use_client_storage(false) , m_useRectangleARB(false) , m_must_clear_buffer(true) , m_custom_tilesizes(NULL) { // Don't set the checksum to 0, as this would cause white screen in Carmageddon // because fully black tile also have a 0 checksum and no texture data would be // download at all (and the tile would be rendered as if TEXTURE_2D was disabled) memset(m_tileChecksums, 0xff, sizeof(vector unsigned long) * m_tileCount); } Framebuffer::~Framebuffer() { } bool Framebuffer::initialise_buffers(BufferStruct* framebuffer, BufferStruct* texbuffer, FxU32 width, FxU32 height, const tilesize* 
tilesizetable) { #ifdef OGL_FRAMEBUFFER GlideMsg( "GlideFrameBuffer::initialise_buffers(---, ---, %d, %d, ---)\n", width, height); #endif m_custom_tilesizes = tilesizetable; return initialise_buffers(framebuffer, texbuffer, width, height, 0, 0); } bool Framebuffer::initialise_buffers(BufferStruct* framebuffer, BufferStruct* texbuffer, FxU32 width, FxU32 height, FxU32 x_tile, FxU32 y_tile) { #ifdef OGL_FRAMEBUFFER GlideMsg( "Framebuffer::initialise_buffers(---, ---, %d, %d, %d, %d)\n", width, height, x_tile, y_tile); #endif m_framebuffer = framebuffer; m_texbuffer = texbuffer; m_framebuffer->WriteMode = m_texbuffer->WriteMode = GR_LFBWRITEMODE_UNUSED; m_width = width; m_height = height; // find out largest texture size GLint tile_size; glGetIntegerv(GL_MAX_TEXTURE_SIZE, &tile_size); m_x_step_start_opaque = tile_size; m_y_step_start_opaque = tile_size; m_x_step_start = min(tile_size, x_tile); m_y_step_start = min(tile_size, y_tile); m_x_step_start = max(16, m_x_step_start); m_y_step_start = max(16, m_y_step_start); // Disabled because this is a global setting now // m_use_client_storage = InternalConfig.EXT_Client_Storage; m_useRectangleARB = InternalConfig.ARB_texture_rectangle && InternalConfig.EXT_compiled_vertex_array; // The texture priority is set to minimun because // frame buffer textures are never used a second time // @todo: This is not true anymore in all cases // because of the altivec checksum feature const GLfloat zero = 0.0f; glGenTextures(m_tileCount, &m_textureNames[0]); glPrioritizeTextures(m_tileCount, &m_textureNames[0], &zero); for(int i = 0; i < m_tileCount; i++) { const GLenum textureTarget = m_useRectangleARB ? GL_TEXTURE_RECTANGLE_ARB : GL_TEXTURE_2D; glBindTexture(textureTarget, m_textureNames[i]); if (m_useRectangleARB) { // This will not probably not work right now, because // we're using RGBA (the OS9 prefered texture format) // @todo: use ABGR??? 
in order to avoid byte swizzling // glTextureRangeAPPLE(GLenum target, GLsizei length, GLvoid *pointer); glTexParameteri(textureTarget, GL_TEXTURE_STORAGE_HINT_APPLE, GL_STORAGE_CACHED_APPLE); glReportError(); } glTexParameteri(textureTarget, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(textureTarget, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(textureTarget, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(textureTarget, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); } // If a game has its own tilesize table, use // the largest tiles for opaque renderings GLint y_step = y_tile == 0 ? m_y_step_start_opaque : m_y_step_start; // init default/opaque tilesize table int w = 0; for(FxU32 y = 0; y < m_height && w < MaxTiles ; y += y_step, w++) { while (m_height - y < y_step) { y_step = y_step >> 1; } m_tilesizes[w].y = y_step; GLint x_step = x_tile == 0 ? m_x_step_start_opaque : m_x_step_start; int v = 0; for(FxU32 x = 0; x < m_width && v < MaxTiles; x += x_step, v++ ) { while (m_width - x < x_step) { x_step = x_step >> 1; } m_tilesizes[w].x[v] = x_step; } } // Build compiled vertex arrays if (InternalConfig.EXT_compiled_vertex_array) { // Store various render buffers indices m_tilesizesVertexArrayIndex = OGLRender.FrameBufferStartIndex; m_tilesizesCount = buildVertexArrays(&m_tilesizes[0], m_tilesizesVertexArrayIndex); if (m_custom_tilesizes) { m_customtilesizesVertexArrayIndex = m_tilesizesVertexArrayIndex + m_tilesizesCount * 2; m_customtilesizesCount = buildVertexArrays(m_custom_tilesizes, m_customtilesizesVertexArrayIndex); } } return m_width > 0 && m_height > 0 && m_x_step_start > 0 && m_y_step_start > 0 && m_format_valid; } void Framebuffer::free_buffers() { #ifdef OGL_FRAMEBUFFER GlideMsg( "Framebuffer::free_buffers()\n"); #endif if (m_tilesizes) FreeObject(m_tilesizes); glDeleteTextures(m_tileCount, &m_textureNames[0]); } void Framebuffer::initialise_format(GrLfbWriteMode_t writemode) { #if defined(OGL_PART_DONE) || defined(OGL_FRAMEBUFFER) 
GlideMsg("Framebuffer::initialise_format(0x%x)\n", writemode); #endif // Enlarge buffer? if (writemode >= GR_LFBWRITEMODE_888 && (m_framebuffer->WriteMode < GR_LFBWRITEMODE_888 || m_framebuffer->WriteMode == GR_LFBWRITEMODE_UNUSED) && m_framebuffer->Address) { // Delete existing buffer FreeFrameBuffer(m_framebuffer->Address); m_framebuffer->Address = NULL; m_texbuffer->Address = NULL; } // Allocate 32-bit buffer (16bit buffer has been allocated in grSstWinOpen() if (m_framebuffer->Address == NULL) { unsigned long openglpixels = OpenGL.WindowWidth * OpenGL.WindowHeight; // Framebuffer can be written to with 16bit or 32bit data unsigned long buffertypesize = (writemode >= GR_LFBWRITEMODE_888) ? sizeof(FxU32) : sizeof(FxU16); Glide.FrameBuffer.Address = (FxU16*) AllocFrameBuffer(Glide.WindowTotalPixels * buffertypesize + openglpixels * sizeof(FxU32), 1); Glide.TempBuffer.Address = &Glide.FrameBuffer.Address[Glide.WindowTotalPixels * buffertypesize >> 1]; memset( Glide.FrameBuffer.Address, 0, Glide.WindowTotalPixels * buffertypesize); memset( Glide.TempBuffer.Address, 0, openglpixels * sizeof(FxU32)); } m_framebuffer->WriteMode = writemode; m_glInternalFormat = 4; m_glFormat = GL_RGBA; m_glType = GL_UNSIGNED_BYTE; FxU16 chromakeyvalue; switch (writemode) { case GR_LFBWRITEMODE_565: chromakeyvalue = s_GlideApplication.GetType() == GlideApplication::Carmageddon ? 
0x1f1f : 0x07ff; m_format_valid = true; break; case GR_LFBWRITEMODE_1555: chromakeyvalue = 0x03ff; m_format_valid = true; break; case GR_LFBWRITEMODE_888: chromakeyvalue = 0x7ffdfeff; m_format_valid = true; break; default: chromakeyvalue = 0x0; m_format_valid = false; break; } // When the chromakeyvalue changes, the buffer has to be cleared if (chromakeyvalue != m_ChromaKey.Scalar) { SetChromaKeyValue(chromakeyvalue); m_must_clear_buffer = true; } } bool Framebuffer::begin_write() { #ifdef OGL_FRAMEBUFFER GlideMsg("Framebuffer::begin_write()\n"); #endif if (m_must_clear_buffer) { Clear(); m_must_clear_buffer = false; } return true; } void Framebuffer::Clear() { #ifdef OGL_FRAMEBUFFER GlideMsg( "Framebuffer::Clear()\n"); #endif const FxU16 chromakey = GetChromaKeyValue(); const FxU32 count = m_width * m_height ; FxU16* framebuffer = m_framebuffer->Address; for ( int i = 0; i < count; i++) { framebuffer[i] = chromakey; } } bool Framebuffer::end_write(FxU32 alpha, GLfloat depth, bool pixelpipeline) { #ifdef OGL_FRAMEBUFFER GlideMsg("Framebuffer::end_write(%d, %f, %d)\n", alpha, depth, pixelpipeline); #endif m_glDepth = depth; #ifdef __ALTIVEC__ for(int i = 0; i < 4; i++) { (&m_glAlpha.Scalar)[i] = alpha; } #else m_glAlpha.Scalar = alpha; #endif // if all pixels are invisible, nothing must be rendered. // The pixel conversion functions assume alpha is != 0 in order // to determine if a tile contains any pixels to be rendered. if (m_glAlpha.Scalar == 0) return false; set_gl_state(pixelpipeline); if (InternalConfig.EXT_compiled_vertex_array) { if (m_custom_tilesizes) { drawCompiledVertexArrays(m_custom_tilesizes, m_customtilesizesVertexArrayIndex, m_customtilesizesCount, pixelpipeline); } else { drawCompiledVertexArrays(m_tilesizes, m_tilesizesVertexArrayIndex, m_tilesizesCount, pixelpipeline); } } else { const tilesize* tilesizes = m_custom_tilesizes ? 
m_custom_tilesizes : m_tilesizes; draw(tilesizes, pixelpipeline); } restore_gl_state(pixelpipeline); return true; } bool Framebuffer::end_write(FxU32 alpha) { #ifdef OGL_DONE GlideMsg("Framebuffer::end_write(%d)\n", alpha); #endif // draw frame buffer // @todo: Depth should OpenGL.ZNear, but that breaks overlays in Myth FxBool result = end_write(alpha, 0.0, false); return result; } bool Framebuffer::end_write() { #ifdef OGL_FRAMEBUFFER GlideMsg( "Framebuffer::end_write( )\n" ); #endif return end_write(0x000000ff); } bool Framebuffer::end_write_opaque() { #ifdef OGL_FRAMEBUFFER GlideMsg("Framebuffer::end_write_opaque()\n"); #endif // @todo: Depth should OpenGL.ZNear, but that breaks overlays in Myth return end_write(0x000000ff, 0.0, false); } void Framebuffer::set_gl_state(bool pixelpipeline) { #ifdef OGL_FRAMEBUFFER GlideMsg( "Framebuffer::set_gl_state(%d)\n", pixelpipeline); #endif glReportErrors("Framebuffer::set_gl_state"); VERIFY_ACTIVE_TEXTURE_UNIT(OpenGL.ColorAlphaUnit1); // Disable the cull mode glDisable(GL_CULL_FACE); // Disable clip volume hint manually to avoid recursion if (InternalConfig.EXT_clip_volume_hint && OpenGL.ClipVerticesEnabledState) { glHint(GL_CLIP_VOLUME_CLIPPING_HINT_EXT, GL_FASTEST); } if (pixelpipeline) { if (OpenGL.ColorAlphaUnit2) { // Pixelpipeline support for env cobine based rendering: // Framebuffer pixels must be routed through the coloralpha unit // as if they were produced by the vertex iterators without an // additional GL texture unit -> source must be changed accordingly m_bRestoreColorCombine = false; if (Glide.State.ColorCombineLocal == GR_COMBINE_LOCAL_ITERATED) { Glide.State.ColorCombineLocal = GR_COMBINE_LOCAL_PIXELPIPELINE; m_bRestoreColorCombine = true; } if (Glide.State.ColorCombineOther == GR_COMBINE_OTHER_ITERATED) { Glide.State.ColorCombineOther = GR_COMBINE_OTHER_PIXELPIPELINE; m_bRestoreColorCombine = true; } if (m_bRestoreColorCombine) SetColorCombineState(); m_bRestoreAlphaCombine = false; if 
(Glide.State.AlphaLocal == GR_COMBINE_LOCAL_ITERATED) { Glide.State.AlphaLocal = GR_COMBINE_LOCAL_PIXELPIPELINE; m_bRestoreAlphaCombine = true; } if (Glide.State.AlphaOther == GR_COMBINE_OTHER_ITERATED) { Glide.State.AlphaOther = GR_COMBINE_OTHER_PIXELPIPELINE; m_bRestoreAlphaCombine = true; } if (m_bRestoreAlphaCombine) SetAlphaCombineState(); // Update the opengl state for the pixel pipeline RenderUpdateState(); // If the write mode doesn't provide alpha then m_glAlpha is used // as the constant alpha value, and we can use the alpha test // to mask out chromakey pixels switch (m_framebuffer->WriteMode) { case GR_LFBWRITEMODE_565: case GR_LFBWRITEMODE_888: glEnable(GL_ALPHA_TEST); const GLenum alphaTestFunction = GL_EQUAL; const GLfloat alphaTestReferenceValue= m_glAlpha.Scalar * D1OVER255; OpenGL.AlphaTestFunction = alphaTestFunction; OpenGL.AlphaReferenceValue = alphaTestReferenceValue; glAlphaFunc(alphaTestFunction, alphaTestReferenceValue); glReportError(); break; } } // GL_RECTANGLE_ARB overrides GL_TEXTURE_2D if (m_useRectangleARB) { const GLenum textureTarget = GL_TEXTURE_RECTANGLE_ARB; const bool enableColoralphaTextureUnit1 = OpenGL.ColorAlphaUnitColorEnabledState[0] || OpenGL.ColorAlphaUnitAlphaEnabledState[0]; if (enableColoralphaTextureUnit1) { glEnable(textureTarget); glReportError(); } if (OpenGL.ColorAlphaUnit2) { const bool enableColoralphaTextureUnit2 = OpenGL.ColorAlphaUnitColorEnabledState[1] || OpenGL.ColorAlphaUnitAlphaEnabledState[1]; if (enableColoralphaTextureUnit2) { glActiveTextureARB(OpenGL.ColorAlphaUnit2); glReportError(); glEnable(textureTarget); glReportError(); glActiveTextureARB(OpenGL.ColorAlphaUnit1); glReportError(); } } // The client texture state is already setup correctly since we just // have to adjust the texture rectangle state // (the texture_2d state is not changed when the pixelpipeline mode is active) } // Set the origin with clipping glMatrixMode(GL_PROJECTION); glLoadIdentity(); if (m_origin == GR_ORIGIN_LOWER_LEFT) 
{ glOrtho(Glide.State.ClipMinX, Glide.State.ClipMaxX, Glide.State.ClipMinY, Glide.State.ClipMaxY, OpenGL.ZNear, OpenGL.ZFar); glViewport(OpenGL.OriginX + OpenGL.ClipMinX, OpenGL.OriginY + OpenGL.ClipMinY, OpenGL.ClipMaxX - OpenGL.ClipMinX, OpenGL.ClipMaxY - OpenGL.ClipMinY); } else { glOrtho(Glide.State.ClipMinX, Glide.State.ClipMaxX, Glide.State.ClipMaxY, Glide.State.ClipMinY, OpenGL.ZNear, OpenGL.ZFar); glViewport(OpenGL.OriginX + OpenGL.ClipMinX, OpenGL.OriginY + OpenGL.WindowHeight - OpenGL.ClipMaxY, OpenGL.ClipMaxX - OpenGL.ClipMinX, OpenGL.ClipMaxY - OpenGL.ClipMinY); } // The scissor rectangle is not reset, because scissor mode // is only enabled when clearing the buffer glMatrixMode(GL_MODELVIEW); glReportError(); } else { // disable blend glDisable(GL_BLEND); // disable depth buffer glDepthMask(false); // Enable colormask glColorMask( true, true, true, false); // Needed for displaying in-game menus if (Glide.State.DepthBufferMode != GR_DEPTHBUFFER_DISABLE) { glDisable(GL_DEPTH_TEST); } glEnable(GL_ALPHA_TEST); // Update state as we're calling update triggers on restore const GLenum alphaTestFunction = GL_GREATER; const GLfloat alphaTestReferenceValue= 0.0; OpenGL.AlphaTestFunction = alphaTestFunction; OpenGL.AlphaReferenceValue = alphaTestReferenceValue; glAlphaFunc(alphaTestFunction, alphaTestReferenceValue); glReportError(); if (InternalConfig.EXT_secondary_color) { glDisable(GL_COLOR_SUM_EXT); glReportError(); } // Reset the clipping window // and set the origin glMatrixMode(GL_PROJECTION); glLoadIdentity(); if (m_origin == GR_ORIGIN_LOWER_LEFT) { glOrtho(0, Glide.WindowWidth, 0, Glide.WindowHeight, OpenGL.ZNear, OpenGL.ZFar); glViewport(OpenGL.OriginX, OpenGL.OriginY, OpenGL.WindowWidth, OpenGL.WindowHeight); } else { glOrtho(0, Glide.WindowWidth, Glide.WindowHeight, 0, OpenGL.ZNear, OpenGL.ZFar); glViewport(OpenGL.OriginX, OpenGL.OriginY, OpenGL.WindowWidth, OpenGL.WindowHeight); } // The scissor rectangle is not changed, because scissor mode // is 
only enabled when clearing the buffer glMatrixMode(GL_MODELVIEW); glReportError(); // Disable fog bool disable_fog_texture_unit = OpenGL.FogTextureUnit; if (disable_fog_texture_unit) { glActiveTextureARB(OpenGL.FogTextureUnit); if (InternalConfig.EXT_compiled_vertex_array) { glClientActiveTextureARB(OpenGL.FogTextureUnit); glDisableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer(4, GL_FLOAT, 0, NULL); } glDisable(GL_TEXTURE_2D); } if (OpenGL.Fog && InternalConfig.FogMode != OpenGLideFogEmulation_None && InternalConfig.FogMode != OpenGLideFogEmulation_EnvCombine) { glDisable(GL_FOG); } glReportError(); // enable framebuffer texture unit if (OpenGL.ColorAlphaUnit2) { bool disable_coloralpha_texture_unit_2 = OpenGL.ColorAlphaUnitColorEnabledState[1] || OpenGL.ColorAlphaUnitAlphaEnabledState[1]; if (disable_coloralpha_texture_unit_2) { glActiveTextureARB(OpenGL.ColorAlphaUnit2); if (InternalConfig.EXT_compiled_vertex_array) { glClientActiveTextureARB(OpenGL.ColorAlphaUnit2); glDisableClientState(GL_TEXTURE_COORD_ARRAY); // On MacOS9 (Classic?) 
the texcoord pointer needs to be reset // to the default value when glLockArrays/glUnlockArrays is used glTexCoordPointer( 4, GL_FLOAT, 0, NULL ); } glDisable(GL_TEXTURE_2D); } if (disable_fog_texture_unit || disable_coloralpha_texture_unit_2) { glActiveTextureARB(OpenGL.ColorAlphaUnit1); if (InternalConfig.EXT_compiled_vertex_array) { glClientActiveTextureARB(OpenGL.ColorAlphaUnit1); } } if (!OpenGL.ColorAlphaUnitColorEnabledState[0] && !OpenGL.ColorAlphaUnitAlphaEnabledState[0]) { glEnable(GL_TEXTURE_2D); if (InternalConfig.EXT_compiled_vertex_array) { glEnableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer(4, GL_FLOAT, 0, &OGLRender.TTexture[0]); } } } else { if (disable_fog_texture_unit) { glActiveTextureARB(OpenGL.ColorAlphaUnit1); if (InternalConfig.EXT_compiled_vertex_array) { glClientActiveTextureARB(OpenGL.ColorAlphaUnit1); } } if (OpenGL.Texture == false) { // GL_RECTANGLE_ARB overrides GL_RECTANGLE_2D if (!m_useRectangleARB) glEnable(GL_TEXTURE_2D); if (InternalConfig.EXT_compiled_vertex_array) { glEnableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer(4, GL_FLOAT, 0, &OGLRender.TTexture[0]); } } } if (m_useRectangleARB) { glEnable(GL_TEXTURE_RECTANGLE_ARB); glReportError(); } glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE); glReportError(); } } void Framebuffer::restore_gl_state(bool pixelpipeline) { #ifdef OGL_FRAMEBUFFER GlideMsg( "Framebuffer::restore_gl_state(%d)\n", pixelpipeline); #endif glReportErrors("Framebuffer::restore_gl_state"); // Restore the cull mode switch (Glide.State.CullMode) { case GR_CULL_DISABLE: break; case GR_CULL_NEGATIVE: case GR_CULL_POSITIVE: glEnable(GL_CULL_FACE); break; } if (InternalConfig.EXT_clip_volume_hint && OpenGL.ClipVerticesEnabledState) { glHint(GL_CLIP_VOLUME_CLIPPING_HINT_EXT, GL_NICEST); } // Restore the clipping window glMatrixMode(GL_PROJECTION); glLoadIdentity(); if ( Glide.State.OriginInformation == GR_ORIGIN_LOWER_LEFT ) { glOrtho(Glide.State.ClipMinX, Glide.State.ClipMaxX, 
Glide.State.ClipMinY, Glide.State.ClipMaxY, OpenGL.ZNear, OpenGL.ZFar); glViewport(OpenGL.OriginX + OpenGL.ClipMinX, OpenGL.OriginY + OpenGL.ClipMinY, OpenGL.ClipMaxX - OpenGL.ClipMinX, OpenGL.ClipMaxY - OpenGL.ClipMinY); } else { glOrtho(Glide.State.ClipMinX, Glide.State.ClipMaxX, Glide.State.ClipMaxY, Glide.State.ClipMinY, OpenGL.ZNear, OpenGL.ZFar); glViewport(OpenGL.OriginX + OpenGL.ClipMinX, OpenGL.OriginY + OpenGL.WindowHeight - OpenGL.ClipMaxY, OpenGL.ClipMaxX - OpenGL.ClipMinX, OpenGL.ClipMaxY - OpenGL.ClipMinY); } // The scissor rectangle is not reset, because scissor mode // is only enabled when clearing the buffer glMatrixMode( GL_MODELVIEW ); glReportError(); if (pixelpipeline) { if (OpenGL.ColorAlphaUnit2) { // restore current values if (m_bRestoreColorCombine) { if (Glide.State.ColorCombineLocal == GR_COMBINE_LOCAL_PIXELPIPELINE) Glide.State.ColorCombineLocal = GR_COMBINE_LOCAL_ITERATED; if (Glide.State.ColorCombineOther == GR_COMBINE_OTHER_PIXELPIPELINE) Glide.State.ColorCombineOther = GR_COMBINE_OTHER_ITERATED; SetColorCombineState(); } if(m_bRestoreAlphaCombine) { if (Glide.State.AlphaLocal == GR_COMBINE_LOCAL_PIXELPIPELINE) Glide.State.AlphaLocal = GR_COMBINE_LOCAL_ITERATED; if (Glide.State.AlphaOther == GR_COMBINE_OTHER_PIXELPIPELINE) Glide.State.AlphaOther = GR_COMBINE_LOCAL_ITERATED; SetAlphaCombineState(); } } switch (m_framebuffer->WriteMode) { case GR_LFBWRITEMODE_565: case GR_LFBWRITEMODE_888: SetChromaKeyAndAlphaState(); break; } // GL_RECTANGLE_ARB overrides GL_TEXTURE_2D if (m_useRectangleARB) { const GLenum textureTarget = GL_TEXTURE_RECTANGLE_ARB; const bool disableColoralphaTextureUnit1 = !(OpenGL.ColorAlphaUnitColorEnabledState[0] || OpenGL.ColorAlphaUnitAlphaEnabledState[0]); if (disableColoralphaTextureUnit1) { glDisable(textureTarget); glReportError(); } if (OpenGL.ColorAlphaUnit2) { const bool disableColoralphaTextureUnit2 = !(OpenGL.ColorAlphaUnitColorEnabledState[1] || OpenGL.ColorAlphaUnitAlphaEnabledState[1]); if 
(disableColoralphaTextureUnit2) { glActiveTextureARB(OpenGL.ColorAlphaUnit2); glReportError(); glDisable(textureTarget); glReportError(); glActiveTextureARB(OpenGL.ColorAlphaUnit1); glReportError(); } } // The client texture state is already setup correctly since we just // have to adjust the texture rectangle state to the texture_2d state } } else { // restore previous state if (OpenGL.DepthBufferWritting ) { glDepthMask( true ); } if (Glide.State.DepthBufferMode != GR_DEPTHBUFFER_DISABLE) { glEnable( GL_DEPTH_TEST ); } // Restore colormask bool rgb = Glide.State.ColorMask; glColorMask(rgb, rgb, rgb, Glide.State.AlphaMask); if ( OpenGL.Blend ) { glEnable( GL_BLEND ); } if ( InternalConfig.EXT_secondary_color ) { glEnable( GL_COLOR_SUM_EXT ); } glReportError(); // Enable fog? bool enable_fog_texture_unit = OpenGL.FogTextureUnit && ((OpenGL.Fog && InternalConfig.FogMode == OpenGLideFogEmulation_EnvCombine) || Glide.State.ColorCombineInvert || Glide.State.AlphaInvert); if (enable_fog_texture_unit) { glActiveTextureARB(OpenGL.FogTextureUnit); glEnable(GL_TEXTURE_2D); // We're not using glDrawArrays to render the frame buffer, // but without disabling the client state the next texture drawn // by RenderDrawTriangles would get the wrong coordinates. 
// Can be observed in Carmageddon: The sky texture is rendered "too high" if (InternalConfig.EXT_compiled_vertex_array) { glClientActiveTextureARB(OpenGL.FogTextureUnit); glEnableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer(1, GL_FLOAT, 0, &OGLRender.TFog[0]); } } if (OpenGL.Fog && InternalConfig.FogMode != OpenGLideFogEmulation_None && InternalConfig.FogMode != OpenGLideFogEmulation_EnvCombine) { glEnable(GL_FOG); } glReportError(); if (OpenGL.ColorAlphaUnit2) { bool enable_coloralpha_texture_unit_2 = OpenGL.ColorAlphaUnitColorEnabledState[1] || OpenGL.ColorAlphaUnitAlphaEnabledState[1]; if (enable_coloralpha_texture_unit_2) { glActiveTextureARB(OpenGL.ColorAlphaUnit2); glEnable(GL_TEXTURE_2D); if (InternalConfig.EXT_compiled_vertex_array) { glClientActiveTextureARB(OpenGL.ColorAlphaUnit2); glEnableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer(4, GL_FLOAT, 0, &OGLRender.TTexture[0]); } } if (enable_fog_texture_unit || enable_coloralpha_texture_unit_2) { glActiveTextureARB(OpenGL.ColorAlphaUnit1); if (InternalConfig.EXT_compiled_vertex_array) { glClientActiveTextureARB(OpenGL.ColorAlphaUnit1); } } if (!OpenGL.ColorAlphaUnitColorEnabledState[0] && !OpenGL.ColorAlphaUnitAlphaEnabledState[0]) { if (InternalConfig.EXT_compiled_vertex_array) { glDisableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer( 4, GL_FLOAT, 0, NULL ); } glDisable(GL_TEXTURE_2D); } // Restore the previous texture environment glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_COMBINE_EXT); } else { if (enable_fog_texture_unit) { glActiveTextureARB(OpenGL.ColorAlphaUnit1); if (InternalConfig.EXT_compiled_vertex_array) { glClientActiveTextureARB(OpenGL.ColorAlphaUnit1); } } if (OpenGL.Texture == false) { if (InternalConfig.EXT_compiled_vertex_array) { glDisableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer( 4, GL_FLOAT, 0, NULL ); } // GL_RECTANGLE_ARB overrides GL_TEXTURE_2D and this // has already been skipped in setState if (!m_useRectangleARB) 
glDisable(GL_TEXTURE_2D); } // Restore the previous texture environment SetColorCombineState(); } // GL_RECTANGLE_ARB overrides GL_TEXTURE_2D if (m_useRectangleARB) { glDisable(GL_TEXTURE_RECTANGLE_ARB); glReportError(); } glReportError(); // This must be a forced update because GlideState changes of ChromaKeyMode // that don't change the corresponding GL-state are filtered out ForceChromaKeyAndAlphaStateUpdate(); } glReportError(); VERIFY_ACTIVE_TEXTURE_UNIT(OpenGL.ColorAlphaUnit1); VERIFY_TEXTURE_ENABLED_STATE(); } bool Framebuffer::draw(const tilesize* tilesizetable, bool pixelpipeline) { #ifdef OGL_FRAMEBUFFER GlideMsg( "Framebuffer::draw(---, %d)\n", pixelpipeline); #endif glReportErrors("Framebuffer::draw()"); bool init_second_textureunit = pixelpipeline && OpenGL.ColorAlphaUnit2; FxU32* texbuffer = reinterpret_cast(m_texbuffer->Address); if (m_use_client_storage) { glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, true); glReportError(); } // Render the tiles GLint n = 0; GLint x; GLint y = 0; GLint y_step; for(int w = 0; y < m_height && w < MaxTiles; w++, y += y_step) { y_step = tilesizetable[w].y; x = 0; GLint x_step; for(int v = 0; x < m_width && v < MaxTiles; v++, x += x_step) { x_step = tilesizetable[w].x[v]; // Use unique (but always the same) name for each texture in order // to maintain the size and avoid vram memory reallocation GLint texturename = m_textureNames[n]; const TileUpdateState updateState = createTextureData(texbuffer, x, y, x_step, y_step, n); if (updateState != TileUpdateState_TileEmpty) { #ifdef DEBUG_TILE_RENDERING unsigned int color; if (updateState == TileUpdateState_TileDownloadToGPU) // edges of downloaded tiles are red color = 0xff0000ff; else // edges of rendered tiles are cyan color = 0x00ffffff; ((long*) texbuffer)[0] = ((long*) texbuffer)[1] = ((long*) texbuffer)[x_step] = ((long*) texbuffer)[x_step -1] = ((long*) texbuffer)[x_step -2] = ((long*) texbuffer)[2 * x_step -1] = ((long*) texbuffer)[(x_step - 1) * y_step] = ((long*) 
texbuffer)[(x_step - 1) * y_step + 1] = ((long*) texbuffer)[(x_step - 2) * y_step] = ((long*) texbuffer)[x_step * y_step - 1] = ((long*) texbuffer)[x_step * y_step - 2] = ((long*) texbuffer)[x_step * (y_step - 1) - 1] = color; #endif glBindTexture(GL_TEXTURE_2D, texturename); if (init_second_textureunit) { glActiveTextureARB(OpenGL.ColorAlphaUnit2); glBindTexture(GL_TEXTURE_2D, texturename); glActiveTextureARB(OpenGL.ColorAlphaUnit1); } #ifndef DEBUG_TILE_RENDERING if (updateState == TileUpdateState_TileDownloadToGPU || InternalConfig.EXT_Client_Storage == false) { glTexImage2D(GL_TEXTURE_2D, 0, m_glInternalFormat, x_step, y_step, 0, m_glFormat, m_glType, texbuffer); glReportError(); } #endif static struct { const GLfloat bl[4]; const GLfloat br[4]; const GLfloat tr[4]; const GLfloat tl[4]; } texcoords = { {0.0, 0.0, 1.0, 1.0}, {1.0, 0.0, 1.0, 1.0}, {1.0, 1.0, 1.0, 1.0}, {0.0, 1.0, 1.0, 1.0} }; glBegin(GL_QUADS); // counter clockwise glColor3f(1.0, 1.0, 1.0); glTexCoord4fv(&texcoords.bl[0]); if (init_second_textureunit) { glMultiTexCoord4fvARB(OpenGL.ColorAlphaUnit2, &texcoords.bl[0]); } glVertex3f(x, y, m_glDepth); glColor3f(1.0, 1.0, 1.0); glTexCoord4fv(&texcoords.br[0]); if (init_second_textureunit) { glMultiTexCoord4fvARB(OpenGL.ColorAlphaUnit2, &texcoords.br[0]); } glVertex3f(x + x_step, y, m_glDepth); glColor3f(1.0, 1.0, 1.0); glTexCoord4fv(&texcoords.tr[0]); if (init_second_textureunit) { glMultiTexCoord4fvARB(OpenGL.ColorAlphaUnit2, &texcoords.tr[0]); } glVertex3f(x + x_step, y + y_step, m_glDepth); glColor3f(1.0, 1.0, 1.0); glTexCoord4fv(&texcoords.tl[0]); if (init_second_textureunit) { glMultiTexCoord4fvARB(OpenGL.ColorAlphaUnit2, &texcoords.tl[0]); } glVertex3f(x, y + y_step , m_glDepth); glEnd(); glReportError(); // Advance to the next texbuffer location texbuffer += x_step * y_step; } n ++; } } if (m_use_client_storage) { glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE , false); glReportError(); } s_Framebuffer.SetRenderBufferChanged(); return y == 
m_height && x == m_width; } bool Framebuffer::drawCompiledVertexArrays(const tilesize* tilesizetable, int vertexarrayindex, int tilecount, bool pixelpipeline) { #ifdef OGL_FRAMEBUFFER GlideMsg( "Framebuffer::draw(---, %d)\n", pixelpipeline); #endif glReportErrors("Framebuffer::drawCompiledVertexArrays()"); if (m_use_client_storage) { glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, true); glReportError(); } // Finish rendering RenderUnlockArrays(); // Transfer coords to VRAM glLockArraysEXT(vertexarrayindex * 3, tilecount * 6); OGLRender.BufferLocked = true; const bool init_second_textureunit = pixelpipeline && (OpenGL.ColorAlphaUnitColorEnabledState[1] || OpenGL.ColorAlphaUnitAlphaEnabledState[1]); FxU32* texbuffer = reinterpret_cast(m_texbuffer->Address); // Render the tiles GLint n = 0; GLint x; GLint y = 0; GLint y_step; const GLenum textureTarget = m_useRectangleARB ? GL_TEXTURE_RECTANGLE_ARB : GL_TEXTURE_2D; for(int w = 0; y < m_height && w < MaxTiles; w++, y += y_step) { y_step = tilesizetable[w].y; x = 0; GLint x_step; for(int v = 0; x < m_width && v < MaxTiles; v++, x += x_step) { x_step = tilesizetable[w].x[v]; const TileUpdateState updateState = createTextureData(texbuffer, x, y, x_step, y_step, n); if (updateState != TileUpdateState_TileEmpty) { #ifdef DEBUG_TILE_RENDERING unsigned int color; if (updateState == TileUpdateState_TileDownloadToGPU) // edges of downloaded tiles are red color = 0xff0000ff; else // edges of rendered tiles are cyan color = 0x00ffffff; ((long*) texbuffer)[0] = ((long*) texbuffer)[1] = ((long*) texbuffer)[x_step] = ((long*) texbuffer)[x_step -1] = ((long*) texbuffer)[x_step -2] = ((long*) texbuffer)[2 * x_step -1] = ((long*) texbuffer)[x_step * (y_step - 2)] = ((long*) texbuffer)[x_step * (y_step - 1)] = ((long*) texbuffer)[x_step * (y_step - 1) + 1] = ((long*) texbuffer)[x_step * y_step - 1] = ((long*) texbuffer)[x_step * y_step - 2] = ((long*) texbuffer)[x_step * (y_step - 1) - 1] = color; #endif // Use unique (but always the 
same) name for each // texture in order to be able to reuse tile data const GLint texturename = m_textureNames[n]; // The texture rectangle is better suited for video, // which is close to a framebuffer if (init_second_textureunit) { glActiveTextureARB(OpenGL.ColorAlphaUnit2); glReportError(); glBindTexture(textureTarget, texturename); glReportError(); glActiveTextureARB(OpenGL.ColorAlphaUnit1); glReportError(); } glBindTexture(textureTarget, texturename); glReportError(); #ifndef DEBUG_TILE_RENDERING if (updateState == TileUpdateState_TileDownloadToGPU || InternalConfig.EXT_Client_Storage == false) #endif { glTexImage2D(textureTarget, 0, m_glInternalFormat, x_step, y_step, 0, m_glFormat, m_glType, texbuffer); glReportError(); } // Draw the tile glDrawArrays(GL_TRIANGLES, vertexarrayindex * 3 + n * 6, 6); // Advance to the next texbuffer location texbuffer += x_step * y_step; glReportError(); } n++; } } if (m_use_client_storage) { glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE , false); glReportError(); } s_Framebuffer.SetRenderBufferChanged(); return y == m_height && x == m_width; } int Framebuffer::buildVertexArrays(const tilesize* tilesizetable, int vertexarrayindex) { // Compute coordinates for compiled vertex arrays TColorStruct* pC = &OGLRender.TColor[vertexarrayindex]; TVertexStruct* pV = &OGLRender.TVertex[vertexarrayindex]; TTextureStruct* pTS = &OGLRender.TTexture[vertexarrayindex]; int n = 0; GLint y = 0; GLint y_step; for(int w = 0; y < m_height && w < MaxTiles; w++, y += y_step) { y_step = tilesizetable[w].y; GLint x = 0; GLint x_step; for(int v = 0; x < m_width && v < MaxTiles; v++, x += x_step) { x_step = tilesizetable[w].x[v]; // Write coordinates counter clockwise into render buffers pC->ar = pC->ag = pC->ab = pC->br = pC->bg = pC->bb = pC->cr = pC->cg = pC->cb = pC->aa = pC->ba = pC->ca = 1.0f; pV->ax = x; pV->ay = y; pV->bx = x + x_step; pV->by = y; pV->cx = x + x_step; pV->cy = y + y_step; pV->az = pV->bz = pV->cz = m_glDepth; pTS->as = 0.0; 
pTS->at = 0.0; pTS->bs = m_useRectangleARB ? x_step : 1.0f; pTS->bt = 0.0; pTS->cs = m_useRectangleARB ? x_step : 1.0f; pTS->ct = m_useRectangleARB ? y_step : 1.0f; pTS->aq = pTS->bq = pTS->cq = 0.0f; pTS->aoow = pTS->boow = pTS->coow = 1.0f; pC++; pV++; pTS++; pC->ar = pC->ag = pC->ab = pC->br = pC->bg = pC->bb = pC->cr = pC->cg = pC->cb = pC->aa = pC->ba = pC->ca = 1.0f; pV->ax = x + x_step; pV->ay = y + y_step; pV->bx = x; pV->by = y + y_step; pV->cx = x; pV->cy = y; pV->az = pV->bz = pV->cz = m_glDepth; pTS->as = m_useRectangleARB ? x_step : 1.0f; pTS->at = m_useRectangleARB ? y_step : 1.0f; pTS->bs = 0.0; pTS->bt = m_useRectangleARB ? y_step : 1.0f; pTS->cs = 0.0; pTS->ct = 0.0; pTS->aq = pTS->bq = pTS->cq = 0.0f; pTS->aoow = pTS->boow = pTS->coow = 1.0f; pC++; pV++; pTS++; n++; } } return n; } #ifdef __ALTIVEC__ // altivec code inline Framebuffer::TileUpdateState Framebuffer::Convert565Kto8888_AV(FxU16* buffer1, register FxU32* buffer2, register FxU32 width, register FxU32 height, register FxU32 stride, int checksumIndex) { const vector bool short chromakey_565_av = m_ChromaKey.Vector; const int width_av = width >> 3; // 8 16-bit words const int stride_av = stride >> 3; // 8 16-bit words const int jump_av = width_av + stride_av; vector bool short* src_av = (vector bool short*) buffer1; // Setup channel 0 for reading one row of 565 ushorts from src into the L1 cache // This isn't read again soon, just written back once so we can bypass L2 cache const int src_control = (((width_av >> 4) & 0x1f) << 3) + (1 << 8) + (stride_av << 16); vec_dstt(src_av, src_control, 0); int h = height; // loop through the src to check whether anything has to be copied at all vector bool short* stop_zero_av = &src_av[width_av]; do { do { const vector bool short pixels_565_av = *src_av; if (!vec_all_eq(pixels_565_av, chromakey_565_av)) goto create_8888_texture_1_av; // Test clear first before jumping to create_8888_texture_1_av src_av++; } while (src_av != stop_zero_av); src_av += 
stride_av; // Update channel 0 to prefetch the next row into the L1 cache vec_dstt(src_av, src_control, 0); stop_zero_av += jump_av; } while (--h); return TileUpdateState_TileEmpty; create_8888_texture_1_av: // Delete dst up to the last chromakey entry in src stop_zero_av = src_av; src_av = (vector bool short*) buffer1; vector unsigned long* dst_av = (vector unsigned long*) buffer2; const vector unsigned long null_av = vec_splat_u32(0); // We're just writing to dst, no reading must occur, and starting a prefetch is a bad idea h = height; vector bool short* stop_av = &src_av[width_av]; do { do { if (src_av == stop_zero_av) goto create_8888_texture_2_av; // Test clear first // clear cacheline to prevent it from being read-in (32 bytes = 2 altivec writes) // we're just clearing the cache line since we're going to write zeros anyway __dcbz(dst_av, 0); dst_av += 2; src_av++; } while (src_av != stop_av); src_av += stride_av; stop_av += jump_av; } while (--h); return TileUpdateState_TileEmpty; create_8888_texture_2_av: // Build permute vector for storing high/lo 565 pixels into RgbxA // - results in R = r565+ggg, G = gggbbbbb, B = 0, A = glAlpha // -> good for comparison, green and blue are converted afterwards const vector unsigned char permute_hi_av = {0x00, 0x01, 0x12, 0x13, 0x02, 0x03, 0x16, 0x17, 0x04, 0x05, 0x1a, 0x1b, 0x06, 0x07, 0x1e, 0x1f}; // Computing the permute table just takes 2 instructions instead of 1 instruction + 4 memory reads const vector unsigned char permute_lo_av = vec_or(permute_hi_av, vec_splat_u8(8)); // const vector unsigned long alpha_8888_av = {m_glAlpha, m_glAlpha, m_glAlpha, m_glAlpha}; const vector unsigned long alpha_8888_av = m_glAlpha.Vector; // Build chromakey and alpha RgbxA vector const vector unsigned long chromakey_RgbxA_av = vec_perm((const vector unsigned long) chromakey_565_av, alpha_8888_av, permute_lo_av); // Constants const vector unsigned long const_3_av = vec_splat_u32(3); const vector unsigned long const_5_av = 
vec_splat_u32(5); // R5G6B500AA color masks const vector unsigned long mask_8888_ra = {0xf80000ff, 0xf80000ff, 0xf80000ff, 0xf80000ff}; const vector unsigned long mask_8888_g = {0x07e00000, 0x07e00000, 0x07e00000, 0x07e00000}; // Computing the mask just takes 2 instructions instead of 1 instruction + 4 memory reads const vector unsigned long mask_8888_b = vec_sr(mask_8888_ra, vec_splat_u32(11)); vector unsigned long pixels_8888_src_av; vector bool long mask; vector unsigned long p; vector unsigned long q; vector unsigned long pixels_8888_dst_av; // Checksum the tile vector unsigned long c = null_av; vector unsigned long d; // Continue the loop and convert pixels from 565 to 8888 vec_dstt(src_av, src_control, 0); do { do { const vector unsigned long pixels_565_av = (const vector unsigned long) (*src_av); // tile checksum part 1 d = vec_sr(c, const_5_av); c = vec_add(c, pixels_565_av); // restore chroma key for next update *src_av++ = chromakey_565_av; // hi-word pixels pixels_8888_src_av = vec_perm(pixels_565_av, alpha_8888_av, permute_hi_av); mask = vec_cmpeq(pixels_8888_src_av, chromakey_RgbxA_av); // Keep red and alpha component pixels_8888_dst_av = vec_and(pixels_8888_src_av, mask_8888_ra); // add green component p = vec_and(pixels_8888_src_av, mask_8888_g); q = vec_sr(p, const_3_av); pixels_8888_dst_av = vec_or(pixels_8888_dst_av, q); // add blue component p = vec_and(pixels_8888_src_av, mask_8888_b); q = vec_sr(p, const_5_av); pixels_8888_dst_av = vec_or(pixels_8888_dst_av, q); // We're just writing to dst and thus can clear the cacheline in order // to avoid the read-in from system memory (32 bytes = 2 altivec writes) // Note: This is a G4 hack, but on a G5 the code will be fast enough anyway __dcbz(dst_av, 0); // Select between pixels and chromakey *dst_av++ = vec_sel(pixels_8888_dst_av, null_av, mask); // tile checksum part 2 c = vec_xor(c, d); // lo-word pixels pixels_8888_src_av = vec_perm(pixels_565_av, alpha_8888_av, permute_lo_av); mask = 
vec_cmpeq(pixels_8888_src_av, chromakey_RgbxA_av); // Keep red and alpha component pixels_8888_dst_av = vec_and(pixels_8888_src_av, mask_8888_ra); // add green component p = vec_and(pixels_8888_src_av, mask_8888_g); q = vec_sr(p, const_3_av); pixels_8888_dst_av = vec_or(pixels_8888_dst_av, q); // add blue component p = vec_and(pixels_8888_src_av, mask_8888_b); q = vec_sr(p, const_5_av); pixels_8888_dst_av = vec_or(pixels_8888_dst_av, q); // Select between pixels and chromakey *dst_av++ = vec_sel(pixels_8888_dst_av, null_av, mask); } while (src_av != stop_av); src_av += stride_av; vec_dstt(src_av, src_control, 0); stop_av += jump_av; } while (--h); // Skip downloading tile data to the gpu if the content hasn't changed if (vec_all_eq(c, m_tileChecksums[checksumIndex])) return TileUpdateState_TileDrawOnly; // The tile has been converted, been changed and must be downloaded to the gpu m_tileChecksums[checksumIndex] = c; return TileUpdateState_TileDownloadToGPU; } #endif // Non-Altivec-code inline Framebuffer::TileUpdateState Framebuffer::Convert565Kto8888(FxU16* buffer1, register FxU32* buffer2, register FxU32 width, register FxU32 height, register FxU32 stride) { // Process two pixels at once const register unsigned long chromakey1 = m_ChromaKey.Scalar << 16; const register unsigned long chromakey2 = m_ChromaKey.Scalar; const register unsigned long chromakey12 = chromakey1 | chromakey2; width = width >> 1; stride = stride >> 1; register unsigned long pixel; register unsigned long* stop; register unsigned long jump = width + stride; register unsigned long* src = reinterpret_cast(buffer1); // check if tile must be processed in advance // to avoid useless writes to main memory // The tile should at least fit into the second level cache // so reading it again wouldn't hurt as much as doing needless writes register unsigned long h = height; stop = &src[width]; do { do { pixel = *src++; if (pixel != chromakey12) goto create_8888_texture; } while (src != stop); src += 
stride; stop += jump; } while (--h); return TileUpdateState_TileEmpty; create_8888_texture: const register unsigned long alpha = m_glAlpha.Scalar; const register unsigned long null = 0x00000000; const register unsigned long mask_pixel1 = 0xffff0000; const register unsigned long mask_pixel2 = 0x0000ffff; const register unsigned long mask_pixel1_r = 0xf8000000; const register unsigned long mask_pixel1_g = 0x07e00000; const register unsigned long mask_pixel1_b = 0x001f0000; const register unsigned long mask_pixel2_r = 0x0000f800; const register unsigned long mask_pixel2_g = 0x000007e0; const register unsigned long mask_pixel2_b = 0x0000001f; src = reinterpret_cast(buffer1); stop = &src[width]; do { do { // GL_RGBA pixel = *src; if (pixel == chromakey12) { *buffer2++ = null; *buffer2++ = null; } else { *src = chromakey12; if ( (pixel & mask_pixel1) == chromakey1) { *buffer2++ = null; } else { *buffer2++ = ( alpha | // A ( pixel & mask_pixel1_b ) >> 5 | // B ( pixel & mask_pixel1_g ) >> 3 | // G ( pixel & mask_pixel1_r )); // R } if ( (pixel & mask_pixel2) == chromakey2) { *buffer2++ = null; } else { *buffer2++ = ( alpha | // A ( pixel & mask_pixel2_b ) << 11 | // B ( pixel & mask_pixel2_g ) << 13 | // G ( pixel & mask_pixel2_r ) << 16); // R } } src++; } while (src != stop); src += stride; stop += jump; } while (--height); return TileUpdateState_TileDownloadToGPU; } inline Framebuffer::TileUpdateState Framebuffer::Convert1555Kto8888(FxU16* buffer1, register FxU32* buffer2, register FxU32 width, register FxU32 height, register FxU32 stride) { // Process two pixels at once register unsigned long pixel; register unsigned long x; register unsigned long* src = reinterpret_cast(buffer1); const unsigned long null = 0x00000000; register unsigned long dstpixel = null; const register unsigned long chromakey1 = m_ChromaKey.Scalar << 16; const register unsigned long chromakey2 = m_ChromaKey.Scalar; const register unsigned long chromakey12 = chromakey1 | chromakey2; const register 
unsigned long alpha = m_glAlpha.Scalar; const register unsigned long mask_pixel1 = 0xffff0000; const register unsigned long mask_pixel2 = 0x0000ffff; const register unsigned long mask_pixel1_r = 0x7c000000; const register unsigned long mask_pixel1_g = 0x03e00000; const register unsigned long mask_pixel1_b = 0x001f0000; const register unsigned long mask_pixel2_r = 0x00007c00; const register unsigned long mask_pixel2_g = 0x000003e0; const register unsigned long mask_pixel2_b = 0x0000001f; width >>= 1; stride >>= 1; do { x = width; do { // GL_RGBA pixel = *src; if (pixel == chromakey12) { *buffer2++ = null; *buffer2++ = null; } else { *src = chromakey12; if ( (pixel & mask_pixel1) == chromakey1) { *buffer2++ = null; } else { dstpixel = ( alpha | // A ( pixel & mask_pixel1_b ) >> 5 | // B ( pixel & mask_pixel1_g ) >> 2 | // G ( pixel & mask_pixel1_r ) << 1); // R *buffer2++ = dstpixel; } if ( (pixel & mask_pixel2) == chromakey2) { *buffer2++ = null; } else { dstpixel = ( alpha | // A ( pixel & mask_pixel2_b ) << 11 | // B ( pixel & mask_pixel2_g ) << 14 | // G ( pixel & mask_pixel2_r ) << 17); // R *buffer2++ = dstpixel; } } src++; } while (--x); src += stride; } while (--height); return dstpixel != null ? 
TileUpdateState_TileDownloadToGPU : TileUpdateState_TileEmpty; } inline Framebuffer::TileUpdateState Framebuffer::ConvertARGB8888Kto8888(FxU32* buffer1, register FxU32* buffer2, register FxU32 width, register FxU32 height, register FxU32 stride) { // Process two pixels at once const register unsigned long chromakey = m_ChromaKey.Scalar || (m_ChromaKey.Scalar << 16); register unsigned long pixel; register unsigned long* stop; register unsigned jump = width + stride; register unsigned long* src = buffer1; // check if tile must be processed in advance // to avoid useless writes to main memory // The tile should at least fit into the second level cache // so reading it again wouldn't hurt as much as doing needless writes register unsigned long h = height; stop = &src[width]; do { do { pixel = *src++; if (pixel != chromakey) goto create_8888_texture; } while (src != stop); src += stride; stop += jump; } while (--h); return TileUpdateState_TileEmpty; create_8888_texture: const register unsigned long alpha = m_glAlpha.Scalar; src = buffer1; stop = &src[width]; do { do { // GL_RGBA pixel = *src; if (pixel == chromakey) { *buffer2++ = 0; } else { *src = chromakey; *buffer2++ = (pixel << 8) | alpha; } src++; } while (src != stop); src += stride; stop += jump; } while (--height); return TileUpdateState_TileDownloadToGPU; } inline Framebuffer::TileUpdateState Framebuffer::createTextureData(FxU32* texbuffer, FxU32 x, FxU32 y, FxU32 x_step, FxU32 y_step, int checksumIndex) { FxU32 stride = (m_width - x_step); FxU32 index = x + y * m_width; if (m_framebuffer->WriteMode == GR_LFBWRITEMODE_565) { #ifdef __ALTIVEC__ if (UserConfig.VectorUnitType == OpenGLideVectorUnitType_Altivec) { #ifdef OGL_FRAMEBUFFER const vector unsigned long c = m_tileChecksums[checksumIndex]; #endif TileUpdateState state = Convert565Kto8888_AV(&m_framebuffer->Address[index], texbuffer, x_step, y_step, stride, checksumIndex); #ifdef OGL_FRAMEBUFFER GlideMsg("Tile %d (%d,%d)-(%d,%d) update state is %s 
(%vlx)->(%vlx)\n", checksumIndex, x, y, x_step, y_step, (state ==TileUpdateState_TileDownloadToGPU) ? "DownLoadToGPU" : ((state == TileUpdateState_TileDrawOnly) ? "DrawOnly" : "TileEmpty"), c, m_tileChecksums[checksumIndex]); #endif return state; } else #endif return Convert565Kto8888(&m_framebuffer->Address[index], texbuffer, x_step, y_step, stride); } else if (m_framebuffer->WriteMode == GR_LFBWRITEMODE_1555) { return Convert1555Kto8888(&m_framebuffer->Address[index], texbuffer, x_step, y_step, stride); } else if (m_framebuffer->WriteMode == GR_LFBWRITEMODE_888) { FxU32* framebuffer = &reinterpret_cast(m_framebuffer->Address)[index]; return ConvertARGB8888Kto8888(framebuffer, texbuffer, x_step, y_step, stride); } else { return TileUpdateState_TileEmpty; } } \ No newline at end of file +//************************************************************** //* OpenGLide - Glide to OpenGL Wrapper //* http://openglide.sourceforge.net //* //* framebuffer emulation //* //* OpenGLide is OpenSource under LGPL license //* Mac version and additional features by Jens-Olaf Hemprich //************************************************************** #include "Framebuffer.h" #include "Glide.h" #include "GlideApplication.h" #include "GlideSettings.h" #include "GLRender.h" #include "GLRenderUpdateState.h" #include "GLColorAlphaCombineEnvTables.h" // check if tile needs to be displayed #define CHECK_RENDER_TILE // Display small dots at opposite corners of rendered framebuffer tiles //#define DEBUG_TILE_RENDERING Framebuffer::Framebuffer() : m_x_step_start(0) , m_y_step_start(0) , m_x_step_start_opaque(0) , m_y_step_start_opaque(0) , m_width(0) , m_height(0) , m_framebuffer(NULL) , m_texbuffer(NULL) , m_origin(GR_ORIGIN_UPPER_LEFT) , m_glInternalFormat(-1) , m_glFormat(-1) , m_glType(-1) , m_glDepth(1.0f) , m_format_valid(false) , m_use_client_storage(false) , m_useRectangleARB(false) , m_must_clear_buffer(true) , m_custom_tilesizes(NULL) { // Don't set the checksum to 0, as this would 
cause white screen in Carmageddon // because fully black tile also have a 0 checksum and no texture data would be // download at all (and the tile would be rendered as if TEXTURE_2D was disabled) memset(m_tileChecksums, 0xff, sizeof(vector unsigned long) * m_tileCount); } Framebuffer::~Framebuffer() { } bool Framebuffer::initialise_buffers(BufferStruct* framebuffer, BufferStruct* texbuffer, FxU32 width, FxU32 height, const tilesize* tilesizetable) { #ifdef OGL_FRAMEBUFFER GlideMsg( "GlideFrameBuffer::initialise_buffers(---, ---, %d, %d, ---)\n", width, height); #endif m_custom_tilesizes = tilesizetable; return initialise_buffers(framebuffer, texbuffer, width, height, 0, 0); } bool Framebuffer::initialise_buffers(BufferStruct* framebuffer, BufferStruct* texbuffer, FxU32 width, FxU32 height, FxU32 x_tile, FxU32 y_tile) { #ifdef OGL_FRAMEBUFFER GlideMsg( "Framebuffer::initialise_buffers(---, ---, %d, %d, %d, %d)\n", width, height, x_tile, y_tile); #endif m_framebuffer = framebuffer; m_texbuffer = texbuffer; m_framebuffer->WriteMode = m_texbuffer->WriteMode = GR_LFBWRITEMODE_UNUSED; m_width = width; m_height = height; // find out largest texture size GLint tile_size; glGetIntegerv(GL_MAX_TEXTURE_SIZE, &tile_size); m_x_step_start_opaque = tile_size; m_y_step_start_opaque = tile_size; m_x_step_start = min(tile_size, x_tile); m_y_step_start = min(tile_size, y_tile); m_x_step_start = max(16, m_x_step_start); m_y_step_start = max(16, m_y_step_start); m_useRectangleARB = InternalConfig.ARB_texture_rectangle && InternalConfig.EXT_compiled_vertex_array; // The texture priority is set to minimun because // frame buffer textures are never used a second time // @todo: This is not true anymore in all cases // because of the altivec checksum feature const GLfloat zero = 0.0f; glGenTextures(m_tileCount, &m_textureNames[0]); glPrioritizeTextures(m_tileCount, &m_textureNames[0], &zero); for(int i = 0; i < m_tileCount; i++) { const GLenum textureTarget = m_useRectangleARB ? 
GL_TEXTURE_RECTANGLE_ARB : GL_TEXTURE_2D; glBindTexture(textureTarget, m_textureNames[i]); if (m_useRectangleARB) { // This will not probably not work right now, because // we're using RGBA (the OS9 prefered texture format) // @todo: use ABGR??? in order to avoid byte swizzling // glTextureRangeAPPLE(GLenum target, GLsizei length, GLvoid *pointer); glTexParameteri(textureTarget, GL_TEXTURE_STORAGE_HINT_APPLE, GL_STORAGE_CACHED_APPLE); glReportError(); } glTexParameteri(textureTarget, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(textureTarget, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(textureTarget, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(textureTarget, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); } // If a game has its own tilesize table, use // the largest tiles for opaque renderings GLint y_step = y_tile == 0 ? m_y_step_start_opaque : m_y_step_start; // init default/opaque tilesize table int w = 0; for(FxU32 y = 0; y < m_height && w < MaxTiles ; y += y_step, w++) { while (m_height - y < y_step) { y_step = y_step >> 1; } m_tilesizes[w].y = y_step; GLint x_step = x_tile == 0 ? 
m_x_step_start_opaque : m_x_step_start; int v = 0; for(FxU32 x = 0; x < m_width && v < MaxTiles; x += x_step, v++ ) { while (m_width - x < x_step) { x_step = x_step >> 1; } m_tilesizes[w].x[v] = x_step; } } // Build compiled vertex arrays if (InternalConfig.EXT_compiled_vertex_array) { // Store various render buffers indices m_tilesizesVertexArrayIndex = OGLRender.FrameBufferStartIndex; m_tilesizesCount = buildVertexArrays(&m_tilesizes[0], m_tilesizesVertexArrayIndex); if (m_custom_tilesizes) { m_customtilesizesVertexArrayIndex = m_tilesizesVertexArrayIndex + m_tilesizesCount * 2; m_customtilesizesCount = buildVertexArrays(m_custom_tilesizes, m_customtilesizesVertexArrayIndex); } } return m_width > 0 && m_height > 0 && m_x_step_start > 0 && m_y_step_start > 0 && m_format_valid; } void Framebuffer::free_buffers() { #ifdef OGL_FRAMEBUFFER GlideMsg( "Framebuffer::free_buffers()\n"); #endif if (m_tilesizes) FreeObject(m_tilesizes); glDeleteTextures(m_tileCount, &m_textureNames[0]); } void Framebuffer::initialise_format(GrLfbWriteMode_t writemode) { #if defined(OGL_PART_DONE) || defined(OGL_FRAMEBUFFER) GlideMsg("Framebuffer::initialise_format(0x%x)\n", writemode); #endif // Enlarge buffer? if (writemode >= GR_LFBWRITEMODE_888 && (m_framebuffer->WriteMode < GR_LFBWRITEMODE_888 || m_framebuffer->WriteMode == GR_LFBWRITEMODE_UNUSED) && m_framebuffer->Address) { // Delete existing buffer FreeFrameBuffer(m_framebuffer->Address); m_framebuffer->Address = NULL; m_texbuffer->Address = NULL; } // Allocate 32-bit buffer (16bit buffer has been allocated in grSstWinOpen() if (m_framebuffer->Address == NULL) { unsigned long openglpixels = OpenGL.WindowWidth * OpenGL.WindowHeight; // Framebuffer can be written to with 16bit or 32bit data unsigned long buffertypesize = (writemode >= GR_LFBWRITEMODE_888) ? 
sizeof(FxU32) : sizeof(FxU16); Glide.FrameBuffer.Address = (FxU16*) AllocFrameBuffer(Glide.WindowTotalPixels * buffertypesize + openglpixels * sizeof(FxU32), 1); Glide.TempBuffer.Address = &Glide.FrameBuffer.Address[Glide.WindowTotalPixels * buffertypesize >> 1]; memset( Glide.FrameBuffer.Address, 0, Glide.WindowTotalPixels * buffertypesize); memset( Glide.TempBuffer.Address, 0, openglpixels * sizeof(FxU32)); } m_framebuffer->WriteMode = writemode; m_glInternalFormat = 4; m_glFormat = GL_RGBA; m_glType = GL_UNSIGNED_BYTE; FxU16 chromakeyvalue; switch (writemode) { case GR_LFBWRITEMODE_565: chromakeyvalue = s_GlideApplication.GetType() == GlideApplication::Carmageddon ? 0x1f1f : 0x07ff; m_format_valid = true; break; case GR_LFBWRITEMODE_1555: chromakeyvalue = 0x03ff; m_format_valid = true; break; case GR_LFBWRITEMODE_888: chromakeyvalue = 0x7ffdfeff; m_format_valid = true; break; default: chromakeyvalue = 0x0; m_format_valid = false; break; } // When the chromakeyvalue changes, the buffer has to be cleared if (chromakeyvalue != m_ChromaKey.Scalar) { SetChromaKeyValue(chromakeyvalue); m_must_clear_buffer = true; } } bool Framebuffer::begin_write() { #ifdef OGL_FRAMEBUFFER GlideMsg("Framebuffer::begin_write()\n"); #endif if (m_must_clear_buffer) { Clear(); m_must_clear_buffer = false; } return true; } void Framebuffer::Clear() { #ifdef OGL_FRAMEBUFFER GlideMsg( "Framebuffer::Clear()\n"); #endif const FxU16 chromakey = GetChromaKeyValue(); const FxU32 count = m_width * m_height ; FxU16* framebuffer = m_framebuffer->Address; for ( int i = 0; i < count; i++) { framebuffer[i] = chromakey; } } bool Framebuffer::end_write(FxU32 alpha, GLfloat depth, bool pixelpipeline) { #ifdef OGL_FRAMEBUFFER GlideMsg("Framebuffer::end_write(%d, %f, %d)\n", alpha, depth, pixelpipeline); #endif m_glDepth = depth; #ifdef __ALTIVEC__ for(int i = 0; i < 4; i++) { (&m_glAlpha.Scalar)[i] = alpha; } #else m_glAlpha.Scalar = alpha; #endif // if all pixels are invisible, nothing must be rendered. 
// The pixel conversion functions assume alpha is != 0 in order // to determine if a tile contains any pixels to be rendered. if (m_glAlpha.Scalar == 0) return false; set_gl_state(pixelpipeline); if (InternalConfig.EXT_compiled_vertex_array) { if (m_custom_tilesizes) { drawCompiledVertexArrays(m_custom_tilesizes, m_customtilesizesVertexArrayIndex, m_customtilesizesCount, pixelpipeline); } else { drawCompiledVertexArrays(m_tilesizes, m_tilesizesVertexArrayIndex, m_tilesizesCount, pixelpipeline); } } else { const tilesize* tilesizes = m_custom_tilesizes ? m_custom_tilesizes : m_tilesizes; draw(tilesizes, pixelpipeline); } restore_gl_state(pixelpipeline); return true; } bool Framebuffer::end_write(FxU32 alpha) { #ifdef OGL_DONE GlideMsg("Framebuffer::end_write(%d)\n", alpha); #endif // draw frame buffer // @todo: Depth should OpenGL.ZNear, but that breaks overlays in Myth FxBool result = end_write(alpha, 0.0, false); return result; } bool Framebuffer::end_write() { #ifdef OGL_FRAMEBUFFER GlideMsg( "Framebuffer::end_write( )\n" ); #endif return end_write(0x000000ff); } bool Framebuffer::end_write_opaque() { #ifdef OGL_FRAMEBUFFER GlideMsg("Framebuffer::end_write_opaque()\n"); #endif // @todo: Depth should OpenGL.ZNear, but that breaks overlays in Myth return end_write(0x000000ff, 0.0, false); } void Framebuffer::set_gl_state(bool pixelpipeline) { #ifdef OGL_FRAMEBUFFER GlideMsg( "Framebuffer::set_gl_state(%d)\n", pixelpipeline); #endif glReportErrors("Framebuffer::set_gl_state"); VERIFY_ACTIVE_TEXTURE_UNIT(OpenGL.ColorAlphaUnit1); // Disable the cull mode glDisable(GL_CULL_FACE); // Disable clip volume hint manually to avoid recursion if (InternalConfig.EXT_clip_volume_hint && OpenGL.ClipVerticesEnabledState) { glHint(GL_CLIP_VOLUME_CLIPPING_HINT_EXT, GL_FASTEST); } if (pixelpipeline) { if (OpenGL.ColorAlphaUnit2) { // Pixelpipeline support for env cobine based rendering: // Framebuffer pixels must be routed through the coloralpha unit // as if they were produced by 
the vertex iterators without an // additional GL texture unit -> source must be changed accordingly m_bRestoreColorCombine = false; if (Glide.State.ColorCombineLocal == GR_COMBINE_LOCAL_ITERATED) { Glide.State.ColorCombineLocal = GR_COMBINE_LOCAL_PIXELPIPELINE; m_bRestoreColorCombine = true; } if (Glide.State.ColorCombineOther == GR_COMBINE_OTHER_ITERATED) { Glide.State.ColorCombineOther = GR_COMBINE_OTHER_PIXELPIPELINE; m_bRestoreColorCombine = true; } if (m_bRestoreColorCombine) SetColorCombineState(); m_bRestoreAlphaCombine = false; if (Glide.State.AlphaLocal == GR_COMBINE_LOCAL_ITERATED) { Glide.State.AlphaLocal = GR_COMBINE_LOCAL_PIXELPIPELINE; m_bRestoreAlphaCombine = true; } if (Glide.State.AlphaOther == GR_COMBINE_OTHER_ITERATED) { Glide.State.AlphaOther = GR_COMBINE_OTHER_PIXELPIPELINE; m_bRestoreAlphaCombine = true; } if (m_bRestoreAlphaCombine) SetAlphaCombineState(); // Update the opengl state for the pixel pipeline RenderUpdateState(); // If the write mode doesn't provide alpha then m_glAlpha is used // as the constant alpha value, and we can use the alpha test // to mask out chromakey pixels switch (m_framebuffer->WriteMode) { case GR_LFBWRITEMODE_565: case GR_LFBWRITEMODE_888: glEnable(GL_ALPHA_TEST); const GLenum alphaTestFunction = GL_EQUAL; const GLfloat alphaTestReferenceValue= m_glAlpha.Scalar * D1OVER255; OpenGL.AlphaTestFunction = alphaTestFunction; OpenGL.AlphaReferenceValue = alphaTestReferenceValue; glAlphaFunc(alphaTestFunction, alphaTestReferenceValue); glReportError(); break; } } // GL_RECTANGLE_ARB overrides GL_TEXTURE_2D if (m_useRectangleARB) { const GLenum textureTarget = GL_TEXTURE_RECTANGLE_ARB; const bool enableColoralphaTextureUnit1 = OpenGL.ColorAlphaUnitColorEnabledState[0] || OpenGL.ColorAlphaUnitAlphaEnabledState[0]; if (enableColoralphaTextureUnit1) { glEnable(textureTarget); glReportError(); } if (OpenGL.ColorAlphaUnit2) { const bool enableColoralphaTextureUnit2 = OpenGL.ColorAlphaUnitColorEnabledState[1] || 
OpenGL.ColorAlphaUnitAlphaEnabledState[1]; if (enableColoralphaTextureUnit2) { glActiveTextureARB(OpenGL.ColorAlphaUnit2); glReportError(); glEnable(textureTarget); glReportError(); glActiveTextureARB(OpenGL.ColorAlphaUnit1); glReportError(); } } // The client texture state is already setup correctly since we just // have to adjust the texture rectangle state // (the texture_2d state is not changed when the pixelpipeline mode is active) } // Set the origin with clipping glMatrixMode(GL_PROJECTION); glLoadIdentity(); if (m_origin == GR_ORIGIN_LOWER_LEFT) { glOrtho(Glide.State.ClipMinX, Glide.State.ClipMaxX, Glide.State.ClipMinY, Glide.State.ClipMaxY, OpenGL.ZNear, OpenGL.ZFar); glViewport(OpenGL.OriginX + OpenGL.ClipMinX, OpenGL.OriginY + OpenGL.ClipMinY, OpenGL.ClipMaxX - OpenGL.ClipMinX, OpenGL.ClipMaxY - OpenGL.ClipMinY); } else { glOrtho(Glide.State.ClipMinX, Glide.State.ClipMaxX, Glide.State.ClipMaxY, Glide.State.ClipMinY, OpenGL.ZNear, OpenGL.ZFar); glViewport(OpenGL.OriginX + OpenGL.ClipMinX, OpenGL.OriginY + OpenGL.WindowHeight - OpenGL.ClipMaxY, OpenGL.ClipMaxX - OpenGL.ClipMinX, OpenGL.ClipMaxY - OpenGL.ClipMinY); } // The scissor rectangle is not reset, because scissor mode // is only enabled when clearing the buffer glMatrixMode(GL_MODELVIEW); glReportError(); } else { // disable blend glDisable(GL_BLEND); // disable depth buffer glDepthMask(false); // Enable colormask glColorMask( true, true, true, false); // Needed for displaying in-game menus if (Glide.State.DepthBufferMode != GR_DEPTHBUFFER_DISABLE) { glDisable(GL_DEPTH_TEST); } glEnable(GL_ALPHA_TEST); // Update state as we're calling update triggers on restore const GLenum alphaTestFunction = GL_GREATER; const GLfloat alphaTestReferenceValue= 0.0; OpenGL.AlphaTestFunction = alphaTestFunction; OpenGL.AlphaReferenceValue = alphaTestReferenceValue; glAlphaFunc(alphaTestFunction, alphaTestReferenceValue); glReportError(); if (InternalConfig.EXT_secondary_color) { glDisable(GL_COLOR_SUM_EXT); 
glReportError(); } // Reset the clipping window // and set the origin glMatrixMode(GL_PROJECTION); glLoadIdentity(); if (m_origin == GR_ORIGIN_LOWER_LEFT) { glOrtho(0, Glide.WindowWidth, 0, Glide.WindowHeight, OpenGL.ZNear, OpenGL.ZFar); glViewport(OpenGL.OriginX, OpenGL.OriginY, OpenGL.WindowWidth, OpenGL.WindowHeight); } else { glOrtho(0, Glide.WindowWidth, Glide.WindowHeight, 0, OpenGL.ZNear, OpenGL.ZFar); glViewport(OpenGL.OriginX, OpenGL.OriginY, OpenGL.WindowWidth, OpenGL.WindowHeight); } // The scissor rectangle is not changed, because scissor mode // is only enabled when clearing the buffer glMatrixMode(GL_MODELVIEW); glReportError(); // Disable fog bool disable_fog_texture_unit = OpenGL.FogTextureUnit; if (disable_fog_texture_unit) { glActiveTextureARB(OpenGL.FogTextureUnit); if (InternalConfig.EXT_compiled_vertex_array) { glClientActiveTextureARB(OpenGL.FogTextureUnit); glDisableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer(4, GL_FLOAT, 0, NULL); } glDisable(GL_TEXTURE_2D); } if (OpenGL.Fog && InternalConfig.FogMode != OpenGLideFogEmulation_None && InternalConfig.FogMode != OpenGLideFogEmulation_EnvCombine) { glDisable(GL_FOG); } glReportError(); // enable framebuffer texture unit if (OpenGL.ColorAlphaUnit2) { bool disable_coloralpha_texture_unit_2 = OpenGL.ColorAlphaUnitColorEnabledState[1] || OpenGL.ColorAlphaUnitAlphaEnabledState[1]; if (disable_coloralpha_texture_unit_2) { glActiveTextureARB(OpenGL.ColorAlphaUnit2); if (InternalConfig.EXT_compiled_vertex_array) { glClientActiveTextureARB(OpenGL.ColorAlphaUnit2); glDisableClientState(GL_TEXTURE_COORD_ARRAY); // On MacOS9 (Classic?) 
the texcoord pointer needs to be reset // to the default value when glLockArrays/glUnlockArrays is used glTexCoordPointer( 4, GL_FLOAT, 0, NULL ); } glDisable(GL_TEXTURE_2D); } if (disable_fog_texture_unit || disable_coloralpha_texture_unit_2) { glActiveTextureARB(OpenGL.ColorAlphaUnit1); if (InternalConfig.EXT_compiled_vertex_array) { glClientActiveTextureARB(OpenGL.ColorAlphaUnit1); } } if (!OpenGL.ColorAlphaUnitColorEnabledState[0] && !OpenGL.ColorAlphaUnitAlphaEnabledState[0]) { glEnable(GL_TEXTURE_2D); if (InternalConfig.EXT_compiled_vertex_array) { glEnableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer(4, GL_FLOAT, 0, &OGLRender.TTexture[0]); } } } else { if (disable_fog_texture_unit) { glActiveTextureARB(OpenGL.ColorAlphaUnit1); if (InternalConfig.EXT_compiled_vertex_array) { glClientActiveTextureARB(OpenGL.ColorAlphaUnit1); } } if (OpenGL.Texture == false) { // GL_RECTANGLE_ARB overrides GL_RECTANGLE_2D if (!m_useRectangleARB) glEnable(GL_TEXTURE_2D); if (InternalConfig.EXT_compiled_vertex_array) { glEnableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer(4, GL_FLOAT, 0, &OGLRender.TTexture[0]); } } } if (m_useRectangleARB) { glEnable(GL_TEXTURE_RECTANGLE_ARB); glReportError(); } glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE); glReportError(); } } void Framebuffer::restore_gl_state(bool pixelpipeline) { #ifdef OGL_FRAMEBUFFER GlideMsg( "Framebuffer::restore_gl_state(%d)\n", pixelpipeline); #endif glReportErrors("Framebuffer::restore_gl_state"); // Restore the cull mode switch (Glide.State.CullMode) { case GR_CULL_DISABLE: break; case GR_CULL_NEGATIVE: case GR_CULL_POSITIVE: glEnable(GL_CULL_FACE); break; } if (InternalConfig.EXT_clip_volume_hint && OpenGL.ClipVerticesEnabledState) { glHint(GL_CLIP_VOLUME_CLIPPING_HINT_EXT, GL_NICEST); } // Restore the clipping window glMatrixMode(GL_PROJECTION); glLoadIdentity(); if ( Glide.State.OriginInformation == GR_ORIGIN_LOWER_LEFT ) { glOrtho(Glide.State.ClipMinX, Glide.State.ClipMaxX, 
Glide.State.ClipMinY, Glide.State.ClipMaxY, OpenGL.ZNear, OpenGL.ZFar); glViewport(OpenGL.OriginX + OpenGL.ClipMinX, OpenGL.OriginY + OpenGL.ClipMinY, OpenGL.ClipMaxX - OpenGL.ClipMinX, OpenGL.ClipMaxY - OpenGL.ClipMinY); } else { glOrtho(Glide.State.ClipMinX, Glide.State.ClipMaxX, Glide.State.ClipMaxY, Glide.State.ClipMinY, OpenGL.ZNear, OpenGL.ZFar); glViewport(OpenGL.OriginX + OpenGL.ClipMinX, OpenGL.OriginY + OpenGL.WindowHeight - OpenGL.ClipMaxY, OpenGL.ClipMaxX - OpenGL.ClipMinX, OpenGL.ClipMaxY - OpenGL.ClipMinY); } // The scissor rectangle is not reset, because scissor mode // is only enabled when clearing the buffer glMatrixMode( GL_MODELVIEW ); glReportError(); if (pixelpipeline) { if (OpenGL.ColorAlphaUnit2) { // restore current values if (m_bRestoreColorCombine) { if (Glide.State.ColorCombineLocal == GR_COMBINE_LOCAL_PIXELPIPELINE) Glide.State.ColorCombineLocal = GR_COMBINE_LOCAL_ITERATED; if (Glide.State.ColorCombineOther == GR_COMBINE_OTHER_PIXELPIPELINE) Glide.State.ColorCombineOther = GR_COMBINE_OTHER_ITERATED; SetColorCombineState(); } if(m_bRestoreAlphaCombine) { if (Glide.State.AlphaLocal == GR_COMBINE_LOCAL_PIXELPIPELINE) Glide.State.AlphaLocal = GR_COMBINE_LOCAL_ITERATED; if (Glide.State.AlphaOther == GR_COMBINE_OTHER_PIXELPIPELINE) Glide.State.AlphaOther = GR_COMBINE_OTHER_ITERATED; SetAlphaCombineState(); } } switch (m_framebuffer->WriteMode) { case GR_LFBWRITEMODE_565: case GR_LFBWRITEMODE_888: SetChromaKeyAndAlphaState(); break; } // GL_RECTANGLE_ARB overrides GL_TEXTURE_2D if (m_useRectangleARB) { const GLenum textureTarget = GL_TEXTURE_RECTANGLE_ARB; const bool disableColoralphaTextureUnit1 = !(OpenGL.ColorAlphaUnitColorEnabledState[0] || OpenGL.ColorAlphaUnitAlphaEnabledState[0]); if (disableColoralphaTextureUnit1) { glDisable(textureTarget); glReportError(); } if (OpenGL.ColorAlphaUnit2) { const bool disableColoralphaTextureUnit2 = !(OpenGL.ColorAlphaUnitColorEnabledState[1] || OpenGL.ColorAlphaUnitAlphaEnabledState[1]); if
(disableColoralphaTextureUnit2) { glActiveTextureARB(OpenGL.ColorAlphaUnit2); glReportError(); glDisable(textureTarget); glReportError(); glActiveTextureARB(OpenGL.ColorAlphaUnit1); glReportError(); } } // The client texture state is already setup correctly since we just // have to adjust the texture rectangle state to the texture_2d state } } else { // restore previous state if (OpenGL.DepthBufferWritting ) { glDepthMask( true ); } if (Glide.State.DepthBufferMode != GR_DEPTHBUFFER_DISABLE) { glEnable( GL_DEPTH_TEST ); } // Restore colormask bool rgb = Glide.State.ColorMask; glColorMask(rgb, rgb, rgb, Glide.State.AlphaMask); if ( OpenGL.Blend ) { glEnable( GL_BLEND ); } if ( InternalConfig.EXT_secondary_color ) { glEnable( GL_COLOR_SUM_EXT ); } glReportError(); // Enable fog? bool enable_fog_texture_unit = OpenGL.FogTextureUnit && ((OpenGL.Fog && InternalConfig.FogMode == OpenGLideFogEmulation_EnvCombine) || Glide.State.ColorCombineInvert || Glide.State.AlphaInvert); if (enable_fog_texture_unit) { glActiveTextureARB(OpenGL.FogTextureUnit); glEnable(GL_TEXTURE_2D); // We're not using glDrawArrays to render the frame buffer, // but without disabling the client state the next texture drawn // by RenderDrawTriangles would get the wrong coordinates. 
// Can be observed in Carmageddon: The sky texture is rendered "too high" if (InternalConfig.EXT_compiled_vertex_array) { glClientActiveTextureARB(OpenGL.FogTextureUnit); glEnableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer(1, GL_FLOAT, 0, &OGLRender.TFog[0]); } } if (OpenGL.Fog && InternalConfig.FogMode != OpenGLideFogEmulation_None && InternalConfig.FogMode != OpenGLideFogEmulation_EnvCombine) { glEnable(GL_FOG); } glReportError(); if (OpenGL.ColorAlphaUnit2) { bool enable_coloralpha_texture_unit_2 = OpenGL.ColorAlphaUnitColorEnabledState[1] || OpenGL.ColorAlphaUnitAlphaEnabledState[1]; if (enable_coloralpha_texture_unit_2) { glActiveTextureARB(OpenGL.ColorAlphaUnit2); glEnable(GL_TEXTURE_2D); if (InternalConfig.EXT_compiled_vertex_array) { glClientActiveTextureARB(OpenGL.ColorAlphaUnit2); glEnableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer(4, GL_FLOAT, 0, &OGLRender.TTexture[0]); } } if (enable_fog_texture_unit || enable_coloralpha_texture_unit_2) { glActiveTextureARB(OpenGL.ColorAlphaUnit1); if (InternalConfig.EXT_compiled_vertex_array) { glClientActiveTextureARB(OpenGL.ColorAlphaUnit1); } } if (!OpenGL.ColorAlphaUnitColorEnabledState[0] && !OpenGL.ColorAlphaUnitAlphaEnabledState[0]) { if (InternalConfig.EXT_compiled_vertex_array) { glDisableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer( 4, GL_FLOAT, 0, NULL ); } glDisable(GL_TEXTURE_2D); } // Restore the previous texture environment glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_COMBINE_EXT); } else { if (enable_fog_texture_unit) { glActiveTextureARB(OpenGL.ColorAlphaUnit1); if (InternalConfig.EXT_compiled_vertex_array) { glClientActiveTextureARB(OpenGL.ColorAlphaUnit1); } } if (OpenGL.Texture == false) { if (InternalConfig.EXT_compiled_vertex_array) { glDisableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer( 4, GL_FLOAT, 0, NULL ); } // GL_RECTANGLE_ARB overrides GL_TEXTURE_2D and this // has already been skipped in setState if (!m_useRectangleARB) 
glDisable(GL_TEXTURE_2D); } // Restore the previous texture environment SetColorCombineState(); } // GL_RECTANGLE_ARB overrides GL_TEXTURE_2D if (m_useRectangleARB) { glDisable(GL_TEXTURE_RECTANGLE_ARB); glReportError(); } glReportError(); // This must be a forced update because GlideState changes of ChromaKeyMode // that don't change the corresponding GL-state are filtered out ForceChromaKeyAndAlphaStateUpdate(); } glReportError(); VERIFY_ACTIVE_TEXTURE_UNIT(OpenGL.ColorAlphaUnit1); VERIFY_TEXTURE_ENABLED_STATE(); } bool Framebuffer::draw(const tilesize* tilesizetable, bool pixelpipeline) { #ifdef OGL_FRAMEBUFFER GlideMsg( "Framebuffer::draw(---, %d)\n", pixelpipeline); #endif glReportErrors("Framebuffer::draw()"); bool init_second_textureunit = pixelpipeline && OpenGL.ColorAlphaUnit2; FxU32* texbuffer = reinterpret_cast(m_texbuffer->Address); // Render the tiles GLint n = 0; GLint x; GLint y = 0; GLint y_step; for(int w = 0; y < m_height && w < MaxTiles; w++, y += y_step) { y_step = tilesizetable[w].y; x = 0; GLint x_step; for(int v = 0; x < m_width && v < MaxTiles; v++, x += x_step) { x_step = tilesizetable[w].x[v]; // Use unique (but always the same) name for each texture in order // to maintain the size and avoid vram memory reallocation GLint texturename = m_textureNames[n]; const TileUpdateState updateState = createTextureData(texbuffer, x, y, x_step, y_step, n); if (updateState != TileUpdateState_TileEmpty) { #ifdef DEBUG_TILE_RENDERING unsigned int color; if (updateState == TileUpdateState_TileDownloadToGPU) // edges of downloaded tiles are red color = 0xff0000ff; else // edges of rendered tiles are cyan color = 0x00ffffff; ((long*) texbuffer)[0] = ((long*) texbuffer)[1] = ((long*) texbuffer)[x_step] = ((long*) texbuffer)[x_step -1] = ((long*) texbuffer)[x_step -2] = ((long*) texbuffer)[2 * x_step -1] = ((long*) texbuffer)[(x_step - 1) * y_step] = ((long*) texbuffer)[(x_step - 1) * y_step + 1] = ((long*) texbuffer)[(x_step - 2) * y_step] = ((long*) 
texbuffer)[x_step * y_step - 1] = ((long*) texbuffer)[x_step * y_step - 2] = ((long*) texbuffer)[x_step * (y_step - 1) - 1] = color; #endif glBindTexture(GL_TEXTURE_2D, texturename); if (init_second_textureunit) { glActiveTextureARB(OpenGL.ColorAlphaUnit2); glBindTexture(GL_TEXTURE_2D, texturename); glActiveTextureARB(OpenGL.ColorAlphaUnit1); } #ifndef DEBUG_TILE_RENDERING if (updateState == TileUpdateState_TileDownloadToGPU || InternalConfig.APPLE_client_storage == false) { glTexImage2D(GL_TEXTURE_2D, 0, m_glInternalFormat, x_step, y_step, 0, m_glFormat, m_glType, texbuffer); glReportError(); } #endif static struct { const GLfloat bl[4]; const GLfloat br[4]; const GLfloat tr[4]; const GLfloat tl[4]; } texcoords = { {0.0, 0.0, 1.0, 1.0}, {1.0, 0.0, 1.0, 1.0}, {1.0, 1.0, 1.0, 1.0}, {0.0, 1.0, 1.0, 1.0} }; glBegin(GL_QUADS); // counter clockwise glColor3f(1.0, 1.0, 1.0); glTexCoord4fv(&texcoords.bl[0]); if (init_second_textureunit) { glMultiTexCoord4fvARB(OpenGL.ColorAlphaUnit2, &texcoords.bl[0]); } glVertex3f(x, y, m_glDepth); glColor3f(1.0, 1.0, 1.0); glTexCoord4fv(&texcoords.br[0]); if (init_second_textureunit) { glMultiTexCoord4fvARB(OpenGL.ColorAlphaUnit2, &texcoords.br[0]); } glVertex3f(x + x_step, y, m_glDepth); glColor3f(1.0, 1.0, 1.0); glTexCoord4fv(&texcoords.tr[0]); if (init_second_textureunit) { glMultiTexCoord4fvARB(OpenGL.ColorAlphaUnit2, &texcoords.tr[0]); } glVertex3f(x + x_step, y + y_step, m_glDepth); glColor3f(1.0, 1.0, 1.0); glTexCoord4fv(&texcoords.tl[0]); if (init_second_textureunit) { glMultiTexCoord4fvARB(OpenGL.ColorAlphaUnit2, &texcoords.tl[0]); } glVertex3f(x, y + y_step , m_glDepth); glEnd(); glReportError(); // Advance to the next texbuffer location texbuffer += x_step * y_step; } n ++; } } s_Framebuffer.SetRenderBufferChanged(); return y == m_height && x == m_width; } bool Framebuffer::drawCompiledVertexArrays(const tilesize* tilesizetable, int vertexarrayindex, int tilecount, bool pixelpipeline) { #ifdef OGL_FRAMEBUFFER GlideMsg( 
"Framebuffer::draw(---, %d)\n", pixelpipeline); #endif glReportErrors("Framebuffer::drawCompiledVertexArrays()"); // Finish rendering RenderUnlockArrays(); // Transfer coords to VRAM glLockArraysEXT(vertexarrayindex * 3, tilecount * 6); OGLRender.BufferLocked = true; const bool init_second_textureunit = pixelpipeline && (OpenGL.ColorAlphaUnitColorEnabledState[1] || OpenGL.ColorAlphaUnitAlphaEnabledState[1]); FxU32* texbuffer = reinterpret_cast(m_texbuffer->Address); // Render the tiles GLint n = 0; GLint x; GLint y = 0; GLint y_step; const GLenum textureTarget = m_useRectangleARB ? GL_TEXTURE_RECTANGLE_ARB : GL_TEXTURE_2D; for(int w = 0; y < m_height && w < MaxTiles; w++, y += y_step) { y_step = tilesizetable[w].y; x = 0; GLint x_step; for(int v = 0; x < m_width && v < MaxTiles; v++, x += x_step) { x_step = tilesizetable[w].x[v]; const TileUpdateState updateState = createTextureData(texbuffer, x, y, x_step, y_step, n); if (updateState != TileUpdateState_TileEmpty) { #ifdef DEBUG_TILE_RENDERING unsigned int color; if (updateState == TileUpdateState_TileDownloadToGPU) // edges of downloaded tiles are red color = 0xff0000ff; else // edges of rendered tiles are cyan color = 0x00ffffff; ((long*) texbuffer)[0] = ((long*) texbuffer)[1] = ((long*) texbuffer)[x_step] = ((long*) texbuffer)[x_step -1] = ((long*) texbuffer)[x_step -2] = ((long*) texbuffer)[2 * x_step -1] = ((long*) texbuffer)[x_step * (y_step - 2)] = ((long*) texbuffer)[x_step * (y_step - 1)] = ((long*) texbuffer)[x_step * (y_step - 1) + 1] = ((long*) texbuffer)[x_step * y_step - 1] = ((long*) texbuffer)[x_step * y_step - 2] = ((long*) texbuffer)[x_step * (y_step - 1) - 1] = color; #endif // Use unique (but always the same) name for each // texture in order to be able to reuse tile data const GLint texturename = m_textureNames[n]; // The texture rectangle is better suited for video, // which is close to a framebuffer if (init_second_textureunit) { glActiveTextureARB(OpenGL.ColorAlphaUnit2); glReportError(); 
glBindTexture(textureTarget, texturename); glReportError(); glActiveTextureARB(OpenGL.ColorAlphaUnit1); glReportError(); } glBindTexture(textureTarget, texturename); glReportError(); #ifndef DEBUG_TILE_RENDERING if (updateState == TileUpdateState_TileDownloadToGPU || InternalConfig.APPLE_client_storage == false) #endif { glTexImage2D(textureTarget, 0, m_glInternalFormat, x_step, y_step, 0, m_glFormat, m_glType, texbuffer); glReportError(); } // Draw the tile glDrawArrays(GL_TRIANGLES, vertexarrayindex * 3 + n * 6, 6); // Advance to the next texbuffer location texbuffer += x_step * y_step; glReportError(); } n++; } } s_Framebuffer.SetRenderBufferChanged(); return y == m_height && x == m_width; } int Framebuffer::buildVertexArrays(const tilesize* tilesizetable, int vertexarrayindex) { // Compute coordinates for compiled vertex arrays TColorStruct* pC = &OGLRender.TColor[vertexarrayindex]; TVertexStruct* pV = &OGLRender.TVertex[vertexarrayindex]; TTextureStruct* pTS = &OGLRender.TTexture[vertexarrayindex]; int n = 0; GLint y = 0; GLint y_step; for(int w = 0; y < m_height && w < MaxTiles; w++, y += y_step) { y_step = tilesizetable[w].y; GLint x = 0; GLint x_step; for(int v = 0; x < m_width && v < MaxTiles; v++, x += x_step) { x_step = tilesizetable[w].x[v]; // Write coordinates counter clockwise into render buffers pC->ar = pC->ag = pC->ab = pC->br = pC->bg = pC->bb = pC->cr = pC->cg = pC->cb = pC->aa = pC->ba = pC->ca = 1.0f; pV->ax = x; pV->ay = y; pV->bx = x + x_step; pV->by = y; pV->cx = x + x_step; pV->cy = y + y_step; pV->az = pV->bz = pV->cz = m_glDepth; pTS->as = 0.0; pTS->at = 0.0; pTS->bs = m_useRectangleARB ? x_step : 1.0f; pTS->bt = 0.0; pTS->cs = m_useRectangleARB ? x_step : 1.0f; pTS->ct = m_useRectangleARB ? 
y_step : 1.0f; pTS->aq = pTS->bq = pTS->cq = 0.0f; pTS->aoow = pTS->boow = pTS->coow = 1.0f; pC++; pV++; pTS++; pC->ar = pC->ag = pC->ab = pC->br = pC->bg = pC->bb = pC->cr = pC->cg = pC->cb = pC->aa = pC->ba = pC->ca = 1.0f; pV->ax = x + x_step; pV->ay = y + y_step; pV->bx = x; pV->by = y + y_step; pV->cx = x; pV->cy = y; pV->az = pV->bz = pV->cz = m_glDepth; pTS->as = m_useRectangleARB ? x_step : 1.0f; pTS->at = m_useRectangleARB ? y_step : 1.0f; pTS->bs = 0.0; pTS->bt = m_useRectangleARB ? y_step : 1.0f; pTS->cs = 0.0; pTS->ct = 0.0; pTS->aq = pTS->bq = pTS->cq = 0.0f; pTS->aoow = pTS->boow = pTS->coow = 1.0f; pC++; pV++; pTS++; n++; } } return n; } #ifdef __ALTIVEC__ // altivec code inline Framebuffer::TileUpdateState Framebuffer::Convert565Kto8888_AV(FxU16* buffer1, register FxU32* buffer2, register FxU32 width, register FxU32 height, register FxU32 stride, int checksumIndex) { const vector bool short chromakey_565_av = m_ChromaKey.Vector; const int width_av = width >> 3; // 8 16-bit words const int stride_av = stride >> 3; // 8 16-bit words const int jump_av = width_av + stride_av; vector bool short* src_av = (vector bool short*) buffer1; // Setup channel 0 for reading one row of 565 ushorts from src into the L1 cache // This isn't read again soon, just written back once so we can bypass L2 cache const int src_control = (((width_av >> 4) & 0x1f) << 3) + (1 << 8) + (stride_av << 16); vec_dstt(src_av, src_control, 0); int h = height; // loop through the src to check whether anything has to be copied at all vector bool short* stop_zero_av = &src_av[width_av]; do { do { const vector bool short pixels_565_av = *src_av; if (!vec_all_eq(pixels_565_av, chromakey_565_av)) goto create_8888_texture_1_av; // Test clear first before jumping to create_8888_texture_1_av src_av++; } while (src_av != stop_zero_av); src_av += stride_av; // Update channel 0 to prefetch the next row into the L1 cache vec_dstt(src_av, src_control, 0); stop_zero_av += jump_av; } while (--h); 
return TileUpdateState_TileEmpty; create_8888_texture_1_av: // Delete dst up to the last chromakey entry in src stop_zero_av = src_av; src_av = (vector bool short*) buffer1; vector unsigned long* dst_av = (vector unsigned long*) buffer2; const vector unsigned long null_av = vec_splat_u32(0); // We're just writing to dst, no reading must occur, and starting a prefetch is a bad idea h = height; vector bool short* stop_av = &src_av[width_av]; do { do { if (src_av == stop_zero_av) goto create_8888_texture_2_av; // Test clear first // clear cacheline to prevent it from being read-in (32 bytes = 2 altivec writes) // we're just clearing the cache line since we're going to write zeros anyway __dcbz(dst_av, 0); dst_av += 2; src_av++; } while (src_av != stop_av); src_av += stride_av; stop_av += jump_av; } while (--h); return TileUpdateState_TileEmpty; create_8888_texture_2_av: // Build permute vector for storing high/lo 565 pixels into RgbxA // - results in R = r565+ggg, G = gggbbbbb, B = 0, A = glAlpha // -> good for comparison, green and blue are converted afterwards const vector unsigned char permute_hi_av = {0x00, 0x01, 0x12, 0x13, 0x02, 0x03, 0x16, 0x17, 0x04, 0x05, 0x1a, 0x1b, 0x06, 0x07, 0x1e, 0x1f}; // Computing the permute table just takes 2 instructions instead of 1 instruction + 4 memory reads const vector unsigned char permute_lo_av = vec_or(permute_hi_av, vec_splat_u8(8)); // const vector unsigned long alpha_8888_av = {m_glAlpha, m_glAlpha, m_glAlpha, m_glAlpha}; const vector unsigned long alpha_8888_av = m_glAlpha.Vector; // Build chromakey and alpha RgbxA vector const vector unsigned long chromakey_RgbxA_av = vec_perm((const vector unsigned long) chromakey_565_av, alpha_8888_av, permute_lo_av); // Constants const vector unsigned long const_3_av = vec_splat_u32(3); const vector unsigned long const_5_av = vec_splat_u32(5); // R5G6B500AA color masks const vector unsigned long mask_8888_ra = {0xf80000ff, 0xf80000ff, 0xf80000ff, 0xf80000ff}; const vector unsigned 
long mask_8888_g = {0x07e00000, 0x07e00000, 0x07e00000, 0x07e00000}; // Computing the mask just takes 2 instructions instead of 1 instruction + 4 memory reads const vector unsigned long mask_8888_b = vec_sr(mask_8888_ra, vec_splat_u32(11)); vector unsigned long pixels_8888_src_av; vector bool long mask; vector unsigned long p; vector unsigned long q; vector unsigned long pixels_8888_dst_av; // Checksum the tile vector unsigned long c = null_av; vector unsigned long d; // Continue the loop and convert pixels from 565 to 8888 vec_dstt(src_av, src_control, 0); do { do { const vector unsigned long pixels_565_av = (const vector unsigned long) (*src_av); // tile checksum part 1 d = vec_sr(c, const_5_av); c = vec_add(c, pixels_565_av); // restore chroma key for next update *src_av++ = chromakey_565_av; // hi-word pixels pixels_8888_src_av = vec_perm(pixels_565_av, alpha_8888_av, permute_hi_av); mask = vec_cmpeq(pixels_8888_src_av, chromakey_RgbxA_av); // Keep red and alpha component pixels_8888_dst_av = vec_and(pixels_8888_src_av, mask_8888_ra); // add green component p = vec_and(pixels_8888_src_av, mask_8888_g); q = vec_sr(p, const_3_av); pixels_8888_dst_av = vec_or(pixels_8888_dst_av, q); // add blue component p = vec_and(pixels_8888_src_av, mask_8888_b); q = vec_sr(p, const_5_av); pixels_8888_dst_av = vec_or(pixels_8888_dst_av, q); // We're just writing to dst and thus can clear the cacheline in order // to avoid the read-in from system memory (32 bytes = 2 altivec writes) // Note: This is a G4 hack, but on a G5 the code will be fast enough anyway __dcbz(dst_av, 0); // Select between pixels and chromakey *dst_av++ = vec_sel(pixels_8888_dst_av, null_av, mask); // tile checksum part 2 c = vec_xor(c, d); // lo-word pixels pixels_8888_src_av = vec_perm(pixels_565_av, alpha_8888_av, permute_lo_av); mask = vec_cmpeq(pixels_8888_src_av, chromakey_RgbxA_av); // Keep red and alpha component pixels_8888_dst_av = vec_and(pixels_8888_src_av, mask_8888_ra); // add green component p 
= vec_and(pixels_8888_src_av, mask_8888_g); q = vec_sr(p, const_3_av); pixels_8888_dst_av = vec_or(pixels_8888_dst_av, q); // add blue component p = vec_and(pixels_8888_src_av, mask_8888_b); q = vec_sr(p, const_5_av); pixels_8888_dst_av = vec_or(pixels_8888_dst_av, q); // Select between pixels and chromakey *dst_av++ = vec_sel(pixels_8888_dst_av, null_av, mask); } while (src_av != stop_av); src_av += stride_av; vec_dstt(src_av, src_control, 0); stop_av += jump_av; } while (--h); // Skip downloading tile data to the gpu if the content hasn't changed if (vec_all_eq(c, m_tileChecksums[checksumIndex])) return TileUpdateState_TileDrawOnly; // The tile has been converted, been changed and must be downloaded to the gpu m_tileChecksums[checksumIndex] = c; return TileUpdateState_TileDownloadToGPU; } #endif // Non-Altivec-code inline Framebuffer::TileUpdateState Framebuffer::Convert565Kto8888(FxU16* buffer1, register FxU32* buffer2, register FxU32 width, register FxU32 height, register FxU32 stride) { // Process two pixels at once const register unsigned long chromakey1 = m_ChromaKey.Scalar << 16; const register unsigned long chromakey2 = m_ChromaKey.Scalar; const register unsigned long chromakey12 = chromakey1 | chromakey2; width = width >> 1; stride = stride >> 1; register unsigned long pixel; register unsigned long* stop; register unsigned long jump = width + stride; register unsigned long* src = reinterpret_cast(buffer1); // check if tile must be processed in advance // to avoid useless writes to main memory // The tile should at least fit into the second level cache // so reading it again wouldn't hurt as much as doing needless writes register unsigned long h = height; stop = &src[width]; do { do { pixel = *src++; if (pixel != chromakey12) goto create_8888_texture; } while (src != stop); src += stride; stop += jump; } while (--h); return TileUpdateState_TileEmpty; create_8888_texture: const register unsigned long alpha = m_glAlpha.Scalar; const register unsigned long 
null = 0x00000000; const register unsigned long mask_pixel1 = 0xffff0000; const register unsigned long mask_pixel2 = 0x0000ffff; const register unsigned long mask_pixel1_r = 0xf8000000; const register unsigned long mask_pixel1_g = 0x07e00000; const register unsigned long mask_pixel1_b = 0x001f0000; const register unsigned long mask_pixel2_r = 0x0000f800; const register unsigned long mask_pixel2_g = 0x000007e0; const register unsigned long mask_pixel2_b = 0x0000001f; src = reinterpret_cast(buffer1); stop = &src[width]; do { do { // GL_RGBA pixel = *src; if (pixel == chromakey12) { *buffer2++ = null; *buffer2++ = null; } else { *src = chromakey12; if ( (pixel & mask_pixel1) == chromakey1) { *buffer2++ = null; } else { *buffer2++ = ( alpha | // A ( pixel & mask_pixel1_b ) >> 5 | // B ( pixel & mask_pixel1_g ) >> 3 | // G ( pixel & mask_pixel1_r )); // R } if ( (pixel & mask_pixel2) == chromakey2) { *buffer2++ = null; } else { *buffer2++ = ( alpha | // A ( pixel & mask_pixel2_b ) << 11 | // B ( pixel & mask_pixel2_g ) << 13 | // G ( pixel & mask_pixel2_r ) << 16); // R } } src++; } while (src != stop); src += stride; stop += jump; } while (--height); return TileUpdateState_TileDownloadToGPU; } inline Framebuffer::TileUpdateState Framebuffer::Convert1555Kto8888(FxU16* buffer1, register FxU32* buffer2, register FxU32 width, register FxU32 height, register FxU32 stride) { // Process two pixels at once register unsigned long pixel; register unsigned long x; register unsigned long* src = reinterpret_cast(buffer1); const unsigned long null = 0x00000000; register unsigned long dstpixel = null; const register unsigned long chromakey1 = m_ChromaKey.Scalar << 16; const register unsigned long chromakey2 = m_ChromaKey.Scalar; const register unsigned long chromakey12 = chromakey1 | chromakey2; const register unsigned long alpha = m_glAlpha.Scalar; const register unsigned long mask_pixel1 = 0xffff0000; const register unsigned long mask_pixel2 = 0x0000ffff; const register unsigned 
long mask_pixel1_r = 0x7c000000; const register unsigned long mask_pixel1_g = 0x03e00000; const register unsigned long mask_pixel1_b = 0x001f0000; const register unsigned long mask_pixel2_r = 0x00007c00; const register unsigned long mask_pixel2_g = 0x000003e0; const register unsigned long mask_pixel2_b = 0x0000001f; width >>= 1; stride >>= 1; do { x = width; do { // GL_RGBA pixel = *src; if (pixel == chromakey12) { *buffer2++ = null; *buffer2++ = null; } else { *src = chromakey12; if ( (pixel & mask_pixel1) == chromakey1) { *buffer2++ = null; } else { dstpixel = ( alpha | // A ( pixel & mask_pixel1_b ) >> 5 | // B ( pixel & mask_pixel1_g ) >> 2 | // G ( pixel & mask_pixel1_r ) << 1); // R *buffer2++ = dstpixel; } if ( (pixel & mask_pixel2) == chromakey2) { *buffer2++ = null; } else { dstpixel = ( alpha | // A ( pixel & mask_pixel2_b ) << 11 | // B ( pixel & mask_pixel2_g ) << 14 | // G ( pixel & mask_pixel2_r ) << 17); // R *buffer2++ = dstpixel; } } src++; } while (--x); src += stride; } while (--height); return dstpixel != null ? 
TileUpdateState_TileDownloadToGPU : TileUpdateState_TileEmpty; } inline Framebuffer::TileUpdateState Framebuffer::ConvertARGB8888Kto8888(FxU32* buffer1, register FxU32* buffer2, register FxU32 width, register FxU32 height, register FxU32 stride) { // Process two pixels at once const register unsigned long chromakey = m_ChromaKey.Scalar || (m_ChromaKey.Scalar << 16); register unsigned long pixel; register unsigned long* stop; register unsigned jump = width + stride; register unsigned long* src = buffer1; // check if tile must be processed in advance // to avoid useless writes to main memory // The tile should at least fit into the second level cache // so reading it again wouldn't hurt as much as doing needless writes register unsigned long h = height; stop = &src[width]; do { do { pixel = *src++; if (pixel != chromakey) goto create_8888_texture; } while (src != stop); src += stride; stop += jump; } while (--h); return TileUpdateState_TileEmpty; create_8888_texture: const register unsigned long alpha = m_glAlpha.Scalar; src = buffer1; stop = &src[width]; do { do { // GL_RGBA pixel = *src; if (pixel == chromakey) { *buffer2++ = 0; } else { *src = chromakey; *buffer2++ = (pixel << 8) | alpha; } src++; } while (src != stop); src += stride; stop += jump; } while (--height); return TileUpdateState_TileDownloadToGPU; } inline Framebuffer::TileUpdateState Framebuffer::createTextureData(FxU32* texbuffer, FxU32 x, FxU32 y, FxU32 x_step, FxU32 y_step, int checksumIndex) { FxU32 stride = (m_width - x_step); FxU32 index = x + y * m_width; if (m_framebuffer->WriteMode == GR_LFBWRITEMODE_565) { #ifdef __ALTIVEC__ if (UserConfig.VectorUnitType == OpenGLideVectorUnitType_Altivec) { #ifdef OGL_FRAMEBUFFER const vector unsigned long c = m_tileChecksums[checksumIndex]; #endif TileUpdateState state = Convert565Kto8888_AV(&m_framebuffer->Address[index], texbuffer, x_step, y_step, stride, checksumIndex); #ifdef OGL_FRAMEBUFFER GlideMsg("Tile %d (%d,%d)-(%d,%d) update state is %s 
(%vlx)->(%vlx)\n", checksumIndex, x, y, x_step, y_step, (state ==TileUpdateState_TileDownloadToGPU) ? "DownLoadToGPU" : ((state == TileUpdateState_TileDrawOnly) ? "DrawOnly" : "TileEmpty"), c, m_tileChecksums[checksumIndex]); #endif return state; } else #endif return Convert565Kto8888(&m_framebuffer->Address[index], texbuffer, x_step, y_step, stride); } else if (m_framebuffer->WriteMode == GR_LFBWRITEMODE_1555) { return Convert1555Kto8888(&m_framebuffer->Address[index], texbuffer, x_step, y_step, stride); } else if (m_framebuffer->WriteMode == GR_LFBWRITEMODE_888) { FxU32* framebuffer = &reinterpret_cast(m_framebuffer->Address)[index]; return ConvertARGB8888Kto8888(framebuffer, texbuffer, x_step, y_step, stride); } else { return TileUpdateState_TileEmpty; } } \ No newline at end of file diff --git a/MacGLide/OpenGLide/GLExtensions.cpp b/MacGLide/OpenGLide/GLExtensions.cpp index d14519f..888411b 100644 --- a/MacGLide/OpenGLide/GLExtensions.cpp +++ b/MacGLide/OpenGLide/GLExtensions.cpp @@ -65,7 +65,7 @@ stExtensionSupport glNecessaryExt[] = { "GL_SGIS_texture_edge_clamp", OGL_EXT_DESIRED, &dummyExtVariable, &InternalConfig.EXT_SGIS_texture_edge_clamp }, { "GL_EXT_paletted_texture", OGL_EXT_DESIRED, &UserConfig.EXT_paletted_texture, &InternalConfig.EXT_paletted_texture }, { "GL_APPLE_packed_pixels", OGL_EXT_REQUIRED, &dummyExtVariable, &dummyExtVariable2 }, - { "GL_APPLE_client_storage", OGL_EXT_DESIRED, &dummyExtVariable, &InternalConfig.EXT_Client_Storage }, + { "GL_APPLE_client_storage", OGL_EXT_DESIRED, &UserConfig.APPLE_client_storage, &InternalConfig.APPLE_client_storage }, { "GL_EXT_compiled_vertex_array", OGL_EXT_DESIRED, &UserConfig.EXT_compiled_vertex_array,&InternalConfig.EXT_compiled_vertex_array }, { "GL_ARB_texture_rectangle", OGL_EXT_DESIRED, &UserConfig.ARB_texture_rectangle, &InternalConfig.ARB_texture_rectangle }, #ifdef OPENGLIDE_SYSTEM_HAS_FOGCOORD @@ -249,7 +249,7 @@ void GLExtensions(void) GlideMsg("Maximum level of anisotropy = %d\n", 
MaxAnisotropyLevel); // Since this a global setting, texture data must not be uploaded from temp buffers - if (InternalConfig.EXT_Client_Storage) + if (InternalConfig.APPLE_client_storage) { glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, true); glReportError(); diff --git a/MacGLide/OpenGLide/GlideSettings.cpp b/MacGLide/OpenGLide/GlideSettings.cpp index 979b13f..5a53739 100644 --- a/MacGLide/OpenGLide/GlideSettings.cpp +++ b/MacGLide/OpenGLide/GlideSettings.cpp @@ -194,7 +194,7 @@ void GlideSettings::defaults() EXT_secondary_color = true; EXT_SGIS_generate_mipmap = true; EXT_SGIS_texture_edge_clamp = true; - EXT_Client_Storage = true; + APPLE_client_storage = true; EXT_compiled_vertex_array = true; ARB_texture_rectangle = true; EXT_texture_filter_anisotropic = true; @@ -255,6 +255,7 @@ GlideSettings::IOErr GlideSettings::read_settings() get("PedanticFrameBufferEmulation", &PedanticFrameBufferEmulation); get("EXT_compiled_vertex_array", &EXT_compiled_vertex_array); get("ARB_texture_rectangle", &ARB_texture_rectangle); + get("APPLE_client_storage", &APPLE_client_storage); get("ARB_multitexture", &ARB_multitexture); get("EXT_paletted_texture", &EXT_paletted_texture); get("EXT_clip_volume_hint", &EXT_clip_volume_hint); @@ -502,6 +503,7 @@ GlideSettings::IOErr GlideSettings::saveSettings() put(); put("EXT_compiled_vertex_array", EXT_compiled_vertex_array); put("ARB_texture_rectangle", ARB_texture_rectangle); + put("APPLE_client_storage", APPLE_client_storage); put("ARB_multitexture", ARB_multitexture); put("EXT_paletted_texture", EXT_paletted_texture); put("EXT_clip_volume_hint", EXT_clip_volume_hint); diff --git a/MacGLide/OpenGLide/GlideSettings.h b/MacGLide/OpenGLide/GlideSettings.h index dc0f8c3..388e2e0 100644 --- a/MacGLide/OpenGLide/GlideSettings.h +++ b/MacGLide/OpenGLide/GlideSettings.h @@ -98,7 +98,7 @@ struct ConfigStruct bool EXT_paletted_texture; bool EXT_SGIS_generate_mipmap; bool EXT_SGIS_texture_edge_clamp; - bool EXT_Client_Storage; + bool 
APPLE_client_storage; bool EXT_compiled_vertex_array; bool ARB_texture_rectangle; #ifdef OPENGLIDE_SYSTEM_HAS_FOGCOORD diff --git a/MacGLide/OpenGLide/PGTexture.cpp b/MacGLide/OpenGLide/PGTexture.cpp index 6e6aed3..f3a4352 100644 --- a/MacGLide/OpenGLide/PGTexture.cpp +++ b/MacGLide/OpenGLide/PGTexture.cpp @@ -60,7 +60,7 @@ void PGTexture::genPaletteMipmaps( FxU32 width, FxU32 height, const FxU8 *data ) } } -PGTexture::PGTexture( int mem_size ) +PGTexture::PGTexture(int mem_size) { m_db = new TexDB( mem_size ); m_palette_dirty = true; @@ -68,8 +68,17 @@ PGTexture::PGTexture( int mem_size ) m_chromakey_mode = GR_CHROMAKEY_DISABLE; m_tex_memory_size = mem_size; m_memory = (FxU8*) AllocBuffer(mem_size, sizeof(FxU8)); - if (InternalConfig.EXT_Client_Storage) + // This is initialized before the rendering context is available and before + // the list of OpenGL extensions can be checked against APPLE_client_storage. + // As a result the texture buffer is allocated bases on the user setting. + // And it might be legal to download textures before grWinOpen() so + // using the user config setting is the best we can do + if (UserConfig.APPLE_client_storage) { + // use client storage to avoid OpenGL-internal copy + // OpenGL may still make a copy if the color format isn't supported natively + // by the graphics card but all xto8888 conversions should benefit from this + // @todo: Not true for OSX, but for now we stay compatible to native OS9 // Alloc 4-times the size of the Glide buffer for texture storage m_textureCache = reinterpret_cast(AllocBuffer(m_tex_memory_size, sizeof(FxU32))); // No memory is wasted here because we save the internal OpenGL copy @@ -623,14 +632,9 @@ bool PGTexture::MakeReady(TTextureStruct* tex_coords, unsigned long number_of_tr } glReportError(); - // use client storage to avoid OpenGL-internal copy - // OpenGL may still make a copy if the color format isn't supported natively - // by the graphics card but all xto8888 conversions should benefit from 
this - // @todo: Not true for OSX, but for now we stay compatible to native OS9 - const bool useClientStorage = InternalConfig.EXT_Client_Storage /* && !subtexcoords */; if (subtexcoords) { - // EXT_Client_Storage doesn't explicitely forbid to adjust pixel unpack :^) + // APPLE_client_storage doesn't explicitely forbid to adjust pixel unpack :^) glPixelStorei(GL_UNPACK_SKIP_PIXELS, subtexcoords->left); glPixelStorei(GL_UNPACK_SKIP_ROWS, subtexcoords->top); glPixelStorei(GL_UNPACK_ROW_LENGTH, texVals.width); @@ -640,11 +644,9 @@ bool PGTexture::MakeReady(TTextureStruct* tex_coords, unsigned long number_of_tr } FxU32* texBuffer; // Which buffer - if (useClientStorage) + if (m_textureCache) { texBuffer = &m_textureCache[m_startAddress]; - // glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, true); - // glReportError(); } else { @@ -658,7 +660,7 @@ bool PGTexture::MakeReady(TTextureStruct* tex_coords, unsigned long number_of_tr { // Read about anisotropy and chromakey issues in macFormatConversions.cpp Convert565Kto8888((FxU16*)data, m_chromakey_value_565, texBuffer, texVals.nPixels); - DownloadMipmapsToOpenGL(4, GL_RGBA, GL_UNSIGNED_BYTE, m_tex_temp, texVals, !use_mipmap_ext); + DownloadMipmapsToOpenGL(4, GL_RGBA, GL_UNSIGNED_BYTE, texBuffer, texVals, !use_mipmap_ext); } else {